## Benchmarks 

**Read data**

In [1]:
import xarray as xr
# Open every zarr store matching each split's glob as a single dataset.
train_ds, test_ds = (
    xr.open_mfdataset(pattern, engine="zarr")
    for pattern in ("data/train/*", "data/test/*")
)

#### Quantile Mapping

In [2]:
import numpy as np
from xclim import sdba

# Tag both variables with an empty `units` attribute before handing them to
# xclim (presumably so its unit handling treats them as compatible — confirm).
train_ds.ecmwf.attrs['units'] = ""
train_ds.chirps.attrs['units'] = ""
# Put the whole time axis in a single chunk.
train_ds = train_ds.chunk(dict(time=-1))

# Quantile nodes 0.0, 0.1, ..., 1.0. `linspace` states the number of nodes
# explicitly; a float-step `arange` derives it from (stop - start) / step and
# can gain or lose the end point through rounding.
QM_QUANTILES = np.linspace(0, 1, 11)

# Fit QM model: `chirps` is the reference, `ecmwf` the data to be adjusted.
# NOTE(review): `kind` is left at the xclim default — confirm that is the
# intended correction type for this variable.
QM = sdba.EmpiricalQuantileMapping.train(
    train_ds.chirps, train_ds.ecmwf, nquantiles=QM_QUANTILES, group="time",
)

In [4]:
# Give the test forecasts an empty `units` attribute and a single chunk
# along time before applying the fitted mapping.
test_ds["ecmwf"].attrs["units"] = ""
test_ds = test_ds.chunk({"time": -1})

# Bias correct test forecasts
test_qm = QM.adjust(
    test_ds["ecmwf"],
    interp="linear",
    extrapolation="constant",
)

In [3]:
# Bias correct train forecasts for SPI computation
train_qm = QM.adjust(
    train_ds["ecmwf"],
    interp="linear",
    extrapolation="constant",
)

In [5]:
# Save results. mode="w" overwrites an existing store so that the notebook
# can be re-run from a fresh kernel; the default mode raises if the store
# already exists.
test_qm.to_zarr("data/benchmarks_results/QM/test_quantile_mapping.zarr", mode="w")

<xarray.backends.zarr.ZarrStore at 0x7fdcdc09a740>

In [5]:
# Save results. mode="w" overwrites an existing store so that the notebook
# can be re-run from a fresh kernel; the default mode raises if the store
# already exists.
train_qm.to_zarr("data/benchmarks_results/QM/train_quantile_mapping.zarr", mode="w")

<xarray.backends.zarr.ZarrStore at 0x7f9bc72810c0>

  _reproject(


#### Bilinear interpolation

In [6]:
import hdc.algo
import rioxarray
import numpy as np
import xarray as xr

from hip.analysis.aoi.analysis_area import AnalysisArea

In [9]:
# Study-area bounding box (presumably min lon, min lat, max lon, max lat —
# confirm against AnalysisArea) and the analysis grid/time configuration.
BBOX = (28.125, -29.875, 41.875, -10.125)
area = AnalysisArea(
    bbox=BBOX,
    resolution=0.05,
    datetime_range="1981-01-01/2023-12-31",
)

# Read issue month forecasts
def read_one_issue_month_forecasts(issue: int):
    """Load the ECMWF SEAS5 daily forecasts issued in one calendar month.

    Reads the product ``RFH_FORECASTS_SEAS5_ISSUE{issue}_DAILY`` through the
    module-level ``area`` with bilinear resampling, keeps ensemble member 0
    only, and drops every time step whose calendar month differs from
    ``issue``.

    Parameters
    ----------
    issue : int
        Issue month, from 1 (January) to 12 (December).

    Returns
    -------
    The loaded forecast for ensemble member 0, restricted to the time steps
    falling in the issue month (presumably an ``xarray.DataArray`` — confirm
    against ``AnalysisArea.get_dataset``).

    Raises
    ------
    ValueError
        If ``issue`` is not a month number between 1 and 12.
    """
    # Fail early with a clear message rather than building a product name
    # for a month number outside the calendar.
    if not 1 <= issue <= 12:
        raise ValueError(f"issue must be a month number in 1..12, got {issue!r}")

    da = area.get_dataset(
        ["ECMWF", f"RFH_FORECASTS_SEAS5_ISSUE{issue}_DAILY"],
        load_config={
            "gridded_load_kwargs": {
                "resampling": "bilinear",
            }
        },
    ).sel(ensemble=0)
    # Keep only the time steps that fall inside the issue month itself.
    da = da.where(da.time.dt.month == issue, drop=True)
    return da

# One filtered forecast per issue month (1 through 12), joined along time.
da_all_months = xr.concat(
    list(map(read_one_issue_month_forecasts, range(1, 13))),
    dim="time",
)



In [11]:
# Sum by dekad (grouping key is the dekad start date of every time step),
# then put the dimensions in (latitude, longitude, time) order and drop
# the spatial_ref variable.
dekad_start = da_all_months.time.dekad.start_date
fc = (
    da_all_months.groupby(dekad_start)
    .sum()
    .transpose("latitude", "longitude", "time")
    .drop_vars("spatial_ref")
)

# Select the time steps present in the train and test datasets
train = fc.sel(time=train_ds.time)
test = fc.sel(time=test_ds.time)

In [12]:
# Filter wet season
START_SEASON = 10
END_SEASON = 4

# Calendar months of the season, both ends included. The season length is
# taken modulo 12 so the list is right whether or not the season wraps past
# December (10 -> 4 gives 10, 11, 12, 1, 2, 3, 4; 3 -> 6 gives 3, 4, 5, 6).
n_season_months = (END_SEASON - START_SEASON) % 12 + 1
season_months = (np.arange(START_SEASON - 1, START_SEASON - 1 + n_season_months) % 12) + 1

# Drop every time step whose month is outside the season.
train = train.where(train.time.dt.month.isin(season_months), drop=True)
test = test.where(test.time.dt.month.isin(season_months), drop=True)

In [13]:
# Keep coordinates of 28*28 square: select exactly the latitude/longitude
# values carried by the train and test datasets.
train = train.sel(
    {"latitude": train_ds["latitude"], "longitude": train_ds["longitude"]}
)
test = test.sel(
    {"latitude": test_ds["latitude"], "longitude": test_ds["longitude"]}
)

In [11]:
# Rename dataarray: both splits get the same variable name
train, test = (split.rename("bilint") for split in (train, test))

In [14]:
# Save bilinear interpolation benchmark. mode="w" overwrites existing stores
# so that the notebook can be re-run from a fresh kernel; the default mode
# raises if the store already exists.
train.to_zarr("data/benchmarks_results/bilinear/train_bilinear.zarr", mode="w")
test.to_zarr("data/benchmarks_results/bilinear/test_bilinear.zarr", mode="w")

<xarray.backends.zarr.ZarrStore at 0x7f9b8d9058c0>