# 06: Bias-correction
*Develop a model that resolves differences between the climate model data and those from a more-detailed reference historical timeseries, and then use that model to ensure that future projections also reflect that level of detail.*

In [1]:
import coiled
import numpy as np
import xarray as xr
from utils import gcm_list, load_regions
from xclim import sdba
from xclim.sdba.adjustment import QuantileDeltaMapping

Set up cluster to handle multiprocessing using a Dask client.

In [None]:
# Start a Coiled-managed cluster in us-west-2 with separate VM types for the
# scheduler and the workers.
cluster = coiled.Cluster(
    region="us-west-2",
    scheduler_vm_types=["m7g.4xlarge"],
    worker_vm_types=["m7g.large"],
    n_workers=5,
    spot_policy="spot_with_fallback",
)

# Allow the cluster to adapt between 2 and 50 workers.
cluster.adapt(minimum=2, maximum=50)

# Dask client attached to the cluster.
client = cluster.get_client()

In [25]:
def load_projection(gcm, scenario):
    """
    Open the WBGT-in-the-shade region estimates for one GCM and scenario.

    Reads the `{gcm}-{scenario}.zarr` store from the scratch bucket (described
    in this notebook as the output of `05_aggregate.ipynb`), floors the `time`
    values to whole days, replaces the `WBGT` attributes with a single
    `units = "degC"` entry, and casts `processing_id` to integers.
    """
    store = f"s3://carbonplan-scratch/extreme-heat/wbgt-shade-regions/{gcm}-{scenario}.zarr"
    projection = xr.open_zarr(store)

    projection["time"] = projection["time"].dt.floor("D")
    # drop any existing attributes; only the units are kept
    projection["WBGT"].attrs = {"units": "degC"}
    projection["processing_id"] = projection["processing_id"].astype("int")

    return projection

Load UHE-daily estimates developed in `05_aggregate.ipynb`. This data will be the reference for every bias-correction.

In [5]:
# Open the UHE-daily historical WBGT store and label the WBGT units.
ref = xr.open_zarr(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.1/inputs/"
    "wbgt-UHE-daily-historical.zarr"
)
ref["WBGT"].attrs.update(units="degC")



Load the region information.

In [6]:
# Load the region information for the "central-asia" extension via
# `utils.load_regions`.
# NOTE(review): `regions_df` is not referenced in the visible cells of this
# notebook; confirm whether it is still needed.
regions_df = load_regions(extension="central-asia")

In [7]:
def train_bias_correction(ref_ts, model_ts):
    """
    Prepare the reference and model timeseries and train a
    `QuantileDeltaMapping` bias-correction model on them.

    Both series are converted to the no-leap calendar before training. Gaps
    in the converted model series are filled by linear interpolation along
    `time`. The model is trained additively (`kind="+"`) with 100 quantiles,
    grouped by day of year with a 31-day window.

    Returns the trained model.
    """
    # training happens on the no-leap calendar; results are converted back
    # to gregorian after prediction
    ref_noleap = ref_ts.convert_calendar("noleap")
    model_noleap = model_ts.convert_calendar(
        "noleap", dim="time", align_on="year", missing=np.nan, use_cftime=None
    )

    # days missing after the conversion are NaN: fill them linearly, then
    # keep the whole time axis in a single chunk
    model_filled = model_noleap.interpolate_na(dim="time", method="linear")
    model_filled = model_filled.chunk({"time": -1})

    return QuantileDeltaMapping.train(
        ref_noleap,
        model_filled,
        nquantiles=100,
        kind="+",
        group=sdba.Grouper("time.dayofyear", window=31),
    )

In [48]:
def apply_bias_correction(trained_model, ts, out_store):
    """
    Apply a trained bias-correction model to one model timeseries (whether
    historic or future) and write the result to a Zarr store.

    Parameters
    ----------
    trained_model
        Trained adjustment object, as returned by `train_bias_correction`.
    ts
        Model timeseries to adjust, on whatever calendar the GCM uses.
    out_store
        Destination handed to `to_zarr`; an existing store is overwritten
        (`mode="w"`).

    Returns
    -------
    None
        The bias-corrected series is written to `out_store`.
    """
    # The model is trained on no-leap data with a day-of-year grouper, so the
    # series being adjusted must be on the same calendar. Use the same
    # conversion and gap fill as `train_bias_correction`.
    ts_noleap = (
        ts.convert_calendar(
            "noleap",
            dim="time",
            align_on="year",
            missing=np.nan,
            use_cftime=None,
        )
        .interpolate_na(dim="time", method="linear")
        .chunk({"time": -1})
    )
    bias_corrected = trained_model.adjust(ts_noleap)

    # the rolling monthly bias-correction
    # works with no-leap calendars so convert it back to gregorian;
    # days added by the conversion (e.g. Feb 29) are NaN and are filled by
    # linear interpolation
    bias_corrected = (
        bias_corrected.convert_calendar(
            "gregorian",
            align_on="year",
            missing=np.nan,
            use_cftime=None,
        )
        .interpolate_na(dim="time", method="linear")
        .chunk({"time": -1})
    )
    bias_corrected.to_dataset().to_zarr(
        out_store, zarr_format=2, mode="w", consolidated=True
    )

Data isn't available for all regions. Only apply bias-correction where data is available in both the reference and the modelled datasets.

In [49]:
# Region ids present in the reference data and in the modelled data
# (ACCESS-CM2 historical is used as the representative projection).
ref_places = ref.processing_id.values
modelled_places = load_projection("ACCESS-CM2", "historical")[
    "WBGT"
].processing_id.values
# Sort the intersection so that the order of `processing_id` in every
# downstream subset and output store is deterministic rather than following
# set iteration order.
valid_ids = sorted(set(ref_places) & set(modelled_places))

Define the periods over which the bias-correction will occur.

In [53]:
# note: for central asia ext added more analysis periods
# Each future entry is keyed "<ssp>-<label>" and covers the 20-year window
# from (label - 10) to (label + 9), e.g. "ssp245-2030" -> 2020-2039.
analysis_period_dict = {
    "historical": slice("1985", "2014"),
    **{
        f"{ssp}-{label}": slice(str(label - 10), str(label + 9))
        for ssp in ("ssp245", "ssp370")
        for label in (2030, 2050, 2070, 2080, 2090)
    },
}

Subset the reference dataset to the historical time period used for training (1985-2014).

In [51]:
# Restrict the reference to the historical training period and to the regions
# available in both datasets, then rechunk: full time axis per chunk and
# 850 regions per chunk.
ref = ref.sel(
    time=analysis_period_dict["historical"],
    processing_id=valid_ids,
).chunk({"time": -1, "processing_id": 850})

Load the different datasets into a dictionary rather than a single Xarray object, because a dictionary can hold timeseries on the different calendars that different GCMs use. Then, for each GCM separately, train a bias-correction model and use it to bias-correct the historical and future projections.

In [None]:

### UPDATE - SUBSET!
# NOTE: only the first GCM is processed here (`gcm_list[0:1]`); widen the
# slice to run every GCM.
for gcm in gcm_list[0:1]:
    # one entry per analysis period, plus the shared reference series
    ts_dict = {"reference": ref["WBGT"]}
    for scenario in analysis_period_dict:
        # the store is keyed by the experiment name, i.e. the part of the
        # scenario label before the first "-"
        ts_dict[scenario] = (
            load_projection(gcm, scenario.split("-")[0])["WBGT"]
            .sel(processing_id=valid_ids)
            .chunk({"time": -1, "processing_id": 850})
            .sel(time=analysis_period_dict[scenario])
        )
        print(f"loaded: {scenario}")

    # train once per GCM on the historical period
    trained_model = train_bias_correction(
        ts_dict["reference"], ts_dict["historical"]
    )

    # apply the trained model to the historical and every future period
    for scenario in analysis_period_dict:
        apply_bias_correction(
            trained_model,
            ts_dict[scenario],
            f"s3://carbonplan-scratch/extreme-heat/wbgt-shade-regions/{gcm}-{scenario}-bc.zarr",
        )