# 06: Bias-correction
*Develop a model that resolves differences between the climate model data and a more detailed historical reference timeseries, and then use that model to ensure that future projections also reflect that level of detail.*

In [None]:
import fsspec
import geopandas as gpd
import numpy as np
import xarray as xr
from dask.distributed import Client
from utils import gcm_list
from xclim import sdba
from xclim.sdba.adjustment import QuantileDeltaMapping

Set up cluster to handle multiprocessing using a Dask client.

In [None]:
# Start a Dask distributed client with 32 workers; no scheduler address is
# given, so the cluster is created by the client itself.
client = Client(n_workers=32)
# Bare expression as the last line of the cell — presumably so the notebook
# renders the client summary.
client

In [None]:
def load_projection(gcm, scenario):
    """
    Open the WBGT-in-the-shade regional estimate for one GCM/scenario pair.

    The store is one of those produced by `05_aggregate.ipynb`. Any attributes
    already present on `WBGT` are discarded and replaced by a single `units`
    attribute ("degC"), and `processing_id` is cast to integers.

    Parameters
    ----------
    gcm : str
        GCM name, used to build the store path.
    scenario : str
        Scenario name, used to build the store path (e.g. "historical").

    Returns
    -------
    xarray.Dataset
        The opened dataset with the adjustments described above.
    """
    store = (
        f"s3://carbonplan-extreme-heat/temp/wbgt-shade-regions/{gcm}-{scenario}.zarr"
    )
    ds = xr.open_zarr(store)

    # replace (rather than extend) whatever attributes came with the store
    ds["WBGT"].attrs = {"units": "degC"}
    ds["processing_id"] = ds["processing_id"].astype("int")
    return ds

Load UHE-daily estimates developed in `05_aggregate.ipynb`. This data will be the reference for every bias-correction.

In [None]:
# Open the reference (UHE-daily) WBGT store.
ref = xr.open_zarr(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/wbgt-UHE-daily-historical.zarr"
)
# Tag the variable with its units; presumably required by the xclim
# bias-correction routines used further down — confirm.
ref["WBGT"].attrs.update(units="degC")

Load the region information.

In [None]:
# Read the regions-and-cities file from S3 with geopandas.
# NOTE(review): `regions_df` is not referenced again in the visible part of
# this notebook — confirm whether it is still needed.
with fsspec.open(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/all_regions_and_cities.json"
) as file:
    regions_df = gpd.read_file(file)

In [None]:
def train_bias_correction(ref_ts, model_ts, gcm):
    """
    Fit a quantile-delta-mapping model from a reference and a modelled series.

    Both series are first moved onto the no-leap calendar. Days that the
    conversion leaves empty in the modelled series are filled by linear
    interpolation along time.

    Parameters
    ----------
    ref_ts : xarray.DataArray
        Reference timeseries (training target).
    model_ts : xarray.DataArray
        Modelled timeseries covering the training period.
    gcm : str
        Accepted for symmetry with the caller; not used inside this function.

    Returns
    -------
    QuantileDeltaMapping
        The trained adjustment object.
    """
    # Training is done on the no-leap calendar; results are converted back to
    # gregorian after prediction.
    ref_noleap = ref_ts.convert_calendar("noleap")
    model_noleap = model_ts.convert_calendar(
        "noleap",
        dim="time",
        align_on="year",
        missing=np.nan,
        use_cftime=None,
    )

    # fill the NaN days introduced by the conversion, then keep the whole
    # time axis in a single chunk
    model_filled = model_noleap.interpolate_na(dim="time", method="linear")
    model_filled = model_filled.chunk({"time": -1})

    # additive QDM with 100 quantiles, grouped by day of year with a 31-step
    # window around each day
    doy_grouper = sdba.Grouper("time.dayofyear", window=31)
    return QuantileDeltaMapping.train(
        ref_noleap,
        model_filled,
        nquantiles=100,
        kind="+",
        group=doy_grouper,
    )

In [None]:
def apply_bias_correction(trained_model, ts, gcm, out_store):
    """
    Bias-correct one model timeseries (historical or future) and store it.

    The adjusted series is converted to the gregorian calendar, days left
    empty by that conversion are filled by linear interpolation along time,
    and the result is written to `out_store`, overwriting any existing store.

    Parameters
    ----------
    trained_model : QuantileDeltaMapping
        Model returned by `train_bias_correction`.
    ts : xarray.DataArray
        Timeseries to adjust.
    gcm : str
        Accepted for symmetry with the caller; not used inside this function.
    out_store : str
        Zarr store location to write to.

    Returns
    -------
    None
    """
    # NOTE(review): `ts` is handed to `adjust` on whatever calendar it arrives
    # with, whereas training converts its inputs to no-leap — confirm this is
    # intended for GCMs that are not on a no-leap calendar.
    adjusted = trained_model.adjust(ts)

    # the rolling day-of-year correction works with no-leap calendars, so
    # move the result back onto the gregorian calendar
    on_gregorian = adjusted.convert_calendar(
        "gregorian",
        align_on="year",
        missing=np.nan,
        use_cftime=None,
    )

    # fill the days the conversion left as NaN and keep time in one chunk
    gap_filled = on_gregorian.interpolate_na(dim="time", method="linear")
    rechunked = gap_filled.chunk({"time": -1})

    rechunked.to_zarr(out_store, mode="w")

Data isn't available for all regions. Only apply bias-correction where data is available in both the reference and the modelled datasets.

In [None]:
# processing_id values present in the reference dataset
ref_places = ref.processing_id.values
# processing_id values present in the modelled data; only the ACCESS-CM2
# historical store is inspected here.
# NOTE(review): presumably every GCM/scenario store covers the same set of
# regions — confirm.
modelled_places = load_projection("ACCESS-CM2", "historical")[
    "WBGT"
].processing_id.values
# ids found in both datasets. The list is built from a set, so it is not
# sorted; the same list is used to select from both the reference and the
# modelled data below.
valid_ids = list(set(ref_places) & set(modelled_places))

Define the periods over which the bias-correction will occur.

In [None]:
# Maps each analysis label to the time slice used to subset a timeseries.
# The part of the label before the first "-" is the scenario name used to
# locate the projection store; the suffix only distinguishes the time window.
analysis_period_dict = {
    "historical": slice("1985", "2014"),  # also the training period
    "ssp245-2030": slice("2020", "2039"),
    "ssp245-2050": slice("2040", "2059"),
}

Subset the reference dataset to the historical time period used for training (1985-2014).

In [None]:
# Restrict the reference to the training window ...
ref = ref.sel(time=analysis_period_dict["historical"])
# ... and to the regions that also exist in the modelled data ...
ref = ref.sel(processing_id=valid_ids)
# ... then keep time in a single chunk and split regions into blocks of 850.
ref = ref.chunk({"time": -1, "processing_id": 850})

Load the datasets into a dictionary rather than a single Xarray object; a dictionary accommodates the different calendars that different GCMs use. Then, for each GCM separately, train a bias-correction model and use it to bias-correct the historical and future projections.

In [None]:
# For each GCM: collect the reference and the modelled timeseries, train one
# bias-correction model on the historical period, then apply it to every
# analysis period and write each result to its own Zarr store.
for gcm in gcm_list:
    # a plain dict (not one Xarray object) so each entry can keep its own
    # calendar
    ts_dict = {"reference": ref["WBGT"]}

    # The scenarios are exactly the keys of `analysis_period_dict`, so iterate
    # the dict instead of repeating the labels.
    for scenario in analysis_period_dict:
        # "ssp245-2030" and "ssp245-2050" are both read from the "ssp245"
        # store; the label suffix only picks the time window.
        # Subset to the analysis period *before* rechunking so that only the
        # needed years are merged into the single time chunk (same order of
        # operations as used for `ref`).
        ts_dict[scenario] = (
            load_projection(gcm, scenario.split("-")[0])["WBGT"]
            .sel(time=analysis_period_dict[scenario])
            .sel(processing_id=valid_ids)
            .chunk({"time": -1, "processing_id": 850})
        )

    trained_model = train_bias_correction(
        ts_dict["reference"], ts_dict["historical"], gcm
    )

    for scenario in analysis_period_dict:
        apply_bias_correction(
            trained_model,
            ts_dict[scenario],
            gcm,
            f"s3://carbonplan-scratch/extreme-heat/wbgt-shade-regions/{gcm}-{scenario}-bc.zarr",
        )