## Set up environment


In [None]:
import intake
import proplot as plt
import numpy as np

## Create and Connect to Dask Distributed Cluster


In [None]:
from dask_gateway import Gateway
from dask.distributed import Client

# Bounds handed to `cluster.adapt` for adaptive scaling.
MIN_WORKERS = 2
MAX_WORKERS = 100

# Request a fresh cluster from the gateway and enable adaptive scaling on it.
gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=MIN_WORKERS, maximum=MAX_WORKERS)

# Attach a distributed client to the new cluster.
client = Client(cluster)

# Trailing bare expression: the notebook renders the cluster object,
# which is where the dashboard URL is shown.
cluster

In [None]:
# JSON description of the Pangeo CMIP6 catalog, opened as an intake-esm datastore.
PANGEO_CMIP6_CATALOG_URL = (
    "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
)

col = intake.open_esm_datastore(PANGEO_CMIP6_CATALOG_URL)

# Bare expression so the notebook displays the datastore summary.
col

In [None]:
# Peek at the first rows of the catalog table held in `col.df`.
col.df.head()

In [None]:
## Set up Hindcast

"Ensembles of 20 historical decadal forecasts, initialized at the end of each year from 1960 to 2013, ..."

In [None]:
# Facet values that pick the hindcast subset out of the catalog.
variable_id = "tas"
table_id = "Amon"
source_id = "CanESM5"
member_ids = ["r10i1p2f1", "r11i1p2f1", "r12i1p2f1", "r13i1p2f1", "r14i1p2f1"]

# Initialization years 1970 through 2014 (the stop value 2015 is exclusive).
inits = list(np.arange(1970, 2015))

# Gather the whole query in one mapping, then run it against the catalog.
hindcast_query = {
    "experiment_id": ["dcppA-hindcast"],
    "table_id": table_id,
    "variable_id": variable_id,
    "source_id": source_id,
    "member_id": member_ids,
    "dcpp_init_year": inits,
}
cat_cmip = col.search(**hindcast_query)

# Store the init-year column of the search result as integers.
cat_cmip.df["dcpp_init_year"] = cat_cmip.df["dcpp_init_year"].astype(int)

In [None]:
def preprocess(ds):
    """Overwrite ``ds["time"]`` with the integer sequence 1..N and return ``ds``.

    N is ``ds.time.size``. The assignment happens on the object that was
    passed in, and that same object is returned.

    NOTE(review): presumably this swaps calendar dates for a lead index so
    that hindcasts from different initialization years line up; confirm
    against how the result is used downstream.
    """
    n_steps = ds.time.size
    lead_index = np.arange(1, n_steps + 1)
    ds["time"] = lead_index
    return ds


# Options forwarded through `zarr_kwargs` when the catalog entries are opened.
zarr_open_options = {"consolidated": True, "use_cftime": False}

# Load every entry of the hindcast search result, running `preprocess` on each.
dset_dict = cat_cmip.to_dataset_dict(
    zarr_kwargs=zarr_open_options, preprocess=preprocess
)

# Bare expression so the notebook shows the resulting mapping.
dset_dict