# This script is to download CMIP6 data using intake-esm library

Read more from link

https://intake-esm.readthedocs.io/en/stable/tutorials/loading-cmip6-data.html

How to use intake to download GCM


In [12]:
import os
import re
from typing import Iterable

import intake

In [17]:
# Information of all CMIP6 files that one can download from intake esm data store
url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
# open the catalog
data_source = intake.open_esm_datastore(url)
# dataframe.df.columns
# df = dataframe.df

In [13]:
def natural_sort(l: Iterable[str]) -> list[str]:
    """
    Sort names like r1i1p1f1, r1i2p1f1 in a natural (numeric) order.
    - r1: Realization (initial condition run),
    - i1: Initialization method,
    - p1: Physical parameters,
    - f1: External forcings.

    Numeric order means that r1i1p1f1 < r2i1p1f1 < r11i1p1f1.

    :param l: list of names to be sorted
    """

    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)]
    return sorted(l, key=alphanum_key)

In [18]:
def download_files(data_source, sid: str, exp: str, var: str):
    """
    Download files from the CMIP6 data store

    :param data_source: intake esm data store
    :param sid: source_id
    :param exp: experiment_id
    :param var: variable_id
    """

    models = data_source.search(
        experiment_id=exp,
        table_id="Amon",
        variable_id=var,
        source_id=sid,
    )
    # then one might get several files with the same conditions
    # r1: Realization (initial condition run)
    # i1: Initialization method
    # p1: Physical parameters
    # f1: External forcings

    print(var, exp, sid, len(models.df))

    # if no files exist then print out error
    if len(models.df) == 0:
        print("*** \n Error \n")

    # sort the possible files
    ml = natural_sort(models.df.member_id.values)

    # get the first one only then seach again
    mem = ml[0]
    model_s = data_source.search(
        experiment_id=exp,
        table_id="Amon",
        variable_id=var,
        source_id=sid,
        member_id=mem,
    )

    # if no files exist then print out error
    if len(model_s.df) == 0:
        print("*** \n Error \n")

    print(mem)

    print("Download")

    try:
        datasets = model_s.to_dataset_dict(
            zarr_kwargs={"consolidated": True, "decode_times": True, "use_cftime": True}
        )
        # datasets = models.to_dataset_dict(xarray_open_kwargs={"consolidated": True, "decode_times": True, "use_cftime": True})

        for k, v in datasets.items():
            odir = "Download/" + sid + "/" + exp + "/"
            if not os.path.exists(odir):
                os.makedirs(odir)
            ofile = odir + var + "_" + k + "_" + mem + ".nc"
            print("write to ", ofile)
            v.to_netcdf(ofile)
    except:
        print("fail")


def download_data():
    for sid in [
        "EC-Earth3",
        "MIROC6",
        "MRI-ESM2-0",
        "ACCESS-CM2",
        "IPSL-CM6A-LR",
        "MPI-ESM1-2-HR",
    ]:
        for exp in ["historical", "ssp585", "ssp126", "ssp370", "ssp245"]:
            for var in ["tas", "ta", "ua", "va", "hur", "zg", "ts"]:
                download_files(data_source, sid, exp, var)
                return

tas historical EC-Earth3 73
r1i1p1f1
Download

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
fail
