# Making CMIP6 multimodel means

### Get the CMIP6 catalogue

In [2]:
from intake import open_catalog

# Open the Pangeo master intake catalogue, then drill down to the CMIP6
# collection under the "climate" section.
master_catalog = open_catalog(
    "https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/master.yaml"
)
cat = master_catalog["climate"]["cmip6_gcs"]
# To inspect what the catalogue entry exposes, run: list(cat)

### Pick out the historical surface temperature (`ts`)

In [3]:
# Search facets: the `ts` variable from the `Amon` table of the
# `historical` experiment. Each facet takes a list of accepted values.
query = {
    "variable_id": ["ts"],
    "experiment_id": ["historical"],  # optionally also "ssp585"
    "table_id": ["Amon"],
    # "institution_id": ["NOAA-GFDL"],  # uncomment to restrict to one institution
}
ts = cat.search(**query)

In [9]:
from cmip6_preprocessing.preprocessing import (
    rename_cmip6, 
    promote_empty_dims, 
    broadcast_lonlat, 
    replace_x_y_nominal_lat_lon
)
import dask

# Keyword arguments handed to the zarr reader via `to_dataset_dict(zarr_kwargs=...)`.
# Time decoding is switched off, so time coordinates stay as raw numbers.
z_kwargs = dict(consolidated=True, decode_times=False)

def wrapper(ds):
    """Preprocess a single dataset before it is merged by ``to_dataset_dict``.

    The dataset is copied, then passed through ``rename_cmip6`` and
    ``promote_empty_dims``. Afterwards ``broadcast_lonlat`` and
    ``replace_x_y_nominal_lat_lon`` are attempted on a best-effort basis.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset as opened from the catalogue (anything with a ``.copy()``
        method that the preprocessing helpers accept).

    Returns
    -------
    The preprocessed dataset. If one of the two best-effort steps raises,
    the dataset is returned as it stood after the last step that succeeded,
    and the failure is reported on stdout together with its cause.
    """
    ds = ds.copy()
    ds = rename_cmip6(ds)
    ds = promote_empty_dims(ds)
    try:
        ds = broadcast_lonlat(ds)
        ds = replace_x_y_nominal_lat_lon(ds)
    except Exception as err:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the notebook, and report
        # *why* the step failed instead of silently discarding the error.
        # A single print call keeps the message together when several
        # datasets are preprocessed concurrently.
        print(f"problem with:  {ds}\ncaused by: {err!r}")
    return ds

# Build the dictionary of datasets, applying `wrapper` to each dataset as it
# is opened. The dask option below is only active inside the `with` block.
split_chunks_option = {"array.slicing.split_large_chunks": True}
with dask.config.set(**split_chunks_option):
    dset_dict_proc = ts.to_dataset_dict(
        zarr_kwargs=z_kwargs,
        preprocess=wrapper,
    )


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 288, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(399, 192, 288), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:      





problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 128, lon: 256, time: 12)
Coordinates:
  * lat        (lat) float64 -88.93 -87.54 -86.14 -84.74 ... 86.14 87.54 88.93
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(256, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 2880 ... 5088 5820 6552 7284 8016
    time_bnds  (time, bnds) float64 dask.array<chunksize=(12, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(6, 128, 256), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.3.2
    creation_date:          2021-01-23T19:33:15Z
 

  result = blockwise(
  result = blockwise(


problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 256, lon: 512, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.46 -88.77 -88.07 -87.37 ... 88.07 88.77 89.46
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(256, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 0.7031 1.406 2.109 ... 357.2 357.9 358.6 359.3
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(512, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(126, 256, 512), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.5.0
    contact:                cmip6-data@ec



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 256, lon: 512, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.46 -88.77 -88.07 -87.37 ... 88.07 88.77 89.46
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(256, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 0.7031 1.406 2.109 ... 357.2 357.9 358.6 359.3
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(512, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(126, 256, 512), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.5.0
    contact:                cmip6-data@ec



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 96, lon: 192, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 86.21 88.11 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(96, 2), meta=np.ndarray>
  * lon        (lon) float64 0.9375 2.812 4.688 6.563 ... 355.3 357.2 359.1
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(677, 96, 192), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    NCO:                    netCDF Operators version 4.9.6 (Homepage = http:/...
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  488370


  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(


problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 160, lon: 320, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.14 -88.03 -86.91 -85.79 ... 86.91 88.03 89.14
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(160, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.125 2.25 3.375 ... 355.5 356.6 357.8 358.9
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(320, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(310, 160, 320), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  164359.0
    cmor_version:           3.5.0
    contact:                cmip6-data



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 96, lon: 192, time: 1980)
Coordinates:
  * lat        (lat) float64 -88.57 -86.72 -84.86 -83.0 ... 84.86 86.72 88.57
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(96, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.875 3.75 5.625 ... 352.5 354.4 356.2 358.1
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(990, 96, 192), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  96057.0
    cmor_version:           3.5.0
    creation_date:          2020-04-01T06:3



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 80, lon: 180, time: 2004)
Coordinates:
  * lat        (lat) float64 -90.0 -84.82 -80.72 -77.22 ... 80.72 84.82 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(80, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(180, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1459980 1460712 1461444 1462176
    time_bnds  (time, bnds) float64 dask.array<chunksize=(2004, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(865, 80, 180), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  134685.0
    cmor_version:           3.5.0
    contact:                Lijuan Li (ljli@



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 288, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(395, 192, 288), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.5.0
    comment:                none
    contact



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 288, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(399, 192, 288), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.5.0
    comment:                none
    contact



problem with:  <xarray.Dataset>
Dimensions:       (axis_nbounds: 2, lat: 96, lon: 96, time: 1980)
Coordinates:
  * lat           (lat) float32 -90.0 -88.11 -86.21 -84.32 ... 86.21 88.11 90.0
  * lon           (lon) float32 0.0 3.75 7.5 11.25 ... 345.0 348.8 352.5 356.2
  * time          (time) int64 0 708 1416 2148 ... 1443192 1443924 1444656
    time_bounds   (time, axis_nbounds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * axis_nbounds  (axis_nbounds) int64 0 1
Data variables:
    ts            (time, lat, lon) float32 dask.array<chunksize=(990, 96, 96), meta=np.ndarray>
Attributes:
    CMIP6_CV_version:       cv=6.2.15.1
    Conventions:            CF-1.7 CMIP-6.2
    EXPID:                  historical
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  7305.0
    contact:                ipsl-cmip6@listes.ipsl.fr
    creation_date:          2020-05-28T15:32:53Z
    data_specs_version:     0



problem with:  problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 384, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.28 -88.36 -87.42 -86.49 ... 87.42 88.36 89.28
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 0.9375 1.875 2.812 ... 356.2 357.2 358.1 359.1
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(384, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(218, 192, 384), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  63552.0
    cmor_version:           3.4.0
    creation_date:    



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 384, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.28 -88.36 -87.42 -86.49 ... 87.42 88.36 89.28
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 0.9375 1.875 2.812 ... 356.2 357.2 358.1 359.1
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(384, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1443420 1444152 1444884 1445616
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(218, 192, 384), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  54421.0
    cmor_version:           3.4.0
    creation_date:          2020-05-2



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 288, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(402, 192, 288), meta=np.ndarray>
Attributes:
    Conventions:               CF-1.7 CMIP-6.2
    activity_id:               CMIP
    branch_method:             Hybrid-restart from year 0671-01-01 of piControl
    branch_time:               0.0
    branch_time_in_child:      0.0
    branch_time_in_pare



problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 192, lon: 288, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(403, 192, 288), meta=np.ndarray>
Attributes:
    Conventions:               CF-1.7 CMIP-6.2
    activity_id:               CMIP
    branch_method:             Hybrid-restart from year 0671-01-01 of piControl
    branch_time:               0.0
    branch_time_in_child:      0.0
    branch_time_in_pare

  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(
  result = blockwise(


problem with:  problem with:  <xarray.Dataset>
Dimensions:    (bnds: 2, lat: 128, lon: 256, time: 1980)
Coordinates:
  * lat        (lat) float64 -90.0 -88.58 -87.17 -85.75 ... 87.17 88.58 90.0
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(256, 2), meta=np.ndarray>
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * bnds       (bnds) int64 0 1
Data variables:
    ts         (time, lat, lon) float32 dask.array<chunksize=(381, 128, 256), meta=np.ndarray>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    cmor_version:           3.5.0
    contact:                Z



In [10]:
# Show the identifier of every dataset that was loaded, one per line.
for key in dset_dict_proc.keys():
    print(key)

CMIP.CMCC.CMCC-ESM2.historical.Amon.gn
CMIP.FIO-QLNM.FIO-ESM-2-0.historical.Amon.gn
CMIP.MIROC.MIROC-ES2H.historical.Amon.gn
CMIP.HAMMOZ-Consortium.MPI-ESM-1-2-HAM.historical.Amon.gn
CMIP.NOAA-GFDL.GFDL-CM4.historical.Amon.gr1
CMIP.CAMS.CAMS-CSM1-0.historical.Amon.gn
CMIP.EC-Earth-Consortium.EC-Earth3-AerChem.historical.Amon.gr
CMIP.MOHC.HadGEM3-GC31-MM.historical.Amon.gn
CMIP.MRI.MRI-ESM2-0.historical.Amon.gn
CMIP.E3SM-Project.E3SM-1-0.historical.Amon.gr
CMIP.MPI-M.MPI-ESM1-2-HR.historical.Amon.gn
CMIP.IPSL.IPSL-CM6A-LR-INCA.historical.Amon.gr
CMIP.MIROC.MIROC6.historical.Amon.gn
CMIP.NCAR.CESM2-WACCM.historical.Amon.gn
CMIP.CAS.FGOALS-f3-L.historical.Amon.gr
CMIP.CSIRO.ACCESS-ESM1-5.historical.Amon.gn
CMIP.CNRM-CERFACS.CNRM-ESM2-1.historical.Amon.gr
CMIP.EC-Earth-Consortium.EC-Earth3-CC.historical.Amon.gr
CMIP.KIOST.KIOST-ESM.historical.Amon.gr1
CMIP.NCAR.CESM2.historical.Amon.gn
CMIP.MOHC.UKESM1-0-LL.historical.Amon.gn
CMIP.E3SM-Project.E3SM-1-1.historical.Amon.gr
CMIP.NCC.NorESM2-L

In [12]:
# Take the `ts` variable of the EC-Earth3-Veg dataset, average it over the
# `member_id` dimension, then select along `time`.
# NOTE(review): `slice("")` is `slice(None, "", None)`, i.e. an empty-string
# stop bound with no start. This looks like an unfinished time selection;
# confirm the intended start/stop. The datasets are opened with
# `decode_times=False`, so a string bound may not be valid for the time index.
dset_dict_proc["CMIP.EC-Earth-Consortium.EC-Earth3-Veg.historical.Amon.gr"].ts.mean("member_id").sel(time=slice(""))

Unnamed: 0,Array,Chunk
Bytes,16.61 GB,8.19 MB
Shape,"(8, 1980, 512, 512)","(1, 125, 32, 512)"
Count,20490 Tasks,3968 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.61 GB 8.19 MB Shape (8, 1980, 512, 512) (1, 125, 32, 512) Count 20490 Tasks 3968 Chunks Type float32 numpy.ndarray",8  1  512  512  1980,

Unnamed: 0,Array,Chunk
Bytes,16.61 GB,8.19 MB
Shape,"(8, 1980, 512, 512)","(1, 125, 32, 512)"
Count,20490 Tasks,3968 Chunks
Type,float32,numpy.ndarray
