Install `cmip6_preprocessing` from GitHub before running this notebook: `pip install git+https://github.com/jbusecke/cmip6_preprocessing.git`

Install PyCO2SYS as well: `pip install PyCO2SYS`

In [1]:
import xarray as xr
from cmip6_preprocessing.preprocessing import combined_preprocessing
import PyCO2SYS as pyco2 # i have to install this package through terminal for it to work
import intake
import matplotlib.pyplot as plt
import numpy as np
import warnings
import gsw
from cmip6_preprocessing.utils import google_cmip_col
from cmip6_preprocessing.postprocessing import combine_datasets
from cmip6_preprocessing.postprocessing import merge_variables

In [2]:
# Location of the catalog description (JSON) that intake opens below.
CMIP6_CATALOG_URL = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(CMIP6_CATALOG_URL)

In [3]:
# List the distinct source_ids of catalog entries whose variable_id is
# 'dissic' or 'talk'.
# NOTE(review): a list passed to search() presumably matches ANY of the values,
# so a model listed here need not provide both variables -- confirm.
carbon_cat = col.search(variable_id=['dissic', 'talk'])
carbon_cat.df['source_id'].unique()

array(['GFDL-CM4', 'GFDL-ESM4', 'GFDL-OM4p5B', 'IPSL-CM6A-LR',
       'CNRM-ESM2-1', 'CanESM5', 'CESM2', 'CESM2-WACCM', 'CanESM5-CanOE',
       'UKESM1-0-LL', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR', 'MPI-ESM1-2-HR',
       'GISS-E2-1-G', 'NorESM2-LM', 'GISS-E2-1-G-CC', 'MIROC-ES2L',
       'NorCPM1', 'NorESM1-F', 'NorESM2-MM', 'ACCESS-ESM1-5', 'CESM2-FV2',
       'CESM2-WACCM-FV2', 'MRI-ESM2-0', 'IPSL-CM5A2-INCA', 'KIOST-ESM',
       'EC-Earth3-CC', 'CMCC-ESM2', 'IPSL-CM6A-LR-INCA'], dtype=object)

In [4]:
# CMIP6 variable_ids requested from the catalog; a later cell keeps only the
# datasets that contain all of them.
variables = ['dissic', 'talk', 'so', 'thetao']

# Keyword arguments forwarded when each zarr store is opened.
z_kwargs = {'consolidated': True, 'use_cftime': True}

# Hard-coded list of models (source_ids) to include in the search.
source_ids = [
    'GFDL-CM4', 'GFDL-ESM4', 'GFDL-OM4p5B', 'IPSL-CM6A-LR',
    'CNRM-ESM2-1', 'CanESM5', 'CESM2', 'CESM2-WACCM', 'CanESM5-CanOE',
    'UKESM1-0-LL', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR', 'MPI-ESM1-2-HR',
    'GISS-E2-1-G', 'NorESM2-LM', 'GISS-E2-1-G-CC', 'MIROC-ES2L',
    'NorCPM1', 'NorESM1-F', 'NorESM2-MM', 'ACCESS-ESM1-5', 'CESM2-FV2',
    'CESM2-WACCM-FV2', 'MRI-ESM2-0', 'IPSL-CM5A2-INCA', 'KIOST-ESM',
    'EC-Earth3-CC', 'CMCC-ESM2', 'IPSL-CM6A-LR-INCA',
]

# NOTE: table_id is deliberately left unconstrained here (a former
# `table_id=['Omon']` filter is disabled), so the search is not limited to
# the Omon table.
query = {
    'experiment_id': ['historical'],
    'variable_id': variables,
    'grid_label': ['gr', 'gn'],
    'source_id': source_ids,
}

cat = col.search(**query)

# Load every matching store anonymously, running combined_preprocessing on
# each dataset. With aggregate=False the datasets are not combined by intake.
# NOTE(review): this presumably yields one entry per catalog row -- confirm
# against the intake-esm version in use.
dset_dict_old = cat.to_dataset_dict(
    zarr_kwargs=z_kwargs,
    storage_options={'token': 'anon'},
    preprocess=combined_preprocessing,
    aggregate=False,
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'




In [5]:
# Pass the per-entry datasets through cmip6_preprocessing's merge_variables.
# The keys of the result (listed in the next cell) no longer contain a
# variable_id component.
# NOTE(review): presumably datasets that differ only in variable_id are merged
# into a single dataset -- confirm against the library documentation.
dd_new = merge_variables(dset_dict_old)



In [7]:
# Display the keys of the merged dictionary.
list(dd_new.keys())

['IPSL-CM6A-LR.gn.historical.Omon.r20i1p1f1',
 'MRI-ESM2-0.gr.historical.Omon.r2i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r17i1p1f1',
 'NorESM2-MM.gr.historical.Omon.r2i1p1f1',
 'MPI-ESM1-2-LR.gn.historical.Omon.r10i1p1f1',
 'GISS-E2-1-G.gn.historical.Omon.r9i1p1f2',
 'NorESM2-LM.gn.historical.Oyr.r2i1p1f1',
 'ACCESS-ESM1-5.gn.historical.Omon.r11i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r18i1p1f1',
 'GFDL-ESM4.gr.historical.Omon.r1i1p1f1',
 'MRI-ESM2-0.gr.historical.Omon.r5i1p1f1',
 'MIROC-ES2L.gn.historical.Omon.r1i1000p1f2',
 'NorESM2-LM.gr.historical.Omon.r3i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r13i1p1f1',
 'CESM2-WACCM.gr.historical.Omon.r2i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Odec.r25i1p1f1',
 'GISS-E2-1-G.gn.historical.Omon.r5i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Odec.r8i1p1f1',
 'CNRM-ESM2-1.gn.historical.Omon.r3i1p1f2',
 'MPI-ESM1-2-LR.gn.historical.Omon.r3i1p1f1',
 'CanESM5.gn.historical.Omon.r22i1p1f1',
 'CanESM5.gn.historical.Omon.r39i1p2f1',
 'IPSL-CM6A-LR.gn.histori

In [8]:
# Retain only the merged datasets that provide every entry of `variables`
# among their data variables.
dd_has_all_vars = {
    key: ds
    for key, ds in dd_new.items()
    if all(var in ds.data_vars for var in variables)
}
list(dd_has_all_vars.keys())


['IPSL-CM6A-LR.gn.historical.Omon.r20i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r17i1p1f1',
 'MPI-ESM1-2-LR.gn.historical.Omon.r10i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r18i1p1f1',
 'GFDL-ESM4.gr.historical.Omon.r1i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r13i1p1f1',
 'CESM2-WACCM.gr.historical.Omon.r2i1p1f1',
 'CNRM-ESM2-1.gn.historical.Omon.r3i1p1f2',
 'MPI-ESM1-2-LR.gn.historical.Omon.r3i1p1f1',
 'CanESM5.gn.historical.Omon.r22i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r28i1p1f1',
 'CanESM5.gn.historical.Omon.r11i1p1f1',
 'CESM2.gn.historical.Omon.r11i1p1f1',
 'MPI-ESM1-2-HR.gn.historical.Omon.r3i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r11i1p1f1',
 'CESM2.gr.historical.Omon.r3i1p1f1',
 'CanESM5.gn.historical.Omon.r5i1p1f1',
 'IPSL-CM6A-LR.gn.historical.Omon.r31i1p1f1',
 'MPI-ESM1-2-HR.gn.historical.Omon.r9i1p1f1',
 'CanESM5.gn.historical.Omon.r6i1p1f1',
 'MPI-ESM1-2-LR.gn.historical.Omon.r6i1p1f1',
 'CanESM5.gn.historical.Omon.r1i1p2f1',
 'CESM2.gn.historical.Omon.r1i1p1f1',


In [29]:
def pick_first_member(ds_list, **kwargs):
    """Return the first element of ``ds_list``.

    Passed to ``combine_datasets`` as the combining function so that each
    group of matching datasets is reduced to a single one.

    Parameters
    ----------
    ds_list : sequence
        Indexable collection of candidates; must be non-empty.
    **kwargs
        Accepted for call compatibility and ignored.

    Returns
    -------
    The object stored at index 0 of ``ds_list``.

    Raises
    ------
    IndexError
        If ``ds_list`` is an empty sequence.

    Notes
    -----
    NOTE(review): which member counts as "first" depends entirely on the
    order of the list handed in by the caller -- confirm that this order is
    stable between runs if a specific member is expected.
    """
    first_member = ds_list[0]
    return first_member

# Attributes handed to combine_datasets as `match_attrs`. The member id is not
# among them, and the resulting keys no longer include it.
member_match_attrs = ['source_id', 'grid_label', 'experiment_id', 'table_id']

# Reduce each group of matching datasets to one dataset via pick_first_member.
dd_new_new = combine_datasets(
    dd_has_all_vars,
    pick_first_member,
    match_attrs=member_match_attrs,
)
list(dd_new_new.keys())

['IPSL-CM6A-LR.gn.historical.Omon',
 'MPI-ESM1-2-LR.gn.historical.Omon',
 'GFDL-ESM4.gr.historical.Omon',
 'CESM2-WACCM.gr.historical.Omon',
 'CNRM-ESM2-1.gn.historical.Omon',
 'CanESM5.gn.historical.Omon',
 'CESM2.gn.historical.Omon',
 'MPI-ESM1-2-HR.gn.historical.Omon',
 'CESM2.gr.historical.Omon',
 'CMCC-ESM2.gn.historical.Omon',
 'CESM2-FV2.gr.historical.Omon',
 'GISS-E2-1-G.gn.historical.Omon',
 'MIROC-ES2L.gn.historical.Omon',
 'UKESM1-0-LL.gn.historical.Omon',
 'ACCESS-ESM1-5.gn.historical.Omon',
 'CanESM5-CanOE.gn.historical.Omon',
 'GISS-E2-1-G-CC.gn.historical.Omon',
 'CESM2-WACCM-FV2.gn.historical.Omon',
 'CESM2-FV2.gn.historical.Omon',
 'NorESM2-LM.gr.historical.Omon',
 'MRI-ESM2-0.gr.historical.Omon',
 'MPI-ESM-1-2-HAM.gn.historical.Omon',
 'GFDL-CM4.gr.historical.Omon',
 'CESM2-WACCM-FV2.gr.historical.Omon',
 'CESM2-WACCM.gn.historical.Omon']

In [32]:
# Datasets whose grid_label is 'gr'.
dd_gr = {
    key: ds
    for key, ds in dd_new_new.items()
    if ds.grid_label == 'gr'
}
list(dd_gr.keys())

['GFDL-ESM4.gr.historical.Omon',
 'CESM2-WACCM.gr.historical.Omon',
 'CESM2.gr.historical.Omon',
 'CESM2-FV2.gr.historical.Omon',
 'NorESM2-LM.gr.historical.Omon',
 'MRI-ESM2-0.gr.historical.Omon',
 'GFDL-CM4.gr.historical.Omon',
 'CESM2-WACCM-FV2.gr.historical.Omon']

In [33]:
# Datasets whose grid_label is 'gn'.
dd_gn = {
    key: ds
    for key, ds in dd_new_new.items()
    if ds.grid_label == 'gn'
}
list(dd_gn.keys())

['IPSL-CM6A-LR.gn.historical.Omon',
 'MPI-ESM1-2-LR.gn.historical.Omon',
 'CNRM-ESM2-1.gn.historical.Omon',
 'CanESM5.gn.historical.Omon',
 'CESM2.gn.historical.Omon',
 'MPI-ESM1-2-HR.gn.historical.Omon',
 'CMCC-ESM2.gn.historical.Omon',
 'GISS-E2-1-G.gn.historical.Omon',
 'MIROC-ES2L.gn.historical.Omon',
 'UKESM1-0-LL.gn.historical.Omon',
 'ACCESS-ESM1-5.gn.historical.Omon',
 'CanESM5-CanOE.gn.historical.Omon',
 'GISS-E2-1-G-CC.gn.historical.Omon',
 'CESM2-WACCM-FV2.gn.historical.Omon',
 'CESM2-FV2.gn.historical.Omon',
 'MPI-ESM-1-2-HAM.gn.historical.Omon',
 'CESM2-WACCM.gn.historical.Omon']

In [34]:
# Show again which entries are on the 'gr' grid (iterating a dict yields its keys).
list(dd_gr)

['GFDL-ESM4.gr.historical.Omon',
 'CESM2-WACCM.gr.historical.Omon',
 'CESM2.gr.historical.Omon',
 'CESM2-FV2.gr.historical.Omon',
 'NorESM2-LM.gr.historical.Omon',
 'MRI-ESM2-0.gr.historical.Omon',
 'GFDL-CM4.gr.historical.Omon',
 'CESM2-WACCM-FV2.gr.historical.Omon']

Models with `gr` output (the source_ids of the `dd_gr` keys above): `'GFDL-ESM4', 'CESM2-WACCM', 'CESM2', 'CESM2-FV2', 'NorESM2-LM', 'MRI-ESM2-0', 'GFDL-CM4', 'CESM2-WACCM-FV2'`