In [None]:
import os

import dask
import intake
import matplotlib.pyplot as plt
import numpy as np
import regionmask
import xarray as xr
from xmip.preprocessing import combined_preprocessing
from xmip.regionmask import merged_mask

In [None]:
# Open the catalog and select the dataset source IDs
col_url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
%time col = intake.open_esm_datastore(col_url)
sourceIds = ['CAMS-CSM1-0', 'CMCC-CM2-SR5', 'CMCC-ESM2', 
             'ACCESS-ESM1-5', 'ACCESS-CM2', 'MPI-ESM1-2-LR']

In [None]:
# Search the catalog for the selected models and load two sets of datasets:
#   * thetao    - monthly ocean potential temperature (table Omon)
#   * areacello - ocean grid-cell area (table Ofx)
# Both searches share the same member / experiment / activity / grid facets.
shared_facets = dict(
    member_id='r1i1p1f1',
    experiment_id='historical',
    activity_id='CMIP',
    grid_label='gn',
    source_id=sourceIds,
)

cat = col.search(table_id='Omon', variable_id='thetao', **shared_facets)

# NOTE(review): `cdf_kwargs` is the netCDF-specific option set of
# `to_dataset_dict`; if this catalog's assets are Zarr stores these options
# (chunks / use_cftime) may be silently ignored. The right keyword depends on
# the installed intake-esm version - TODO confirm.
# The dask option splits large chunks produced by slicing instead of warning.
with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    data_dict = cat.to_dataset_dict(
        cdf_kwargs={"chunks": {"time": -1}, "use_cftime": True},
        preprocess=combined_preprocessing,
    )

# areacello is a time-invariant field published in the CMIP6 'Ofx' table, not
# in 'Omon'; searching 'Omon' matches nothing and leaves `areacello` empty.
query = col.search(table_id='Ofx', variable_id='areacello', **shared_facets)
areacello = query.to_dataset_dict(
    cdf_kwargs={"chunks": {"time": -1}, "use_cftime": True},
    preprocess=combined_preprocessing,
)

In [None]:
# Save every loaded AREACELLO dataset to netCDF, one file per dataset key.
# NOTE: no time selection is applied to the cell-area data in this cell; the
# "1990-2000" in the file name is only a label.
areacello_dir = './areacello-multimodel'
# to_netcdf does not create missing parent directories, so make sure it exists.
os.makedirs(areacello_dir, exist_ok=True)

arrayOfareacello = [areacello]

for data in arrayOfareacello:
    for key in data:
        # File name is built from the dataset key that is stored in the
        # dataset's `intake_esm_dataset_key` attribute.
        data[key].to_netcdf(
            areacello_dir + '/_'
            + data[key].attrs['intake_esm_dataset_key']
            + '_areacello-historical_1990-2000.nc'
        )

In [None]:
# Compute the 1990-2000 time-mean THETAO for each model and save it to netCDF,
# one file per model (named after the dataset's `source_id` attribute).
thetao_dir = './thetao-models-1990-2000'
# to_netcdf does not create missing parent directories, so make sure it exists.
os.makedirs(thetao_dir, exist_ok=True)

arrayOfDataDict = [data_dict]

for data in arrayOfDataDict:
    for key in data:
        # Label-based slices include both endpoints, so this selects 1990
        # through the end of 2000 before averaging over time.
        firstTenYears = data[key].sel(time=slice('1990', '2000')).mean(dim='time')
        # compute() returns a new in-memory object instead of loading in place;
        # keep the result so to_netcdf does not evaluate the dask graph again.
        firstTenYears = firstTenYears.compute()
        firstTenYears.to_netcdf(
            thetao_dir + '/_'
            + data[key].attrs['source_id']
            + '_historical_1990-2000.nc'
        )