Authors: A.Radhakrishnan, GFDL


Search for the mlotst variable in selected CMIP6 models

In [1]:
import xarray as xr
import intake
import intake_esm
import sys
from dask_gateway import Gateway
import pandas as pd
# Lift pandas' display limits so catalog tables are shown without truncated
# cell contents or elided rows.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)

esgf-world.json is the ESM collection spec file for the netCDF data in the S3 bucket esgf-world.
For now, the catalog is updated on demand.
The most recent catalogs are available at https://github.com/aradhakrishnanGFDL/gfdl-aws-analysis/tree/community/esm-collection-spec-examples
More examples can be found at https://github.com/aradhakrishnanGFDL/gfdl-aws-analysis/tree/community/examples


In [2]:
# Location of esgf-world.json, the ESM collection spec describing the netCDF
# data held in the esgf-world S3 bucket.
col_url = "https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json"

In [3]:
# Open the catalog described by the collection spec at col_url.
col = intake.open_esm_datastore(col_url)
# Keep the collection spec from the opened catalog; it is passed again later
# when a datastore is rebuilt from a filtered DataFrame.
esmcol_data = col.esmcol_data

In [4]:
def latest_version(cat):
    """
    Keep only the newest DRS version of every catalog entry.

    input
    cat: esmdatastore (any object exposing the catalog table as ``cat.df``)
    output
    pandas DataFrame with one row per unique combination of the identifying
    columns below, namely the row that sorts last by ('version', 'path')
    """
    # Columns that together identify one entry irrespective of its version.
    identity_columns = [
        'temporal subset', 'model', 'mip_table',
        'institute', 'variable', 'ensemble_member',
        'grid_label', 'experiment_id',
    ]
    # Ascending sort puts the highest version (ties broken by path) last ...
    ordered = cat.df.sort_values(by=['version', 'path'])
    # ... so keeping the last duplicate retains the latest version.
    return ordered.drop_duplicates(identity_columns, keep='last')

In [5]:
# Search facets: three experiments, the Omon table, two ensemble members,
# five models, grid label 'gn' and the mlotst variable.
query_Omon_mlotst = {
    "experiment_id": ['abrupt-4xCO2', '1pctCO2', 'historical'],
    "mip_table": ['Omon'],
    "ensemble_member": ["r1i1p1f1", "r1i1p1f2"],
    "model": ['CESM2', 'CNRM-CM6-1', 'UKESM1-0-LL', 'GFDL-ESM4', 'IPSL-CM6A-LR'],
    "grid_label": ['gn'],
    "variable": ["mlotst"],
}

# Filter the full catalog down to the matching entries.
cat_Omon_mlotst = col.search(**query_Omon_mlotst)
# latest_version returns a DataFrame (not a datastore) holding only the
# latest version of each matching entry.
cat_Omon_mlotst_lat = latest_version(cat_Omon_mlotst)


In [10]:
# Turn the de-duplicated DataFrame back into a datastore, reusing the
# collection spec captured from the original catalog.
cat_Omon_mlotst_latest = intake.open_esm_datastore(
    cat_Omon_mlotst_lat,
    esmcol_data=esmcol_data,
)

In [54]:
# Count distinct values of every catalog column per model.
# NOTE: 'CNRM-CM6-1' and 'UKESM1-0-LL' were requested in the query but are
# missing from the result.
(
    cat_Omon_mlotst_latest.df
    .groupby(['model'])
    .nunique()
)

Unnamed: 0_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CESM2,1,1,1,1,1,1,1,1,1,1,1,1
GFDL-ESM4,1,1,1,1,1,1,1,1,1,9,1,9
IPSL-CM6A-LR,1,1,1,1,1,1,1,1,1,1,1,1


In [44]:
# Break the catalog down by variable, model, version and time chunk, counting
# the distinct values of the remaining columns in each group.
(
    cat_Omon_mlotst_latest.df
    .groupby(['variable', 'model', 'version', 'temporal subset'])
    .nunique()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,path
variable,model,version,temporal subset,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
mlotst,CESM2,v20190308,185001-201412,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,185001-186912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,187001-188912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,189001-190912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,191001-192912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,193001-194912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,195001-196912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,197001-198912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,199001-200912,1,1,1,1,1,1,1,1,1
mlotst,GFDL-ESM4,v20190726,201001-201412,1,1,1,1,1,1,1,1,1


In [46]:
# Load the selected catalog entries into a dictionary of datasets.
# NOTE(review): storage_options presumably goes to the S3 filesystem layer and
# cdf_kwargs to the netCDF/xarray open call; confirm against the installed
# intake-esm version.
dset_dict_mlotst = cat_Omon_mlotst_latest.to_dataset_dict(
    storage_options=dict(anon=True),
    cdf_kwargs={'decode_times': False, 'chunks': {}},
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'project.institute.model.experiment_id.mip_table'


In [50]:
# Dictionary keys follow 'project.institute.model.experiment_id.mip_table';
# pick the IPSL-CM6A-LR historical dataset and display its mlotst variable.
dset_dict_mlotst['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon']['mlotst']

Unnamed: 0,Array,Chunk
Bytes,951.86 MB,951.86 MB
Shape,"(1, 1980, 332, 362)","(1, 1980, 332, 362)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 951.86 MB 951.86 MB Shape (1, 1980, 332, 362) (1, 1980, 332, 362) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  362  332  1980,

Unnamed: 0,Array,Chunk
Bytes,951.86 MB,951.86 MB
Shape,"(1, 1980, 332, 362)","(1, 1980, 332, 362)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
