In [None]:
from glob import glob
import xarray as xr
import cftime
import nc_time_axis
import numpy as np
import matplotlib.pyplot as plt
import intake, intake_esm
!pip install cmip6_preprocessing
from cmip6_preprocessing.preprocessing import (correct_units,rename_cmip6)

In [43]:
def chunk_time(ds):
    """Rechunk ``ds`` to one time step per chunk when it has a ``time`` dimension.

    Parameters
    ----------
    ds : object exposing ``dims`` and ``chunk``
        Typically an xarray Dataset.

    Returns
    -------
    The result of ``ds.chunk({'time': 1})`` when ``'time'`` is in ``ds.dims``,
    otherwise ``ds`` itself, untouched.
    """
    has_time_axis = 'time' in ds.dims
    return ds.chunk({'time': 1}) if has_time_axis else ds

In [44]:
# Necessary for creating a common time axis for all models
# We want to create a common time axis so there will be no gaps when plotting the results 

def fix_time(ds):
    """Label the time axis with a ``noleap`` calendar and CF-decode the dataset.

    Datasets without a ``time`` dimension are returned as they are. Otherwise,
    unless the ``calendar`` attribute of ``ds["time"]`` is already one of
    ``"noleap"``, ``"NOLEAP"`` or ``"365_day"``, it is set to ``"noleap"``
    (this also covers the case where the attribute is missing). The attribute
    mapping obtained from ``ds["time"].attrs`` is updated in place, and the
    dataset is then passed through ``xr.decode_cf``.

    Parameters
    ----------
    ds : dataset-like object providing ``dims`` and ``ds["time"].attrs``
        Presumably opened with ``decode_times=False`` so the time values are
        still raw numbers - confirm against the caller.

    Returns
    -------
    The input ``ds`` (no ``time`` dimension) or the output of ``xr.decode_cf(ds)``.
    """
    import xarray as xr

    if "time" not in ds.dims:
        return ds

    # Calendar spellings that are left alone; anything else is overwritten.
    accepted_calendars = ("noleap", "NOLEAP", "365_day")
    time_attrs = ds["time"].attrs
    if time_attrs.get("calendar") not in accepted_calendars:
        time_attrs.update({"calendar": "noleap"})

    return xr.decode_cf(ds)

In [45]:
# Pass this function for preprocessing thetao data 
def pp_thetao(ds):
    """Preprocessing hook applied to each ``thetao`` dataset as it is opened.

    Runs three steps in order and returns the final result:

    1. ``rename_cmip6`` (from ``cmip6_preprocessing``) - presumably harmonises
       dimension/coordinate names across models; see that package's docs.
    2. ``fix_time`` - relabels the time calendar as ``noleap`` and CF-decodes.
    3. ``correct_units`` (from ``cmip6_preprocessing``) - presumably converts
       coordinates to common units; see that package's docs.

    Parameters
    ----------
    ds : dataset as handed over by the ``preprocess`` hook of the loader.

    Returns
    -------
    The dataset after all three steps.
    """
    renamed = rename_cmip6(ds)
    time_fixed = fix_time(renamed)
    return correct_units(time_fixed)

In [46]:
# Location of the JSON file describing the collection that is opened as an ESM datastore below.
col_url = "https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json" 


In [47]:
# Open the ESM datastore described by the JSON file at `col_url`.
col = intake.open_esm_datastore(col_url)

In [48]:
# Keep the collection's `esmcol_data`; it is passed along later when a datastore
# is rebuilt from a filtered DataFrame.
esmcol_data = col.esmcol_data
# Same string as the key used to index `dset_dict_T` in the cells below.
# NOTE(review): `testds` itself is not referenced by name in the visible cells.
testds = "CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon"

In [49]:
# Query the collection for historical Omon `thetao` of IPSL-CM6A-LR
# (ensemble member r1i1p1f1, grid label 'gn').
cat_T = col.search(experiment_id=['historical'],
                 mip_table='Omon',
                 ensemble_member=["r1i1p1f1"],
                 model=['IPSL-CM6A-LR'],
                 # Disabled alternative model selection:
                 #model=['GFDL-CM4','GFDL-ESM4'],
                 grid_label=['gn'],
                 variable=["thetao"])
               # Disabled explicit version filter (no `version` argument is passed to the search above):
               #  version=['v20190308', 'v20191120', 'v20200220','v20191108','v20190323','v20190731','v20190710','v20180803',
                #          'v20191007','v20190627','v20190311','v20190429','v20190627','v20191205','v20190914','v20190815','v20191108'])


In [51]:
# Count the distinct values in every column for each model; a 'version' count
# above 1 would mean that several versions of the same data are listed.
# NOTE(review): the earlier remark "CanESM5 has two versions" does not match the
# search above, which is limited to IPSL-CM6A-LR - presumably left over from a
# wider search; confirm.
cat_T.df.groupby(['model']).nunique()

Unnamed: 0_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
IPSL-CM6A-LR,1,1,1,1,1,1,1,1,1,2,1,2


In [53]:
def latest_version(ds):
    """Keep only the most recent DRS version of every dataset in a catalog.

    Rows are sorted by ``version`` (ascending, so the usual ``vYYYYMMDD``
    strings sort chronologically) and grouped by the columns that identify a
    dataset file; the last entry of every group is kept.

    Parameters
    ----------
    ds : catalog-like object
        Anything exposing a pandas DataFrame as ``ds.df``. The frame must
        contain a ``version`` column and the key columns listed below.

    Returns
    -------
    pandas.DataFrame
        One row per unique combination of the key columns, with the key
        columns first followed by the remaining columns. Note that this is a
        plain DataFrame, not a catalog object.

    Notes
    -----
    ``GroupBy.last`` picks the last *non-null* value of each column, so a
    missing value in the newest row is filled from an older version.
    """
    key_columns = [
        'temporal subset', 'model', 'mip_table', 'institute',
        'variable', 'ensemble_member', 'grid_label', 'experiment_id',
    ]
    by_version = ds.df.sort_values(['version'])
    # dropna=False: by default groupby discards every row with a missing value
    # in any key column (e.g. an entry without a 'temporal subset'), which would
    # silently remove those datasets from the catalog instead of de-duplicating them.
    grouped = by_version.groupby(key_columns, as_index=False, dropna=False)
    return grouped.last()

In [54]:
# Reduce the search result to a DataFrame holding one (latest-version) row per dataset file.
cat_T_new = latest_version(cat_T)

In [55]:
# Check the CanESM5 rows after de-duplication (expected: one distinct version, the latest).
# NOTE(review): the search above only requests IPSL-CM6A-LR, so this selection
# comes back empty in the shown output - presumably left over from a wider search.
cat_T_new[cat_T_new['model']=='CanESM5']

Unnamed: 0,temporal subset,model,mip_table,institute,variable,ensemble_member,grid_label,experiment_id,project,frequency,modeling_realm,version,path


In [56]:
# Display the de-duplicated catalog table.
cat_T_new

Unnamed: 0,temporal subset,model,mip_table,institute,variable,ensemble_member,grid_label,experiment_id,project,frequency,modeling_realm,version,path
0,185001-194912,IPSL-CM6A-LR,Omon,IPSL,thetao,r1i1p1f1,gn,historical,CMIP6,mon,ocean,v20180803,s3://esgf-world/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/h...
1,195001-201412,IPSL-CM6A-LR,Omon,IPSL,thetao,r1i1p1f1,gn,historical,CMIP6,mon,ocean,v20180803,s3://esgf-world/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/h...


In [57]:
# Build a datastore from the de-duplicated DataFrame, reusing the original
# collection's `esmcol_data`. Note that this rebinds `cat_T`, which previously
# held the search result.
cat_T = intake.open_esm_datastore(cat_T_new,esmcol_data=esmcol_data)

In [None]:
# First chunking test: one time step and one 'olevel' level per chunk.
# decode_times=False keeps the raw time values; decoding happens in
# fix_time, which pp_thetao calls on every dataset.
dset_dict_T = cat_T.to_dataset_dict(
    cdf_kwargs=dict(decode_times=False, chunks=dict(time=1, olevel=1)),
    preprocess=pp_thetao,
    storage_options={'anon': True},
)

In [61]:
# Inspect the dataset loaded with one time step and one 'olevel' level per chunk.
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon']

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.92 MB 1.92 MB Shape (332, 362, 4) (332, 362, 4) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",4  362  332,

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.92 MB 1.92 MB Shape (332, 362, 4) (332, 362, 4) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",4  362  332,

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,600 B,8 B
Shape,"(75, 2)","(1, 2)"
Count,76 Tasks,75 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 600 B 8 B Shape (75, 2) (1, 2) Count 76 Tasks 75 Chunks Type float32 numpy.ndarray",2  75,

Unnamed: 0,Array,Chunk
Bytes,600 B,8 B
Shape,"(75, 2)","(1, 2)"
Count,76 Tasks,75 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.68 kB,16 B
Shape,"(1980, 2)","(1, 2)"
Count,5942 Tasks,1980 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 31.68 kB 16 B Shape (1980, 2) (1, 2) Count 5942 Tasks 1980 Chunks Type object numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,31.68 kB,16 B
Shape,"(1980, 2)","(1, 2)"
Count,5942 Tasks,1980 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,480.74 kB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 1, 332, 362)"
Count,445502 Tasks,148500 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 71.39 GB 480.74 kB Shape (1, 1980, 75, 332, 362) (1, 1, 1, 332, 362) Count 445502 Tasks 148500 Chunks Type float32 numpy.ndarray",1980  1  362  332  75,

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,480.74 kB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 1, 332, 362)"
Count,445502 Tasks,148500 Chunks
Type,float32,numpy.ndarray


In [62]:
# thetao under the time+olevel chunking: the output below reports 148500 chunks
# of shape (1, 1, 1, 332, 362).
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon'].thetao

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,480.74 kB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 1, 332, 362)"
Count,445502 Tasks,148500 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 71.39 GB 480.74 kB Shape (1, 1980, 75, 332, 362) (1, 1, 1, 332, 362) Count 445502 Tasks 148500 Chunks Type float32 numpy.ndarray",1980  1  362  332  75,

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,480.74 kB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 1, 332, 362)"
Count,445502 Tasks,148500 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [64]:
# lev_bounds under the time+olevel chunking: split into 75 chunks of shape (1, 2)
# according to the output below.
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon'].lev_bounds

Unnamed: 0,Array,Chunk
Bytes,600 B,8 B
Shape,"(75, 2)","(1, 2)"
Count,76 Tasks,75 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 600 B 8 B Shape (75, 2) (1, 2) Count 76 Tasks 75 Chunks Type float32 numpy.ndarray",2  75,

Unnamed: 0,Array,Chunk
Bytes,600 B,8 B
Shape,"(75, 2)","(1, 2)"
Count,76 Tasks,75 Chunks
Type,float32,numpy.ndarray


In [65]:
# Second chunking test: chunk along time only (one time step per chunk,
# no 'olevel' entry in the chunk specification).
dset_dict_T = cat_T.to_dataset_dict(
    cdf_kwargs=dict(decode_times=False, chunks=dict(time=1)),
    preprocess=pp_thetao,
    storage_options={'anon': True},
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'project.institute.model.experiment_id.mip_table'


In [66]:
# Inspect the dataset reloaded with time-only chunking.
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon']

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.92 MB 1.92 MB Shape (332, 362, 4) (332, 362, 4) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",4  362  332,

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.92 MB 1.92 MB Shape (332, 362, 4) (332, 362, 4) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",4  362  332,

Unnamed: 0,Array,Chunk
Bytes,1.92 MB,1.92 MB
Shape,"(332, 362, 4)","(332, 362, 4)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 600 B 600 B Shape (75, 2) (75, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  75,

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.68 kB,16 B
Shape,"(1980, 2)","(1, 2)"
Count,5942 Tasks,1980 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 31.68 kB 16 B Shape (1980, 2) (1, 2) Count 5942 Tasks 1980 Chunks Type object numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,31.68 kB,16 B
Shape,"(1980, 2)","(1, 2)"
Count,5942 Tasks,1980 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,36.06 MB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 75, 332, 362)"
Count,5942 Tasks,1980 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 71.39 GB 36.06 MB Shape (1, 1980, 75, 332, 362) (1, 1, 75, 332, 362) Count 5942 Tasks 1980 Chunks Type float32 numpy.ndarray",1980  1  362  332  75,

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,36.06 MB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 75, 332, 362)"
Count,5942 Tasks,1980 Chunks
Type,float32,numpy.ndarray


In [67]:
# lev_bounds with time-only chunking: a single chunk of shape (75, 2) in the output below.
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon'].lev_bounds

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 600 B 600 B Shape (75, 2) (75, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  75,

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [68]:
# Look at the `lev` coordinate of the dataset.
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon'].lev

In [69]:
# thetao with time-only chunking: the output below reports 1980 chunks of
# shape (1, 1, 75, 332, 362).
dset_dict_T['CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon'].thetao

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,36.06 MB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 75, 332, 362)"
Count,5942 Tasks,1980 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 71.39 GB 36.06 MB Shape (1, 1980, 75, 332, 362) (1, 1, 75, 332, 362) Count 5942 Tasks 1980 Chunks Type float32 numpy.ndarray",1980  1  362  332  75,

Unnamed: 0,Array,Chunk
Bytes,71.39 GB,36.06 MB
Shape,"(1, 1980, 75, 332, 362)","(1, 1, 75, 332, 362)"
Count,5942 Tasks,1980 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 480.74 kB 480.74 kB Shape (332, 362) (332, 362) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",362  332,

Unnamed: 0,Array,Chunk
Bytes,480.74 kB,480.74 kB
Shape,"(332, 362)","(332, 362)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
