# Pre-Industrial Control Data analysis

In [None]:
!mamba install --file /home/jovyan/pangeo/code/requirements.txt

In [1]:
from collections import defaultdict

import dask
import esmvalcore.preprocessor as ecpr
import fsspec
import intake
import numpy as np
import pandas as pd
import pymannkendall as mkt
import seaborn as sns
import xarray as xr
from dask.diagnostics import progress
from matplotlib import pyplot as plt
from tqdm.autonotebook import tqdm

%matplotlib inline

  from tqdm.autonotebook import tqdm


In [2]:
# Open the Pangeo CMIP6 catalogue.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

# Experiments to query; a significant amount of data is currently available for these runs.
expts_full = ['historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piControl']

# Search criteria shared by every lookup below.
query = {
    'experiment_id': expts_full,
    'table_id': 'Amon',
    'variable_id': ['tas', 'pr', 'ua', 'va'],
    'member_id': 'r1i1p1f1',
}

# Catalogue entries matching the query, with the requirement applied per source_id.
col_subset = col.search(require_all_on=["source_id"], **query)

# One sub-catalogue per requested variable, in the order of query['variable_id'].
col_subset_var = [
    col_subset.search(variable_id=name) for name in query['variable_id']
]

# Displayed result: number of distinct experiments / variables / tables per model.
(
    col_subset.df
    .groupby("source_id")[["experiment_id", "variable_id", "table_id"]]
    .nunique()
)

Unnamed: 0_level_0,experiment_id,variable_id,table_id
source_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACCESS-CM2,6,4,1
AWI-CM-1-1-MR,6,4,1
BCC-CSM2-MR,6,4,1
CAMS-CSM1-0,6,4,1
CESM2-WACCM,6,4,1
CMCC-CM2-SR5,6,4,1
CMCC-ESM2,6,4,1
CanESM5,6,4,1
EC-Earth3,6,4,1
EC-Earth3-Veg,6,4,1


In [3]:
def drop_all_bounds(ds):
    """Return ``ds`` without the coordinates whose names mark them as bounds.

    A coordinate is removed when its name contains ``'_bounds'`` or
    ``'_bnds'``. Only the names in ``ds.coords`` are inspected.

    Parameters
    ----------
    ds : dataset-like object exposing ``coords`` and ``drop_vars``
        (an ``xarray.Dataset`` in this notebook).

    Returns
    -------
    The object returned by ``ds.drop_vars`` for the matching names.
    """
    bound_names = [
        name for name in ds.coords
        if '_bounds' in name or '_bnds' in name
    ]
    # Dataset.drop is deprecated; drop_vars is its replacement for
    # removing variables by name.
    return ds.drop_vars(bound_names)

def open_dset(df):
    """Open the zarr store listed in the first row of ``df`` and strip its bounds.

    Only ``df.zstore.values[0]`` is read; ``df`` is not checked to contain
    exactly one row. The store is opened with consolidated metadata and
    times decoded using cftime, then passed through ``drop_all_bounds``.
    """
    store_url = df.zstore.values[0]
    store = fsspec.get_mapper(store_url)
    opened = xr.open_zarr(
        store,
        consolidated=True,
        decode_times=True,
        use_cftime=True,
    )
    return drop_all_bounds(opened)

def open_delayed(df):
    """Wrap ``open_dset(df)`` in a ``dask.delayed`` call instead of running it now."""
    lazy_open = dask.delayed(open_dset)
    return lazy_open(df)

# Build one {source_id: {experiment_id: delayed dataset}} mapping for each
# entry of col_subset_var, and collect the mappings in the same order.
# The loop variable is deliberately not named `col_subset`, so the full
# catalogue subset created by the search cell is not overwritten here.
dsets = []
for var_subset in col_subset_var:
    dset = defaultdict(dict)

    grouped = var_subset.df.groupby(by=['source_id', 'experiment_id'])
    for (source_id, experiment_id), df in grouped:
        dset[source_id][experiment_id] = open_delayed(df)
    dsets.append(dset)

In [4]:
# Resolve every delayed open while dask reports progress.
# The class is spelled ProgressBar and has to be instantiated before it
# can be used as a context manager.
with progress.ProgressBar():
    dsets_ = [dask.compute(dict(dset))[0] for dset in dsets]

In [5]:
# Load every dataset of the first per-variable sub-catalogue into a dict.
zarr_options = {"consolidated": True, "decode_times": True, "use_cftime": True}
dset_dict = col_subset_var[0].to_dataset_dict(zarr_kwargs=zarr_options)

# Keys of the piControl datasets only.
ss = [name for name in dset_dict if 'piControl' in name]

# Show the tenth piControl key.
ss[9]


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


'CMIP.CAS.FGOALS-f3-L.piControl.Amon.gr'

In [100]:
# Tabulate the horizontal grid of every piControl dataset.
# Rows are collected in a list and the DataFrame is built once, instead of
# growing it one `.loc` assignment at a time.
grid_columns = ['Model name', 'lat_res', 'lon_res', 'shape (lat x lon)']
grid_rows = []
for key in ss:
    ds = dset_dict[key]
    n_lat = ds.lat.shape[0]
    n_lon = ds.lon.shape[0]
    # NOTE(review): indices 1 and 2 assume the squeezed tas array is ordered
    # (time, lat, lon) - confirm for every model.
    tas_shape = ds.tas.squeeze().shape
    grid_rows.append({
        'Model name': key,
        # Nominal spacing: 180 / 360 degrees divided by the number of points.
        'lat_res': round(180.0 / n_lat, 3),
        'lon_res': round(360.0 / n_lon, 3),
        'shape (lat x lon)': (tas_shape[1], tas_shape[2]),
    })

# Passing `columns` keeps the header even when `ss` is empty.
df = pd.DataFrame(grid_rows, columns=grid_columns)
df

Unnamed: 0,Model name,lat_res,lon_res,shape (lat x lon)
0,CMIP.INM.INM-CM5-0.piControl.Amon.gr1,1.5,2.0,"(120, 180)"
1,CMIP.IPSL.IPSL-CM6A-LR.piControl.Amon.gr,1.259,2.5,"(143, 144)"
2,CMIP.EC-Earth-Consortium.EC-Earth3-Veg.piContr...,0.703,0.703,"(256, 512)"
3,CMIP.BCC.BCC-CSM2-MR.piControl.Amon.gn,1.125,1.125,"(160, 320)"
4,CMIP.MRI.MRI-ESM2-0.piControl.Amon.gn,1.125,1.125,"(160, 320)"
5,CMIP.CCCR-IITM.IITM-ESM.piControl.Amon.gn,1.915,1.875,"(94, 192)"
6,CMIP.CMCC.CMCC-ESM2.piControl.Amon.gn,0.938,1.25,"(192, 288)"
7,CMIP.AWI.AWI-CM-1-1-MR.piControl.Amon.gn,0.938,0.938,"(192, 384)"
8,CMIP.NOAA-GFDL.GFDL-ESM4.piControl.Amon.gr1,1.0,1.25,"(180, 288)"
9,CMIP.CAS.FGOALS-f3-L.piControl.Amon.gr,1.0,1.25,"(180, 288)"
