## Testing CMCC-ESM2 with cmip6_preprocessing

In [1]:
import intake

In [2]:
# JSON descriptor of the catalog that intake-esm opens below.
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6-noQC.json"
col = intake.open_esm_datastore(cat_url)

# Facet values used to filter the collection; the summary printed for `cat`
# shows exactly one unique value per facet after this search.
query = {
    'table_id': 'Omon',
    'experiment_id': ['historical'],
    'variable_id': ['thetao'],
    'member_id': ['r1i1p1f1'],
    'grid_label': 'gn',
    'source_id': ['CMCC-ESM2'],
}
cat = col.search(**query)

In [3]:
# Display the filtered catalog; the output lists the number of unique values per column.
cat

Unnamed: 0,unique
activity_id,1
institution_id,1
source_id,1
experiment_id,1
member_id,1
table_id,1
variable_id,1
grid_label,1
zstore,1
dcpp_init_year,0


In [4]:
# Baseline load with no preprocessing: open everything matched by `cat`,
# forwarding these options through `zarr_kwargs`.
zarr_options = dict(consolidated=True, decode_times=True, use_cftime=True)
ds = cat.to_dataset_dict(zarr_kwargs=zarr_options)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


Install the released package with `conda install -c conda-forge cmip6_preprocessing`, then try loading with the full `combined_preprocessing` pipeline:

In [6]:
from cmip6_preprocessing.preprocessing import combined_preprocessing

In [7]:
# Repeat the load, this time handing `combined_preprocessing` to the
# `preprocess` argument (this attempt fails with the RuntimeError shown below).
zarr_options = dict(consolidated=True, decode_times=True, use_cftime=True)
ds = cat.to_dataset_dict(
    zarr_kwargs=zarr_options,
    preprocess=combined_preprocessing,
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'




RuntimeError: Failed to apply pre-processing function: combined_preprocessing

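Next, install the development version from GitHub using `pip install git+https://github.com/jbusecke/cmip6_preprocessing.git` and retry. Note: restart the kernel after installing; re-importing in a running kernel reuses the already-loaded module, so the new version would not actually be tested.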

In [8]:
from cmip6_preprocessing.preprocessing import combined_preprocessing

In [9]:
# Retry of the `combined_preprocessing` load after installing the package from
# GitHub; the call itself is unchanged from the previous attempt.
zarr_options = dict(consolidated=True, decode_times=True, use_cftime=True)
ds = cat.to_dataset_dict(
    zarr_kwargs=zarr_options,
    preprocess=combined_preprocessing,
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


RuntimeError: Failed to apply pre-processing function: combined_preprocessing

Use a modified preprocessing function that skips the lon/lat bounds and vertex steps:

In [14]:
from cmip6_preprocessing.preprocessing import (
    rename_cmip6, 
    promote_empty_dims, 
    correct_coordinates, 
    correct_lon, 
    correct_units, 
    broadcast_lonlat,
    #parse_lon_lat_bounds,
    #sort_vertex_order,
    #maybe_convert_bounds_to_vertex, 
    #maybe_convert_vertex_to_bounds,
)

In [15]:
def modified_preprocessing(ds):
    """Run a reduced chain of cmip6_preprocessing steps on ``ds``.

    A copy of the input (``ds.copy()``) is passed, in order, through
    ``rename_cmip6``, ``promote_empty_dims``, ``correct_coordinates``,
    ``broadcast_lonlat``, ``correct_lon`` and ``correct_units``.

    Steps intentionally disabled in this notebook:
    ``replace_x_y_nominal_lat_lon``, ``parse_lon_lat_bounds``,
    ``sort_vertex_order``, ``maybe_convert_bounds_to_vertex`` and
    ``maybe_convert_vertex_to_bounds``.

    Parameters
    ----------
    ds
        Object exposing ``.copy()`` that the step functions accept
        (presumably an ``xarray.Dataset`` -- TODO confirm).

    Returns
    -------
    The value returned by the final enabled step.
    """
    result = ds.copy()
    enabled_steps = (
        rename_cmip6,         # fix naming
        promote_empty_dims,   # promote empty dims to actual coordinates
        correct_coordinates,  # demote coordinates from data_variables
        broadcast_lonlat,     # broadcast lon/lat
        correct_lon,          # shift all lons to consistent 0-360
        correct_units,        # fix the units
    )
    # Each step receives the output of the previous one.
    for step in enabled_steps:
        result = step(result)
    return result

In [16]:
# Load again, passing the reduced `modified_preprocessing` function as `preprocess`.
# Note: unlike the earlier calls in this notebook, `use_cftime` is not passed here.
zarr_options = dict(consolidated=True, decode_times=True)
ds = cat.to_dataset_dict(
    zarr_kwargs=zarr_options,
    preprocess=modified_preprocessing,
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


RuntimeError: Failed to apply pre-processing function: modified_preprocessing