## Testing CMCC-ESM2 with cmip6_preprocessing 

In [7]:
import intake
from cmip6_preprocessing.preprocessing import combined_preprocessing
import netCDF4

import xarray as xr
from xgcm import Grid
import xesmf as xe

In [2]:
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6-noQC.json"
col = intake.open_esm_datastore(cat_url)
cat = col.search(table_id='Omon', 
                 experiment_id=['historical'],
                 variable_id=['thetao'],
                 member_id=['r1i1p1f1'],
                 grid_label='gn',
                 source_id=['CMCC-ESM2']
                )

In [3]:
ds = cat.to_dataset_dict(zarr_kwargs={'consolidated':True, 'decode_times': True, 'use_cftime': True},
                   preprocess=combined_preprocessing)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [10]:
ds

{'CMIP.CMCC.CMCC-ESM2.historical.Omon.gn': <xarray.Dataset>
 Dimensions:        (bnds: 2, lev: 50, member_id: 1, time: 1980, vertex: 4, x: 292, y: 362)
 Coordinates: (12/15)
   * x              (x) float64 253.0 253.0 253.0 253.0 ... 254.3 254.4 254.5
   * y              (y) float64 50.01 50.12 50.23 50.51 ... 89.37 89.66 89.74
     lat            (x, y) float64 dask.array<chunksize=(292, 362), meta=np.ndarray>
   * lev            (lev) float64 0.5126 1.621 2.858 ... 5.498e+03 5.904e+03
     lev_bounds     (lev, bnds) float64 dask.array<chunksize=(50, 2), meta=np.ndarray>
     lon            (x, y) float64 dask.array<chunksize=(292, 362), meta=np.ndarray>
     ...             ...
     lon_verticies  (x, y, vertex) float64 dask.array<chunksize=(292, 362, 4), meta=np.ndarray>
   * bnds           (bnds) int64 0 1
   * vertex         (vertex) int64 0 1 2 3
     lon_bounds     (bnds, x, y) float64 dask.array<chunksize=(1, 292, 362), meta=np.ndarray>
     lat_bounds     (bnds, x, y) float64 

Download reanalysis data and create grid

In [8]:
URL = 'https://ds.nccs.nasa.gov/thredds/dodsC/bypass/CREATE-IP/reanalysis/CFSR/mon/ocean/thetao.ncml'
      
cfsr_thetao_nc = netCDF4.Dataset(URL)
cfsr_thetao = xr.open_dataset(xr.backends.NetCDF4DataStore(cfsr_thetao_nc))

lat = cfsr_thetao.lat
lon = cfsr_thetao.lon

obs_grid = xr.Dataset(
    {
        "lat": (["lat"], lat),
        "lon": (["lon"], lon),
    }
)
obs_lev = cfsr_thetao.lev

Interpolate CMCC-ESM2 to reanalysis horizontal grid

In [9]:
regridder = xe.Regridder(ds,obs_grid, 'nearest_s2d')
ds_regridded = regridder(ds)

ValueError: dataset must include lon/lat or be CF-compliant

Interpolate CMCC-ESM2 to reanalysis vertical grid

In [11]:
grid = Grid(ds, coords={'Z': {'center': 'lev', 'outer': 'lev_bounds'},
                        'X': {'center': 'lon'},
                        'Y': {'center': 'lat'}},
            periodic=False
            )

ds_interp = grid.transform(ds, 'Z', obs_lev, target_data=None, method='linear')

AttributeError: 'dict' object has no attribute 'dims'