In [1]:
import gcsfs
import cftime
import intake
from cmip6_preprocessing.preprocessing import combined_preprocessing
from fastjmd95 import rho
from xgcm import Grid
import numpy as np
import xesmf as xe
import xarray as xr
import pandas as pd
import cmocean as cmo

### Download CMIP6 data and regrid with CFSR data

In [2]:
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6-noQC.json"
col = intake.open_esm_datastore(cat_url)
cat = col.search(table_id='Omon', 
                 experiment_id=['historical'],
                 variable_id=['thetao'],
                 member_id=['r1i1p1f1'],
                 grid_label='gn',
                 source_id=['MPI-ESM1-2-HR']
                )

cmip6_compiled = cat.to_dataset_dict(
            zarr_kwargs={'consolidated':True, 'decode_times': True, 'use_cftime': True},
            preprocess=combined_preprocessing
            )


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [4]:
URL = 'https://ds.nccs.nasa.gov/thredds/dodsC/bypass/CREATE-IP/reanalysis/CFSR/mon/ocean/thetao.ncml'
CFSR_thetao = xr.open_dataset(URL,engine='netcdf4')
lat = CFSR_thetao.lat
lon = CFSR_thetao.lon

obs_grid = xr.Dataset(
    {
        "lat": (["lat"], lat),
        "lon": (["lon"], lon),
    }
)

In [5]:
ds = cmip6_compiled['CMIP.MPI-M.MPI-ESM1-2-HR.historical.Omon.gn']
regridder = xe.Regridder(ds,obs_grid, 'nearest_s2d')
ds_regridded = regridder(ds)

  keep_attrs=keep_attrs,


In [6]:
test_grid = Grid(CFSR_thetao)
test_grid

<xgcm.Grid>
X Axis (periodic, boundary=None):
  * center   lon
T Axis (periodic, boundary=None):
  * center   time
Z Axis (periodic, boundary=None):
  * center   lev
Y Axis (periodic, boundary=None):
  * center   lat

In [7]:
cmip6_interp = test_grid.interp(ds_regridded.thetao, axis='Z')

KeyError: 'center'

### Try regrid with WOA data

In [8]:
URL = 'https://www.ncei.noaa.gov/thredds-ocean/dodsC/ncei/woa/temperature/decav81B0/0.25/woa18_decav81B0_t00_04.nc'
WOA_temp = xr.open_dataset(URL,engine='netcdf4',
                          decode_times=False) # unable to decode time units if decode_times=True

In [9]:
WOA_temp
lat = WOA_temp.lat
lon = WOA_temp.lon

obs_grid = xr.Dataset(
    {
        "lat": (["lat"], lat),
        "lon": (["lon"], lon),
    }
)

In [10]:
ds = cmip6_compiled['CMIP.MPI-M.MPI-ESM1-2-HR.historical.Omon.gn']
regridder = xe.Regridder(ds,obs_grid, 'nearest_s2d')
ds_regridded = regridder(ds)

  keep_attrs=keep_attrs,


In [11]:
test_grid = Grid(WOA_temp)
test_grid

<xgcm.Grid>
X Axis (periodic, boundary=None):
  * center   lon
T Axis (periodic, boundary=None):
  * center   time
Z Axis (periodic, boundary=None):
  * center   depth
Y Axis (periodic, boundary=None):
  * center   lat

In [12]:
cmip6_interp = test_grid.interp(ds_regridded, axis='Z')

KeyError: "None of the DataArray's dims Frozen(SortedKeysDict({'bnds': 2, 'lat': 720, 'lev': 40, 'lon': 1440, 'member_id': 1, 'time': 1980, 'vertex': 4})) were found in axis coords."