In [1]:
import gcsfs
import cftime
import intake
from cmip6_preprocessing.preprocessing import combined_preprocessing
from xgcm import Grid
import numpy as np
import xesmf as xe
import xarray as xr

### Download CMIP6 data and regrid with CFSR data

Download CMIP6 data

In [2]:
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6-noQC.json"
col = intake.open_esm_datastore(cat_url)
cat = col.search(table_id='Omon', 
                 experiment_id=['historical'],
                 variable_id=['thetao'],
                 member_id=['r1i1p1f1'],
                 grid_label='gn',
                 source_id=['MPI-ESM1-2-HR']
                )

cmip6_compiled = cat.to_dataset_dict(
            zarr_kwargs={'consolidated':True, 'decode_times': True, 'use_cftime': True},
            preprocess=combined_preprocessing
            )


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Download reanalysis dataset

In [3]:
URL = 'https://ds.nccs.nasa.gov/thredds/dodsC/bypass/CREATE-IP/reanalysis/CFSR/mon/ocean/thetao.ncml'
CFSR_thetao = xr.open_dataset(URL,engine='netcdf4')

lat = CFSR_thetao.lat
lon = CFSR_thetao.lon

obs_grid = xr.Dataset(
    {
        "lat": (["lat"], lat),
        "lon": (["lon"], lon),
    }
)

obs_lev = CFSR_thetao.lev

Regrid CMIP6 to CFSR horizontal grid

In [4]:
ds = cmip6_compiled['CMIP.MPI-M.MPI-ESM1-2-HR.historical.Omon.gn']
regridder = xe.Regridder(ds,obs_grid, 'nearest_s2d')
ds_regridded = regridder(ds)

  keep_attrs=keep_attrs,


Create grid object from CMIP6 data

In [5]:
cmip6_thetao = ds_regridded.thetao

grid = Grid(cmip6_thetao, coords={'Z': {'center': 'level', 'outer': 'level_outer'},
                        'X': {'center': 'lon', 'right': 'lon_u'},
                        'Y': {'center': 'lat', 'right': 'lat_u'}},
            periodic=['X'])

Use xgcm transform to interpolate CMIP6 temperature data to CFSR vertical grid

In [6]:
cmip6_interp_thetao = grid.transform(cmip6_thetao, 'Z', obs_lev, target_data=None, method='linear')

KeyError: "None of the DataArray's dims ('member_id', 'time', 'lev', 'lat', 'lon') were found in axis coords."

In [7]:
target_z_levels = np.array([1,100, 1000, 3000]) 
cmip6_interp_thetao = grid.transform(cmip6_thetao, 'Z', target_z_levels, target_data=None, method='linear')

KeyError: "None of the DataArray's dims ('member_id', 'time', 'lev', 'lat', 'lon') were found in axis coords."

### Try regrid with WOA data

In [8]:
URL = 'https://www.ncei.noaa.gov/thredds-ocean/dodsC/ncei/woa/temperature/decav81B0/0.25/woa18_decav81B0_t00_04.nc'
WOA_temp = xr.open_dataset(URL,engine='netcdf4',
                          decode_times=False) # unable to decode time units if decode_times=True

In [9]:
lat = WOA_temp.lat
lon = WOA_temp.lon

obs_grid = xr.Dataset(
    {
        "lat": (["lat"], lat),
        "lon": (["lon"], lon),
    }
)

obs_lev = WOA_temp.depth

In [10]:
ds = cmip6_compiled['CMIP.MPI-M.MPI-ESM1-2-HR.historical.Omon.gn']
regridder = xe.Regridder(ds,obs_grid, 'nearest_s2d')
ds_regridded = regridder(ds)

  keep_attrs=keep_attrs,


In [11]:
grid = Grid(ds_regridded, coords={'Z': {'center': 'level', 'outer': 'level_outer'},
                        'X': {'center': 'lon', 'right': 'lon_u'},
                        'Y': {'center': 'lat', 'right': 'lat_u'}},
            periodic=['X'])

cmip6_thetao = ds_regridded.thetao

In [12]:
cmip6_interp_thetao = grid.transform(cmip6_thetao, 'Z', obs_lev, target_data=None, method='linear')

KeyError: "None of the DataArray's dims ('member_id', 'time', 'lev', 'lat', 'lon') were found in axis coords."

In [13]:
target_z_levels = np.array([1,100, 1000, 3000]) 
cmip6_interp_thetao = grid.transform(cmip6_thetao, 'Z', target_z_levels, target_data=None, method='linear')

KeyError: "None of the DataArray's dims ('member_id', 'time', 'lev', 'lat', 'lon') were found in axis coords."

### Run example from docs

In [14]:
import numpy as np
import xarray as xr
from xgcm import Grid

from intake import open_catalog

cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds  = cat["GODAS"].to_dask()


ValueError: Bad Request: https://www.googleapis.com/download/storage/v1/b/pangeo-ncep-godas/o/.zmetadata?alt=media
b'User project specified in the request is invalid.'