# Example with high-resolution CMIP6 models (~100 km) using Pangeo catalog 

### Time period

We will use data from 1985 to 2014.

### Variables 

| shortname     |             Long name                   |      Units    |  levels |
| ------------- |:---------------------------------------:| -------------:|--------:|
|  prsn         |    Snowfall Flux                        | [kg m-2 s-1]  | surface |
| clw           |    Mass Fraction of Cloud Liquid Water  |  [kg kg-1]    |    ml   |
| cli           |    Mass Fraction of Cloud Ice           | [kg kg-1]     |    ml   |
| tas           |    Near-Surface Air Temperature         |   [K]         | surface |
| ta            |    Air Temperature                      |  [K]          |    ml   |


## Import python packages

In [None]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore') # don't output warnings

# import packages
import xarray as xr
xr.set_options(display_style='html')
import intake
import cftime
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import xesmf as xe
from glob import glob
import pandas as pd
import numpy as np

%matplotlib inline


# reload imports
%load_ext autoreload
%autoreload 2

### Open CMIP6 online catalog

In [None]:
# URL of the JSON description of the Pangeo CMIP6 catalog.
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
# Open the catalog as an intake-esm datastore; `col` is queried in the cells below.
col = intake.open_esm_datastore(cat_url)
# Bare expression: in a notebook this displays the datastore object.
col

### Search corresponding data

In [None]:
# CMIP6 model names (catalog `source_id` values) that the catalog search
# below is restricted to.
list_models = [
    'AWI-CM-1-1-MR',
    'BCC-CSM2-MR',
    'CAMS-CSM1-0',
    'CAS-ESM2-0',
    'CESM2',
    'CESM2-WACCM-FV2',
    'CMCC-CM2-HR4',
    'CMCC-CM2-SR5',
    'CMCC-ESM2',
    'E3SM-1-0',
    'E3SM-1-1',
    'E3SM-1-1-ECA',
    'EC-Earth3',
    'EC-Earth3-AerChem',
    'EC-Earth3-CC',
    'EC-Earth3-Veg',
    'FGOALS-f3-L',
    'FIO-ESM-2-0',
    'GFDL-CM4',
    'GFDL-ESM4',
    'HadGEM3-GC31-MM',
    'INM-CM4-8',
    'INM-CM5-0',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-MM',
    'SAM0-UNICON',
    'TaiESM1',
]



In [None]:
# One-element list of variable short names; only its first entry is used in
# the search (and later in the output file name).
variable_id = ['prsn']

# Restrict the catalog to the listed models, the historical experiment,
# the chosen variable and the ensemble member r1i1p1f1.
cat = col.search(
    source_id=list_models,
    experiment_id=['historical'],
    variable_id=variable_id[0],
    member_id=['r1i1p1f1'],
)

# Show the table of matching catalog entries.
cat.df

In [None]:
# Distinct model names (source_id) among the catalog entries found by the search.
cat.df['source_id'].unique()

### Create dictionary from the list of datasets we found

- This step may take several minutes so be patient!

In [None]:
# Load every matching catalog entry into a dictionary of xarray datasets.
# 'use_cftime': True is passed through so time axes are decoded as cftime
# objects, which the calendar conversion further down relies on
# (it calls .timetuple() on each time value).
# NOTE(review): newer intake-esm releases appear to expect these options under
# `xarray_open_kwargs` instead of `zarr_kwargs` -- confirm against the
# installed version.
dset_dict = cat.to_dataset_dict(zarr_kwargs={'use_cftime':True})

In [None]:
# Show the dataset keys, e.g. 'CMIP.NCC.NorESM2-MM.historical.Amon.gn'.
list(dset_dict.keys())

### Use data as xarray to make a simple plot

In [None]:
# Pick the NorESM2-MM historical monthly dataset; the same key is used later
# as the target grid for regridding.
ds = dset_dict['CMIP.NCC.NorESM2-MM.historical.Amon.gn']
# Bare expression: in a notebook this displays the dataset.
ds

In [None]:
# Quick-look map of one monthly snowfall field on an orthographic projection
# centred on longitude 0, latitude 90.
fig = plt.figure(1, figsize=[10, 10])
ax = plt.subplot(1, 1, 1, projection=ccrs.Orthographic(0, 90))
ax.coastlines()

# Time stamp of the field to plot.
snapshot_time = cftime.DatetimeNoLeap(1985, 1, 16, 12, 0, 0, 0)
prsn_snapshot = ds['prsn'].sel(time=snapshot_time)

# The data are on a regular lon/lat grid, hence the PlateCarree transform.
prsn_snapshot.plot(ax=ax, transform=ccrs.PlateCarree(), cmap='coolwarm')

### Get attributes (unique identifier)

In [None]:
# Read the 'tracking_id' global attribute of the selected dataset.
ds.attrs['tracking_id']

# Regrid CMIP6 data to common NorESM2-MM grid

In [None]:
def to_DatetimeNoLeap(da):
    '''Return a copy of ``da`` whose time axis uses ``cftime.DatetimeNoLeap``.

    Every time stamp is rebuilt from its year, month and day only, so the
    time of day is dropped (the new stamps are at 00:00). This puts data
    from models with different calendars on comparable time stamps.
    See https://climate-cms.org/2019/11/12/Calendars-and-monthly-data.html

    Parameters
    ----------
    da : xarray.DataArray or xarray.Dataset
        Object with a ``time`` coordinate whose values provide
        ``timetuple()`` (e.g. cftime dates). In this notebook it is called
        with a Dataset.

    Returns
    -------
    Same type as ``da``
        Copy of the input with the converted ``time`` coordinate. The
        dimension and coordinate keep the name ``time``.

    Notes
    -----
    Dates that do not exist in the no-leap calendar (29 February, or
    30 February from a 360-day calendar) cannot be converted; cftime is
    expected to reject them -- fine for mid-month monthly stamps, but check
    before using this on daily data.
    '''
    val = da.copy()
    time1 = da.time.copy()

    # Materialise the original time stamps once instead of on every iteration.
    original_times = val.time.values
    for itime, stamp in enumerate(original_times):
        bb = stamp.timetuple()
        time1.values[itime] = cftime.DatetimeNoLeap(bb[0], bb[1], bb[2])

    # Swap in the converted time axis (name unchanged, so no rename is needed).
    val = val.assign_coords({'time': time1})
    return val


In [None]:
import os

# Analysis period (both years inclusive)
starty = 1985
endy = 2014
year_range = range(starty, endy + 1)

# Regridded dataset of every model, keyed by model name
ds_gridded_dict = dict()

# Chunk sizes used when the regridded files are loaded for the ensemble
chunks = {'time': 180, 'lat': 192, 'lon': 288}

# Variable attributes copied from the source data to the regridded data.
# Each one is copied independently, so a missing attribute no longer
# prevents the remaining ones from being copied.
var_attrs = ('units', 'long_name', 'standard_name', 'comment',
             'original_name', 'cell_methods', 'cell_measures')

# Read in the output grid from NorESM
ds_out = dset_dict['CMIP.NCC.NorESM2-MM.historical.Amon.gn'].isel(member_id = 0)
ds_out = ds_out.sel(time = ds_out.time.dt.year.isin(year_range)).squeeze()

# Number of models for which a regridded file already existed
counter = 0

for keys in dset_dict.keys():
    # select only models which have atmospheric monthly values
    amon = keys.split('.')[-2]
    if amon != 'Amon':
        continue

    # select model name
    model = keys.split('.')[2]

    # select where data should be saved
    filename = '{}_Amon_1deg_{}01_{}12.nc'.format(variable_id[0], starty, endy)
    savepath = '/home/franzihe/Documents/Data/output/CMIP6_hist/1deg/{}/'.format(model)
    nc_out = savepath + filename

    if os.path.isfile(nc_out):
        print('{}+{} is downloaded'.format(savepath, filename))
        counter += 1
        print('Have regridded in total: {:} files'.format(str(counter)))
    else:
        # Input data from CMIP6 model to be regridded
        ds_in = dset_dict[keys].isel(member_id = 0)
        ds_in = ds_in.sel(time = ds_in.time.dt.year.isin(year_range)).squeeze()

        # common time grid
        ds_in = to_DatetimeNoLeap(ds_in)

        # Bilinear regridding onto the NorESM grid. periodic=True because the
        # grids are global: it lets the interpolation wrap around in
        # longitude instead of leaving target cells beyond the last source
        # longitude unmapped.
        regridder = xe.Regridder(ds_in, ds_out, 'bilinear', periodic=True)

        # Apply regridder to data; the entire dataset is processed at once
        ds_in_regrid = regridder(ds_in)

        # Carry the global attributes of the source file over
        ds_in_regrid.attrs = ds_in.attrs

        # Verify that the result is the same as regridding each variable
        # one-by-one, and restore the variable attributes where it is.
        # Only variables present in the regridded output are checked.
        for k in ds_in_regrid.data_vars:
            # the per-variable regridding is expensive, so evaluate it once
            same = ds_in_regrid[k].equals(regridder(ds_in[k]))
            print(k, same)
            if not same:
                continue
            for attr_name in var_attrs:
                if attr_name in ds_in[k].attrs:
                    ds_in_regrid[k].attrs[attr_name] = ds_in[k].attrs[attr_name]

        # Shift the longitude from 0-->360 to -180-->180 and sort by longitude and time.
        # The longitudes are taken from the regridded data itself, not from a
        # dataset defined in another cell.
        ds_in_regrid = ds_in_regrid.assign_coords(lon=(((ds_in_regrid.lon + 180) % 360) - 180)).sortby('lon').sortby('time')

        # Save to netcdf file (create the model directory on first use)
        os.makedirs(savepath, exist_ok=True)
        ds_in_regrid.to_netcdf(nc_out)
        # ds_out is deliberately not closed here: it is still needed as the
        # target grid for the remaining models.
        ds_in_regrid.close(); ds_in.close()
        print('file written: .{}'.format(nc_out))

    # Load the regridded file, whether it was just written or found on disk,
    # so that every model is part of the ensemble and the lazy regridding
    # does not have to be recomputed later.
    ds_gridded_dict[model] = xr.open_dataset(nc_out, use_cftime=True, chunks=chunks)

# Create dataset with all models: stack the regridded variable along a new
# `model` dimension labelled with the model names.
if ds_gridded_dict:
    model_names = list(ds_gridded_dict)
    ds_prsn_cmip = xr.concat(
        [ds_gridded_dict[name][[variable_id[0]]] for name in model_names],
        dim=pd.Index(model_names, name='model'),
        # scalar/auxiliary coordinates may differ between models; keep the
        # first model's instead of failing on the mismatch
        coords='minimal',
        compat='override',
    )
else:
    # No monthly atmospheric dataset was found: keep an empty placeholder so
    # that the name is defined for the following cells.
    ds_prsn_cmip = xr.Dataset(
        {variable_id[0]: (('model', 'time', 'lat', 'lon'),
                          np.full(shape=(0, 0, 0, 0), fill_value=np.nan))}
    )
            


# Create model and seasonal mean of all regridded models

In [None]:
# Ensemble mean over all models, followed by the mean over time within each
# season. keep_attrs=True is passed to both reductions: with xarray's default
# options the first mean would already drop the variable attributes (units,
# long_name, ...), leaving nothing for the second one to keep.
prsn = (
    ds_prsn_cmip.prsn
    .mean('model', keep_attrs=True)
    .groupby('time.season')
    .mean('time', keep_attrs=True)
)