### Instructions for activating the Jupyter kernel for the `cmip6hack-multigen` conda environment

In a Jupyterlab terminal, navigate to the `/cmip6hack-multigen/` folder and run the command:
```bash
source spinup_env.sh
```
which will create the `cmip6hack-multigen` conda environment and install it as a python kernel for jupyter.

Then, switch the kernel (drop-down menu in the top right-hand corner) to `cmip6hack-multigen` and restart the notebook.

### Pre-processing climate model output in GCS

This notebook uses [`intake-esm`](https://intake-esm.readthedocs.io/en/latest/) to ingest and organize climate model output from various model generations and resave their time-mean fields locally.

In [1]:
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
import xskillscore as xs
import xesmf as xe
from tqdm.autonotebook import tqdm  # Fancy progress bars for our loops!
import intake
# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util
import organization as org
import qc

  import sys


In [2]:
# Variable ids requested from each catalog search (passed as `variable_id`).
varnames = ["tas", "pr", "psl"]

# Time window selected with `ds.sel(time=...)` before taking the time mean.
time_slice = slice("1981", "2010")

# Coarsening window size, applied to both the `lat` and `lon` dimensions.
coarsen_size = 2

In [3]:
# Catalog collections from the project-local `organization` module; indexed
# below as `col_dict[mip_id]`, and each entry is queried with `.search(...)`.
col_dict = org.get_ipcc_collection()

In [4]:
# Identifiers of the model generations to process; each one is used as a key
# into `col_dict` and into the output dictionary `ds_dict`.
mip_ids = org.all_mip_ids

In [5]:
# Build ds_dict[mip_id][varname][key]: for every model generation, variable and
# catalog entry, the time-mean field after regridding (util.regrid_to_common),
# quality control (qc.quality_control) and coarsening.
ds_dict = {}

# Loop-invariant: the same coarsening window is applied to every field.
coarsen_dict = {'lat': coarsen_size, 'lon': coarsen_size}

for mip_id in tqdm(mip_ids):
    ds_dict[mip_id] = {}
    col = col_dict[mip_id]  # same collection for every variable of this mip_id

    for varname in varnames:
        print(mip_id, varname)
        cat = col.search(
            experiment_id='historical',
            variable_id=varname,
            member_id='r1i1p1f1',  # choose first ensemble member only (for now)
            table_id='Amon'
        )

        # Nothing matched the query: leave ds_dict[mip_id] without this variable.
        if cat.df.empty:
            continue

        with util.HiddenPrints():
            dset_dict = cat.to_dataset_dict(
                zarr_kwargs={'consolidated': True, 'decode_times': False}
            )

        ds_dict[mip_id][varname] = {}
        for key, ds in dset_dict.items():
            # rename spatial dimensions if necessary
            if ('longitude' in ds.dims) and ('latitude' in ds.dims):
                ds = ds.rename({'longitude': 'lon', 'latitude': 'lat'})

            # Need this temporarily because setting 'decode_times': True
            # appears broken
            ds = xr.decode_cf(ds)
            ds = ds.squeeze()  # get rid of member_id (for now)

            # take long-term mean
            try:
                timeave = ds.sel(time=time_slice).mean(dim='time', keep_attrs=True)
            except Exception:
                # A few cases of weird cftime stuff going on...
                # `Exception` rather than a bare `except` so that
                # KeyboardInterrupt / SystemExit still stop the cell.
                print("Weird time units breaks ds.sel(time=time_slice)")
                continue

            if mip_id != 'cmip6':
                # Rechunk so that each of lat and lon is a single chunk.
                chunks = {'lat': timeave['lat'].size, 'lon': timeave['lon'].size}
                timeave = timeave.chunk(chunks)

            with util.HiddenPrints():
                ds_new = util.regrid_to_common(timeave[varname])

            # Carry the dataset-level attributes over to the regridded field.
            ds_new.attrs.update(timeave.attrs)
            ds_new = qc.quality_control(ds_new, varname, key, mip_id)

            # Label built from the 2nd and 3rd dot-separated fields of the key.
            ds_new.attrs['name'] = "-".join(key.split(".")[1:3])

            # Keep only the horizontal coordinates.
            for coord in ds_new.coords:
                if coord not in ['lat', 'lon']:
                    ds_new = ds_new.drop(coord)

            # Add a leading length-1 'ensemble' dimension labelled with the name.
            ds_new = ds_new.expand_dims(
                {'ensemble': np.array([ds_new.attrs['name']])}, 0
            )

            ds_new.attrs['mip_id'] = mip_id

            # boundary='exact' requires the dimension sizes to be multiples of
            # coarsen_size.
            ds_new = ds_new.coarsen(coarsen_dict, boundary='exact').mean()

            ds_dict[mip_id][varname][key] = ds_new  # add this to the dictionary

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))


 far tas

 far pr

 far psl

 sar tas

 sar pr

 sar psl

 tar tas

 tar pr

 tar psl

 cmip3 tas
Weird time units breaks ds.sel(time=time_slice)

 cmip3 pr
Weird time units breaks ds.sel(time=time_slice)

 cmip3 psl

 cmip5 tas

 cmip5 pr
Weird time units breaks ds.sel(time=time_slice)

 cmip5 psl

 cmip6 tas

 cmip6 pr

 cmip6 psl

