# Step 1: Pre-processing model and reanalysis data

---

## Instructions for activating the Jupyter kernel for the `cmip6hack-multigen` conda environment

In a JupyterLab terminal, navigate to the `/cmip6hack-multigen/` folder and run the command:
```bash
source spinup_env.sh
```
which will create the `cmip6hack-multigen` conda environment and install it as a Python kernel for Jupyter.

Then, switch the kernel (drop-down menu in the top right-hand corner) to `cmip6hack-multigen` and restart the notebook.

### Pre-process climate model output in GCS

This notebook uses [`intake-esm`](https://intake-esm.readthedocs.io/en/latest/) to ingest and organize climate model output from various model generations and resave their time-mean fields locally.

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to the local helper modules imported
# below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import xskillscore as xs
import xesmf as xe
from tqdm.autonotebook import tqdm  # Fancy progress bars for our loops!
import intake

import matplotlib.pyplot as plt

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util
import preprocess as pp
import qc

import warnings

  


In [3]:
# Variable ids to load (first argument to pp.load_ensembles).
varnames = ['tas', 'pr', 'psl']
# Analysis period; handed to both pp.load_ensembles and pp.load_era as `timeslice`.
timeslice = slice('1981', '2010')
# NOTE(review): presumably the horizontal coarsening factor for the loaders —
# confirm where this is consumed.
coarsen_size = 2

In [4]:
# Model generations to process; passed to pp.load_ensembles as `mip_ids`.
mips = ["cmip6"]

In [5]:
# Load the requested variables for every MIP generation in `mips`.
# The result is used as a mapping: key -> ensemble dataset.
ens_dict = pp.load_ensembles(varnames, timeslice=timeslice, mip_ids=mips)

# One list per key; the processing steps below append their derived fields
# here, and the lists are merged into a single dataset when saving.
processed = {key: [] for key in ens_dict}

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loaded: variable_id ` tas ` from activity_id ` cmip6 `
Skip CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.Amon.tas.gn due to timesclicing error.
Loaded: variable_id ` pr ` from activity_id ` cmip6 `
Loaded: variable_id ` psl ` from activity_id ` cmip6 `



  **blockwise_kwargs,
  **blockwise_kwargs,


#### 1. Extract linear trend

In [6]:
# Step 1: slope of 'tas' for every ensemble, stored as "tas_trend".
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    for key in tqdm(list(ens_dict)):
        # 'time': -1 keeps the full time axis in a single chunk, while the
        # horizontal grid is split into small 10x10 tiles.
        rechunked = ens_dict[key].chunk({'ensemble': 1, 'time': -1, 'lat': 10, 'lon': 10})
        ens_dict[key] = rechunked

        tas_trend = util.compute_slope(rechunked['tas'])
        tas_trend.name = "tas_trend"
        processed[key].append(tas_trend)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




#### 2. Extract seasonal climatology

In [None]:
# Step 2: derive climatology and anomalies for every ensemble.
# The anomalies are kept in `anom_dict` for the ENSO-index step that follows.
anom_dict = {}

with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    for key in tqdm(list(ens_dict)):
        # Rechunk so each chunk holds the full horizontal grid, and make sure
        # the *rechunked* dataset is the one handed to the computation below
        # (previously the stale, un-rechunked loop variable was used).
        ens = ens_dict[key].chunk({'ensemble': 1, 'time': 'auto', 'lat': -1, 'lon': -1})
        ens_dict[key] = ens

        # `ann` is not used in this cell.
        clim, anom, ann = util.compute_derived_variables(ens)

        # Suffix every climatology variable with "_clim" so it does not clash
        # with the other derived fields when everything is merged.
        rename_clim = {dv: dv + "_clim" for dv in clim.data_vars}
        processed[key].append(clim.rename(rename_clim))
        anom_dict[key] = anom

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  return self.array[key]


#### 3. Extract internal variability (Niño3.4 index)

In [None]:
# Step 3: ENSO-like index computed from the 'tas' anomalies of each ensemble.
# NOTE: `enso` deliberately stays in the notebook namespace after the loop;
# the diagnostic plots further down read it.
enso_dict = {}

with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    for key in tqdm(list(ens_dict)):
        anom = anom_dict[key]
        tas_anomaly = anom['tas']

        enso = util.pseudo_enso(tas_anomaly)
        enso.name = "enso34"
        processed[key].append(enso)

#### 4. Extract time-mean

In [None]:
# Step 4: time-mean of every variable, stored with a "_mean" suffix.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    for key in tqdm(list(ens_dict)):
        time_mean = ens_dict[key].mean(dim=['time'], keep_attrs=True, skipna=True)

        # Suffix the variable names so they stay distinct after merging.
        mean_names = {dv: dv + "_mean" for dv in time_mean.data_vars}
        processed[key].append(time_mean.rename(mean_names))

#### 5. Save computed interim files

In [None]:
# Output location for the interim zarr stores. Store names are appended by
# plain string concatenation below, so the trailing slash is required.
interim_path = "../data/interim/"

In [None]:
# Step 5: merge the derived fields of each ensemble and write one zarr store per key.
for key, derived_fields in processed.items():
    ens = xr.merge(derived_fields)
    for data_var in ens.data_vars:
        # Remove empty attribute that messes up to_zarr method
        ens[data_var].attrs.pop('intake_esm_varname', None)

    # Pass the write mode by keyword: in current xarray the second positional
    # parameter of to_zarr is `chunk_store`, so a positional "w" would not
    # select overwrite mode.
    ens.to_zarr(f"{interim_path}{key}", mode="w")

### Pre-process observational data products

In [None]:
# Load the ERA5 monthly reanalysis over the same period as the model ensembles.
# The coarsening factor comes from the configuration cell instead of a
# duplicated literal, so the setting lives in one place.
era5 = pp.load_era(
    "../data/raw/reanalysis/ERA5_mon_2d.nc",
    timeslice=timeslice,
    coarsen_size=coarsen_size,
)

In [None]:
# Derive the reanalysis counterpart of the model ENSO-like index.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    # NOTE(review): the unpacking order is aligned with the model-ensemble
    # cell, which unpacks the same helper as (clim, anom, ann). The previous
    # order here was (anom, clim, ann), so one of the two cells was feeding
    # the wrong field to pseudo_enso — confirm against
    # util.compute_derived_variables.
    era_clim, era_anom, era_ann = util.compute_derived_variables(era5['tas'])
    era_enso = util.pseudo_enso(era_anom).compute()

In [None]:
# Overlay the index of every ensemble member, then draw the reanalysis
# index on top as a thick black line.
fig, ax = plt.subplots(figsize=(12, 8))
for member in enso.ensemble.values:
    enso.sel(ensemble=member).plot(ax=ax)

era_enso.plot(ax=ax, color="k", lw=3)

In [None]:
# Compare the temporal standard deviation of each member's index with that
# of the reanalysis index (drawn as a horizontal reference line).
n_members = enso.ensemble.size
member_positions = np.arange(0, n_members)
xlims = [-1, n_members + 1]

plt.plot(member_positions, enso.std(dim='time', skipna=True))

era_spread = era_enso.std(dim='time', skipna=True).values
plt.plot(xlims, era_spread * np.array([1., 1.]))

# Label each tick with the corresponding ensemble member.
plt.xticks(member_positions, enso.ensemble.values, rotation=90)
plt.ylim([0, 1.5])
plt.xlim(xlims)

In [None]:
# Save the reanalysis time mean next to the interim model files.
# The write mode is passed by keyword: in current xarray the second positional
# parameter of to_zarr is `chunk_store`, so a positional "w" would not select
# overwrite mode.
era5_timemean = era5.mean(dim='time', keep_attrs=True)
era5_timemean.to_zarr(interim_path + "era5_timemean", mode="w")