# Step 1: Pre-processing model and reanalysis data

---

## Instructions for activating the Jupyter kernel for the `cmip6hack-multigen` conda environment

In a JupyterLab terminal, navigate to the `/cmip6hack-multigen/` folder and run the command:
```bash
source spinup_env.sh
```
which will create the `cmip6hack-multigen` conda environment and install it as a Python kernel for Jupyter.

Then, switch the kernel (drop-down menu in the top-right corner) to `cmip6hack-multigen` and restart the notebook.

### Pre-process climate model output in GCS

This notebook uses [`intake-esm`](https://intake-esm.readthedocs.io/en/latest/) to ingest and organize climate model output from various model generations and resave their time-mean fields locally.

In [None]:
# Load IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to the local helper modules take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import xskillscore as xs
import xesmf as xe
from tqdm.autonotebook import tqdm  # Fancy progress bars for our loops!
import intake

import matplotlib.pyplot as plt

# Progress bar for dask stuff
from dask.diagnostics import ProgressBar
ProgressBar().register()

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util
import preprocess as pp
import qc

import warnings

In [None]:
# Shared analysis settings for this notebook.

# Coarsening factor (presumably the number of grid cells merged per block;
# confirm against the preprocess module).
coarsen_size = 2

# Analysis period, expressed as a slice between two year labels.
timeslice = slice("1981", "2010")

# Short names of the variables to load; handed to pp.load_ensembles below.
varnames = ["tas", "pr", "psl"]

In [None]:
# Load the requested variables over the analysis period. Only the 'sar'
# ensemble id is requested here; the result is indexed by that id below.
ens_dict = pp.load_ensembles(
    varnames,
    timeslice=timeslice,
    mip_ids=["sar"],
)

### Testing

In [None]:
# Pick the 'sar' entry out of the loaded dictionary for the exploratory
# cells in this section.
ens = ens_dict["sar"]

In [None]:
# Display the 'tas' variable of the selected ensemble as the cell output.
ens["tas"]

#### Extract linear trend

In [None]:
# Compute the slope of 'tas' (presumably along time, per the section title).
# The data is rechunked so the whole time axis sits in a single chunk, and
# RuntimeWarnings raised inside this block are silenced.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=RuntimeWarning)
    trend = util.compute_slope(
        ens["tas"].chunk({"time": -1})
    ).compute()

#### Extract seasonal climatology

In [None]:
# Derive three fields from the full ensemble dataset. Judging by the names,
# these are the climatology, the anomalies and an annual field; confirm
# against util.compute_derived_variables. RuntimeWarnings are silenced.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=RuntimeWarning)
    (clim, anom, ann) = util.compute_derived_variables(ens)

#### Extract internal variability (Niño3.4 index)

In [None]:
# Build the pseudo-ENSO index from the 'tas' anomalies and load the result
# into memory. RuntimeWarnings raised along the way are silenced.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=RuntimeWarning)
    enso = util.pseudo_enso(
        anom["tas"]
    ).compute()

#### Extracting time-mean

In [None]:
# Replace every entry of ens_dict with its mean over 'time', skipping NaNs
# and keeping the original attributes.
ens_dict = util.dict_func(
    ens_dict,
    xr.Dataset.mean,
    on_self=True,
    dim=["time"],
    keep_attrs=True,
    skipna=True,
)

In [None]:
# Apply xr.Dataset.compute to every entry so the time means are evaluated
# and held in memory.
ens_dict = util.dict_func(
    ens_dict,
    xr.Dataset.compute,
    on_self=True,
)

### Pre-process observational data products

In [None]:
# Load the ERA5 monthly reanalysis file over the analysis period.
# The coarsening factor comes from the notebook-level `coarsen_size` setting
# rather than a repeated literal, so changing the setting in one place keeps
# this cell in sync.
era5 = pp.load_era(
    "../data/raw/reanalysis/ERA5_mon_2d.nc",
    timeslice=timeslice,
    coarsen_size=coarsen_size,
)

In [None]:
# Derive the reanalysis 'tas' fields and the matching pseudo-ENSO index,
# with RuntimeWarnings silenced.
# NOTE(review): the results are unpacked here as (anom, clim, ann), whereas
# the model-ensemble cell unpacks the same helper as (clim, anom, ann).
# One of the two orders is presumably wrong; confirm the return order of
# util.compute_derived_variables before trusting era_enso.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=RuntimeWarning)
    (era_anom, era_clim, era_ann) = util.compute_derived_variables(
        era5["tas"]
    )
    era_enso = util.pseudo_enso(era_anom).compute()

In [None]:
# Overlay the index time series of every ensemble member on one figure,
# then draw the ERA5-derived index on top as a thick black line.
plt.figure(figsize=(12, 8))
for ensemble in enso.ensemble.values:
    enso.sel(ensemble=ensemble).plot()

era_enso.plot(color='k', lw=3)

In [None]:
# Compare the standard deviation over time of each member's index (one point
# per member) with the ERA5-derived value, drawn across the full x-range.
xlims = [-1, enso.ensemble.size + 1]
plt.plot(
    np.arange(0, enso.ensemble.size),
    enso.std(dim="time", skipna=True),
)
# Repeat the reanalysis value at both x-limits so it spans the axes.
plt.plot(
    xlims,
    era_enso.std(dim="time", skipna=True).values * np.array([1.0, 1.0]),
)
# Label each tick with the corresponding ensemble member name.
plt.xticks(
    np.arange(0, enso.ensemble.size),
    enso.ensemble.values,
    rotation=90,
)
plt.ylim([0, 1.5])
plt.xlim(xlims)

### Save interim files

In [None]:
# Directory for interim outputs; also used by the ensemble-saving cell.
interim_path = "../data/interim/"

# Save the ERA5 time mean (attributes kept) as a Zarr store.
# The write mode is passed by keyword: the second positional parameter of
# Dataset.to_zarr is not `mode` in current xarray releases, so a positional
# "w" would be bound to the wrong argument.
era5.mean(dim='time', keep_attrs=True).to_zarr(
    interim_path + "era5_timemean",
    mode="w",
)

In [None]:
# Write each time-mean ensemble to its own Zarr store under interim_path.
# NOTE: the loop variable rebinds the notebook-level name `ens`, so after
# this cell `ens` refers to the last time-mean dataset written.
for key, ens in ens_dict.items():
    for data_var in ens.data_vars:
        # Remove empty attribute that messes up to_zarr method
        # (pop with a default is a no-op when the attribute is absent).
        ens[data_var].attrs.pop('intake_esm_varname', None)
    # Pass the write mode by keyword: the second positional parameter of
    # Dataset.to_zarr is not `mode` in current xarray releases.
    ens.to_zarr(interim_path + f"{key}_timemean", mode="w")