### Global Validation ###

This notebook combines several validation notebooks (`global_validation_tasmax_v2.ipynb`, `global_validation_dtr_v2.ipynb`, and `check_aiqpd_downscaled_data.ipynb`) into a single "master" global validation notebook. It also borrows validation code from the ERA-5 workflow notebook, `validate_era5_hourlyORdaily_files.ipynb`. It is intended to be run with `papermill`.

### Data Sources ###

Coarse Resolution: 
- CMIP6 
- Bias corrected data 
- ERA-5

Fine Resolution: 
- Bias corrected data 
- Downscaled data 
- ERA-5 (fine resolution)
- ERA-5 (coarse resolution resampled to fine resolution) 

### Types of Validation ### 

Basic: 
- maxes, means, mins  
    - CMIP6, bias corrected and downscaled 
    - historical (1995-2014), 2020-2040, 2040-2060, 2060-2080, 2080-2100 
- differences between historical and future time periods for bias corrected and downscaled
- differences between bias corrected and downscaled data 

Variable-specific: 
- GMST
- days over 95°F (future-historical difference plots)
- max # of consecutive dry days, highest precip amount over 5-day rolling window

In [1]:
# Optional: uncomment the two magics below if you are modifying external modules
# while running cells from this notebook, so that edits are reloaded.
# %load_ext autoreload
# %autoreload

In [2]:
! pip install xclim # we don't have this package on compute.impactlab ;



In [3]:
%matplotlib inline 
import xarray as xr
import numpy as np
import dask
import dask.array as da
import dask.distributed as dd
import matplotlib.pyplot as plt
from cartopy import config
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import os 
import gcsfs 
from matplotlib import cm
import warnings 

from science_validation_manual import *
import rhg_compute_tools.kubernetes as rhgk

  from distributed.utils import LoopRunner, format_bytes


### Parametrizing ###

In [4]:
# Run parameters (presumably the cell papermill overrides -- keep these as plain literals).
# gcm selects the workflow looked up in the QC models dictionary further down.
gcm = 'CAMS-CSM1-0'
ssp = 'ssp370' # ssp options: 'ssp126', 'ssp245', 'ssp370', 'ssp585'
variable = 'tasmax' # variable options: 'tasmax', 'tasmin', 'dtr', 'pr'
# passed as `auth_token` to get_manifest(); left empty here so no credential is stored in the notebook
argo_token = ''
# upper bound handed to cluster.adapt(maximum=...)
number_of_workers = 160

### Set up a cluster ### 

In [5]:
# Request a cluster through rhg_compute_tools; keep both handles because later cells
# use `cluster` for scaling and `client` for restart/close.
client, cluster = rhgk.get_standard_cluster()

In [6]:
# Adaptive scaling: between 0 and `number_of_workers` workers.
cluster.adapt(minimum=0, maximum=number_of_workers)

In [7]:
# Bare expression so the notebook renders the cluster status widget.
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [12]:
# Cluster teardown -- important: always run this when you are done.
# NOTE(review): the execution count (In [12]) suggests this cell was run last, after the
# validation cells, but it is positioned before them; in a top-to-bottom run it would
# close the cluster before any validation executes -- confirm intended placement.
client.restart() # clear all tasks - otherwise cluster won't scale down
cluster.scale(0)
client.close()
cluster.close()

### Retrieving data paths from workflow ### 

In [8]:
# Resolve the data paths for this run:
#   1. look up the workflow identifier registered for `gcm` in the QC models dictionary,
#   2. fetch that workflow's manifest (authenticated with `argo_token`),
#   3. collect the paths for this gcm / ssp / variable into `data_dict`.
# Later cells index the result as data_dict[<'coarse'|'fine'>][<data type>][<ssp|'historical'>].
from models_QC import QC_models_dict
models_ID_dict = QC_models_dict()
workflow = models_ID_dict[gcm]
workflow_location = 'archived-workflows' 
manifest = get_manifest(workflow_identifier=workflow, auth_token=argo_token, workflow_location=workflow_location)
data_dict = collect_paths(manifest, gcm, ssp, variable)

### Pre-determined options ### 

In [9]:
# --- which data products to validate ---
cmip6 = True
bias_corrected = True
downscaled = True

# --- which diagnostics to produce ---
basic_diagnostics = True
basic_diagnostic_types = ['min', 'mean', 'max']
gmst = False  # takes too long
hot_days = True
difference_plots = True

# projection window for difference plots: '2020_2040', '2040_2060', '2060_2080' or '2080_2100'
projection_time_period = '2080_2100'

# display units per validation variable
units = dict(tasmax='K', tasmin='K', dtr='K', pr='mm')

# [vmin, vmax] for the climatology maps.
# NOTE(review): only 'tasmax' has an entry; cells indexing maps_color_range[variable]
# will raise KeyError for any other variable.
maps_color_range = {'tasmax': [260, 320]}

# start / end year (as strings) of the historical period and each projection window
years = {
    label: {'start_yr': first, 'end_yr': last}
    for label, first, last in (
        ('hist', '1995', '2014'),
        ('2020_2040', '2020', '2040'),
        ('2040_2060', '2040', '2060'),
        ('2060_2080', '2060', '2080'),
        ('2080_2100', '2080', '2100'),
    )
}

# reduced set of periods (independent copies of the first three entries of `years`)
years_test = {
    label: dict(years[label])
    for label in ('hist', '2020_2040', '2040_2060')
}

In [18]:
# GMST and the days-over-95F diagnostics are only produced when the validation
# variable is tasmax, so both options are switched off for any other variable.
if variable != 'tasmax':
    # Warn only when an option is actually being changed, and name both options
    # (the previous message mentioned gmst only although hot_days is disabled too).
    if gmst or hot_days:
        warnings.warn("gmst and hot_days plotting options changed to False since validation variable is not tasmax")
    gmst = False
    hot_days = False

### Validation ### 

### for [CMIP6, bias_corrected, downscaled], basic diagnostic plots: [min, mean, max] ### 

In [None]:
%%time 
if cmip6 and basic_diagnostics:
    for stat in basic_diagnostic_types: 
            plot_diagnostic_climo_periods(read_gcs_zarr(data_dict['coarse']['cmip6'][ssp]), 
                                          ssp, years, variable, stat, 'cmip6', 
                                          units[variable], ds_hist=read_gcs_zarr(data_dict['coarse']['cmip6']['historical']), 
                                          vmin=maps_color_range[variable][0], vmax=maps_color_range[variable][1])

In [None]:
# Basic diagnostics for the bias-corrected output (coarse grid): one call to
# plot_diagnostic_climo_periods per statistic in `basic_diagnostic_types`.
if bias_corrected and basic_diagnostics:
    # The loop variable must be `stat` because that is the name passed to the plot call.
    # It was previously `var`, so the call silently reused whatever `stat` was left over
    # from an earlier cell (or raised NameError on a fresh kernel).
    for stat in basic_diagnostic_types:
        plot_diagnostic_climo_periods(
            read_gcs_zarr(data_dict['coarse']['bias_corrected'][ssp]),
            ssp, years, variable, stat, 'bias_corrected',
            units[variable],
            ds_hist=read_gcs_zarr(data_dict['coarse']['bias_corrected']['historical']),
            vmin=maps_color_range[variable][0],
            vmax=maps_color_range[variable][1],
        )

In [None]:
%%time
if downscaled and basic_diagnostics:
    for var in basic_diagnostic_types:

            plot_diagnostic_climo_periods(read_gcs_zarr(data_dict['fine']['downscaled'][ssp]), 
                                          ssp, years, variable, 'min', 'downscaled', 
                                          units[variable], ds_hist=read_gcs_zarr(data_dict['fine']['downscaled']['historical']), 
                                          vmin=maps_color_range[variable][0], vmax=maps_color_range[variable][1])

### GMST for CMIP6, bias corrected and downscaled ### 

In [None]:
# GMST diagnostic for the CMIP6 and bias-corrected outputs (coarse grid),
# passing the projection and historical dataset of each.
if gmst:
    coarse_paths = data_dict['coarse']
    plot_gmst_diagnostic(
        read_gcs_zarr(coarse_paths['cmip6'][ssp]),
        read_gcs_zarr(coarse_paths['bias_corrected'][ssp]),
        variable=variable,
        ssp=ssp,
        ds_hist_cmip6=read_gcs_zarr(coarse_paths['cmip6']['historical']),
        ds_hist_bc=read_gcs_zarr(coarse_paths['bias_corrected']['historical']),
    )

### Difference plots: downscaled-bias corrected, and future-historical for both outputs ###

### downscaled minus bias corrected ### 

In [None]:
# Downscaled minus bias-corrected difference (fine grid). Needs both data types, so it
# is guarded by both flags in addition to `difference_plots`.
if bias_corrected and downscaled and difference_plots:
    plot_downscale_bias_correction_differences(
        # The two future datasets were missing the read_gcs_zarr( call, which left the
        # parentheses unbalanced (SyntaxError) and would have called .sel on a path.
        # NOTE(review): every dataset is reduced to the single point lat=-179.5, lon=-89.5.
        # -179.5 is outside the valid latitude range (lat/lon possibly swapped) and the
        # selection looks like a leftover debugging subset -- confirm before a full run.
        ds_future_bc=read_gcs_zarr(data_dict['fine']['bias_corrected'][ssp]).sel(lat=-179.5, lon=-89.5),
        ds_future_ds=read_gcs_zarr(data_dict['fine']['downscaled'][ssp]).sel(lat=-179.5, lon=-89.5),
        ds_hist_bc=read_gcs_zarr(data_dict['fine']['bias_corrected']['historical']).sel(lat=-179.5, lon=-89.5),
        ds_hist_ds=read_gcs_zarr(data_dict['fine']['downscaled']['historical']).sel(lat=-179.5, lon=-89.5),
        variable=variable,
        units=units[variable],
        years=years,
        robust=True,
        # Taken from the notebook parameters instead of hard-coded '370' / '2080_2100';
        # identical values for the default ssp370 / 2080_2100 configuration.
        ssp=ssp.replace('ssp', ''),
        time_period=projection_time_period,
        xr_func=None,
    )

### change from historical ###

In [None]:
if bias_corrected and difference_plots: 
    plot_change_from_historical(ds_future=read_gcs_zarr(data_dict['fine']['bias_corrected'][ssp]).sel(lat=-179.5, lon=-89.5),
                                ds_hist=read_gcs_zarr(data_dict['fine']['bias_corrected']['historical']).sel(lat=-179.5, lon=-89.5),
                                data_type='bias_corrected',
                                variable=variable,
                                units=units[variable],
                                years=years,
                                robust=True,
                                ssp='370',
                                time_period='2080_2100',
                                xr_func=None)

In [11]:
%%time
if downscaled and difference_plots: 
    plot_change_from_historical(ds_future=read_gcs_zarr(data_dict['fine']['downscaled'][ssp]).isel(lat=-1, lon=-1),
                                ds_hist=read_gcs_zarr(data_dict['fine']['downscaled']['historical']).isel(lat=-1, lon=-1),
                                data_type='downscaled',
                                variable=variable,
                                units=units[variable],
                                years=years,
                                robust=True,
                                ssp='370',
                                time_period='2080_2100',
                                xr_func=None)

NameError: name 'axes' is not defined

### Difference plots for days over 95 degrees F using tasmax ###

In [None]:
# Change from historical in the count of hot days (tasmax >= 95F) for the CMIP6 output
# (coarse grid); xr_conditional_count is passed as the per-dataset function.
if hot_days and cmip6 and difference_plots:
    plot_change_from_historical(
        # NOTE(review): both datasets are reduced to the single point lat=-179.5, lon=-89.5.
        # -179.5 is outside the valid latitude range (lat/lon possibly swapped) and the
        # selection looks like a leftover debugging subset -- confirm before a full run.
        ds_future=read_gcs_zarr(data_dict['coarse']['cmip6'][ssp]).sel(lat=-179.5, lon=-89.5),
        ds_hist=read_gcs_zarr(data_dict['coarse']['cmip6']['historical']).sel(lat=-179.5, lon=-89.5),
        data_type='cmip6',
        variable=variable,
        units='days with tasmax >= 95F',
        years=years,
        robust=True,
        # Taken from the notebook parameters instead of hard-coded '370' / '2080_2100';
        # identical values for the default ssp370 / 2080_2100 configuration.
        ssp=ssp.replace('ssp', ''),
        time_period=projection_time_period,
        xr_func=xr_conditional_count,
    )

In [None]:
# Change from historical in the count of hot days (tasmax >= 95F) for the bias-corrected
# output (fine grid); xr_conditional_count is passed as the per-dataset function.
if hot_days and bias_corrected and difference_plots:
    plot_change_from_historical(
        # NOTE(review): both datasets are reduced to the single point lat=-179.5, lon=-89.5.
        # -179.5 is outside the valid latitude range (lat/lon possibly swapped) and the
        # selection looks like a leftover debugging subset -- confirm before a full run.
        ds_future=read_gcs_zarr(data_dict['fine']['bias_corrected'][ssp]).sel(lat=-179.5, lon=-89.5),
        ds_hist=read_gcs_zarr(data_dict['fine']['bias_corrected']['historical']).sel(lat=-179.5, lon=-89.5),
        data_type='bias_corrected',
        variable=variable,
        units='days with tasmax >= 95F',
        years=years,
        robust=True,
        # Taken from the notebook parameters instead of hard-coded '370' / '2080_2100';
        # identical values for the default ssp370 / 2080_2100 configuration.
        ssp=ssp.replace('ssp', ''),
        time_period=projection_time_period,
        # Was `xr_func=xr_func=xr_conditional_count`, a duplicated keyword (SyntaxError).
        xr_func=xr_conditional_count,
    )

In [None]:
# Change from historical in the count of hot days (tasmax >= 95F) for the downscaled
# output (fine grid); xr_conditional_count is passed as the per-dataset function.
if hot_days and downscaled and difference_plots:
    plot_change_from_historical(
        # NOTE(review): both datasets are reduced to the single point lat=-179.5, lon=-89.5.
        # -179.5 is outside the valid latitude range (lat/lon possibly swapped) and the
        # selection looks like a leftover debugging subset -- confirm before a full run.
        ds_future=read_gcs_zarr(data_dict['fine']['downscaled'][ssp]).sel(lat=-179.5, lon=-89.5),
        ds_hist=read_gcs_zarr(data_dict['fine']['downscaled']['historical']).sel(lat=-179.5, lon=-89.5),
        data_type='downscaled',
        variable=variable,
        units='days with tasmax >= 95F',
        years=years,
        robust=True,
        # Taken from the notebook parameters instead of hard-coded '370' / '2080_2100';
        # identical values for the default ssp370 / 2080_2100 configuration.
        ssp=ssp.replace('ssp', ''),
        time_period=projection_time_period,
        xr_func=xr_conditional_count,
    )

### Precip - dry days ###

In [None]:
# Precipitation only: maximum number of consecutive dry days on the downscaled output
# (fine grid), with a fixed 0-200 colour range.
if variable == 'pr':  # the colon was missing here (SyntaxError)
    # `basic_diag_type` was never defined in this notebook; iterate over the configured
    # statistics as the other diagnostic cells do.
    # NOTE(review): confirm all three statistics are wanted for this index.
    for stat in basic_diagnostic_types:
        plot_diagnostic_climo_periods(
            read_gcs_zarr(data_dict['fine']['downscaled'][ssp]),
            ssp, years, variable, stat, 'downscaled',
            units[variable],
            ds_hist=read_gcs_zarr(data_dict['fine']['downscaled']['historical']),
            vmin=0,
            vmax=200,
            xr_func=xc_maximum_consecutive_dry_days,
        )

### Precip - accumulated ###

In [None]:
# Precipitation only: diagnostic on the downscaled output (fine grid), with a fixed
# 0-200 colour range.
# NOTE(review): this cell sits under the "accumulated" precipitation header but still
# passes xc_maximum_consecutive_dry_days, exactly like the dry-days cell. It presumably
# should use the 5-day rolling accumulation function instead -- confirm and replace.
if variable == 'pr':  # the colon was missing here (SyntaxError)
    # `basic_diag_type` was never defined in this notebook; iterate over the configured
    # statistics as the other diagnostic cells do.
    for stat in basic_diagnostic_types:
        plot_diagnostic_climo_periods(
            read_gcs_zarr(data_dict['fine']['downscaled'][ssp]),
            ssp, years, variable, stat, 'downscaled',
            units[variable],
            ds_hist=read_gcs_zarr(data_dict['fine']['downscaled']['historical']),
            vmin=0,
            vmax=200,
            xr_func=xc_maximum_consecutive_dry_days,
        )