In [1]:
import numpy as np
import xarray as xr
import dask
import warnings

### Preliminaries

In [2]:
###############################
# Input / output locations
# UPDATE data_root FOR REPRODUCTION
###############################
# Every path below hangs off a single project directory
data_root = '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/'
metrics_root = data_root + 'metrics/'

# Metric files, one directory per downscaled ensemble
nex_in = metrics_root + 'nex-gddp/'
cil_in = metrics_root + 'cil-gdpcir/'
isi_in = metrics_root + 'isimip3b/regridded/conservative/'
cbp_in = metrics_root + 'carbonplan/'

# Where the uncertainty results are written
out_path = data_root + 'uc_results/'

In [3]:
###################
# Models
###################
from utils import nex_ssp_dict, cil_ssp_dict, isimip_ssp_dict, gardsv_ssp_dict, gardsv_var_dict, deepsdbc_dict

# The model names of each ensemble are the keys of its lookup dictionary
nex_models = list(nex_ssp_dict)
cil_models = list(cil_ssp_dict)
isi_models = list(isimip_ssp_dict)
cbp_gard_models = list(gardsv_ssp_dict)
cbp_deep_models = list(deepsdbc_dict)

# GARD-SV models whose variable listing includes precipitation ('pr')
cbp_gard_precip_models = [name for name in cbp_gard_models
                          if 'pr' in gardsv_var_dict[name]]

In [8]:
############
# Dask
############
# Start a PBS-backed dask cluster and connect a client to it.
# The dask.delayed / dask.compute calls in later cells presumably run on
# this client once it has been created -- TODO confirm no other scheduler is set.
from dask_jobqueue import PBSCluster

# Each job: 1 core, 45GB of memory, 30 minute walltime
cluster = PBSCluster(cores=1, memory='45GB', resource_spec='pmem=45GB',
                     # account='open',
                     # NOTE(review): the string below looks like a PBS directive, but
                     # worker_extra_args is presumably forwarded to the dask worker
                     # command line rather than the job header; job_extra_directives
                     # (e.g. ['-l feature=rhel7']) looks like the intended option --
                     # TODO confirm the feature request actually takes effect
                     worker_extra_args=['#PBS -l feature=rhel7'], 
                     walltime='00:30:00')

cluster.scale(jobs=10)  # ask for jobs

from dask.distributed import Client
client = Client(cluster)

# Display the client summary as the cell output
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.239:40469,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Total uncertainty

### Function definition

In [5]:
#######################################################################
# Total uncertainty: variance across all models, scenarios, ensembles
#######################################################################
def uc_total(nex_in, nex_models,
             cil_in, cil_models,
             isi_in, isi_models,
             cbp_in, cbp_gard_models, cbp_deep_models,
             metric, submetric,
             year):
    """
    Total uncertainty for one year: the variance taken jointly over the
    'ensemble', 'ssp' and 'model' dimensions of all downscaled outputs.

    Parameters
    ----------
    nex_in, cil_in, isi_in, cbp_in : str
        Directories holding the metric files of each ensemble.
    nex_models, cil_models, isi_models, cbp_gard_models, cbp_deep_models : list of str
        Model names to read for each ensemble.
    metric : str
        Metric sub-directory name (e.g. 'avg', 'max', 'hot').
    submetric : list of str or False
        Variables to keep from each file; a falsy value keeps everything.
        Used for metrics with many sub-metrics to keep memory manageable.
    year : int
        Year to select.

    Returns
    -------
    Variance over ensemble/ssp/model, tagged with the scalar coordinate
    uncertainty='total_true'.
    """
    netcdf_ensembles = ['NEX', 'ISIMIP', 'GARD-SV']
    zarr_ensembles = ['CIL', 'DeepSD-BC']
    wrapped_lon_ensembles = ['NEX', 'ISIMIP']

    def load_member(ensemble, path_in, model):
        """Open one model of one ensemble and bring it to the common layout."""
        store = path_in + metric + '/' + model
        if ensemble in netcdf_ensembles:
            member = xr.open_dataset(store + '.nc')
        elif ensemble in zarr_ensembles:
            member = xr.open_dataset(store, engine='zarr')

        # Keep only the requested variables, if any were requested
        if submetric:
            member = member[submetric]

        # Integer years on the time axis, then pick the year and label the member
        member['time'] = member.indexes['time'].year
        member = (member.sel(time=year)
                        .sortby('ssp')
                        .assign_coords(ensemble=ensemble)
                        .assign_coords(model=model)
                        .sel(lat=slice(-60, 90)))

        # Shift longitudes above 180 down by 360 so lon spans [-180, 180]
        if ensemble in wrapped_lon_ensembles:
            lon = member['lon']
            member['lon'] = np.where(lon > 180, lon - 360, lon)
            member = member.sortby('lon')

        # Members without precip get an all-NaN 'pr' shaped like their first variable
        if metric in ['max', 'avg'] and 'pr' not in member.data_vars:
            template = list(member.data_vars)[0]
            member['pr'] = xr.full_like(member[template], np.nan)

        return member

    ######################
    # Read all ensembles
    ######################
    # (ensemble label, directory, models), in the order they are stacked
    sources = [
        ('NEX', nex_in, nex_models),
        ('CIL', cil_in, cil_models),
        ('ISIMIP', isi_in, isi_models),
        ('GARD-SV', cbp_in + '/regridded/conservative/GARD-SV/', cbp_gard_models),
        ('DeepSD-BC', cbp_in + 'native_grid/DeepSD-BC/', cbp_deep_models),
    ]

    per_ensemble = []
    for ensemble, path_in, models in sources:
        members = [load_member(ensemble, path_in, model) for model in models]
        per_ensemble.append(xr.concat(members, dim='model', fill_value=np.nan))

    ###########################
    # Merge all and mask ocean
    ###########################
    ds = xr.concat(per_ensemble, dim='ensemble', fill_value=np.nan)

    # NEX is only available over land, so its NaN pattern serves as the ocean mask
    first_var = list(ds.keys())[0]
    nex_reference = ds.sel(ensemble='NEX').isel(ssp=0, model=0)[first_var]
    ds = xr.where(nex_reference.isnull(), np.nan, ds)

    ##########################
    # Uncertainty calculation
    ##########################
    # The variance warns on all-NaN slices; silence that here
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        total_variance = ds.var(dim=['ensemble', 'ssp', 'model'])

    return total_variance.assign_coords(uncertainty='total_true')

## Annual averages

In [None]:
%%time

metric = 'avg'

# One delayed uc_total call per year (2015-2099), all variables kept
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_models,
                           cil_in, cil_models,
                           isi_in, isi_models,
                           cbp_in, cbp_gard_models, cbp_deep_models,
                           metric, False,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '.nc')

CPU times: user 59.5 s, sys: 4.94 s, total: 1min 4s
Wall time: 7min 11s


## 1-day maxs

In [30]:
%%time

metric = 'max'

# One delayed uc_total call per year (2015-2099), all variables kept
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_models,
                           cil_in, cil_models,
                           isi_in, isi_models,
                           cbp_in, cbp_gard_models, cbp_deep_models,
                           metric, False,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '.nc')

CPU times: user 55.6 s, sys: 5.52 s, total: 1min 1s
Wall time: 6min 47s


## Dry days

In [29]:
%%time

metric = 'dry'

# One delayed uc_total call per year (2015-2099);
# GARD-SV is restricted to the models that provide precipitation
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_models,
                           cil_in, cil_models,
                           isi_in, isi_models,
                           cbp_in, cbp_gard_precip_models, cbp_deep_models,
                           metric, False,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '.nc')

CPU times: user 1min 36s, sys: 16.9 s, total: 1min 53s
Wall time: 11min 36s


## Wet days

In [6]:
%%time

metric = 'wet'
submetric = ['pr_rp5gmfd_count', 'pr_rp5gmfd_streak']

# One delayed uc_total call per year (2015-2099), keeping only the two
# variables in `submetric`; GARD-SV is restricted to models with precipitation
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_models,
                           cil_in, cil_models,
                           isi_in, isi_models,
                           cbp_in, cbp_gard_precip_models, cbp_deep_models,
                           metric, submetric,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '_gmfd_rp5.nc')

FileNotFoundError: No such file or directory: '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/carbonplan/native_grid/DeepSD-BC/wet/MRI-ESM2-0'

## Hot days

In [6]:
%%time
# Hot days: tasmin
metric = 'hot'
submetric = ['tasmin_rp5gmfd_count', 'tasmin_rp5gmfd_streak']

# NEX and ISIMIP files are looked up as <model>_tasmin
nex_tasmin_models = [model + '_tasmin' for model in nex_models]
isi_tasmin_models = [model + '_tasmin' for model in isi_models]

# One delayed uc_total call per year (2015-2099)
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_tasmin_models,
                           cil_in, cil_models,
                           isi_in, isi_tasmin_models,
                           cbp_in, cbp_gard_models, cbp_deep_models,
                           metric, submetric,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '_tasmin.nc')

CPU times: user 1min 45s, sys: 6.89 s, total: 1min 52s
Wall time: 12min 56s


In [7]:
%%time
# Hot days: tasmax
metric = 'hot'
submetric = ['tasmax_rp5gmfd_count', 'tasmax_rp5gmfd_streak']

# NEX and ISIMIP files are looked up as <model>_tasmax
nex_tasmax_models = [model + '_tasmax' for model in nex_models]
isi_tasmax_models = [model + '_tasmax' for model in isi_models]

# One delayed uc_total call per year (2015-2099)
delayed_res = [
    dask.delayed(uc_total)(nex_in, nex_tasmax_models,
                           cil_in, cil_models,
                           isi_in, isi_tasmax_models,
                           cbp_in, cbp_gard_models, cbp_deep_models,
                           metric, submetric,
                           year)
    for year in range(2015, 2100)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'total_uncertainty/' + metric + '_tasmax.nc')

CPU times: user 1min 46s, sys: 6.94 s, total: 1min 53s
Wall time: 10min 11s


# No interannual variability

In [5]:
################################################################
# Uncertainty characterization following Hawkins & Sutton 2009
# No consideration of internal variability!
################################################################
def uc_hs09(nex_in, nex_models,
            cil_in, cil_models,
            isi_in, isi_models,
            cbp_in, cbp_gard_models, cbp_deep_models,
            metric, year):
    """
    Reads all models and ssps for one metric and year and splits the spread
    into model, scenario and downscaling (ensemble) variance, plus their sum.
    Internal variability is not treated.

    Parameters
    ----------
    nex_in, cil_in, isi_in, cbp_in : str
        Directories holding the metric files of each ensemble.
    nex_models, cil_models, isi_models, cbp_gard_models, cbp_deep_models : list of str
        Model names to read for each ensemble.
    metric : str
        Metric sub-directory name.
    year : int
        Year to select.

    Returns
    -------
    The four components concatenated along a new 'uncertainty' dimension,
    labelled 'model', 'scenario', 'ensemble' and 'total'.
    """
    def prepare(ds, ensemble, model, wrap_lon=False, clip_lat=False, fill_pr=False):
        """Select the year, label the member and bring it to the common layout."""
        ds['time'] = ds.indexes['time'].year
        ds = ds.sel(time=year)
        if clip_lat:
            ds = ds.sel(lat=slice(-60, 90))
        if wrap_lon:
            # Shift longitudes above 180 down by 360 so lon spans [-180, 180]
            ds['lon'] = np.where(ds['lon'] > 180, ds['lon'] - 360, ds['lon'])
            ds = ds.sortby('lon')
        ds = ds.sortby('ssp')
        ds = ds.assign_coords(ensemble = ensemble)
        # Label with the model name used to open the file (the file stem)
        ds = ds.assign_coords(model = model)
        if fill_pr and 'pr' not in ds.data_vars:
            # for some models/methods we are missing
            # precip so need to fill with NaNs
            ds['pr'] = xr.full_like(ds['tas'], np.nan)
        return ds

    ##################################
    # Read and format all ensembles
    ##################################
    # NEX-GDDP
    ds_out = []
    for model in nex_models:
        ds = xr.open_dataset(nex_in + metric + '/' + model + '.nc')
        ds_out.append(prepare(ds, 'NEX', model, wrap_lon=True))
    ds_nex = xr.concat(ds_out, dim='model', compat='identical')

    # CIL-GDPCIR
    ds_out = []
    for model in cil_models:
        ds = xr.open_dataset(cil_in + metric + '/' + model, engine='zarr')
        ds_out.append(prepare(ds, 'CIL', model, clip_lat=True))
    ds_cil = xr.concat(ds_out, dim='model', compat='identical')

    # ISIMIP
    ds_out = []
    for model in isi_models:
        ds = xr.open_dataset(isi_in + metric + '/' + model + '.nc')
        ds_out.append(prepare(ds, 'ISIMIP', model, wrap_lon=True))
    ds_isi = xr.concat(ds_out, dim='model', compat='identical')

    # NOTE(review): the two carbonplan readers below use cbp_in + 'GARD-SV/' and
    # cbp_in + 'DeepSD-BC/' with netCDF files, whereas uc_total and uc_hs09_forced
    # in this notebook read '/regridded/conservative/GARD-SV/' (netCDF) and
    # 'native_grid/DeepSD-BC/' (zarr) -- TODO confirm which layout is current.

    # carbonplan: GARD-SV
    ds_out = []
    for model in cbp_gard_models:
        ds = xr.open_dataset(cbp_in + 'GARD-SV/' + metric + '/' + model + '.nc')
        ds_out.append(prepare(ds, 'GARD-SV', model, clip_lat=True, fill_pr=True))
    ds_cbp_gard = xr.concat(ds_out, dim='model', compat='identical')

    # carbonplan: DeepSD-BC
    ds_out = []
    for model in cbp_deep_models:
        ds = xr.open_dataset(cbp_in + 'DeepSD-BC/' + metric + '/' + model + '.nc')
        ds_out.append(prepare(ds, 'DeepSD-BC', model, clip_lat=True, fill_pr=True))
    ds_cbp_deep = xr.concat(ds_out, dim='model', compat='identical')

    ###########################
    # Merge all and mask ocean
    ###########################
    ds = xr.concat([ds_nex, ds_cil, ds_isi, ds_cbp_gard, ds_cbp_deep],
                   dim='ensemble', fill_value=np.nan)

    # mask out ocean points (NEX is only available over land)
    ds_mask = ds.sel(ensemble='NEX').isel(ssp=0, model=0)[list(ds.keys())[0]].isnull()
    ds = xr.where(ds_mask, np.nan, ds)

    ##########################
    # Uncertainty calculation
    ##########################
    # Entry counts are taken at one fixed grid cell (index lat=300, lon=800),
    # chosen to lie over land, and used as weights for the averages below
    reference_point = ds.isel(lat=300, lon=800)[list(ds.data_vars)[0]]

    ## Model uncertainty
    # Variance across models, averaged over scenarios and ensembles
    U_model = ds.var(dim='model')
    weights = reference_point.count(dim='model').rename('weights')
    weights = xr.where(weights == 1, 0, weights) # remove combinations where variance was calculated over 1 entry
    U_model = U_model.weighted(weights).mean(dim=['ssp', 'ensemble']) # weighted average

    ## Scenario uncertainty
    # Variance across multi-model means (HS09 approach)
    U_scen = ds.mean(dim=['model', 'ensemble']).var(dim='ssp')

    ## Downscaling uncertainty
    # Variance across ensembles, averaged over models and scenarios
    U_ens = ds.var(dim='ensemble')
    weights = reference_point.count(dim='ensemble').rename('weights')
    weights = xr.where(weights == 1, 0, weights) # remove combinations where variance was calculated over 1 entry
    U_ens = U_ens.weighted(weights).mean(dim=['ssp', 'model'])

    ## Total uncertainty
    # Our 'simulated' total uncertainty
    # This will in general not equal true total
    # (summed before the components receive their 'uncertainty' labels)
    U_total = U_model + U_scen + U_ens

    ## Merge and return
    U_model = U_model.assign_coords(uncertainty = 'model')
    U_scen = U_scen.assign_coords(uncertainty = 'scenario')
    U_ens = U_ens.assign_coords(uncertainty = 'ensemble')
    U_total = U_total.assign_coords(uncertainty = 'total')

    # Each component appears exactly once along 'uncertainty'
    return xr.concat([U_model, U_scen, U_ens, U_total], dim='uncertainty')

## Annual averages

In [12]:
# Metric for the next cell: passed to uc_hs09 and used in the output file name
metric = 'avg'

In [13]:
%%time
################################
# UC on raw outputs (no iav)
################################
# One delayed uc_hs09 call per year (2015-2100)
delayed_res = [
    dask.delayed(uc_hs09)(nex_in, nex_models,
                          cil_in, cil_models,
                          isi_in, isi_models,
                          cbp_in, cbp_gard_models, cbp_deep_models,
                          metric, year)
    for year in range(2015, 2101)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_no_iav/' + metric + '.nc')

## Annual maxs

In [8]:
# Metric for the next cell: passed to uc_hs09 and used in the output file name
metric = 'max'

In [8]:
%%time
################################
# UC on raw outputs (no iav)
################################
# One delayed uc_hs09 call per year (2015-2100)
delayed_res = [
    dask.delayed(uc_hs09)(nex_in, nex_models,
                          cil_in, cil_models,
                          isi_in, isi_models,
                          cbp_in, cbp_gard_models, cbp_deep_models,
                          metric, year)
    for year in range(2015, 2101)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_no_iav/' + metric + '.nc')

CPU times: user 15.4 s, sys: 9.44 s, total: 24.8 s
Wall time: 7min 10s


# Interannual variability

### Function definitions

In [44]:
################################################################
# Uncertainty characterization following Hawkins & Sutton 2009
# 'Forced response' = 10 year rolling mean
################################################################
def uc_hs09_forced(nex_in, nex_models,
                   cil_in, cil_models,
                   isi_in, isi_models,
                   cbp_in, cbp_gard_models, cbp_deep_models,
                   metric, submetric,
                   year):
    """
    Model, scenario and downscaling (ensemble) variance of the 'forced
    response' (centered 10 year rolling mean) for one year.

    Parameters
    ----------
    nex_in, cil_in, isi_in, cbp_in : str
        Directories holding the metric files of each ensemble.
    nex_models, cil_models, isi_models, cbp_gard_models, cbp_deep_models : list of str
        Model names to read for each ensemble.
    metric : str
        Metric sub-directory name.
    submetric : list of str or False
        Variables to keep from each file; a falsy value keeps everything.
        Used for metrics with many sub-metrics to keep memory manageable.
    year : int
        Year at which the rolling mean is evaluated.

    Returns
    -------
    The three components concatenated along a new 'uncertainty' dimension,
    labelled 'model', 'scenario' and 'ensemble'.
    """
    netcdf_ensembles = ['NEX', 'ISIMIP', 'GARD-SV']
    zarr_ensembles = ['CIL', 'DeepSD-BC']
    wrapped_lon_ensembles = ['NEX', 'ISIMIP']

    def load_member(ensemble, path_in, model):
        """Open one model of one ensemble and reduce it to the smoothed year."""
        store = path_in + metric + '/' + model
        if ensemble in netcdf_ensembles:
            member = xr.open_dataset(store + '.nc')
        elif ensemble in zarr_ensembles:
            member = xr.open_dataset(store, engine='zarr')

        # Keep only the requested variables, if any were requested
        if submetric:
            member = member[submetric]

        # Integer years on the time axis
        member['time'] = member.indexes['time'].year

        # Only the neighbouring years are needed, which keeps the rolling mean cheap
        member = member.sel(time=slice(year-6, year+6))
        member = member.rolling(time=10, center=True).mean().sel(time=year)

        member = (member.sortby('ssp')
                        .assign_coords(ensemble=ensemble)
                        .assign_coords(model=model)
                        .sel(lat=slice(-60, 90)))

        # Shift longitudes above 180 down by 360 so lon spans [-180, 180]
        if ensemble in wrapped_lon_ensembles:
            lon = member['lon']
            member['lon'] = np.where(lon > 180, lon - 360, lon)
            member = member.sortby('lon')

        # Members without precip get an all-NaN 'pr' shaped like their first variable
        if metric in ['max', 'avg'] and 'pr' not in member.data_vars:
            template = list(member.data_vars)[0]
            member['pr'] = xr.full_like(member[template], np.nan)

        return member

    ######################
    # Read all ensembles
    ######################
    # (ensemble label, directory, models), in the order they are stacked
    sources = [
        ('NEX', nex_in, nex_models),
        ('CIL', cil_in, cil_models),
        ('ISIMIP', isi_in, isi_models),
        ('GARD-SV', cbp_in + '/regridded/conservative/GARD-SV/', cbp_gard_models),
        ('DeepSD-BC', cbp_in + 'native_grid/DeepSD-BC/', cbp_deep_models),
    ]

    per_ensemble = []
    for ensemble, path_in, models in sources:
        members = [load_member(ensemble, path_in, model) for model in models]
        per_ensemble.append(xr.concat(members, dim='model', fill_value=np.nan))

    ###########################
    # Merge all and mask ocean
    ###########################
    ds = xr.concat(per_ensemble, dim='ensemble', fill_value=np.nan)

    # NEX is only available over land, so its NaN pattern serves as the ocean mask
    nex_reference = ds.sel(ensemble='NEX').isel(ssp=0, model=0)[list(ds.keys())[0]]
    ds = xr.where(nex_reference.isnull(), np.nan, ds)

    ##########################
    # Uncertainty calculation
    ##########################
    # Entry counts are taken at one fixed grid cell (index lat=300, lon=800),
    # chosen to lie over land
    reference_point = ds.isel(lat=300, lon=800)[list(ds.data_vars)[0]]

    def entry_weights(dim):
        """Entries available along `dim`; a count of 1 is zeroed out because
        a variance over a single entry carries no information."""
        counts = reference_point.count(dim=dim).rename('weights')
        return xr.where(counts == 1, 0, counts)

    ## Model uncertainty
    # Variance across models, weighted average over scenarios and ensembles
    U_model = ds.var(dim='model')
    U_model = U_model.weighted(entry_weights('model')).mean(dim=['ssp', 'ensemble'])

    ## Scenario uncertainty
    # Variance across multi-model means (HS09 approach)
    U_scen = ds.mean(dim=['model', 'ensemble']).var(dim='ssp')

    ## Downscaling uncertainty
    # Variance across ensembles, weighted average over models and scenarios
    U_ens = ds.var(dim='ensemble')
    U_ens = U_ens.weighted(entry_weights('ensemble')).mean(dim=['ssp', 'model'])

    ## Merge and return
    components = [
        U_model.assign_coords(uncertainty='model'),
        U_scen.assign_coords(uncertainty='scenario'),
        U_ens.assign_coords(uncertainty='ensemble'),
    ]
    return xr.concat(components, dim='uncertainty')

In [42]:
################################################################
# Uncertainty characterization following Hawkins & Sutton 2009
# Interannual variability (single value for all years)
################################################################
def uc_hs09_iav(path_in, ensemble, model, metric, submetric):
    """
    Internal variability of one model of one ensemble: the variance over
    time of the residuals from a centered 10 year rolling mean.

    Parameters
    ----------
    path_in : str
        Directory holding the metric files of the ensemble.
    ensemble : str
        One of 'NEX', 'ISIMIP', 'GARD-SV' (netCDF) or 'CIL', 'DeepSD-BC' (zarr).
    model : str
        Model name.
    metric : str
        Metric sub-directory name.
    submetric : list of str or False
        Variables to keep from the file; a falsy value keeps everything.

    Returns
    -------
    Variance of the residuals along 'time', carrying the scalar coordinate
    ensmod = '<ensemble>__<model>'.
    """
    ###############
    # Read model
    ###############
    store = path_in + metric + '/' + model
    if ensemble in ['NEX', 'ISIMIP', 'GARD-SV']:
        ds = xr.open_dataset(store + '.nc')
    elif ensemble in ['CIL', 'DeepSD-BC']:
        ds = xr.open_dataset(store, engine='zarr')

    # Keep only the requested variables, if any were requested
    if submetric:
        ds = ds[submetric]

    # Integer years on the time axis, then label and trim the member
    ds['time'] = ds.indexes['time'].year
    ds = (ds.sortby('ssp')
            .assign_coords(ensmod=ensemble + '__' + model)
            .sel(lat=slice(-60, 90)))

    # Shift longitudes above 180 down by 360 so lon spans [-180, 180]
    if ensemble in ['NEX', 'ISIMIP']:
        lon = ds['lon']
        ds['lon'] = np.where(lon > 180, lon - 360, lon)
        ds = ds.sortby('lon')

    # Members without precip get an all-NaN 'pr' shaped like their first variable
    if metric in ['max', 'avg'] and 'pr' not in ds.data_vars:
        template = list(ds.data_vars)[0]
        ds['pr'] = xr.full_like(ds[template], np.nan)

    #####################################
    # Get IAV estimate
    # Variance of rolling mean residuals
    #####################################
    # The smoothed series is restricted to 2020-2096 before the subtraction
    smoothed = ds.rolling(time=10, center=True).mean()
    smoothed = smoothed.sel(time=slice(2020,2096))
    residuals = ds - smoothed
    return residuals.var(dim='time')



def make_delayed_list_iav(metric, submetric, submetric_var=None):
    """
    Make a delayed list with IAV of all models-ssps-ensembles which
    can then be combined into one dataset and averaged for best estimate.

    Parameters
    ----------
    metric : str
        Metric sub-directory name. 'wet' and 'dry' restrict GARD-SV to the
        models that provide precipitation.
    submetric : list of str or False
        Forwarded unchanged to uc_hs09_iav.
    submetric_var : str, optional
        When given, NEX and ISIMIP model names get the suffix
        '_<submetric_var>'. When omitted or falsy the plain model names
        are used.

    Returns
    -------
    list
        One dask.delayed uc_hs09_iav call per model, ordered
        NEX, CIL, ISIMIP, GARD-SV, DeepSD-BC.
    """
    def with_suffix(models):
        # Always built from the ensemble's own list, so an absent suffix can
        # neither leave the list undefined nor reuse another ensemble's models
        if submetric_var:
            return [model + '_' + submetric_var for model in models]
        return list(models)

    # carbonplan GARD-SV: precipitation metrics only exist for some models
    if metric in ['wet', 'dry']:
        gard_models = cbp_gard_precip_models
    else:
        gard_models = cbp_gard_models

    # (directory, ensemble label, models), in output order
    sources = [
        (nex_in, 'NEX', with_suffix(nex_models)),
        (cil_in, 'CIL', cil_models),
        (isi_in, 'ISIMIP', with_suffix(isi_models)),
        (cbp_in + '/regridded/conservative/GARD-SV/', 'GARD-SV', gard_models),
        (cbp_in + 'native_grid/DeepSD-BC/', 'DeepSD-BC', cbp_deep_models),
    ]

    # Parallelize with dask over models
    delayed_res = []
    for path_in, ensemble, models in sources:
        for model in models:
            tmp_res = dask.delayed(uc_hs09_iav)(path_in, ensemble, model, metric, submetric)
            delayed_res.append(tmp_res)

    # return
    return delayed_res

## Annual averages

In [22]:
# Metric for the two cells below (IAV and forced response); also used in their output file names
metric = 'avg'

In [None]:
%%time
################################
# Interannual variability
################################
# make_delayed_list_iav takes (metric, submetric, submetric_var); this metric
# needs neither a variable selection nor a model-name suffix
delayed_res = make_delayed_list_iav(metric, False, submetric_var=None)

# Compute
res = dask.compute(*delayed_res)

# Merge and average over ensemble + model (ensmod) and ssp
ds_out = xr.concat(res, dim='ensmod').mean(dim=['ensmod', 'ssp'])
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric +'_iav.nc')

In [None]:
%%time
################################
# UC on forced response
################################
# One delayed uc_hs09_forced call per year (2020-2096), all variables kept
delayed_res = [
    dask.delayed(uc_hs09_forced)(nex_in, nex_models,
                                 cil_in, cil_models,
                                 isi_in, isi_models,
                                 cbp_in, cbp_gard_models, cbp_deep_models,
                                 metric, False,
                                 year)
    for year in range(2020, 2097)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric + '.nc')

## 1-day maxs

In [7]:
# Metric for the two cells below (IAV and forced response); also used in their output file names
metric = 'max'

In [8]:
%%time
################################
# Interannual variability
################################
# make_delayed_list_iav takes (metric, submetric, submetric_var); this metric
# needs neither a variable selection nor a model-name suffix
delayed_res = make_delayed_list_iav(metric, False, submetric_var=None)

# Compute
res = dask.compute(*delayed_res)

# Merge and average over ensemble + model (ensmod) and ssp
ds_out = xr.concat(res, dim='ensmod').mean(dim=['ensmod', 'ssp'])
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric +'_iav.nc')

CPU times: user 49.1 s, sys: 15 s, total: 1min 4s
Wall time: 4min 50s


In [9]:
%%time
################################
# UC on forced response
################################
# One delayed uc_hs09_forced call per year (2020-2096), all variables kept
delayed_res = [
    dask.delayed(uc_hs09_forced)(nex_in, nex_models,
                                 cil_in, cil_models,
                                 isi_in, isi_models,
                                 cbp_in, cbp_gard_models, cbp_deep_models,
                                 metric, False,
                                 year)
    for year in range(2020, 2097)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric + '.nc')

CPU times: user 2min 43s, sys: 26.2 s, total: 3min 9s
Wall time: 17min 22s


## Dry days

In [None]:
# Metric for the two cells below (IAV and forced response); also used in their output file names
metric = 'dry'

In [None]:
%%time
################################
# Interannual variability
################################
# make_delayed_list_iav takes (metric, submetric, submetric_var); this metric
# needs neither a variable selection nor a model-name suffix
delayed_res = make_delayed_list_iav(metric, False, submetric_var=None)

# Compute
res = dask.compute(*delayed_res)

# Merge and average over ensemble + model (ensmod) and ssp
ds_out = xr.concat(res, dim='ensmod').mean(dim=['ensmod', 'ssp'])
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric +'_iav.nc')

In [None]:
%%time
################################
# UC on forced response
################################
# One delayed uc_hs09_forced call per year (2020-2096);
# GARD-SV is restricted to the models that provide precipitation
delayed_res = [
    dask.delayed(uc_hs09_forced)(nex_in, nex_models,
                                 cil_in, cil_models,
                                 isi_in, isi_models,
                                 cbp_in, cbp_gard_precip_models, cbp_deep_models,
                                 metric, False,
                                 year)
    for year in range(2020, 2097)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric + '.nc')

## Hot days

### Tasmax

In [36]:
# Settings for the two cells below:
#   metric        - metric passed to the functions and used in the output file names
#   submetric     - variables kept from each file
#   submetric_var - suffix appended to NEX / ISIMIP model names ('<model>_tasmax')
metric = 'hot'
submetric = ['tasmax_rp5gmfd_count', 'tasmax_rp5gmfd_streak']
submetric_var = 'tasmax'

In [None]:
%%time
################################
# Interannual variability
################################
# One delayed IAV estimate per model of every ensemble
delayed_res = make_delayed_list_iav(metric, submetric, submetric_var)

# Run them all
res = dask.compute(*delayed_res)

# Stack along ensemble + model (ensmod), then average over ensmod and ssp
iav_all = xr.concat(res, dim='ensmod')
ds_out = iav_all.mean(dim=['ensmod', 'ssp'])
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric + '_' + submetric_var + '_iav.nc')

In [None]:
%%time
################################
# UC on forced response
################################
# NEX and ISIMIP files are looked up as <model>_<submetric_var>
nex_var_models = [model + '_' + submetric_var for model in nex_models]
isi_var_models = [model + '_' + submetric_var for model in isi_models]

# One delayed uc_hs09_forced call per year (2020-2096)
delayed_res = [
    dask.delayed(uc_hs09_forced)(nex_in, nex_var_models,
                                 cil_in, cil_models,
                                 isi_in, isi_var_models,
                                 cbp_in, cbp_gard_models, cbp_deep_models,
                                 metric, submetric,
                                 year)
    for year in range(2020, 2097)
]

# Run all years
res = dask.compute(*delayed_res)

# Stack the yearly results along time and write them out
ds_out = xr.concat(res, dim='time')
ds_out.to_netcdf(out_path + 'hs09_iav/' + metric + '_' + submetric_var + '.nc')