In [1]:
import numpy as np
import xarray as xr
import dask
import os
from glob import glob

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Root of the NEX-GDDP-CMIP6 input archive (one sub-directory per model)
nex_in = '/gpfs/group/kaf26/default/public/NEX-GDDP-CMIP6/models/'
# Root under which the processed per-model netCDF files are written
nex_out = '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/cmip6/nex-gddp/'

# Every file name in this notebook is built by plain string concatenation
# (e.g. nex_in + model + '/ssp126/...'), so both roots MUST end with a path
# separator. os.path.join(p, '') appends one only when it is missing, which
# keeps the notebook working if the paths above are edited without it.
nex_in = os.path.join(nex_in, '')
nex_out = os.path.join(nex_out, '')

In [3]:
###################
# Models
###################

# NEX-GDDP models that provide every SSP and variable (tas, pr)
complete_nex_models = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'CanESM5',
    'CMCC-ESM2',
    'CNRM-CM6-1',
    'CNRM-ESM2-1',
    'EC-Earth3',
    'EC-Earth3-Veg-LR',
    'FGOALS-g3',
    'GFDL-CM4',
    'GFDL-ESM4',
    'GISS-E2-1-G',
    'INM-CM4-8',
    'INM-CM5-0',
    'IPSL-CM6A-LR',
    'KACE-1-0-G',
    'MIROC-ES2L',
    'MIROC6',
    'MPI-ESM1-2-HR',
    'MPI-ESM1-2-LR',
    'MRI-ESM2-0',
    'NorESM2-LM',
    'NorESM2-MM',
    'TaiESM1',
    'UKESM1-0-LL',
]

# CIL models that provide every SSP and variable
complete_cil_models = [
    'INM-CM4-8',
    'INM-CM5-0',
    'BCC-CSM2-MR',
    'CMCC-CM2-SR5',
    'CMCC-ESM2',
    'MIROC-ES2L',
    'MIROC6',
    'UKESM1-0-LL',
    'MPI-ESM1-2-LR',
    'NorESM2-LM',
    'NorESM2-MM',
    'GFDL-ESM4',
    'EC-Earth3',
    'EC-Earth3-Veg-LR',
    'EC-Earth3-Veg',
    'CanESM5',
]

# Models present in both ensembles; np.intersect1d returns them as a sorted,
# de-duplicated numpy array, which fixes the processing order used below
models = np.intersect1d(complete_cil_models, complete_nex_models)

In [4]:
###################
# Model details
###################
# Map each model to the model-specific part of its file names, i.e. whatever
# sits between the SSP label and the year. It is recovered from the 2015
# ssp126 tas file by stripping the known directory/prefix and the '2015.nc'
# suffix, and is later spliced back into every file name that gets opened.
model_info = {}
for model in models:
    # sorted() makes the choice deterministic should more than one file match
    matches = sorted(glob(nex_in + model + '/ssp126/tas/*_2015.nc'))

    # Fail with an actionable message instead of a bare IndexError on matches[0]
    if not matches:
        raise FileNotFoundError('No 2015 ssp126 tas file found for model ' + str(model)
                                + ' under ' + nex_in)

    infix = (matches[0]
             .replace(nex_in + model, '')
             .replace('/ssp126/tas/tas_day_' + model + '_ssp126', '')
             .replace('2015.nc', ''))
    model_info[model] = infix

In [5]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Each PBS job provides a single-core worker and is submitted with a
# 15 minute walltime.
# NOTE(review): the processing loops below report ~5-10 minutes PER MODEL, so
# worker jobs may hit their walltime during a multi-model run -- confirm that
# this is intended.
cluster = PBSCluster(
    cores=1,
    resource_spec='pmem=10GB',
    env_extra=['#PBS -l feature=rhel7'],
    walltime='00:15:00',
)

# ask for jobs
cluster.scale(jobs=30)

# Attach this notebook session to the cluster
client = Client(cluster)

# Display the client summary as the cell output
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.211:42200,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Annual averages

In [7]:
# calculate annual means for single model-year over all SSPs and variables
def model_year_means(model_id, model_info, year, path):
    """
    Annual means of tas, tasmin, tasmax and pr for one model and one year.

    One daily netCDF file is read per (SSP, variable) pair, reduced to its
    annual mean, and the results are stacked along a new `ssp` dimension.

    Parameters
    ----------
    model_id : str
        Model name as it appears in the directory and file names under `path`.
    model_info : str
        Model-specific part of the file name that sits between the SSP label
        and the year; it is concatenated verbatim.
    year : int
        Year of the file to read.
    path : str
        Root input directory. It is concatenated directly with `model_id`, so
        it must end with a path separator.

    Returns
    -------
    xarray.Dataset
        Variables `tas`, `tasmin`, `tasmax` (with 273.15 subtracted) and `pr`
        (multiplied by 86400), with an `ssp` dimension labelled
        ssp126/ssp245/ssp370/ssp585.
    """
    ssps = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
    variables = ['tas', 'tasmin', 'tasmax', 'pr']

    ds_ssps = []
    for ssp in ssps:
        ds_vars = []
        for var in variables:
            file_path = (path + model_id + '/' + ssp + '/' + var + '/' + var + '_day_'
                         + model_id + '_' + ssp + model_info + str(year) + '.nc')

            # The context manager releases the file handle as soon as the
            # reduction is done; .load() guarantees nothing in the result still
            # points lazily at the file once it is closed.
            with xr.open_dataset(file_path) as ds:
                ds_vars.append(ds.resample(time='1Y').mean().load())

        # merge the variables and label the result with its scenario
        ds_ssps.append(xr.merge(ds_vars).assign_coords(ssp=ssp))

    # concat along the new ssp dimension
    ds_out = xr.concat(ds_ssps, dim='ssp')

    # unit conversions (assumes inputs are in K and kg m-2 s-1)
    for var in ['tas', 'tasmax', 'tasmin']:
        ds_out[var] = ds_out[var] - 273.15 # K -> C
    ds_out['pr'] = ds_out['pr'] * 86400 # kg m-2 s-1 -> mm day-1

    return ds_out

In [8]:
# loop through models: RUNTIME IS ~10 MINS PER MODEL WITH 30 DASK WORKERS
out_dir = nex_out + 'annual_avgs/'

# Create the output directory up front: otherwise a missing directory is only
# discovered by to_netcdf, after the whole model has already been computed.
os.makedirs(out_dir, exist_ok=True)

for model in models:
    out_file = out_dir + model + '.nc'

    # check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years
    delayed_res = [dask.delayed(model_year_means)(model, model_info[model], year, nex_in)
                   for year in range(2015, 2101)]

    # Run
    res = dask.compute(*delayed_res)

    # Store. Write to a temporary name and rename afterwards, so that an
    # interrupted write can never leave a partial file that the isfile() check
    # above would treat as a finished model.
    df_final = xr.combine_by_coords(res)
    tmp_file = out_dir + model + '.tmp.nc'
    df_final.to_netcdf(tmp_file)
    os.replace(tmp_file, out_file)

    print(model)

CMCC-ESM2
CanESM5 already done
EC-Earth3 already done
EC-Earth3-Veg-LR already done
GFDL-ESM4 already done
INM-CM4-8 already done
INM-CM5-0 already done
MIROC-ES2L already done
MIROC6 already done
MPI-ESM1-2-LR already done
NorESM2-LM already done
NorESM2-MM already done
UKESM1-0-LL already done


## Annual maxima

In [19]:
# calculate annual maxima for single model-year over all SSPs and variables
def model_year_maxima(model_id, model_info, year, path):
    """
    Annual maxima of tas, tasmin, tasmax and pr for one model and one year.

    One daily netCDF file is read per (SSP, variable) pair, reduced to its
    annual maximum, and the results are stacked along a new `ssp` dimension.

    Parameters
    ----------
    model_id : str
        Model name as it appears in the directory and file names under `path`.
    model_info : str
        Model-specific part of the file name that sits between the SSP label
        and the year; it is concatenated verbatim.
    year : int
        Year of the file to read.
    path : str
        Root input directory. It is concatenated directly with `model_id`, so
        it must end with a path separator.

    Returns
    -------
    xarray.Dataset
        Variables `tas`, `tasmin`, `tasmax` (with 273.15 subtracted) and `pr`
        (multiplied by 86400), with an `ssp` dimension labelled
        ssp126/ssp245/ssp370/ssp585.
    """
    ssps = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
    variables = ['tas', 'tasmin', 'tasmax', 'pr']

    ds_ssps = []
    for ssp in ssps:
        ds_vars = []
        for var in variables:
            file_path = (path + model_id + '/' + ssp + '/' + var + '/' + var + '_day_'
                         + model_id + '_' + ssp + model_info + str(year) + '.nc')

            # The context manager releases the file handle as soon as the
            # reduction is done; .load() guarantees nothing in the result still
            # points lazily at the file once it is closed.
            with xr.open_dataset(file_path) as ds:
                ds_vars.append(ds.resample(time='1Y').max().load())

        # merge the variables and label the result with its scenario
        ds_ssps.append(xr.merge(ds_vars).assign_coords(ssp=ssp))

    # concat along the new ssp dimension
    ds_out = xr.concat(ds_ssps, dim='ssp')

    # unit conversions (assumes inputs are in K and kg m-2 s-1)
    for var in ['tas', 'tasmax', 'tasmin']:
        ds_out[var] = ds_out[var] - 273.15 # K -> C
    ds_out['pr'] = ds_out['pr'] * 86400 # kg m-2 s-1 -> mm day-1

    return ds_out

In [24]:
# loop through models: RUNTIME IS ~8 MINS PER MODEL WITH 30 DASK WORKERS
out_dir = nex_out + 'annual_maxs/'

# Create the output directory up front: otherwise a missing directory is only
# discovered by to_netcdf, after the whole model has already been computed.
os.makedirs(out_dir, exist_ok=True)

for model in models:
    out_file = out_dir + model + '.nc'

    # check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years
    delayed_res = [dask.delayed(model_year_maxima)(model, model_info[model], year, nex_in)
                   for year in range(2015, 2101)]

    # Run
    res = dask.compute(*delayed_res)

    # Store. Write to a temporary name and rename afterwards, so that an
    # interrupted write can never leave a partial file that the isfile() check
    # above would treat as a finished model.
    df_final = xr.combine_by_coords(res)
    tmp_file = out_dir + model + '.tmp.nc'
    df_final.to_netcdf(tmp_file)
    os.replace(tmp_file, out_file)

    print(model)

CMCC-ESM2
CanESM5
EC-Earth3
EC-Earth3-Veg-LR
GFDL-ESM4
INM-CM4-8
INM-CM5-0
MIROC-ES2L
MIROC6
MPI-ESM1-2-LR
NorESM2-LM
NorESM2-MM
UKESM1-0-LL


## Annual minima (temperature only)

In [6]:
# calculate annual minima for single model-year over all SSPs (temperature only)
def model_year_minima(model_id, model_info, year, path):
    """
    Annual minima of tas, tasmin and tasmax for one model and one year.

    One daily netCDF file is read per (SSP, variable) pair, reduced to its
    annual minimum, and the results are stacked along a new `ssp` dimension.
    Precipitation is not processed here.

    Parameters
    ----------
    model_id : str
        Model name as it appears in the directory and file names under `path`.
    model_info : str
        Model-specific part of the file name that sits between the SSP label
        and the year; it is concatenated verbatim.
    year : int
        Year of the file to read.
    path : str
        Root input directory. It is concatenated directly with `model_id`, so
        it must end with a path separator.

    Returns
    -------
    xarray.Dataset
        Variables `tas`, `tasmin`, `tasmax` (with 273.15 subtracted), with an
        `ssp` dimension labelled ssp126/ssp245/ssp370/ssp585.
    """
    ssps = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
    variables = ['tas', 'tasmin', 'tasmax']

    ds_ssps = []
    for ssp in ssps:
        ds_vars = []
        for var in variables:
            file_path = (path + model_id + '/' + ssp + '/' + var + '/' + var + '_day_'
                         + model_id + '_' + ssp + model_info + str(year) + '.nc')

            # The context manager releases the file handle as soon as the
            # reduction is done; .load() guarantees nothing in the result still
            # points lazily at the file once it is closed.
            with xr.open_dataset(file_path) as ds:
                ds_vars.append(ds.resample(time='1Y').min().load())

        # merge the variables and label the result with its scenario
        ds_ssps.append(xr.merge(ds_vars).assign_coords(ssp=ssp))

    # concat along the new ssp dimension
    ds_out = xr.concat(ds_ssps, dim='ssp')

    # unit conversions (assumes inputs are in K)
    for var in ['tas', 'tasmax', 'tasmin']:
        ds_out[var] = ds_out[var] - 273.15 # K -> C

    return ds_out

In [7]:
# loop through models: RUNTIME IS ~5 MINS PER MODEL WITH 30 DASK WORKERS
out_dir = nex_out + 'annual_mins/'

# Create the output directory up front: otherwise a missing directory is only
# discovered by to_netcdf, after the whole model has already been computed.
os.makedirs(out_dir, exist_ok=True)

for model in models:
    out_file = out_dir + model + '.nc'

    # check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years
    delayed_res = [dask.delayed(model_year_minima)(model, model_info[model], year, nex_in)
                   for year in range(2015, 2101)]

    # Run
    res = dask.compute(*delayed_res)

    # Store. Write to a temporary name and rename afterwards, so that an
    # interrupted write can never leave a partial file that the isfile() check
    # above would treat as a finished model.
    df_final = xr.combine_by_coords(res)
    tmp_file = out_dir + model + '.tmp.nc'
    df_final.to_netcdf(tmp_file)
    os.replace(tmp_file, out_file)

    print(model)

CMCC-ESM2
CanESM5
EC-Earth3
EC-Earth3-Veg-LR
GFDL-ESM4
INM-CM4-8
INM-CM5-0
MIROC-ES2L
MIROC6
MPI-ESM1-2-LR
NorESM2-LM
NorESM2-MM
UKESM1-0-LL


## Precipitation indices

In [11]:
# calculate annual precipitation indices for single model-year over all SSPs
def model_year_prcp_inds(model_id, model_info, year, path):
    """
    Annual precipitation indices (SDII, R20mm) for one model and one year.

    For each SSP the daily `pr` file is read and two indices are derived:

    * ``SDII``  - mean of `pr` over the days with pr >= 1/86400, then
      multiplied by 86400.
    * ``R20mm`` - number of days with pr >= 20/86400.

    Parameters
    ----------
    model_id : str
        Model name as it appears in the directory and file names under `path`.
    model_info : str
        Model-specific part of the file name that sits between the SSP label
        and the year; it is concatenated verbatim.
    year : int
        Year of the file to read.
    path : str
        Root input directory. It is concatenated directly with `model_id`, so
        it must end with a path separator.

    Returns
    -------
    xarray.Dataset
        Variables `SDII` and `R20mm` with an `ssp` dimension labelled
        ssp126/ssp245/ssp370/ssp585.
    """
    ssps = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

    # Thresholds of 1 and 20 mm day-1 expressed in the units of the input
    # files (assumed kg m-2 s-1, hence the division by 86400 s per day)
    wet_day_thresh = 1./86400
    r20mm_thresh = 20./86400

    ds_ssps = []
    for ssp in ssps:
        file_path = (path + model_id + '/' + ssp + '/pr/pr_day_'
                     + model_id + '_' + ssp + model_info + str(year) + '.nc')

        # The context manager releases the file handle as soon as both indices
        # are computed; .load() guarantees nothing in the results still points
        # lazily at the file once it is closed.
        with xr.open_dataset(file_path) as ds_pr:
            # where() masks days below the threshold with NaN, so mean()
            # averages the remaining days only and count() counts them.
            # NOTE(review): count() yields 0 for cells that are NaN on every
            # day, so always-missing cells are indistinguishable from cells
            # with no heavy-rain days -- confirm this is acceptable downstream.
            ds_sdii = ds_pr.where(ds_pr.pr >= wet_day_thresh).resample(time='1Y').mean().load()
            ds_r20mm = ds_pr.where(ds_pr.pr >= r20mm_thresh).resample(time='1Y').count().load()

        # merge both indices and label the result with its scenario
        ds_ssp = xr.combine_by_coords([ds_sdii.rename({'pr': 'SDII'}),
                                       ds_r20mm.rename({'pr': 'R20mm'})])
        ds_ssps.append(ds_ssp.assign_coords(ssp=ssp))

    # concat along the new ssp dimension
    ds_out = xr.concat(ds_ssps, dim='ssp')

    # unit conversions
    ds_out['SDII'] = ds_out['SDII'] * 86400 # kg m-2 s-1 -> mm day-1

    return ds_out

In [12]:
# loop through models: RUNTIME IS ~5 MINS PER MODEL WITH 30 DASK WORKERS
# The existence check used to look in 'precip_inds/' while the result was
# written to 'prcp_inds/', so finished models were never skipped. Both now use
# one path; 'prcp_inds/' is kept because that is where files have been written.
out_dir = nex_out + 'prcp_inds/'

# Create the output directory up front: otherwise a missing directory is only
# discovered by to_netcdf, after the whole model has already been computed.
os.makedirs(out_dir, exist_ok=True)

for model in models:
    out_file = out_dir + model + '.nc'

    # check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years
    delayed_res = [dask.delayed(model_year_prcp_inds)(model, model_info[model], year, nex_in)
                   for year in range(2015, 2101)]

    # Run
    res = dask.compute(*delayed_res)

    # Store. Write to a temporary name and rename afterwards, so that an
    # interrupted write can never leave a partial file that the isfile() check
    # above would treat as a finished model.
    df_final = xr.combine_by_coords(res)
    tmp_file = out_dir + model + '.tmp.nc'
    df_final.to_netcdf(tmp_file)
    os.replace(tmp_file, out_file)

    print(model)

CMCC-ESM2
CanESM5
EC-Earth3
EC-Earth3-Veg-LR
GFDL-ESM4
INM-CM4-8
INM-CM5-0
MIROC-ES2L
MIROC6
MPI-ESM1-2-LR
NorESM2-LM
NorESM2-MM
UKESM1-0-LL
