In [1]:
import numpy as np
import xarray as xr
import dask
import os
from glob import glob

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# NOTE: both paths are combined with file names via plain string
# concatenation (`+`) in the cells below, so each must end with a trailing '/'.

# Directory holding the ISIMIP3b daily input NetCDF files
in_path = '/gpfs/group/kaf26/default/dcl5300/ISIMIP3b_input_climate_data/files/'
# Directory for the computed metrics; the cells below write into its
# 'annual_avgs/' and 'annual_maxs/' sub-directories
out_path = '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/isimip3b/'

In [3]:
###################
# Models
###################
from utils import isimip_ssp_dict

# The model names are the keys of the model -> SSP-list mapping
models = [*isimip_ssp_dict]

In [4]:
####################
# File year layout
####################
# The first file spans 2015-2020; every later file spans one decade
# (2021-2030, 2031-2040, ..., 2091-2100).
# `year_steps` holds (last year - first year) for each file.
start_years = [2015, *range(2021, 2101, 10)]
year_steps = [5] + [9] * (len(start_years) - 1)

In [5]:
###################
# Model details
###################
# For every model, recover the part of the file name that sits between
# '<model>_' and '_w5e5_...' by probing for the ssp126 precipitation file
# of the first period (2015-2020).
probe_suffix = '_w5e5_ssp126_pr_global_daily_2015_2020.nc'

model_info = {}
for model in models:
    file_prefix = in_path + model.lower()

    # Sort so the choice is deterministic if several files match
    matches = sorted(glob(file_prefix + '*' + probe_suffix))
    if not matches:
        raise FileNotFoundError(
            'No input file found for model ' + model + ' matching '
            + file_prefix + '*' + probe_suffix
        )

    # Strip the leading '<in_path><model>_' and the trailing probe suffix
    model_info[model] = (matches[0]
                         .replace(file_prefix + '_', '')
                         .replace(probe_suffix, ''))

In [6]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# One single-core PBS job per dask worker, 15GB of memory each
cluster = PBSCluster(
    cores=1,
    resource_spec='pmem=15GB',
    memory='15GB',
    env_extra=['#PBS -l feature=rhel7'],
    walltime='02:00:00',
)

# Request 9 worker jobs from the scheduler
cluster.scale(jobs=9)

# Connect this notebook session to the cluster
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.232:46764,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Simple metrics (no historical quantiles required)

In [7]:
########################################################
# Calculate the metric for a 
# single model-year, including all SSPs and variables
########################################################
def model_year_metric(path, model, model_vers, ssps, var_ids, year, year_step, metric):
    """Compute a yearly statistic for one model and one input-file period.

    For every SSP and variable the file
    ``<path><model>_<model_vers>_w5e5_<ssp>_<var>_global_daily_<year>_<year + year_step>.nc``
    is opened, converted to working units where the stored ``units``
    attribute matches, and reduced with ``resample(time='1Y')``.

    Parameters
    ----------
    path : str
        Directory prefix of the input files (concatenated as-is, so it must
        end with a path separator).
    model : str
        Model name as it appears in the file names.
    model_vers : str
        File-name segment between the model name and ``'_w5e5_'``.
    ssps : iterable of str
        SSP identifiers to process; they become the ``ssp`` coordinate.
    var_ids : iterable of str
        Variable names to process.
    year : int
        First year contained in the file.
    year_step : int
        Offset from ``year`` to the last year contained in the file.
    metric : {'mean', 'max', 'min'}
        Yearly reduction to apply.

    Returns
    -------
    xarray.Dataset
        All variables merged per SSP and concatenated along a new ``ssp``
        dimension.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported reductions.
    """
    # Fail fast: previously an unknown metric silently returned daily data
    supported_metrics = ('mean', 'max', 'min')
    if metric not in supported_metrics:
        raise ValueError(
            "Unsupported metric '" + str(metric) + "'; expected one of "
            + ', '.join(supported_metrics)
        )

    temperature_vars = ('tas', 'tasmax', 'tasmin')

    # One merged dataset per SSP, in the order given by `ssps`
    ds_per_ssp = []
    for ssp in ssps:
        ds_per_var = []
        for var in var_ids:
            file_name = (path + model + '_' + model_vers + '_w5e5_'
                         + ssp + '_' + var + '_global_daily_' + str(year)
                         + '_' + str(year + year_step) + '.nc')

            # Context manager releases the file handle once the reduced
            # result has been loaded into memory
            with xr.open_dataset(file_name) as ds_tmp:
                ## Convert units (only when the stored units match)
                # temperature: K -> C
                if var in temperature_vars and ds_tmp[var].attrs.get('units') == 'K':
                    ds_tmp[var] = ds_tmp[var] - 273.15
                # precip: kg m-2 s-1 -> mm day-1
                elif var == 'pr' and ds_tmp[var].attrs.get('units') == 'kg m-2 s-1':
                    ds_tmp[var] = ds_tmp[var] * 86400

                # Calculate metric
                # NOTE(review): recent pandas releases deprecate the 'Y'
                # alias in favour of 'YE' -- confirm against the installed version
                yearly = ds_tmp.resample(time='1Y')
                ds_metric = getattr(yearly, metric)().load()

            ds_per_var.append(ds_metric)

        # Merge the variables and tag the result with its SSP
        ds_per_ssp.append(xr.merge(ds_per_var).assign_coords(ssp = ssp))

    # Concat along ssp dimension
    return xr.concat(ds_per_ssp, dim='ssp')

### Annual averages

In [None]:
# Loop through models: RUNTIME IS ~10 MINS PER MODEL WITH 9 DASK WORKERS

# All variables
var_ids = ['tas', 'tasmin', 'tasmax', 'pr']

# Make sure the output directory exists before any expensive work starts
os.makedirs(out_path + 'annual_avgs/', exist_ok=True)

for model in models:
    out_file = out_path + 'annual_avgs/' + model + '.nc'

    # Check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years: one delayed task per file period
    delayed_res = [
        dask.delayed(model_year_metric)(path = in_path,
                                        model = model.lower(),
                                        model_vers = model_info[model],
                                        ssps = isimip_ssp_dict[model],
                                        var_ids = var_ids,
                                        year = year,
                                        year_step = year_step,
                                        metric = 'mean')
        for year, year_step in zip(start_years, year_steps)
    ]

    # Compute
    res = dask.compute(*delayed_res)

    # Store: write to a temporary name and rename afterwards, so that an
    # interrupted write can never be mistaken for a finished model by the
    # "already exists" check above
    df_final = xr.combine_by_coords(res)
    partial_file = out_file + '.partial'
    df_final.to_netcdf(partial_file)
    os.replace(partial_file, out_file)

    print(model)

CanESM5
CNRM-CM6-1
CNRM-ESM2-1
EC-Earth3
GFDL-ESM4


### Annual minima and maxima

In [6]:
# calculate annual minima for single model-year over all SSPs and temperature variables
def model_year_minima(model_id, model_info, year, path,
                      ssps=('ssp126', 'ssp245', 'ssp370', 'ssp585')):
    """Compute the yearly minimum of tas, tasmin and tasmax for one model-year.

    For every SSP and variable the file
    ``<path><model_id>/<ssp>/<var>/<var>_day_<model_id>_<ssp><model_info><year>.nc``
    is opened and reduced with ``resample(time='1Y').min()``.

    Parameters
    ----------
    model_id : str
        Model name as used in the directory and file names.
    model_info : str
        File-name segment between the SSP and the year (concatenated as-is).
    year : int
        Year of the files to read.
    path : str
        Directory prefix of the input files (concatenated as-is, so it must
        end with a path separator).
    ssps : iterable of str, optional
        SSP identifiers to process; defaults to the four SSPs that were
        previously hard-coded.

    Returns
    -------
    xarray.Dataset
        tas, tasmin and tasmax merged per SSP and concatenated along a new
        ``ssp`` dimension, with 273.15 subtracted from each variable.
    """
    temperature_vars = ('tas', 'tasmin', 'tasmax')

    ds_per_ssp = []
    for ssp in ssps:
        ds_per_var = []
        for var in temperature_vars:
            file_name = (path + model_id + '/' + ssp + '/' + var + '/' + var
                         + '_day_' + model_id + '_' + ssp + model_info
                         + str(year) + '.nc')

            # read file and calculate the annual minimum; the context manager
            # releases the file handle once the result is in memory
            with xr.open_dataset(file_name) as ds_daily:
                ds_per_var.append(ds_daily.resample(time='1Y').min().load())

        # merge variables and assign the ssp coordinate
        ds_per_ssp.append(xr.merge(ds_per_var).assign_coords(ssp = ssp))

    # concat along the ssp dimension
    ds_out = xr.concat(ds_per_ssp, dim='ssp')

    # unit conversions: K -> C
    # NOTE(review): applied unconditionally, i.e. assumes the input files
    # store temperatures in Kelvin -- TODO confirm
    for var in temperature_vars:
        ds_out[var] = ds_out[var] - 273.15

    return ds_out

In [7]:
# loop through models: RUNTIME IS ~5 MINS PER MODEL WITH 30 DASK WORKERS
# NOTE(review): `nex_in` and `nex_out` are not assigned in any cell visible in
# this notebook (only `in_path` / `out_path` are), so this cell raises a
# NameError on a fresh kernel -- confirm the intended paths before running.
for model in models:
    out_file = nex_out + 'annual_mins/' + model + '.nc'

    # skip models whose output file is already on disk
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years: one delayed task per year
    delayed_res = [
        dask.delayed(model_year_minima)(model, model_info[model], year, nex_in)
        for year in range(2015, 2101)
    ]

    # Run
    res = dask.compute(*delayed_res)

    # Store
    df_final = xr.combine_by_coords(res)
    df_final.to_netcdf(out_file)

    print(model)

CMCC-ESM2
CanESM5
EC-Earth3
EC-Earth3-Veg-LR
GFDL-ESM4
INM-CM4-8
INM-CM5-0
MIROC-ES2L
MIROC6
MPI-ESM1-2-LR
NorESM2-LM
NorESM2-MM
UKESM1-0-LL


In [None]:
# All variables
var_ids = ['tas', 'tasmin', 'tasmax', 'pr']

# Make sure the output directory exists before any expensive work starts
os.makedirs(out_path + 'annual_maxs/', exist_ok=True)

for model in models:
    out_file = out_path + 'annual_maxs/' + model + '.nc'

    # Check if already exists
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Parallelize with dask over years: one delayed task per file period
    delayed_res = [
        dask.delayed(model_year_metric)(path = in_path,
                                        model = model.lower(),
                                        model_vers = model_info[model],
                                        ssps = isimip_ssp_dict[model],
                                        var_ids = var_ids,
                                        year = year,
                                        year_step = year_step,
                                        metric = 'max')
        for year, year_step in zip(start_years, year_steps)
    ]

    # Compute
    res = dask.compute(*delayed_res)

    # Store: write to a temporary name and rename afterwards, so that an
    # interrupted write can never be mistaken for a finished model by the
    # "already exists" check above
    df_final = xr.combine_by_coords(res)
    partial_file = out_file + '.partial'
    df_final.to_netcdf(partial_file)
    os.replace(partial_file, out_file)

    print(model)