In [1]:
import numpy as np
import xarray as xr
import dask
import os
from glob import glob

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Root of the NEX-GDDP-CMIP6 inputs. The cells below read daily files from
# in_path + '<model>/<ssp>/<variable>/', so the trailing '/' is required.
in_path = '/gpfs/group/kaf26/default/public/NEX-GDDP-CMIP6/models/'
# Root of the results. Each metric cell writes out_path + '<metric>/<model>.nc',
# so the trailing '/' is required here as well.
out_path = '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/nex-gddp/'

In [3]:
###################
# Models
###################
from utils import nex_ssp_dict

# Every key of nex_ssp_dict is a model name; the value is looked up later
# as the list of SSPs to process for that model.
models = [model_name for model_name in nex_ssp_dict.keys()]

In [4]:
###################
# Model details
###################
# For each model, recover the part of its file names that sits between
# '<var>_day_<model>_<ssp>' and '<year>.nc', using the first 2015 ssp126
# tasmax file that glob finds.
model_info = {}
for model in models:
    model_root = in_path + model
    matches = glob(model_root + '/ssp126/tasmax/*_2015.nc')
    # Strip the directory prefix, then the fixed file-name stem, then the year
    name_part = matches[0].replace(model_root, '')
    name_part = name_part.replace('/ssp126/tasmax/tasmax_day_' + model + '_ssp126', '')
    name_part = name_part.replace('2015.nc', '')
    model_info[model] = name_part

In [5]:
############
# Dask
############
# Start a PBS-backed dask cluster and attach a client to it; the delayed
# tasks built in the metric cells below are executed through this client.
from dask_jobqueue import PBSCluster
# One core and 20GB per job, 30 minute walltime.
# NOTE(review): '#PBS -l feature=rhel7' looks like a PBS job-script directive,
# but worker_extra_args is forwarded to the dask worker command rather than
# the job header. Presumably job_extra_directives was intended - confirm
# against the dask-jobqueue documentation for the installed version.
cluster = PBSCluster(cores=1, resource_spec = 'pmem=20GB', memory='20GB',
                     worker_extra_args= ['#PBS -l feature=rhel7'],
                     walltime = '00:30:00')

cluster.scale(jobs=30)  # request 30 jobs from the scheduler

from dask.distributed import Client
client = Client(cluster)

# Last expression of the cell: displays the client summary
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.228:36971,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Simple metrics (no historical quantiles required)

In [5]:
########################################################
# Calculate the metric for a 
# single model-year, including all SSPs and variables
########################################################
def model_year_metric(path, model, model_vers, ssps, var_ids, year, metric):
    """
    Compute one annual metric for a single model-year, for every SSP and variable.

    Parameters
    ----------
    path : str
        Input root (with trailing '/'); files are opened at
        path + model/ssp/var/ + var_day_model_ssp + model_vers + year + '.nc'.
    model : str
        Model name as used in the directory and file names.
    model_vers : str
        File-name fragment between the SSP and the year.
    ssps : iterable of str
        Scenarios to process; they become the 'ssp' coordinate of the result.
    var_ids : iterable of str
        Variables to read ('tas', 'tasmin', 'tasmax', 'pr').
    year : int
        Year of the daily file to read.
    metric : str
        'avg' for the annual mean, 'max' for the annual maximum, 'dry' for
        dry-day counts and longest dry streaks (renames 'pr', so it expects
        precipitation). Any other value leaves the daily data unreduced.

    Returns
    -------
    xarray.Dataset
        All variables merged per SSP, then concatenated along 'ssp'.
    """
    def n_longest_consecutive(ds, dim='time'):
        # Running total minus its value at the most recent zero entry is the
        # length of the current run; the maximum along `dim` is the longest run
        running = ds.cumsum(dim=dim)
        at_last_zero = running.where(ds == 0).ffill(dim=dim).fillna(0)
        return (running - at_last_zero).max(dim=dim)

    def annual(ds):
        # Yearly resampling shared by every metric
        return ds.resample(time='1Y')

    temperature_vars = ('tas', 'tasmax', 'tasmin')

    # Per-variable results, grouped by SSP
    per_ssp = {}
    for ssp in ssps:
        yearly = []
        for var in var_ids:
            file_name = var + '_day_' + model + '_' + ssp + model_vers + str(year) + '.nc'
            ds_day = xr.open_dataset(path + model + '/' + ssp + '/' + var + '/' + file_name)

            # Unit conversion, applied only when the units attribute asks for it
            if var in temperature_vars:
                # temperature: K -> C
                if ds_day[var].attrs['units'] == 'K':
                    ds_day[var] = ds_day[var] - 273.15
            elif var == 'pr':
                # precip: kg m-2 s-1 -> mm day-1
                if ds_day[var].attrs['units'] == 'kg m-2 s-1':
                    ds_day[var] = ds_day[var] * 86400

            # Reduce the daily data to the requested annual metric
            if metric == 'avg':
                ds_year = annual(ds_day).mean()
            elif metric == 'max':
                ds_year = annual(ds_day).max()
            elif metric == 'dry':
                no_rain = ds_day == 0.      # exactly 0mm
                under_1mm = ds_day < 1.     # less than 1mm
                # Day counts and longest consecutive streaks for both thresholds
                ds_year = xr.merge([
                    annual(no_rain).sum().rename({'pr': 'count_eq_0'}),
                    annual(no_rain).apply(n_longest_consecutive).rename({'pr': 'streak_eq_0'}),
                    annual(under_1mm).sum().rename({'pr': 'count_lt_1'}),
                    annual(under_1mm).apply(n_longest_consecutive).rename({'pr': 'streak_lt_1'}),
                ])
            else:
                ds_year = ds_day

            yearly.append(ds_year)

        per_ssp[ssp] = yearly

    # Merge the variables of each SSP, label with the SSP, stack along 'ssp'
    labelled = {ssp: xr.merge(per_ssp[ssp]).assign_coords(ssp = ssp) for ssp in ssps}
    return xr.concat([labelled[ssp] for ssp in ssps], dim='ssp')

### Annual averages

In [7]:
# Loop through models: RUNTIME IS ~15 MINS PER MODEL WITH 30 DASK WORKERS
metric = 'avg'

# All variables
var_ids = ['tas', 'tasmin', 'tasmax', 'pr']

for model in models:
    out_file = out_path + metric + '/' + model + '.nc'

    # Skip models whose result file is already on disk
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # One delayed task per year, 2015-2100 inclusive
    delayed_res = [
        dask.delayed(model_year_metric)(path = in_path,
                                        model = model,
                                        model_vers = model_info[model],
                                        ssps = nex_ssp_dict[model],
                                        var_ids = var_ids,
                                        year = year,
                                        metric = metric)
        for year in range(2015, 2101)
    ]

    # Run all years on the cluster
    res = dask.compute(*delayed_res)

    # Combine the yearly datasets by their coordinates and write to disk
    df_final = xr.combine_by_coords(res)
    df_final.to_netcdf(out_file)

    print(model)

ACCESS-ESM1-5 already done
BCC-CSM2-MR already done
CanESM5 already done
CMCC-ESM2 already done
CNRM-CM6-1 already done
CNRM-ESM2-1 already done
EC-Earth3 already done
EC-Earth3-Veg-LR already done
GFDL-ESM4 already done
HadGEM3-GC31-LL already done
INM-CM4-8 already done
INM-CM5-0 already done
IPSL-CM6A-LR already done
MIROC-ES2L already done
MIROC6 already done
MPI-ESM1-2-HR
MPI-ESM1-2-LR already done
MRI-ESM2-0
NESM3
NorESM2-LM already done
NorESM2-MM already done
UKESM1-0-LL already done


### Annual maxima

In [7]:
# Loop through models: RUNTIME IS ~10 MINS PER MODEL WITH 30 DASK WORKERS
metric = 'max'

# All variables
var_ids = ['tas', 'tasmin', 'tasmax', 'pr']

for model in models:
    out_file = out_path + metric + '/' + model + '.nc'

    # Nothing to do when the result file is already present
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Build the per-year task list (2015 through 2100)
    delayed_res = []
    for year in range(2015, 2101):
        task_args = dict(path = in_path,
                         model = model,
                         model_vers = model_info[model],
                         ssps = nex_ssp_dict[model],
                         var_ids = var_ids,
                         year = year,
                         metric = metric)
        delayed_res.append(dask.delayed(model_year_metric)(**task_args))

    # Evaluate every year in parallel
    res = dask.compute(*delayed_res)

    # Assemble along the coordinates and store
    df_final = xr.combine_by_coords(res)
    df_final.to_netcdf(out_file)

    print(model)

ACCESS-ESM1-5 already done
BCC-CSM2-MR already done
CanESM5 already done
CMCC-ESM2 already done
CNRM-CM6-1 already done
CNRM-ESM2-1 already done
EC-Earth3 already done
EC-Earth3-Veg-LR already done
GFDL-ESM4 already done
HadGEM3-GC31-LL already done
INM-CM4-8 already done
INM-CM5-0 already done
IPSL-CM6A-LR already done
MIROC-ES2L already done
MIROC6 already done
MPI-ESM1-2-HR
MPI-ESM1-2-LR already done
MRI-ESM2-0
NESM3
NorESM2-LM already done
NorESM2-MM already done
UKESM1-0-LL already done


### Dry days

In [7]:
# Loop through models: RUNTIME IS ~12 MINS PER MODEL WITH 30 DASK WORKERS
metric = 'dry'

# Precip only
var_ids = ['pr']

for model in models:
    out_file = out_path + metric + '/' + model + '.nc'

    # Skip models that already have a result file
    if os.path.isfile(out_file):
        print(model + ' already done')
        continue

    # Delayed per-year computations, 2015-2100 inclusive
    delayed_res = [
        dask.delayed(model_year_metric)(path = in_path,
                                        model = model,
                                        model_vers = model_info[model],
                                        ssps = nex_ssp_dict[model],
                                        var_ids = var_ids,
                                        year = year,
                                        metric = metric)
        for year in range(2015, 2101)
    ]

    # Compute on the cluster
    res = dask.compute(*delayed_res)

    # The yearly results are joined with xr.concat along 'time' here, whereas
    # the average and maximum cells use xr.combine_by_coords.
    # NOTE(review): the MergeError saved in this cell's output cites
    # combine_attrs='no_conflicts'; presumably it predates the switch to
    # xr.concat - re-run the cell to confirm and refresh the output.
    df_final = xr.concat(res, dim='time')
    df_final.to_netcdf(out_file)

    print(model)

MergeError: combine_attrs='no_conflicts', but some values are not the same. Merging {'cmip6_source_id': 'ACCESS-ESM1-5', 'cmip6_institution_id': 'CSIRO', 'cmip6_license': 'CC-BY-SA 4.0', 'activity': 'NEX-GDDP-CMIP6', 'contact': 'Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. Bridget Thrasher: bridget@climateanalyticsgroup.org', 'Conventions': 'CF-1.7', 'creation_date': '2022-02-22T11:07:26.220685+00:00', 'frequency': 'day', 'institution': 'NASA Earth Exchange, NASA Ames Research Center, Moffett Field, CA 94035', 'variant_label': 'r1i1p1f1', 'product': 'output', 'realm': 'atmos', 'source': 'BCSD', 'scenario': 'ssp126', 'references': 'BCSD method: Thrasher et al., 2012, Hydrol. Earth Syst. Sci.,16, 3309-3314. Ref period obs: latest version of the Princeton Global Meteorological Forcings (http://hydrology.princeton.edu/data.php), based on Sheffield et al., 2006, J. Climate, 19 (13), 3088-3111.', 'version': '1.0', 'tracking_id': '678c0a3c-7982-420c-a52e-968a47ae3ea9', 'title': 'ACCESS-ESM1-5, r1i1p1f1, ssp126, global downscaled CMIP6 climate projection data', 'resolution_id': '0.25 degree', 'history': '2022-02-22T11:07:26.220685+00:00: install global attributes', 'doi': 'https://doi.org/10.7917/OFSG3345', 'disclaimer': 'This data is considered provisional and subject to change. This data is provided as is without any warranty of any kind, either express or implied, arising by law or otherwise, including but not limited to warranties of completeness, non-infringement, accuracy, merchantability, or fitness for a particular purpose. The user assumes all risk associated with the use of, or inability to use, this data.', 'external_variables': 'areacella'} with {'cmip6_source_id': 'ACCESS-ESM1-5', 'cmip6_institution_id': 'CSIRO', 'cmip6_license': 'CC-BY-SA 4.0', 'activity': 'NEX-GDDP-CMIP6', 'contact': 'Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. 
Bridget Thrasher: bridget@climateanalyticsgroup.org', 'Conventions': 'CF-1.7', 'creation_date': '2022-02-22T11:07:26.535384+00:00', 'frequency': 'day', 'institution': 'NASA Earth Exchange, NASA Ames Research Center, Moffett Field, CA 94035', 'variant_label': 'r1i1p1f1', 'product': 'output', 'realm': 'atmos', 'source': 'BCSD', 'scenario': 'ssp126', 'references': 'BCSD method: Thrasher et al., 2012, Hydrol. Earth Syst. Sci.,16, 3309-3314. Ref period obs: latest version of the Princeton Global Meteorological Forcings (http://hydrology.princeton.edu/data.php), based on Sheffield et al., 2006, J. Climate, 19 (13), 3088-3111.', 'version': '1.0', 'tracking_id': 'c0d6a1bc-b89a-4cf8-9154-511b15f7a9b6', 'title': 'ACCESS-ESM1-5, r1i1p1f1, ssp126, global downscaled CMIP6 climate projection data', 'resolution_id': '0.25 degree', 'history': '2022-02-22T11:07:26.535384+00:00: install global attributes', 'doi': 'https://doi.org/10.7917/OFSG3345', 'disclaimer': 'This data is considered provisional and subject to change. This data is provided as is without any warranty of any kind, either express or implied, arising by law or otherwise, including but not limited to warranties of completeness, non-infringement, accuracy, merchantability, or fitness for a particular purpose. The user assumes all risk associated with the use of, or inability to use, this data.', 'external_variables': 'areacella'}

## Less simple metrics (historical quantiles required)

In [None]:
########################################################
# Calculate the metric for a 
# single model-year, including all SSPs and variables
########################################################
def model_year_metric(path, model, model_vers, ssps, var_ids, year, metric):
    """
    Count threshold exceedances for a single model-year, for all SSPs and variables.

    Each daily file is read, converted to degrees C or mm/day when its units
    attribute is 'K' or 'kg m-2 s-1', and compared against the q95 and q99
    thresholds stored in the ERA5 and GMFD quantile files. For every
    threshold the number of exceedance days per year and the longest run of
    consecutive exceedance days are returned.

    NOTE: this reuses the name `model_year_metric` of the simple-metrics
    function defined earlier in the notebook, so running this cell replaces it.

    NOTE(review): the comparison assumes the quantile files share the
    lat/lon grid and the (converted) units of the model data - confirm.

    Parameters
    ----------
    path : str
        Input root (with trailing '/'); files are opened at
        path + model/ssp/var/ + var_day_model_ssp + model_vers + year + '.nc'.
    model : str
        Model name as used in the directory and file names.
    model_vers : str
        File-name fragment between the SSP and the year.
    ssps : iterable of str
        Scenarios to process; they become the 'ssp' coordinate of the result.
    var_ids : list of str
        Variables to process. Temperature quantiles are used when any of
        'tas', 'tasmin', 'tasmax' is present, otherwise precipitation
        quantiles when 'pr' is present.
    year : int
        Year of the daily file to read.
    metric : str
        'hot' compares against '<var>_q95' / '<var>_q99';
        'wet' compares against '<var>_q95_wet' / '<var>_q99_wet'.

    Returns
    -------
    xarray.Dataset
        Variables '<var>_<q>_<obs>_count' and '<var>_<q>_<obs>_streak' for
        q in (q95, q99) and obs in (era5, gmfd), concatenated along 'ssp'.

    Raises
    ------
    ValueError
        If `metric` is not 'hot' or 'wet', or if `var_ids` holds neither a
        temperature variable nor 'pr'.
    """
    # Function for longest consecutive spell
    def n_longest_consecutive(ds, dim='time'):
        ds = ds.cumsum(dim=dim) - ds.cumsum(dim=dim).where(ds == 0).ffill(dim=dim).fillna(0)
        return ds.max(dim=dim)

    # Suffix of the threshold variable names; fail early on an unknown metric
    # rather than part-way through the first file
    if metric == 'hot':
        q_suffix = ''
    elif metric == 'wet':
        q_suffix = '_wet'
    else:
        raise ValueError("metric must be 'hot' or 'wet', got " + repr(metric))

    # Read historical quantiles
    temperature_vars = ('tas', 'tasmin', 'tasmax')
    if any(v in temperature_vars for v in var_ids):
        # Same '../data/' directory as the other three quantile files
        ds_q_era5 = xr.open_dataset('../data/era5_temperature_quantiles.nc')
        ds_q_gmfd = xr.open_dataset('../data/gmfd_temperature_quantiles.nc')
    elif 'pr' in var_ids:
        ds_q_era5 = xr.open_dataset('../data/era5_precip_quantiles.nc')
        ds_q_gmfd = xr.open_dataset('../data/gmfd_precip_quantiles.nc')
    else:
        raise ValueError("var_ids must contain a temperature variable or 'pr', got " + repr(var_ids))

    # (label, dataset) pairs in the order the output variables are merged
    quantile_sets = [('era5', ds_q_era5), ('gmfd', ds_q_gmfd)]
    quantile_levels = ['q95', 'q99']

    # Set up dictionary for all results
    ds_all = {}
    # Loop through SSPs
    for ssp in ssps:
        # Temporary list for each SSP
        ds_list = []
        # Loop through variables
        for var in var_ids:
            ## Temporary file for each variable
            ds_tmp = xr.open_dataset(path + model + '/' + ssp + '/' +
                                     var + '/' + var + '_day_' + model +
                                     '_' + ssp + model_vers + str(year) + '.nc')

            ## Convert units
            # temperature: K -> C
            if var in temperature_vars and ds_tmp[var].attrs['units'] == 'K':
                ds_tmp[var] = ds_tmp[var] - 273.15

            # precip: kg m-2 s-1 -> mm day-1
            if var == 'pr' and ds_tmp.pr.attrs['units'] == 'kg m-2 s-1':
                ds_tmp['pr'] = ds_tmp['pr'] * 86400

            ## Calculate metric
            # Above-threshold binary mask for every (obs, quantile) pair
            exceed = []
            for obs, ds_q in quantile_sets:
                for q in quantile_levels:
                    mask = ds_tmp > ds_q[var + '_' + q + q_suffix]
                    exceed.append((var + '_' + q + '_' + obs, mask))

            # Count of hot/wet days: sum the boolean mask. count() would
            # return the number of non-missing days instead.
            counts = [mask.resample(time='1Y').sum().rename({var: label + '_count'})
                      for label, mask in exceed]
            # Longest consecutive hot/wet day streak
            streaks = [mask.resample(time='1Y').apply(n_longest_consecutive).rename({var: label + '_streak'})
                       for label, mask in exceed]
            # Merge
            ds_tmp = xr.merge(counts + streaks)

            # Append to list
            ds_list.append(ds_tmp)

        # Append to dict
        ds_all.update({ssp: ds_list})

    # Merge and concat along ssp dimension
    for ssp in ssps:
        ds_all[ssp] = xr.merge(ds_all[ssp])
        ds_all[ssp] = ds_all[ssp].assign_coords(ssp = ssp)

    # Return
    ds_out = xr.concat([ds_all[ssp] for ssp in ssps], dim='ssp')
    return ds_out

In [None]:
########################################################
# Calculate the metric for a 
# single model-year, including all SSPs and variables
########################################################
def model_year_metric(path, model, model_vers, ssps, var_ids, year, metric):
    """
    Draft of a per-model-year metric function; appears unfinished.

    NOTE(review): this cell reuses the name `model_year_metric` of the two
    earlier definitions in the notebook, so running it replaces them. As
    written, the body references names that are never assigned inside the
    function (`var`, `ds_tmp_0`, `ds_tmp_1`) and opens quantile datasets that
    it never uses. The intended behaviour cannot be determined from the code;
    confirm whether this cell should be completed or removed.
    """
    # Function for longest consecutive spell if needed
    def n_longest_consecutive(ds, dim='time'):
        ds = ds.cumsum(dim=dim) - ds.cumsum(dim=dim).where(ds == 0).ffill(dim=dim).fillna(0)
        return ds.max(dim=dim)
    
    # Read historical quantiles
    # NOTE(review): ds_q_era5 / ds_q_gmfd are not referenced again below
    if 'tas' in var_ids:
        ds_q_era5 = xr.open_dataset('../data/era5_temperature_quantiles.nc')
        ds_q_gmfd = xr.open_dataset('../data/gmfd_temperature_quantiles.nc')
    elif 'pr' in var_ids:
        ds_q_era5 = xr.open_dataset('../data/era5_precip_quantiles.nc')
        ds_q_gmfd = xr.open_dataset('../data/gmfd_precip_quantiles.nc')

    # Set up dictionary for all results
    ds_all = {}
    # Loop through SSPs
    for ssp in ssps:
        # Temporary list for each SSP
        ds_list = []
        
        # Read file
        # NOTE(review): `var` is not a parameter and there is no loop over
        # var_ids here, so this resolves to a global `var` if one exists and
        # raises NameError otherwise
        ds_tmp = xr.open_dataset(path + model + '/' + ssp + '/' +
                                 var + '/' + var + '_day_' + model + 
                                 '_' + ssp + model_vers + str(year) + '.nc')
            
        ## Convert units
        # temperature: K -> C
        if var == 'tas' and ds_tmp.tas.attrs['units'] == 'K':
            ds_tmp['tas'] = ds_tmp['tas'] - 273.15
        if var == 'tasmax' and ds_tmp.tasmax.attrs['units'] == 'K':
            ds_tmp['tasmax'] = ds_tmp['tasmax'] - 273.15
        if var == 'tasmin' and ds_tmp.tasmin.attrs['units'] == 'K':
            ds_tmp['tasmin'] = ds_tmp['tasmin'] - 273.15

        # precip: kg m-2 s-1 -> mm day-1
        if var == 'pr' and ds_tmp.pr.attrs['units'] == 'kg m-2 s-1':
            ds_tmp['pr'] = ds_tmp['pr'] * 86400

        # Calculate metric
        # NOTE(review): the 'hot' branch compares against the constants 0 and 1
        # and renames 'pr'; it looks carried over from the dry-day code
        if metric == 'hot':
            ds_tmp_q95 = (ds_tmp > 0.).resample(time='1Y').apply(n_longest_consecutive) # longest run of values above 0
            ds_tmp_q99 = (ds_tmp < 1.).resample(time='1Y').apply(n_longest_consecutive) # longest run of values below 1
            # merge
            # NOTE(review): ds_tmp_0 / ds_tmp_1 are never assigned in this
            # function; ds_tmp_q95 / ds_tmp_q99 above are never used
            ds_tmp = xr.merge([ds_tmp_0.rename({'pr':'consec_eq_0'}),
                                ds_tmp_1.rename({'pr':'consec_lt_1'})])
                
            # Append to list (only reached when metric == 'hot')
            ds_list.append(ds_tmp)
            
        # Append to dict
        ds_all.update({ssp: ds_list})

    # Merge and concat along ssp dimension
    for ssp in ssps:
        ds_all[ssp] = xr.merge(ds_all[ssp])
        ds_all[ssp] = ds_all[ssp].assign_coords(ssp = ssp)
    
    # Return
    ds_out = xr.concat([ds_all[ssp] for ssp in ssps], dim='ssp')
    return ds_out