In [1]:
import os
import glob
import warnings
warnings.simplefilter("ignore", RuntimeWarning) # Ignore invalid arcsin() in EDD calculation

import dask
import numpy as np
import pandas as pd
import xarray as xr

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Every location below hangs off one of these two roots, so relocating the
# data only requires editing these two lines.
public_root = "/gpfs/group/kaf26/default/public"
project_data_root = "/gpfs/group/kaf26/default/dcl5300/wbm_soilM_crop_uc_lafferty-etal-2024-tbd_DATA"

nex_path = f"{public_root}/NEX-GDDP-CMIP6/models"  # location of NEX-GDDP-CMIP6
okr_path = f"{public_root}/OakRidgeCMIP6"  # location of OakRidge
loca_path = f"{public_root}/LOCA2"  # location of LOCA

# The trailing slashes are kept so the resulting strings match what the rest
# of the notebook has always used.
wbm_path = f"{project_data_root}/wbm/wbm_spool/flowdirection206_us/"  # for spooled climate data

out_path = f"{project_data_root}/climate/"

In [3]:
###################
# All models
###################
# LOCA: map each model directory (except "training_data") to the entries
# found under its 0p0625deg sub-directory.
loca_models = os.listdir(loca_path)
loca_dict = {
    loca_model: os.listdir(f"{loca_path}/{loca_model}/0p0625deg/")
    for loca_model in loca_models
    if loca_model != "training_data"
}

# NEX: map each model directory to the entries it contains.
nex_models = os.listdir(nex_path)
nex_dict = {
    nex_model: os.listdir(f"{nex_path}/{nex_model}/")
    for nex_model in nex_models
}

# OakRidge: take the file name of every matching ssp585 / year-2000 file and
# drop its last 18 characters.
okr_models = [
    okr_file.rsplit("/", 1)[-1][:-18]
    for okr_file in glob.glob(f"{okr_path}/*_ssp585_*_2000.nc")
]

In [4]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Resources requested for each PBS job.
pbs_options = dict(
    cores=1,
    memory="40GB",
    resource_spec="pmem=40GB",
    # account='open',
    # NOTE(review): this string is handed to the worker command line rather
    # than emitted as a scheduler directive -- confirm whether
    # job_extra_directives was intended.
    worker_extra_args=["#PBS -l feature=rhel7"],
    walltime="00:30:00",
)
cluster = PBSCluster(**pbs_options)

cluster.scale(jobs=3)  # ask for jobs

client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.240:36475,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# EDD analysis

In [5]:
# Degree day calculations
def above_threshold_each(mins, maxs, threshold):
    """Estimate degree-days above ``threshold`` from daily temperature extremes.

    Each day's temperature is approximated by a sinusoid running between the
    daily minimum and maximum; the part of that curve lying above the
    threshold is integrated over the day.

    mins and maxs are numpy arrays; threshold is in the same units.
    Returns an array with one value per (min, max) pair.

    Code from James Rising (https://github.com/jrising/research-common/blob/master/python/gdd.py)
    """
    mean_temp = (mins + maxs)/2
    half_range = (maxs - mins)/2
    two_pi = 2*np.pi

    # Fraction of the day at which the curve rises through the threshold
    # (`rise`) and at which it drops back below it (`fall`). Where the
    # threshold is never crossed the arcsin argument is out of range and the
    # result is NaN; those entries are overwritten just below.
    rise = np.arcsin((threshold - mean_temp) / half_range) / two_pi
    fall = .5 - rise

    always_above = mins >= threshold
    never_above = maxs <= threshold

    # Whole day above the threshold: integrate over the full day.
    rise[always_above] = 0
    fall[always_above] = 1
    # Whole day at or below the threshold: empty integration interval.
    rise[never_above] = 0
    fall[never_above] = 0

    def antiderivative(day_fraction):
        # Antiderivative of the sinusoidal temperature curve.
        return -half_range * np.cos(2*np.pi*day_fraction) / two_pi + mean_temp * day_fraction

    return antiderivative(fall) - antiderivative(rise) - threshold * (fall - rise)

def edd_ufunc(tasmin, tasmax, threshold):
    """Apply ``above_threshold_each`` to xarray objects via ``xr.apply_ufunc``.

    tasmin and tasmax are the daily minimum / maximum temperature fields and
    threshold is forwarded unchanged as the third argument.
    """
    ufunc_inputs = (tasmin, tasmax, threshold)
    return xr.apply_ufunc(above_threshold_each, *ufunc_inputs)

## Spooled climate drivers

In [14]:
############################################
# EDD function for spooled climate drivers
############################################
def edd_spooled(wbm_path, ensemble, model, member, ssp, method, year_start, year_end, thresh, out_path):
    """Compute monthly degree-day sums from spooled yearly tmin/tmax files.

    For every year in ``year_start..year_end`` (inclusive) the matching pair
    of yearly netCDF files is opened, degree days above the threshold are
    computed with ``edd_ufunc`` and the result is summed to monthly totals.

    Parameters
    ----------
    wbm_path : str
        Directory holding the ``<run>_<variable>_daily/<year>.nc`` folders.
    ensemble : str
        ``"OakRidge"`` selects the ``tmax``/``tmin`` variable names, includes
        ``method`` in the directory name and uses ``thresh`` unchanged. Any
        other value selects ``tasmax``/``tasmin``, ignores ``method`` and adds
        273.15 to ``thresh`` (presumably a Celsius-to-Kelvin conversion).
    model, member, ssp : str
        Components of the spooled directory name.
    method : str
        Downscaling method; only used when ``ensemble == "OakRidge"``.
    year_start, year_end : int
        First and last year to process.
    thresh : number
        Degree-day threshold.
    out_path : str
        Not used by the code below.

    Returns
    -------
    None

    Notes
    -----
    * A failure in any single year is not raised: the exception text is
      written to a ``<model>_<member>_<ssp>_<year>.txt`` file under a
      hard-coded directory and the year is skipped. ``method`` is not part of
      that file name.
    * The monthly results are only collected in a local list. The concat /
      store step that follows this definition is commented out, so nothing is
      persisted or returned, and callers that test for an output file will
      not find one.
    """
    # Everything that depends only on the ensemble is resolved once, up front.
    if ensemble == "OakRidge":
        tasmax, tasmin = "tmax", "tmin"
        run_prefix = f"{wbm_path}/{ensemble}_{model}_{member}_{method}_{ssp}"
        thresh_units = thresh
    else:
        tasmax, tasmin = "tasmax", "tasmin"
        run_prefix = f"{wbm_path}/{ensemble}_{model}_{member}_{ssp}"
        thresh_units = thresh + 273.15

    ds_edd_out = []
    # Loop through time slices
    for year in range(year_start, year_end+1):
        try:
            # Context managers close both files even when the second open or
            # the computation fails, so handles do not pile up across years.
            with xr.open_dataset(f"{run_prefix}_{tasmax}_daily/{year}.nc") as ds_tasmax:
                with xr.open_dataset(f"{run_prefix}_{tasmin}_daily/{year}.nc") as ds_tasmin:
                    # Calculate EDD
                    ds_edd = edd_ufunc(tasmin = ds_tasmin[tasmin],
                                       tasmax = ds_tasmax[tasmax],
                                       threshold = thresh_units)
                    # Sum to monthly (done while the files are still open)
                    ds_edd = ds_edd.resample(time='1M').sum()
            # Append
            ds_edd_out.append(ds_edd)

        except Exception as e:
            # Best effort: record the failure for this year and carry on.
            except_path = "/storage/work/d/dcl5300/current_projects/wbm_soilM_crop_uc_lafferty-etal-2024-tbd/code/"
            with open(except_path + model + "_" + member + "_" + ssp + "_" + str(year) + ".txt", "w") as f:
                f.write(str(e))
        
            
#         # Concat
#         ds_edd_out = xr.concat(ds_edd_out, dim="time")
#         # Update attrs
#         ds_edd_out = ds_edd_out.to_dataset(name = f"edd_{thresh}c")
#         ds_edd_out.attrs = ds_tasmin.attrs
    
#         ds_edd_out.attrs["NOTE"] = f"Extreme Degree Days (EDD) calculated as in DOI: 10.1111/agec.12315 Supplementary Material with threshold {thresh}C. Author: David Lafferty - University of Illinois (davidcl2@illinois.edu). Date: August 2023"
#         ds_edd_out[f"edd_{thresh}c"].attrs["units"] = "Days"
#         ds_edd_out[f"edd_{thresh}c"].attrs["description"] = "Extreme (Growing) Degree Days"
        
#         # Store
#         if ensemble == "OakRidge":
#             ds_edd_out.to_netcdf(f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}_{method}.nc")
#         else:
#             ds_edd_out.to_netcdf(f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}.nc")

### LOCA

In [7]:
# Initial WBM runs: one {"model", "member"} record per (model, member) pair
model_infos = [
    {"model": model_name, "member": member_id}
    for model_name, member_id in (
        ("ACCESS-CM2", "r1i1p1f1"),
        ("BCC-CSM2-MR", "r1i1p1f1"),
        ("CNRM-ESM2-1", "r1i1p1f2"),
        ("MPI-ESM1-2-HR", "r1i1p1f1"),
        ("MRI-ESM2-0", "r1i1p1f1"),
        ("NorESM2-MM", "r1i1p1f1"),
    )
]

In [8]:
# LOCA, historical period
ensemble = "LOCA2"
ssp = "historical"

thresh = 29

year_start = 1980
year_end = 2014

# Arguments that are identical for every model in this run
shared_kwargs = dict(wbm_path=wbm_path,
                     ensemble=ensemble,
                     ssp=ssp,
                     method="",
                     year_start=year_start,
                     year_end=year_end,
                     thresh=thresh,
                     out_path=out_path)

delayed = []

for model_info in model_infos:
    model = model_info["model"]
    member = model_info["member"]
    # Skip models whose output file already exists
    done_file = f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}.nc"
    if os.path.isfile(done_file):
        continue
    # Queue the EDD calculation
    delayed.append(dask.delayed(edd_spooled)(model=model, member=member, **shared_kwargs))

_ = dask.compute(*delayed)

In [21]:
# ######################
# # LOCA EDD function
# ######################
# def edd_loca(loca_path, model, member, ssp, thresh, out_path):
#     ds_edd_out = []
#     # Loop through time slices
#     for time in ["2015-2044", "2045-2074", "2075-2100"]:
#         # Read
#         ds_tasmax = xr.open_dataset(f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/tasmax/tasmax.{model}.{ssp}.{member}.{time}.LOCA_16thdeg_v20220413.nc")
#         ds_tasmin = xr.open_dataset(f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/tasmin/tasmin.{model}.{ssp}.{member}.{time}.LOCA_16thdeg_v20220413.nc")
#         # Calculate EDD
#         ds_edd = edd_ufunc(tasmin = ds_tasmin['tasmin'],
#                            tasmax = ds_tasmax['tasmax'],
#                            threshold = thresh + 273.15)
#         # Sum to monthly
#         ds_edd = ds_edd.resample(time='1M').sum()
#         # Append
#         ds_edd_out.append(ds_edd)
        
#     # Concat
#     ds_edd_out = xr.concat(ds_edd_out, dim="time")
#     # Update attrs
#     ds_edd_out = ds_edd_out.to_dataset(name = f"edd_{thresh}")
#     ds_edd_out.attrs = ds_tasmin.attrs

#     ds_edd_out.attrs["NOTE"] = f"Extreme Degree Days (EDD) calculated as in DOI: 10.1111/agec.12315 Supplementary Material with threshold {thresh}C. Author: David Lafferty - University of Illinois (davidcl2@illinois.edu). Date: August 2023"
#     ds_edd_out.EDD.attrs["units"] = "Days"
#     ds_edd_out.EDD.attrs["description"] = "Extreme (Growing) Degree Days"
    
#     # Store
#     ds_edd_out.to_netcdf(f"{out_path}/edd_{thresh}c.{model}.{ssp}.{member}.LOCA_16thdeg.nc")

In [31]:
# # LOCA
# for model in loca_dict.keys():
#     for member in loca_dict[model]:
#         # Only single SSP
#         if ssp in os.listdir(f"{loca_in}/{model}/0p0625deg/{member}/"):
#             out = dask.delayed(process_loca)(loca_in, model, member, ssp, lat_min, lat_max, lon_min, lon_max)
#             ds_loca_out.append(out)

### NEX

In [15]:
# NEX: one {"model", "member"} record per (model, member) pair
model_infos = [
    {"model": model_name, "member": member_id}
    for model_name, member_id in (
        ("ACCESS-CM2", "r1i1p1f1"),
        ("BCC-CSM2-MR", "r1i1p1f1"),
        ("CNRM-ESM2-1", "r1i1p1f2"),
        ("MPI-ESM1-2-HR", "r1i1p1f1"),
        ("MRI-ESM2-0", "r1i1p1f1"),
        ("NorESM2-MM", "r1i1p1f1"),
    )
]

In [16]:
# NEX-GDDP, historical period
ensemble = "NEX-GDDP"
ssp = "historical"

thresh = 29

year_start = 1980
year_end = 2014

# Arguments that are identical for every model in this run
shared_kwargs = dict(wbm_path=wbm_path,
                     ensemble=ensemble,
                     ssp=ssp,
                     method="",
                     year_start=year_start,
                     year_end=year_end,
                     thresh=thresh,
                     out_path=out_path)

delayed = []

for model_info in model_infos:
    model = model_info["model"]
    member = model_info["member"]
    # Skip models whose output file already exists
    done_file = f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}.nc"
    if os.path.isfile(done_file):
        continue
    # Queue the EDD calculation
    delayed.append(dask.delayed(edd_spooled)(model=model, member=member, **shared_kwargs))
_ = dask.compute(*delayed)

In [10]:
# NEX-GDDP, ssp585 projection period
ensemble = "NEX-GDDP"
ssp = "ssp585"

thresh = 29

year_start = 2015
year_end = 2099

# Arguments that are identical for every model in this run
shared_kwargs = dict(wbm_path=wbm_path,
                     ensemble=ensemble,
                     ssp=ssp,
                     method="",
                     year_start=year_start,
                     year_end=year_end,
                     thresh=thresh,
                     out_path=out_path)

delayed = []

for model_info in model_infos:
    model = model_info["model"]
    member = model_info["member"]
    # Skip models whose output file already exists
    done_file = f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}.nc"
    if os.path.isfile(done_file):
        continue
    # Queue the EDD calculation
    delayed.append(dask.delayed(edd_spooled)(model=model, member=member, **shared_kwargs))
_ = dask.compute(*delayed)

### OakRidge

In [12]:
# Downscaling method labels used in the OakRidge directory and file names
methods = [
    "DBCCA_Daymet",
    "RegCM_Daymet",
    "DBCCA_Livneh",
    "RegCM_Livneh",
]

# One {"model", "member"} record per (model, member) pair
model_infos = [
    {"model": model_name, "member": member_id}
    for model_name, member_id in (
        ("ACCESS-CM2", "r1i1p1f1"),
        ("BCC-CSM2-MR", "r1i1p1f1"),
        ("CNRM-ESM2-1", "r1i1p1f2"),
        ("MPI-ESM1-2-HR", "r1i1p1f1"),
        ("MRI-ESM2-0", "r1i1p1f1"),
        ("NorESM2-MM", "r1i1p1f1"),
    )
]

In [14]:
# OakRidge, historical period
ensemble = "OakRidge"
ssp = "historical"

thresh = 29

year_start = 1980
year_end = 2014

# Arguments that are identical for every model / method in this run
shared_kwargs = dict(wbm_path=wbm_path,
                     ensemble=ensemble,
                     ssp=ssp,
                     year_start=year_start,
                     year_end=year_end,
                     thresh=thresh,
                     out_path=out_path)

delayed = []

for model_info in model_infos:
    model = model_info["model"]
    member = model_info["member"]
    for method in methods:
        # Skip model / method combinations whose output file already exists
        done_file = f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}_{method}.nc"
        if os.path.isfile(done_file):
            continue
        # Queue the EDD calculation
        delayed.append(dask.delayed(edd_spooled)(model=model,
                                                 member=member,
                                                 method=method,
                                                 **shared_kwargs))

_ = dask.compute(*delayed)

In [15]:
# OakRidge, ssp585 projection period
ensemble = "OakRidge"
ssp = "ssp585"

thresh = 29

year_start = 2015
year_end = 2059

# Arguments that are identical for every model / method in this run
shared_kwargs = dict(wbm_path=wbm_path,
                     ensemble=ensemble,
                     ssp=ssp,
                     year_start=year_start,
                     year_end=year_end,
                     thresh=thresh,
                     out_path=out_path)

delayed = []

for model_info in model_infos:
    model = model_info["model"]
    member = model_info["member"]
    for method in methods:
        # Skip model / method combinations whose output file already exists
        done_file = f"{out_path}/metrics/edd/edd_{thresh}c_{ensemble}_{model}_{member}_{ssp}_{method}.nc"
        if os.path.isfile(done_file):
            continue
        # Queue the EDD calculation
        delayed.append(dask.delayed(edd_spooled)(model=model,
                                                 member=member,
                                                 method=method,
                                                 **shared_kwargs))

_ = dask.compute(*delayed)