In [1]:
import os
import glob

import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt

import dask

In [2]:
# # Get growing season
# gs = xr.open_dataset("/gpfs/group/kaf26/default/dcl5300/wbm_soilM_crop_uc_lafferty-etal-2024-tbd_DATA/wbm/wbm_spool/flowdirection206_us/CDL-US-M_nonrice_irr_Kc_av_hist/2014.nc")
# gs = gs.sel(lat=lat, lon=lon, method="nearest").MC_nonrice_irr_Kc_av.to_numpy() > 0.

## Preliminaries

In [3]:
# Data locations: WBM inputs/outputs, and where derived metrics are written
wbm_path, out_path = (
    "/gpfs/group/kaf26/default/dcl5300/wbm_soilM_crop_uc_lafferty-etal-2024-tbd_DATA/wbm",
    "/gpfs/group/kaf26/default/dcl5300/wbm_soilM_crop_uc_lafferty-etal-2024-tbd_DATA/metrics",
)

In [4]:
# Read function
def read_nc(ensemble, model, member, method, ssp, year, var_id, wbm_path):
    """
    Open a single year of WBM data as an xarray Dataset.

    "soilMoistFrac" and "soilMoist" are read from the daily WBM output file
    of the run; every other variable is read from the spooled climate inputs.

    Parameters
    ----------
    ensemble, model, member, method, ssp : run identifiers used to build the
        directory name. `method` only enters the path for the "OakRidge"
        ensemble.
    year : year of the file to open.
    var_id : variable name requested by the caller (e.g. "tasmax", "prcp").
    wbm_path : root directory of the WBM data.

    Returns
    -------
    Dataset with scalar coordinates `ensemble`, `model`, `ssp`, `member` and
    `method` attached, and with the on-disk variable renamed to `var_id`.
    For non-OakRidge ensembles the `method` coordinate holds the ensemble name.
    """
    is_oakridge = ensemble == "OakRidge"

    # Scenario label in the directory name. LOCA2 projections are stored as
    # three separate runs (early / mid / late); everything else uses the ssp.
    if ensemble != "LOCA2" or ssp == "historical":
        scenario = ssp
    elif year < 2045:
        scenario = f"{ssp}_early"
    elif year < 2075:
        scenario = f"{ssp}_mid"
    else:
        scenario = f"{ssp}_late"

    # Run label and the variable name as it is stored on disk
    if is_oakridge:
        run_label = f"{member}_{method}"
        # OakRidge stores tasmax/tasmin without the "as" (tmax/tmin)
        disk_name = var_id.replace("as", "") if var_id in ("tasmax", "tasmin") else var_id
    else:
        run_label = member
        disk_name = "pr" if var_id == "prcp" else var_id

    # Locate the file
    run_dir = f"{ensemble}_{model}_{run_label}_{scenario}"
    year_str = str(year)
    if var_id in ("soilMoistFrac", "soilMoist"):
        nc_path = f"{wbm_path}/{run_dir}/wbm_output/daily/wbm_{year_str}.nc"
    else:
        nc_path = f"{wbm_path}/wbm_spool/flowdirection206_us/{run_dir}_{disk_name}_daily/{year_str}.nc"

    # Open and label with the run details
    labelled = xr.open_dataset(nc_path).assign_coords(
        ensemble=ensemble,
        model=model,
        ssp=ssp,
        member=member,
        method=method if is_oakridge else ensemble,
    )

    # Expose the variable under the caller's name
    return labelled.rename({disk_name: var_id})

In [5]:
# Degree day calculations
def above_threshold_each(mins, maxs, threshold):
    """Estimate daily degree-days above a threshold with a sinusoidal fit.

    The temperature over a day is modelled as
    ``(mins + maxs)/2 + (maxs - mins)/2 * sin(2*pi*t)`` for ``t`` in [0, 1],
    and the part of that curve lying above ``threshold`` is integrated.

    Code adapted from James Rising
    (https://github.com/jrising/research-common/blob/master/python/gdd.py).

    Parameters
    ----------
    mins, maxs : numpy arrays (or scalars) of daily minimum and maximum
        temperature, broadcastable against each other.
    threshold : temperature threshold, in the same units as `mins`/`maxs`.

    Returns
    -------
    Degree-days above `threshold` for each element: 0 where `maxs` never
    exceeds the threshold, ``(mins + maxs)/2 - threshold`` where `mins` is
    always at or above it. NaN inputs give NaN outputs.
    """
    # Mean and half-range of the daily sinusoid
    plus_over_2 = (mins + maxs)/2
    minus_over_2 = (maxs - mins)/2
    two_pi = 2*np.pi

    # Days that never cross the threshold
    aboves = mins >= threshold
    belows = maxs <= threshold

    # Crossing point (as a fraction of the day) where the curve rises above
    # the threshold. For days that never cross, the arcsin argument is outside
    # [-1, 1] (or is x/0 when mins == maxs); those entries are replaced just
    # below, so the floating-point warnings they would raise are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        crossing = np.arcsin((threshold - plus_over_2) / minus_over_2) / two_pi

    # d0s is the time of crossing above; d1s is when it crosses back below.
    # Always above -> integrate the whole day [0, 1]; always below -> empty
    # interval [0, 0]. "Below" takes precedence if both flags are set.
    # np.where (rather than in-place masking) also handles scalar inputs.
    d0s = np.where(aboves | belows, 0, crossing)
    d1s = np.where(belows, 0, np.where(aboves, 1, .5 - crossing))

    # Integral of the sinusoid between the crossings, minus the threshold area
    F1s = -minus_over_2 * np.cos(two_pi*d1s) / two_pi + plus_over_2 * d1s
    F0s = -minus_over_2 * np.cos(two_pi*d0s) / two_pi + plus_over_2 * d0s
    return F1s - F0s - threshold * (d1s - d0s)

def edd_ufunc(tasmin, tasmax, threshold):
    """Apply `above_threshold_each` to xarray objects via `xr.apply_ufunc`.

    `tasmin`, `tasmax` and `threshold` are forwarded, in that order, as the
    `mins`, `maxs` and `threshold` arguments of `above_threshold_each`.
    """
    operands = (tasmin, tasmax, threshold)
    return xr.apply_ufunc(above_threshold_each, *operands)

In [6]:
# Calculate metrics
def calculate_metrics(ensemble, model, member, method, ssp, year, T_thresh, wbm_path, out_path):
    """
    Calculate monthly degree-day, precipitation and soil-moisture metrics for
    one climate scenario and one year, and write them to a netCDF file.

    Output variables (all resampled to monthly frequency):
      - edd_{T1}c: sum of daily degree days above T_thresh[1]
      - gdd_{T0}-{T1}c: sum of daily degree days between the two thresholds
      - prcp: sum of daily precipitation
      - edd_{T1}c_soilMoistFrac_{lo}-{hi}: sum of daily EDD restricted to days
        whose soil-moisture fraction falls in the (lo, hi] percent bin; the
        first bin has no lower bound and the last bin has no upper bound
      - soilMoistFrac: mean of daily soil-moisture fraction
      - soilMoistFrac2, prcp2: squares of the monthly soilMoistFrac and prcp

    Parameters
    ----------
    ensemble, model, member, method, ssp, year : identify the run and year,
        passed through to `read_nc`.
    T_thresh : sequence of two temperature thresholds, (lower, upper).
    wbm_path : root directory of the WBM data.
    out_path : root directory for results. The file is written to
        {out_path}/monthly_dd_soilM_binned/.

    Returns
    -------
    None. Any exception is caught (best effort, so one bad input does not
    abort a batch) and appended, tagged with the year, to
    {out_path}/monthly_dd_soilM_binned/logs/{ensemble}_{model}_{member_method}_{ssp}.txt
    """
    # OakRidge runs are identified by member and downscaling method
    if ensemble == "OakRidge":
        member_method = f"{member}_{method}"
    else:
        member_method = member

    out_dir = f"{out_path}/monthly_dd_soilM_binned"
    run_name = f"{ensemble}_{model}_{member_method}_{ssp}"

    try:
        # Read variables
        ds_tasmax = read_nc(ensemble, model, member, method, ssp, year, "tasmax", wbm_path)
        ds_tasmin = read_nc(ensemble, model, member, method, ssp, year, "tasmin", wbm_path)
        ds_prcp = read_nc(ensemble, model, member, method, ssp, year, "prcp", wbm_path)
        ds_soilM = read_nc(ensemble, model, member, method, ssp, year, "soilMoistFrac", wbm_path)

        T_low, T_high = T_thresh[0], T_thresh[1]
        edd_name = f"edd_{T_high}c"

        # Calculate daily EDD (degree days above the upper threshold)
        EDD = edd_ufunc(ds_tasmin['tasmin'], ds_tasmax['tasmax'], threshold = T_high)

        # Calculate daily GDD (degree days between the two thresholds)
        GDD = edd_ufunc(ds_tasmin['tasmin'], ds_tasmax['tasmax'], threshold = T_low)
        GDD = GDD - EDD

        # Combine
        ds_out = xr.combine_by_coords([EDD.to_dataset(name = edd_name),
                                       GDD.to_dataset(name = f"gdd_{T_low}-{T_high}c"),
                                       ds_prcp['prcp'].to_dataset(name = 'prcp')])

        # Monthly degree days and precipitation without SM
        ds_out = ds_out.resample(time='M').sum()

        # EDD/SM variables: monthly EDD split into 10 soil-moisture bins.
        # Bounds are i/10 (not i*0.1) so they are exactly the doubles
        # 0.1, 0.2, ..., 0.9.
        soilM = ds_soilM['soilMoistFrac']
        n_bins = 10
        for i in range(n_bins):
            lower, upper = i / n_bins, (i + 1) / n_bins
            if i == 0:
                in_bin = soilM <= upper
            elif i == n_bins - 1:
                in_bin = soilM > lower
            else:
                in_bin = (soilM > lower) & (soilM <= upper)
            ds_out[f"{edd_name}_soilMoistFrac_{10*i}-{10*(i+1)}"] = EDD.where(in_bin).resample(time='M').sum()

        # Standard SM metrics
        ds_out['soilMoistFrac'] = ds_soilM['soilMoistFrac'].resample(time='M').mean()

        # Get squares (of the monthly values)
        ds_out['soilMoistFrac2'] = ds_out['soilMoistFrac']**2
        ds_out['prcp2'] = ds_out['prcp']**2

        # Tidy
        ds_out.attrs["NOTE1"] = f"Degree Days calculated as in 10.1111/agec.12315 Supplementary Material with thresholds {T_thresh[0]}C, {T_thresh[1]}C. Author: David Lafferty - University of Illinois (davidcl2@illinois.edu). Date: September 2023"
        ds_out['prcp'].attrs['units'] = 'mm'

        # Write to disk
        ds_out.to_netcdf(f"{out_dir}/{run_name}_{str(year)}.nc")
    except Exception as e:
        # One log file per scenario is shared by all of its years: append and
        # tag each entry with the year so failures do not overwrite each other.
        log_dir = f"{out_dir}/logs"
        os.makedirs(log_dir, exist_ok=True)
        with open(f"{log_dir}/{run_name}.txt", "a") as f:
            f.write(f"{year}: {type(e).__name__}: {e}\n")
        return None

In [13]:
# Wrapper for read function
def calculate_metrics_all(year_start, year_end, T_thresh, wbm_path, out_path):
    """
    Build one delayed `calculate_metrics` task for every non-historical
    climate driver listed in ../utils/climate_drivers.csv and every year in
    [year_start, year_end] whose output file does not exist yet.

    Nothing is computed here: the returned list of dask delayed objects has
    to be passed to `dask.compute`.
    """
    tasks = []

    # Table of climate drivers, one row per ensemble/model/member/ssp/method
    drivers = pd.read_csv("../utils/climate_drivers.csv")

    for _, row in drivers.iterrows():
        ensemble, model, member, ssp, method = (
            row[key] for key in ("ensemble", "model", "member", "ssp", "method")
        )

        # Historical runs are not processed
        if ssp == "historical":
            continue

        # ORNL runs carry the method in their name
        run_label = f"{member}_{method}" if ensemble == "OakRidge" else member
        out_stem = f"{out_path}/monthly_dd_soilM_binned/{ensemble}_{model}_{run_label}_{ssp}"

        # Queue only the years that have not been written yet
        tasks.extend(
            dask.delayed(calculate_metrics)(ensemble, model, member, method, ssp, year, T_thresh, wbm_path, out_path)
            for year in range(year_start, year_end + 1)
            if not os.path.isfile(f"{out_stem}_{str(year)}.nc")
        )

    return tasks

In [11]:
############
### Dask ###
############
# Start a Dask cluster whose workers are submitted as PBS jobs, then connect
# a distributed client to it. With a client connected, later dask.compute
# calls are expected to run on these workers rather than locally.
from dask_jobqueue import PBSCluster

cluster = PBSCluster(
    cores=1,
    resource_spec="pmem=40GB",
    memory="40GB",
    # NOTE(review): dask_jobqueue documents `worker_extra_args` as extra
    # arguments for the dask worker command and `job_extra_directives` as the
    # place for additional "#PBS" options. This value looks like a PBS
    # directive, so it may not be applied as one -- inspect
    # cluster.job_script() and confirm before changing.
    worker_extra_args=["#PBS -l feature=rhel7"],
    walltime="03:00:00"
)

cluster.scale(jobs=40)  # ask for jobs

from dask.distributed import Client

client = Client(cluster)

# Display the client summary (dashboard link, number of workers)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.10.110:38105,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: 3 minutes ago,Total memory: 0 B


# Calculate metrics

## Weather variables and/or soil moisture

In [14]:
# First and last year to process (the range is inclusive of both)
year_start = 2020
year_end = 2059

# Degree-day temperature thresholds in degrees C: [lower, upper]
T_thresh = [10, 29]

In [15]:
# Build the list of delayed tasks: one per non-historical climate driver and
# year whose output file does not exist yet. Nothing is computed at this point.
delayed = calculate_metrics_all(year_start, year_end, T_thresh, wbm_path, out_path)

In [16]:
# Number of tasks still to be computed
len(delayed)

1159

In [17]:
# Run every task. Each task writes its own netCDF file (or a log entry on
# failure) and returns None, so the results are discarded.
_ = dask.compute(*delayed)

## Crop yields (?)