In [1]:
import os
from glob import glob

import dask
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe

from utils import city_list
import metric_funcs as mf

## Preliminaries

In [2]:
################
#### Paths #####
################
# Update these for reproduction

project_data_path = "/storage/group/pches/default/users/dcl5300/conus_comparison_lafferty-etal-2024/"
project_code_path = "/storage/home/dcl5300/work/current_projects/conus_comparison_lafferty-etal-2024/"
star_path = "/storage/group/pches/default/users/dcl5300/STAR-ESDM/" # raw STAR-ESDM outputs

In [3]:
##############
### Models ###
##############
# GCM names are the second dot-separated token of each STAR-ESDM file name
ssp245_gcms = np.unique([path.split('/')[-1].split('.')[1] for path in glob(f"{star_path}/ssp245/*.nc")])
ssp585_gcms = np.unique([path.split('/')[-1].split('.')[1] for path in glob(f"{star_path}/ssp585/*.nc")])

# Fail early if nothing was found: otherwise every loop below silently does nothing
if ssp245_gcms.size == 0 or ssp585_gcms.size == 0:
    raise FileNotFoundError(f"No STAR-ESDM netCDF files found under {star_path}/ssp245 and/or {star_path}/ssp585")

# Both scenarios must provide the same models. `np.array_equal` also copes with
# lists of different length, where an elementwise `==` does not.
if not np.array_equal(ssp245_gcms, ssp585_gcms):
    mismatched = sorted(str(name) for name in set(ssp245_gcms) ^ set(ssp585_gcms))
    raise ValueError(f"Model mismatch between ssp245 and ssp585: {mismatched}")

gcms = ssp245_gcms

# The regridding and time series cells iterate over `models`, which is not defined
# anywhere else in this notebook; their stored output lists the same names as `gcms`.
models = gcms

In [4]:
############
### Dask ###
############
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Resources requested for each SLURM job
# (set "account" to "pches" to submit under that account instead)
slurm_job_spec = {
    "account": "open",
    "cores": 1,
    "memory": "30GiB",
    "walltime": "10:00:00",
}
cluster = SLURMCluster(**slurm_job_spec)

# Ask the scheduler for 25 such jobs
cluster.scale(jobs=25)

# Connect this session to the cluster
client = Client(cluster)

# Last expression of the cell: show the client summary
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.8.14:45277,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Calculate metrics

In [5]:
###############################
# Metric calculation function #
###############################
def calculate_metric(metric_func, var_id, gcm, ssp, needed_vars, star_path, out_path):
    """
    Compute one metric for a single STAR-ESDM GCM/SSP pair and store it as netCDF.

    Parameters
    ----------
    metric_func : callable
        Called as ``metric_func(ds, var_id)`` on the merged input dataset. The
        object it returns must provide ``to_netcdf``.
    var_id : str
        Forwarded to ``metric_func``; also part of the error-log file name.
    gcm, ssp : str
        Select the input files ``{star_path}/{ssp}/downscaled.{gcm}.r1i1p1f1.{var}*``.
    needed_vars : list of str
        Variables to read; the files of each variable are opened and then merged.
    star_path : str
        Root directory of the raw STAR-ESDM outputs.
    out_path : str
        Destination netCDF file. If it already exists nothing is computed.

    Returns
    -------
    None

    Notes
    -----
    Errors are not propagated, so a loop over many models keeps going. The
    exception and its traceback are written to
    ``{project_code_path}/code/logs/{gcm}_{ssp}_{var_id}_STAR.txt`` and a short
    failure notice is printed.
    """
    def read_star(file_path):
        """Open every file matching `file_path`, keeping only each file's own variable."""
        def _keep_file_variable(ds):
            # The fourth dot-separated token of the file name is the variable to keep
            return ds[[ds.encoding["source"].split('/')[-1].split('.')[3]]]

        ds = xr.open_mfdataset(file_path, preprocess=_keep_file_variable, decode_times=False,
                               chunks={'time':365, 'latitude':-1, 'longitude':-1})
        # Times are decoded only after the calendar attribute is forced to 365_day
        ds.time.attrs['calendar'] = '365_day'
        return xr.decode_cf(ds, decode_times=True)

    # Written first, then renamed: an interrupted write must not leave a file at
    # `out_path`, because its mere existence is what marks a run as done.
    tmp_path = f"{out_path}.tmp"
    opened = []

    try:
        # Check if done
        if os.path.isfile(out_path):
            print(f"{ssp} {gcm} already done.")
            return None

        # Read
        for var in needed_vars:
            opened.append(read_star(f"{star_path}/{ssp}/downscaled.{gcm}.r1i1p1f1.{var}*"))
        ds_tmp = xr.merge(opened)

        # Calculate metric
        ds_out = metric_func(ds_tmp, var_id)

        # Store
        ds_out.to_netcdf(tmp_path)
        os.replace(tmp_path, out_path)
        print(f"{ssp} {gcm}")

    # Log if error
    except Exception as e:
        import traceback

        # Drop any partial output
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)

        except_path = f"{project_code_path}/code/logs"
        os.makedirs(except_path, exist_ok=True)
        log_file = f"{except_path}/{gcm}_{ssp}_{var_id}_STAR.txt"
        with open(log_file, "w") as f:
            f.write(f"{type(e).__name__}: {e}\n\n")
            f.write(traceback.format_exc())
        print(f"{ssp} {gcm} FAILED ({type(e).__name__}), see {log_file}")

    finally:
        # Release the file handles of every input dataset that was opened
        for ds_opened in opened:
            ds_opened.close()

In [7]:
%%time
#############
## CDD max ##
#############
var_id = "cdd"                      # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_dd_max
needed_vars = ['tasmin', 'tasmax']  # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/max_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5 already done.
ssp245 BCC-CSM2-MR already done.
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2 already done.
ssp585 ACCESS-ESM1-5 already done.
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 3min 48s, sys: 26.1 s, total: 4min 14s
Wall time: 2h 50m

In [8]:
%%time
#############
## CDD sum ##
#############
var_id = "cdd"                      # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_dd_sum
needed_vars = ['tasmin', 'tasmax']  # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/sum_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5 already done.
ssp245 BCC-CSM2-MR already done.
ssp245 CMCC-ESM2 already done.
ssp245 CanESM5 already done.
ssp245 EC-Earth3 already done.
ssp245 EC-Earth3-Veg already done.
ssp245 EC-Earth3-Veg-LR already done.
ssp245 FGOALS-g3 already done.
ssp245 GFDL-CM4 already done.
ssp245 GFDL-ESM4 already done.
ssp245 INM-CM4-8 already done.
ssp245 INM-CM5-0 already done.
ssp245 IPSL-CM6A-LR already done.
ssp245 KACE-1-0-G already done.
ssp245 KIOST-ESM already done.
ssp245 MIROC6 already done.
ssp245 MPI-ESM1-2-HR already done.
ssp245 MPI-ESM1-2-LR already done.
ssp245 MRI-ESM2-0 already done.
ssp245 NESM3 already done.
ssp245 NorESM2-LM already done.
ssp245 NorESM2-MM already done.
ssp245 TaiESM1 already done.
ssp585 ACCESS-CM2 already done.
ssp585 ACCESS-ESM1-5 already done.
ssp585 BCC-CSM2-MR already done.
ssp585 CMCC-ESM2 already done.
ssp585 CanESM5 already done.
ssp585 EC-Earth3 already done.
ssp585 EC-Earth3-Veg already done.
ssp585 EC

In [11]:
%%time
#############
## HDD sum ##
#############
var_id = "hdd"                      # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_dd_sum
needed_vars = ['tasmin', 'tasmax']  # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/sum_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5 already done.
ssp245 BCC-CSM2-MR already done.
ssp245 CMCC-ESM2 already done.
ssp245 CanESM5 already done.
ssp245 EC-Earth3 already done.
ssp245 EC-Earth3-Veg already done.
ssp245 EC-Earth3-Veg-LR already done.
ssp245 FGOALS-g3 already done.
ssp245 GFDL-CM4 already done.
ssp245 GFDL-ESM4 already done.
ssp245 INM-CM4-8 already done.
ssp245 INM-CM5-0 already done.
ssp245 IPSL-CM6A-LR already done.
ssp245 KACE-1-0-G already done.
ssp245 KIOST-ESM already done.
ssp245 MIROC6 already done.
ssp245 MPI-ESM1-2-HR already done.
ssp245 MPI-ESM1-2-LR already done.
ssp245 MRI-ESM2-0 already done.
ssp245 NESM3 already done.
ssp245 NorESM2-LM already done.
ssp245 NorESM2-MM already done.
ssp245 TaiESM1 already done.
ssp585 ACCESS-CM2 already done.
ssp585 ACCESS-ESM1-5 already done.
ssp585 BCC-CSM2-MR already done.
ssp585 CMCC-ESM2 already done.
ssp585 CanESM5 already done.
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS

In [10]:
%%time
#############
## HDD max ##
#############
var_id = "hdd"                      # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_dd_max
needed_vars = ['tasmin', 'tasmax']  # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/max_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2
ssp245 ACCESS-ESM1-5
ssp245 BCC-CSM2-MR
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2
ssp585 ACCESS-ESM1-5
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 4min 41s, sys: 32.6 s, total: 5min 13s
Wall time: 3h 16min 24s


In [8]:
%%time
#########################
## Average Temperature ##
#########################
var_id = 'tas'                      # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_avg
needed_vars = ['tasmin', 'tasmax']  # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/avg_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5
ssp245 BCC-CSM2-MR
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2 already done.
ssp585 ACCESS-ESM1-5
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 16min 13s, sys: 1min 6s, total: 17min 20s
Wall time: 2h 15min 13s


In [11]:
%%time
#########################
## Total Precipitation ##
#########################
var_id = 'pr'           # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_sum
needed_vars = ['pr']    # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/sum_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5
ssp245 BCC-CSM2-MR
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2
ssp585 ACCESS-ESM1-5
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 7min 13s, sys: 29.2 s, total: 7min 42s
Wall time: 57min 27s


In [12]:
%%time
#########################
## Maximum Temperature ##
#########################
var_id = 'tasmax'           # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_max
needed_vars = ['tasmax']    # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/max_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2 already done.
ssp245 ACCESS-ESM1-5
ssp245 BCC-CSM2-MR
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2
ssp585 ACCESS-ESM1-5
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 9min 6s, sys: 36 s, total: 9min 42s
Wall time: 1h 10min 56s


In [13]:
%%time
#########################
# Maximum Precipitation #
#########################
var_id = 'pr'           # forwarded to the metric function, also part of the file name
metric_func = mf.calculate_max
needed_vars = ['pr']    # STAR-ESDM variables read for every run


def out_path(gcm, ssp):
    """Return the output file for one GCM/SSP combination."""
    return f"{project_data_path}/metrics/STAR-ESDM/max_{var_id}_{gcm}_r1i1p1f1_{ssp}.nc"


# Arguments that are identical for every run in this cell
shared_kwargs = dict(metric_func=metric_func, var_id=var_id,
                     needed_vars=needed_vars, star_path=star_path)

# Every scenario, and within each scenario every GCM
for ssp in ("ssp245", "ssp585"):
    for gcm in gcms:
        calculate_metric(gcm=gcm, ssp=ssp, out_path=out_path(gcm, ssp), **shared_kwargs)

ssp245 ACCESS-CM2
ssp245 ACCESS-ESM1-5
ssp245 BCC-CSM2-MR
ssp245 CMCC-ESM2
ssp245 CanESM5
ssp245 EC-Earth3
ssp245 EC-Earth3-Veg
ssp245 EC-Earth3-Veg-LR
ssp245 FGOALS-g3
ssp245 GFDL-CM4
ssp245 GFDL-ESM4
ssp245 INM-CM4-8
ssp245 INM-CM5-0
ssp245 IPSL-CM6A-LR
ssp245 KACE-1-0-G
ssp245 KIOST-ESM
ssp245 MIROC6
ssp245 MPI-ESM1-2-HR
ssp245 MPI-ESM1-2-LR
ssp245 MRI-ESM2-0
ssp245 NESM3
ssp245 NorESM2-LM
ssp245 NorESM2-MM
ssp245 TaiESM1
ssp585 ACCESS-CM2
ssp585 ACCESS-ESM1-5
ssp585 BCC-CSM2-MR
ssp585 CMCC-ESM2
ssp585 CanESM5
ssp585 EC-Earth3
ssp585 EC-Earth3-Veg
ssp585 EC-Earth3-Veg-LR
ssp585 FGOALS-g3
ssp585 GFDL-CM4
ssp585 GFDL-ESM4
ssp585 INM-CM4-8
ssp585 INM-CM5-0
ssp585 IPSL-CM6A-LR
ssp585 KACE-1-0-G
ssp585 KIOST-ESM
ssp585 MIROC6
ssp585 MPI-ESM1-2-HR
ssp585 MPI-ESM1-2-LR
ssp585 MRI-ESM2-0
ssp585 NESM3
ssp585 NorESM2-LM
ssp585 NorESM2-MM
ssp585 TaiESM1
CPU times: user 9min 38s, sys: 37 s, total: 10min 15s
Wall time: 1h 10min 45s


# Regrid

In [7]:
# We use LOCA grid as target
loca_lat_grid = np.linspace(23.90625, 53.46875, 474)
loca_lon_grid = np.linspace(234.53125, 293.46875, 944)

ds_out = xr.Dataset({"lat": (["lat"], loca_lat_grid,
                             {"standard_name": "latitude", "units": "degrees_north"}),
                     "lon": (["lon"], loca_lon_grid,
                             {"standard_name": "longitude", "units": "degrees_east"})
                    })

# Add mask from LOCA output (the stored array flags NaN cells, hence the inversion)
loca_nans = np.load(f'{project_code_path}/code/utils/LOCA2_NaNs.npy')
ds_out["mask"] = xr.DataArray(~loca_nans, dims=['lat','lon'])

# STAR grid to construct regridder: any stored metric file serves as the source
# grid. Sorting makes the choice reproducible, and an empty directory gives a
# clear error instead of an IndexError.
star_metric_files = sorted(glob(f"{project_data_path}/metrics/STAR-ESDM/*.nc"))
if not star_metric_files:
    raise FileNotFoundError(
        f"No metric files found in {project_data_path}/metrics/STAR-ESDM/; "
        "run the metric calculation cells before building the regridder."
    )
example_file = star_metric_files[0]
ds_in = xr.open_dataset(example_file)

# Regridder
conservative_regridder = xe.Regridder(ds_in, ds_out, "conservative")
# nn_s2d_regridder = xe.Regridder(ds_in, ds_out, "nearest_s2d")

In [8]:
%%time

# Out path
out_path = f"{project_data_path}/metrics_regridded/STAR-ESDM/"

# Regridder
regridder_names = ["conservative"]
regridders = [conservative_regridder]

# Metrics
metrics_ids = ["avg_tas", "sum_pr", "max_tasmax", "max_pr", "max_tas"]

# Directory holding the metrics on the native STAR grid
star_metrics_dir = f"{project_data_path}/metrics/STAR-ESDM"

# Loop through all
for regridder_name, regridder in zip(regridder_names, regridders):
    # Make sure the destination exists before any file is written
    os.makedirs(f"{out_path}/{regridder_name}", exist_ok=True)

    for ssp in ["ssp245", "ssp585"]:
        for model in models:
            for metric_id in metrics_ids:
                out_file = f"{out_path}/{regridder_name}/{metric_id}_{model}_{ssp}.nc"

                # Skip anything already regridded
                if os.path.isfile(out_file):
                    continue

                try:
                    # Read
                    metric, var_id = metric_id.split('_')
                    candidates = sorted(glob(f"{star_metrics_dir}/{metric_id}.downscaled.{model}.r1i1p1f1.{var_id}.{ssp}*.nc"))
                    if not candidates:
                        # The metric cells of this notebook write this naming instead
                        candidates = glob(f"{star_metrics_dir}/{metric_id}_{model}_r1i1p1f1_{ssp}.nc")
                    if not candidates:
                        raise FileNotFoundError(f"no input file for {metric_id} {model} {ssp} in {star_metrics_dir}")

                    # The context manager closes the input file even if regridding fails
                    with xr.open_dataset(candidates[0]) as ds_raw:
                        ds_star_in = ds_raw.rename({var_id: metric_id})

                        # NOTE: use high NaN threshold to try to not introduce NaNs
                        # not already present in the LOCA2 grid
                        ds_star_out = regridder(ds_star_in, skipna=True, na_thres=0.99)

                        # Store
                        ds_star_out.to_netcdf(out_file)
                    print(f"{metric_id} {model} {ssp}")
                except Exception as e:
                    # Keep going with the remaining files, but report what went wrong.
                    # Unlike a bare `except:`, this lets KeyboardInterrupt stop the loop.
                    print(f"ERROR {metric_id} {model} {ssp}: {type(e).__name__}: {e}")

max_tas ACCESS-CM2 ssp245
max_tas ACCESS-ESM1-5 ssp245
max_tas BCC-CSM2-MR ssp245
max_tas CMCC-ESM2 ssp245
max_tas CanESM5 ssp245
max_tas EC-Earth3 ssp245
max_tas EC-Earth3-Veg ssp245
max_tas EC-Earth3-Veg-LR ssp245
max_tas FGOALS-g3 ssp245
max_tas GFDL-CM4 ssp245
max_tas GFDL-ESM4 ssp245
max_tas INM-CM4-8 ssp245
max_tas INM-CM5-0 ssp245
max_tas IPSL-CM6A-LR ssp245
max_tas KACE-1-0-G ssp245
max_tas KIOST-ESM ssp245
max_tas MIROC6 ssp245
max_tas MPI-ESM1-2-HR ssp245
max_tas MPI-ESM1-2-LR ssp245
max_tas MRI-ESM2-0 ssp245
max_tas NESM3 ssp245
max_tas NorESM2-LM ssp245
max_tas NorESM2-MM ssp245
max_tas TaiESM1 ssp245
max_tas ACCESS-CM2 ssp585
max_tas ACCESS-ESM1-5 ssp585
max_tas BCC-CSM2-MR ssp585
max_tas CMCC-ESM2 ssp585
max_tas CanESM5 ssp585
max_tas EC-Earth3 ssp585
max_tas EC-Earth3-Veg ssp585
max_tas EC-Earth3-Veg-LR ssp585
max_tas FGOALS-g3 ssp585
max_tas GFDL-CM4 ssp585
max_tas GFDL-ESM4 ssp585
max_tas INM-CM4-8 ssp585
max_tas INM-CM5-0 ssp585
max_tas IPSL-CM6A-LR ssp585
max_tas KAC

# Summaries

## Indices

In [9]:
# Preprocessing hook: label each file with its model and express time as years
def _preprocess(ds):
    """
    Attach a `model` coordinate and replace `time` by its calendar year.

    The model name is the third underscore-separated token of the file name
    stored in ``ds.encoding['source']``.
    """
    file_name = ds.encoding['source'].rsplit('/', 1)[-1]
    model_name = file_name.split('_')[2]

    labelled = ds.assign_coords(model=model_name)

    # Time -> year
    labelled['time'] = labelled['time'].dt.year
    return labelled

In [10]:
# Calculates summary indices for the STAR-ESDM ensemble for a given SSP
def get_summary_indices(metric_id, ssp, years, out_path, out_str):
    """
    Pool all models and all years of a window and store summary indices.

    Reads every conservatively regridded STAR-ESDM file of `metric_id` and `ssp`,
    stacks them along a `model` dimension and reduces over `model` and `time`.
    Three indices are stored along a new `indice` dimension:

    - ``mean``: the mean
    - ``99range``: the 0.995 quantile minus the 0.005 quantile
    - ``q99``: the 0.99 quantile

    Parameters
    ----------
    metric_id : str
        Metric prefix of the input file names.
    ssp : str
        Scenario suffix of the input file names.
    years : sequence of two ints
        First and last year of the window over which all outputs are pooled.
    out_path : str
        Output directory; created if it does not exist.
    out_str : str
        Output file name without the ``.nc`` extension.

    Returns
    -------
    None. Nothing is computed if the output file already exists.
    """
    out_file = f"{out_path}/{out_str}.nc"

    # Check if done
    if os.path.isfile(out_file):
        return None

    os.makedirs(out_path, exist_ok=True)

    # Read all; the context manager closes the input files once the result is
    # written (the computation is lazy until `to_netcdf`).
    with xr.open_mfdataset(f"{project_data_path}/metrics_regridded/STAR-ESDM/conservative/{metric_id}_*_{ssp}.nc", chunks='auto',
                           preprocess=_preprocess, combine='nested', concat_dim=['model']) as ds:
        # Time slice
        ds_sel = ds.sel(time=slice(years[0],years[1]))

        ## Summary indices
        # Mean
        ds_mean = ds_sel.mean(dim=['model', 'time']).assign_coords(indice = 'mean')

        # Quantiles: rechunk once so `model` is a single chunk, then reuse
        ds_pooled = ds_sel.chunk(dict(model=-1))
        ds_qlow = ds_pooled.quantile(0.005, dim=['model', 'time'])
        ds_qhigh = ds_pooled.quantile(0.995, dim=['model', 'time'])
        ds_qrange = (ds_qhigh - ds_qlow).assign_coords(indice = '99range')

        ds_q99 = ds_pooled.quantile(0.99, dim=['model', 'time']).assign_coords(indice = 'q99')

        # Store
        ds_out = xr.concat([ds_mean, ds_qrange, ds_q99], dim='indice')
        ds_out.to_netcdf(out_file)

In [11]:
%%time
# Windows, scenarios and metrics for which summary indices are produced
year_windows = [[2020,2040], [2050,2070], [2080,2100]]
scenarios = ["ssp245", "ssp585"]
summary_metric_ids = ['avg_tas', 'sum_pr', 'max_tasmax', 'max_pr', 'max_tas']

# All summaries go to the same directory
summary_dir = f"{project_data_path}/summary_indices"

for years in year_windows:
    for ssp in scenarios:
        for metric_id in summary_metric_ids:
            # One output file per window/scenario/metric combination
            summary_name = f"STAR-ESDM_{ssp}_{str(years[0])}-{str(years[1])}_{metric_id}"
            get_summary_indices(metric_id=metric_id, ssp=ssp, years=years,
                                out_path=summary_dir, out_str=summary_name)

CPU times: user 30.9 s, sys: 3.76 s, total: 34.6 s
Wall time: 2min 43s


## Timeseries

### Raw

In [21]:
# Extracts the metric time series at one location from every STAR-ESDM model (native grid)
def get_raw_data(metric_id, ssp, years, lat, lon, out_path, out_str):
    """
    Store the annual values of `metric_id` at the grid cell nearest to a point.

    One table per model is built from the metric files on the native STAR grid;
    the tables are concatenated and written to ``{out_path}/{out_str}.csv``.

    Parameters
    ----------
    metric_id : str
        ``<aggregation>_<variable>``; the part after the underscore is used as
        the variable token of the input file name.
    ssp : str
        Scenario; stored in the `ssp` column.
    years : sequence of two ints, or None
        First and last year to keep. None keeps every year.
    lat, lon : float
        Target location. A negative `lon` is shifted by 360 before the lookup.
    out_path : str
        Output directory.
    out_str : str
        Output file name without the ``.csv`` extension.

    Returns
    -------
    None. Nothing is computed if the output file already exists.
    """
    def read_and_process(metric_id, model, ssp, years, lat, lon):
        """Return the time series of one model at (lat, lon) as a DataFrame."""
        # Locate the input file
        var_id = metric_id.split('_')[1]
        metrics_dir = f"{project_data_path}/metrics/STAR-ESDM"
        candidates = sorted(glob(f"{metrics_dir}/{metric_id}.downscaled.{model}.r1i1p1f1.{var_id}.{ssp}*.nc"))
        if not candidates:
            # The metric cells of this notebook write this naming instead
            candidates = glob(f"{metrics_dir}/{metric_id}_{model}_r1i1p1f1_{ssp}.nc")
        if not candidates:
            raise FileNotFoundError(f"no input file for {metric_id} {model} {ssp} in {metrics_dir}")

        # Location selection
        if lon < 0:
            lon = 360 + lon

        # Read; the file is closed as soon as the table has been built
        with xr.open_dataset(candidates[0]) as ds_tmp:
            ds_tmp['time'] = ds_tmp["time"].dt.year

            # Time slice
            ds_sel = ds_tmp if years is None else ds_tmp.sel(time=slice(years[0],years[1]))
            ds_sel = ds_sel.sel(latitude=lat, longitude=lon, method='nearest')

            # Construct dataframe
            df_tmp = ds_sel.to_dataframe().drop(columns=["latitude", "longitude"]).reset_index()

        df_tmp["ssp"] = ssp
        df_tmp["model"] = model

        # Return
        return df_tmp

    # Check if done
    if not os.path.isfile(f"{out_path}/{out_str}.csv"):
        os.makedirs(out_path, exist_ok=True)

        # One delayed task per model
        df_delayed = [dask.delayed(read_and_process)(metric_id, model, ssp, years, lat, lon)
                      for model in models]

        # Compute and store
        df_out = dask.compute(*df_delayed)
        pd.concat(df_out).to_csv(f"{out_path}/{out_str}.csv", index=False)

In [23]:
%%time
# Cities, scenarios and metrics extracted on the original STAR grid
cities = ['chicago', 'nyc', 'denver']
scenarios = ["ssp245", "ssp585"]
raw_metric_ids = ['avg_tas', 'sum_pr', 'max_tasmax', 'max_pr', 'max_tas']

raw_dir = f"{project_data_path}/summary_raw_original_grid"

for city in cities:
    # Coordinates of the city
    lat, lon = city_list[city]
    for ssp in scenarios:
        for metric_id in raw_metric_ids:
            # years=None: no time window is applied
            get_raw_data(metric_id=metric_id, ssp=ssp, years=None,
                         lat=lat, lon=lon,
                         out_path=raw_dir,
                         out_str=f"{city}_STAR-ESDM_{ssp}_{metric_id}")

CPU times: user 2.15 s, sys: 185 ms, total: 2.33 s
Wall time: 7.6 s


### Regridded

In [12]:
# Extracts the metric time series at one location from every STAR-ESDM model (regridded)
def get_raw_data(metric_id, ssp, years, lat, lon, out_path, out_str):
    """
    Store the annual values of `metric_id` at the grid cell nearest to a point.

    One table per model is built from the conservatively regridded metric
    files; the tables are concatenated and written to
    ``{out_path}/{out_str}.csv``.

    Parameters
    ----------
    metric_id : str
        Metric prefix of the input file names.
    ssp : str
        Scenario; stored in the `ssp` column.
    years : sequence of two ints, or None
        First and last year to keep. None keeps every year.
    lat, lon : float
        Target location. A negative `lon` is shifted by 360 before the lookup.
    out_path : str
        Output directory.
    out_str : str
        Output file name without the ``.csv`` extension.

    Returns
    -------
    None. Nothing is computed if the output file already exists.
    """
    def read_and_process(metric_id, model, ssp, years, lat, lon):
        """Return the time series of one model at (lat, lon) as a DataFrame."""
        # Location selection
        if lon < 0:
            lon = 360 + lon

        # Read; the file is closed as soon as the table has been built
        with xr.open_dataset(f"{project_data_path}/metrics_regridded/STAR-ESDM/conservative/{metric_id}_{model}_{ssp}.nc") as ds_tmp:
            ds_tmp['time'] = ds_tmp["time"].dt.year

            # Time slice (skipped when no window is given)
            ds_sel = ds_tmp if years is None else ds_tmp.sel(time=slice(years[0],years[1]))
            ds_sel = ds_sel.sel(lat=lat, lon=lon, method='nearest')

            # Construct dataframe
            df_tmp = ds_sel.to_dataframe().drop(columns=["lat", "lon"]).reset_index()

        df_tmp["ssp"] = ssp
        df_tmp["model"] = model

        # Return
        return df_tmp

    # Check if done
    if not os.path.isfile(f"{out_path}/{out_str}.csv"):
        os.makedirs(out_path, exist_ok=True)

        # One delayed task per model
        df_delayed = [dask.delayed(read_and_process)(metric_id, model, ssp, years, lat, lon)
                      for model in models]

        # Compute and store
        df_out = dask.compute(*df_delayed)
        pd.concat(df_out).to_csv(f"{out_path}/{out_str}.csv", index=False)

In [None]:
%%time
# Windows, scenarios and metrics extracted from the regridded outputs
year_windows = [[2020,2040], [2050,2070], [2080,2100]]
scenarios = ["ssp245", "ssp585"]
raw_metric_ids = ['avg_tas', 'sum_pr', 'max_tasmax', 'max_pr', 'max_tas']

raw_dir = f"{project_data_path}/summary_raw"

# Every city in `city_list`, with its coordinates
for city, (lat, lon) in city_list.items():
    for years in year_windows:
        for ssp in scenarios:
            for metric_id in raw_metric_ids:
                # One CSV per city/scenario/window/metric combination
                csv_name = f"{city}_STAR-ESDM_{ssp}_{str(years[0])}-{str(years[1])}_{metric_id}"
                get_raw_data(metric_id=metric_id, ssp=ssp, years=years,
                             lat=lat, lon=lon,
                             out_path=raw_dir, out_str=csv_name)