In [1]:
import os
from glob import glob

import dask
import numpy as np
import pandas as pd
import xarray as xr

from utils import city_list, metric_ids
import metric_funcs as mf

## Calculate metrics 

### Preliminaries

In [2]:
# ---------------------------------------------------------------
# Paths
# ---------------------------------------------------------------
# This part of the workflow runs on a different system from the other
# datasets; edit the entries below to reproduce it elsewhere.
from utils import (
    hopper_code_path as project_code_path,
    hopper_data_path as project_data_path,
)

# Root directory of the raw GARD-LENS files
gard_path = "/home/shared/vs498_0001/GARD-LENS"

# GCM names as they appear in the GARD-LENS file names
gard_gcms = [
    'canesm5',
    'cesm2',
    'ecearth3',
]

In [3]:
# Sanity check: for every GCM, the t_mean, t_range and pcp folders must
# contain the same number of files
gardlens_info = {}

for gcm in gard_gcms:
    t_mean_files, t_range_files, pcp_files = (
        glob(f"{gard_path}/{var_dir}/GARDLENS_{gcm}_*.nc")
        for var_dir in ("t_mean", "t_range", "pcp")
    )
    assert len(t_range_files) == len(t_mean_files)
    assert len(pcp_files) == len(t_mean_files)

In [4]:
# Collect every "<model>_<member>" identifier from the t_mean file names:
# keep the text between the 'GARDLENS' prefix and the first 't_', then
# drop the underscore on either side
models_members = [
    path.split('GARDLENS')[1].split('t_')[0][1:-1]
    for path in glob(f"{gard_path}/t_mean/GARDLENS_*.nc")
]

In [21]:
###############################
# Metric calculation function #
###############################
def calculate_metric(metric_func, var_id, model_member, needed_vars, gard_path, out_path):
    """
    Calculate one (annual) metric for a single GARD-LENS model/member and
    store the result as a netCDF file.

    Parameters
    ----------
    metric_func : callable
        Called as ``metric_func(ds, var_id)`` on the merged input dataset;
        the returned object must provide ``to_netcdf``.
    var_id : str
        Variable identifier forwarded to ``metric_func``; also used in the
        name of the error-log file.
    model_member : str
        "<model>_<member>" identifier as used in the GARD-LENS file names.
    needed_vars : list of str
        GARD-LENS variable folders to read and merge (e.g. ['t_mean', 't_range']).
    gard_path : str
        Root directory of the raw GARD-LENS files.
    out_path : str
        Destination netCDF file. Nothing is done if it already exists.

    Returns
    -------
    None

    Notes
    -----
    Exceptions are not propagated: the traceback is written to
    ``{project_code_path}/scripts/logs/{model_member}_{var_id}_GARDLENS.txt``.
    The result is first written to a temporary file next to ``out_path`` and
    only renamed on success, so a failed write can never be mistaken for a
    finished output by the "already done" check on a later run.
    """
    import traceback  # local import: only needed for error logging

    # Temporary file in the same directory so that os.replace stays a rename
    tmp_path = os.path.join(os.path.dirname(out_path), f".tmp_{os.path.basename(out_path)}")
    opened = []

    try:
        # Check if done
        if os.path.isfile(out_path):
            # print(f"{model_member} already done.")
            return None

        # ecearth3 files use a different time range in their file names
        if model_member.split('_')[0] == 'ecearth3':
            time_range = '1970_2100'
        else:
            time_range = '1950_2100'

        # Read (keep handles so they can be closed afterwards)
        for var in needed_vars:
            opened.append(
                xr.open_dataset(
                    f"{gard_path}/{var}/GARDLENS_{model_member}_{var}_{time_range}_CONUS.nc",
                    chunks='auto',
                )
            )
        ds_tmp = xr.merge(opened)

        # Calculate metric
        ds_out = metric_func(ds_tmp, var_id)

        # Store: write to the temporary file, then move into place
        ds_out.to_netcdf(tmp_path)
        os.replace(tmp_path, out_path)
        print(f"{model_member}")

    # Log if error
    except Exception:
        # Best-effort removal of a partially written file
        try:
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)
        except OSError:
            pass

        except_path = f"{project_code_path}/scripts/logs"
        os.makedirs(except_path, exist_ok=True)
        with open(f"{except_path}/{model_member}_{var_id}_GARDLENS.txt", "w") as f:
            f.write(traceback.format_exc())

    finally:
        # Release the input file handles whether or not the calculation succeeded
        for ds in opened:
            ds.close()

In [7]:
############
### Dask ###
############
# Start a local Dask cluster with 10 workers and attach a client to it.
from dask.distributed import LocalCluster
cluster = LocalCluster(n_workers = 10)
client = cluster.get_client()
# Last expression: display the client summary (dashboard link, workers, memory)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 10
Total threads: 10,Total memory: 187.41 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33325,Workers: 10
Dashboard: /proxy/8787/status,Total threads: 10
Started: Just now,Total memory: 187.41 GiB

0,1
Comm: tcp://127.0.0.1:42683,Total threads: 1
Dashboard: /proxy/46503/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:44751,
Local directory: /tmp/dask-scratch-space/worker-vpxtiupj,Local directory: /tmp/dask-scratch-space/worker-vpxtiupj

0,1
Comm: tcp://127.0.0.1:37093,Total threads: 1
Dashboard: /proxy/46561/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:46471,
Local directory: /tmp/dask-scratch-space/worker-bepw2ua6,Local directory: /tmp/dask-scratch-space/worker-bepw2ua6

0,1
Comm: tcp://127.0.0.1:44347,Total threads: 1
Dashboard: /proxy/42501/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:38747,
Local directory: /tmp/dask-scratch-space/worker-ga6g4avu,Local directory: /tmp/dask-scratch-space/worker-ga6g4avu

0,1
Comm: tcp://127.0.0.1:38177,Total threads: 1
Dashboard: /proxy/45029/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:37561,
Local directory: /tmp/dask-scratch-space/worker-x1ve3ppd,Local directory: /tmp/dask-scratch-space/worker-x1ve3ppd

0,1
Comm: tcp://127.0.0.1:45887,Total threads: 1
Dashboard: /proxy/45571/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:41273,
Local directory: /tmp/dask-scratch-space/worker-if243nco,Local directory: /tmp/dask-scratch-space/worker-if243nco

0,1
Comm: tcp://127.0.0.1:34187,Total threads: 1
Dashboard: /proxy/41977/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:35581,
Local directory: /tmp/dask-scratch-space/worker-zt873evo,Local directory: /tmp/dask-scratch-space/worker-zt873evo

0,1
Comm: tcp://127.0.0.1:33667,Total threads: 1
Dashboard: /proxy/43531/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:46583,
Local directory: /tmp/dask-scratch-space/worker-riwl7d3d,Local directory: /tmp/dask-scratch-space/worker-riwl7d3d

0,1
Comm: tcp://127.0.0.1:35533,Total threads: 1
Dashboard: /proxy/44755/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:41345,
Local directory: /tmp/dask-scratch-space/worker-uy75gddj,Local directory: /tmp/dask-scratch-space/worker-uy75gddj

0,1
Comm: tcp://127.0.0.1:39347,Total threads: 1
Dashboard: /proxy/35525/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:43901,
Local directory: /tmp/dask-scratch-space/worker-rqlday9c,Local directory: /tmp/dask-scratch-space/worker-rqlday9c

0,1
Comm: tcp://127.0.0.1:39027,Total threads: 1
Dashboard: /proxy/35175/status,Memory: 18.74 GiB
Nanny: tcp://127.0.0.1:33543,
Local directory: /tmp/dask-scratch-space/worker-2y8efls2,Local directory: /tmp/dask-scratch-space/worker-2y8efls2


2025-01-16 13:49:26,326 - distributed.scheduler - ERROR - Couldn't gather keys: {('store-map-1b7b8bc7bf72ee2721fc22461f1024c6', 3, 5, 0): 'waiting'}
2025-01-16 14:31:53,740 - distributed.scheduler - ERROR - Couldn't gather keys: {('store-map-29f7a26c4800e22cbc9f0065857e481f', 2, 3, 4): 'waiting', ('store-map-29f7a26c4800e22cbc9f0065857e481f', 2, 3, 3): 'waiting', ('store-map-29f7a26c4800e22cbc9f0065857e481f', 3, 4, 1): 'processing'}
2025-01-16 15:57:53,765 - bokeh.server.protocol_handler - ERROR - error handling message
 message: Message 'PATCH-DOC' content: {'events': [{'kind': 'ModelChanged', 'model': {'id': 'p929587'}, 'attr': 'inner_width', 'new': 1093}, {'kind': 'ModelChanged', 'model': {'id': 'p929587'}, 'attr': 'inner_height', 'new': 476}, {'kind': 'ModelChanged', 'model': {'id': 'p929587'}, 'attr': 'outer_width', 'new': 1113}, {'kind': 'ModelChanged', 'model': {'id': 'p929587'}, 'attr': 'outer_height', 'new': 556}]} 
 error: AssertionError()
Traceback (most recent call last):
 

### Calculations

In [14]:
%%time
##############################
# Cooling degree days: max
##############################
var_id = 'cdd'
metric_func = mf.calculate_dd_max
needed_vars = ['t_mean', 't_range']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/max_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 1.3 ms, sys: 0 ns, total: 1.3 ms
Wall time: 1.14 ms


In [15]:
%%time
##############################
# Cooling degree days: sum
##############################
var_id = 'cdd'
metric_func = mf.calculate_dd_sum
needed_vars = ['t_mean', 't_range']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/sum_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 3.74 ms, sys: 1.2 ms, total: 4.94 ms
Wall time: 18.6 ms


In [16]:
%%time
##############################
# Average temperature
##############################
var_id = 'tas'
metric_func = mf.calculate_avg
needed_vars = ['t_mean']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/avg_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 2.18 ms, sys: 3.11 ms, total: 5.29 ms
Wall time: 19.5 ms


In [17]:
%%time
##############################
# Maximum temperature
##############################
var_id = 'tasmax'
metric_func = mf.calculate_max
needed_vars = ['t_mean', 't_range']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/max_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 3.33 ms, sys: 2.47 ms, total: 5.79 ms
Wall time: 11 ms


In [18]:
%%time
##############################
# Minimum temperature
##############################
var_id = 'tasmin'
metric_func = mf.calculate_min
needed_vars = ['t_mean', 't_range']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/min_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 1.4 ms, sys: 2.99 ms, total: 4.39 ms
Wall time: 10.1 ms


In [19]:
%%time
#################
# Maximum precip
#################
var_id = 'pr'
metric_func = mf.calculate_max
needed_vars = ['pcp']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/max_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

CPU times: user 2.08 ms, sys: 2.1 ms, total: 4.19 ms
Wall time: 10.9 ms


In [None]:
%%time
#################
# Sum precip
#################
var_id = 'pr'
metric_func = mf.calculate_sum
needed_vars = ['pcp']


def out_path(model_member):
    """Output netCDF path for one model/member."""
    return f'{project_data_path}/metrics/GARD-LENS/sum_{var_id}_{model_member}_ssp370.nc'


# One call per ensemble member
for model_member in models_members:
    calculate_metric(
        metric_func=metric_func,
        var_id=var_id,
        model_member=model_member,
        needed_vars=needed_vars,
        gard_path=gard_path,
        out_path=out_path(model_member),
    )

cesm2_1231_11
cesm2_1251_07
cesm2_1301_03
ecearth3_r134i1p1f1
canesm5_r18i1p2f1
cesm2_1281_18
cesm2_1191_10
ecearth3_r121i1p1f1
canesm5_r12i1p1f1
cesm2_1231_04
cesm2_1251_12
ecearth3_r148i1p1f1
canesm5_r7i1p2f1
cesm2_1301_16
cesm2_1061_04
ecearth3_r149i1p1f1
canesm5_r6i1p2f1
canesm5_r13i1p1f1
cesm2_1231_12
cesm2_1251_04
ecearth3_r120i1p1f1
cesm2_1231_07
cesm2_1251_11
cesm2_1301_15
canesm5_r19i1p2f1
ecearth3_r135i1p1f1
ecearth3_r109i1p1f1
ecearth3_r137i1p1f1
cesm2_1251_14
cesm2_1231_02
cesm2_1081_05
cesm2_1301_10
canesm5_r25i1p2f1
cesm2_1251_01
cesm2_1231_17
ecearth3_r122i1p1f1
cesm2_1301_05
canesm5_r4i1p2f1
canesm5_r11i1p1f1
cesm2_1251_17
cesm2_1231_01
canesm5_r10i1p1f1
cesm2_1301_13


2025-01-16 13:32:20,109 - distributed.worker - ERROR - Compute Failed
Key:       ('concatenate-transpose-store-map-e28a4c42cf87cf70bc94ffd03a714a9e', 0, 4, 5)
State:     executing
Task:  <Task ('concatenate-transpose-store-map-e28a4c42cf87cf70bc94ffd03a714a9e', 0, 4, 5) store_chunk(...)>
Exception: "OSError(-101, 'NetCDF: HDF error')"
Traceback: '  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/dask/array/core.py", line 4575, in store_chunk\n    return load_store_chunk(x, out, index, lock, return_stored, False)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/dask/array/core.py", line 4557, in load_store_chunk\n    out[index] = x\n    ~~~^^^^^^^\n  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/xarray/backends/netCDF4_.py", line 82, in __setitem_

cesm2_1001_01
cesm2_1281_08
ecearth3_r123i1p1f1
canesm5_r24i1p2f1
cesm2_1251_02
cesm2_1231_14
cesm2_1041_03




ecearth3_r136i1p1f1
cesm2_1301_06
ecearth3_r108i1p1f1
ecearth3_r132i1p1f1
cesm2_1281_17
cesm2_1251_08
canesm5_r20i1p2f1
ecearth3_r119i1p1f1
ecearth3_r127i1p1f1
cesm2_1301_19
cesm2_1181_10
cesm2_1281_02
canesm5_r14i1p1f1
canesm5_r1i1p2f1
canesm5_r15i1p1f1
cesm2_1071_04
ecearth3_r126i1p1f1
cesm2_1281_14
ecearth3_r118i1p1f1
cesm2_1281_01
canesm5_r21i1p2f1
cesm2_1231_08




ecearth3_r133i1p1f1
ecearth3_r131i1p1f1
cesm2_1281_04
cesm2_1091_05
canesm5_r23i1p2f1
canesm5_r8i1p1f1
cesm2_1281_11
ecearth3_r124i1p1f1
cesm2_1231_18
canesm5_r2i1p2f1
canesm5_r17i1p1f1
cesm2_1281_07
canesm5_r16i1p1f1
canesm5_r3i1p2f1
ecearth3_r125i1p1f1
cesm2_1301_20
cesm2_1251_18
cesm2_1011_01
canesm5_r9i1p1f1
cesm2_1301_09
canesm5_r22i1p2f1
cesm2_1281_12
cesm2_1051_03
ecearth3_r130i1p1f1
canesm5_r14i1p2f1
cesm2_1301_02
canesm5_r1i1p1f1
cesm2_1251_06
cesm2_1231_10
ecearth3_r106i1p1f1
cesm2_1281_19
ecearth3_r138i1p1f1
ecearth3_r144i1p1f1
canesm5_r20i1p1f1
cesm2_1301_17
ecearth3_r113i1p1f1
cesm2_1251_13
cesm2_1231_05
ecearth3_r112i1p1f1
cesm2_1301_01
canesm5_r21i1p1f1
cesm2_1251_05
ecearth3_r145i1p1f1
cesm2_1231_13
ecearth3_r139i1p1f1
cesm2_1301_14
ecearth3_r107i1p1f1
cesm2_1251_10
cesm2_1231_06
ecearth3_r150i1p1f1
canesm5_r15i1p2f1
canesm5_r2i1p1f1


2025-01-16 16:10:52,965 - distributed.worker - ERROR - Worker stream died during communication: tcp://127.0.0.1:34671
Traceback (most recent call last):
  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/tornado/iostream.py", line 861, in _read_to_buffer
    bytes_read = self.read_from_fd(buf)
                 ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/tornado/iostream.py", line 1116, in read_from_fd
    return self.socket.recv_into(buf, len(buf))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ConnectionResetError: [Errno 104] Connection reset by peer

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/fs01/dcl257/projects/conus_comparison_lafferty-etal-2024/.venv/lib/python3.12/site-packages/distributed/worker.py", line 2075, in gather_dep
    response = await get_data_from_work

canesm5_r17i1p2f1


## Timeseries

### Preliminaries

In [2]:
# ---------------------------------------------------------------
# Paths
# ---------------------------------------------------------------
# Edit the entries below to reproduce on another system.
from utils import (
    roar_code_path as project_code_path,
    roar_data_path as project_data_path,
)

# GCM names as they appear in the GARD-LENS file names
gard_gcms = [
    'canesm5',
    'cesm2',
    'ecearth3',
]

In [3]:
# Recover every "<model>_<member>" identifier from the max_tasmax metric
# file names: cesm2 identifiers take three underscore-separated fields
# (model plus two), every other model takes two (model plus one)
models_members = [
    '_'.join(fields[2:5] if fields[2] == 'cesm2' else fields[2:4])
    for fields in (
        path.split('/')[-1].split('_')
        for path in glob(f"{project_data_path}/metrics/GARD-LENS/max_tasmax_*.nc")
    )
]

In [4]:
############
### Dask ###
############
# Dask cluster backed by SLURM jobs; a client is attached to it below.
from dask_jobqueue import SLURMCluster

# Resources requested for each SLURM job: 1 core, 10 GiB, 1 hour walltime
cluster = SLURMCluster(
    # account="pches",
    account="open",
    cores=1,
    memory="10GiB",
    walltime="01:00:00"
)

cluster.scale(jobs=10)  # ask for jobs

from dask.distributed import Client

client = Client(cluster)

# Last expression: display the client summary
# (the captured output shows 0 workers at the time this cell was run)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.8.110:36305,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Raw

In [5]:
# Extracts the timeseries of one metric at one location for a GARD-LENS ensemble member
def get_raw_data(metric_id, model_member, years, lat, lon, out_path, out_str):
    """
    Write the timeseries of one metric at the grid cell nearest to (lat, lon)
    for a single GARD-LENS model/member to ``{out_path}/{out_str}.csv``.

    Parameters
    ----------
    metric_id : str
        Metric prefix of the input file name (e.g. 'max_tasmax').
    model_member : str
        "<model>_<member>" identifier; everything after the first underscore
        is stored as the member.
    years : sequence of two items or None
        If given, only ``time`` values within ``years[0]``..``years[1]`` are kept.
    lat, lon : float
        Location; the nearest grid cell is selected.
    out_path : str
        Output directory (created if missing).
    out_str : str
        Output file name without the '.csv' extension.

    Returns
    -------
    None
        Nothing is done if the CSV already exists.

    Notes
    -----
    A second function named ``get_raw_data`` with a different signature is
    defined later in this notebook and replaces this one once its cell runs.
    """
    csv_path = f"{out_path}/{out_str}.csv"

    # Check if done
    if os.path.isfile(csv_path):
        return None

    # Read; the context manager releases the file handle even on error
    with xr.open_dataset(f'{project_data_path}/metrics/GARD-LENS/{metric_id}_{model_member}_ssp370.nc') as ds:
        # Location selection first
        ds_sel = ds.sel(lat=lat, lon=lon, method='nearest')

        # Time slice
        if years is not None:
            ds_sel = ds_sel.sel(time=slice(years[0], years[1]))
        ds_sel['time'] = ds_sel['time'].dt.year

        # Construct dataframe while the file is still open
        df_out = ds_sel.to_dataframe().reset_index().dropna().drop(columns=["lat", "lon"])

    # Get info: model is the text before the first underscore, member the rest
    model, _, member = model_member.partition('_')
    df_out["ssp"] = 'ssp370'
    df_out["model"] = model
    df_out["member"] = member

    # Store
    os.makedirs(out_path, exist_ok=True)
    df_out.to_csv(csv_path, index=False)

In [8]:
# Run the per-location extraction for every ensemble member and city
def calculate_all_city_years(metric_id):
    """
    Build one delayed ``get_raw_data`` task per (model/member, city) pair for
    the given metric, covering all years, and compute them all with Dask.
    """
    # Save path
    out_path = f"{project_data_path}/timeseries/original_grids/"

    # One lazy task per combination, members in the outer loop
    tasks = [
        dask.delayed(get_raw_data)(
            metric_id=metric_id,
            model_member=model_member,
            years=None,
            lat=lat,
            lon=lon,
            out_path=out_path,
            out_str=f"{metric_id}_GARD-LENS_{model_member}_ssp370_{city}",
        )
        for model_member in models_members
        for city in city_list
        for lat, lon in [city_list[city]]
    ]

    # Compute
    dask.compute(*tasks)

In [9]:
%%time
# Compute and store all
# (one pass over every model/member and city per metric; the metric id is
# printed once its pass has finished)
for metric_id in metric_ids:
    calculate_all_city_years(metric_id)
    print(metric_id)

max_tasmax
max_cdd
max_hdd
max_pr
min_tasmin
CPU times: user 32.5 s, sys: 1.86 s, total: 34.4 s
Wall time: 2min 28s


### Regridded

In [24]:
# Extracts one metric at one location from the regridded GARD-LENS files of a given GCM
def get_raw_data(metric_id, model, years, lat, lon, out_path, out_str):
    """
    Write one metric at the grid cell nearest to (lat, lon), read from all
    regridded GARD-LENS files of one GCM, to ``{out_path}/{out_str}.csv``.

    Parameters
    ----------
    metric_id : str
        Metric prefix of the input file names (e.g. 'avg_tas').
    model : str
        GCM name used in the input file names and stored in the 'model' column.
    years : sequence of two items or None
        If given, only years within ``years[0]``..``years[1]`` are kept;
        ``None`` keeps all years.
    lat, lon : float
        Location; a negative ``lon`` is shifted by +360 before selection
        (presumably because the regridded files use 0-360 longitudes; TODO confirm).
    out_path : str
        Output directory (created if missing).
    out_str : str
        Output file name without the '.csv' extension.

    Returns
    -------
    None
        Nothing is done if the CSV already exists.

    Notes
    -----
    This definition reuses the name of the per-member ``get_raw_data`` defined
    earlier in the notebook and replaces it once this cell runs;
    ``calculate_all_city_years`` passes ``model_member=`` and therefore only
    works with the earlier definition.
    """
    csv_path = f"{out_path}/{out_str}.csv"

    # Check if done
    if os.path.isfile(csv_path):
        return None

    # Read all; the context manager releases the file handles even on error
    with xr.open_mfdataset(f"{project_data_path}/metrics_regridded/GARD-LENS/conservative/{metric_id}_{model}_*.nc", chunks='auto') as ds:
        # Time slice
        ds_sel = ds.rename({'year': 'time'})
        if years is not None:
            ds_sel = ds_sel.sel(time=slice(years[0], years[1]))

        # Location selection
        if lon < 0:
            lon = 360 + lon
        ds_sel = ds_sel.sel(lat=lat, lon=lon, method='nearest')

        # Construct dataframe while the files are still open
        df_out = ds_sel.to_dataframe().drop(columns=["lat", "lon"]).reset_index()

    df_out["ssp"] = 'ssp370'
    df_out["model"] = model

    # Store
    os.makedirs(out_path, exist_ok=True)
    df_out.to_csv(csv_path, index=False)

In [29]:
%%time
# One CSV per (city, period, metric, GCM) from the regridded metrics
for city in city_list.keys():
    lat, lon = city_list[city]
    for years in [[2020,2040], [2050,2070], [2080,2100]]:
        for metric_id in ['avg_tas', 'sum_pr', 'max_pr']:
            # Iterate over the GCM names in `gard_gcms`: `gard_info` is never
            # defined in this notebook and raised NameError on a fresh kernel
            for model in gard_gcms:
                get_raw_data(metric_id = metric_id, 
                             model = model,
                             years=years,
                             lat=lat, lon=lon,
                             out_path=f"{project_data_path}/summary_raw",
                             out_str=f"{city}_GARD-LENS_{model}_ssp370_{years[0]}-{years[1]}_{metric_id}")

CPU times: user 3min 23s, sys: 31.6 s, total: 3min 55s
Wall time: 16min 26s
