In [2]:
import os
from glob import glob
import numpy as np
import pandas as pd
import xarray as xr
import dask
import gev_utils as gevu
from regridding import regrid_to_loca
from utils import loca_gard_mapping, gev_metric_ids

In [2]:
################
#### Paths #####
################
# User-specific absolute paths. Update both before re-running the
# notebook in a different environment.

# Root directory for project data. The regridding cell reads from
# {project_data_path}/extreme_value/original_grids/ and writes to
# {project_data_path}/extreme_value/loca_grid/.
project_data_path = "/storage/group/pches/default/users/dcl5300/conus_comparison_lafferty-etal-2024"
# Root directory for project code. Not referenced by any cell visible in this notebook.
project_code_path = "/storage/home/dcl5300/work/current_projects/conus_comparison_lafferty-etal-2024"

In [3]:
############
### Dask ###
############
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Resources requested for each SLURM job backing the Dask cluster.
cluster = SLURMCluster(
    # account="pches",
    account="open",
    cores=1,
    memory="30GiB",
    walltime="01:00:00",
)

# Request 20 SLURM jobs, then attach a client to the cluster.
cluster.scale(jobs=20)
client = Client(cluster)

# Bare expression so the notebook renders the client summary.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.8.110:41063,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Fit GEV

In [8]:
%%time
# Annual max tasmax
# Sets the global `metric_id` and passes it to gev_fit_all.
# NOTE(review): gev_fit_all is neither defined nor imported in the cells visible
# here (only `gev_utils as gevu` is imported) — confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
metric_id = "max_tasmax"
gev_fit_all(metric_id)

CPU times: user 2.16 s, sys: 82.9 ms, total: 2.25 s
Wall time: 4.36 s


In [10]:
%%time
# Annual max cdd
# Sets the global `metric_id` and passes it to gev_fit_all.
# NOTE(review): gev_fit_all is neither defined nor imported in the cells visible
# here (only `gev_utils as gevu` is imported) — confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
metric_id = "max_cdd"
gev_fit_all(metric_id)

CPU times: user 1min 31s, sys: 8.9 s, total: 1min 40s
Wall time: 25min 26s


In [11]:
%%time
# Annual max hdd
# Sets the global `metric_id` and passes it to gev_fit_all.
# NOTE(review): gev_fit_all is neither defined nor imported in the cells visible
# here (only `gev_utils as gevu` is imported) — confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
metric_id = "max_hdd"
gev_fit_all(metric_id)

CPU times: user 42.8 s, sys: 4.13 s, total: 46.9 s
Wall time: 25min 31s


In [9]:
%%time
# Annual min tasmin
# Sets the global `metric_id` and passes it to gev_fit_all.
# NOTE(review): gev_fit_all is neither defined nor imported in the cells visible
# here (only `gev_utils as gevu` is imported) — confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
metric_id = "min_tasmin"
gev_fit_all(metric_id)

CPU times: user 3min 11s, sys: 17.2 s, total: 3min 28s
Wall time: 24min 40s


In [13]:
%%time
# Annual max precip
# Sets the global `metric_id` and passes it to gev_fit_all.
# NOTE(review): gev_fit_all is neither defined nor imported in the cells visible
# here (only `gev_utils as gevu` is imported) — confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
metric_id = "max_pr"
gev_fit_all(metric_id)

CPU times: user 44.6 s, sys: 4.37 s, total: 48.9 s
Wall time: 26min 32s


In [14]:
# Shut down the Dask client created in the Dask setup cell.
# NOTE(review): per the dask.distributed docs this also stops the connected
# scheduler and workers — confirm nothing after this point needs the cluster.
client.shutdown()

## Regridding

In [11]:
%%time
# Regrid to LOCA using NN
method = "nearest"

for metric_id in metric_ids:
    # Save path
    store_path = f"{project_data_path}/extreme_value/loca_grid/{metric_id}"
    # Get all files
    files = glob(f"{project_data_path}/extreme_value/original_grids/{metric_id}/STAR-ESDM*") + \
            glob(f"{project_data_path}/extreme_value/original_grids/{metric_id}/GARD-LENS*")

    # Loop through
    for file in files:
        # Check if done
        file_name = file.split('/')[-1]
        store_name = file_name.replace(".nc", f"_{method}.nc")
        if not os.path.exists(f"{store_path}/{store_name}"):
            # Regrid and store
            ds_tmp = xr.open_dataset(file)
            ds_out = regrid_to_loca(ds_tmp, method=method)
            ds_out.to_netcdf(f"{store_path}/{store_name}")

CPU times: user 28.6 s, sys: 6.82 s, total: 35.4 s
Wall time: 58.6 s
