In [1]:
import os
from glob import glob

import xarray as xr
from regridding import regrid

import gev_stat_utils as gevsu

from utils import gev_metric_ids
from utils import roar_data_path as project_data_path

In [2]:
############
### Dask ###
############
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Keyword arguments handed to SLURMCluster (account, queue and the
# cores/processes/memory/walltime requested for the job).
slurm_job_spec = dict(
    account="pches_cr_default",
    queue="basic",
    cores=1,
    processes=1,
    memory="40GiB",
    walltime="02:00:00",
)
cluster = SLURMCluster(**slurm_job_spec)

cluster.scale(jobs=1)  # ask for jobs

# Attach a client to the cluster; the bare name on the last line makes the
# notebook display the client summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.8.214:41831,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


2025-04-04 15:25:10,064 - distributed.scheduler - ERROR - Task gev_fit_single_bootstrap-ec5bfd25-c743-475f-83e3-a535c0b72c93 marked as failed because 4 workers died while trying to run it


## Fit GEV

In [4]:
# Stationary, L-moments, main
# Runs gevsu.gev_fit_all once per metric in gev_metric_ids with bootstrap disabled.
for metric_id in gev_metric_ids:
    # Options are rebuilt on every iteration so each call receives fresh lists.
    # NOTE(review): periods_for_level presumably holds return periods and
    # proj_years / hist_years presumably bound the fitting windows -- confirm
    # against gev_stat_utils.
    fit_options = {
        "stationary": True,
        "fit_method": "lmom",
        "periods_for_level": [10, 25, 50, 100],
        "levels_for_period": None,
        "proj_years": [2050, 2100],
        "hist_years": [1950, 2014],
        "bootstrap": False,
    }
    gevsu.gev_fit_all(metric_id=metric_id, **fit_options)

CPU times: user 13.8 s, sys: 829 ms, total: 14.7 s
Wall time: 31.7 s


In [None]:
## Stationary, L-moments, bootstrap

# Can separate STAR-ESDM since it's higher resolution
# and requires significantly more memory
# NOTE(review): the [:1] slice restricts this run to the first metric in
# gev_metric_ids -- confirm that is intentional before relying on the outputs.
for metric_id in gev_metric_ids[:1]:
    # Options are rebuilt on every iteration so each call receives fresh lists.
    bootstrap_fit_options = {
        "stationary": True,
        "fit_method": "lmom",
        "periods_for_level": [10, 25, 50, 100],
        "levels_for_period": None,
        "proj_years": [2050, 2100],
        "hist_years": [1950, 2014],
        "bootstrap": True,
        "n_boot_proj": 100,
        "n_boot_hist": 1,
        "include_STAR_ESDM": False,
    }
    gevsu.gev_fit_all(metric_id=metric_id, **bootstrap_fit_options)

## Regridding

In [2]:
%%time
# Regrid to LOCA using NN
method = "nearest"
target = "LOCA2"

for metric_id in gev_metric_ids:
    # Save path
    store_path = f"{project_data_path}/extreme_value/loca_grid/{metric_id}"
    # Get all files
    files = glob(
        f"{project_data_path}/extreme_value/original_grid/{metric_id}/STAR-ESDM*"
    ) + glob(f"{project_data_path}/extreme_value/original_grid/{metric_id}/GARD-LENS*")

    # Loop through
    for file in files:
        # Check if done
        file_name = file.split("/")[-1]
        store_name = file_name.replace(".nc", f"_{method}.nc")
        if not os.path.exists(f"{store_path}/{store_name}"):
            # Regrid and store
            ds_tmp = xr.open_dataset(file)
            ds_out = regrid(ds_tmp, method=method, target=target)
            ds_out.to_netcdf(f"{store_path}/{store_name}")

CPU times: user 1min 6s, sys: 49 s, total: 1min 55s
Wall time: 2min 53s


In [3]:
%%time
# Regrid to GARD-LENS using NN
method = "nearest"
target = "GARD-LENS"

for metric_id in gev_metric_ids:
    # Save path
    store_path = f"{project_data_path}/extreme_value/gard_grid/{metric_id}"
    # Get all files
    files = glob(
        f"{project_data_path}/extreme_value/original_grid/{metric_id}/STAR-ESDM*"
    ) + glob(f"{project_data_path}/extreme_value/original_grid/{metric_id}/LOCA2*")

    # Loop through
    for file in files:
        # Check if done
        file_name = file.split("/")[-1]
        store_name = file_name.replace(".nc", f"_{method}.nc")
        if not os.path.exists(f"{store_path}/{store_name}"):
            # Regrid and store
            ds_tmp = xr.open_dataset(file)
            ds_out = regrid(ds_tmp, method=method, target=target)
            ds_out.to_netcdf(f"{store_path}/{store_name}")

CPU times: user 1min 36s, sys: 1min 13s, total: 2min 50s
Wall time: 5min 39s
