In [1]:
import os
from glob import glob
import numpy as np
import pandas as pd
import xarray as xr
import dask
import gev_utils as gevu
from regridding import regrid
from utils import loca_gard_mapping, gev_metric_ids

In [2]:
# ------------------------------------------------------------------
# Paths -- edit these two values to reproduce on a different system
# ------------------------------------------------------------------

# Data root: used below as the prefix for the extreme_value/ directories
project_data_path = (
    "/storage/group/pches/default/users/dcl5300/"
    "conus_comparison_lafferty-etal-2024"
)

# Code location for the project
project_code_path = (
    "/storage/home/dcl5300/work/current_projects/"
    "conus_comparison_lafferty-etal-2024"
)

In [3]:
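# ############
# ### Dask ###
# ############
# Optional: this cell is disabled. Uncomment the lines below to start a
# SLURM-backed Dask cluster and attach a client to it.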
# from dask_jobqueue import SLURMCluster

# cluster = SLURMCluster(
#     # account="pches",
#     account="open",
#     cores=1,
#     memory="10GiB",
#     walltime="00:20:00",
# )

# cluster.scale(jobs=10)  # ask for jobs

# from dask.distributed import Client

# client = Client(cluster)

# client

## Fit GEV

In [3]:
# Open the LOCA2 max_pr metric files for CanESM5 / r1i1p1f1 / ssp585.
# The location is built from `project_data_path` so that editing the Paths
# cell is enough to point the notebook at a different data root.
ds_in = xr.open_mfdataset(
    f"{project_data_path}/metrics/LOCA2/max_pr_CanESM5_r1i1p1f1_ssp585_*.nc"
)

# Keep 2050-2100 only and load the selection into memory
ds_in = ds_in.sel(time=slice("2050-01-01", "2100-12-31")).load()

In [4]:
%%time
# Timing baseline: stationary L-moments GEV fit of max_pr with numba disabled
first_year, last_year = 2050, 2100
ds_fit = gevu.fit_gev_xr(
    ds=ds_in,
    metric_id="max_pr",
    stationary=True,
    fit_method="lmom",
    years=[first_year, last_year],
    expected_length=last_year - first_year + 1,  # inclusive year count (51)
    periods_for_level=[10, 25, 50, 100],
    numba=False,
)

CPU times: user 1min 45s, sys: 73.9 ms, total: 1min 45s
Wall time: 1min 46s


In [4]:
%%time
# Same stationary L-moments GEV fit of max_pr, this time with numba enabled,
# so that the %%time output can be compared against the numba=False run
fit_kwargs = dict(
    ds=ds_in,
    metric_id="max_pr",
    stationary=True,
    fit_method="lmom",
    years=[2050, 2100],
    expected_length=2100 - 2050 + 1,  # inclusive year count (51)
    periods_for_level=[10, 25, 50, 100],
)
ds_fit_numba = gevu.fit_gev_xr(**fit_kwargs, numba=True)

CPU times: user 11.8 s, sys: 226 ms, total: 12 s
Wall time: 12.2 s


In [None]:
%%time
# Parametric bootstrap (n_boot=100) of the stationary L-moments GEV fit for
# LOCA2 / CanESM5 / r1i1p1f1 / ssp585 max_pr over 2050-2100.
# NOTE(review): no random seed is set in this notebook -- confirm whether
# gev_utils seeds the bootstrap internally if reproducible draws are needed.
boot_start, boot_end = 2050, 2100
boot_store_path = f"{project_data_path}/extreme_value/original_grid/max_pr/"

out = gevu.fit_gev_xr_bootstrap(
    ensemble="LOCA2",
    gcm="CanESM5",
    member="r1i1p1f1",
    ssp="ssp585",
    metric_id="max_pr",
    years=[boot_start, boot_end],
    expected_length=boot_end - boot_start + 1,  # inclusive year count (51)
    fit_method="lmom",
    store_path=boot_store_path,
    bootstrap="parametric",
    n_boot=100,
    stationary=True,
    periods_for_level=[10, 25, 50, 100],
    numba=True,
)

  return function_base._ureduce(a,
  level = loc + scale / shape * (1 - (-np.log(quantile)) ** (shape))
  level = loc + scale / shape * (1 - (-np.log(quantile)) ** (shape))


In [4]:
%%time
# Stationary GEV fits via L-moments, one call per entry in `gev_metric_ids`
for metric_id in gev_metric_ids:
    # Settings shared by every metric; rebuilt each iteration so every call
    # receives its own fresh list objects
    fit_settings = dict(
        stationary=True,
        fit_method="lmom",
        periods_for_level=[10, 25, 50, 100],
        levels_for_period=None,
        proj_years=[2050, 2100],
        hist_years=[1950, 2014],
    )
    gevu.gev_fit_all(metric_id=metric_id, **fit_settings)

CPU times: user 46.6 s, sys: 3.4 s, total: 50 s
Wall time: 6min 43s


## Regridding

In [4]:
%%time
# Regrid the STAR-ESDM and GARD-LENS results to the LOCA2 grid using
# nearest-neighbour ("nearest") regridding
method = "nearest"
target = "LOCA2"

for metric_id in gev_metric_ids:
    # Input and output directories for this metric
    source_dir = f"{project_data_path}/extreme_value/original_grid/{metric_id}"
    store_path = f"{project_data_path}/extreme_value/loca_grid/{metric_id}"
    # Create the output directory up front so to_netcdf cannot fail on a
    # missing folder
    os.makedirs(store_path, exist_ok=True)

    # Collect the input files; sorted so the processing order is reproducible
    files = sorted(
        glob(f"{source_dir}/STAR-ESDM*") + glob(f"{source_dir}/GARD-LENS*")
    )

    for file in files:
        # Output name = input name with the method inserted before the
        # extension (splitext only touches the final suffix, unlike
        # str.replace which would rewrite every ".nc" occurrence)
        stem, ext = os.path.splitext(os.path.basename(file))
        store_file = f"{store_path}/{stem}_{method}{ext}"

        # Skip anything already regridded on a previous run
        if os.path.exists(store_file):
            continue

        # The context manager closes the input file once the output has been
        # written, so handles do not accumulate across the loop
        with xr.open_dataset(file) as ds_tmp:
            ds_out = regrid(ds_tmp, method=method, target=target)
            ds_out.to_netcdf(store_file)

CPU times: user 2min 33s, sys: 1min 19s, total: 3min 53s
Wall time: 8min 12s


In [11]:
%%time
# Regrid the STAR-ESDM and LOCA2 results to the GARD-LENS grid using
# nearest-neighbour ("nearest") regridding
method = "nearest"
target = "GARD-LENS"

for metric_id in gev_metric_ids:
    # Input and output directories for this metric
    source_dir = f"{project_data_path}/extreme_value/original_grid/{metric_id}"
    store_path = f"{project_data_path}/extreme_value/gard_grid/{metric_id}"
    # Create the output directory up front so to_netcdf cannot fail on a
    # missing folder
    os.makedirs(store_path, exist_ok=True)

    # Collect the input files; sorted so the processing order is reproducible
    files = sorted(
        glob(f"{source_dir}/STAR-ESDM*") + glob(f"{source_dir}/LOCA2*")
    )

    for file in files:
        # Output name = input name with the method inserted before the
        # extension (splitext only touches the final suffix, unlike
        # str.replace which would rewrite every ".nc" occurrence)
        stem, ext = os.path.splitext(os.path.basename(file))
        store_file = f"{store_path}/{stem}_{method}{ext}"

        # Skip anything already regridded on a previous run
        if os.path.exists(store_file):
            continue

        # The context manager closes the input file once the output has been
        # written, so handles do not accumulate across the loop
        with xr.open_dataset(file) as ds_tmp:
            ds_out = regrid(ds_tmp, method=method, target=target)
            ds_out.to_netcdf(store_file)

CPU times: user 2min 11s, sys: 1min 13s, total: 3min 24s
Wall time: 9min 6s
