In [1]:
import os
import warnings
import numpy as np
import pandas as pd
import dask
import gev_bayes_utils as gevbu
import gev_city_utils as gevcu
import sa_city_utils as sacu

from utils import city_list, gev_metric_ids, trend_metric_ids
from utils import roar_data_path as project_data_path
from utils import roar_code_path as project_code_path



In [2]:
# # For Bayes
# ############
# ### Dask ###
# ############
# from dask_jobqueue import SLURMCluster

# cluster = SLURMCluster(
#     # account="pches",
#     account="open",
#     cores=3,
#     processes=1,
#     job_cpu=3,
#     memory="3GiB",
#     walltime="12:00:00",
#     job_script_prologue=[
#         f"export PYTHONPATH={project_code_path}/.venv/lib/python3.12/site-packages:$PYTHONPATH",  # Put venv first
#         "export JAX_PLATFORM_NAME=cpu",
#         "export XLA_FLAGS='--xla_force_host_platform_device_count=1'",
#         # Force PyTensor to not use caching at all
#         "export PYTENSOR_FLAGS='cxx=,",
#         "mode=FAST_COMPILE,",  # Less aggressive optimization but more stable
#         "allow_gc=True,",
#         "cache_size=0'"        # Disable caching completely
#     ],
#     death_timeout=60,
#     local_directory="/tmp"
# )

# cluster.scale(5)

# from dask.distributed import Client
# client = Client(cluster)
# client

In [2]:
#######################
### Dask
### For non-Bayes
#######################
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Resources requested for each SLURM job backing a Dask worker.
slurm_options = dict(
    account="pches",
    # account="open",
    cores=1,
    processes=1,
    memory="10GiB",
    walltime="06:00:00",
)

cluster = SLURMCluster(**slurm_options)

# Ask the scheduler for 30 workers; they join as the SLURM jobs start.
cluster.scale(30)

# Connect this notebook session to the cluster and display its summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.8.83:35159,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Get city timeseries

In [5]:
# Run for all: for each city, process the GEV metrics first and then the
# trend metrics, calling the same routine for every (city, metric) pair.
for city in city_list.keys():
    for metric_ids in (gev_metric_ids, trend_metric_ids):
        for metric_id in metric_ids:
            sacu.get_city_timeseries_all(city, metric_id)

## Stationary GEV with bootstrap

In [3]:
# Settings shared by every stationary fit submitted from this cell.
stationary = True
fit_method = "lmom"
periods_for_level = [10, 25, 50, 100]
hist_slice = [1950, 2014]
proj_slice = [2050, 2100]

# Wrap the fitting routine once; calling the wrapper only builds a lazy task.
lazy_fit = dask.delayed(gevcu.fit_ensemble_gev_city)

# One task per (city, metric, bootstrap size) combination.
delayed = []
for city in city_list:
    for metric_id in gev_metric_ids:
        for n_boot in [100, 1000]:
            task = lazy_fit(
                city=city,
                metric_id=metric_id,
                stationary=stationary,
                fit_method=fit_method,
                hist_slice=hist_slice,
                proj_slice=proj_slice,
                n_boot=n_boot,
                periods_for_level=periods_for_level,
            )
            delayed.append(task)

# Execute everything on the cluster; the return values are discarded here.
_ = dask.compute(*delayed)

## Non-stationary GEV with bootstrap

In [3]:
# Settings shared by every non-stationary fit submitted from this cell.
stationary = False
fit_method = "mle"
periods_for_level = [10, 25, 50, 100]
# No historical/projection slices are passed here; the full `years` range is
# supplied instead.
hist_slice = None
proj_slice = None
years = [1950, 2100]
return_period_years = [1975, 2075]
return_period_diffs = [[1975, 2075]]
n_boot = 250

# Wrap the fitting routine once; calling the wrapper only builds a lazy task.
lazy_fit = dask.delayed(gevcu.fit_ensemble_gev_city)

# One task per (city, metric) combination.
delayed = []
for city in city_list:
    for metric_id in gev_metric_ids:
        task = lazy_fit(
            city=city,
            metric_id=metric_id,
            stationary=stationary,
            fit_method=fit_method,
            hist_slice=hist_slice,
            proj_slice=proj_slice,
            years=years,
            n_boot=n_boot,
            periods_for_level=periods_for_level,
            return_period_years=return_period_years,
            return_period_diffs=return_period_diffs,
        )
        delayed.append(task)

# Execute everything on the cluster; the return values are discarded here.
_ = dask.compute(*delayed)

## Fit Bayesian GEV

### Fit across ensemble

In [None]:
%%time
# Fit info: non-stationary
future_years = [2015, 2100]
stationary = False
return_periods = [100]

# Wrap the Bayesian fitting routine once; each call below only records a
# lazy task for later execution.
lazy_bayes_fit = dask.delayed(gevbu.fit_bayesian_gev_ensemble)

# Loop through all (city, metric) combos and queue one task for each
delayed = []
for city in city_list.keys():
    for metric_id in gev_metric_ids:
        delayed.append(
            lazy_bayes_fit(
                city=city,
                metric_id=metric_id,
                years=future_years,
                stationary=stationary,
                shape_sigma=0.2,
                prior_identifier="shape_sigma_02",
                return_periods=return_periods,
            )
        )

# Run all queued tasks; the return values are discarded here.
_ = dask.compute(*delayed)

In [2]:
# %%time
# # Fit info: stationary
# hist_years = [1950,2014]
# future_years = [2050,2100]
# stationary = True
# return_periods = [100]

# # Parallelize with dask delayed
# delayed = []

# # Loop through all combos
# for city in city_list.keys():
#     for metric_id in gev_metric_ids:
#         for years in [hist_years, future_years]:
#             tmp = dask.delayed(gevbu.fit_bayesian_gev_ensemble)(
#                     city=city,
#                     metric_id=metric_id,
#                     years=years,
#                     stationary=stationary,
#                     return_periods=return_periods,
#                     dask=False
#             )
#             delayed.append(tmp)

# _ = dask.compute(*delayed)

### Gather results

In [3]:
%%time
# Loop through all combos, gather the Bayesian GEV results for each one, and
# store them as one CSV per (city, metric, years) combination.
store_path = f"{project_data_path}/extreme_value/cities/original_grid/bayes_combined/"

# Create the output directory up front: DataFrame.to_csv does not create
# missing parent directories, so without this the write would only fail after
# the gather call for the first combination had already run.
os.makedirs(store_path, exist_ok=True)

return_periods = [100]

prior_identifier = "shape_sigma_02"

stationary = False
stationary_string = "stat" if stationary else "nonstat"

# Currently restricted to a subset of cities and metrics; the commented-out
# loop headers iterate over the full lists instead.
# for city in city_list.keys():
for city in ['nyc', 'chicago', 'denver']:
    # for metric_id in gev_metric_ids:
    for metric_id in ['max_tasmax', 'max_pr', 'min_tasmin']:
        # `years=None` yields a file name without the "_change_..." suffix;
        # a [start, end] pair adds "_change_<start>-<end>".
        for years in [None, [2015, 2100]]:
            change_identifier = "" if years is None else f"_change_{years[0]}-{years[1]}"
            # os.path.join avoids the doubled "/" that plain concatenation
            # produces because store_path already ends with a separator.
            file_path = os.path.join(
                store_path,
                f"{city}_{metric_id}_{stationary_string}_{prior_identifier}{change_identifier}.csv",
            )
            # Skip combinations that were already gathered in a previous run
            if os.path.exists(file_path):
                continue
            # Read
            df = gevbu.gather_bayesian_gev_results_all(
                city=city,
                metric_id=metric_id,
                return_periods=return_periods,
                stationary=stationary,
                prior_identifier=prior_identifier,
                years=years,
            )
            # Store
            df.to_csv(file_path, index=False)

CPU times: user 2min 39s, sys: 11.7 s, total: 2min 51s
Wall time: 15min 58s
