In [None]:
import os
import pandas as pd
import dask
import gev_bayes_utils as gevbu

from utils import city_list, gev_metric_ids
from utils import roar_data_path as project_data_path
from utils import roar_code_path as project_code_path

In [2]:
# ############
# ### Dask ###
# ############
# from dask_jobqueue import SLURMCluster

# cluster = SLURMCluster(
#     # account="pches",
#     account="open",
#     cores=3,
#     processes=1,
#     job_cpu=3,
#     memory="10GiB",
#     walltime="12:00:00",
#     job_script_prologue=[
#         f"export PYTHONPATH={project_code_path}/.venv/lib/python3.12/site-packages:$PYTHONPATH",  # Put venv first
#         "export JAX_PLATFORM_NAME=cpu",
#         "export XLA_FLAGS='--xla_force_host_platform_device_count=1'",
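#         # Force PyTensor to not use caching at all
#         # NOTE(review): the PYTENSOR_FLAGS value below is split across four list
#         # entries; if each entry becomes its own line in the job script, the quoted
#         # value will contain embedded newlines — confirm, or join into one string.
#         "export PYTENSOR_FLAGS='cxx=,",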
#         "mode=FAST_COMPILE,",  # Less aggressive optimization but more stable
#         "allow_gc=True,",
#         "cache_size=0'"        # Disable caching completely
#     ],
#     death_timeout=60,
#     local_directory="/tmp"
# )

# cluster.scale(30)

# from dask.distributed import Client
# client = Client(cluster)
# client

In [2]:
# ############
# ### Dask ###
# ############
# from dask_jobqueue import SLURMCluster

# cluster = SLURMCluster(
#     # account="pches",
#     account="open",
#     cores=1,
#     processes=1,
#     job_cpu=1,
#     memory="10GiB",
#     walltime="00:30:00",
# )

# cluster.scale(20)

# from dask.distributed import Client
# client = Client(cluster)
# client

## Get city timeseries

In [3]:
# # Run for all
# for city in city_list.keys():
#     for metric_id in gev_metric_ids:
#         gevbu.get_city_timeseries_all(city, metric_id)

## Fit Bayesian GEV

### Fit across ensemble

In [None]:
%%time
# Configuration for the non-stationary fit
future_years = [2015,2100]
stationary = False
return_periods = [100]

# Keyword arguments shared by every fit in this cell
fit_options = dict(
    years=future_years,
    stationary=stationary,
    shape_sigma=0.2,
    prior_identifier='shape_sigma_02',
    return_periods=return_periods,
)

# Build one lazy dask task per (city, metric) combination; the fits only
# start once dask.compute is called below.
fit_tasks = [
    dask.delayed(gevbu.fit_bayesian_gev_ensemble)(
        city=city_name,
        metric_id=metric,
        **fit_options,
    )
    for city_name in city_list.keys()
    for metric in gev_metric_ids
]

# Run all fits. The return values are discarded here — presumably
# fit_bayesian_gev_ensemble stores its own output; confirm in gev_bayes_utils.
_ = dask.compute(*fit_tasks)

In [2]:
# %%time
# # Fit info: stationary
# hist_years = [1950,2014]
# future_years = [2050,2100]
# stationary = True
# return_periods = [100]

# # Parallelize with dask delayed
# delayed = []

# # Loop through all combos
# for city in city_list.keys():
#     for metric_id in gev_metric_ids:
#         for years in [hist_years, future_years]:
#             tmp = dask.delayed(gevbu.fit_bayesian_gev_ensemble)(
#                     city=city,
#                     metric_id=metric_id,
#                     years=years,
#                     stationary=stationary,
#                     return_periods=return_periods,
#                     dask=False
#             )
#             delayed.append(tmp)

# _ = dask.compute(*delayed)

### Gather results

In [4]:
%%time
# Gather the Bayesian GEV results for every (city, metric) combination and
# store each one as a CSV under store_path.
store_path = f"{project_data_path}/extreme_value/cities/original_grid/bayes_combined/"
# DataFrame.to_csv does not create missing directories, so make sure the
# output directory exists before the loop starts.
os.makedirs(store_path, exist_ok=True)

return_periods = [100]

# NOTE(review): the active fit cell in this notebook sets stationary = False,
# while this cell gathers the stationary results — confirm this is intended.
stationary = True
stationary_string = "stat" if stationary else "nonstat"

for city in city_list.keys():
    for metric_id in gev_metric_ids:
        # An existing CSV marks this combination as done; skip it
        file_path = f"{store_path}/{city}_{metric_id}_{stationary_string}.csv"
        if os.path.exists(file_path):
            continue
        # Read
        df = gevbu.gather_bayesian_gev_results_all(
            city=city,
            metric_id=metric_id,
            return_periods=return_periods,
            stationary=stationary,
        )
        # Store: write under a temporary name and rename only on success, so an
        # interrupted write never leaves a partial CSV that the existence
        # check above would treat as finished on the next run.
        tmp_path = f"{file_path}.tmp"
        df.to_csv(tmp_path, index=False)
        os.replace(tmp_path, file_path)