# Extreme Distributions

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "" # first gpu
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'FALSE'

import jax
jax.config.update('jax_platform_name', 'cpu')

import numpyro
numpyro.set_platform("cpu")

In [2]:
import autoroot
from pathlib import Path
import numpy as np
import xarray as xr
import pandas as pd
import pint_xarray



from loguru import logger


from numpyro.infer import Predictive
import arviz as az

import xarray as xr

import matplotlib.pyplot as plt
import seaborn as sns
sns.reset_defaults()
sns.set_context(context="talk", font_scale=0.7)



# num_devices = 5
# numpyro.set_host_device_count(num_devices)


%matplotlib inline
%load_ext autoreload
%autoreload 2

## Temperature

In [3]:
import os
from pathlib import Path

logger.info("Initializaing paths...")

raw_data_dir = Path(os.getenv("RAW_DATA_SAVEDIR"))
clean_data_dir = Path(os.getenv("CLEAN_DATA_SAVEDIR"))

DATA_URL = clean_data_dir.joinpath("t2m_stations_spain.zarr")

[32m2024-12-04 14:25:54.167[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mInitializaing paths...[0m


### Data

In [4]:
ds = xr.open_dataset(DATA_URL, engine="zarr")
ds = ds.sel(time=slice(None, "2019"))

variable_name = "t2m_max"

## Block Maximum Extremes

### Method I - Masks

In [5]:
from st_evt.extremes import block_maxima_year, block_maxima_yearly_group

In [6]:
%%time
ds_bm = ds.copy()

# block maximum (YEAR)
logger.info(f"{variable_name.upper()} | Calculating BM (Year) ...")
ds_bm[f"{variable_name}_bm_year"] = block_maxima_year(ds_bm[variable_name].transpose("time", "station_id"))

# block maximum (Season)
logger.info(f"{variable_name.upper()} | Calculating BM (Season) ...")
group = "time.season"
ds_bm[f"{variable_name}_bm_season"] = block_maxima_yearly_group(ds_bm[variable_name].transpose("time", "station_id"), group=group)

# block maximum (Month)
logger.info(f"{variable_name.upper()} | Calculating BM (Month) ...")
group = "time.month"
ds_bm[f"{variable_name}_bm_month"] = block_maxima_yearly_group(ds_bm[variable_name].transpose("time", "station_id"), group=group)


[32m2024-12-04 14:25:59.453[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mT2M_MAX | Calculating BM (Year) ...[0m
[32m2024-12-04 14:26:02.312[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mT2M_MAX | Calculating BM (Season) ...[0m
[32m2024-12-04 14:26:08.369[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mT2M_MAX | Calculating BM (Month) ...[0m


CPU times: user 18.6 s, sys: 2.89 s, total: 21.5 s
Wall time: 21.8 s


#### Saving

In [7]:
logger.info(f"{variable_name.upper()} | Saving Yearly BM to disk ...")
save_name = f"{variable_name}_stations_spain_gmst_bm.zarr"
full_save_path = clean_data_dir.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm.to_zarr(full_save_path, mode="w");

[32m2024-12-04 14:26:21.475[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mT2M_MAX | Saving Yearly BM to disk ...[0m
[32m2024-12-04 14:26:21.475[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/t2m_max_stations_spain_gmst_bm.zarr[0m


### Method II - Resampling

Alternatively, we can resample using the blocks that we wish to use.
This method is arguably simpler to implement.
However, we lose all of the information regarding which actual date we have an extreme event.


In other words, using the above method with the mask, we can always obtain a resampled array at a particular frequency. 
However, resampling first will lose information.

In [8]:
%%time

# block maximum (YEAR)
logger.info(f"{variable_name.upper()} | Calculating BM (Year) ...")
ds_bm_year = ds.resample(time="1YE").max().sel(time=slice(None, "2019"))

# block maximum (SEASON)
logger.info(f"{variable_name.upper()} | Calculating BM (Season) ...")
ds_bm_season = ds.resample(time='QS-DEC').max().sel(time=slice(None, "2019"))

# block maximum (MONTH)
logger.info(f"{variable_name.upper()} | Calculating BM (Month) ...")
ds_bm_month = ds.resample(time="1ME").max().sel(time=slice(None, "2019"))

[32m2024-12-04 14:26:26.962[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mT2M_MAX | Calculating BM (Year) ...[0m
[32m2024-12-04 14:26:27.431[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mT2M_MAX | Calculating BM (Season) ...[0m
[32m2024-12-04 14:26:28.541[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mT2M_MAX | Calculating BM (Month) ...[0m


CPU times: user 3.95 s, sys: 809 ms, total: 4.76 s
Wall time: 4.84 s


#### Saving

In [9]:
logger.info(f"{variable_name.upper()} | Saving Yearly BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_year.zarr"
full_save_path = DATA_URL.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_year.to_zarr(full_save_path, mode="w");


logger.info(f"{variable_name.upper()} | Saving Seasonal BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_season.zarr"
full_save_path = DATA_URL.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_season.to_zarr(full_save_path, mode="w");

logger.info(f"{variable_name.upper()} | Saving Monthly BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_month.zarr"
full_save_path = DATA_URL.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_month.to_zarr(full_save_path, mode="w");

[32m2024-12-04 14:26:37.057[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mT2M_MAX | Saving Yearly BM to disk...[0m
[32m2024-12-04 14:26:37.058[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/t2m_stations_spain.zarr/t2m_max_stations_spain_gmst_bm_year.zarr[0m
[32m2024-12-04 14:26:37.692[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mT2M_MAX | Saving Seasonal BM to disk...[0m
[32m2024-12-04 14:26:37.693[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/t2m_stations_spain.zarr/t2m_max_stations_spain_gmst_bm_season.zarr[0m
[32m2024-12-04 14:26:38.300[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [1mT2M_MAX | Saving Monthly BM to disk...[0m
[32m2024-12-04 14:26:38.300[0m | [34m[1mDEBUG   [0m | [3

## Precipitation

In [14]:
DATA_URL = clean_data_dir.joinpath("pr_stations_spain.zarr")

### Data

In [15]:
ds = xr.open_dataset(DATA_URL, engine="zarr")
ds = ds.sel(time=slice(None, "2019"))

variable_name = "pr"

## Block Maximum Extremes

### Method I - Masks

In [18]:
%%time
ds_bm = ds.copy()

# block maximum (YEAR)
logger.info(f"{variable_name.upper()} | Calculating BM (Year)...")
ds_bm[f"{variable_name}_bm_year"] = block_maxima_year(ds_bm[variable_name].transpose("time", "station_id"))

# block maximum (Season)
logger.info(f"{variable_name.upper()} | Calculating BM (Season)...")
group = "time.season"
ds_bm[f"{variable_name}_bm_season"] = block_maxima_yearly_group(ds_bm[variable_name].transpose("time", "station_id"), group=group)

# # block maximum (Month)
# logger.info(f"{variable_name.upper()} | Calculating BM (Month)...")
# group = "time.month"
# ds_bm[f"{variable_name}_bm_month"] = block_maxima_yearly_group(ds_bm[variable_name].transpose("time", "station_id"), group=group)


[32m2024-12-04 14:28:00.442[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mPR | Calculating BM (Year)...[0m
[32m2024-12-04 14:28:05.501[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mPR | Calculating BM (Season)...[0m


CPU times: user 15.6 s, sys: 1.55 s, total: 17.1 s
Wall time: 17.2 s


#### Saving

In [19]:
logger.info(f"{variable_name.upper()} | Saving Yearly BM to disk...")
save_name = f"{variable_name}_stations_spain_bm.zarr"
full_save_path = clean_data_dir.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm.to_zarr(full_save_path, mode="w");

[32m2024-12-04 14:28:17.761[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mPR | Saving Yearly BM to disk...[0m
[32m2024-12-04 14:28:17.762[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/pr_stations_spain_bm.zarr[0m


### Method II - Resampling

Alternatively, we can resample using the blocks that we wish to use.
This method is arguably simpler to implement.
However, we lose all of the information regarding which actual date we have an extreme event.


In other words, using the above method with the mask, we can always obtain a resampled array at a particular frequency. 
However, resampling first will lose information.

In [20]:
%%time

# block maximum (YEAR)
logger.info(f"{variable_name.upper()} | Calculating BM (Year)...")
ds_bm_year = ds.resample(time="1YE").max().sel(time=slice(None, "2019"))

# block maximum (SEASON)
logger.info(f"{variable_name.upper()} | Calculating BM (Season)...")
ds_bm_season = ds.resample(time='QS-DEC').max().sel(time=slice(None, "2019"))

# block maximum (MONTH)
logger.info(f"{variable_name.upper()} | Calculating BM (Month) ...")
ds_bm_month = ds.resample(time="1ME").max().sel(time=slice(None, "2019"))

[32m2024-12-04 14:28:20.609[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mPR | Calculating BM (Year)...[0m
[32m2024-12-04 14:28:21.260[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mPR | Calculating BM (Season)...[0m
[32m2024-12-04 14:28:23.654[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mPR | Calculating BM (Month) ...[0m


CPU times: user 9.13 s, sys: 1.04 s, total: 10.2 s
Wall time: 10.3 s


#### Saving

In [21]:
logger.info(f"{variable_name.upper()} | Saving Yearly BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_year.zarr"
full_save_path = clean_data_dir.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_year.to_zarr(full_save_path, mode="w");


logger.info(f"{variable_name.upper()} | Saving Seasonal BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_season.zarr"
full_save_path = clean_data_dir.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_season.to_zarr(full_save_path, mode="w");

logger.info(f"{variable_name.upper()} | Saving Monthly BM to disk...")
save_name = f"{variable_name}_stations_spain_gmst_bm_month.zarr"
full_save_path = clean_data_dir.joinpath(save_name)
logger.debug(f"Save file: {full_save_path}")
ds_bm_month.to_zarr(full_save_path, mode="w");

[32m2024-12-04 14:28:30.994[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mPR | Saving Yearly BM to disk...[0m
[32m2024-12-04 14:28:30.995[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/pr_stations_spain_gmst_bm_year.zarr[0m
[32m2024-12-04 14:28:33.180[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mPR | Saving Seasonal BM to disk...[0m
[32m2024-12-04 14:28:33.181[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [34m[1mSave file: /home/juanjohn/pool_data/dynev4eo/data/clean/pr_stations_spain_gmst_bm_season.zarr[0m
[32m2024-12-04 14:28:35.316[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [1mPR | Saving Monthly BM to disk...[0m
[32m2024-12-04 14:28:35.317[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [34m[1mSave file: /home/