In [None]:
import scipy.stats
import numpy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import xarray as xr

## set plotting style: white axes background, grid lines switched off
style_overrides = {"axes.facecolor": "white", "axes.grid": False}
sns.set(rc=style_overrides)

## initialize random number generator
## A fixed seed makes every draw from `rng` repeat exactly on
## "Restart Kernel -> Run All"; change SEED to obtain a different
## (but still reproducible) random stream.
SEED = 0
rng = np.random.default_rng(SEED)

## Load data

In [None]:
# Read the CSV and use its "time" column as the row index
tas_frame = pd.read_csv(
    "../data/train/A_tas_lat.41_lon.272_spatial.2_reduce.max.csv"
).set_index("time")

# Convert to xarray and keep only the "tas" variable
data = xr.Dataset.from_dataframe(tas_frame)["tas"]

# Rebuild the time axis as cftime objects: start at the first time label read
# from the file and advance one day per sample, for as many samples as exist.
# NOTE(review): recent xarray releases deprecate `cftime_range` in favour of
# `xr.date_range(..., use_cftime=True)` -- confirm against the installed version.
n_times = data.sizes["time"]
first_label = data["time"].values[0]
updated_time = xr.cftime_range(start=first_label, periods=n_times, freq="1D")

# Swap the string time labels for the cftime index
data = data.assign_coords(time=updated_time)

## Resample to blocks (for GEV)

In [None]:
# Block maxima: group the series by the calendar year of its "time" coordinate
# and keep the largest value of each group, giving one value per year.
# NOTE(review): a year that is only partially covered by the record still
# yields a maximum -- confirm the record starts/ends on year boundaries.
data_blocked = data.groupby("time.year").max()

## Fit GEV

In [None]:
# Fit the generalized extreme value (GEV) family to the block maxima and
# freeze a distribution object with the estimated parameters.
# Note: scipy's shape parameter `c` uses the opposite sign to the shape
# parameter used by several other sources and packages (see scipy docs).
gev = scipy.stats.genextreme
c, loc, scale = gev.fit(data_blocked)
rv = gev(c=c, loc=loc, scale=scale)

## Empirical PDF

In [None]:
## histogram of the block maxima on fixed-width bins
## (np.arange excludes its stop value, so the last edge is 325; values
## falling outside the edges are not counted by np.histogram)
bin_width = 1
first_edge, stop_edge = 304, 326
bin_edges = np.arange(first_edge, stop_edge, bin_width)
counts, _ = np.histogram(data_blocked, bins=bin_edges)

## empirical PDF: rescale the counts so the bars enclose unit area
total_area = (counts * bin_width).sum()
pdf_empirical = counts / total_area

## Plot result

In [None]:
## evaluate the fitted density on a regular grid
xvals = np.linspace(304, 326, 200)
pdf = rv.pdf(xvals)

fig, ax = plt.subplots(figsize=(4, 3))

## normalized histogram (filled steps) with the fitted density drawn on top
ax.stairs(pdf_empirical, edges=bin_edges, color="gray", fill=True, alpha=0.3)
ax.plot(xvals, pdf, c="k")

## axis labels
ax.set(xlabel=r"Annual max ($K$)", ylabel="Prob.")

## y-axis: fix the lower limit (upper limit left as is), then set the ticks.
## Kept before the scatter call below so the limits are frozen at this point.
ax.set_ylim(bottom=-0.01)
ax.set_yticks([0, 0.05, 0.1])

## mark the largest block maximum with a red cross on the baseline
largest_max = data_blocked.max()
ax.scatter(largest_max, 0, marker="x", c="r", s=50)

plt.show()

## Return level

#### theoretical return level

In [None]:
## return periods, log-spaced from 10**0 to 10**3.5
t_r = np.logspace(0.0, 3.5)

## exceedance probability for each return period
## (the first period is exactly 1, i.e. probability 1, so the first return
## level is the lower end of the fitted distribution's support, which may
## be -inf depending on the fitted shape)
p_exceed = 1 / t_r

## Compute return level from the inverse survival function
x_r = rv.isf(p_exceed)

## compare to second method: quantile function at the complementary probability
x_r_test = rv.ppf(1 - p_exceed)

print(np.allclose(x_r, x_r_test))

#### empirical return level

In [None]:
## order the block maxima from smallest to largest
ascending = np.argsort(data_blocked.values)
data_sorted = data_blocked.isel(year=ascending)

## rank of each sorted value: 1 for the smallest, n for the largest
n = len(data_sorted)
m = np.arange(1, n + 1)

## empirical non-exceedance probability from the rank, m / (n + 1)
cdf_empirical = m / (n + 1)

## empirical return period = 1 / exceedance probability
## (despite the `x_r_` prefix this holds periods, not levels; it is used
## as the x-coordinate when plotted against `data_sorted`)
p_exceed_empirical = 1 - cdf_empirical
x_r_empirical = 1 / p_exceed_empirical

## Plot return level as a function of return period (number of blocks)

In [None]:
fig, ax = plt.subplots(figsize=(4, 3))

## fitted model: return level (y) against return period (x)
ax.plot(t_r, x_r)

## observations: sorted block maxima at their empirical return periods
ax.scatter(x_r_empirical, data_sorted, c="k", s=1.5)

## label axes and use a logarithmic return-period axis
ax.set(xlabel="Return period (years)", ylabel=r"$T_{2m}$ ($K$)")
ax.set_xscale("log")

plt.show()