In [None]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as scs
from pyextremes import EVA
from sklearn.metrics.pairwise import haversine_distances

# 1. Loading the data

In [None]:
# Root folder of the input data, relative to the notebook's working directory.
DATA_PATH = Path("data")
# Sub-folder holding the weather files read below.
WEATHER_DATA_PATH = DATA_PATH.joinpath("RR59")

In [None]:
# Semicolon-separated file; the AAAAMMJJ column is parsed as dates at load time.
weather_data = pd.read_csv(
    WEATHER_DATA_PATH / "Q_59_previous-1950-2022_RR-T-Vent.csv",
    sep=";",
    parse_dates=["AAAAMMJJ"],
)

## 1.1 Selecting one station of interest

In [None]:
# Value matched against the NOM_USUEL column to pick the station studied below.
station_of_interest = "DUNKERQUE"

In [None]:
# Rows whose NOM_USUEL equals the selected station.
is_station = weather_data["NOM_USUEL"] == station_of_interest
dunkerque_data = weather_data.loc[is_station]

# RR column of that station as a series indexed by the AAAAMMJJ dates.
dunkerque_by_date = dunkerque_data.set_index("AAAAMMJJ")
dunkerque_rainfall = dunkerque_by_date["RR"]

# 2. Time series analysis

In [None]:
# Interactive plot of the full RR series, drawn with the plotly plotting backend.
dunkerque_rainfall.plot(backend="plotly")

In [None]:
# Autocorrelation of the series with itself for lags 0 through 29,
# stored as a series indexed by the lag.
lags = range(30)
autocorr = pd.Series(
    data=[dunkerque_rainfall.autocorr(lag=lag) for lag in lags],
    index=lags,
    name="Autocorrelation",
)

In [None]:
# Autocorrelation against lag, drawn with the plotly plotting backend.
autocorr.plot(backend="plotly")

# 3. Study yearly maximum

## 3.1 Resampling data

In [None]:
# Largest value of the series within each yearly bin.
# A year without any valid observation comes out of the resample as NaN;
# those entries are dropped because scipy's `fit` rejects non-finite data,
# which would make every distribution fit below fail.
# NOTE(review): recent pandas releases deprecate the "y" alias in favour of
# "YE" - switch once the minimum supported pandas version allows it.
yearly_max = dunkerque_rainfall.resample("1y").max().dropna()

In [None]:
# Histogram of the yearly maxima, with bar heights normalised to probabilities.
yearly_max.hist(
    backend="plotly",
    nbins=100,
    histnorm="probability",
    opacity=0.5,
)

## 3.2 Try different statistical fits on the data

In [None]:
# Evaluation grid shared by the fitted curves below:
# 101 evenly spaced points from 1 to 100 (both ends included).
x = np.linspace(start=1, stop=100, num=101)

In [None]:
# Fit a normal distribution to the yearly maxima.
norm_parameters = scs.norm.fit(yearly_max)

# "probability density" puts the bars on the same scale as the fitted PDF.
# With "probability" the bar heights depend on the bin width, so the overlay
# is only comparable when the bins happen to be exactly one unit wide.
fig = yearly_max.hist(
    backend="plotly", nbins=100, histnorm="probability density", opacity=0.5
)

# `px.line(labels=...)` expects a dict of axis/column labels, so a bare string
# never names the curve. Name (and colour) the trace itself instead so it is
# identifiable in the legend once added to the histogram figure.
norm_plot = px.line(x=x, y=scs.norm.pdf(x, *norm_parameters))
norm_plot.update_traces(
    name="Normal fit",
    showlegend=True,
    line_color=px.colors.qualitative.Plotly[1],
)

fig.add_trace(norm_plot.data[0])

In [None]:
# Fit three candidate distributions to the yearly maxima and build one PDF
# curve per fit. `px.line(labels=...)` expects a dict of axis/column labels,
# so the bare strings passed before never named the curves (and the Weibull
# one was mislabelled "Gumbel fit"). Each trace is now named and coloured
# explicitly so the three overlays can be told apart in the legend.
curve_colors = px.colors.qualitative.Plotly

gumbel_parameters = scs.gumbel_r.fit(yearly_max)
gumbel_plot = px.line(x=x, y=scs.gumbel_r.pdf(x, *gumbel_parameters))
gumbel_plot.update_traces(
    name="Gumbel fit", showlegend=True, line_color=curve_colors[1]
)

gompertz_parameters = scs.gompertz.fit(yearly_max)
gompertz_plot = px.line(x=x, y=scs.gompertz.pdf(x, *gompertz_parameters))
gompertz_plot.update_traces(
    name="Gompertz fit", showlegend=True, line_color=curve_colors[2]
)

weibull_parameters = scs.weibull_max.fit(yearly_max)
weibull_plot = px.line(x=x, y=scs.weibull_max.pdf(x, *weibull_parameters))
weibull_plot.update_traces(
    name="Weibull fit", showlegend=True, line_color=curve_colors[3]
)

# "probability density" keeps the histogram on the same scale as the PDFs,
# whatever bin width plotly ends up choosing.
fig = yearly_max.hist(
    backend="plotly", nbins=100, histnorm="probability density", opacity=0.5
)

fig.add_trace(gumbel_plot.data[0])
fig.add_trace(weibull_plot.data[0])
fig.add_trace(gompertz_plot.data[0])

In [None]:
# Fit a right-skewed Gumbel distribution to the yearly maxima.
gumbel_parameters = scs.gumbel_r.fit(yearly_max)

# "probability density" puts the bars on the same scale as the fitted PDF,
# whatever bin width plotly ends up choosing.
fig = yearly_max.hist(
    backend="plotly", nbins=100, histnorm="probability density", opacity=0.5
)

# `px.line(labels=...)` expects a dict of axis/column labels, so a bare string
# never names the curve; set the legend name and colour on the trace itself.
gumbel_plot = px.line(x=x, y=scs.gumbel_r.pdf(x, *gumbel_parameters))
gumbel_plot.update_traces(
    name="Gumbel fit",
    showlegend=True,
    line_color=px.colors.qualitative.Plotly[1],
)

fig.add_trace(gumbel_plot.data[0])

In [None]:
# Fit a Pareto distribution to the yearly maxima.
pareto_parameters = scs.pareto.fit(yearly_max)

# "probability density" puts the bars on the same scale as the fitted PDF,
# whatever bin width plotly ends up choosing.
fig = yearly_max.hist(
    backend="plotly", nbins=100, histnorm="probability density", opacity=0.5
)

# `px.line(labels=...)` expects a dict of axis/column labels, so a bare string
# never names the curve; set the legend name and colour on the trace itself.
pareto_plot = px.line(x=x, y=scs.pareto.pdf(x, *pareto_parameters))
pareto_plot.update_traces(
    name="Pareto fit",
    showlegend=True,
    line_color=px.colors.qualitative.Plotly[1],
)

fig.add_trace(pareto_plot.data[0])

## 3.3 Compare Exceedance probabilities of different distributions

In [None]:
# Survival function (exceedance probability) of each fitted model on the
# shared grid. The curves are gathered in a frame whose column names become
# the legend entries; the list previously passed as `labels` is not the
# mapping plotly express expects, so the curves were not labelled as intended.
exceedance_probabilities = pd.DataFrame(
    {
        "Normal": scs.norm.sf(x, *norm_parameters),
        "Gumbel": scs.gumbel_r.sf(x, *gumbel_parameters),
        "Pareto": scs.pareto.sf(x, *pareto_parameters),
    },
    index=pd.Index(x, name="x"),
)

# Log-log axes make the differences between the tails visible.
px.line(
    exceedance_probabilities,
    labels={"value": "Exceedance probability", "variable": "Distribution"},
    log_x=True,
    log_y=True,
)

In [None]:
# Exceedance probability under the normal fit: the common denominator below.
norm_exceedance = scs.norm.sf(x, *norm_parameters)

# Ratio of each alternative model's exceedance probability to the normal one.
gumbel_over_norm = scs.gumbel_r.sf(x, *gumbel_parameters) / norm_exceedance
pareto_over_norm = scs.pareto.sf(x, *pareto_parameters) / norm_exceedance

In [None]:
# Plot both ratios on log-log axes. Passing bare arrays left the legend with
# auto-generated placeholder names, so the curves are gathered in a frame
# whose column names say which ratio each line shows.
exceedance_ratios = pd.DataFrame(
    {
        "Gumbel / Normal": gumbel_over_norm,
        "Pareto / Normal": pareto_over_norm,
    },
    index=pd.Index(x, name="x"),
)

px.line(
    exceedance_ratios,
    labels={"value": "Exceedance probability ratio", "variable": "Ratio"},
    log_x=True,
    log_y=True,
)

# 4. Numerical approximations

In [None]:
# Fit the same Gumbel model twice, changing only the estimation method passed
# to scipy ("MLE" first, then "MM").
gumbel_parameters_mle, gumbel_parameters_mm = (
    scs.gumbel_r.fit(yearly_max, method=fit_method) for fit_method in ("MLE", "MM")
)

In [None]:
# Exceedance probability of the Gumbel model under each parameter estimate.
# Passing bare arrays left the legend with auto-generated placeholder names,
# so the curves are gathered in a frame whose column names identify the
# estimation method behind each line.
gumbel_exceedance_by_method = pd.DataFrame(
    {
        "Gumbel (MLE)": scs.gumbel_r.sf(x, *gumbel_parameters_mle),
        "Gumbel (MM)": scs.gumbel_r.sf(x, *gumbel_parameters_mm),
    },
    index=pd.Index(x, name="x"),
)

px.line(
    gumbel_exceedance_by_method,
    labels={"value": "Exceedance probability", "variable": "Estimation method"},
    log_x=True,
    log_y=True,
)

# 5. Pyextremes pipeline

## 5.1 Block Maxima

In [None]:
# Extreme-value analysis object for the block-maxima study, built on the
# full RR series of the selected station.
model_bm = EVA(dunkerque_rainfall)

In [None]:
# Extract extremes with the "BM" method using blocks of 365.2425 days,
# then plot the extracted extremes.
model_bm.get_extremes(method="BM", block_size="365.2425D")
model_bm.plot_extremes()


In [None]:
# Fit the model to the extracted extremes with pyextremes' default settings,
# then draw its diagnostic plot.
# alpha=0.95 is passed straight to pyextremes (presumably the width of the
# confidence interval - confirm in the pyextremes documentation).
model_bm.fit_model()
model_bm.plot_diagnostic(alpha=0.95)

In [None]:
# Same diagnostic call as the previous cell, this time keeping the two
# returned objects so the figure can be displayed on its own.
fig, ax = model_bm.plot_diagnostic(alpha=0.95)

In [None]:
# Display the block-maxima diagnostic figure as the cell output.
fig

## 5.2 Peaks Over Threshold (POT)

In [None]:
# Second, independent analysis object on the same series, used for the POT study.
model_pot = EVA(dunkerque_rainfall)


In [None]:
# Extract extremes with the "POT" method and a threshold of 40
# (presumably in the units of the RR column - TODO confirm),
# then plot them, keeping the two returned objects.
model_pot.get_extremes(method="POT", threshold=40)
fig, ax = model_pot.plot_extremes()

In [None]:
# Display the POT extremes figure as the cell output.
fig

In [None]:
# Fit the model to the POT extremes with pyextremes' default settings.
model_pot.fit_model()

In [None]:

# Diagnostic plot of the fitted POT model, keeping the two returned objects.
# alpha=0.95 is passed straight to pyextremes (presumably the width of the
# confidence interval - confirm in the pyextremes documentation).
fig, ax = model_pot.plot_diagnostic(alpha=0.95)

In [None]:
# Display the POT diagnostic figure as the cell output.
fig

# 6. Conclusion

What is the return period (in years) of a daily rainfall greater than 80 cm at Dunkerque?