In [1]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import seaborn as sns
import plotly.graph_objects as go

In [None]:
# Location of the 2010-2024 meteo export, resolved relative to the working directory.
dataPath = Path("../..").joinpath(
    "validation-meteo-data",
    "donneesmeteo_2010-2024",
    "donneesmeteo_2010-2024.csv",
)
# The export uses ';' as its field separator.
meteodf = pd.read_csv(filepath_or_buffer=dataPath, delimiter=";")
# Preview the first five rows.
meteodf.head(5)

In [None]:
# Distinct parameter labels present in the data, followed by the frame's (rows, columns) shape.
print("Parameters:", meteodf["libellecourt"].unique())
meteodf.shape

In [None]:
# A missing valeurorigine means the measurement was never corrected, so the
# original value is the current one: backfill it from valeur.
meteodf["valeurorigine"] = meteodf["valeurorigine"].fillna(meteodf["valeur"])
meteodf

In [None]:
# -999 in valeurorigine is treated as a placeholder for "no value": turn it into a real NaN.
meteodf["valeurorigine"] = meteodf["valeurorigine"].replace(-999, np.nan)
meteodf

In [6]:
# For the classification model we only need the first 5 columns.
# Take an explicit copy: the following cells mutate this frame (row drops,
# index reset, overwritten/added columns), and doing that on a bare column
# selection of meteodf raises SettingWithCopyWarning on pandas versions
# without copy-on-write. The copy also guarantees meteodf is never touched.
meteodfToUse = meteodf.iloc[:, :5].copy()

In [None]:
# Drop every row that still has a missing value in any of the kept columns.
# Rebind instead of using inplace=True: an in-place drop on a frame that was
# selected out of meteodf is a classic SettingWithCopyWarning trigger, and
# inplace brings no performance benefit.
meteodfToUse = meteodfToUse.dropna()


In [8]:
# Renumber the rows 0..n-1; drop=True discards the old index instead of
# keeping it as a column. Rebinding (rather than inplace=True) makes this cell
# produce a fresh frame from its input.
meteodfToUse = meteodfToUse.reset_index(drop=True)

In [None]:
# All measurements are stamped 00:00:00, so only the date part (the text
# before the first space) is kept and parsed.
# - .astype(str) keeps the cell re-runnable: once the column is datetime, a
#   plain Python .split on Timestamp values would raise AttributeError.
# - The vectorised .str accessor replaces a per-row Python lambda.
# - .assign returns a new frame, avoiding attribute-style column assignment
#   on a frame that was selected out of meteodf.
meteodfToUse = meteodfToUse.assign(
    datemesure=pd.to_datetime(
        meteodfToUse["datemesure"].astype(str).str.split(" ").str[0]
    )
)

In [None]:
# Flag each row: 1 when the current value differs from the originally recorded
# one (i.e. it was corrected), 0 otherwise.
meteodfToUse["correction"] = (
    meteodfToUse["valeur"].ne(meteodfToUse["valeurorigine"]).astype(int)
)
meteodfToUse

In [None]:

# Count corrections per (year, parameter). After reset_index the year ends up
# in a column still named "datemesure", next to "libellecourt" and "correction".
correctiondf = (
    meteodfToUse
    .groupby([meteodfToUse["datemesure"].dt.year, meteodfToUse["libellecourt"]])["correction"]
    .sum()
    .reset_index()
)
correctiondf

In [None]:
# One line per measured parameter: number of corrected measurements per year.
parameters = correctiondf["libellecourt"].unique()
fig = go.Figure()

for parameter in parameters:
    # Rows of the yearly summary that belong to this parameter only.
    yearlyCounts = correctiondf.loc[correctiondf["libellecourt"].eq(parameter)]
    fig.add_trace(
        go.Scatter(
            x=yearlyCounts["datemesure"],  # holds the year produced by the aggregation
            y=yearlyCounts["correction"],
            name=parameter,
        )
    )

# Magic-underscore keywords are shorthand for the nested title dictionaries.
fig.update_layout(
    title_text="Number of corrections per year",
    xaxis_title_text="Year",
    yaxis_title_text="Corrections",
)
fig.show()