## Loading some libraries and setting some variables

In [None]:
import os
import numpy as np
import pandas as pd


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Relative folder and file name of the CSV that the loading cell reads.
# NOTE(review): 6447 is presumably the station identifier for Uccle - confirm.
data_folder = "../data/belgian_synop/"
data_file = "synop_data_6447.csv"

## Example of loading only the Tmax of Uccle

First, we actually load the whole dataset:

In [None]:
# Build the CSV path from the configured folder and file name, then read the whole file.
csv_path = os.path.join(data_folder, data_file)
dataset = pd.read_csv(csv_path)

In [None]:
# Show the loaded table as the cell output.
dataset

Now only keeping Tmax values

In [None]:
# Name of the dataset column analysed in the rest of the notebook.
var = "temp_max"

In [None]:
# Keep only the rows where the selected variable is present.
# The explicit .copy() makes this an independent frame, so the column
# assignments done on it afterwards do not raise pandas' SettingWithCopyWarning.
uccle_sel_var = dataset[dataset[var].notna()].copy()

and introducing some time-related quantities

In [None]:
# Cast the timestamp column to datetime64[ns] so the .dt accessor can be used on it.
uccle_sel_var['timestamp'] = uccle_sel_var.timestamp.astype('datetime64[ns]')

In [None]:
# Day of the year of each observation; used as the x axis of the climatology plots.
uccle_sel_var['day_of_year'] = uccle_sel_var['timestamp'].dt.day_of_year

In [None]:
# Calendar year of each observation; used to facet the year-by-year plot.
uccle_sel_var['year'] = uccle_sel_var['timestamp'].dt.year

and setting the timestamps as index

In [None]:
# Index the frame by timestamp and put the rows in chronological order,
# which the date-string selections with .loc in the plotting cells rely on.
uccle_sel_var = uccle_sel_var.set_index('timestamp').sort_index()

In [None]:
# Show the prepared, timestamp-indexed table as the cell output.
uccle_sel_var

## Plotting the Tmax

Plotting Tmax for the year 2025:

In [None]:
# Time series of the selected variable, restricted to the rows dated 2025.
tmax_series = uccle_sel_var[var]
tmax_series.loc['2025'].plot()
plt.xlabel('date')
plt.ylabel('Temperature maximale °C');

Plotting a scatter plot of maximum temperature between 1981 and 2010, and then 1991 and 2020:

In [None]:
# One scatter layer per 30-year period, drawn on the same axes.
for start, end in (('1981', '2010'), ('1991', '2020')):
    sns.scatterplot(
        data=uccle_sel_var.loc[start:end],
        x='day_of_year', y=var,
        label=f'Période {start}-{end}',
    )
plt.xlabel("jour de l'année")
plt.ylabel('Temperature maximale °C');
plt.legend();

In [None]:
# Mean of the variable for each day of the year, computed separately per period.
for start, end in (('1981', '2010'), ('1991', '2020')):
    daily_mean = uccle_sel_var.loc[start:end].groupby('day_of_year')[[var]].mean()
    sns.lineplot(data=daily_mean, x='day_of_year', y=var, label=f'Période {start}-{end}')
plt.xlabel("jour de l'année")
plt.ylabel('Moyenne de la Temperature maximale °C');
plt.legend();

In [None]:
# Width, in days, of the centred rolling window used to smooth the difference.
window_size = 16

# Per-day-of-year mean of the variable for each of the two periods.
clim_1981_2010 = uccle_sel_var.loc['1981':'2010'].groupby('day_of_year')[[var]].mean()
clim_1991_2020 = uccle_sel_var.loc['1991':'2020'].groupby('day_of_year')[[var]].mean()

# Later period minus earlier period, plus a smoothed version of the same curve.
difference_climato = clim_1991_2020 - clim_1981_2010
difference_smoothed = difference_climato.rolling(window_size, center=True).mean()

sns.lineplot(data=difference_climato, x='day_of_year', y=var, label='Différence (1991-2020) - (1981-2010)');
sns.lineplot(data=difference_smoothed, x='day_of_year', y=var, label=f'Rolling mean ({window_size} days)');
plt.xlabel("jour de l'année")
plt.ylabel('Moyenne de la Temperature maximale °C');
plt.legend();

In [None]:
# Highest value of the variable recorded on each day of the year, per period.
for start, end in (('1981', '2010'), ('1991', '2020')):
    daily_max = uccle_sel_var.loc[start:end].groupby('day_of_year')[[var]].max()
    sns.lineplot(data=daily_max, x='day_of_year', y=var, label=f'Période {start}-{end}')
plt.xlabel("jour de l'année")
plt.ylabel('Maximum de la Temperature maximale °C');
plt.legend();

Plotting a scatter plot of maximum temperature between 1981 and 2000, and then 2001 and 2020:

In [None]:
# One scatter layer per 20-year period, drawn on the same axes.
# The legend label is built from the same bounds as the data slice, so the
# second period reads 2001-2020 (it was mislabelled 2000-2020).
for start, end in (('1981', '2000'), ('2001', '2020')):
    sns.scatterplot(
        data=uccle_sel_var.loc[start:end],
        x='day_of_year', y=var,
        label=f'Période {start}-{end}',
    )
plt.xlabel("jour de l'année")
plt.ylabel('Temperature maximum °C');
plt.legend();

In [None]:
# Mean of the variable for each day of the year, computed separately per period.
# The legend label is built from the same bounds as the data slice, so the
# second period reads 2001-2020 (it was mislabelled 2000-2020).
for start, end in (('1981', '2000'), ('2001', '2020')):
    daily_mean = uccle_sel_var.loc[start:end][[var, 'day_of_year']].groupby('day_of_year').mean()
    sns.lineplot(data=daily_mean, x='day_of_year', y=var, label=f'Période {start}-{end}')
plt.xlabel("jour de l'année")
plt.ylabel('Moyenne de la Temperature maximale °C');
plt.legend();

In [None]:
# Width, in days, of the centred rolling window used to smooth the difference.
window_size = 16

# Per-day-of-year mean of the variable for each of the two 20-year periods.
clim_1981_2000 = uccle_sel_var.loc['1981':'2000'][[var, 'day_of_year']].groupby('day_of_year').mean()
clim_2001_2020 = uccle_sel_var.loc['2001':'2020'][[var, 'day_of_year']].groupby('day_of_year').mean()

# Later period minus earlier period.
difference_climato = clim_2001_2020 - clim_1981_2000

# The legend now names the periods actually subtracted; it previously carried
# the label of the 30-year comparison.
sns.lineplot(data=difference_climato, x='day_of_year', y=var, label='Différence (2001-2020) - (1981-2000)');
sns.lineplot(data=difference_climato.rolling(window_size, center=True).mean(), x='day_of_year', y=var, label=f'Rolling mean ({window_size} days)');
plt.xlabel("jour de l'année")
plt.ylabel('Moyenne de la Temperature maximale °C');
plt.legend();

In [None]:
# Highest value of the variable recorded on each day of the year, per period.
# The legend label is built from the same bounds as the data slice, so the
# second period reads 2001-2020 (it was mislabelled 2000-2020).
for start, end in (('1981', '2000'), ('2001', '2020')):
    daily_max = uccle_sel_var.loc[start:end][[var, 'day_of_year']].groupby('day_of_year').max()
    sns.lineplot(data=daily_max, x='day_of_year', y=var, label=f'Période {start}-{end}')
plt.xlabel("jour de l'année")
plt.ylabel('Maximum de la Temperature maximale °C');
plt.legend();

In [None]:
# Lowest value of the variable recorded on each day of the year, per period.
# The legend label is built from the same bounds as the data slice, so the
# second period reads 2001-2020 (it was mislabelled 2000-2020).
for start, end in (('1981', '2000'), ('2001', '2020')):
    daily_min = uccle_sel_var.loc[start:end][[var, 'day_of_year']].groupby('day_of_year').min()
    sns.lineplot(data=daily_min, x='day_of_year', y=var, label=f'Période {start}-{end}')
plt.xlabel("jour de l'année")
plt.ylabel('Minimum de la Temperature maximale °C');
plt.legend();

Plotting the year-by-year curves:

In [None]:
# Switch seaborn to its "dark" theme (a global setting, so later figures use it too).
sns.set_theme(style="dark")

# Grid of small panels, one per year, each highlighting that year's curve.
df = uccle_sel_var.loc[:, [var, 'day_of_year', 'year']]
g = sns.relplot(
    data=df, kind="line",
    x="day_of_year", y=var,
    col="year", col_wrap=3, hue="year", palette="autumn_r",
    linewidth=4, zorder=5,
    height=2, aspect=1.5, legend=False,
)

for year, ax in g.axes_dict.items():
    # Write the year inside the panel; the regular panel titles are blanked below.
    ax.text(.8, .85, year, transform=ax.transAxes, fontweight="bold")

    # Draw every year as a thin gray line behind the highlighted curve.
    sns.lineplot(
        data=df, x="day_of_year", y=var, units="year",
        estimator=None, color=".7", linewidth=1, ax=ax,
    )
    ax.set_xlim(0., 365.)

g.set_titles("")
g.set_axis_labels("jour de l'année", "Temp. max. °C")
g.tight_layout()
# Write the figure to a PNG file in the working directory.
plt.savefig('tmax_uccle.png', dpi=300)

Plotting the timeseries on the same plot:

In [None]:
# Overlay the reference-period years in gray and the most recent years in colour.
cmap = plt.get_cmap('autumn_r')
plt.figure(figsize=(10,8))
ax = plt.subplot()

# Reference period. range() excludes its upper bound, so 2011 is needed for
# 2010 to be drawn, as the '1981-2010' legend label announces.
for year in range(1981, 2011):
    # Only the first gray curve carries a label, so the legend shows one entry for the whole period.
    legend_kwargs = {'label': '1981-2010'} if year == 1981 else {}
    sns.lineplot(data=uccle_sel_var.loc[str(year)], x='day_of_year', y=var, ax=ax, color='tab:gray', **legend_kwargs)

# Recent years, one colour each, taken from the colormap.
# (A leftover print() that only emitted blank lines was removed.)
for year in range(2020, 2026):
    sns.lineplot(data=uccle_sel_var.loc[str(year)], x='day_of_year', y=var, color=cmap((year-2019)/(2025.-2019)), ax=ax, label=str(year))

ax.set_xlabel("jour de l'année")
ax.set_ylabel("Temp. max. °C")
# Trailing semicolon keeps the limits tuple out of the cell output.
ax.set_xlim(0,365.);

In [None]:
# Same overlay as the full-year figure, with the x axis restricted to days 165-240.
cmap = plt.get_cmap('autumn_r')
plt.figure(figsize=(10,8))
ax = plt.subplot()

# Reference period. range() excludes its upper bound, so 2011 is needed for
# 2010 to be drawn, as the '1981-2010' legend label announces.
for year in range(1981, 2011):
    # Only the first gray curve carries a label, so the legend shows one entry for the whole period.
    legend_kwargs = {'label': '1981-2010'} if year == 1981 else {}
    sns.lineplot(data=uccle_sel_var.loc[str(year)], x='day_of_year', y=var, ax=ax, color='tab:gray', **legend_kwargs)

# Recent years, one colour each, taken from the colormap.
# (A leftover print() that only emitted blank lines was removed.)
for year in range(2020, 2026):
    sns.lineplot(data=uccle_sel_var.loc[str(year)], x='day_of_year', y=var, color=cmap((year-2019)/(2025.-2019)), ax=ax, label=str(year))

ax.set_xlabel("jour de l'année")
ax.set_ylabel("Temp. max. °C")

# Trailing semicolon keeps the limits tuple out of the cell output.
ax.set_xlim(165,240);