In [None]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

The raw GHCN-Daily station files (`.dly`) can be downloaded from https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/all/

In [None]:
def compute_mean(dfs, elem_name, last_year=2022, year_window=(5, 30)):
    """Compare each station's recent mean of one element with its historical mean.

    For every frame in ``dfs`` the column ``elem_name`` is first averaged per
    year (NaN values ignored). The yearly means are then averaged over two
    adjacent windows that end at ``last_year``:

    * recent:  ``last_year - year_window[0] < year <= last_year``
    * history: ``last_year - sum(year_window) < year <= last_year - year_window[0]``

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One frame per station. Each frame must be groupable by ``'year'``.
    elem_name : str
        Name of the column to average.
    last_year : int, optional
        Last year (inclusive) of the recent window.
    year_window : sequence of two ints, optional
        Length in years of the recent window and of the historical window
        that precedes it.

    Returns
    -------
    recent, history : numpy.ndarray
        Float arrays with one entry per frame in ``dfs``. An entry is NaN when
        the frame has no ``elem_name`` column or when either window contains
        no year with valid data.
    """
    n = len(dfs)
    recent, history = np.full(n, np.nan), np.full(n, np.nan)
    recent_start = last_year - year_window[0]
    history_start = last_year - np.sum(year_window)
    for i, df in enumerate(dfs):
        if elem_name not in df:
            continue
        # Select the column before aggregating so that unrelated (possibly
        # non-numeric) columns are never averaged. Years without a single
        # valid value are dropped so they behave like years that are absent
        # from the data instead of turning the whole window mean into NaN.
        yearly = df.groupby('year')[elem_name].mean().dropna()
        year = yearly.index.to_numpy()
        elem = yearly.to_numpy()
        idx = (year > recent_start) & (year <= last_year)
        jdx = (year > history_start) & (year <= recent_start)
        # Only report a station when both windows have data; otherwise the
        # recent/history comparison would be meaningless.
        if idx.any() and jdx.any():
            recent[i] = elem[idx].mean()
            history[i] = elem[jdx].mean()
    return recent, history

In [None]:
# Load the station metadata table. The CSV has no header row, so the column
# names are supplied explicitly and column 0 is used as the index.
# NOTE(review): a later cell joins this table to a frame indexed by station ID,
# so the index is presumably the station ID — confirm against the CSV layout,
# and check that the trailing 'prcp' / 'prcp_attributes' names really match the
# file's last columns.
station_coords = pd.read_csv('ghcnd-stations.csv', header=None, index_col=0, 
                       names=['latitude','longitude', 'elevation','name', 'prcp', 'prcp_attributes'])

In [None]:
# Collect the per-station parquet files in `data_dir` whose names start with
# the chosen country code, in sorted order so stations and frames line up.
country_code = 'IT'
data_dir = 'ghcnd_all'
data_files = glob.glob(os.path.join(data_dir, country_code + '*.parquet*'))
data_files.sort()
# The station ID is the file name up to its first dot.
station_IDs = [os.path.basename(path).split('.')[0] for path in data_files]
# One DataFrame per station, in the same order as `station_IDs`.
dfs = list(map(pd.read_parquet, data_files))

In [None]:
# Analysis windows: the `window[0]` years ending at `last_year` are "recent",
# the `window[1]` years before that are "history".
last_year = 2022
window = [5, 30]
data = {}
# Per-element arrays keyed by element name. The scatter and histogram cells
# further down read `recent[...]` and `history[...]`, so they must be defined
# here for the notebook to run on a fresh kernel.
recent, history = {}, {}
for elem_name in ('tmax', 'tmin'):
    rec, hist = compute_mean(dfs, elem_name, last_year, window)
    recent[elem_name] = rec
    history[elem_name] = hist
    data[elem_name + '_recent'] = rec
    data[elem_name + '_history'] = hist
    data[elem_name + '_delta'] = rec - hist
# One row per station, columns <element>_{recent,history,delta}.
df = pd.DataFrame(data=data, index=station_IDs)

In [None]:
# Attach coordinates and names to the per-station means; the inner join keeps
# only the stations that appear in both tables.
df = df.join(station_coords[['latitude','longitude','elevation','name']], how='inner')
# Blank out both deltas for every station whose tmax delta is below -5.
# NOTE(review): presumably an outlier / bad-data filter — confirm the threshold.
idx = np.flatnonzero((df['tmax_delta'] < -5).to_numpy())
jdx = list(map(df.columns.get_loc, ('tmax_delta','tmin_delta')))
df.iloc[idx, jdx] = np.nan

In [None]:
# Station map: one point per station at (longitude, latitude), coloured by the
# tmax delta, drawn with equal axis scaling.
fig, ax = plt.subplots()
df.plot(kind='scatter', x='longitude', y='latitude', c='tmax_delta',
        colormap='coolwarm', ax=ax)
ax.axis('equal')

In [None]:
# One panel per element: each station's recent-window mean plotted against its
# historical-window mean, with the identity line for reference.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))
for col, elem in enumerate(recent):
    panel = ax[col]
    hist_vals, rec_vals = history[elem], recent[elem]
    panel.plot(hist_vals, rec_vals, 'o', color=[.5,.5,.5], markersize=4,
               markerfacecolor='w', markeredgewidth=1)
    # End points of the identity line: the overall data range (NaNs ignored),
    # pushed outwards by 10% on each side.
    lo = min(np.nanmin(hist_vals), np.nanmin(rec_vals))
    hi = max(np.nanmax(hist_vals), np.nanmax(rec_vals))
    if lo > 0:
        lo = lo * 0.9
    else:
        lo = lo * 1.1
    if hi > 0:
        hi = hi * 1.1
    else:
        hi = hi * 0.9
    panel.plot([lo, hi], [lo, hi], 'k', lw=2)
    panel.set_title(elem)
    first_hist_year = last_year - np.sum(window) + 1
    last_hist_year = last_year - window[0]
    panel.set_xlabel('Mean {} to {}'.format(first_hist_year, last_hist_year))
# The y label is shared, so it is only written on the left-hand panel.
ax[0].set_ylabel('Mean {} to {}'.format(last_year - window[0] + 1, last_year))
sns.despine()
fig.tight_layout()

In [None]:
# Per-station change in tmax (recent minus history), with stations lacking a
# value removed, binned into `de`-wide bins starting at -2 and stopping before 5.
delta_T = recent['tmax'] - history['tmax']
delta_T = delta_T[~np.isnan(delta_T)]
de = 0.2
edges = np.arange(-2, 5, de)
n, _ = np.histogram(delta_T, bins=edges)

In [None]:
# Histogram of the tmax change: one grey bar per bin, anchored at the bin's
# left edge and drawn at 75% of the bin width so neighbouring bars do not touch.
fig, ax = plt.subplots(figsize=(5, 3))
ax.bar(edges[:-1], n, width=0.75 * de, align='edge',
       facecolor=np.full(3, 0.7), edgecolor='k', linewidth=1)
ax.set(xlabel='ΔT [C]', ylabel='Count')
sns.despine()
fig.tight_layout()