# In situ reference network

## Import packages

In [None]:
import os

import cdsapi
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import xarray as xr
from c3s_eqc_automatic_quality_control import download

plt.style.use("seaborn-v0_8-notebook")

## Define Parameters

In [None]:
# Analysis window: first and last month, written as "YYYY-MM"
start = "2006-05"
stop = "2020-03"

# Station identifiers to keep; None means that every station is analysed
stations = ["BAR", "SOD", "NYA"]
assert stations is None or isinstance(stations, list)

# Folder for csv files
csv_dir = "./csv_files"

# CDS credentials: expose the location of the rc file through CDSAPI_RC
os.environ["CDSAPI_RC"] = os.path.expanduser("~/ciardini_virginia/.cdsapirc")

## Define request

In [None]:
collection_id = "insitu-observations-gruan-reference-network"

# Part of the request that is identical for every month
request = {
    "version": "1_0_0",
    "variable": [
        "air_temperature",
        "relative_humidity",
        "air_pressure",
        "altitude",
        "water_vapour_volume_mixing_ratio",
    ],
    "data_format": "netcdf",
}

client = cdsapi.Client()

# Build one request per month, keeping only months for which the constraints
# service returns a non-empty list of days
requests = []
for month_start in pd.date_range(start, stop, freq="1MS"):
    monthly_request = request | {
        "year": month_start.strftime("%Y"),
        "month": month_start.strftime("%m"),
    }
    available_days = client.client.apply_constraints(collection_id, monthly_request)[
        "day"
    ]
    if not available_days:
        continue
    requests.append(monthly_request | {"day": available_days})

## Functions to cache

In [None]:
def _reorganize_dataset(ds):
    """Reshape a dataset holding a single observed variable.

    The ``observation_value`` variable is renamed after the only value found
    in ``observed_variable`` (which is then dropped), data variables whose
    name starts with ``uncertainty`` are prefixed with that name, and data
    variables whose name contains ``units`` or ``type`` are removed and stored
    as attributes of the variable they refer to.

    Raises ``ValueError`` (from the tuple unpacking) when
    ``observed_variable``, or any ``units``/``type`` variable, does not hold
    exactly one distinct value.
    """
    # NOTE(review): the request asks for "air_pressure" and
    # "water_vapour_volume_mixing_ratio", whereas the keys below are
    # "pressure" and "water_vapour_mixing_ratio" — confirm against the values
    # actually found in ``observed_variable``.
    long_names = {
        "pressure": "Pressure",
        "air_temperature": "Temperature",
        "altitude": "Altitude",
        "relative_humidity": "Relative",
        "water_vapour_mixing_ratio": "Mixing",
    }

    # Rename
    (varname,) = set(ds["observed_variable"].values)
    ds = ds.rename(observation_value=str(varname)).drop_vars("observed_variable")
    uncertainty_names = {}
    for name in ds.data_vars:
        if name.startswith("uncertainty"):
            uncertainty_names[name] = "_".join([varname, name.replace("_value", "")])
    ds = ds.rename(uncertainty_names)

    # Update attrs. The items are materialised first because ``ds`` is rebound
    # every time a units/type variable is dropped.
    for name, da in list(ds.data_vars.items()):
        if name in long_names:
            da.attrs["long_name"] = long_names[name]
        for suffix in ("units", "type"):
            if suffix not in name:
                continue
            ds = ds.drop_vars(name)
            (value,) = set(da.values)
            if name == suffix:
                # A bare "units"/"type" variable describes the observation
                target = varname
            else:
                target = name.replace("_" + suffix, "")
            ds[target].attrs[suffix] = value
    return ds


def reorganize_dataset(ds, stations):
    """Decode, filter and pivot the dataset by observed variable.

    Byte-string data variables are decoded as UTF-8 in place. When
    ``stations`` is not None, only the entries whose ``primary_station_id`` is
    in ``stations`` are kept. The remaining data are split by
    ``observed_variable``, each group is reshaped with
    ``_reorganize_dataset`` and the results are merged.

    A dataset left with no entries along ``index`` is returned as it is.
    """
    for name, da in ds.data_vars.items():
        if np.issubdtype(da.dtype, np.bytes_):
            ds[name].values = np.char.decode(da.values, "utf-8")

    if stations is not None:
        selected = ds["primary_station_id"].isin(stations)
        ds = ds.where(selected, drop=True)

    if ds.sizes["index"] == 0:
        return ds

    per_variable = [
        _reorganize_dataset(group) for _, group in ds.groupby("observed_variable")
    ]
    with xr.set_options(use_new_combine_kwarg_defaults=True):
        return xr.merge(per_variable)


def compute_specific_humidity_from_water_vapour_mixing_ratio(
    water_vapour_mixing_ratio,
    molar_mass_water=18.01528,
    molar_mass_dry_air=28.9647,
):
    """Convert a water vapour mixing ratio into specific humidity.

    The result is ``Mw * r / (Md + Mw * r) * 1000``, where ``r`` is the mixing
    ratio and ``Mw``/``Md`` are the molar masses of water and dry air. The
    ``attrs`` of the result are replaced with a long name and "g/kg" units, so
    the input must be an object whose arithmetic result accepts ``attrs``.
    """
    water_term = molar_mass_water * water_vapour_mixing_ratio
    moist_air_term = molar_mass_dry_air + molar_mass_water * water_vapour_mixing_ratio
    specific_humidity = water_term / moist_air_term * 1000
    specific_humidity.attrs = dict(long_name="Specific Humidity", units="g/kg")
    return specific_humidity


def compute_integrated_water_vapour(specific_humidity):
    """Sum specific humidity times altitude increment along ``altitude``.

    ``specific_humidity`` is divided by 1000 (g/kg → kg/kg), multiplied by the
    difference between consecutive ``altitude`` coordinate values and summed
    over ``altitude``. The ``attrs`` of the result are replaced with a long
    name and "kg/m²" units.
    """
    # NOTE(review): the sum is not weighted by air density, although the
    # result is labelled kg/m² — confirm that this is intended.
    humidity_kg_per_kg = specific_humidity / 1.0e3  # g/kg → kg/kg
    altitude_step = humidity_kg_per_kg["altitude"].diff("altitude").fillna(0)  # m

    column_total = (humidity_kg_per_kg * altitude_step).sum("altitude")
    column_total.attrs = dict(long_name="Integrated Water Vapour", units="kg/m²")
    return column_total


def compute_profiles(ds, stations):
    """Turn raw observations into profiles on a regular altitude grid.

    The dataset is reorganised with ``reorganize_dataset``, specific humidity
    and a ``time`` variable (parsed from ``report_timestamp``) are added, and
    the observations of every station/time pair are sorted by altitude,
    cleaned of missing values and duplicated altitudes, and interpolated to
    50, 100, ..., 30000. Pairs with a gap larger than 2000 between consecutive
    altitudes are skipped. The profiles are concatenated along ``time`` (with
    a ``station`` coordinate) and the integrated water vapour is added.
    """
    ds = reorganize_dataset(ds, stations)

    # Derived variables
    mixing_ratio = ds["water_vapour_mixing_ratio"]
    ds["specific_humidity"] = compute_specific_humidity_from_water_vapour_mixing_ratio(
        mixing_ratio
    )
    ds["time"] = ("index", pd.to_datetime(ds["report_timestamp"]).values)

    variables = ["air_temperature", "relative_humidity", "specific_humidity", "altitude"]
    target_altitudes = range(50, 30_001, 50)
    max_gap = 2_000

    # One interpolated profile per station and time
    interpolated = []
    for station, station_obs in ds.groupby("primary_station_id"):
        for launch_time, sounding in station_obs.groupby("time"):
            sounding = (
                sounding.swap_dims(index="altitude")[variables]
                .sortby("altitude")
                .dropna("altitude", how="any", subset=variables)
                .drop_duplicates("altitude")
            )
            gaps = sounding["altitude"].diff("altitude")
            if (gaps > max_gap).any():
                # Too sparse to be interpolated
                continue
            interpolated.append(
                sounding.interp(altitude=target_altitudes)
                .expand_dims(time=[launch_time])
                .assign_coords(station=("time", [station]))
            )
    ds = xr.concat(interpolated, "time")

    # Integrated water vapour of every profile
    ds["integrated_water_vapour"] = compute_integrated_water_vapour(
        ds["specific_humidity"]
    )
    return ds

## Download and transform

In [None]:
# The station list is sorted before it is passed on; None (or an empty list)
# is forwarded unchanged.
# NOTE(review): presumably sorting keeps the cached result independent of the
# order in which stations are listed — confirm against the download library.
transform_kwargs = {"stations": sorted(stations) if stations else stations}

ds_profiles = download.download_and_transform(
    collection_id,
    requests,
    chunks={"year": 1, "month": 1},
    transform_func=compute_profiles,
    transform_func_kwargs=transform_kwargs,
    cached_open_mfdataset_kwargs={"concat_dim": "time", "combine": "nested"},
)
ds_profiles = ds_profiles.compute()

## TODO: Plotting

## Seasonal profiles

In [None]:
# Seasons in plotting order, and the colour used for each of them
season_order = ["DJF", "MAM", "JJA", "SON"]
season_colors = {
    "DJF": "tab:blue",
    "MAM": "tab:green",
    "JJA": "tab:orange",
    "SON": "tab:red",
}

# One panel per station, sharing the altitude axis
fig, axs = plt.subplots(1, 3, figsize=(18, 6), sharey=True)

for ax, (station, ds_station) in zip(axs, ds_profiles.groupby("station")):
    # Kelvin → °C
    temp_C = ds_station["air_temperature"] - 273.15

    # Mean and standard deviation of every season
    by_season = temp_C.groupby("time.season")
    seasonal_mean = by_season.mean("time")
    seasonal_std = by_season.std("time")

    for season in season_order:
        if season not in seasonal_mean.season.values:
            # No profile falls in this season
            continue
        mean_profile = seasonal_mean.sel(season=season)
        std_profile = seasonal_std.sel(season=season)
        altitude = mean_profile["altitude"]
        color = season_colors[season]

        ax.plot(mean_profile, altitude, label=f"{season}", color=color)
        # Shaded band: mean ± one standard deviation
        ax.fill_betweenx(
            altitude,
            mean_profile - std_profile,
            mean_profile + std_profile,
            color=color,
            alpha=0.3,
        )

    ax.set_title(f"Stazione: {station}")
    ax.set_xlabel("Temperature (°C)")
    ax.grid(True)
    ax.legend(title="Season")

axs[0].set_ylabel("Altitudine (m)")
fig.suptitle("Seasonal profiles ±1σ", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

## Monthly mean temperature

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
meshes = []
for ax, (station, ds_station) in zip(axs, ds_profiles.groupby("station")):
    # Monthly mean (labelled with the start of the month); altitudes with any
    # missing month are discarded
    monthly_mean = (
        ds_station["air_temperature"]
        .resample(time="1MS")
        .mean("time")
        .dropna("altitude", how="any")
    )

    pcm = ax.pcolormesh(
        monthly_mean["time"].values,
        monthly_mean["altitude"].values,
        monthly_mean.transpose("altitude", "time"),
        shading="auto",
        cmap="coolwarm",
    )
    meshes.append(pcm)
    ax.set_title(f"Stazione: {station}")
    ax.set_xlabel("Time")
    ax.grid(True)
    # Format the dates on the x axis as 'YY-MM'
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%y-%m"))
    ax.tick_params(axis="x", rotation=45)

# Every mesh auto-scales its own colour limits, so a single colorbar is only
# valid for all panels once the limits are made common.
color_limits = [mesh.get_clim() for mesh in meshes]
lows = [low for low, _ in color_limits if low is not None]
highs = [high for _, high in color_limits if high is not None]
if lows and highs:
    for mesh in meshes:
        mesh.set_clim(min(lows), max(highs))

# Colorbar shared by all the panels
axs[0].set_ylabel("Altitude (m)")
fig.colorbar(pcm, ax=axs, orientation="vertical", label="Temperature (K)")
fig.suptitle("Monthly Mean Temperature", fontsize=16)
plt.show()

## Temperature monthly trend

In [None]:
months = np.arange(1, 13)
# A fit needs at least three years: a single point has no slope, and with two
# points the standard error is zero, so that any non-zero slope would be
# flagged as significant.
min_years = 3

fig, axs = plt.subplots(1, 3, figsize=(18, 6), sharey=True, constrained_layout=True)

for ax, (station, ds_station) in zip(axs, ds_profiles.groupby("station")):
    temp_C = ds_station["air_temperature"] - 273.15
    monthly_temp = temp_C.resample(time="1MS").mean("time").load()

    altitudes = monthly_temp["altitude"].values
    slope_matrix = np.full((len(altitudes), 12), np.nan)
    stderr_matrix = np.full((len(altitudes), 12), np.nan)

    time = monthly_temp["time"].values
    years = pd.to_datetime(time).year
    months_all = pd.to_datetime(time).month

    # Linear trend over the years, for every altitude and calendar month
    for a_idx, alt in enumerate(altitudes):
        temps = monthly_temp.sel(altitude=alt).values
        df = pd.DataFrame({"year": years, "month": months_all, "temp": temps}).dropna()

        for m_idx, m in enumerate(months):
            df_m = df[df["month"] == m]
            if len(df_m) >= min_years:
                res = scipy.stats.linregress(df_m["year"], df_m["temp"])
                slope_matrix[a_idx, m_idx] = res.slope
                stderr_matrix[a_idx, m_idx] = res.stderr

    # Cells without a fit hold NaN and therefore compare as not significant
    significant_mask = np.abs(slope_matrix) > stderr_matrix

    # Colormap
    mesh = ax.pcolormesh(
        months,
        altitudes,
        slope_matrix,
        shading="auto",
        cmap="coolwarm",
        vmin=-0.5,
        vmax=0.5,
    )
    fig.colorbar(mesh, ax=ax, label="Temperature trend (°C/year)")

    # Hatching of the significant cells. The mesh cells are centred on the
    # month and altitude values, so the rectangles are centred on them too.
    layer_height = altitudes[1] - altitudes[0]
    for a_idx, m_idx in np.argwhere(significant_mask):
        rect = mpatches.Rectangle(
            (months[m_idx] - 0.5, altitudes[a_idx] - layer_height / 2),
            1,
            layer_height,
            linewidth=0,
            edgecolor=None,
            facecolor="none",
            hatch="///",
            zorder=3,
        )
        ax.add_patch(rect)

    ax.set_title(f"{station} – Monthly Trend")
    ax.set_xlabel("Months")
    ax.set_xticks(months)
    ax.set_xticklabels([f"{m:02d}" for m in months])

axs[0].set_ylabel("Altitude (m)")
plt.suptitle("Temperature Monthly Trend", fontsize=16)
plt.show()

## Integrated water vapour

In [None]:
fig, axs = plt.subplots(3, 1, figsize=(10, 10), sharex=True, constrained_layout=True)
for ax, (station, ds_station) in zip(axs, ds_profiles.groupby("station")):
    da = ds_station["integrated_water_vapour"].resample(time="1MS").mean("time")

    # Time elapsed since the first month, in years, so that the fitted slope
    # really is expressed per year as stated in the printed message
    times = da["time"].values
    elapsed_years = (times - times[0]) / np.timedelta64(1, "D") / 365.25

    y = da.values
    mask = ~np.isnan(y)

    # The fit (and the panel) is skipped when fewer than three months are valid
    if mask.sum() > 2:
        res = scipy.stats.linregress(elapsed_years[mask], y[mask])
        slope, intercept, p_value = res.slope, res.intercept, res.pvalue
        print(f"{station}: Trend = {slope:.3f} kg/m²/year, p = {p_value:.3f}")

        ax.plot(da["time"], y, label="IWV")
        ax.plot(da["time"], intercept + slope * elapsed_years, "r--", label="Trend")
        ax.set_title(f"IWV – {station}")
        ax.set_ylabel("IWV (kg/m²)")
        ax.grid(True)
        ax.legend()
_ = axs[-1].set_xlabel("Time")

## Specific humidity and integrated water vapour

In [None]:
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(16, 12), constrained_layout=True)
for i, (station, ds_station) in enumerate(ds_profiles.groupby("station")):
    # Monthly mean specific humidity; altitudes with any missing month are
    # discarded
    specific_humidity = ds_station["specific_humidity"]
    monthly_mean = (
        specific_humidity.resample(time="1MS")
        .mean("time")
        .dropna("altitude", how="any")
    )

    # IWV of the station: pick the station explicitly only when the data still
    # carry a station dimension
    da_iwv_station = ds_station["integrated_water_vapour"]
    if "primary_station_id" in da_iwv_station.dims:
        da_iwv_station = da_iwv_station.sel(primary_station_id=station)

    # Left column: time/altitude section of specific humidity
    ax1 = axs[i, 0]
    mesh = ax1.pcolormesh(
        monthly_mean["time"],
        monthly_mean["altitude"],
        monthly_mean.transpose("altitude", "time"),
        shading="auto",
        cmap="viridis",
    )
    fig.colorbar(mesh, ax=ax1, label="specific humidity (g/kg)")
    # The panel shows specific humidity, not the water vapour mixing ratio
    ax1.set_title(f"{station} – Specific humidity")
    ax1.set_xlabel("Time")
    ax1.set_ylabel("Altitude (m)")

    # Right column: IWV time series
    ax2 = axs[i, 1]
    ax2.plot(da_iwv_station["time"], da_iwv_station, marker="o", linestyle="-")
    ax2.set_title(f"{station} – IWV")
    ax2.set_xlabel("Time")
    ax2.set_ylabel("IWV (kg/m²)")
    ax2.grid(True)

fig.suptitle("Spec hum e IWV", fontsize=18)
plt.show()