# Reference Upper-Air Network for trend analysis

## Import libraries

In [None]:
import earthkit.data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

# Apply a shared matplotlib style sheet to every figure produced below.
plt.style.use("seaborn-v0_8-notebook")

## Set parameters

In [None]:
# TODO: Temporary workaround as the CDS download form is disabled
# Name of the local CSV file that is opened in the "Open data" step below.
filename = "GRUAN_20160201_20160229_subset_cdm-lev_nya.csv"

## Open data and add attributes

In [None]:
# Read the CSV through earthkit and convert it to an xarray dataset.
# NOTE(review): "header": 17 is presumably forwarded to pandas.read_csv, i.e.
# the column names sit on row index 17 of the file -- confirm against the file.
source = earthkit.data.from_source(
    "file",
    filename,
    pandas_read_csv_kwargs={"header": 17},
)
ds = source.to_xarray()

# Expose the report timestamps as a datetime variable along "index".
timestamps = pd.to_datetime(ds["report_timestamp"]).values
ds["time"] = ("index", timestamps)

# Keep only the records whose station name is "NYA".
is_nya = ds["station_name"] == "NYA"
ds = ds.where(is_nya, drop=True)

# Descriptive metadata attached to the variables used in this notebook.
variable_attrs = {
    "air_pressure": {"long_name": "Pressure", "units": "Pa"},
    "air_temperature": {"long_name": "Temperature", "units": "K"},
    "altitude": {"long_name": "Altitude", "units": "m"},
    "relative_humidity": {"long_name": "Relative Humidity", "units": "%"},
}
for variable, attrs in variable_attrs.items():
    ds[variable].attrs.update(attrs)

## Define transform functions

In [None]:
def compute_specific_humidity(ds):
    """Return specific humidity derived from pressure, temperature and RH.

    Parameters
    ----------
    ds : mapping of arrays
        Must provide "air_pressure" (scaled here by 0.01, i.e. Pa -> hPa),
        "air_temperature" (offset here by -273.15, i.e. K -> degC) and
        "relative_humidity" (divided by 100 here, i.e. given in percent).

    Returns
    -------
    Array of the same kind as the inputs, computed as
    ``622 * RH * e_s / (100 * p_hPa)`` and tagged with the attributes
    ``long_name="Specific Humidity"`` and ``units="g/kg"``.
    """
    celsius = ds["air_temperature"] - 273.15
    hectopascal = ds["air_pressure"] * 0.01

    # Saturation vapour pressure from a Magnus-type exponential
    # (presumably in hPa, matching the pressure it is divided by).
    exponent = (17.67 * celsius) / (celsius + 243.5)
    e_sat = 6.112 * np.exp(exponent)

    humidity = 622 * ds["relative_humidity"] * e_sat / (100 * hectopascal)
    humidity.attrs = {"long_name": "Specific Humidity", "units": "g/kg"}
    return humidity


def compute_saturation_vapor_pressure(ds):
    """Return the saturation vapour pressure for ``ds["air_temperature"]``.

    The temperature is shifted by -273.15 (K -> degC) and fed into the
    Magnus-type expression ``6.112 * exp(17.67 * t / (t + 243.5))``.
    NOTE(review): the result is presumably in hPa -- confirm with callers.
    """
    celsius = ds["air_temperature"] - 273.15
    exponent = (17.67 * celsius) / (celsius + 243.5)
    return 6.112 * np.exp(exponent)


def compute_integrated_water_vapour(ds):
    """Return the water vapour density summed over the altitude dimension.

    Parameters
    ----------
    ds : dataset-like
        Must provide "air_temperature", "relative_humidity" and an
        "altitude" dimension/coordinate supporting ``.diff("altitude")``.

    Returns
    -------
    The sum over "altitude" of vapour density times the altitude
    differences, tagged with ``long_name="Integrated Water Vapour"`` and
    ``units="kg/m²"``.
    """
    molar_mass_water = 18.015  # g/mol
    gas_constant = 8.3145  # J/(mol K)
    temperature = ds["air_temperature"]

    # Actual vapour pressure: saturation value scaled by RH given in percent.
    saturation = compute_saturation_vapor_pressure(ds)
    vapour_pressure = saturation * (ds["relative_humidity"]) / 100

    # Ideal-gas vapour density; the factor 10 presumably folds together the
    # hPa -> Pa and g -> kg conversions.
    density = (vapour_pressure * molar_mass_water) / (
        10 * gas_constant * temperature
    )

    # Weight each level by the altitude difference to its neighbour, then sum.
    layer_thickness = ds["altitude"].diff("altitude")
    column = (density * layer_thickness).sum("altitude")
    column.attrs = {"long_name": "Integrated Water Vapour", "units": "kg/m²"}
    return column

## Transform data

In [None]:
# Derive specific humidity from the raw observations
ds["specific_humidity"] = compute_specific_humidity(ds)

# Build one profile per timestamp on a common altitude grid
subset = ["air_temperature", "relative_humidity", "specific_humidity", "time"]
target_altitudes = range(50, 30001, 50)
max_gap = 2_000  # largest accepted spacing between consecutive altitudes
profiles = []
for time, profile in ds.groupby("time"):
    # Index the sounding by altitude, order it, and drop incomplete levels
    profile = profile.swap_dims(index="altitude")[subset].sortby("altitude")
    profile = profile.dropna("altitude", how="any", subset=subset)

    # Only soundings without an oversized vertical gap are interpolated
    gaps = profile["altitude"].diff("altitude")
    if not (gaps > max_gap).any():
        profile = profile.interp(altitude=target_altitudes)
        profiles.append(profile.expand_dims(time=[time]))
ds_profiles = xr.concat(profiles, "time")

# Integrate the water vapour of every retained profile
da_iwv = compute_integrated_water_vapour(ds_profiles)

## Plot profiles

In [None]:
# Shared axis settings: altitude on the y axis, spanning the observed range
altitude_limits = [ds["altitude"].min(), ds["altitude"].max()]
plot_kwargs = {"y": "altitude", "ylim": altitude_limits}

# One figure per variable: all profiles, their mean, and mean ± one std
for var, da in ds_profiles.data_vars.items():
    da.plot(hue="time", add_legend=False, **plot_kwargs)

    mean = da.mean("time", keep_attrs=True)
    std = da.std("time", keep_attrs=True)

    # Lower bound first (unlabelled), then upper bound (carries the label)
    for sign, label in ((-1, None), (+1, "mean ± std")):
        bound = mean + std * sign
        bound.plot(color="k", linestyle="--", label=label, **plot_kwargs)

    mean.plot(color="k", linestyle="-", label="mean", **plot_kwargs)
    plt.legend()
    plt.grid()
    plt.show()

## Plot timeseries

In [None]:
# Plot the integrated water vapour computed above, one marker per value,
# and overlay a grid on the current axes.
da_iwv.plot(marker="o")
plt.grid()