# Tropopause

## Import packages

In [None]:
import os

import cdsapi
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from c3s_eqc_automatic_quality_control import download

# Select the matplotlib style sheet used by every figure drawn after this call.
plt.style.use("seaborn-v0_8-notebook")

## Define parameters

In [None]:
# Time period
start = "2006-05"
stop = "2020-03"

# Stations
stations = ["TEN", "LIN", "NYA"]  # Use None to analyse all stations
assert isinstance(stations, list | None)

# CDS credentials
os.environ["CDSAPI_RC"] = os.path.expanduser("~/ciardini_virginia/.cdsapirc")

## Define request

In [None]:
# Dataset identifier and the part of the request that is common to all months
collection_id = "insitu-observations-gruan-reference-network"
request = dict(
    variable=[
        "air_temperature",
        "relative_humidity",
        "air_pressure",
        "altitude",
        "water_vapour_mixing_ratio",
    ],
    data_format="netcdf",
)

# Build one request per month between `start` and `stop`, keeping only the
# months for which `apply_constraints` returns a non-empty "day" entry.
client = cdsapi.Client()
requests = []
for date in pd.date_range(start, stop, freq="1MS"):
    time_request = {"year": date.strftime("%Y"), "month": date.strftime("%m")}
    constrained = client.client.apply_constraints(
        collection_id, request | time_request
    )
    time_request["day"] = constrained["day"]
    if not time_request["day"]:
        # Nothing to download for this month
        continue
    requests.append(request | time_request)

## Functions to cache

In [None]:
def _reorganize_dataset(ds):
    # Rename
    (varname,) = set(ds["observed_variable"].values)
    ds = ds.rename(observation_value=str(varname)).drop_vars("observed_variable")
    ds = ds.rename(
        {
            var: "_".join([varname, var.replace("_value", "")])
            for var in ds.data_vars
            if var.startswith("uncertainty")
        }
    )
    # Update attrs
    for var, da in ds.data_vars.items():
        match var:
            case "pressure":
                da.attrs["long_name"] = "Pressure"
            case "air_temperature":
                da.attrs["long_name"] = "Temperature"
            case "altitude":
                da.attrs["long_name"] = "Altitude"
            case "relative_humidity":
                da.attrs["long_name"] = "Relative"
            case "water_vapour_mixing_ratio":
                da.attrs["long_name"] = "Mixing"
        for string in ("units", "type"):
            if string in var:
                ds = ds.drop_vars(var)
                (value,) = set(da.values)
                attrs_var = varname if var == string else var.replace("_" + string, "")
                ds[attrs_var].attrs[string] = value
    return ds


def reorganize_dataset(ds, stations):
    """Decode, filter by station and pivot the dataset by observed variable.

    Parameters
    ----------
    ds
        Dataset with an ``index`` dimension and the variables
        ``primary_station_id`` and ``observed_variable``. Byte-string
        variables are decoded in place, so the input object is modified.
    stations
        Station identifiers to keep, or ``None`` to keep every station.

    Returns
    -------
    The filtered dataset unchanged in layout when no record is left along
    ``index``; otherwise the merge of the per-variable datasets produced by
    ``_reorganize_dataset``.
    """
    # Turn byte strings into text so that comparisons with `stations` and the
    # variable names derived later operate on str values.
    for var, da in ds.data_vars.items():
        if np.issubdtype(da.dtype, np.bytes_):
            ds[var].values = np.char.decode(da.values, "utf-8")

    if stations is not None:
        ds = ds.where(ds["primary_station_id"].isin(stations), drop=True)

    # Nothing left to group: return early with the empty dataset.
    if not ds.sizes["index"]:
        return ds

    # Use a distinct name for each group so the `ds` argument is not rebound
    # while its groups are being iterated.
    datasets = [
        _reorganize_dataset(group) for _, group in ds.groupby("observed_variable")
    ]
    return xr.merge(datasets)

## Download and transform

In [None]:
# Hand the monthly requests and the transform function to the download helper.
# NOTE(review): presumably the helper downloads each chunk, applies
# `reorganize_dataset` to it and caches the result — confirm against the
# c3s_eqc_automatic_quality_control documentation.
ds = download.download_and_transform(
    collection_id,
    requests,
    # NOTE(review): presumably one download per (year, month) — confirm.
    chunks={"year": 1, "month": 1},
    transform_func=reorganize_dataset,
    # The station list is sorted when one is given; None (or an empty list) is
    # passed through unchanged.
    # NOTE(review): sorting presumably keeps the cached result independent of
    # the order in which stations are listed — confirm.
    transform_func_kwargs={"stations": sorted(stations) if stations else stations},
    # The opened pieces are combined as a nested concatenation along "index".
    cached_open_mfdataset_kwargs={"concat_dim": "index", "combine": "nested"},
)