# In situ precipitation completeness for climate monitoring

## Import packages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymannkendall as mk
import scipy.stats
import xarray as xr
from c3s_eqc_automatic_quality_control import diagnostics, download, plot

# Apply the "seaborn-v0_8-notebook" matplotlib style sheet to the figures created below
plt.style.use("seaborn-v0_8-notebook")

## Define parameters

In [None]:
# Time periods: five 30-year windows whose first years are 10 years apart
# (1951-1980, 1961-1990, ..., 1991-2020)
years_start = list(range(1951, 1992, 10))
years_stop = [first_year + 29 for first_year in years_start]
# One colour per period, listed in the same order as the periods
colors = ["deepskyblue", "green", "gold", "darkorange", "red"]
assert len(years_start) == len(years_stop) == len(colors)

# Region of interest
area = [
    44,  # N
    -10,  # W
    36,  # S
    1,  # E
]
assert len(area) == 4

## Define request

In [None]:
# Months bracketing all the analysis periods (first to last requested year)
start = f"{min(years_start)}-01"
stop = f"{max(years_stop)}-12"

# Gridded in-situ observations; the product type is added at download time
collection_id = "insitu-gridded-observations-europe"
request = {
    "variable": ["precipitation_amount"],
    "grid_resolution": "0_25deg",
    "period": "full_period",
    "version": ["28_0e"],
    "area": area,
}

# ERA5 ensemble mean, requested every three hours (00:00, 03:00, ..., 21:00)
collection_id_era5 = "reanalysis-era5-single-levels"
era5_times = [f"{hour:02d}:00" for hour in range(0, 24, 3)]
request_era5 = {
    "product_type": ["ensemble_mean"],
    "variable": ["total_precipitation"],
    "time": era5_times,
    "area": area,
}
# Expand the template request over the start-stop date range
requests_era5 = download.update_request_date(request_era5, start, stop)

## Define function to cache

In [None]:
def dayofyear_reindex(ds, years_start, years_stop):
    """Build a day-of-year climatology of ``ds`` for each requested period.

    The data are first smoothed with a centred 15-day rolling mean along
    ``time``. Then, for every ``(year_start, year_stop)`` pair (both bounds
    inclusive), the time steps falling in that period are averaged by day of
    year. The per-period results are combined along a ``period`` dimension
    labelled ``"<year_start>-<year_stop>"``, and a ``season`` coordinate is
    attached to the ``dayofyear`` dimension.
    """
    smoothed = ds.rolling(time=15, center=True).mean()
    years = smoothed["time"].dt.year

    # One climatology per period, each tagged with its own period label
    climatologies = []
    for first_year, last_year in zip(years_start, years_stop):
        in_period = (years >= first_year) & (years <= last_year)
        climatology = (
            smoothed.where(in_period, drop=True).groupby("time.dayofyear").mean()
        )
        label = f"{first_year}-{last_year}"
        climatologies.append(climatology.expand_dims(period=[label]))
    merged = xr.merge(climatologies)

    # Map each day-of-year number onto a date of a leap year (2008), so that
    # day 366 is valid, and read the season label of that date.
    dates = pd.to_datetime(merged["dayofyear"].values - 1, unit="D", origin="2008")
    seasons = xr.DataArray(dates).dt.season.values
    return merged.assign_coords(season=("dayofyear", seasons))


def accumulated_spatial_weighted_mean(ds):
    """Sum ``ds`` into daily totals, then return their spatial weighted mean.

    Attributes are kept through the daily sum (``keep_attrs=True``).
    """
    daily_totals = ds.resample(time="1D").sum(keep_attrs=True)
    return diagnostics.spatial_weighted_mean(daily_totals)

## Download and compute

In [None]:
# Day-of-year climatologies of the ensemble mean and the ensemble spread
dataarrays = []
for reduction in ("mean", "spread"):
    print(f"{reduction=}")
    da = download.download_and_transform(
        collection_id,
        request | {"product_type": f"ensemble_{reduction}"},
        transform_func=dayofyear_reindex,
        transform_func_kwargs={"years_start": years_start, "years_stop": years_stop},
    )["rr"]
    # Tag the long name *before* renaming and storing the array. Mutating
    # ``da.attrs`` after ``da.rename(...)`` only reaches the stored copy if
    # both objects happen to share the same attrs dict.
    da = da.assign_attrs(long_name=f"{da.attrs['long_name']} {reduction}")
    dataarrays.append(da.rename(reduction))
ds_periods = xr.merge(dataarrays)

# Timeseries: spatial weighted mean of the gridded ensemble mean,
# restricted to the months spanned by the analysis periods
da_eobs = download.download_and_transform(
    collection_id,
    request | {"product_type": "ensemble_mean"},
    transform_func=diagnostics.spatial_weighted_mean,
)["rr"]
da_eobs = da_eobs.sel(time=slice(start, stop))

# ERA5: daily totals of the 3-hourly fields, then spatial weighted mean
da_era5 = download.download_and_transform(
    collection_id_era5,
    requests_era5,
    transform_func=accumulated_spatial_weighted_mean,
    backend_kwargs={"time_dims": ["valid_time"]},
    chunks={"year": 1},
)["tp"]
# NOTE(review): the 1.0e3 factor presumably converts ERA5 metres to
# millimetres so both products share the same unit - confirm.
da_timeseries = xr.concat(
    [
        da_eobs.expand_dims(product=["E-OBS"]),
        (da_era5 * 1.0e3).expand_dims(product=["ERA5"]),
    ],
    "product",
)

## Define useful functions

In [None]:
def make_statistics_dataframe(da):
    """Tabulate summary statistics of ``da`` for each period.

    Every dimension except ``period`` is reduced. The resulting table has the
    columns ``period``, ``number`` (count of non-null values), ``mean``,
    ``maximum``, ``minimum`` and ``st.deviation``.
    """
    reduce_dims = set(da.dims).difference({"period"})

    columns = {
        "period": da["period"],
        "number": da.notnull().sum(reduce_dims),
    }
    reducers = {
        "mean": da.mean,
        "maximum": da.max,
        "minimum": da.min,
        "st.deviation": da.std,
    }
    for label, reducer in reducers.items():
        columns[label] = reducer(reduce_dims)
    return pd.DataFrame.from_dict(columns)


def compute_hist(da, **kwargs):
    """Histogram ``da`` and return the result indexed by bin centre.

    Keyword arguments are forwarded to ``numpy.histogram``. The returned
    array has a single ``bins`` dimension whose coordinate holds the
    midpoints of the bin edges and inherits the attributes of ``da``. Its
    ``long_name`` is always set to "Probability Density", whatever the
    ``density`` argument is.
    """
    values, edges = np.histogram(da, **kwargs)
    centres = (edges[1:] + edges[:-1]) / 2
    result = xr.DataArray(
        values,
        coords={"bins": centres},
        attrs={"long_name": "Probability Density"},
    )
    # The bin axis represents the histogrammed variable, so it gets its attrs
    result["bins"].attrs = da.attrs
    return result


def plot_pdf(da, colors, bins=None, **kwargs):
    """Plot the probability density of ``da``, optionally split by dimension.

    Parameters
    ----------
    da:
        Array whose values are histogrammed.
    colors:
        Colours installed as the axes colour cycle while plotting.
    bins:
        Bin edges passed to ``numpy.histogram``. Defaults to 50 evenly spaced
        edges between the minimum and the maximum of ``da``.
    **kwargs:
        Forwarded to ``da.plot``. The dimensions named by ``hue``, ``row`` and
        ``col`` (when given) are the ones the histograms are computed for
        separately.

    Returns
    -------
    Whatever ``da.plot`` returns for the histogrammed array.
    """
    if bins is None:
        bins = np.linspace(da.min().values, da.max().values, 50)

    # Iterate a fixed tuple rather than a set: set iteration order is not
    # reproducible between runs, which made the grouping order arbitrary.
    dims = [kwargs[key] for key in ("hue", "row", "col") if key in kwargs]
    if dims:
        da = da.groupby(dims).map(compute_hist, bins=bins, density=True)
    else:
        # Nothing to split by: histogram the whole array at once
        da = compute_hist(da, bins=bins, density=True)

    with plt.rc_context(
        {
            "axes.prop_cycle": plt.cycler(color=colors),
            "axes.grid": True,
        }
    ):
        return da.plot(**kwargs)

## Show Statistics

In [None]:
# Per-period summary statistics of the ensemble-mean climatology
make_statistics_dataframe(ds_periods["mean"])

In [None]:
# Per-period summary statistics of the ensemble-spread climatology
make_statistics_dataframe(ds_periods["spread"])

## Compare timeseries

In [None]:
# Both products on the same axes, one line per product
da_timeseries.plot(hue="product")
ax = plt.gca()
ax.set_title("Comparison")
ax.grid()

In [None]:
# Annual means of the E-OBS series, used to find the two extreme years
da_timeseries_yearly = da_timeseries.sel(product="E-OBS").groupby("time.year").mean()
extreme_years = {
    "maximum": da_timeseries_yearly.idxmax("year"),
    "minimum": da_timeseries_yearly.idxmin("year"),
}
zooms = {label: int(found.squeeze()) for label, found in extreme_years.items()}

# Zoom on each extreme year, showing both products
for label, year in zooms.items():
    da = da_timeseries.sel(time=str(year))
    da.plot(hue="product")
    plt.title(f"The year with the {label} is {year}")
    plt.grid()
    plt.show()

## Plot each period

In [None]:
# Shared plotting options: one map panel per period, one PDF line per period
maps_kwargs = {"col": "period", "cmap": "jet", "robust": True}
pdf_kwargs = {"colors": colors, "hue": "period", "figsize": [15, 5]}

for name in ds_periods.data_vars:
    da = ds_periods[name]

    # Map of the average over all days of the year
    period_mean = da.mean("dayofyear", keep_attrs=True)
    plot.projected_map(period_mean, **maps_kwargs)
    plt.show()

    # Distribution of all the values of each period
    plot_pdf(da, **pdf_kwargs)
    plt.show()

## Plot maps for each season and period

In [None]:
for name in ds_periods.data_vars:
    da = ds_periods[name]

    # Seasonal-mean maps: one row per season, one column per period
    seasonal_mean = da.groupby("season").mean(keep_attrs=True)
    plot.projected_map(seasonal_mean, row="season", **maps_kwargs)
    plt.show()

    # Distributions: one panel per season, one line per period
    plot_pdf(da, col="season", **pdf_kwargs)
    plt.show()

## Plot bars

In [None]:
# Number of days per year and product whose value exceeds 10
# NOTE(review): the threshold is presumably in mm/day - confirm.
df = (da_timeseries > 10).groupby("time.year").sum().to_pandas()
axes = df.T.plot.bar(figsize=[15, 5], subplots=True)
for ax, (product, df_product) in zip(axes, df.groupby("product")):
    counts = df_product.squeeze()
    years = df_product.columns
    # Offsets from the first year; these coincide with the 0-based bar
    # positions as long as the years are consecutive.
    x_values = years - years[0]

    # Mann-Kendall test (source of Tau) and least-squares linear fit
    # (source of slope, intercept and p-value).
    mk_result = mk.original_test(counts.values)
    fit = scipy.stats.linregress(x_values, counts)

    # The dashed line is the least-squares fit from ``linregress``, not the
    # Theil-Sen estimate, so it is labelled as a linear regression.
    ax.plot(
        x_values,
        fit.intercept + fit.slope * x_values,
        "k--",
        label="Linear regression",
    )
    text = "\n".join(
        [
            f"Slope: {fit.slope:.4f} days/year",
            f"P-value: {fit.pvalue:.4f}",
            f"Tau: {mk_result.Tau:.4f}",
            f"Intercept: {fit.intercept:.4f} days",
        ]
    )
    ax.text(1.01, 1, text, transform=ax.transAxes, fontsize=10, ha="left", va="top")
    ax.legend()