In [1]:
import sys

import numpy as np
import pandas as pd
import xarray as xr
from lmoments3.distr import gev
from scipy.stats import genextreme
from xclim.indices.stats import fit, parametric_quantile

sys.path.append("C:\\Users\\ospin\\OneDrive\\Documents\\Github\\xhydro\\src\\xhydro")
import pmp

In [2]:
def precipitable_water_100y(da_pw, dist, method, n, mf=0.2):
    """Compute the 100-year return period of precipitable water for each month of the year.

    Parameters
    ----------
    da_pw : xr.DataArray
        Dataset containing the precipitable water.
    dist : lmoments3 distribution object
        Probability distributions.
    method : {"ML" or "MLE", "MM", "PWM", "APP"}
        Fitting method, either maximum likelihood (ML or MLE), method of moments (MM) or approximate method (APP).
        Can also be the probability weighted moments (PWM), also called L-Moments, if a compatible `dist` object is passed.
        The PWM method is usually more robust to outliers.
    n : int
        Minimum number of data points to fit the distribution. When n is larger than the number of data points,
        the maximum value of the precipitable is set.
    mf : float
        The annual maximums of the precipitable water plus a porcentage (mf) are used as a upper limit.

    Returns
    -------
    xr.DataArray
        Precipitable water for a 100-year return period.
        It has the same dimensions as da_pw.
    """
    # Compute max monthly and add a «year» dimension.
    da_pw_m = da_pw.resample({"time": "ME"}).max()
    year = da_pw_m.time.dt.year
    month = da_pw_m.time.dt.month
    da_pw_m = da_pw_m.assign_coords(
        year=("time", year.data), month=("time", month.data)
    )
    da_pw_m = da_pw_m.set_index(time=("year", "month")).unstack("time")
    da_pw_m = da_pw_m.rename({"year": "time"}).squeeze()

    # ----------------------------------------------------------------------------------------------
    count = da_pw_m.count(dim="time")
    cond = count > n
    gev_values = genextreme.rvs(
        c=0.71233406, loc=9.56850054, scale=1.39081282, size=da_pw_m.shape[2]
    )
    shape = da_pw_m.shape
    num_elements = np.prod(shape)
    padded_array = np.resize(gev_values, num_elements)
    array_4d = padded_array.reshape((shape[0], shape[1], shape[3], shape[2]))
    array_4d = np.transpose(array_4d, axes=(0, 1, 3, 2))

    new_xarray = xr.DataArray(array_4d, dims=da_pw_m.dims, coords=da_pw_m.coords)

    da_pw_m2 = da_pw_m.where(cond, other=new_xarray)

    # ----------------------------------------------------------------------------------------------------------------------------------

    # Fits distribution
    params = fit(da_pw_m2, dist=dist, method=method)
    pw100_m2 = parametric_quantile(params, q=1 - 1 / 100).squeeze().rename("pw100")

    pw100_m = pw100_m2.where(
        cond, other=da_pw_m.max(dim="time")
    )  # ------------------------------------------------------------------------------------------

    pw_mm = da_pw_m.rename("precipitable_water_monthly")
    pw_mm = pw_mm.rename({"time": "year"})

    # Add a limit to PW100 to limit maximization factors.
    pw_mm_mf = (pw_mm.max(dim="year") * (1.0 + mf)).squeeze()

    pw100_m = pw100_m.where(pw100_m < pw_mm_mf, other=pw_mm_mf)
    pw100_m = pw100_m.expand_dims(dim={"year": np.unique(year.values)})
    pw100_m = pw100_m.stack(stacked_coords=("month", "year"))
    date_index = pd.DatetimeIndex(
        pd.to_datetime(
            pd.DataFrame(
                {
                    "year": pw100_m.year.data,
                    "month": pw100_m.month.data,
                    "day": (xr.ones_like(pw100_m.month).data),
                    "hour": (xr.ones_like(pw100_m.month).data) * 12,
                }
            )
        )
    )
    pw100_m = pw100_m.assign_coords(time=("stacked_coords", date_index))

    pw100_m = pw100_m.swap_dims({"stacked_coords": "time"}).sortby("time")
    pw100_m = pw100_m.convert_calendar("noleap")
    pw100_m = pw100_m.rename("pw100").to_dataset()

    if "x" and "y" in pw100_m.coords:
        pw100_m = pw100_m.transpose("time", "y", "x")
        da_pw = da_pw.drop_vars(["x", "y"])

    # ----------------------------------------------------------------------------------------------------------------------------------
    nan_months = (
        pw100_m.groupby(pw100_m.month)
        .apply(lambda x: x.month.where(np.any(np.isnan(x))))
        .values
    )
    mask = da_pw.time.dt.month.isin(nan_months)
    pw100_m_ri = pw100_m.pw100.reindex(time=da_pw.time).ffill(dim="time")
    da_pw100 = pw100_m_ri.where(~mask, np.nan)
    # ----------------------------------------------------------------------------------------------------------------------------------

    # da_pw100 = xr.merge([pw100_m, da_pw]).ffill(dim="time") #-----------------------------------------------------

    da_pw100 = da_pw100.squeeze().drop_vars(["month", "year", "stacked_coords"])

    return da_pw100

In [3]:
ds = xr.open_zarr(
    "c:\\Users\\ospin\\OneDrive\\Documents\\CMP\\Data\\CMIP.CCCma.CanESM5.historical.r1i1p1f1.day.gn.zarr"
)
ds_fx = xr.open_zarr(
    "c:\\Users\\ospin\\OneDrive\\Documents\\CMP\\Data\\CMIP.CCCma.CanESM5.historical.r1i1p1f1.fx.gn.zarr"
)

In [4]:
windows = [1, 2, 3, 4]
snw_threshold = 0.01
rf_threshold = 0.01
dist = gev
winter_months = [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [5]:
ds_wint = ds.sel(time=ds.time.dt.month.isin(winter_months))

prsn = ds_wint.prsn
rf = ds_wint.rf
pr = ds_wint.pr

### The maximum precipitable water (wmax)

The paper use the maximum value of w for each CRCM tile during an event and the three 6-h time steps preceding the event.

In [6]:
wmax = pmp.precipitable_water(ds_wint, ds_fx, windows=windows)

TypeError: precipitable_water() missing 1 required positional argument: 'orog'

### PW snow events

To ensure that wmax consists of values that lead to snowfall only, the paper consider exclusively values of w from time steps that show at least 0.25 mm/6 h of solid precipitation (given in water equivalent of the snowfall (a.k.a., snow water equivalent, SWE)) and less than 0.1 mm/6 h of rain.

The paper recommends to use the methodology M3 to define the events associated to snowfall events. For this, the methodology M2 is first computed by considering only time steps that show at least the minimum amount snowfall.

In [None]:
pe = pmp.major_precipitation_events(
    prsn, windows=windows, quantile=0.0
)  # quantile = 0 to take all the events.

snw_thresholds = snw_threshold * np.array(windows)
rf_thresholds = rf_threshold * np.array(windows)

list_where = []
for window, snw_threshold in zip(windows, snw_thresholds):
    pe_win = pe.sel(windows=window)
    list_where.append(
        pe_win.where((pe_win >= snw_threshold) & (rf.values <= rf_threshold))
    )

pe_snow = xr.concat(list_where, dim="windows")

pw_snow_events_M2 = wmax.where(pe_snow > 0)

Then, M2 events are multiplied by the ratio of snowfall over total precipitation when more than the minimum amount of rain occurs. The multiplication provides a means to estimate the amount of precipitable water that leads to snowfall.

In [None]:
pe_pr = pmp.major_precipitation_events(
    pr, windows=windows, quantile=0.0
)  # quantile = 0 to take all the events.

ratio_snw_pr = pe / pe_pr
ratio_snw_pr = ratio_snw_pr.where(pe_snow > 0)  # ratio to compute the the M3 method

pw_snow_events_M3 = pw_snow_events_M2.where(
    ((prsn >= snw_threshold) & (rf.values >= rf_threshold)),
    pw_snow_events_M2 * ratio_snw_pr,
)

### Pw100

To compute the value of the 100-year return period the wmax is filtered by the snow and rainfall thresholds.

A value of mf=1000 is used to do not imposed a limit to pw100 as in the paper they assumed that the CRCM is physically sound and its output holistically coherent. Thus, they decided to work with the data without restricting anything.

The function  precipitable_water_100y was modified to set the maximum value of wmax as w100 when there is less than n data to compute the distribution. Paper uses n = 20.

In [None]:
pw100 = precipitable_water_100y(
    pw_snow_events_M3.sel(windows=1).chunk(dict(time=-1)),
    dist=gev,
    method="PWM",
    mf=1000,
    n=3,
)  # mf=1000 to do not imposed a limit
pw100 = pw100.sel(time=pw100.time.dt.month.isin(winter_months))

### Maximization ratio (r) and Maximized snowfall (snowmax)

In [None]:
r = pw100 / pw_snow_events_M3
snowmax = r * pe_snow

In [None]:
# Get the snow events that were not maximized
list_where2 = []
for window, snw_threshold in zip(windows, snw_thresholds):
    pe_win = pe.sel(windows=window)
    list_where2.append(pe_win.where(pe_win < snw_threshold))

pe_snow2 = xr.concat(list_where2, dim="windows")

snow_NO_max = pe_snow2

In [None]:
snow_sum = xr.concat([snowmax, snow_NO_max], dim="variable").sum("variable")

In [None]:
pmsa_annual = snow_sum.resample(time="YS-JUL").sum(dim="time")
pmsa = pmsa_annual.max(dim="time")

array([[[ 487.49921143,  898.87266135, 1303.81225332, 1615.23844062],
        [ 390.69169999,  821.18292136, 1279.44780067, 1649.42235957]],

       [[ 487.49230991, 1090.20182714, 1592.82797013, 2038.18447456],
        [ 365.83691094,  743.78658646, 1122.46213099, 1498.32315001]]])

## Questions to solve

- Do we want to study the non-stationarity in the wmax? (i) a linear trend and (ii) an abrupt shift (or change-point). T0, T1, T2, T3
    -  Since there is a physical limit to this absolute humidity, a breakpoint analysis should not be neglected in future studies.
    -  Yet we decided to not account for this behavior in the methodology.
    -  We still chose to use type T1 (model with linear time dependence in the expected value or, for the Gumbel and GEV distributions, in the location parameter)

- Do we want to consider a limit for w100 to be consistent with the PMP_summer?

- Get data to test.

    -All simulations were generated using CRCM version 4.2.3
    (Caya and Laprise, 1999; de Elía and Côté, 2010; Paquin, 2010) over
    a domain centered on the Province of Quebec (111 x 87 grid
    points) with a horizontal grid-size mesh of 45 km (true at 60N).
    The variables of interest are available from January 1961 to December 2100 for all three
simulations with a 6-h temporal resolution.
The afx and agr simulations were driven by outputs of the third
generation Canadian Global Coupled Climate Model (CGCM3/T47
at approximately 3.57  3.75 degree latitude–longitude; 4th and
5th members, respectively; Flato and Boer, 2001; Flato et al.,
2000; Scinocca et al., 2008). The aha simulation was driven by simulation outputs of the German Coupled Global Climate Model
(ECHAM5 at approximately 1.87  1.87 degree latitude–longitude;
1st member; Jungclaus et al., 2006).

## Note

The function precipitable_water_100y use a dummy distribution to fill the values where n < 3. This is not gonna be necessary when we work with more data or when we use Kamil's wrapper.