### Dataset Setup

In [1]:
# Basic imports
import hvplot.xarray
import numpy as np
import xarray as xr
import xclim.indices.stats as stats
import xdatasets as xd
from scipy.stats import genextreme, genpareto, gumbel_r

import xhydro as xh
import xhydro.frequency_analysis as xhfa
from xhydro.extreme_value_analysis.parameterestimation import *

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [2]:
# Query the "deh" source for streamflow at stations whose id matches "020*" and
# whose "regulated" field is "Natural", with a time window starting 1970-01-01
# and a minimum duration of 15 * 365 days. The result is squeezed and loaded
# into memory.
ds = xd.Query(
    datasets={
        "deh": {
            "id": ["020*"],
            "regulated": ["Natural"],
            "variables": ["streamflow"],
        }
    },
    time={"start": "1970-01-01", "minimum_duration": (15 * 365, "d")},
).data.squeeze().load()

# The queried dataset is missing some CF-style attributes, so they are set by hand:
# a "cf_role" on the station id and a full attribute set on the streamflow variable.
ds["id"].attrs["cf_role"] = "timeseries_id"
ds["streamflow"].attrs = dict(
    long_name="Streamflow",
    units="m3 s-1",
    standard_name="water_volume_transport_in_river_channel",
    cell_methods="time: mean",
)

# Example indexers, one per period of interest. Each entry shows a different
# way of selecting dates: date bounds, day-of-year bounds, months or seasons.
timeargs = dict(
    spring={"date_bounds": ["02-11", "06-19"]},
    summer={"doy_bounds": [152, 243]},
    fall={"month": [9, 10, 11]},
    # To correctly wrap around the year, the resampling frequency must be specified.
    winter={"season": ["DJF"], "freq": "YS-DEC"},
    august={"month": [8]},
    annual={},
)

# Yearly maximum for every indexer above. Years with more than 15% of missing
# data are hidden ("pct" rule with a 0.15 tolerance).
ds_4fa = xh.indicators.get_yearly_op(
    ds,
    op="max",
    timeargs=timeargs,
    missing="pct",
    missing_options={"tolerance": 0.15},
)

# Create a mask beforehand
# NOTE(review): `random` is not used in this cell; the import is kept in case
# later cells rely on it.
import random

# Seeded generator: without a fixed seed the mask (and every statistic derived
# from it) would change on each "Restart Kernel -> Run All".
rng = np.random.default_rng(42)

# Distinct calendar years present in the time axis, computed once and reused.
years = np.unique(ds.time.dt.year)
nyears = years.size

# One random day-of-month per year for the start and the end of the window.
# The upper bound is exclusive, so days are drawn from 1 to 29.
dom_start = xr.DataArray(
    rng.integers(1, 30, size=(nyears,)).astype("str"),
    dims=("year",),
    coords={"year": years},
)
dom_end = xr.DataArray(
    rng.integers(1, 30, size=(nyears,)).astype("str"),
    dims=("year",),
    coords={"year": years},
)

# The mask starts at 0 everywhere and is set to 1 inside each year's window.
mask = xr.zeros_like(ds["streamflow"])
for y in years:
    # Random mask of dates per year, between April and June.
    first_day = dom_start.sel(year=y).item()
    last_day = dom_end.sel(year=y).item()
    mask.loc[
        {"time": slice(str(y) + "-04-" + first_day, str(y) + "-06-" + last_day)}
    ] = 1
# A single, unconstrained indexer: the date selection is done by the mask.
# Its name ("custom") is used to identify the variable created here.
timeargs_custom = dict(custom={})

# Most of each year is masked out, so the missing-data rule has to change accordingly.
# NOTE(review): presumably "at_least_n" with n=45 keeps a year only when it has
# at least 45 valid values - confirm against the missing-value documentation.
missing = "at_least_n"
missing_options = dict(n=45)

# where() masks the data that we want to ignore.
masked = ds.where(mask == 1)

# Compute the yearly maximum on the masked data, then use xr.merge() to combine
# it with the previous results.
ds_4fa = xr.merge(
    [
        ds_4fa,
        xh.indicators.get_yearly_op(
            masked,
            op="max",
            timeargs=timeargs_custom,
            missing=missing,
            missing_options=missing_options,
        ),
    ]
)
# Convert the daily streamflow into a daily volume, expressed in hm3, and store
# it as a new "volume" variable in the dataset.
ds["volume"] = xh.indicators.compute_volume(ds["streamflow"], out_units="hm3")

# Volumes use slightly different indexers than the streamflow maxima.
timeargs_vol = dict(
    spring={"date_bounds": ["04-30", "06-15"]},
    annual={},
)

# For volumes the yearly operation is the sum, not the max. The result is
# merged into the dataset that already holds the previous results.
ds_4fa = xr.merge(
    [
        ds_4fa,
        xh.indicators.get_yearly_op(
            ds,
            input_var="volume",
            op="sum",
            timeargs=timeargs_vol,
            missing="pct",
            missing_options={"tolerance": 0.15},
            interpolate_na=True,
        ),
    ]
)

# Quick check of the summer maxima for the first entry along the leading
# dimension (presumably the station id - confirm). To inspect every entry,
# loop over range(len(ds_4fa.coords["id"])) and print each row the same way.
print(ds_4fa["streamflow_max_summer"][0, :].values)

# Display the full dataset prepared for the frequency analysis.
ds_4fa

[   nan    nan    nan    nan    nan    nan    nan    nan    nan    nan
    nan    nan    nan    nan    nan    nan    nan    nan    nan    nan
  31.4   38.8   63.78  54.82 123.2  108.2  108.1   98.52  42.24  33.45
  60.    84.23  48.07  77.71  38.37  42.02  46.      nan    nan    nan
    nan    nan    nan    nan    nan    nan    nan    nan    nan    nan
    nan    nan    nan    nan    nan    nan]


# extreme_value_analysis.parameterestimation.fit()

In [3]:
# Fit scipy's `genextreme` distribution to the dataset with the "ml" method.
# `fit` is brought into scope by the star import of
# xhydro.extreme_value_analysis.parameterestimation.
params = fit(
    ds_4fa,
    dist=genextreme,
    method="ml",
)
params

# xclim.stats.fit()

In [4]:
# lmoments3 distribution objects; only `gev` is used in this cell.
from lmoments3.distr import gev, gpa, gum

# Same fit as above, this time through xclim's stats.fit() with the lmoments3
# GEV distribution. Note that this overwrites `params` from the previous cell.
# NOTE(review): xclim documents `fit` for DataArray inputs and upper-case method
# names - confirm that a Dataset and method="ml" are accepted.
params = stats.fit(
    ds_4fa,
    dist=gev,
    method="ml",
)
params