In [62]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so local changes to xhydro are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [215]:
import numpy as np
import pandas as pd
import xarray as xr
import xdatasets as xd
from sklearn.cluster import HDBSCAN, OPTICS, AgglomerativeClustering
from sklearn.decomposition import PCA

import xhydro as xh
import xhydro.frequency_analysis as xhfa
import xhydro.gis as xhgis

ModuleNotFoundError: No module named 'xhfa'

In [3]:
# Selection criteria for the observed streamflow: "deh" stations whose id
# matches 020*, with `regulated` equal to "Natural", starting in 1970 and
# spanning at least 15 * 365 days.
streamflow_query = {
    "datasets": {
        "deh": {
            "id": ["020*"],
            "regulated": ["Natural"],
            "variables": ["streamflow"],
        }
    },
    "time": {"start": "1970-01-01", "minimum_duration": (15 * 365, "d")},
}

# Run the query, squeeze the result and load it into memory.
ds = xd.Query(**streamflow_query).data.squeeze().load()

# The dataset comes back without some of the metadata attributes we rely on,
# so they are set explicitly here.
ds["id"].attrs["cf_role"] = "timeseries_id"

streamflow_attrs = {
    "long_name": "Streamflow",
    "units": "m3 s-1",
    "standard_name": "water_volume_transport_in_river_channel",
    "cell_methods": "time: mean",
}
ds["streamflow"].attrs = streamflow_attrs

ds

In [10]:
# Example periods over which the yearly statistics are computed:
# "spring" is restricted by month-day bounds, "annual" has no extra indexer.
spring_window = {"date_bounds": ["02-11", "06-19"]}
timeargs = {"spring": spring_window, "annual": {}}

In [11]:
# Yearly maximum for each period defined in `timeargs`.
# Years with more than 15% of missing data are hidden.
ds_4fa = xh.indicators.get_yearly_op(
    ds,
    op="max",
    timeargs=timeargs,
    missing="pct",
    missing_options={"tolerance": 0.15},
)
ds_4fa

In [12]:
# Derive a daily volume (in hm3) from the daily streamflow.
# NOTE: this adds a "volume" variable to `ds` in place.
ds["volume"] = xh.indicators.compute_volume(ds["streamflow"], out_units="hm3")

# The volume indicators use slightly different indexers than the peak flows.
timeargs_vol = {"spring": {"date_bounds": ["04-30", "06-15"]}, "annual": {}}

# For volumes the yearly operation is the sum, not the max.
volume_yearly = xh.indicators.get_yearly_op(
    ds,
    op="sum",
    input_var="volume",
    timeargs=timeargs_vol,
    missing="pct",
    missing_options={"tolerance": 0.15},
    interpolate_na=True,
)

# Append the yearly volumes to the yearly maxima computed earlier.
ds_4fa = xr.merge([ds_4fa, volume_yearly])
ds_4fa

In [13]:
# Extract the watershed polygons (shapefiles) of the provincial hydrometric
# stations, using the same station/time selection as the streamflow query.
polygon_query = {
    "datasets": {
        "deh_polygons": {
            "id": ["020*"],
            "regulated": ["Natural"],
            "variables": ["streamflow"],
        }
    },
    "time": {"start": "1970-01-01", "minimum_duration": (15 * 365, "d")},
}

# reset_index() turns the index into regular columns (see the displayed table).
gdf = xd.Query(**polygon_query).data.reset_index()
gdf

Unnamed: 0,Station,Superficie,geometry
0,20302,1071.505249,"POLYGON ((-65.54653 48.91282, -65.54639 48.912..."
1,20404,664.096924,"POLYGON ((-65.1477 49.05904, -65.14748 49.0589..."
2,20502,57.292057,"POLYGON ((-64.45703 48.9948, -64.45692 48.9946..."
3,20602,626.996155,"POLYGON ((-64.97292 49.17614, -64.97283 49.176..."
4,20802,1184.306641,"POLYGON ((-65.26495 49.21157, -65.26473 49.211..."


In [39]:
# Compute the watershed properties of every station polygon, returned as an
# xarray object indexed by "Station".
dswp = xhgis.watershed_properties(
    gdf[["Station", "geometry"]], unique_id="Station", output_format="xarray"
)

# Split each centroid pair into two separate per-station variables.
# NOTE(review): assumes each centroid is ordered (lon, lat) — confirm against
# xhgis.watershed_properties.
cent = dswp["centroid"].to_numpy()
lon = [ele[0] for ele in cent]
lat = [ele[1] for ele in cent]

# `Dataset.drop` is deprecated for removing variables; `drop_vars` is the
# supported equivalent and avoids the deprecation warning.
dswp = dswp.assign(lon=("Station", lon), lat=("Station", lat)).drop_vars("centroid")
dswp

In [163]:
# Run the PCA helper on the watershed descriptors, keeping 3 components.
# It returns the transformed data together with the fitted PCA object.
data, pca = xhfa.regional.fit_pca(
    dswp,
    n_components=3,
)
data

In [168]:
# Reshape to one row per station and one column per component, then check the
# correlation between components (off-diagonal terms should be ~0).
components_wide = (
    data.to_dataframe(name="value")
    .reset_index()
    .pivot(index="Station", columns="components")
)
components_wide.corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value,value
Unnamed: 0_level_1,components,0,1,2
Unnamed: 0_level_2,components,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
value,0,1.0,8.896369000000001e-17,5.999128e-16
value,1,8.896369000000001e-17,1.0,8.403164e-16
value,2,5.999128e-16,8.403164e-16,1.0


In [170]:
# Check how many components the fitted PCA object reports (3 were requested).
pca.n_components_

3

In [None]:
# Group the stations from the PCA scores using HDBSCAN.
hdbscan_params = {"min_cluster_size": 2}
xhfa.regional.get_group_from_fit(HDBSCAN, hdbscan_params, data)

In [None]:
# Group the stations from the PCA scores using OPTICS.
optics_params = {"min_samples": 2}
xhfa.regional.get_group_from_fit(OPTICS, optics_params, data)

In [233]:
# Group the stations from the PCA scores using agglomerative clustering with
# 3 clusters; the result is a list with one array of station ids per group.
agglomerative_params = {"n_clusters": 3}
groupes = xhfa.regional.get_group_from_fit(
    AgglomerativeClustering,
    agglomerative_params,
    data,
)
groupes

[array(['020404', '020602', '020802'], dtype=object),
 array(['020502'], dtype=object),
 array(['020302'], dtype=object)]

In [231]:
from xhydro.frequency_analysis.regional import moment_l_vector

In [232]:
# Compute the L-moments of every yearly series: the "time" dimension is
# consumed and replaced by a new "lmom" dimension, which is then labelled.
# The function is reached through the `xhfa` alias from the top import cell,
# like the other `xhfa.regional.*` calls in this notebook, so this cell does
# not depend on a separate mid-notebook import having been run.
ds_moment = xr.apply_ufunc(
    xhfa.regional.moment_l_vector,
    ds_4fa,
    input_core_dims=[["time"]],
    output_core_dims=[["lmom"]],
).assign_coords(lmom=["l1", "l2", "l3", "tau", "tau3", "tau4"])
ds_moment

In [235]:
def _stack_by_group(ds, groups):
    """Stack the per-group selections of ``ds`` along a new ``group_id`` dimension.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object with an ``id`` coordinate to select from.
    groups : sequence
        One collection of ``id`` values per group; the position of a
        collection in the sequence becomes its ``group_id``.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        The selections concatenated along ``group_id``.
    """
    return xr.concat(
        [
            ds.sel(id=ids).assign_coords(group_id=group_id).expand_dims("group_id")
            for group_id, ids in enumerate(groups)
        ],
        dim="group_id",
    )


# Same grouping applied to the yearly series and to their L-moments.
ds_groups = _stack_by_group(ds_4fa, groupes)
ds_moments_groups = _stack_by_group(ds_moment, groupes)

In [236]:
# Display the yearly series stacked along "group_id".
ds_groups

In [237]:
# Display the L-moments stacked along "group_id".
ds_moments_groups