Commit

Merge 3f4259f into 320f299
TC-FF committed Feb 14, 2024
2 parents 320f299 + 3f4259f commit 97e2a99
Showing 4 changed files with 319 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
@@ -13,6 +13,7 @@ New features and enhancements
* Added a new set of functions to support creating and updating `pooch` registries, caching testing datasets from `hydrologie/xhydro-testdata`, and ensuring that testing datasets can be loaded into temporary directories. (:pull:`62`).
* `xhydro` is now configured to use `pooch` to download and cache testing datasets from `hydrologie/xhydro-testdata`. (:pull:`62`).
* `xhydro` is now `Semantic Versioning v2.0.0 <https://semver.org/spec/v2.0.0.html>`_ compliant. (:pull:`70`).
* Added new functions to `xhydro.frequency_analysis.local` to calculate plotting positions and to prepare plots. (:pull:`87`).

Breaking changes
^^^^^^^^^^^^^^^^
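The changelog entries above refer to `pooch`-based caching of testing datasets from `hydrologie/xhydro-testdata`. As a rough, generic sketch of that pattern only — the base URL, file name, hash, and cache name below are placeholders, not xhydro's actual helpers or data layout:

import pooch

# Placeholder registry: real projects pin each file to a known hash so downloads
# can be verified once and then served from the local cache.
odie = pooch.create(
    path=pooch.os_cache("xhydro-testdata"),  # local cache directory (name is illustrative)
    base_url="https://example.org/xhydro-testdata/",  # placeholder URL
    registry={
        "example_dataset.nc": "sha256:0000000000000000000000000000000000000000000000000000000000000000",
    },
)

# fetch() downloads on first use and returns the cached path on subsequent calls.
local_path = odie.fetch("example_dataset.nc")
print(local_path)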
96 changes: 92 additions & 4 deletions docs/notebooks/local_frequency_analysis.ipynb
@@ -47,8 +47,7 @@
" **{\n",
" \"datasets\":{\n",
" \"deh\":{\n",
" \"id\" :[\"020*\"],\n",
" \"regulated\":[\"Natural\"],\n",
" \"id\" :[\"0510*\"],\n",
" \"variables\":[\"streamflow\"],\n",
" }\n",
" }, \"time\":{\"start\": \"1970-01-01\", \n",
@@ -141,6 +140,13 @@
"ds_4fa"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
@@ -314,8 +320,90 @@
"source": [
"rp = xhfa.local.parametric_quantiles(params, t=[20, 100])\n",
"\n",
"rp"
"rp.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In a future release, plotting will be handled by a proper function. For now, we'll show an example in this Notebook using preliminary utilities.\n",
"\n",
"`xhfa.local._prepare_plots` generates datapoints required to plot the results of the frequency analysis. If `log=True`,will it return log-spaced x values between `xmin` and `xmax`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = xhfa.local._prepare_plots(params, xmin=1, xmax = 1000, npoints=50, log=True)\n",
"data.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`xhfa.local._get_plotting_positions` allows you to get plotting positions for all variables in the dataset. It accepts `alpha` `beta` arguments. See the [SciPy documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.plotting_positions.html) for typical values. By default, (0.4, 0.4) will be used, which corresponds to approximately quantile unbiased (Cunnane).\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pp = xhfa.local._get_plotting_positions(ds_4fa[['streamflow_max_spring']])\n",
"pp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Lets plot the observations\n",
"p1 = data.streamflow_max_spring.hvplot(x='return_period', by='scipy_dist', grid=True, groupby=['id'], logx=True) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Lets now plot the distributions\n",
"p2 = pp.hvplot.scatter(x='streamflow_max_spring_pp',y='streamflow_max_spring', grid=True, groupby=['id'], logx=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#And now combining the plots\n",
"p1 * p2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -334,7 +422,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.7"
},
"vscode": {
"interpreter": {
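The markdown cells in the notebook above point to SciPy's plotting positions with the default `(alpha, beta) = (0.4, 0.4)` (Cunnane). A minimal sketch, with illustrative values, of the math that `_get_plotting_positions` (added in `local.py` below) applies to each variable:

import numpy as np
from scipy.stats.mstats import plotting_positions

streamflow = np.array([50.0, 65.0, 80.0, 95.0])  # illustrative annual maxima, already sorted
alpha = beta = 0.4  # Cunnane: approximately quantile unbiased

# SciPy's empirical plotting positions: p_k = (k - alpha) / (n + 1 - alpha - beta)
p = np.asarray(plotting_positions(streamflow, alpha, beta))

n = streamflow.size
k = np.arange(1, n + 1)
assert np.allclose(p, (k - alpha) / (n + 1 - alpha - beta))

# With return_period=True, the probabilities are converted to return periods
# T = 1 / (1 - p), which is the same as the -1 / (p - 1) expression used in local.py.
print(1.0 / (1.0 - p))  # [1.1667, 1.6154, 2.625, 7.0]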
116 changes: 116 additions & 0 deletions tests/test_local.py
@@ -1,8 +1,10 @@
import numpy as np
import pytest
import xarray as xr
from xclim.testing.helpers import test_timeseries as timeseries

import xhydro.frequency_analysis as xhfa
from xhydro.frequency_analysis.local import _get_plotting_positions, _prepare_plots


class TestFit:
@@ -153,3 +155,117 @@ def test_criteria():
[118.12140939, 118.51930466, 118.56585383],
],
)


class TestGetPlottingPositions:
def test_default(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.16666667, 1.61538462, 2.625, 7.0]
result = _get_plotting_positions(data)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = _get_plotting_positions(data_2d)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_nan(self):
data = timeseries(
np.array([50, np.nan, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.23076923, np.nan, 2.0, 5.33333333]
result = _get_plotting_positions(data)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = _get_plotting_positions(data_2d)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_return_period(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [0.14285714, 0.38095238, 0.61904762, 0.85714286]
result = _get_plotting_positions(data, return_period=False)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = _get_plotting_positions(data_2d, return_period=False)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_alpha_beta(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.25, 1.66666667, 2.5, 5.0]
result = _get_plotting_positions(data, alpha=0, beta=0)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = _get_plotting_positions(data_2d, alpha=0, beta=0)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)


class TestPreparePlots:
ds = timeseries(
np.array([50, 65, 80, 95, 110, 125, 140, 155, 170, 185, 200]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
params = xhfa.local.fit(ds, distributions=["gamma", "pearson3"])

def test_prepare_plots_default(self):
result = _prepare_plots(self.params)
assert result.streamflow.shape == (2, 100)
assert result.return_period.min() == 1
assert result.return_period.max() == 10000
expected = [
[-30.78466504, 63.64266447, 78.19229358, 88.62699148, 97.15369501],
[-np.inf, 61.08708903, 79.72126025, 92.05411647, 101.59650405],
]
np.testing.assert_array_almost_equal(result.streamflow.head(), expected)

def test_prepare_plots_linear(self):
result = _prepare_plots(self.params, log=False)

expected = [
[-30.78466504, 262.72577254, 281.56238078, 292.27851004, 299.75980757],
[-np.inf, 235.6878466, 247.41104463, 253.8825982, 258.32114457],
]
np.testing.assert_array_almost_equal(result.streamflow.head(), expected)

def test_prepare_plots_range(self):
result = _prepare_plots(self.params, xmin=5, xmax=500)
assert result.return_period.min() == pytest.approx(5)
assert result.return_period.max() == pytest.approx(500)

def test_prepare_plots_npoints(self):
result = _prepare_plots(self.params, npoints=50).load()
assert result.streamflow.shape == (2, 50)
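As a quick illustration of what `_get_plotting_positions` adds to a dataset, here is a small interactive sketch built on the same kind of fixture the tests above create with xclim's `test_timeseries` helper; the printed values mirror `test_default`:

import numpy as np
from xclim.testing.helpers import test_timeseries as timeseries

from xhydro.frequency_analysis.local import _get_plotting_positions

ds = timeseries(
    np.array([50, 65, 80, 95]),
    variable="streamflow",
    start="2001-01-01",
    freq="YS",
    as_dataset=True,
)

pp = _get_plotting_positions(ds)
print(sorted(pp.data_vars))  # ['streamflow', 'streamflow_pp']
print(pp.streamflow_pp.values)  # approx. [1.1667, 1.6154, 2.625, 7.0] (return periods, Cunnane defaults)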
110 changes: 110 additions & 0 deletions xhydro/frequency_analysis/local.py
@@ -7,6 +7,7 @@
import statsmodels
import xarray as xr
import xclim.indices.stats
from scipy.stats.mstats import plotting_positions
from statsmodels.tools import eval_measures

__all__ = [
@@ -265,3 +266,112 @@ def _get_criteria_1d(da, params, dist):
out.attrs = ds.attrs

return out


def _get_plotting_positions(data_array, alpha=0.4, beta=0.4, return_period=True):
"""Calculate plotting positions for data.
Parameters
----------
data_array : xarray DataArray
Input data.
alpha : float, optional
Plotting position parameter, by default 0.4.
beta : float, optional
Plotting position parameter, by default 0.4.
return_period : bool, optional
If True, return periods instead of probabilities, by default True.
Returns
-------
xarray DataArray
The data with plotting positions assigned.
Notes
-----
See scipy.stats.mstats.plotting_positions for typical values for alpha and beta. (0.4, 0.4) : approximately quantile unbiased (Cunnane).
"""
data_copy = data_array.copy(deep=True)

def vec_plotting_positions(vec_data, alpha=0.4, beta=0.4):
"""Calculate plotting positions for vectorized data.
Parameters
----------
vec_data : ndarray
Input data, with time dimension first.
alpha : float, optional
Plotting position parameter.
beta : float, optional
Plotting position parameter.
Returns
-------
ndarray
Array with plotting positions assigned to valid data points,
and NaNs assigned to invalid data points.
"""
out = []
if vec_data.ndim == 1:
valid = ~np.isnan(vec_data)
pp = plotting_positions(vec_data[valid], alpha, beta)

out = np.full_like(vec_data.astype(float), np.nan)
out[valid] = pp

else:
for data in vec_data:
valid = ~np.isnan(data)
pp = plotting_positions(data[valid], alpha, beta)

pp_full = np.full_like(data.astype(float), np.nan)
pp_full[valid] = pp

out.append(pp_full)
return out

pp = xr.apply_ufunc(
vec_plotting_positions,
data_array,
alpha,
beta,
input_core_dims=[["time"], [], []],
output_core_dims=[["time"]],
)

if return_period:
pp = -1 / (pp - 1)

for name in pp.data_vars:
pp = pp.rename({name: name + "_pp"})
pp[name] = data_copy[name]

return pp


def _prepare_plots(params, xmin=1, xmax=10000, npoints=100, log=True):
"""Prepare x-values for plotting frequency analysis results.
Parameters
----------
params : xarray DataArray
Input data.
xmin : float, optional
Minimum x value, by default 1.
xmax : float, optional
Maximum x value, by default 10000.
npoints : int, optional
Number of x values, by default 100.
log : bool, optional
If True, return log-spaced x values, by default True.
Returns
-------
xarray DataArray
The data with plotting positions assigned.
"""
if log:
x = np.logspace(np.log10(xmin), np.log10(xmax), npoints, endpoint=True)
else:
x = np.linspace(xmin, xmax, npoints)
return parametric_quantiles(params, x)
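Putting the two helpers together: the notebook above overlays the curves from `_prepare_plots` and the points from `_get_plotting_positions` with hvplot. The sketch below does the same with matplotlib on a synthetic series, so the data values and plotting calls are assumptions for illustration rather than part of this changeset:

import matplotlib.pyplot as plt
import numpy as np
from xclim.testing.helpers import test_timeseries as timeseries

import xhydro.frequency_analysis as xhfa
from xhydro.frequency_analysis.local import _get_plotting_positions, _prepare_plots

# Synthetic annual maxima (illustrative values only).
ds = timeseries(
    np.array([50, 65, 80, 95, 110, 125, 140, 155, 170, 185, 200]),
    variable="streamflow",
    start="2001-01-01",
    freq="YS",
    as_dataset=True,
)

params = xhfa.local.fit(ds, distributions=["gamma", "pearson3"])
curves = _prepare_plots(params, xmin=1, xmax=1000, npoints=50, log=True)
pp = _get_plotting_positions(ds)

fig, ax = plt.subplots()
for dist in curves.scipy_dist.values:
    # Fitted quantile curve for each distribution.
    ax.plot(curves.return_period, curves.streamflow.sel(scipy_dist=dist), label=str(dist))
# Observations at their empirical plotting positions.
ax.scatter(pp.streamflow_pp, pp.streamflow, color="k", label="observations")
ax.set_xscale("log")
ax.set_xlabel("Return period (years)")
ax.set_ylabel("Streamflow")
ax.legend()
plt.show()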
