Skip to content

Commit

Permalink
Merge b64ebb1 into 6d214a7
Browse files Browse the repository at this point in the history
  • Loading branch information
TC-FF committed Feb 7, 2024
2 parents 6d214a7 + b64ebb1 commit 0cdbbe5
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 4 deletions.
82 changes: 78 additions & 4 deletions docs/notebooks/local_frequency_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@
" **{\n",
" \"datasets\":{\n",
" \"deh\":{\n",
" \"id\" :[\"020*\"],\n",
" \"regulated\":[\"Natural\"],\n",
" \"id\" :[\"051001\"],\n",
" \"variables\":[\"streamflow\"],\n",
" }\n",
" }, \"time\":{\"start\": \"1970-01-01\", \n",
Expand Down Expand Up @@ -141,6 +140,13 @@
"ds_4fa"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -314,7 +320,75 @@
"source": [
"rp = xhfa.local.parametric_quantiles(params, t=[20, 100])\n",
"\n",
"rp"
"rp.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`xhfa.local.prepare_plots` generates points for later plotting xmin and xmax reprensent maximum return_period but outputs will be in frequency\n",
"If log=True, returns log-spaced x values between xmin and xmax.\n",
"Otherwise returns linearly spaced values between xmin and xmax.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = xhfa.local.prepare_plots(params)\n",
"data['return_period'] = -1 / ( data['return_period'] - 1)\n",
"data.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`xhfa.local.get_plotting_positions` allows you to get ploting positions for all variables in the dataset.\n",
"It accpets alpha and beta in arguments, see scipy.stats.mstats.plotting_positions for typical values for alpha and beta.\n",
"<br>By default (.4,.4) : approximately quantile unbiased (Cunnane)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pp = xhfa.local.get_plotting_positions(ds_4fa[['streamflow_max_spring']])\n",
"pp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"p1 = data.streamflow_max_spring.hvplot(x='return_period', by='scipy_dist', grid=True, groupby=['id'], logx=True) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"p2 = pp.hvplot.scatter(x='streamflow_max_spring_pp',y='streamflow_max_spring', grid=True, groupby=['id'], logx=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"p1 * p2"
]
}
],
Expand All @@ -334,7 +408,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.7"
},
"vscode": {
"interpreter": {
Expand Down
76 changes: 76 additions & 0 deletions tests/test_local.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np
import pytest
import xarray as xr
from xclim.testing.helpers import test_timeseries as timeseries

import xhydro.frequency_analysis as xhfa
from xhydro.frequency_analysis.local import get_plotting_positions


class TestFit:
Expand Down Expand Up @@ -153,3 +155,77 @@ def test_criteria():
[118.12140939, 118.51930466, 118.56585383],
],
)


class TestGetPlottingPositions:
def test_default(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.16666667, 1.61538462, 2.625, 7.0]
result = get_plotting_positions(data)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = get_plotting_positions(data_2d)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_nan(self):
data = timeseries(
np.array([50, np.nan, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.23076923, np.nan, 2.0, 5.33333333]
result = get_plotting_positions(data)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = get_plotting_positions(data_2d)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_return_period(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [0.14285714, 0.38095238, 0.61904762, 0.85714286]
result = get_plotting_positions(data, return_period=False)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = get_plotting_positions(data_2d, return_period=False)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)

def test_alpha_beta(self):
data = timeseries(
np.array([50, 65, 80, 95]),
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)
expected = [1.25, 1.66666667, 2.5, 5.0]
result = get_plotting_positions(data, alpha=0, beta=0)
np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)

data_2d = xr.concat([data, data], dim="id")
result = get_plotting_positions(data_2d, alpha=0, beta=0)
np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)
107 changes: 107 additions & 0 deletions xhydro/frequency_analysis/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import statsmodels
import xarray as xr
import xclim.indices.stats
from scipy.stats.mstats import plotting_positions
from statsmodels.tools import eval_measures

__all__ = [
Expand Down Expand Up @@ -265,3 +266,109 @@ def _get_criteria_1d(da, params, dist):
out.attrs = ds.attrs

return out


def get_plotting_positions(data_array, alpha=0.4, beta=0.4, return_period=True):
"""Calculate plotting positions for data.
Parameters
----------
data_array : xarray DataArray
Input data.
alpha : float, optional
Plotting position parameter, by default 0.4.
See scipy.stats.mstats.plotting_positions for typical values for alpha and beta. (.4,.4) : approximately quantile unbiased (Cunnane).
beta : float, optional
Plotting position parameter, by default 0.4.
return_period : bool, optional
If True, return periods instead of probabilities, by default True.
Returns
-------
xarray DataArray
The data with plotting positions assigned.
"""
data_copy = data_array.copy(deep=True)

def vec_plotting_positions(vec_data, alpha=0.4, beta=0.4):
"""Calculate plotting positions for vectorized data.
Parameters
----------
vec_data : ndarray
Input data, with time dimension first.
alpha : float, optional
Plotting position parameter.
beta : float, optional
Plotting position parameter.
Returns
-------
ndarray
Array with plotting positions assigned to valid data points,
and NaNs assigned to invalid data points.
"""
out = []
if vec_data.ndim == 1:
valid = ~np.isnan(vec_data)
pp = plotting_positions(vec_data[valid], alpha, beta)

out = np.full_like(vec_data.astype(float), np.nan)
out[valid] = pp

else:
for data in vec_data:
valid = ~np.isnan(data)
pp = plotting_positions(data[valid], alpha, beta)

pp_full = np.full_like(data.astype(float), np.nan)
pp_full[valid] = pp

out.append(pp_full)
return out

pp = xr.apply_ufunc(
vec_plotting_positions,
data_array,
alpha,
beta,
input_core_dims=[["time"], [], []],
output_core_dims=[["time"]],
)

if return_period:
pp = -1 / (pp - 1)

for name in pp.data_vars:
pp = pp.rename({name: name + "_pp"})
pp[name] = data_copy[name]

return pp


def prepare_plots(params, xmin=1, xmax=10000, npoints=100, log=True):
"""Prepare x-values for plotting frequency analysis results.
Parameters
----------
params : xarray DataArray
Input data.
xmin : float, optional
Minimum x value, by default 1.
xmax : float, optional
Maximum x value, by default 10000.
npoints : int, optional
Number of x values, by default 100.
log : bool, optional
If True, return log-spaced x values, by default True.
Returns
-------
xarray DataArray
The data with plotting positions assigned.
"""
if log:
x = np.logspace(np.log10(xmin), np.log10(xmax), npoints, endpoint=True)
else:
x = np.linspace(xmin, xmax, npoints)
return parametric_quantiles(params, x)

0 comments on commit 0cdbbe5

Please sign in to comment.