Merge b64ebb1 into 6d214a7

hydrologie · Feb 7, 2024 · 0cdbbe5 · 0cdbbe5
2 parents 6d214a7 + b64ebb1
commit 0cdbbe5
Show file tree

Hide file tree

Showing 3 changed files with 261 additions and 4 deletions.
diff --git a/docs/notebooks/local_frequency_analysis.ipynb b/docs/notebooks/local_frequency_analysis.ipynb
@@ -47,8 +47,7 @@
         "    **{\n",
         "        \"datasets\":{\n",
         "            \"deh\":{\n",
-        "                \"id\" :[\"020*\"],\n",
-        "                \"regulated\":[\"Natural\"],\n",
+        "                \"id\" :[\"051001\"],\n",
         "                \"variables\":[\"streamflow\"],\n",
         "            }\n",
         "        }, \"time\":{\"start\": \"1970-01-01\", \n",
@@ -141,6 +140,13 @@
         "ds_4fa"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -314,7 +320,75 @@
       "source": [
         "rp = xhfa.local.parametric_quantiles(params, t=[20, 100])\n",
         "\n",
-        "rp"
+        "rp.load()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "`xhfa.local.prepare_plots` generates points for later plotting  xmin and xmax reprensent maximum return_period but outputs will be in frequency\n",
+        "If log=True, returns log-spaced x values between xmin and xmax.\n",
+        "Otherwise returns linearly spaced values between xmin and xmax.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "data = xhfa.local.prepare_plots(params)\n",
+        "data['return_period'] = -1 / ( data['return_period'] - 1)\n",
+        "data.load()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "`xhfa.local.get_plotting_positions` allows you to get ploting positions for all variables in the dataset.\n",
+        "It accpets alpha and beta in arguments, see scipy.stats.mstats.plotting_positions for typical values for alpha and beta.\n",
+        "<br>By default (.4,.4) : approximately quantile unbiased (Cunnane)\n",
+        "\n",
+        "    "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "pp = xhfa.local.get_plotting_positions(ds_4fa[['streamflow_max_spring']])\n",
+        "pp"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "p1 = data.streamflow_max_spring.hvplot(x='return_period', by='scipy_dist', grid=True, groupby=['id'], logx=True) "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "p2 = pp.hvplot.scatter(x='streamflow_max_spring_pp',y='streamflow_max_spring', grid=True, groupby=['id'], logx=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "p1 * p2"
       ]
     }
   ],
@@ -334,7 +408,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.10.12"
+      "version": "3.11.7"
     },
     "vscode": {
       "interpreter": {

diff --git a/tests/test_local.py b/tests/test_local.py
@@ -1,8 +1,10 @@
 import numpy as np
 import pytest
+import xarray as xr
 from xclim.testing.helpers import test_timeseries as timeseries
 
 import xhydro.frequency_analysis as xhfa
+from xhydro.frequency_analysis.local import get_plotting_positions
 
 
 class TestFit:
@@ -153,3 +155,77 @@ def test_criteria():
             [118.12140939, 118.51930466, 118.56585383],
         ],
     )
+
+
+class TestGetPlottingPositions:
+    def test_default(self):
+        data = timeseries(
+            np.array([50, 65, 80, 95]),
+            variable="streamflow",
+            start="2001-01-01",
+            freq="YS",
+            as_dataset=True,
+        )
+        expected = [1.16666667, 1.61538462, 2.625, 7.0]
+        result = get_plotting_positions(data)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
+        np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)
+
+        data_2d = xr.concat([data, data], dim="id")
+        result = get_plotting_positions(data_2d)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
+        np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)
+
+    def test_nan(self):
+        data = timeseries(
+            np.array([50, np.nan, 80, 95]),
+            variable="streamflow",
+            start="2001-01-01",
+            freq="YS",
+            as_dataset=True,
+        )
+        expected = [1.23076923, np.nan, 2.0, 5.33333333]
+        result = get_plotting_positions(data)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
+        np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)
+
+        data_2d = xr.concat([data, data], dim="id")
+        result = get_plotting_positions(data_2d)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
+        np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)
+
+    def test_return_period(self):
+        data = timeseries(
+            np.array([50, 65, 80, 95]),
+            variable="streamflow",
+            start="2001-01-01",
+            freq="YS",
+            as_dataset=True,
+        )
+        expected = [0.14285714, 0.38095238, 0.61904762, 0.85714286]
+        result = get_plotting_positions(data, return_period=False)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
+        np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)
+
+        data_2d = xr.concat([data, data], dim="id")
+        result = get_plotting_positions(data_2d, return_period=False)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
+        np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)
+
+    def test_alpha_beta(self):
+        data = timeseries(
+            np.array([50, 65, 80, 95]),
+            variable="streamflow",
+            start="2001-01-01",
+            freq="YS",
+            as_dataset=True,
+        )
+        expected = [1.25, 1.66666667, 2.5, 5.0]
+        result = get_plotting_positions(data, alpha=0, beta=0)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, expected)
+        np.testing.assert_array_almost_equal(result.streamflow, data.streamflow)
+
+        data_2d = xr.concat([data, data], dim="id")
+        result = get_plotting_positions(data_2d, alpha=0, beta=0)
+        np.testing.assert_array_almost_equal(result.streamflow_pp, [expected, expected])
+        np.testing.assert_array_equal(result.streamflow, data_2d.streamflow)
diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py
@@ -7,6 +7,7 @@
 import statsmodels
 import xarray as xr
 import xclim.indices.stats
+from scipy.stats.mstats import plotting_positions
 from statsmodels.tools import eval_measures
 
 __all__ = [
@@ -265,3 +266,109 @@ def _get_criteria_1d(da, params, dist):
     out.attrs = ds.attrs
 
     return out
+
+
+def get_plotting_positions(data_array, alpha=0.4, beta=0.4, return_period=True):
+    """Calculate plotting positions for data.
+
+    Parameters
+    ----------
+    data_array : xarray DataArray
+        Input data.
+    alpha : float, optional
+        Plotting position parameter, by default 0.4.
+        See scipy.stats.mstats.plotting_positions for typical values for alpha and beta. (.4,.4) : approximately quantile unbiased (Cunnane).
+    beta : float, optional
+        Plotting position parameter, by default 0.4.
+    return_period : bool, optional
+        If True, return periods instead of probabilities, by default True.
+
+    Returns
+    -------
+    xarray DataArray
+        The data with plotting positions assigned.
+    """
+    data_copy = data_array.copy(deep=True)
+
+    def vec_plotting_positions(vec_data, alpha=0.4, beta=0.4):
+        """Calculate plotting positions for vectorized data.
+
+        Parameters
+        ----------
+        vec_data : ndarray
+            Input data, with time dimension first.
+        alpha : float, optional
+            Plotting position parameter.
+        beta : float, optional
+            Plotting position parameter.
+
+        Returns
+        -------
+        ndarray
+            Array with plotting positions assigned to valid data points,
+            and NaNs assigned to invalid data points.
+        """
+        out = []
+        if vec_data.ndim == 1:
+            valid = ~np.isnan(vec_data)
+            pp = plotting_positions(vec_data[valid], alpha, beta)
+
+            out = np.full_like(vec_data.astype(float), np.nan)
+            out[valid] = pp
+
+        else:
+            for data in vec_data:
+                valid = ~np.isnan(data)
+                pp = plotting_positions(data[valid], alpha, beta)
+
+                pp_full = np.full_like(data.astype(float), np.nan)
+                pp_full[valid] = pp
+
+                out.append(pp_full)
+        return out
+
+    pp = xr.apply_ufunc(
+        vec_plotting_positions,
+        data_array,
+        alpha,
+        beta,
+        input_core_dims=[["time"], [], []],
+        output_core_dims=[["time"]],
+    )
+
+    if return_period:
+        pp = -1 / (pp - 1)
+
+    for name in pp.data_vars:
+        pp = pp.rename({name: name + "_pp"})
+        pp[name] = data_copy[name]
+
+    return pp
+
+
+def prepare_plots(params, xmin=1, xmax=10000, npoints=100, log=True):
+    """Prepare x-values for plotting frequency analysis results.
+
+    Parameters
+    ----------
+    params : xarray DataArray
+        Input data.
+    xmin : float, optional
+        Minimum x value, by default 1.
+    xmax : float, optional
+        Maximum x value, by default 10000.
+    npoints : int, optional
+        Number of x values, by default 100.
+    log : bool, optional
+        If True, return log-spaced x values, by default True.
+
+    Returns
+    -------
+    xarray DataArray
+        The data with plotting positions assigned.
+    """
+    if log:
+        x = np.logspace(np.log10(xmin), np.log10(xmax), npoints, endpoint=True)
+    else:
+        x = np.linspace(xmin, xmax, npoints)
+    return parametric_quantiles(params, x)