diff --git a/csaps/__init__.py b/csaps/__init__.py index 2fdd6fc..3f9e68e 100644 --- a/csaps/__init__.py +++ b/csaps/__init__.py @@ -26,8 +26,14 @@ MultivariateDataType, NdGridDataType, ) +from csaps._shortcut import csaps, SmoothingResult __all__ = [ + # Shortcut + 'csaps', + 'SmoothingResult', + + # Classes 'SplinePPFormBase', 'ISmoothingSpline', 'SplinePPForm', diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py new file mode 100644 index 0000000..ab244a0 --- /dev/null +++ b/csaps/_shortcut.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +""" +The module provides `csaps` shortcut function for smoothing data + +""" + +from collections import abc as c_abc +from typing import Optional, Union, Sequence, NamedTuple + +import numpy as np + +from csaps._base import ISmoothingSpline +from csaps._sspumv import UnivariateCubicSmoothingSpline +from csaps._sspndg import ndgrid_prepare_data_sites, NdGridCubicSmoothingSpline +from csaps._types import ( + UnivariateDataType, + UnivariateVectorizedDataType, + NdGridDataType, +) + +_XDataType = Union[UnivariateDataType, NdGridDataType] +_YDataType = Union[UnivariateVectorizedDataType, np.ndarray] +_XiDataType = Optional[Union[UnivariateDataType, NdGridDataType]] +_WeightsDataType = Optional[Union[UnivariateDataType, NdGridDataType]] +_SmoothDataType = Optional[Union[float, Sequence[Optional[float]]]] + +SmoothingResult = NamedTuple('SmoothingResult', [ + ('values', _YDataType), + ('smooth', _SmoothDataType), +]) + +_ReturnType = Union[ + _YDataType, + SmoothingResult, + ISmoothingSpline, +] + + +def csaps(xdata: _XDataType, + ydata: _YDataType, + xidata: _XiDataType = None, + weights: _WeightsDataType = None, + smooth: _SmoothDataType = None, + axis: Optional[int] = None) -> _ReturnType: + """Smooths the univariate/multivariate/gridded data or computes the corresponding splines + + This function might be used in procedural code. 
+ + Parameters + ---------- + xdata : np.ndarray, array-like + [required] The data sites ``x1 < x2 < ... < xN``: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + + ydata : np.ndarray, array-like + [required] The data values: + - 1-D data vector/sequence (array-like) for univariate data case + - N-D array/array-like for multivariate data case + - N-D array for nd-gridded data case + + xidata : np.ndarray, array-like, sequence[array-like] + [optional] The data sites for output smoothed data: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + If this argument was not set, the function will return computed spline for given data + in `ISmoothingSpline` object. + + weights : np.ndarray, array-like, sequence[array-like] + [optional] The weights data vectors: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + + smooth : float, sequence[float] + [optional] The smoothing factor value(s): + - float value in the range ``[0, 1]`` for univariate/multivariate ydata case + - the sequence of float in the range ``[0, 1]`` or None for nd-gridded ydata case + If this argument was not set or None or sequence with None-items, the function will return + named tuple `SmoothingResult` with computed smoothed data values and smoothing factor value(s). + + axis : int + [optional] The ydata axis. Axis along which "ydata" is assumed to be varying. + If this argument was not set the last axis will be used. + Currently, `axis` will be ignored for nd-gridded ydata case. + + Returns + ------- + yidata : np.ndarray + Smoothed data values if `xidata` and `smooth` were set. 
+ smoothed_data : SmoothingResult + The named tuple with two fields: + - 'values' -- smoothed data values + - 'smooth' -- computed smoothing factor + This result will be returned if `xidata` was set and `smooth` was not set. + sspobj : ISmoothingSpline + Smoothing spline object if `xidata` was not set: + - `UnivariateCubicSmoothingSpline` instance for univariate/multivariate data + - `NdGridCubicSmoothingSpline` instance for nd-gridded data + + Examples + -------- + + Univariate data smoothing + + .. code-block:: python + + import numpy as np + from csaps import csaps + + x = np.linspace(-5., 5., 25) + y = np.exp(-(x/2.5)**2) + (np.random.rand(25) - 0.2) * 0.3 + xi = np.linspace(-5., 5., 150) + + # Smooth data with smoothing factor 0.85 + yi = csaps(x, y, xi, smooth=0.85) + + # Smooth data and compute smoothing factor automatically + yi, smooth = csaps(x, y, xi) + + # Do not evaluate the spline, only compute it + sp = csaps(x, y, smooth=0.98) + + See Also + -------- + + `UnivariateCubicSmoothingSpline` + `NdGridCubicSmoothingSpline` + + """ + + if isinstance(xdata, c_abc.Sequence): + try: + ndgrid_prepare_data_sites(xdata, 'xdata') + except ValueError: + umv = True + else: + umv = False + else: + umv = True + + if umv: + axis = -1 if axis is None else axis + sp = UnivariateCubicSmoothingSpline(xdata, ydata, weights, smooth, axis) + else: + sp = NdGridCubicSmoothingSpline(xdata, ydata, weights, smooth) + + if xidata is None: + return sp + + yidata = sp(xidata) + + auto_smooth = smooth is None + if isinstance(smooth, Sequence): + auto_smooth = any(sm is None for sm in smooth) + + if auto_smooth: + return SmoothingResult(yidata, sp.smooth) + else: + return yidata diff --git a/csaps/_sspndg.py b/csaps/_sspndg.py index 7585218..232afa9 100644 --- a/csaps/_sspndg.py +++ b/csaps/_sspndg.py @@ -15,6 +15,24 @@ from csaps._sspumv import SplinePPForm, UnivariateCubicSmoothingSpline +def ndgrid_prepare_data_sites(data, name) -> ty.Tuple[np.ndarray, ...]: + if not 
isinstance(data, c_abc.Sequence): + raise TypeError("'{}' must be a sequence of the vectors.".format(name)) + + data = list(data) + + for i, di in enumerate(data): + di = np.array(di, dtype=np.float64) + if di.ndim > 1: + raise ValueError("All '{}' elements must be a vector.".format(name)) + if di.size < 2: + raise ValueError( + "'{}' must contain at least 2 data points.".format(name)) + data[i] = di + + return tuple(data) + + class NdGridSplinePPForm(SplinePPFormBase[ty.Sequence[np.ndarray], ty.Tuple[int, ...]]): """N-D grid spline representation in PP-form @@ -134,27 +152,9 @@ def spline(self) -> NdGridSplinePPForm: """ return self._spline - @staticmethod - def _prepare_grid_vectors(data, name) -> ty.Tuple[np.ndarray, ...]: - if not isinstance(data, c_abc.Sequence): - raise TypeError('{} must be sequence of vectors'.format(name)) - - data = list(data) - - for i, di in enumerate(data): - di = np.array(di, dtype=np.float64) - if di.ndim > 1: - raise ValueError('All {} elements must be vector'.format(name)) - if di.size < 2: - raise ValueError( - '{} must contain at least 2 data points'.format(name)) - data[i] = di - - return tuple(data) - @classmethod def _prepare_data(cls, xdata, ydata, weights, smooth): - xdata = cls._prepare_grid_vectors(xdata, 'xdata') + xdata = ndgrid_prepare_data_sites(xdata, 'xdata') data_ndim = len(xdata) if ydata.ndim != data_ndim: @@ -169,7 +169,7 @@ def _prepare_data(cls, xdata, ydata, weights, smooth): if not weights: weights = [None] * data_ndim else: - weights = cls._prepare_grid_vectors(weights, 'weights') + weights = ndgrid_prepare_data_sites(weights, 'weights') if len(weights) != data_ndim: raise ValueError( @@ -197,7 +197,7 @@ def _prepare_data(cls, xdata, ydata, weights, smooth): return xdata, ydata, weights, smooth def __call__(self, xi: NdGridDataType) -> np.ndarray: - xi = self._prepare_grid_vectors(xi, 'xi') + xi = ndgrid_prepare_data_sites(xi, 'xi') if len(xi) != self._ndim: raise ValueError( diff --git 
a/tests/test_shortcut.py b/tests/test_shortcut.py new file mode 100644 index 0000000..bf7589c --- /dev/null +++ b/tests/test_shortcut.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy as np + +from csaps import csaps, SmoothingResult, UnivariateCubicSmoothingSpline, NdGridCubicSmoothingSpline + + +@pytest.fixture(scope='module') +def curve(): + np.random.seed(12345) + + x = np.linspace(-5., 5., 25) + y = np.exp(-(x / 2.5) ** 2) + (np.random.rand(25) - 0.2) * 0.3 + return x, y + + +@pytest.fixture(scope='module') +def surface(): + np.random.seed(12345) + + x = [np.linspace(-3, 3, 61), np.linspace(-3.5, 3.5, 51)] + i, j = np.meshgrid(*x, indexing='ij') + + y = (3 * (1 - j) ** 2. * np.exp(-(j ** 2) - (i + 1) ** 2) + - 10 * (j / 5 - j ** 3 - i ** 5) * np.exp(-j ** 2 - i ** 2) + - 1 / 3 * np.exp(-(j + 1) ** 2 - i ** 2)) + y += np.random.randn(*y.shape) * 0.75 + return x, y + + +@pytest.fixture +def data(curve, surface, request): + if request.param == 'univariate': + x, y = curve + xi = np.linspace(x[0], x[-1], 150) + return x, y, xi, 0.85, UnivariateCubicSmoothingSpline + + elif request.param == 'ndgrid': + x, y = surface + + return x, y, x, [0.85, 0.85], NdGridCubicSmoothingSpline + + +@pytest.mark.parametrize('data', [ + 'univariate', + 'ndgrid', +], indirect=True) +def test_shortcut_output(data): + x, y, xi, smooth, sp_cls = data + + yi = csaps(x, y, xi, smooth=smooth) + assert isinstance(yi, np.ndarray) + + smoothed_data = csaps(x, y, xi) + assert isinstance(smoothed_data, SmoothingResult) + + sp = csaps(x, y) + assert isinstance(sp, sp_cls) + + +@pytest.mark.parametrize('smooth, cls', [ + (0.85, np.ndarray), + ([0.85, 0.85], np.ndarray), + (None, SmoothingResult), + ([None, 0.85], SmoothingResult), + ([0.85, None], SmoothingResult), + ([None, None], SmoothingResult), +]) +def test_shortcut_ndgrid_smooth_output(surface, smooth, cls): + x, y = surface + + output = csaps(x, y, x, smooth=smooth) + assert isinstance(output, cls)