From 2a6fd359f43da2b965b8743401b3cf5b4a68effd Mon Sep 17 00:00:00 2001 From: Eugene Prilepin Date: Mon, 30 Dec 2019 21:15:47 +0300 Subject: [PATCH 1/5] Add 'csaps' shortcut function --- csaps/__init__.py | 6 ++ csaps/_shortcut.py | 139 +++++++++++++++++++++++++++++++++++++++++ tests/test_shortcut.py | 74 ++++++++++++++++++++++ 3 files changed, 219 insertions(+) create mode 100644 csaps/_shortcut.py create mode 100644 tests/test_shortcut.py diff --git a/csaps/__init__.py b/csaps/__init__.py index 2fdd6fc..5b280ab 100644 --- a/csaps/__init__.py +++ b/csaps/__init__.py @@ -26,8 +26,14 @@ MultivariateDataType, NdGridDataType, ) +from csaps._shortcut import csaps, SmoothedData __all__ = [ + # Shortcut + 'csaps', + 'SmoothedData', + + # Classes 'SplinePPFormBase', 'ISmoothingSpline', 'SplinePPForm', diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py new file mode 100644 index 0000000..e87bdb8 --- /dev/null +++ b/csaps/_shortcut.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- + +""" +The module provides the `csaps` shortcut function for smoothing data + +""" + +from typing import Optional, Union, Sequence, NamedTuple + +import numpy as np + +from csaps._base import ISmoothingSpline +from csaps._sspumv import UnivariateCubicSmoothingSpline +from csaps._sspndg import NdGridCubicSmoothingSpline +from csaps._types import ( + UnivariateDataType, + UnivariateVectorizedDataType, + MultivariateDataType, + NdGridDataType, +) + +_XDataType = Union[UnivariateDataType, MultivariateDataType, NdGridDataType] +_YDataType = Union[UnivariateVectorizedDataType, np.ndarray] +_XiDataType = Optional[Union[UnivariateDataType, NdGridDataType]] +_WeightsDataType = Optional[Union[UnivariateDataType, NdGridDataType]] +_SmoothDataType = Optional[Union[float, Sequence[Optional[float]]]] + +SmoothedData = NamedTuple('SmoothedData', [ + ('values', _YDataType), + ('smooth', _SmoothDataType), +]) + +_ReturnType = Union[ + _YDataType, + SmoothedData, + ISmoothingSpline, +] + + +def csaps(xdata: _XDataType, 
ydata: _YDataType, + xidata: _XiDataType = None, + weights: _WeightsDataType = None, + smooth: _SmoothDataType = None, + axis: Optional[int] = None) -> _ReturnType: + """Smooths the univariate/multivariate/gridded data or computes the corresponding splines + + This function might be used in procedural code. + + Parameters + ---------- + xdata : np.ndarray, array-like + [required] The data sites ``x1 < x2 < ... < xN``: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + + ydata : np.ndarray, array-like + [required] The data values: + - 1-D data vector/sequence (array-like) for univariate data case + - N-D array/array-like for multivariate data case + - N-D array for nd-gridded data case + + xidata : np.ndarray, array-like, sequence[array-like] + [optional] The data sites for output smoothed data: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + If this argument was not set, the function will return computed spline for given data + in `ISmoothingSpline` object. + + weights : np.ndarray, array-like, sequence[array-like] + [optional] The weights data vectors: + - 1-D data vector/sequence (array-like) for univariate/multivariate ydata case + - The sequence of 1-D data vectors for nd-gridded ydata case + + smooth : float, sequence[float] + [optional] The smoothing factor value(s): + - float value in the range ``[0, 1]`` for univariate/multivariate ydata case + - the sequence of float in the range ``[0, 1]`` or None for nd-gridded ydata case + If this argument was not set or None or sequence with None-items, the function will return + named tuple `SmoothedData` with computed smoothed data values and smoothing factor value(s). + + axis : int + [optional] The ydata axis. Axis along which "ydata" is assumed to be varying. + If this argument was not set the last axis will be used. 
+ Currently, `axis` will be ignored for nd-gridded ydata case. + + Returns + ------- + yidata : np.ndarray + Smoothed data values if `xidata` and `smooth` were set. + smoothed_data : SmoothedData + The named tuple with two fileds: + - 'values' -- smoothed data values + - 'smooth' -- smooth value + This result will be returned if `xidata` was set and `smooth` was not set. + sspobj : ISmoothingSpline + Smoothing spline object if `xidata` was not set. + + Examples + -------- + + .. code-block:: python + + import numpy as np + from csaps import csaps + + x = np.linspace(-5., 5., 25) + y = np.exp(-(x/2.5)**2) + (np.random.rand(25) - 0.2) * 0.3 + xi = np.linspace(-5., 5., 150) + + yi = csaps(x, y, xi, smooth=0.85) + + """ + try: + axis = -1 if axis is None else axis + sp = UnivariateCubicSmoothingSpline(xdata, ydata, weights, smooth, axis) + except ValueError as univariate_error: + try: + sp = NdGridCubicSmoothingSpline(xdata, ydata, weights, smooth) + except (ValueError, TypeError) as ndgrid_error: + ndgrid_error.__cause__ = univariate_error + + raise ValueError( + 'Invalid input data for all cases:\n [univariate/multivariate]: {}\n [nd-gridded]: {}'.format( + univariate_error, ndgrid_error)) from ndgrid_error + + if xidata is None: + return sp + + yidata = sp(xidata) + + auto_smooth = smooth is None + if isinstance(smooth, Sequence): + auto_smooth = any(sm is None for sm in smooth) + + if auto_smooth: + return SmoothedData(yidata, sp.smooth) + else: + return yidata diff --git a/tests/test_shortcut.py b/tests/test_shortcut.py new file mode 100644 index 0000000..65b3af4 --- /dev/null +++ b/tests/test_shortcut.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy as np + +from csaps import csaps, SmoothedData, UnivariateCubicSmoothingSpline, NdGridCubicSmoothingSpline + + +@pytest.fixture(scope='module') +def curve(): + np.random.seed(12345) + + x = np.linspace(-5., 5., 25) + y = np.exp(-(x / 2.5) ** 2) + (np.random.rand(25) - 0.2) * 0.3 + return x, 
y + + +@pytest.fixture(scope='module') +def surface(): + np.random.seed(12345) + + x = [np.linspace(-3, 3, 61), np.linspace(-3.5, 3.5, 51)] + i, j = np.meshgrid(*x, indexing='ij') + + y = (3 * (1 - j) ** 2. * np.exp(-(j ** 2) - (i + 1) ** 2) + - 10 * (j / 5 - j ** 3 - i ** 5) * np.exp(-j ** 2 - i ** 2) + - 1 / 3 * np.exp(-(j + 1) ** 2 - i ** 2)) + y += np.random.randn(*y.shape) * 0.75 + return x, y + + +@pytest.fixture +def data(curve, surface, request): + if request.param == 'univariate': + x, y = curve + xi = np.linspace(x[0], x[-1], 150) + return x, y, xi, 0.85, UnivariateCubicSmoothingSpline + + elif request.param == 'ndgrid': + x, y = surface + + return x, y, x, [0.85, 0.85], NdGridCubicSmoothingSpline + + +@pytest.mark.parametrize('data', [ + 'univariate', + 'ndgrid', +], indirect=True) +def test_shortcut_output(data): + x, y, xi, smooth, sp_cls = data + + yi = csaps(x, y, xi, smooth=smooth) + assert isinstance(yi, np.ndarray) + + smoothed_data = csaps(x, y, xi) + assert isinstance(smoothed_data, SmoothedData) + + sp = csaps(x, y) + assert isinstance(sp, sp_cls) + + +@pytest.mark.parametrize('smooth, cls', [ + (0.85, np.ndarray), + ([0.85, 0.85], np.ndarray), + (None, SmoothedData), + ([None, 0.85], SmoothedData), + ([0.85, None], SmoothedData), + ([None, None], SmoothedData), +]) +def test_shortcut_ndgrid_smooth_output(surface, smooth, cls): + x, y = surface + + output = csaps(x, y, x, smooth=smooth) + assert isinstance(output, cls) From 2673902d9bb9658cb12c8d982a8241300346ffef Mon Sep 17 00:00:00 2001 From: Eugene Prilepin Date: Wed, 1 Jan 2020 04:54:04 +0300 Subject: [PATCH 2/5] Refactoring --- csaps/_shortcut.py | 30 +++++++++++++++++------------- csaps/_sspndg.py | 44 ++++++++++++++++++++++---------------------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py index e87bdb8..0394dca 100644 --- a/csaps/_shortcut.py +++ b/csaps/_shortcut.py @@ -5,21 +5,21 @@ """ +from collections import abc as c_abc 
from typing import Optional, Union, Sequence, NamedTuple import numpy as np from csaps._base import ISmoothingSpline from csaps._sspumv import UnivariateCubicSmoothingSpline -from csaps._sspndg import NdGridCubicSmoothingSpline +from csaps._sspndg import ndgrid_prepare_data_sites, NdGridCubicSmoothingSpline from csaps._types import ( UnivariateDataType, UnivariateVectorizedDataType, - MultivariateDataType, NdGridDataType, ) -_XDataType = Union[UnivariateDataType, MultivariateDataType, NdGridDataType] +_XDataType = Union[UnivariateDataType, NdGridDataType] _YDataType = Union[UnivariateVectorizedDataType, np.ndarray] _XiDataType = Optional[Union[UnivariateDataType, NdGridDataType]] _WeightsDataType = Optional[Union[UnivariateDataType, NdGridDataType]] @@ -111,18 +111,22 @@ def csaps(xdata: _XDataType, yi = csaps(x, y, xi, smooth=0.85) """ - try: - axis = -1 if axis is None else axis - sp = UnivariateCubicSmoothingSpline(xdata, ydata, weights, smooth, axis) - except ValueError as univariate_error: + + if isinstance(xdata, c_abc.Sequence): try: - sp = NdGridCubicSmoothingSpline(xdata, ydata, weights, smooth) - except (ValueError, TypeError) as ndgrid_error: - ndgrid_error.__cause__ = univariate_error + ndgrid_prepare_data_sites(xdata, 'xdata') + except ValueError: + umv = True + else: + umv = False + else: + umv = True - raise ValueError( - 'Invalid input data for all cases:\n [univariate/multivariate]: {}\n [nd-gridded]: {}'.format( - univariate_error, ndgrid_error)) from ndgrid_error + if umv: + axis = -1 if axis is None else axis + sp = UnivariateCubicSmoothingSpline(xdata, ydata, weights, smooth, axis) + else: + sp = NdGridCubicSmoothingSpline(xdata, ydata, weights, smooth) if xidata is None: return sp diff --git a/csaps/_sspndg.py b/csaps/_sspndg.py index 7585218..c3265f9 100644 --- a/csaps/_sspndg.py +++ b/csaps/_sspndg.py @@ -15,6 +15,24 @@ from csaps._sspumv import SplinePPForm, UnivariateCubicSmoothingSpline +def ndgrid_prepare_data_sites(data, name) -> 
ty.Tuple[np.ndarray, ...]: + if not isinstance(data, c_abc.Sequence): + raise TypeError("'{}' must be a sequence of the vectors.".format(name)) + + data = list(data) + + for i, di in enumerate(data): + di = np.array(di, dtype=np.float64) + if di.ndim > 1: + raise ValueError("All '{}' elements must be a vector.".format(name)) + if di.size < 2: + raise ValueError( + "'{}' must contain at least 2 data points.".format(name)) + data[i] = di + + return tuple(data) + + class NdGridSplinePPForm(SplinePPFormBase[ty.Sequence[np.ndarray], ty.Tuple[int, ...]]): """N-D grid spline representation in PP-form @@ -53,7 +71,7 @@ def order(self) -> ty.Tuple[int, ...]: def ndim(self) -> int: return self._ndim - def evaluate(self, xi: ty.Sequence[np.ndarray]) -> np.ndarray: + def evaluate(self, xi: NdGridDataType) -> np.ndarray: yi = self.coeffs.copy() sizey = list(yi.shape) nsize = tuple(x.size for x in xi) @@ -134,27 +152,9 @@ def spline(self) -> NdGridSplinePPForm: """ return self._spline - @staticmethod - def _prepare_grid_vectors(data, name) -> ty.Tuple[np.ndarray, ...]: - if not isinstance(data, c_abc.Sequence): - raise TypeError('{} must be sequence of vectors'.format(name)) - - data = list(data) - - for i, di in enumerate(data): - di = np.array(di, dtype=np.float64) - if di.ndim > 1: - raise ValueError('All {} elements must be vector'.format(name)) - if di.size < 2: - raise ValueError( - '{} must contain at least 2 data points'.format(name)) - data[i] = di - - return tuple(data) - @classmethod def _prepare_data(cls, xdata, ydata, weights, smooth): - xdata = cls._prepare_grid_vectors(xdata, 'xdata') + xdata = ndgrid_prepare_data_sites(xdata, 'xdata') data_ndim = len(xdata) if ydata.ndim != data_ndim: @@ -169,7 +169,7 @@ def _prepare_data(cls, xdata, ydata, weights, smooth): if not weights: weights = [None] * data_ndim else: - weights = cls._prepare_grid_vectors(weights, 'weights') + weights = ndgrid_prepare_data_sites(weights, 'weights') if len(weights) != data_ndim: raise 
ValueError( @@ -197,7 +197,7 @@ def _prepare_data(cls, xdata, ydata, weights, smooth): return xdata, ydata, weights, smooth def __call__(self, xi: NdGridDataType) -> np.ndarray: - xi = self._prepare_grid_vectors(xi, 'xi') + xi = ndgrid_prepare_data_sites(xi, 'xi') if len(xi) != self._ndim: raise ValueError( From f8f47261183b2c81331f44993b0e7242633a025e Mon Sep 17 00:00:00 2001 From: Eugene Prilepin Date: Wed, 1 Jan 2020 04:55:12 +0300 Subject: [PATCH 3/5] Fix type --- csaps/_sspndg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csaps/_sspndg.py b/csaps/_sspndg.py index c3265f9..232afa9 100644 --- a/csaps/_sspndg.py +++ b/csaps/_sspndg.py @@ -71,7 +71,7 @@ def order(self) -> ty.Tuple[int, ...]: def ndim(self) -> int: return self._ndim - def evaluate(self, xi: NdGridDataType) -> np.ndarray: + def evaluate(self, xi: ty.Sequence[np.ndarray]) -> np.ndarray: yi = self.coeffs.copy() sizey = list(yi.shape) nsize = tuple(x.size for x in xi) From ebf29556718ef7b39360f4c270d65dfaacc13209 Mon Sep 17 00:00:00 2001 From: Eugene Prilepin Date: Wed, 1 Jan 2020 05:04:57 +0300 Subject: [PATCH 4/5] Update docstring --- csaps/_shortcut.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py index 0394dca..2493ee1 100644 --- a/csaps/_shortcut.py +++ b/csaps/_shortcut.py @@ -91,14 +91,18 @@ def csaps(xdata: _XDataType, smoothed_data : SmoothedData The named tuple with two fileds: - 'values' -- smoothed data values - - 'smooth' -- smooth value + - 'smooth' -- computed smoothing factor This result will be returned if `xidata` was set and `smooth` was not set. sspobj : ISmoothingSpline - Smoothing spline object if `xidata` was not set. + Smoothing spline object if `xidata` was not set: + - `UnivariateCubicSmoothingSpline` instance for univariate/multivariate data + - `NdGridCubicSmoothingSpline` instance for nd-gridded data Examples -------- + Univariate data smoothing + .. 
code-block:: python import numpy as np @@ -108,8 +112,15 @@ def csaps(xdata: _XDataType, y = np.exp(-(x/2.5)**2) + (np.random.rand(25) - 0.2) * 0.3 xi = np.linspace(-5., 5., 150) + # Smooth data with smoothing factor 0.85 yi = csaps(x, y, xi, smooth=0.85) + # Smooth data and compute smoothing factor automatically + yi, smooth = csaps(x, y, xi) + + # Do not evaluate the spline, only compute it + sp = csaps(x, y, smooth=0.98) + """ if isinstance(xdata, c_abc.Sequence): From 4327197971e873f5be97100bda64c10ca6cffa10 Mon Sep 17 00:00:00 2001 From: Eugene Prilepin Date: Wed, 1 Jan 2020 05:11:14 +0300 Subject: [PATCH 5/5] Refactoring --- csaps/__init__.py | 4 ++-- csaps/_shortcut.py | 16 +++++++++++----- tests/test_shortcut.py | 12 ++++++------ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/csaps/__init__.py b/csaps/__init__.py index 5b280ab..3f9e68e 100644 --- a/csaps/__init__.py +++ b/csaps/__init__.py @@ -26,12 +26,12 @@ MultivariateDataType, NdGridDataType, ) -from csaps._shortcut import csaps, SmoothedData +from csaps._shortcut import csaps, SmoothingResult __all__ = [ # Shortcut 'csaps', - 'SmoothedData', + 'SmoothingResult', # Classes 'SplinePPFormBase', diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py index 2493ee1..ab244a0 100644 --- a/csaps/_shortcut.py +++ b/csaps/_shortcut.py @@ -25,14 +25,14 @@ _WeightsDataType = Optional[Union[UnivariateDataType, NdGridDataType]] _SmoothDataType = Optional[Union[float, Sequence[Optional[float]]]] -SmoothedData = NamedTuple('SmoothedData', [ +SmoothingResult = NamedTuple('SmoothingResult', [ ('values', _YDataType), ('smooth', _SmoothDataType), ]) _ReturnType = Union[ _YDataType, - SmoothedData, + SmoothingResult, ISmoothingSpline, ] @@ -77,7 +77,7 @@ def csaps(xdata: _XDataType, - float value in the range ``[0, 1]`` for univariate/multivariate ydata case - the sequence of float in the range ``[0, 1]`` or None for nd-gridded ydata case If this argument was not set or None or sequence with None-items, the 
function will return - named tuple `SmoothedData` with computed smoothed data values and smoothing factor value(s). + named tuple `SmoothingResult` with computed smoothed data values and smoothing factor value(s). axis : int [optional] The ydata axis. Axis along which "ydata" is assumed to be varying. @@ -88,7 +88,7 @@ def csaps(xdata: _XDataType, ------- yidata : np.ndarray Smoothed data values if `xidata` and `smooth` were set. - smoothed_data : SmoothedData + smoothed_data : SmoothingResult The named tuple with two fileds: - 'values' -- smoothed data values - 'smooth' -- computed smoothing factor @@ -121,6 +121,12 @@ def csaps(xdata: _XDataType, # Do not evaluate the spline, only compute it sp = csaps(x, y, smooth=0.98) + See Also + -------- + + `UnivariateCubicSmoothingSpline` + `NdGridCubicSmoothingSpline` + """ if isinstance(xdata, c_abc.Sequence): @@ -149,6 +155,6 @@ def csaps(xdata: _XDataType, auto_smooth = any(sm is None for sm in smooth) if auto_smooth: - return SmoothedData(yidata, sp.smooth) + return SmoothingResult(yidata, sp.smooth) else: return yidata diff --git a/tests/test_shortcut.py b/tests/test_shortcut.py index 65b3af4..bf7589c 100644 --- a/tests/test_shortcut.py +++ b/tests/test_shortcut.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from csaps import csaps, SmoothedData, UnivariateCubicSmoothingSpline, NdGridCubicSmoothingSpline +from csaps import csaps, SmoothingResult, UnivariateCubicSmoothingSpline, NdGridCubicSmoothingSpline @pytest.fixture(scope='module') @@ -53,7 +53,7 @@ def test_shortcut_output(data): assert isinstance(yi, np.ndarray) smoothed_data = csaps(x, y, xi) - assert isinstance(smoothed_data, SmoothedData) + assert isinstance(smoothed_data, SmoothingResult) sp = csaps(x, y) assert isinstance(sp, sp_cls) @@ -62,10 +62,10 @@ def test_shortcut_output(data): @pytest.mark.parametrize('smooth, cls', [ (0.85, np.ndarray), ([0.85, 0.85], np.ndarray), - (None, SmoothedData), - ([None, 0.85], SmoothedData), - ([0.85, None], 
SmoothedData), - ([None, None], SmoothedData), + (None, SmoothingResult), + ([None, 0.85], SmoothingResult), + ([0.85, None], SmoothingResult), + ([None, None], SmoothingResult), ]) def test_shortcut_ndgrid_smooth_output(surface, smooth, cls): x, y = surface