diff --git a/CHANGELOG.md b/CHANGELOG.md index aae99c4..e8d93c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v0.11.0 + +* Internal re-design `SplinePPForm` and `NdGridSplinePPForm` classes [#17](https://github.com/espdev/csaps/issues/17): + - Remove `shape` and `axis` properties and reshaping data in these classes + - `NdGridSplinePPForm` coefficients array for 1D grid now is 1-d instead of 2-d +* Refactoring the code and decrease memory consumption +* Add `overload` type-hints for `csaps` function signatures + ## v0.10.1 * Fix call of `numpy.pad` function for numpy <1.17 [#15](https://github.com/espdev/csaps/issues/15) diff --git a/README.md b/README.md index f716116..260bc08 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ **csaps** is a Python package for univariate, multivariate and n-dimensional grid data approximation using cubic smoothing splines. The package can be useful in practical engineering tasks for data approximation and smoothing. -## Installation +## Installing Use pip for installing: @@ -89,20 +89,22 @@ plt.show() ## Documentation -More examples of usage and the full documentation can be found at ReadTheDocs. - -https://csaps.readthedocs.io +More examples of usage and the full documentation can be found at https://csaps.readthedocs.io. ## Testing pytest, tox and Travis CI are used for testing. Please see [tests](tests). -## Algorithms and implementations +## Algorithm and Implementation -**csaps** package is a Python modified port of MATLAB [CSAPS](https://www.mathworks.com/help/curvefit/csaps.html) function that is an implementation of +**csaps** Python package is inspired by MATLAB [CSAPS](https://www.mathworks.com/help/curvefit/csaps.html) function that is an implementation of Fortran routine SMOOTH from [PGS](http://pages.cs.wisc.edu/~deboor/pgs/) (originally written by Carl de Boor). -[csaps-cpp](https://github.com/espdev/csaps-cpp) C++11 Eigen based implementation of the algorithm. +Also the algothithm implementation in other languages: + +* [csaps-rs](https://github.com/espdev/csaps-rs) Rust ndarray/sprs based implementation +* [csaps-cpp](https://github.com/espdev/csaps-cpp) C++11 Eigen based implementation (incomplete) + ## References diff --git a/csaps/__init__.py b/csaps/__init__.py index adc8bd9..2aea816 100644 --- a/csaps/__init__.py +++ b/csaps/__init__.py @@ -23,7 +23,6 @@ ) from csaps._types import ( UnivariateDataType, - UnivariateVectorizedDataType, MultivariateDataType, NdGridDataType, ) @@ -46,7 +45,6 @@ # Type-hints 'UnivariateDataType', - 'UnivariateVectorizedDataType', 'MultivariateDataType', 'NdGridDataType', ] diff --git a/csaps/_shortcut.py b/csaps/_shortcut.py index fee3b53..474f65f 100644 --- a/csaps/_shortcut.py +++ b/csaps/_shortcut.py @@ -6,50 +6,93 @@ """ from collections import abc as c_abc -from typing import Optional, Union, Sequence, NamedTuple - -import numpy as np +from typing import Optional, Union, Sequence, NamedTuple, overload from ._base import ISmoothingSpline from ._sspumv import CubicSmoothingSpline from ._sspndg import ndgrid_prepare_data_sites, NdGridCubicSmoothingSpline -from ._types import ( - UnivariateDataType, - UnivariateVectorizedDataType, - NdGridDataType, -) - -_XDataType = Union[UnivariateDataType, NdGridDataType] -_YDataType = Union[UnivariateVectorizedDataType, np.ndarray] -_XiDataType = Optional[Union[UnivariateDataType, NdGridDataType]] -_WeightsDataType = Optional[Union[UnivariateDataType, NdGridDataType]] -_SmoothDataType = Optional[Union[float, Sequence[Optional[float]]]] +from ._types import UnivariateDataType, MultivariateDataType, NdGridDataType class AutoSmoothingResult(NamedTuple): """The result for auto smoothing for `csaps` function""" - values: _YDataType + values: MultivariateDataType """Smoothed data values""" - smooth: _SmoothDataType + smooth: Union[float, Sequence[Optional[float]]] """The calculated smoothing parameter""" -_ReturnType = Union[ - _YDataType, - AutoSmoothingResult, - ISmoothingSpline, -] +# ************************************** +# csaps signatures +# +@overload +def csaps(xdata: UnivariateDataType, + ydata: MultivariateDataType, + *, + weights: Optional[UnivariateDataType] = None, + smooth: Optional[float] = None, + axis: Optional[int] = None) -> ISmoothingSpline: ... + + +@overload +def csaps(xdata: UnivariateDataType, + ydata: MultivariateDataType, + xidata: UnivariateDataType, + *, + weights: Optional[UnivariateDataType] = None, + axis: Optional[int] = None) -> AutoSmoothingResult: ... + + +@overload +def csaps(xdata: UnivariateDataType, + ydata: MultivariateDataType, + xidata: UnivariateDataType, + *, + smooth: float, + weights: Optional[UnivariateDataType] = None, + axis: Optional[int] = None) -> MultivariateDataType: ... + + +@overload +def csaps(xdata: NdGridDataType, + ydata: MultivariateDataType, + *, + weights: Optional[NdGridDataType] = None, + smooth: Optional[Sequence[float]] = None, + axis: Optional[int] = None) -> ISmoothingSpline: ... + + +@overload +def csaps(xdata: NdGridDataType, + ydata: MultivariateDataType, + xidata: NdGridDataType, + *, + weights: Optional[NdGridDataType] = None, + axis: Optional[int] = None) -> AutoSmoothingResult: ... + + +@overload +def csaps(xdata: NdGridDataType, + ydata: MultivariateDataType, + xidata: NdGridDataType, + *, + smooth: Sequence[float], + weights: Optional[NdGridDataType] = None, + axis: Optional[int] = None) -> MultivariateDataType: ... +# +# csaps signatures +# ************************************** -def csaps(xdata: _XDataType, - ydata: _YDataType, - xidata: _XiDataType = None, +def csaps(xdata: Union[UnivariateDataType, NdGridDataType], + ydata: MultivariateDataType, + xidata: Optional[Union[UnivariateDataType, NdGridDataType]] = None, *, - weights: _WeightsDataType = None, - smooth: _SmoothDataType = None, - axis: Optional[int] = None) -> _ReturnType: + weights: Optional[Union[UnivariateDataType, NdGridDataType]] = None, + smooth: Optional[Union[float, Sequence[float]]] = None, + axis: Optional[int] = None) -> Union[MultivariateDataType, ISmoothingSpline, AutoSmoothingResult]: """Smooths the univariate/multivariate/gridded data or computes the corresponding splines This function might be used as the main API for smoothing any data. diff --git a/csaps/_sspndg.py b/csaps/_sspndg.py index c275d37..030b9a1 100644 --- a/csaps/_sspndg.py +++ b/csaps/_sspndg.py @@ -37,82 +37,64 @@ class NdGridSplinePPForm(SplinePPFormBase[ty.Sequence[np.ndarray], ty.Tuple[int, Parameters ---------- - breaks : np.ndarray - Breaks values 1-D array + breaks : Sequence[np.ndarray] + The sequence of the breaks 1-D arrays for each dimension + coeffs : np.ndarray - Spline coefficients N-D array + Tensor-product spline coefficients N-D array + """ def __init__(self, breaks: ty.Sequence[np.ndarray], coeffs: np.ndarray) -> None: self._breaks = breaks self._coeffs = coeffs self._pieces = tuple(x.size - 1 for x in breaks) + self._order = tuple(s // p for s, p in zip(coeffs.shape, self._pieces)) self._ndim = len(breaks) - if self._ndim > 1: - self._order = tuple(s // p for s, p in zip(coeffs.shape, self._pieces)) - else: - # the corner case for univariate spline that is represented as 1d-grid - self._order = (coeffs.shape[1] // self._pieces[0], ) - @property def breaks(self) -> ty.Sequence[np.ndarray]: + """Returns the sequence of the data breaks for each dimension""" return self._breaks @property def coeffs(self) -> np.ndarray: + """Returns n-d array of tensor-product n-d grid spline coefficients""" return self._coeffs @property def order(self) -> ty.Tuple[int, ...]: + """Returns the tuple of the spline orders for each dimension""" return self._order @property def pieces(self) -> ty.Tuple[int, ...]: + """Returns the tuple of the spline pieces for each dimension""" return self._pieces @property def ndim(self) -> int: + """Returns the dimensionality of n-d grid data""" return self._ndim - @property - def shape(self) -> ty.Tuple[int, ...]: - """Returns the original data shape - """ - return tuple(x.size for x in self.breaks) - def evaluate(self, xi: ty.Sequence[np.ndarray]) -> np.ndarray: + """Evaluates the spline for given data point(s) on the n-d grid""" shape = tuple(x.size for x in xi) coeffs = self.coeffs - coeffs_shape = list(coeffs.shape) + coeffs_shape = coeffs.shape - d = self.ndim - 1 - permuted_axes = (d, *range(d)) + ndim_m1 = self.ndim - 1 + permuted_axes = (ndim_m1, *range(ndim_m1)) for i in reversed(range(self.ndim)): - xii = xi[i] - ndim = int(np.prod(coeffs_shape[:d])) - - if self.ndim > 2: - coeffs = coeffs.reshape((ndim, self.pieces[i] * self.order[i])) - - spp = SplinePPForm( - breaks=self.breaks[i], - coeffs=coeffs, - pieces=self.pieces[i], - order=self.order[i], - shape=(ndim, xii.size) - ) + umv_ndim = int(np.prod(coeffs_shape[:ndim_m1])) + coeffs = coeffs.reshape((umv_ndim, self.pieces[i] * self.order[i])) - coeffs = spp.evaluate(xii) + coeffs = SplinePPForm(breaks=self.breaks[i], coeffs=coeffs).evaluate(xi[i]) - if self.ndim > 2: - coeffs = coeffs.reshape((*coeffs_shape[:d], shape[i])) - - if self.ndim > 1: - coeffs = coeffs.transpose(permuted_axes) - coeffs_shape = list(coeffs.shape) + coeffs = coeffs.reshape((*coeffs_shape[:ndim_m1], shape[i])).transpose(permuted_axes) + coeffs_shape = coeffs.shape return coeffs.reshape(shape) @@ -261,4 +243,6 @@ def _make_spline(self, smooth: ty.List[ty.Optional[float]]) -> ty.Tuple[NdGridSp coeffs = coeffs.transpose(permute_axes) coeffs_shape = list(coeffs.shape) + coeffs = coeffs.squeeze() + return NdGridSplinePPForm(breaks=self._xdata, coeffs=coeffs), tuple(reversed(smooths)) diff --git a/csaps/_sspumv.py b/csaps/_sspumv.py index 38cc955..56e0885 100644 --- a/csaps/_sspumv.py +++ b/csaps/_sspumv.py @@ -14,7 +14,7 @@ import scipy.sparse.linalg as la from ._base import SplinePPFormBase, ISmoothingSpline -from ._types import UnivariateDataType, UnivariateVectorizedDataType, MultivariateDataType +from ._types import UnivariateDataType, MultivariateDataType from ._reshape import from_2d, to_2d @@ -29,62 +29,42 @@ class SplinePPForm(SplinePPFormBase[np.ndarray, int]): coeffs : np.ndarray Spline coefficients 2-D array - ndim : int - Spline dimension - - shape : Sequence[int] - It determines Y data shape - - axis : int - Axis along which values are assumed to be varying """ - def __init__(self, breaks: np.ndarray, coeffs: np.ndarray, order: int, pieces: int, - shape: ty.Sequence[int], axis: int = -1) -> None: + def __init__(self, breaks: np.ndarray, coeffs: np.ndarray) -> None: self._breaks = breaks self._coeffs = coeffs - self._order = order - self._pieces = pieces + self._pieces = breaks.size - 1 + self._order = coeffs.shape[1] // self._pieces self._ndim = coeffs.shape[0] - self._shape = tuple(shape) - self._axis = axis - @property def breaks(self) -> np.ndarray: + """Returns the breaks array""" return self._breaks @property def coeffs(self) -> np.ndarray: + """Returns the spline coefficients 2-D array""" return self._coeffs @property def order(self) -> int: + """Returns the spline order""" return self._order @property def pieces(self) -> int: + """Returns the number of the spline pieces""" return self._pieces @property def ndim(self) -> int: + """Returns dimensionality (>1 for multivariate data)""" return self._ndim - @property - def shape(self) -> ty.Tuple[int, ...]: - """Returns the original data shape - """ - return self._shape - - @property - def axis(self) -> int: - """Returns the data axis along the spline will be evaluated - """ - return self._axis - def evaluate(self, xi: np.ndarray) -> np.ndarray: - shape = list(self.shape) - shape[self.axis] = xi.size + """Evaluates the spline for the given data point(s)""" # For each data site, compute its break interval mesh = self.breaks[1:-1] @@ -107,10 +87,6 @@ def evaluate(self, xi: np.ndarray) -> np.ndarray: index += self.pieces values = xi * values + self.coeffs[:, index] - if values.shape != shape: - # Reshape values 2-D NxM array to N-D array with original shape - values = from_2d(values, shape, self.axis) - return values @@ -144,7 +120,7 @@ class CubicSmoothingSpline(ISmoothingSpline[SplinePPForm, float, UnivariateDataT def __init__(self, xdata: UnivariateDataType, - ydata: UnivariateVectorizedDataType, + ydata: MultivariateDataType, weights: ty.Optional[UnivariateDataType] = None, smooth: ty.Optional[float] = None, axis: int = -1): @@ -167,7 +143,17 @@ def __call__(self, xi: UnivariateDataType) -> np.ndarray: if xi.ndim > 1: # pragma: no cover raise ValueError("'xi' data must be a 1-d array.") - return self._spline.evaluate(xi) + yi = self._spline.evaluate(xi) + + shape = list(self._shape) + shape[self._axis] = xi.size + shape = tuple(shape) + + if yi.shape != shape: + # Reshape values 2-D NxM array to N-D array with original shape + yi = from_2d(yi, shape, self._axis) + + return yi @property def smooth(self) -> float: @@ -246,75 +232,66 @@ def _make_spline(self, smooth: ty.Optional[float]) -> ty.Tuple[SplinePPForm, flo dy = np.diff(self._ydata, axis=1) dy_dx = dy / dx - if pcount > 2: - # Create diagonal sparse matrices - diags_r = np.vstack((dx[1:], 2 * (dx[1:] + dx[:-1]), dx[:-1])) - r = sp.spdiags(diags_r, [-1, 0, 1], pcount - 2, pcount - 2) + if pcount == 2: + # The corner case for the data with 2 points (1 breaks interval) + # In this case we have 2-ordered spline and linear interpolation in fact + yi = self._ydata[:, 0][:, np.newaxis] + coeffs = np.hstack((dy_dx, yi)) + + spline = SplinePPForm(breaks=self._xdata, coeffs=coeffs) + p = 1. - odx = 1. / dx - diags_qt = np.vstack((odx[:-1], -(odx[1:] + odx[:-1]), odx[1:])) - qt = sp.diags(diags_qt, [0, 1, 2], (pcount - 2, pcount)) + return spline, p - ow = 1. / self._weights - osqw = 1. / np.sqrt(self._weights) # type: np.ndarray - w = sp.diags(ow, 0, (pcount, pcount)) - qtw = qt @ sp.diags(osqw, 0, (pcount, pcount)) + # Create diagonal sparse matrices + diags_r = np.vstack((dx[1:], 2 * (dx[1:] + dx[:-1]), dx[:-1])) + r = sp.spdiags(diags_r, [-1, 0, 1], pcount - 2, pcount - 2) - # Solve linear system for the 2nd derivatives - qtwq = qtw @ qtw.T + dx_recip = 1. / dx + diags_qtw = np.vstack((dx_recip[:-1], -(dx_recip[1:] + dx_recip[:-1]), dx_recip[1:])) + diags_sqrw_recip = 1. / np.sqrt(self._weights) - if smooth is None: - p = self._compute_smooth(r, qtwq) - else: - p = smooth + qtw = (sp.diags(diags_qtw, [0, 1, 2], (pcount - 2, pcount)) @ + sp.diags(diags_sqrw_recip, 0, (pcount, pcount))) + qtw = qtw @ qtw.T - a = (6. * (1. - p)) * qtwq + p * r - b = np.diff(dy_dx, axis=1).T + if smooth is None: + p = self._compute_smooth(r, qtw) + else: + p = smooth - u = la.spsolve(a, b) - if u.ndim < 2: - u = u[np.newaxis] - if self._ydim == 1: - u = u.T + pp = (6. * (1. - p)) - dx = dx[:, np.newaxis] + # Solve linear system for the 2nd derivatives + a = pp * qtw + p * r + b = np.diff(dy_dx, axis=1).T - pad = functools.partial(np.pad, pad_width=[(1, 1), (0, 0)], mode='constant') + u = la.spsolve(a, b) + if u.ndim < 2: + u = u[np.newaxis] + if self._ydim == 1: + u = u.T - d1 = np.diff(pad(u), axis=0) / dx - d2 = np.diff(pad(d1), axis=0) + dx = dx[:, np.newaxis] - yi = self._ydata.T - ((6. * (1. - p)) * w) @ d2 - c3 = pad(p * u) - c2 = np.diff(yi, axis=0) / dx - dx * (2. * c3[:-1, :] + c3[1:, :]) + vpad = functools.partial(np.pad, pad_width=[(1, 1), (0, 0)], mode='constant') - coeffs = np.vstack(( - np.diff(c3, axis=0) / dx, - 3. * c3[:-1, :], - c2, - yi[:-1, :], - )).T + d1 = np.diff(vpad(u), axis=0) / dx + d2 = np.diff(vpad(d1), axis=0) - order = 4 - pieces = coeffs.shape[1] // order - else: - # The corner case for the data with 2 points. - yi = self._ydata[:, 0][:, np.newaxis] - coeffs = np.hstack((dy_dx, yi)) + diags_w_recip = 1. / self._weights + w = sp.diags(diags_w_recip, 0, (pcount, pcount)) - order = 2 - pieces = 1 + yi = self._ydata.T - (pp * w) @ d2 + pu = vpad(p * u) - p = 1. + p1 = np.diff(pu, axis=0) / dx + p2 = 3. * pu[:-1, :] + p3 = np.diff(yi, axis=0) / dx - dx * (2. * pu[:-1, :] + pu[1:, :]) + p4 = yi[:-1, :] - spline = SplinePPForm( - breaks=self._xdata, - coeffs=coeffs, - order=order, - pieces=pieces, - shape=self._shape, - axis=self._axis - ) + coeffs = np.vstack((p1, p2, p3, p4)).T + spline = SplinePPForm(breaks=self._xdata, coeffs=coeffs) return spline, p @@ -324,7 +301,7 @@ class UnivariateCubicSmoothingSpline(ISmoothingSpline[SplinePPForm, float, Univa def __init__(self, xdata: UnivariateDataType, - ydata: UnivariateVectorizedDataType, + ydata: MultivariateDataType, weights: ty.Optional[UnivariateDataType] = None, smooth: ty.Optional[float] = None, axis: int = -1) -> None: diff --git a/csaps/_types.py b/csaps/_types.py index beefd9f..b960a32 100644 --- a/csaps/_types.py +++ b/csaps/_types.py @@ -5,32 +5,18 @@ """ -import typing as ty -import numbers +from collections import abc +from typing import Union, Sequence, Tuple, TypeVar +from numbers import Number import numpy as np -UnivariateDataType = ty.Union[ - np.ndarray, - ty.Sequence[numbers.Number] -] +UnivariateDataType = Union[np.ndarray, Sequence[Number]] +MultivariateDataType = Union[np.ndarray, abc.Sequence] +NdGridDataType = Sequence[UnivariateDataType] -UnivariateVectorizedDataType = ty.Union[ - UnivariateDataType, - # FIXME: mypy does not support recursive types - # https://github.com/python/mypy/issues/731 - # ty.Sequence['UnivariateVectorizedDataType'] -] - -MultivariateDataType = ty.Union[ - np.ndarray, - ty.Sequence[UnivariateDataType] -] - -NdGridDataType = ty.Sequence[UnivariateDataType] - -TData = ty.TypeVar('TData', np.ndarray, ty.Sequence[np.ndarray]) -TProps = ty.TypeVar('TProps', int, ty.Tuple[int, ...]) -TSmooth = ty.TypeVar('TSmooth', float, ty.Tuple[float, ...]) -TXi = ty.TypeVar('TXi', UnivariateDataType, NdGridDataType) -TSpline = ty.TypeVar('TSpline') +TData = TypeVar('TData', np.ndarray, Sequence[np.ndarray]) +TProps = TypeVar('TProps', int, Tuple[int, ...]) +TSmooth = TypeVar('TSmooth', float, Tuple[float, ...]) +TXi = TypeVar('TXi', UnivariateDataType, NdGridDataType) +TSpline = TypeVar('TSpline') diff --git a/csaps/_version.py b/csaps/_version.py index c6e8e14..4cad2e2 100644 --- a/csaps/_version.py +++ b/csaps/_version.py @@ -1,3 +1,3 @@ # -*- coding: utf-8 -*- -__version__ = '0.10.1' +__version__ = '0.11.0' diff --git a/tests/test_ndgrid.py b/tests/test_ndgrid.py index 36aa88f..28a026c 100644 --- a/tests/test_ndgrid.py +++ b/tests/test_ndgrid.py @@ -143,5 +143,5 @@ def test_nd_array(shape: tuple): sp = csaps.NdGridCubicSmoothingSpline(xdata, ydata, smooth=1.0) ydata_s = sp(xdata) - assert sp.spline.shape == ydata.shape + assert len(sp.spline.coeffs.shape) == len(ydata.shape) assert ydata_s == pytest.approx(ydata) diff --git a/tests/test_univariate.py b/tests/test_univariate.py index 45ff972..f690e51 100644 --- a/tests/test_univariate.py +++ b/tests/test_univariate.py @@ -132,7 +132,6 @@ def test_splineppform(y, order, pieces, ndim): assert s.order == order assert s.pieces == pieces assert s.ndim == ndim - assert s.shape == y.shape @pytest.mark.parametrize('shape, axis', chain( @@ -154,11 +153,7 @@ def test_axis(shape, axis): y = np.arange(int(np.prod(shape))).reshape(shape) x = np.arange(np.array(y).shape[axis]) - s = csaps.UnivariateCubicSmoothingSpline(x, y, axis=axis) - ys = s(x) - - assert s.spline.shape == y.shape - assert s.spline.axis == axis + ys = csaps.UnivariateCubicSmoothingSpline(x, y, axis=axis)(x) np.testing.assert_allclose(ys, y)