Finish api changes

bashtage · Jul 15, 2019 · 922e9d2 · 922e9d2
1 parent 8af808e
commit 922e9d2
Show file tree

Hide file tree

Showing 12 changed files with 133 additions and 88 deletions.
diff --git a/statsmodels/tools/validation/__init__.py b/statsmodels/tools/validation/__init__.py
@@ -1 +1,4 @@
-from .validation import array_like
+from .validation import array_like, PandasWrapper
+
+
+__all__ = ['array_like', 'PandasWrapper']
diff --git a/statsmodels/tools/validation/decorators.py b/statsmodels/tools/validation/decorators.py
@@ -12,16 +12,16 @@ def inner(func):
         def wrapper(*args, **kwargs):
             if pos < len(args):
                 arg = args[pos]
-                arg = v.array_like(arg, name, dtype, ndim, maxdim, shape, order,
-                                   contiguous)
+                arg = v.array_like(arg, name, dtype, ndim, maxdim, shape,
+                                   order, contiguous)
                 if pos == 0:
                     args = (arg,) + args[1:]
                 else:
                     args = args[:pos] + (arg,) + args[pos + 1:]
             else:
                 arg = kwargs[name]
-                arg = v.array_like(arg, name, dtype, ndim, maxdim, shape, order,
-                                   contiguous)
+                arg = v.array_like(arg, name, dtype, ndim, maxdim, shape,
+                                   order, contiguous)
                 kwargs[name] = arg
 
             return func(*args, **kwargs)

diff --git a/statsmodels/tools/validation/validation.py b/statsmodels/tools/validation/validation.py
@@ -1,4 +1,6 @@
 import numpy as np
+import pandas as pd
+
 
 def _right_squeeze(arr, stop_dim=0):
     """
@@ -109,18 +111,18 @@ def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
     ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
     """
     arr = np.asarray(obj, dtype=dtype, order=order)
-    if ndim is not None:
+    if maxdim is not None:
+        if arr.ndim > maxdim:
+            msg = '{0} must have ndim <= {1}'.format(name, maxdim)
+            raise ValueError(msg)
+    elif ndim is not None:
         if arr.ndim > ndim:
             arr = _right_squeeze(arr, stop_dim=ndim)
         elif arr.ndim < ndim:
             arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
         if arr.ndim != ndim:
             msg = '{0} is required to have ndim {1} but has ndim {2}'
             raise ValueError(msg.format(name, ndim, arr.ndim))
-    elif maxdim is not None:
-        if arr.ndim > maxdim:
-            msg = '{0} must have ndim <= {1}'.format(name, maxdim)
-            raise ValueError(msg)
     if shape is not None:
         for actual, req in zip(arr.shape, shape):
             if req is not None and actual != req:
@@ -130,3 +132,81 @@ def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
     if contiguous:
         arr = np.ascontiguousarray(arr, dtype=dtype)
     return arr
+
+
+class PandasWrapper(object):
+    """
+    Wrap array_like using the index from the original input, if pandas
+
+    Parameters
+    ----------
+    pandas_obj : {Series, DataFrame}
+        Object to extract the index from for wrapping
+
+    Notes
+    -----
+    ``wrap`` raises if ``pandas_obj`` is a pandas type but obj and
+    ``pandas_obj`` differ in length along axis 0 after allowing for trimmed
+    observations. Also raises if the ndim of obj is larger than 2.
+    """
+
+    def __init__(self, pandas_obj):
+        self._pandas_obj = pandas_obj
+        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))
+
+    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
+        """
+        Wrap obj with the index of the original input, if it was pandas
+
+        Parameters
+        ----------
+        obj : array_like
+            The 1-d or 2-d values to wrap.
+        columns : {str, list-like}, optional
+            Column names, or the series name if obj is 1-d.
+        append : str, optional
+            Suffix joined to each name with an underscore.
+        trim_start : int, optional
+            Number of observations to drop from the start of the index,
+            so that the index applied is index[trim_start:].
+        trim_end : int, optional
+            Number of observations to drop from the end of the index,
+            so that the index applied is index[:nobs - trim_end].
+
+        Returns
+        -------
+        wrapped : {ndarray, Series, DataFrame}
+            Series (1-d obj) or DataFrame (2-d obj) on the trimmed index
+            if the original input was pandas; otherwise obj as an ndarray.
+        """
+        obj = np.asarray(obj)
+        if not self._is_pandas:
+            return obj
+
+        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
+            raise ValueError('obj must have the same number of elements in '
+                             'axis 0 as orig')
+        index = self._pandas_obj.index
+        index = index[trim_start:index.shape[0] - trim_end]
+        if obj.ndim == 1:
+            if columns is None:
+                name = getattr(self._pandas_obj, 'name', None)
+            elif isinstance(columns, str):
+                name = columns
+            else:
+                name = columns[0]
+            if append is not None:
+                name = append if name is None else name + '_' + append
+
+            return pd.Series(obj, name=name, index=index)
+        elif obj.ndim == 2:
+            if columns is None:
+                columns = getattr(self._pandas_obj, 'columns', None)
+            if append is not None:
+                new = []
+                for c in columns:
+                    new.append(append if c is None else str(c) + '_' + append)
+                columns = new
+            return pd.DataFrame(obj, columns=columns, index=index)
+        else:
+            raise ValueError('Can only wrap 1 or 2-d array_like')
diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
@@ -22,7 +22,6 @@
 import statsmodels.tsa.base.tsa_model as tsbase
 import statsmodels.base.wrapper as wrap
 from statsmodels.regression.linear_model import yule_walker, OLS
-from statsmodels.tools.validation import array_like
 from statsmodels.tsa.tsatools import (lagmat, add_trend,
                                       _ar_transparams, _ar_invtransparams,
                                       _ma_transparams, _ma_invtransparams,

diff --git a/statsmodels/tsa/filters/_utils.py b/statsmodels/tsa/filters/_utils.py
@@ -25,29 +25,6 @@ def _get_pandas_wrapper(X, trim_head=None, trim_tail=None, names=None):
         return lambda x : X.__class__(x, index=index, name=names)
 
 
-def _maybe_get_pandas_wrapper(X, trim_head=None, trim_tail=None):
-    """
-    If using pandas returns a function to wrap the results, e.g., wrapper(X)
-    trim is an integer for the symmetric truncation of the series in some
-    filters.
-    otherwise returns None
-    """
-    if _is_using_pandas(X, None):
-        return _get_pandas_wrapper(X, trim_head, trim_tail)
-    else:
-        return lambda x : x
-
-
-def _maybe_get_pandas_wrapper_freq(X, trim=None):
-    if _is_using_pandas(X, None):
-        index = X.index
-        func = _get_pandas_wrapper(X, trim)
-        freq = index.inferred_freq
-        return func, freq
-    else:
-        return lambda x : x, None
-
-
 def pandas_wrapper(func, trim_head=None, trim_tail=None, names=None, *args,
                    **kwargs):
     @wraps(func)

diff --git a/statsmodels/tsa/filters/bk_filter.py b/statsmodels/tsa/filters/bk_filter.py
@@ -3,8 +3,7 @@
 import numpy as np
 from scipy.signal import fftconvolve
 
-from statsmodels.tools.validation import array_like
-from ._utils import _maybe_get_pandas_wrapper
+from statsmodels.tools.validation import array_like, PandasWrapper
 
 
 def bkfilter(x, low=6, high=32, K=12):
@@ -77,7 +76,7 @@ def bkfilter(x, low=6, high=32, K=12):
     # TODO: allow windowing functions to correct for Gibb's Phenomenon?
     # adjust bweights (symmetrically) by below before demeaning
     # Lancosz Sigma Factors np.sinc(2*j/(2.*K+1))
-    _pandas_wrapper = _maybe_get_pandas_wrapper(x, K, K)
+    pw = PandasWrapper(x)
     x = array_like(x, 'x', maxdim=2)
     omega_1 = 2. * np.pi / high  # convert from freq. to periodicity
     omega_2 = 2. * np.pi / low
@@ -92,7 +91,5 @@ def bkfilter(x, low=6, high=32, K=12):
         bweights = bweights[:, None]
     x = fftconvolve(x, bweights, mode='valid')
     # get a centered moving avg/convolution
-    if _pandas_wrapper is not None:
-        return _pandas_wrapper(x)
 
-    return x
+    return pw.wrap(x, append='cycle', trim_start=K, trim_end=K)
diff --git a/statsmodels/tsa/filters/cf_filter.py b/statsmodels/tsa/filters/cf_filter.py
@@ -1,8 +1,7 @@
 import numpy as np
 
 from statsmodels.compat.python import range
-from ._utils import _maybe_get_pandas_wrapper
-from statsmodels.tools.validation import array_like
+from statsmodels.tools.validation import array_like, PandasWrapper
 
 
 # the data is sampled quarterly, so cut-off frequency of 18
@@ -70,7 +69,7 @@ def cffilter(x, low=6, high=32, drift=True):
     #      and estimates of theta other than random walk.
     if low < 2:
         raise ValueError("low must be >= 2")
-    _pandas_wrapper = _maybe_get_pandas_wrapper(x)
+    pw = PandasWrapper(x)
     x = array_like(x, 'x', ndim=2)
     nobs, nseries = x.shape
     a = 2*np.pi/high
@@ -92,12 +91,9 @@ def cffilter(x, low=6, high=32, drift=True):
                 B * x[-1] + np.dot(Bj[1:i].T, x[1:i][::-1]) + A * x[0])
     y = y.squeeze()
 
-    cycle, trend = y, x.squeeze() - y
+    cycle, trend = y.squeeze(), x.squeeze() - y
 
-    if _pandas_wrapper is not None:
-        return _pandas_wrapper(cycle), _pandas_wrapper(trend)
-
-    return cycle, trend
+    return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')
 
 
 if __name__ == "__main__":

diff --git a/statsmodels/tsa/filters/filtertools.py b/statsmodels/tsa/filters/filtertools.py
@@ -17,10 +17,8 @@
 from scipy import signal
 from scipy.signal.signaltools import _centered as trim_centered
 
-from statsmodels.base.data import ArrayLike
 from statsmodels.compat.python import range
-from ._utils import _maybe_get_pandas_wrapper
-
+from statsmodels.tools.validation import array_like, PandasWrapper
 
 def _pad_nans(x, head=None, tail=None):
     if np.ndim(x) == 1:
@@ -192,12 +190,12 @@ def recursive_filter(x, ar_coeff, init=None):
 
     where n_coeff = len(n_coeff).
     '''
-    _pandas_wrapper = _maybe_get_pandas_wrapper(x)
-    x = ArrayLike(x, 'x', ndim=1)
-    ar_coeff = ArrayLike(ar_coeff, 'ar_coeff', ndim=1)
+    pw = PandasWrapper(x)
+    x = array_like(x, 'x')
+    ar_coeff = array_like(ar_coeff, 'ar_coeff')
 
     if init is not None:  # integer init are treated differently in lfiltic
-        init = ArrayLike(init, 'init', ndim=1)
+        init = array_like(init, 'init')
         if len(init) != len(ar_coeff):
             raise ValueError("ar_coeff must be the same length as init")
 
@@ -213,9 +211,8 @@ def recursive_filter(x, ar_coeff, init=None):
     else:
         result = y
 
-    if _pandas_wrapper:
-        return _pandas_wrapper(result)
-    return result
+    return pw.wrap(result)
+
 
 
 def convolution_filter(x, filt, nsides=2):
@@ -279,9 +276,9 @@ def convolution_filter(x, filt, nsides=2):
     else:  # pragma : no cover
         raise ValueError("nsides must be 1 or 2")
 
-    _pandas_wrapper = _maybe_get_pandas_wrapper(x)
-    x = ArrayLike(x, 'x', maxdim=2)
-    filt = ArrayLike(filt, 'filt', ndim=x.ndim)
+    pw = PandasWrapper(x)
+    x = array_like(x, 'x', maxdim=2)
+    filt = array_like(filt, 'filt', ndim=x.ndim)
 
     if filt.ndim == 1 or min(filt.shape) == 1:
         result = signal.convolve(x, filt, mode='valid')
@@ -299,9 +296,7 @@ def convolution_filter(x, filt, nsides=2):
                 result[:, i] = signal.convolve(x[:, i], np.r_[0, filt[:, i]],
                                                mode='valid')
     result = _pad_nans(result, trim_head, trim_tail)
-    if _pandas_wrapper:
-        return _pandas_wrapper(result)
-    return result
+    return pw.wrap(result)
 
 
 # previously located in sandbox.tsa.garch
@@ -343,8 +338,8 @@ def miso_lfilter(ar, ma, x, useic=False):
     with shapes y (nobs,), x (nobs,nvars), ar (narlags,), ma (narlags,nvars)
 
     '''
-    ma = ArrayLike(ma, 'ma', ndim=1)
-    ar = ArrayLike(ar, 'ar', ndim=1)
+    ma = array_like(ma, 'ma')
+    ar = array_like(ar, 'ar')
     # inp = signal.convolve(x, ma, mode='valid')
     # inp = signal.convolve(x, ma)[:, (x.shape[1]+1)//2]
     # Note: convolve mixes up the variable left-right flip

diff --git a/statsmodels/tsa/filters/hp_filter.py b/statsmodels/tsa/filters/hp_filter.py
@@ -3,8 +3,7 @@
 import numpy as np
 from scipy import sparse
 from scipy.sparse.linalg import spsolve
-from ._utils import _maybe_get_pandas_wrapper
-from statsmodels.tools.validation import array_like
+from statsmodels.tools.validation import array_like, PandasWrapper
 
 
 def hpfilter(x, lamb=1600):
@@ -83,7 +82,7 @@ def hpfilter(x, lamb=1600):
         Filter for the Frequency of Observations." `The Review of Economics and
         Statistics`, 84(2), 371-80.
     """
-    _pandas_wrapper = _maybe_get_pandas_wrapper(x)
+    pw = PandasWrapper(x)
     x = array_like(x, 'x', ndim=1)
     nobs = len(x)
     I = sparse.eye(nobs, nobs)  # noqa:E741
@@ -95,6 +94,4 @@ def hpfilter(x, lamb=1600):
     trend = spsolve(I+lamb*K.T.dot(K), x, use_umfpack=use_umfpack)
 
     cycle = x - trend
-    if _pandas_wrapper is not None:
-        return _pandas_wrapper(cycle), _pandas_wrapper(trend)
-    return cycle, trend
+    return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')
diff --git a/statsmodels/tsa/filters/tests/test_filters.py b/statsmodels/tsa/filters/tests/test_filters.py
@@ -563,15 +563,15 @@ def test_bking_pandas():
     assert_equal(filtered.values, nd_filtered)
     assert_equal(filtered.index[0], datetime(1962, 3, 31))
     assert_equal(filtered.index[-1], datetime(2006, 9, 30))
-    assert_equal(filtered.name, "infl")
+    assert_equal(filtered.name, "infl_cycle")
 
     # 2d
     filtered = bkfilter(dta[["infl", "unemp"]])
     nd_filtered = bkfilter(dta[['infl', 'unemp']].values)
     assert_equal(filtered.values, nd_filtered)
     assert_equal(filtered.index[0], datetime(1962, 3, 31))
     assert_equal(filtered.index[-1], datetime(2006, 9, 30))
-    assert_equal(filtered.columns.values, ["infl", "unemp"])
+    assert_equal(filtered.columns.values, ["infl_cycle", "unemp_cycle"])
 
 
 def test_cfitz_pandas():
@@ -584,15 +584,15 @@ def test_cfitz_pandas():
     assert_allclose(cycle.values, ndcycle, rtol=1e-14)
     assert_equal(cycle.index[0], datetime(1959, 3, 31))
     assert_equal(cycle.index[-1], datetime(2009, 9, 30))
-    assert_equal(cycle.name, "infl")
+    assert_equal(cycle.name, "infl_cycle")
 
     # 2d
     cycle, trend = cffilter(dta[["infl", "unemp"]])
     ndcycle, ndtrend = cffilter(dta[['infl', 'unemp']].values)
     assert_allclose(cycle.values, ndcycle, rtol=1e-14)
     assert_equal(cycle.index[0], datetime(1959, 3, 31))
     assert_equal(cycle.index[-1], datetime(2009, 9, 30))
-    assert_equal(cycle.columns.values, ["infl", "unemp"])
+    assert_equal(cycle.columns.values, ["infl_cycle", "unemp_cycle"])
 
 
 def test_hpfilter_pandas():
@@ -604,7 +604,7 @@ def test_hpfilter_pandas():
     assert_equal(cycle.values, ndcycle)
     assert_equal(cycle.index[0], datetime(1959, 3, 31))
     assert_equal(cycle.index[-1], datetime(2009, 9, 30))
-    assert_equal(cycle.name, "cycle")
+    assert_equal(cycle.name, "realgdp_cycle")
 
 
 class TestFilters(object):