Skip to content

Commit

Permalink
Finish api changes
Browse files Browse the repository at this point in the history
  • Loading branch information
bashtage committed Jul 15, 2019
1 parent 8af808e commit 922e9d2
Show file tree
Hide file tree
Showing 12 changed files with 133 additions and 88 deletions.
5 changes: 4 additions & 1 deletion statsmodels/tools/validation/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from .validation import array_like
from .validation import array_like, PandasWrapper


__all__ = ['array_like', 'PandasWrapper']
8 changes: 4 additions & 4 deletions statsmodels/tools/validation/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ def inner(func):
def wrapper(*args, **kwargs):
if pos < len(args):
arg = args[pos]
arg = v.array_like(arg, name, dtype, ndim, maxdim, shape, order,
contiguous)
arg = v.array_like(arg, name, dtype, ndim, maxdim, shape,
order, contiguous)
if pos == 0:
args = (arg,) + args[1:]
else:
args = args[:pos] + (arg,) + args[pos + 1:]
else:
arg = kwargs[name]
arg = v.array_like(arg, name, dtype, ndim, maxdim, shape, order,
contiguous)
arg = v.array_like(arg, name, dtype, ndim, maxdim, shape,
order, contiguous)
kwargs[name] = arg

return func(*args, **kwargs)
Expand Down
90 changes: 85 additions & 5 deletions statsmodels/tools/validation/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
import pandas as pd


def _right_squeeze(arr, stop_dim=0):
"""
Expand Down Expand Up @@ -109,18 +111,18 @@ def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
"""
arr = np.asarray(obj, dtype=dtype, order=order)
if ndim is not None:
if maxdim is not None:
if arr.ndim > maxdim:
msg = '{0} must have ndim <= {1}'.format(name, maxdim)
raise ValueError(msg)
elif ndim is not None:
if arr.ndim > ndim:
arr = _right_squeeze(arr, stop_dim=ndim)
elif arr.ndim < ndim:
arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
if arr.ndim != ndim:
msg = '{0} is required to have ndim {1} but has ndim {2}'
raise ValueError(msg.format(name, ndim, arr.ndim))
elif maxdim is not None:
if arr.ndim > maxdim:
msg = '{0} must have ndim <= {1}'.format(name, maxdim)
raise ValueError(msg)
if shape is not None:
for actual, req in zip(arr.shape, shape):
if req is not None and actual != req:
Expand All @@ -130,3 +132,81 @@ def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
if contiguous:
arr = np.ascontiguousarray(arr, dtype=dtype)
return arr


class PandasWrapper(object):
    """
    Wrap array_like using the index from the original input, if pandas

    Parameters
    ----------
    pandas_obj : {Series, DataFrame, object}
        Object to extract the index (and the default name or column labels)
        from for wrapping. If it is neither a Series nor a DataFrame,
        ``wrap`` returns plain ndarrays.

    Notes
    -----
    ``wrap`` raises ValueError if ``pandas_obj`` is a pandas type and the
    number of elements of the wrapped object in axis 0, plus the trimmed
    observations, differs from the number of elements of ``pandas_obj`` in
    axis 0. It also raises ValueError if the wrapped object is not 1-d or
    2-d.
    """

    def __init__(self, pandas_obj):
        self._pandas_obj = pandas_obj
        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))

    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
        """
        Wrap ``obj`` using the index of the original pandas input

        Parameters
        ----------
        obj : array_like
            The 1-d or 2-d data to wrap.
        columns : {str, list-like}, optional
            Series name if obj is 1-d (a list-like contributes its first
            element), or column labels if obj is 2-d. If omitted, the name
            of the original Series or the columns of the original
            DataFrame are used when available.
        append : str, optional
            Suffix joined to the name, or to every column label, with an
            underscore. Used on its own when the name or label is None.
        trim_start : int, default 0
            Number of observations to drop from the start of the index, so
            that the index applied is index[trim_start:].
        trim_end : int, default 0
            Number of observations to drop from the end of the index, so
            that the index applied is index[:nobs - trim_end].

        Returns
        -------
        wrapped : {ndarray, Series, DataFrame}
            ``obj`` as an ndarray if the original input was not a pandas
            object. Otherwise a Series (1-d) or DataFrame (2-d) carrying
            the trimmed original index.

        Raises
        ------
        ValueError
            If the original input is pandas and obj is not 1-d or 2-d, or
            its length in axis 0 does not match the trimmed index.
        """
        obj = np.asarray(obj)
        if not self._is_pandas:
            return obj

        # Validate the rank first so that a 0-d input produces the
        # documented ValueError rather than an IndexError from shape[0].
        if obj.ndim not in (1, 2):
            raise ValueError('Can only wrap 1 or 2-d array_like')

        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
            raise ValueError('obj must have the same number of elements in '
                             'axis 0 as orig')
        index = self._pandas_obj.index
        index = index[trim_start:index.shape[0] - trim_end]

        if obj.ndim == 1:
            if columns is None:
                # Use an explicit type check: attribute access on a
                # DataFrame would return a column that is labelled 'name'.
                if isinstance(self._pandas_obj, pd.Series):
                    name = self._pandas_obj.name
                else:
                    name = None
            elif isinstance(columns, str):
                name = columns
            else:
                name = columns[0]
            if append is not None:
                # str() keeps non-string names (e.g., integer labels)
                # working, matching the handling of 2-d column labels.
                name = append if name is None else str(name) + '_' + append

            return pd.Series(obj, name=name, index=index)

        # obj.ndim == 2
        if columns is None:
            # Use an explicit type check: attribute access on a Series
            # would return the element whose index label is 'columns'.
            if isinstance(self._pandas_obj, pd.DataFrame):
                columns = self._pandas_obj.columns
        elif isinstance(columns, str):
            # A bare string would otherwise be iterated character-wise.
            columns = [columns]
        if append is not None:
            if columns is None:
                # No labels are available (Series input), so fall back to
                # the default positional labels before adding the suffix.
                columns = range(obj.shape[1])
            columns = [append if c is None else str(c) + '_' + append
                       for c in columns]
        return pd.DataFrame(obj, columns=columns, index=index)
1 change: 0 additions & 1 deletion statsmodels/tsa/arima_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import statsmodels.tsa.base.tsa_model as tsbase
import statsmodels.base.wrapper as wrap
from statsmodels.regression.linear_model import yule_walker, OLS
from statsmodels.tools.validation import array_like
from statsmodels.tsa.tsatools import (lagmat, add_trend,
_ar_transparams, _ar_invtransparams,
_ma_transparams, _ma_invtransparams,
Expand Down
23 changes: 0 additions & 23 deletions statsmodels/tsa/filters/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,6 @@ def _get_pandas_wrapper(X, trim_head=None, trim_tail=None, names=None):
return lambda x : X.__class__(x, index=index, name=names)


def _maybe_get_pandas_wrapper(X, trim_head=None, trim_tail=None):
    """
    Return a callable that re-wraps results to match the input ``X``.

    If ``_is_using_pandas`` reports that ``X`` is a pandas object, the
    callable built by ``_get_pandas_wrapper(X, trim_head, trim_tail)`` is
    returned, e.g., wrapper(X). ``trim_head`` and ``trim_tail`` are the
    integers used for the truncation of the series in some filters.

    Otherwise an identity function is returned, so the result can always be
    called.
    """
    if not _is_using_pandas(X, None):
        return lambda x: x
    return _get_pandas_wrapper(X, trim_head, trim_tail)


def _maybe_get_pandas_wrapper_freq(X, trim=None):
    """
    Return a wrapping callable together with the inferred frequency of ``X``.

    If ``_is_using_pandas`` reports that ``X`` is a pandas object, returns
    the callable built by ``_get_pandas_wrapper(X, trim)`` and
    ``X.index.inferred_freq``. Otherwise returns an identity function and
    None.
    """
    if not _is_using_pandas(X, None):
        return (lambda x: x), None
    idx = X.index
    wrapper = _get_pandas_wrapper(X, trim)
    return wrapper, idx.inferred_freq


def pandas_wrapper(func, trim_head=None, trim_tail=None, names=None, *args,
**kwargs):
@wraps(func)
Expand Down
9 changes: 3 additions & 6 deletions statsmodels/tsa/filters/bk_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import numpy as np
from scipy.signal import fftconvolve

from statsmodels.tools.validation import array_like
from ._utils import _maybe_get_pandas_wrapper
from statsmodels.tools.validation import array_like, PandasWrapper


def bkfilter(x, low=6, high=32, K=12):
Expand Down Expand Up @@ -77,7 +76,7 @@ def bkfilter(x, low=6, high=32, K=12):
# TODO: allow windowing functions to correct for the Gibbs phenomenon?
# adjust bweights (symmetrically) by below before demeaning
# Lanczos Sigma Factors np.sinc(2*j/(2.*K+1))
_pandas_wrapper = _maybe_get_pandas_wrapper(x, K, K)
pw = PandasWrapper(x)
x = array_like(x, 'x', maxdim=2)
omega_1 = 2. * np.pi / high # convert from freq. to periodicity
omega_2 = 2. * np.pi / low
Expand All @@ -92,7 +91,5 @@ def bkfilter(x, low=6, high=32, K=12):
bweights = bweights[:, None]
x = fftconvolve(x, bweights, mode='valid')
# get a centered moving avg/convolution
if _pandas_wrapper is not None:
return _pandas_wrapper(x)

return x
return pw.wrap(x, append='cycle', trim_start=K, trim_end=K)
12 changes: 4 additions & 8 deletions statsmodels/tsa/filters/cf_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import numpy as np

from statsmodels.compat.python import range
from ._utils import _maybe_get_pandas_wrapper
from statsmodels.tools.validation import array_like
from statsmodels.tools.validation import array_like, PandasWrapper


# the data is sampled quarterly, so cut-off frequency of 18
Expand Down Expand Up @@ -70,7 +69,7 @@ def cffilter(x, low=6, high=32, drift=True):
# and estimates of theta other than random walk.
if low < 2:
raise ValueError("low must be >= 2")
_pandas_wrapper = _maybe_get_pandas_wrapper(x)
pw = PandasWrapper(x)
x = array_like(x, 'x', ndim=2)
nobs, nseries = x.shape
a = 2*np.pi/high
Expand All @@ -92,12 +91,9 @@ def cffilter(x, low=6, high=32, drift=True):
B * x[-1] + np.dot(Bj[1:i].T, x[1:i][::-1]) + A * x[0])
y = y.squeeze()

cycle, trend = y, x.squeeze() - y
cycle, trend = y.squeeze(), x.squeeze() - y

if _pandas_wrapper is not None:
return _pandas_wrapper(cycle), _pandas_wrapper(trend)

return cycle, trend
return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')


if __name__ == "__main__":
Expand Down
31 changes: 13 additions & 18 deletions statsmodels/tsa/filters/filtertools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
from scipy import signal
from scipy.signal.signaltools import _centered as trim_centered

from statsmodels.base.data import ArrayLike
from statsmodels.compat.python import range
from ._utils import _maybe_get_pandas_wrapper

from statsmodels.tools.validation import array_like, PandasWrapper

def _pad_nans(x, head=None, tail=None):
if np.ndim(x) == 1:
Expand Down Expand Up @@ -192,12 +190,12 @@ def recursive_filter(x, ar_coeff, init=None):
where n_coeff = len(n_coeff).
'''
_pandas_wrapper = _maybe_get_pandas_wrapper(x)
x = ArrayLike(x, 'x', ndim=1)
ar_coeff = ArrayLike(ar_coeff, 'ar_coeff', ndim=1)
pw = PandasWrapper(x)
x = array_like(x, 'x')
ar_coeff = array_like(ar_coeff, 'ar_coeff')

if init is not None: # integer init are treated differently in lfiltic
init = ArrayLike(init, 'init', ndim=1)
init = array_like(init, 'init')
if len(init) != len(ar_coeff):
raise ValueError("ar_coeff must be the same length as init")

Expand All @@ -213,9 +211,8 @@ def recursive_filter(x, ar_coeff, init=None):
else:
result = y

if _pandas_wrapper:
return _pandas_wrapper(result)
return result
return pw.wrap(result)



def convolution_filter(x, filt, nsides=2):
Expand Down Expand Up @@ -279,9 +276,9 @@ def convolution_filter(x, filt, nsides=2):
else: # pragma : no cover
raise ValueError("nsides must be 1 or 2")

_pandas_wrapper = _maybe_get_pandas_wrapper(x)
x = ArrayLike(x, 'x', maxdim=2)
filt = ArrayLike(filt, 'filt', ndim=x.ndim)
pw = PandasWrapper(x)
x = array_like(x, 'x', maxdim=2)
filt = array_like(filt, 'filt', ndim=x.ndim)

if filt.ndim == 1 or min(filt.shape) == 1:
result = signal.convolve(x, filt, mode='valid')
Expand All @@ -299,9 +296,7 @@ def convolution_filter(x, filt, nsides=2):
result[:, i] = signal.convolve(x[:, i], np.r_[0, filt[:, i]],
mode='valid')
result = _pad_nans(result, trim_head, trim_tail)
if _pandas_wrapper:
return _pandas_wrapper(result)
return result
return pw.wrap(result)


# previously located in sandbox.tsa.garch
Expand Down Expand Up @@ -343,8 +338,8 @@ def miso_lfilter(ar, ma, x, useic=False):
with shapes y (nobs,), x (nobs,nvars), ar (narlags,), ma (narlags,nvars)
'''
ma = ArrayLike(ma, 'ma', ndim=1)
ar = ArrayLike(ar, 'ar', ndim=1)
ma = array_like(ma, 'ma')
ar = array_like(ar, 'ar')
# inp = signal.convolve(x, ma, mode='valid')
# inp = signal.convolve(x, ma)[:, (x.shape[1]+1)//2]
# Note: convolve mixes up the variable left-right flip
Expand Down
9 changes: 3 additions & 6 deletions statsmodels/tsa/filters/hp_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve
from ._utils import _maybe_get_pandas_wrapper
from statsmodels.tools.validation import array_like
from statsmodels.tools.validation import array_like, PandasWrapper


def hpfilter(x, lamb=1600):
Expand Down Expand Up @@ -83,7 +82,7 @@ def hpfilter(x, lamb=1600):
Filter for the Frequency of Observations." `The Review of Economics and
Statistics`, 84(2), 371-80.
"""
_pandas_wrapper = _maybe_get_pandas_wrapper(x)
pw = PandasWrapper(x)
x = array_like(x, 'x', ndim=1)
nobs = len(x)
I = sparse.eye(nobs, nobs) # noqa:E741
Expand All @@ -95,6 +94,4 @@ def hpfilter(x, lamb=1600):
trend = spsolve(I+lamb*K.T.dot(K), x, use_umfpack=use_umfpack)

cycle = x - trend
if _pandas_wrapper is not None:
return _pandas_wrapper(cycle), _pandas_wrapper(trend)
return cycle, trend
return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')
10 changes: 5 additions & 5 deletions statsmodels/tsa/filters/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,15 +563,15 @@ def test_bking_pandas():
assert_equal(filtered.values, nd_filtered)
assert_equal(filtered.index[0], datetime(1962, 3, 31))
assert_equal(filtered.index[-1], datetime(2006, 9, 30))
assert_equal(filtered.name, "infl")
assert_equal(filtered.name, "infl_cycle")

# 2d
filtered = bkfilter(dta[["infl", "unemp"]])
nd_filtered = bkfilter(dta[['infl', 'unemp']].values)
assert_equal(filtered.values, nd_filtered)
assert_equal(filtered.index[0], datetime(1962, 3, 31))
assert_equal(filtered.index[-1], datetime(2006, 9, 30))
assert_equal(filtered.columns.values, ["infl", "unemp"])
assert_equal(filtered.columns.values, ["infl_cycle", "unemp_cycle"])


def test_cfitz_pandas():
Expand All @@ -584,15 +584,15 @@ def test_cfitz_pandas():
assert_allclose(cycle.values, ndcycle, rtol=1e-14)
assert_equal(cycle.index[0], datetime(1959, 3, 31))
assert_equal(cycle.index[-1], datetime(2009, 9, 30))
assert_equal(cycle.name, "infl")
assert_equal(cycle.name, "infl_cycle")

# 2d
cycle, trend = cffilter(dta[["infl", "unemp"]])
ndcycle, ndtrend = cffilter(dta[['infl', 'unemp']].values)
assert_allclose(cycle.values, ndcycle, rtol=1e-14)
assert_equal(cycle.index[0], datetime(1959, 3, 31))
assert_equal(cycle.index[-1], datetime(2009, 9, 30))
assert_equal(cycle.columns.values, ["infl", "unemp"])
assert_equal(cycle.columns.values, ["infl_cycle", "unemp_cycle"])


def test_hpfilter_pandas():
Expand All @@ -604,7 +604,7 @@ def test_hpfilter_pandas():
assert_equal(cycle.values, ndcycle)
assert_equal(cycle.index[0], datetime(1959, 3, 31))
assert_equal(cycle.index[-1], datetime(2009, 9, 30))
assert_equal(cycle.name, "cycle")
assert_equal(cycle.name, "realgdp_cycle")


class TestFilters(object):
Expand Down
Loading

0 comments on commit 922e9d2

Please sign in to comment.