Skip to content

Commit

Permalink
Remove rolling (#2995)
Browse files Browse the repository at this point in the history
* Remove rolling

* Remove deprecated rolling methods

xref pandas-dev/pandas#18723

* PEP8

* Added reference
  • Loading branch information
TomAugspurger authored and jcrist committed Dec 21, 2017
1 parent 330c2dc commit 4b09c3e
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 103 deletions.
5 changes: 1 addition & 4 deletions dask/dataframe/__init__.py
Expand Up @@ -9,10 +9,7 @@
demo, to_hdf, to_records, to_bag)
from .optimize import optimize
from .multi import merge, concat
from .rolling import (rolling_count, rolling_sum, rolling_mean, rolling_median,
rolling_min, rolling_max, rolling_std, rolling_var,
rolling_skew, rolling_kurt, rolling_quantile, rolling_apply,
rolling_window)
from . import rolling
from ..base import compute
from .reshape import get_dummies, pivot_table, melt
try:
Expand Down
44 changes: 0 additions & 44 deletions dask/dataframe/rolling.py
@@ -1,8 +1,6 @@
from __future__ import absolute_import, division, print_function

import datetime
import warnings
from functools import wraps

import pandas as pd
from pandas.core.window import Rolling as pd_Rolling
Expand Down Expand Up @@ -142,27 +140,6 @@ def map_overlap(func, df, before, after, *args, **kwargs):
return df._constructor(dsk, name, meta, df.divisions)


def wrap_rolling(func, method_name):
    """Create a chunked version of a pandas.rolling_* function.

    Parameters
    ----------
    func : callable
        The function whose metadata (name, docstring) is copied onto the
        returned wrapper via ``functools.wraps``.
    method_name : str
        Name of the method looked up on ``arg.rolling(...)`` and reported
        in the deprecation message.

    Returns
    -------
    callable
        ``rolling(arg, window, *args, **kwargs)``. It warns that the
        function is deprecated, then evaluates
        ``arg.rolling(window, ...)`` followed by the ``method_name`` method.
    """
    # Keywords that configure the rolling window itself; every other keyword
    # is forwarded to the reduction method.
    window_options = frozenset(['min_periods', 'center', 'win_type', 'axis',
                                'freq'])

    @wraps(func)
    def rolling(arg, window, *args, **kwargs):
        # pd.rolling_* functions are deprecated.  Pass an explicit category
        # (the default would be UserWarning) and stacklevel=2 so the warning
        # is attributed to the caller's line rather than to this wrapper.
        warnings.warn(("DeprecationWarning: dd.rolling_{0} is deprecated and "
                       "will be removed in a future version, replace with "
                       "df.rolling(...).{0}(...)").format(method_name),
                      FutureWarning, stacklevel=2)

        rolling_kwargs = {}
        method_kwargs = {}
        for k, v in kwargs.items():
            if k in window_options:
                rolling_kwargs[k] = v
            else:
                method_kwargs[k] = v
        # Named ``roller`` so the local does not shadow this wrapper's name.
        roller = arg.rolling(window, **rolling_kwargs)
        return getattr(roller, method_name)(*args, **method_kwargs)
    return rolling


def _head_timedelta(current, next_, after):
"""Return rows of ``next_`` whose index is before the last
observation in ``current`` + ``after``.
Expand Down Expand Up @@ -197,27 +174,6 @@ def _tail_timedelta(prev, current, before):
return prev[prev.index > (current.index.min() - before)]


# Module-level ``rolling_*`` functions, one per pandas ``pd.rolling_*``
# function.  Each is built by ``wrap_rolling`` from the pandas function and
# the name of the method passed as its second argument.
rolling_count = wrap_rolling(pd.rolling_count, 'count')
rolling_sum = wrap_rolling(pd.rolling_sum, 'sum')
rolling_mean = wrap_rolling(pd.rolling_mean, 'mean')
rolling_median = wrap_rolling(pd.rolling_median, 'median')
rolling_min = wrap_rolling(pd.rolling_min, 'min')
rolling_max = wrap_rolling(pd.rolling_max, 'max')
rolling_std = wrap_rolling(pd.rolling_std, 'std')
rolling_var = wrap_rolling(pd.rolling_var, 'var')
rolling_skew = wrap_rolling(pd.rolling_skew, 'skew')
rolling_kurt = wrap_rolling(pd.rolling_kurt, 'kurt')
rolling_quantile = wrap_rolling(pd.rolling_quantile, 'quantile')
rolling_apply = wrap_rolling(pd.rolling_apply, 'apply')


@wraps(pd.rolling_window)
def rolling_window(arg, window, **kwargs):
    # ``mean`` is consumed here and never forwarded: a truthy value (the
    # default) selects rolling_mean, a falsy value selects rolling_sum.
    # All remaining keywords are passed through unchanged.
    use_mean = kwargs.pop('mean', True)
    reducer = rolling_mean if use_mean else rolling_sum
    return reducer(arg, window, **kwargs)


def pandas_rolling_method(df, rolling_kwargs, name, *args, **kwargs):
    """Call the rolling method ``name`` on ``df``.

    ``rolling_kwargs`` is expanded into ``df.rolling(...)``; ``*args`` and
    ``**kwargs`` are passed to the method looked up by ``name`` on the
    resulting object, and that method's return value is returned.
    """
    method = getattr(df.rolling(**rolling_kwargs), name)
    return method(*args, **kwargs)
Expand Down
42 changes: 0 additions & 42 deletions dask/dataframe/tests/test_rolling.py
Expand Up @@ -93,48 +93,6 @@ def mad(x):
return np.fabs(x - x.mean()).mean()


# Shared checks used by the series and dataframe tests: for the same data,
# each ``pd.rolling_*`` result on ``p`` must equal the ``dd.rolling_*``
# result on ``d`` according to ``assert_eq``.
# NOTE(review): indentation was lost in this capture, so the extent of the
# ``pytest.warns`` block below cannot be read from here — confirm against
# the repository.
def rolling_functions_tests(p, d):
# Old-fashioned rolling API
with pytest.warns(FutureWarning):
assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
# see note around test_rolling_dataframe for logic concerning precision
assert_eq(pd.rolling_skew(p, 3),
dd.rolling_skew(d, 3), check_less_precise=True)
assert_eq(pd.rolling_kurt(p, 3),
dd.rolling_kurt(d, 3), check_less_precise=True)
# quantile and apply take one extra positional argument after the window
assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
# Test with edge-case window sizes
assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
# Test with kwargs
assert_eq(pd.rolling_sum(p, 3, min_periods=3),
dd.rolling_sum(d, 3, min_periods=3))
# The windowed check passes win_type='boxcar'; it is skipped when scipy
# cannot be imported.
pytest.importorskip("scipy")
assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
dd.rolling_window(d, 3, win_type='boxcar'))


def test_rolling_functions_series():
    """Run the shared rolling checks on a 25-element Series."""
    # Cumulative sum of 25 normal draws, passed to dd.from_pandas with 3
    # as its second argument.
    series = pd.Series(np.random.randn(25).cumsum())
    rolling_functions_tests(series, dd.from_pandas(series, 3))


def test_rolling_functions_dataframe():
    """Run the shared rolling checks on a two-column, 25-row DataFrame."""
    # Draw column 'a' before column 'b', the same order in which the values
    # of a dict literal are evaluated.
    walk = np.random.randn(25).cumsum()
    ints = np.random.randint(100, size=(25,))
    frame = pd.DataFrame({'a': walk, 'b': ints})
    rolling_functions_tests(frame, dd.from_pandas(frame, 3))


rolling_method_args_check_less_precise = [
('count', (), False),
('sum', (), False),
Expand Down
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Expand Up @@ -33,6 +33,7 @@ DataFrame
- Correctly handle the column name (`df.columns.name`) when reading in ``dd.read_parquet`` (:pr:`2973`) `Tom Augspurger`_
- Fixed ``dd.concat`` losing the index dtype when the data contained a categorical (:issue:`2932`) `Tom Augspurger`_
- ``DataFrame.merge()`` (:pr:`2960`) now supports merging on a combination of columns and the index `Jon Mease`_
- Removed the deprecated ``dd.rolling*`` methods, in preparation for their removal in the next pandas release (:pr:`2995`) `Tom Augspurger`_


Core
Expand Down
13 changes: 0 additions & 13 deletions docs/source/dataframe-api.rst
Expand Up @@ -243,19 +243,6 @@ Rolling Operations

.. autosummary::
rolling.map_overlap
rolling.rolling_apply
rolling.rolling_count
rolling.rolling_kurt
rolling.rolling_max
rolling.rolling_mean
rolling.rolling_median
rolling.rolling_min
rolling.rolling_quantile
rolling.rolling_skew
rolling.rolling_std
rolling.rolling_sum
rolling.rolling_var
rolling.rolling_window

Create DataFrames
~~~~~~~~~~~~~~~~~
Expand Down

4 comments on commit 4b09c3e

@topper-123
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thx for pulling this in.

Is there a timeline for releasing a new (minor) version of dask? Currently this is a blocker for pulling in pandas-dev/pandas#18723 into pandas.

@mrocklin
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking through the changes since last release I don't see any reason why we couldn't issue a minor release. Pinging @TomAugspurger and @jcrist who may have thoughts here.

@TomAugspurger
Copy link
Member Author

@TomAugspurger TomAugspurger commented on 4b09c3e Dec 24, 2017 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jcrist
Copy link
Member

@jcrist jcrist commented on 4b09c3e Dec 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a few PRs for small fixes that would be nice to merge beforehand, but other than that fine by me.

Please sign in to comment.