Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rolling cov #5154

Merged
10 commits merged on Aug 5, 2019
@@ -39,10 +39,15 @@ def overlap_chunk(
if isinstance(before, datetime.timedelta):
before = len(prev_part)

expansion = out.shape[0] // combined.shape[0]

This comment has been minimized.

Copy link
@TomAugspurger

TomAugspurger Jul 29, 2019

Member

Is this likely to cause new issues with empty partitions? For example, the empty partition produced by:

In [33]: df = pd.DataFrame({"A": range(12), "B": [True] * 3 + [False] * 3 + [True] * 6})

In [34]: ddf = dd.from_pandas(df, 4)

ddf[df.B].get_partition(1).compute()

We may already have issues with empty partitions, in which case don't worry about it.

This comment has been minimized.

Copy link
@ivarsfg

ivarsfg Jul 30, 2019

Author Contributor

I didn't find a test case that triggers this, but I think a check here doesn't add much complexity and could prevent confusion later.

if before:
before *= expansion
if next_part is None:
return out.iloc[before:]
if isinstance(after, datetime.timedelta):
after = len(next_part)
if after:
after *= expansion
return out.iloc[before:-after]


@@ -351,6 +356,10 @@ def _call_method(self, method_name, *args, **kwargs):
def count(self):
    # Name the rolling operation explicitly, then hand it to the shared
    # ``_call_method`` dispatcher with no extra arguments.
    method_name = "count"
    return self._call_method(method_name)

@derived_from(pd_Rolling)
def cov(self):
    # Forward to ``_call_method``, passing only the name of the rolling
    # operation; no positional or keyword arguments are supplied.
    method_name = "cov"
    return self._call_method(method_name)

@derived_from(pd_Rolling)
def sum(self):
    # Same delegation pattern as the sibling reductions: the operation name
    # is the only thing passed to ``_call_method``.
    method_name = "sum"
    return self._call_method(method_name)
@@ -402,7 +411,7 @@ def apply(self, func, args=(), kwargs={}, **kwds):
if kwargs:
msg = (
"Invalid argument to 'apply'. Keyword arguments "
"should be given as a dict to the 'kwargs' arugment. "
"should be given as a dict to the 'kwargs' argument. "
)
raise TypeError(msg)
return self._call_method("apply", func, args=args, kwargs=kwargs, **kwds)
@@ -166,6 +166,20 @@ def test_rolling_methods(method, args, window, center, check_less_precise):
)


@pytest.mark.parametrize("window", [1, 2, 4, 5])
@pytest.mark.parametrize("center", [True, False])
def test_rolling_cov(window, center):
# DataFrame
prolling = df.drop("a", 1).rolling(window, center=center)
drolling = ddf.drop("a", 1).rolling(window, center=center)
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
This conversation was marked as resolved by ivarsfg

This comment has been minimized.

Copy link
@jcrist

jcrist Jul 26, 2019

Member

Since you're not parametrizing on the method anymore, you can just call cov directly:

Suggested change
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
assert_eq(prolling.cov(), drolling.cov())

# Series
prolling = df.b.rolling(window, center=center)
drolling = ddf.b.rolling(window, center=center)
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
This conversation was marked as resolved by ivarsfg

This comment has been minimized.

Copy link
@jcrist

jcrist Jul 26, 2019

Member
Suggested change
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
assert_eq(prolling.cov(), drolling.cov())


@pytest.mark.skipif(PANDAS_VERSION >= "0.23.0", reason="Raw is allowed.")
def test_rolling_raw_pandas_lt_0230_raises():
with pytest.raises(TypeError):
@@ -273,6 +287,19 @@ def test_time_rolling_methods(method, args, window, check_less_precise):
)


@pytest.mark.parametrize("window", ["1S", "2S", "3S", pd.offsets.Second(5)])
def test_time_rolling_cov(window):
# DataFrame
prolling = ts.drop("a", 1).rolling(window)
drolling = dts.drop("a", 1).rolling(window)
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
This conversation was marked as resolved by ivarsfg

This comment has been minimized.

Copy link
@jcrist

jcrist Jul 26, 2019

Member
Suggested change
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
assert_eq(prolling.cov(), drolling.cov())

# Series
prolling = ts.b.rolling(window)
drolling = dts.b.rolling(window)
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
This conversation was marked as resolved by ivarsfg

This comment has been minimized.

Copy link
@jcrist

jcrist Jul 26, 2019

Member
Suggested change
assert_eq(getattr(prolling, "cov")(), getattr(drolling, "cov")())
assert_eq(prolling.cov(), drolling.cov())


@pytest.mark.parametrize(
"window,N",
[("1s", 10), ("2s", 10), ("10s", 10), ("10h", 10), ("10s", 100), ("10h", 100)],
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.