Skip to content

Commit

Permalink
BUG: Adding skipna as an option to groupby cumsum and cumprod (pandas…
Browse files Browse the repository at this point in the history
  • Loading branch information
shangyian authored and jreback committed Mar 1, 2018
1 parent 072545d commit 5f271eb
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,7 @@ Groupby/Resample/Rolling
- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`)

Sparse
^^^^^^
Expand Down
16 changes: 14 additions & 2 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
def group_cumprod_float64(float64_t[:, :] out,
float64_t[:, :] values,
int64_t[:] labels,
bint is_datetimelike):
bint is_datetimelike,
bint skipna=True):
"""
Only transforms on axis=0
"""
Expand All @@ -163,14 +164,20 @@ def group_cumprod_float64(float64_t[:, :] out,
if val == val:
accum[lab, j] *= val
out[i, j] = accum[lab, j]
else:
out[i, j] = NaN
if not skipna:
accum[lab, j] = NaN
break


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
numeric[:, :] values,
int64_t[:] labels,
is_datetimelike):
is_datetimelike,
bint skipna=True):
"""
Only transforms on axis=0
"""
Expand All @@ -196,6 +203,11 @@ def group_cumsum(numeric[:, :] out,
if val == val:
accum[lab, j] += val
out[i, j] = accum[lab, j]
else:
out[i, j] = NaN
if not skipna:
accum[lab, j] = NaN
break
else:
accum[lab, j] += val
out[i, j] = accum[lab, j]
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,8 @@ def rank(self, method='average', ascending=True, na_option='keep',
@Appender(_doc_template)
def cumprod(self, axis=0, *args, **kwargs):
"""Cumulative product for each group"""
nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
nv.validate_groupby_func('cumprod', args, kwargs,
['numeric_only', 'skipna'])
if axis != 0:
return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))

Expand All @@ -1898,7 +1899,8 @@ def cumprod(self, axis=0, *args, **kwargs):
@Appender(_doc_template)
def cumsum(self, axis=0, *args, **kwargs):
"""Cumulative sum for each group"""
nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
nv.validate_groupby_func('cumsum', args, kwargs,
['numeric_only', 'skipna'])
if axis != 0:
return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,31 @@ def test_cython_transform_series(self, op, args, targop):
tm.assert_series_equal(expected, getattr(
data.groupby(labels), op)(*args))

@pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize('input, exp', [
# When everything is NaN
({'key': ['b'] * 10, 'value': np.nan},
pd.Series([np.nan] * 10, name='value')),
# When there is a single NaN
({'key': ['b'] * 10 + ['a'] * 2,
'value': [3] * 3 + [np.nan] + [3] * 8},
{('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
2187., 6561., 19683., 3.0, 9.0],
('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
21., 24., 27., 3.0, 6.0]})])
def test_groupby_cum_skipna(self, op, skipna, input, exp):
df = pd.DataFrame(input)
result = df.groupby('key')['value'].transform(op, skipna=skipna)
if isinstance(exp, dict):
expected = exp[(op, skipna)]
else:
expected = exp
expected = pd.Series(expected, name='value')
tm.assert_series_equal(expected, result)

@pytest.mark.parametrize(
"op, args, targop",
[('cumprod', (), lambda x: x.cumprod()),
Expand Down

0 comments on commit 5f271eb

Please sign in to comment.