Skip to content

Commit

Permalink
Cythonized GroupBy pct_change (pandas-dev#19919)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored and jreback committed Mar 10, 2018
1 parent da6f827 commit 52cffa3
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 55 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ Performance Improvements
- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)

.. _whatsnew_0230.docs:

Expand Down
24 changes: 24 additions & 0 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,23 @@ def shift(self, periods=1, freq=None, axis=0):
result_is_index=True,
periods=periods)

@Substitution(name='groupby')
@Appender(_doc_template)
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
               axis=0):
    """Calculate pct_change of each value to previous entry in group"""
    # The fast path below only covers a positional shift along axis 0;
    # anything else is delegated to the object's own pct_change, applied
    # group by group.
    if freq is not None or axis != 0:
        return self.apply(lambda x: x.pct_change(periods=periods,
                                                 fill_method=fill_method,
                                                 limit=limit, freq=freq,
                                                 axis=axis))

    # ``fill_method`` names a fill method of this groupby object (looked up
    # with getattr); the grouping columns are dropped from its result before
    # doing arithmetic on it.
    filled = getattr(self, fill_method)(limit=limit).drop(
        self.grouper.names, axis=1)

    # ``filled`` is a plain frame, so shifting it directly would move values
    # across group boundaries and the first rows of one group would be
    # compared against the last rows of the previous one.  Re-group by the
    # original group labels so the shift stays inside each group.
    shifted = filled.groupby(self.grouper.labels).shift(periods=periods,
                                                        freq=freq)

    return (filled / shifted) - 1

@Substitution(name='groupby')
@Appender(_doc_template)
def head(self, n=5):
Expand Down Expand Up @@ -3884,6 +3901,13 @@ def _apply_to_column_groupbys(self, func):
""" return a pass thru """
return func(self)

def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
    """
    Calculate percent change of each value to previous entry in group

    Parameters
    ----------
    periods : int, default 1
        Number of periods to shift by when forming the percent change.
    fill_method : str, default 'pad'
        Name of the fill method of this groupby object that is called
        (with ``limit``) before the change is computed.
    limit : int, default None
        Passed through to the fill method.
    freq : optional
        Passed through to the group-wise shift.

    Returns
    -------
    The filled values divided by their group-wise shifted counterparts,
    minus one.
    """
    filled = getattr(self, fill_method)(limit=limit)

    # ``filled`` is no longer grouped, so shifting it directly would leak
    # values across group boundaries (the first entry of a group would be
    # compared with the last entry of the previous group).  Re-group by the
    # original group labels so the shift is performed per group.
    fill_grp = filled.groupby(self.grouper.labels)
    shifted = fill_grp.shift(periods=periods, freq=freq)

    return (filled / shifted) - 1


class NDFrameGroupBy(GroupBy):

Expand Down
55 changes: 0 additions & 55 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2062,61 +2062,6 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
ascending=ascending,
na_option=na_option, pct=pct)

@pytest.mark.parametrize("mix_groupings", [True, False])
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize("val1,val2", [
    ('foo', 'bar'),
    (1, 2),
    (1., 2.),
])
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
    ("ffill", None,
     [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
    ("ffill", 1,
     [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
    ("bfill", None,
     ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
    ("bfill", 1,
     [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan]),
])
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
                            fill_method, limit, exp_vals):
    """Check group-wise ffill / bfill against hand-written expectations."""
    raw = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]

    # 'val1' / 'val2' in the parametrized expectations are placeholders
    # for the actual parametrized values; swap them in here
    expected = [val1 if item == 'val1' else val2 if item == 'val2' else item
                for item in exp_vals]

    # Values and expectations are laid out to match the key ordering
    group_len = len(raw)
    if mix_groupings:
        # Alternating keys: ['a', 'b', 'a', 'b', ...], so every value is
        # repeated twice in a row to give each group the same sequence
        keys = ['a', 'b'] * group_len
        expected = [item for item in expected for _ in range(2)]
        raw = [item for item in raw for _ in range(2)]
    else:
        # Contiguous keys: ['a', 'a', 'a', ... 'b', 'b', 'b']
        keys = ['a'] * group_len + ['b'] * group_len
        expected = expected * 2
        raw = raw * 2

    frame = DataFrame({'key': keys, 'val': raw})
    grouped = frame.groupby('key')
    if as_series:
        result = getattr(grouped['val'], fill_method)(limit=limit)
        assert_series_equal(result, Series(expected, name='val'))
    else:
        result = getattr(grouped, fill_method)(limit=limit)
        assert_frame_equal(result,
                           DataFrame({'key': keys, 'val': expected}))

@pytest.mark.parametrize("agg_func", ['any', 'all'])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("vals", [
Expand Down
87 changes: 87 additions & 0 deletions pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,3 +636,90 @@ def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func):
exp = exp.astype('float')

comp_func(result, exp)

@pytest.mark.parametrize("mix_groupings", [True, False])
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize("val1,val2", [
    ('foo', 'bar'),
    (1, 2),
    (1., 2.),
])
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
    ("ffill", None,
     [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
    ("ffill", 1,
     [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
    ("bfill", None,
     ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
    ("bfill", 1,
     [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan]),
])
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
                            fill_method, limit, exp_vals):
    """Check group-wise ffill / bfill against hand-written expectations."""
    raw = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]

    # 'val1' / 'val2' in the parametrized expectations are placeholders
    # for the actual parametrized values; swap them in here
    expected = [val1 if item == 'val1' else val2 if item == 'val2' else item
                for item in exp_vals]

    # Values and expectations are laid out to match the key ordering
    group_len = len(raw)
    if mix_groupings:
        # Alternating keys: ['a', 'b', 'a', 'b', ...], so every value is
        # repeated twice in a row to give each group the same sequence
        keys = ['a', 'b'] * group_len
        expected = [item for item in expected for _ in range(2)]
        raw = [item for item in raw for _ in range(2)]
    else:
        # Contiguous keys: ['a', 'a', 'a', ... 'b', 'b', 'b']
        keys = ['a'] * group_len + ['b'] * group_len
        expected = expected * 2
        raw = raw * 2

    frame = DataFrame({'key': keys, 'val': raw})
    grouped = frame.groupby('key')
    if as_series:
        result = getattr(grouped['val'], fill_method)(limit=limit)
        assert_series_equal(result, Series(expected, name='val'))
    else:
        result = getattr(grouped, fill_method)(limit=limit)
        assert_frame_equal(result,
                           DataFrame({'key': keys, 'val': expected}))

@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("periods,fill_method,limit", [
    (1, 'ffill', None),
    (1, 'ffill', 1),
    (1, 'bfill', None),
    (1, 'bfill', 1),
    (-1, 'ffill', None),
    (-1, 'ffill', 1),
    (-1, 'bfill', None),
    (-1, 'bfill', 1),
])
def test_pct_change(self, test_series, periods, fill_method, limit):
    """Compare groupby pct_change with Series.pct_change run per group."""
    group_vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
    kwargs = dict(periods=periods, fill_method=fill_method, limit=limit)

    # Both groups hold the same values, so the expectation for the whole
    # object is the single-Series result repeated once per group
    expected_vals = Series(group_vals).pct_change(**kwargs).tolist() * 2

    group_len = len(group_vals)
    frame = DataFrame({'key': ['a'] * group_len + ['b'] * group_len,
                       'vals': group_vals * 2})
    grouped = frame.groupby('key')

    if not test_series:
        result = grouped.pct_change(**kwargs)
        tm.assert_frame_equal(result, DataFrame({'vals': expected_vals}))
        return

    expected = pd.Series(expected_vals)
    expected.name = 'vals'
    result = grouped['vals'].pct_change(**kwargs)
    tm.assert_series_equal(result, expected)

0 comments on commit 52cffa3

Please sign in to comment.