Skip to content

Commit

Permalink
API: Disallow dict as agg parameter during groupby
Browse files Browse the repository at this point in the history
Grouped, rolled, and resampled Series / DataFrames
will now disallow dicts / nested dicts, respectively,
as parameters to aggregation (this was previously deprecated).

xref gh-15931 (pandas-dev/pandas).
  • Loading branch information
gfyoung committed Oct 28, 2018
1 parent da9d851 commit 5e44744
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 303 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,8 @@ Removal of prior version deprecations/changes
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
- Grouped, rolled, and resampled ``Series`` will now raise a ``ValueError`` when a dictionary is passed in during aggregation (:issue:`15931`)
- Grouped, rolled, and resampled ``DataFrame`` will now raise a ``ValueError`` when a nested dictionary is passed in during aggregation (:issue:`15931`)
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`)
- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`)
Expand Down
18 changes: 7 additions & 11 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,14 +354,10 @@ def _aggregate(self, arg, *args, **kwargs):

obj = self._selected_obj

def nested_renaming_depr(level=4):
# deprecation of nested renaming
# GH 15931
warnings.warn(
("using a dict with renaming "
"is deprecated and will be removed in a future "
"version"),
FutureWarning, stacklevel=level)
def raise_on_dict_renaming():
# Originally deprecated in gh-15931, now enforcing.
rename_msg_err = "Using a dict with renaming is not allowed"
raise ValueError(rename_msg_err)

# if we have a dict of any non-scalars
# eg. {'A' : ['mean']}, normalize all to
Expand Down Expand Up @@ -391,10 +387,10 @@ def nested_renaming_depr(level=4):
msg = ('cannot perform renaming for {key} with a '
'nested dictionary').format(key=k)
raise SpecificationError(msg)
nested_renaming_depr(4 + (_level or 0))
raise_on_dict_renaming()

elif isinstance(obj, ABCSeries):
nested_renaming_depr()
raise_on_dict_renaming()
elif (isinstance(obj, ABCDataFrame) and
k not in obj.columns):
raise KeyError(
Expand All @@ -408,7 +404,7 @@ def nested_renaming_depr(level=4):
keys = list(compat.iterkeys(arg))
if (isinstance(obj, ABCDataFrame) and
len(obj.columns.intersection(keys)) != len(keys)):
nested_renaming_depr()
raise_on_dict_renaming()

from pandas.core.reshape.concat import concat

Expand Down
13 changes: 3 additions & 10 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import collections
import copy
import warnings
from functools import partial
from textwrap import dedent

Expand Down Expand Up @@ -785,15 +784,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
def _aggregate_multiple_funcs(self, arg, _level):
if isinstance(arg, dict):

# show the deprecation, but only if we
# have not shown a higher level one
# GH 15931
if isinstance(self._selected_obj, Series) and _level <= 1:
warnings.warn(
("using a dict on a Series for aggregation\n"
"is deprecated and will be removed in a future "
"version"),
FutureWarning, stacklevel=3)
# Deprecated in gh-15931, now enforcing.
if isinstance(self._selected_obj, Series):
raise ValueError("Using a dict with renaming is not allowed")

columns = list(arg.keys())
arg = list(arg.items())
Expand Down
46 changes: 0 additions & 46 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,49 +241,3 @@ def test_more_flexible_frame_multi_function(df):
expected = grouped.aggregate(OrderedDict([['C', np.mean],
['D', [np.mean, np.std]]]))
tm.assert_frame_equal(result, expected)

def foo(x):
return np.mean(x)

def bar(x):
return np.std(x, ddof=1)

# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
d = OrderedDict([['C', np.mean],
['D', OrderedDict([['foo', np.mean],
['bar', np.std]])]])
result = grouped.aggregate(d)

d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
expected = grouped.aggregate(d)

tm.assert_frame_equal(result, expected)


def test_multi_function_flexible_mix(df):
# GH #1268
grouped = df.groupby('A')

# Expected
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', {'sum': 'sum'}]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = grouped.aggregate(d)

# Test 1
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', 'sum']])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)

# Test 2
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', ['sum']]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)
87 changes: 5 additions & 82 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,103 +195,26 @@ def test_aggregate_api_consistency():
expected = pd.concat([d_sum, c_mean], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)

result = grouped.agg({'C': ['mean', 'sum'],
'D': ['mean', 'sum']})
expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
expected.columns = MultiIndex.from_product([['C', 'D'],
['mean', 'sum']])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped[['D', 'C']].agg({'r': np.sum,
'r2': np.mean})
expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
expected.columns = MultiIndex.from_product([['r', 'r2'],
['D', 'C']])
tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_dict_renaming_deprecation():
# 15931
df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
'B': range(5),
'C': range(5)})

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False) as w:
df.groupby('A').agg({'B': {'foo': ['sum', 'max']},
'C': {'bar': ['count', 'min']}})
assert "using a dict with renaming" in str(w[0].message)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
df.groupby('A')[['B', 'C']].agg({'ma': 'max'})

with tm.assert_produces_warning(FutureWarning) as w:
df.groupby('A').B.agg({'foo': 'count'})
assert "using a dict on a Series for aggregation" in str(w[0].message)


def test_agg_compat():
# GH 12334
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'two',
'two', 'two', 'one', 'two'],
'C': np.random.randn(8) + 1.0,
'D': np.arange(8)})

g = df.groupby(['A', 'B'])

expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
expected.columns = MultiIndex.from_tuples([('C', 'sum'),
('C', 'std')])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'C': ['sum', 'std']})
tm.assert_frame_equal(result, expected, check_like=True)

expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
expected.columns = ['C', 'D']

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'C': 'sum', 'D': 'std'})
tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_nested_dicts():
# API change for disallowing these types of nested dicts
# API change for disallowing these types of nested dicts.
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'two',
'two', 'two', 'one', 'two'],
'C': np.random.randn(8) + 1.0,
'D': np.arange(8)})

g = df.groupby(['A', 'B'])

msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
with tm.assert_raises_regex(SpecificationError, msg):
g.aggregate({'r1': {'C': ['mean', 'sum']},
'r2': {'D': ['mean', 'sum']}})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g.agg({'C': {'ra': ['mean', 'std']},
'D': {'rb': ['mean', 'std']}})
expected = pd.concat([g['C'].mean(), g['C'].std(),
g['D'].mean(), g['D'].std()],
axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[('ra', 'mean'), ('ra', 'std'),
('rb', 'mean'), ('rb', 'std')])
tm.assert_frame_equal(result, expected, check_like=True)

# same name as the original column
# GH9052
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
expected = expected.rename(columns={'result1': 'D'})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'D': np.sum, 'result2': np.mean})
tm.assert_frame_equal(result, expected, check_like=True)
msg = "Using a dict with renaming is not allowed"
with tm.assert_raises_regex(ValueError, msg):
g.agg({'C': {'ra': ['mean', 'std']},
'D': {'rb': ['mean', 'std']}})


def test_agg_item_by_item_raise_typeerror():
Expand Down
22 changes: 4 additions & 18 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ def test_basic(dtype):
check_index_type=False)

# complex agg
agged = grouped.aggregate([np.mean, np.std])
grouped.aggregate([np.mean, np.std])

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
agged = grouped.aggregate({'one': np.mean, 'two': np.std})
msg = "Using a dict with renaming is not allowed"
with tm.assert_raises_regex(ValueError, msg):
grouped.aggregate({'one': np.mean, 'two': np.std})

group_constants = {0: 10, 1: 20, 2: 30}
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
Expand Down Expand Up @@ -444,11 +444,6 @@ def test_frame_set_name_single(df):
result = grouped['C'].agg([np.mean, np.std])
assert result.index.name == 'A'

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
assert result.index.name == 'A'


def test_multi_func(df):
col1 = df['A']
Expand Down Expand Up @@ -553,15 +548,6 @@ def test_groupby_as_index_agg(df):
expected2['D'] = grouped.sum()['D']
assert_frame_equal(result2, expected2)

grouped = df.groupby('A', as_index=True)
expected3 = grouped['C'].sum()
expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result3 = grouped['C'].agg({'Q': np.sum})
assert_frame_equal(result3, expected3)

# multi-key

grouped = df.groupby(['A', 'B'], as_index=False)
Expand Down
Loading

0 comments on commit 5e44744

Please sign in to comment.