API: Disallow dict as agg parameter during groupby

Grouped, rolled, and resampled Series / DataFrames will now disallow dicts / nested dicts, respectively, as parameters to aggregation (this usage was previously deprecated). xref gh-15931.
forking-repos · Oct 28, 2018 · 5e44744 · 5e44744
1 parent da9d851
commit 5e44744
Show file tree

Hide file tree

Showing 8 changed files with 28 additions and 303 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -941,6 +941,8 @@ Removal of prior version deprecations/changes
 - Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
 - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
 - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
+- Grouped, rolled, and resampled ``Series`` will now raise a ``ValueError`` when a dictionary is passed in during aggregation (:issue:`15931`)
+- Grouped, rolled, and resampled ``DataFrame`` will now raise a ``ValueError`` when a nested dictionary is passed in during aggregation (:issue:`15931`)
 - :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
 - Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`)
 - Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -354,14 +354,10 @@ def _aggregate(self, arg, *args, **kwargs):
 
             obj = self._selected_obj
 
-            def nested_renaming_depr(level=4):
-                # deprecation of nested renaming
-                # GH 15931
-                warnings.warn(
-                    ("using a dict with renaming "
-                     "is deprecated and will be removed in a future "
-                     "version"),
-                    FutureWarning, stacklevel=level)
+            def raise_on_dict_renaming():
+                # Originally deprecated in gh-15931, now enforcing.
+                rename_msg_err = "Using a dict with renaming is not allowed"
+                raise ValueError(rename_msg_err)
 
             # if we have a dict of any non-scalars
             # eg. {'A' : ['mean']}, normalize all to
@@ -391,10 +387,10 @@ def nested_renaming_depr(level=4):
                             msg = ('cannot perform renaming for {key} with a '
                                    'nested dictionary').format(key=k)
                             raise SpecificationError(msg)
-                        nested_renaming_depr(4 + (_level or 0))
+                        raise_on_dict_renaming()
 
                     elif isinstance(obj, ABCSeries):
-                        nested_renaming_depr()
+                        raise_on_dict_renaming()
                     elif (isinstance(obj, ABCDataFrame) and
                           k not in obj.columns):
                         raise KeyError(
@@ -408,7 +404,7 @@ def nested_renaming_depr(level=4):
                 keys = list(compat.iterkeys(arg))
                 if (isinstance(obj, ABCDataFrame) and
                         len(obj.columns.intersection(keys)) != len(keys)):
-                    nested_renaming_depr()
+                    raise_on_dict_renaming()
 
             from pandas.core.reshape.concat import concat
 

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -8,7 +8,6 @@
 
 import collections
 import copy
-import warnings
 from functools import partial
 from textwrap import dedent
 
@@ -785,15 +784,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
     def _aggregate_multiple_funcs(self, arg, _level):
         if isinstance(arg, dict):
 
-            # show the deprecation, but only if we
-            # have not shown a higher level one
-            # GH 15931
-            if isinstance(self._selected_obj, Series) and _level <= 1:
-                warnings.warn(
-                    ("using a dict on a Series for aggregation\n"
-                     "is deprecated and will be removed in a future "
-                     "version"),
-                    FutureWarning, stacklevel=3)
+            # Deprecated in gh-15931, now enforcing.
+            if isinstance(self._selected_obj, Series):
+                raise ValueError("Using a dict with renaming is not allowed")
 
             columns = list(arg.keys())
             arg = list(arg.items())

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -241,49 +241,3 @@ def test_more_flexible_frame_multi_function(df):
     expected = grouped.aggregate(OrderedDict([['C', np.mean],
                                               ['D', [np.mean, np.std]]]))
     tm.assert_frame_equal(result, expected)
-
-    def foo(x):
-        return np.mean(x)
-
-    def bar(x):
-        return np.std(x, ddof=1)
-
-    # this uses column selection & renaming
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        d = OrderedDict([['C', np.mean],
-                         ['D', OrderedDict([['foo', np.mean],
-                                            ['bar', np.std]])]])
-        result = grouped.aggregate(d)
-
-    d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
-    expected = grouped.aggregate(d)
-
-    tm.assert_frame_equal(result, expected)
-
-
-def test_multi_function_flexible_mix(df):
-    # GH #1268
-    grouped = df.groupby('A')
-
-    # Expected
-    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
-                     ['D', {'sum': 'sum'}]])
-    # this uses column selection & renaming
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        expected = grouped.aggregate(d)
-
-    # Test 1
-    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
-                     ['D', 'sum']])
-    # this uses column selection & renaming
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = grouped.aggregate(d)
-    tm.assert_frame_equal(result, expected)
-
-    # Test 2
-    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
-                     ['D', ['sum']]])
-    # this uses column selection & renaming
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = grouped.aggregate(d)
-    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -195,103 +195,26 @@ def test_aggregate_api_consistency():
     expected = pd.concat([d_sum, c_mean], axis=1)
     tm.assert_frame_equal(result, expected, check_like=True)
 
-    result = grouped.agg({'C': ['mean', 'sum'],
-                          'D': ['mean', 'sum']})
-    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
-    expected.columns = MultiIndex.from_product([['C', 'D'],
-                                                ['mean', 'sum']])
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = grouped[['D', 'C']].agg({'r': np.sum,
-                                          'r2': np.mean})
-    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
-    expected.columns = MultiIndex.from_product([['r', 'r2'],
-                                                ['D', 'C']])
-    tm.assert_frame_equal(result, expected, check_like=True)
-
-
-def test_agg_dict_renaming_deprecation():
-    # 15931
-    df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
-                       'B': range(5),
-                       'C': range(5)})
-
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False) as w:
-        df.groupby('A').agg({'B': {'foo': ['sum', 'max']},
-                             'C': {'bar': ['count', 'min']}})
-        assert "using a dict with renaming" in str(w[0].message)
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        df.groupby('A')[['B', 'C']].agg({'ma': 'max'})
-
-    with tm.assert_produces_warning(FutureWarning) as w:
-        df.groupby('A').B.agg({'foo': 'count'})
-        assert "using a dict on a Series for aggregation" in str(w[0].message)
-
-
-def test_agg_compat():
-    # GH 12334
-    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
-                          'foo', 'bar', 'foo', 'foo'],
-                    'B': ['one', 'one', 'two', 'two',
-                          'two', 'two', 'one', 'two'],
-                    'C': np.random.randn(8) + 1.0,
-                    'D': np.arange(8)})
-
-    g = df.groupby(['A', 'B'])
-
-    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
-    expected.columns = MultiIndex.from_tuples([('C', 'sum'),
-                                               ('C', 'std')])
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = g['D'].agg({'C': ['sum', 'std']})
-    tm.assert_frame_equal(result, expected, check_like=True)
-
-    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
-    expected.columns = ['C', 'D']
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = g['D'].agg({'C': 'sum', 'D': 'std'})
-    tm.assert_frame_equal(result, expected, check_like=True)
-
 
 def test_agg_nested_dicts():
-    # API change for disallowing these types of nested dicts
+    # API change for disallowing these types of nested dicts.
     df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                           'foo', 'bar', 'foo', 'foo'],
                     'B': ['one', 'one', 'two', 'two',
                           'two', 'two', 'one', 'two'],
                     'C': np.random.randn(8) + 1.0,
                     'D': np.arange(8)})
-
     g = df.groupby(['A', 'B'])
 
     msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
     with tm.assert_raises_regex(SpecificationError, msg):
         g.aggregate({'r1': {'C': ['mean', 'sum']},
                      'r2': {'D': ['mean', 'sum']}})
 
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = g.agg({'C': {'ra': ['mean', 'std']},
-                        'D': {'rb': ['mean', 'std']}})
-    expected = pd.concat([g['C'].mean(), g['C'].std(),
-                          g['D'].mean(), g['D'].std()],
-                         axis=1)
-    expected.columns = pd.MultiIndex.from_tuples(
-        [('ra', 'mean'), ('ra', 'std'),
-         ('rb', 'mean'), ('rb', 'std')])
-    tm.assert_frame_equal(result, expected, check_like=True)
-
-    # same name as the original column
-    # GH9052
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
-    expected = expected.rename(columns={'result1': 'D'})
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = g['D'].agg({'D': np.sum, 'result2': np.mean})
-    tm.assert_frame_equal(result, expected, check_like=True)
+    msg = "Using a dict with renaming is not allowed"
+    with tm.assert_raises_regex(ValueError, msg):
+        g.agg({'C': {'ra': ['mean', 'std']},
+               'D': {'rb': ['mean', 'std']}})
 
 
 def test_agg_item_by_item_raise_typeerror():

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -61,11 +61,11 @@ def test_basic(dtype):
                         check_index_type=False)
 
     # complex agg
-    agged = grouped.aggregate([np.mean, np.std])
+    grouped.aggregate([np.mean, np.std])
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
-        agged = grouped.aggregate({'one': np.mean, 'two': np.std})
+    msg = "Using a dict with renaming is not allowed"
+    with tm.assert_raises_regex(ValueError, msg):
+        grouped.aggregate({'one': np.mean, 'two': np.std})
 
     group_constants = {0: 10, 1: 20, 2: 30}
     agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
@@ -444,11 +444,6 @@ def test_frame_set_name_single(df):
     result = grouped['C'].agg([np.mean, np.std])
     assert result.index.name == 'A'
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
-        result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
-    assert result.index.name == 'A'
-
 
 def test_multi_func(df):
     col1 = df['A']
@@ -553,15 +548,6 @@ def test_groupby_as_index_agg(df):
     expected2['D'] = grouped.sum()['D']
     assert_frame_equal(result2, expected2)
 
-    grouped = df.groupby('A', as_index=True)
-    expected3 = grouped['C'].sum()
-    expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
-
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
-        result3 = grouped['C'].agg({'Q': np.sum})
-    assert_frame_equal(result3, expected3)
-
     # multi-key
 
     grouped = df.groupby(['A', 'B'], as_index=False)