diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index ae5842b22349a..542402b92364e 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -967,4 +967,7 @@ Bug Fixes - Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`) - Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`) + - Bug in ``crosstab`` where arguments with non-overlapping indexes would return a ``KeyError`` (:issue:`10291`) + +- Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 324f30ed00bed..8a3ac4db37d2d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4072,22 +4072,24 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): # this only matters if the reduction in values is of different dtype # e.g. if we want to apply to a SparseFrame, then can't directly reduce if reduce: - values = self.values - # Create a dummy Series from an empty array - index = self._get_axis(axis) - empty_arr = np.empty(len(index), dtype=values.dtype) - dummy = Series(empty_arr, index=self._get_axis(axis), - dtype=values.dtype) + # we cannot reduce using non-numpy dtypes, + # as demonstrated in gh-12244 + if not is_internal_type(values): + # Create a dummy Series from an empty array + index = self._get_axis(axis) + empty_arr = np.empty(len(index), dtype=values.dtype) + dummy = Series(empty_arr, index=self._get_axis(axis), + dtype=values.dtype) - try: - labels = self._get_agg_axis(axis) - result = lib.reduce(values, func, axis=axis, dummy=dummy, - labels=labels) - return Series(result, index=labels) - except Exception: - pass + try: + labels = self._get_agg_axis(axis) + result = lib.reduce(values, func, axis=axis, dummy=dummy, + labels=labels) + return Series(result, index=labels) + except Exception: + pass dtype = object if self._is_mixed_type else None if axis == 0: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index e68b94342985d..120a51ab0b809 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -400,3 +400,19 @@ def test_applymap(self): result = df.applymap(str) for f in ['datetime', 'timedelta']: self.assertEqual(result.loc[0, f], str(df.loc[0, f])) + + # See gh-12244 + def test_apply_non_numpy_dtype(self): + df = DataFrame({'dt': pd.date_range( + "2015-01-01", periods=3, tz='Europe/Brussels')}) + result = df.apply(lambda x: x) + assert_frame_equal(result, df) + + result = df.apply(lambda x: x + pd.Timedelta('1day')) + expected = DataFrame({'dt': pd.date_range( + "2015-01-02", periods=3, tz='Europe/Brussels')}) + assert_frame_equal(result, expected) + + df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category') + result = df.apply(lambda x: x) + assert_frame_equal(result, df)