Skip to content

Commit

Permalink
BUG: Allow apply to be used with non-numpy-dtype DataFrames
Browse files Browse the repository at this point in the history
Fixes a bug in DataFrame.apply by skipping the reduction step for
DataFrames whose values do not have a numpy dtype.

Closes gh-12244.
  • Loading branch information
gfyoung committed Feb 12, 2016
1 parent 0c09bd1 commit b18b74f
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 13 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -967,4 +967,7 @@ Bug Fixes
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)

- Bug in ``buffer_rd_bytes`` where ``src->buffer`` could be freed more than once if reading failed, causing a segfault (:issue:`12098`)

- Bug in ``crosstab`` where arguments with non-overlapping indexes would raise a ``KeyError`` (:issue:`10291`)

- Bug in ``DataFrame.apply`` where reduction was not prevented when the ``dtype`` was not a numpy dtype (:issue:`12244`)
28 changes: 15 additions & 13 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4072,22 +4072,24 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
# this only matters if the reduction in values is of different dtype
# e.g. if we want to apply to a SparseFrame, then can't directly reduce
if reduce:

values = self.values

# Create a dummy Series from an empty array
index = self._get_axis(axis)
empty_arr = np.empty(len(index), dtype=values.dtype)
dummy = Series(empty_arr, index=self._get_axis(axis),
dtype=values.dtype)
# we cannot reduce using non-numpy dtypes,
# as demonstrated in gh-12244
if not is_internal_type(values):
# Create a dummy Series from an empty array
index = self._get_axis(axis)
empty_arr = np.empty(len(index), dtype=values.dtype)
dummy = Series(empty_arr, index=self._get_axis(axis),
dtype=values.dtype)

try:
labels = self._get_agg_axis(axis)
result = lib.reduce(values, func, axis=axis, dummy=dummy,
labels=labels)
return Series(result, index=labels)
except Exception:
pass
try:
labels = self._get_agg_axis(axis)
result = lib.reduce(values, func, axis=axis, dummy=dummy,
labels=labels)
return Series(result, index=labels)
except Exception:
pass

dtype = object if self._is_mixed_type else None
if axis == 0:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,19 @@ def test_applymap(self):
result = df.applymap(str)
for f in ['datetime', 'timedelta']:
self.assertEqual(result.loc[0, f], str(df.loc[0, f]))

# See gh-12244
def test_apply_non_numpy_dtype(self):
df = DataFrame({'dt': pd.date_range(
"2015-01-01", periods=3, tz='Europe/Brussels')})
result = df.apply(lambda x: x)
assert_frame_equal(result, df)

result = df.apply(lambda x: x + pd.Timedelta('1day'))
expected = DataFrame({'dt': pd.date_range(
"2015-01-02", periods=3, tz='Europe/Brussels')})
assert_frame_equal(result, expected)

df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category')
result = df.apply(lambda x: x)
assert_frame_equal(result, df)

0 comments on commit b18b74f

Please sign in to comment.