Skip to content

Commit

Permalink
BUG: Categorical.remove_categories(np.nan) fails when underlying dtyp…
Browse files Browse the repository at this point in the history
…e is float (GH pandas-dev#10156)
  • Loading branch information
evanpw authored and Evan Wright committed Jun 9, 2015
1 parent 90e067b commit e462c34
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ Bug Fixes
- Bug in ``GroupBy.get_group`` raising ``ValueError`` when the group key contains ``NaT`` (:issue:`6992`)
- Bug where the ``SparseSeries`` constructor ignores the name of the input data (:issue:`10258`)

- Bug in ``Categorical.remove_categories`` causing a ``ValueError`` when removing the ``NaN`` category if the underlying dtype is floating-point (:issue:`10156`)

- Bug where ``infer_freq`` infers a time rule (WOM-5XXX) that is unsupported by ``to_offset`` (:issue:`9425`)
- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`)
- Bug in masking an empty ``DataFrame`` (:issue:`10126`)
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,11 +749,19 @@ def remove_categories(self, removals, inplace=False):
"""
if not is_list_like(removals):
removals = [removals]
removals = set(list(removals))
not_included = removals - set(self._categories)

removal_set = set(list(removals))
not_included = removal_set - set(self._categories)
new_categories = [ c for c in self._categories if c not in removal_set ]

# GH 10156
if any(isnull(removals)):
not_included = [x for x in not_included if notnull(x)]
new_categories = [x for x in new_categories if notnull(x)]

if len(not_included) != 0:
raise ValueError("removals must all be in old categories: %s" % str(not_included))
new_categories = [ c for c in self._categories if c not in removals ]

return self.set_categories(new_categories, ordered=self.ordered, rename=False,
inplace=inplace)

Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,28 @@ def test_nan_handling(self):
self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
self.assert_numpy_array_equal(c._codes , np.array([0,2,-1,0]))

# Remove null categories (GH 10156)
cases = [
([1.0, 2.0, np.nan], [1.0, 2.0]),
(['a', 'b', None], ['a', 'b']),
([pd.Timestamp('2012-05-01'), pd.NaT], [pd.Timestamp('2012-05-01')])
]

null_values = [np.nan, None, pd.NaT]

for with_null, without in cases:
base = Categorical([], with_null)
expected = Categorical([], without)

for nullval in null_values:
result = base.remove_categories(nullval)
self.assert_categorical_equal(result, expected)

# Different null values are indistinguishable
for i, j in [(0, 1), (0, 2), (1, 2)]:
nulls = [null_values[i], null_values[j]]
self.assertRaises(ValueError, lambda: Categorical([], categories=nulls))


def test_isnull(self):
exp = np.array([False, False, True])
Expand Down

0 comments on commit e462c34

Please sign in to comment.