Skip to content

Commit

Permalink
BUG: Categorical[dt64tz].to_numpy() losing tz (pandas-dev#38136)
Browse files Browse the repository at this point in the history
* BUG: Categorical[dt64tz].to_numpy() losing tz

* Update pandas/core/arrays/categorical.py

Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>

* Avoid FutureWarning

* whatsnew

Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
  • Loading branch information
jbrockmendel and gfyoung committed Nov 30, 2020
1 parent 92aa7ae commit f9368fd
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 12 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ Categorical
- Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
- Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
-
- Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with timezone-aware ``datetime64`` categories incorrectly dropping the timezone information instead of casting to object dtype (:issue:`38136`)

Datetimelike
^^^^^^^^^^^^
Expand Down
10 changes: 10 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,16 @@ def index_or_series(request):
index_or_series2 = index_or_series


@pytest.fixture(
    ids=["index", "series", "array"],
    params=[pd.Index, pd.Series, pd.array],
)
def index_or_series_or_array(request):
    """
    Fixture supplying, one per test run, each of the constructors
    ``pd.Index``, ``pd.Series`` and ``pd.array``.

    The test ids are "index", "series" and "array" respectively.
    """
    # pytest exposes the currently selected entry of ``params`` here.
    box = request.param
    return box


@pytest.fixture
def dict_subclass():
"""
Expand Down
12 changes: 5 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,15 +1269,13 @@ def __array__(self, dtype=None) -> np.ndarray:
if dtype==None (default), the same dtype as
categorical.categories.dtype.
"""
ret = take_1d(self.categories.values, self._codes)
ret = take_1d(self.categories._values, self._codes)
if dtype and not is_dtype_equal(dtype, self.categories.dtype):
return np.asarray(ret, dtype)
if is_extension_array_dtype(ret):
# When we're a Categorical[ExtensionArray], like Interval,
# we need to ensure __array__ get's all the way to an
# ndarray.
ret = np.asarray(ret)
return ret
# When we're a Categorical[ExtensionArray], like Interval,
# we need to ensure __array__ gets all the way to an
# ndarray.
return np.asarray(ret)

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
# for binary ops, use our custom dunder methods
Expand Down
16 changes: 14 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,13 @@
from pandas.core.generic import NDFrame
from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
from pandas.core.indexes.api import Float64Index, Index, MultiIndex, ensure_index
from pandas.core.indexes.api import (
CategoricalIndex,
Float64Index,
Index,
MultiIndex,
ensure_index,
)
import pandas.core.indexes.base as ibase
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex
Expand Down Expand Up @@ -412,7 +418,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
labels = ensure_index(labels)

if labels._is_all_dates:
if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
deep_labels = labels
if isinstance(labels, CategoricalIndex):
deep_labels = labels.categories

if not isinstance(
deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
):
try:
labels = DatetimeIndex(labels)
# need to set here because we changed the index
Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,18 +316,34 @@ def test_array_multiindex_raises():
TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"),
np.array([0, 3600000000000], dtype="m8[ns]"),
),
# GH#26406 tz is preserved in Categorical[dt64tz]
(
pd.Categorical(pd.date_range("2016-01-01", periods=2, tz="US/Pacific")),
np.array(
[
Timestamp("2016-01-01", tz="US/Pacific"),
Timestamp("2016-01-02", tz="US/Pacific"),
]
),
),
],
)
def test_to_numpy(array, expected, index_or_series):
box = index_or_series
def test_to_numpy(array, expected, index_or_series_or_array):
box = index_or_series_or_array
thing = box(array)

if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index:
pytest.skip(f"No index type for {array.dtype}")

if array.dtype.name == "int64" and box is pd.array:
pytest.xfail("thing is Int64 and to_numpy() returns object")

result = thing.to_numpy()
tm.assert_numpy_array_equal(result, expected)

result = np.asarray(thing)
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize(
Expand Down

0 comments on commit f9368fd

Please sign in to comment.