diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index bb06bcc9b5aa8b..3fab4850dd1ec6 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -533,7 +533,7 @@ Categorical
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
 - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
 - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
--
+- Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with timezone-aware ``datetime64`` categories incorrectly dropping the timezone information instead of casting to object dtype (:issue:`38136`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 3d9d2ba04f31b8..cb5b4145855d17 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -320,6 +320,16 @@ def index_or_series(request):
 index_or_series2 = index_or_series
 
 
+@pytest.fixture(
+    params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
+)
+def index_or_series_or_array(request):
+    """
+    Fixture to parametrize over Index, Series, and ExtensionArray
+    """
+    return request.param
+
+
 @pytest.fixture
 def dict_subclass():
     """
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index fe66aae23f5103..3995e7b251184b 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1269,15 +1269,13 @@ def __array__(self, dtype=None) -> np.ndarray:
             if dtype==None (default), the same dtype as
             categorical.categories.dtype.
         """
-        ret = take_1d(self.categories.values, self._codes)
+        ret = take_1d(self.categories._values, self._codes)
         if dtype and not is_dtype_equal(dtype, self.categories.dtype):
             return np.asarray(ret, dtype)
-        if is_extension_array_dtype(ret):
-            # When we're a Categorical[ExtensionArray], like Interval,
-            # we need to ensure __array__ get's all the way to an
-            # ndarray.
-            ret = np.asarray(ret)
-        return ret
+        # When we're a Categorical[ExtensionArray], like Interval,
+        # we need to ensure __array__ gets all the way to an
+        # ndarray.
+        return np.asarray(ret)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         # for binary ops, use our custom dunder methods
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1f4221206e5bc0..b1b5d16eaf7f07 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -84,7 +84,13 @@
 from pandas.core.generic import NDFrame
 from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
-from pandas.core.indexes.api import Float64Index, Index, MultiIndex, ensure_index
+from pandas.core.indexes.api import (
+    CategoricalIndex,
+    Float64Index,
+    Index,
+    MultiIndex,
+    ensure_index,
+)
 import pandas.core.indexes.base as ibase
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.period import PeriodIndex
@@ -412,7 +418,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
             labels = ensure_index(labels)
 
         if labels._is_all_dates:
-            if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
+            deep_labels = labels
+            if isinstance(labels, CategoricalIndex):
+                deep_labels = labels.categories
+
+            if not isinstance(
+                deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
+            ):
                 try:
                     labels = DatetimeIndex(labels)
                     # need to set here because we changed the index
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index a6fdb82e48197b..668954a3f4a0bd 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -316,18 +316,34 @@ def test_array_multiindex_raises():
             TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"),
             np.array([0, 3600000000000], dtype="m8[ns]"),
         ),
+        # GH#26406 tz is preserved in Categorical[dt64tz]
+        (
+            pd.Categorical(pd.date_range("2016-01-01", periods=2, tz="US/Pacific")),
+            np.array(
+                [
+                    Timestamp("2016-01-01", tz="US/Pacific"),
+                    Timestamp("2016-01-02", tz="US/Pacific"),
+                ]
+            ),
+        ),
     ],
 )
-def test_to_numpy(array, expected, index_or_series):
-    box = index_or_series
+def test_to_numpy(array, expected, index_or_series_or_array):
+    box = index_or_series_or_array
     thing = box(array)
 
     if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index:
         pytest.skip(f"No index type for {array.dtype}")
 
+    if array.dtype.name == "int64" and box is pd.array:
+        pytest.xfail("thing is Int64 and to_numpy() returns object")
+
     result = thing.to_numpy()
     tm.assert_numpy_array_equal(result, expected)
 
+    result = np.asarray(thing)
+    tm.assert_numpy_array_equal(result, expected)
+
 
 @pytest.mark.parametrize("as_series", [True, False])
 @pytest.mark.parametrize(