Merge pull request #23 from data-apis/cudf-updates

cuDF updates
data-apis · May 31, 2023 · a774210
2 parents 99235d6 + c4c2f53
commit a774210
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 41 deletions.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -39,9 +39,6 @@ def pytest_configure(config):
 
 
 ci_xfail_ids = [
-    # https://github.com/rapidsai/cudf/issues/11320
-    "test_signatures.py::test_buffer_method[cudf-__dlpack__]",
-    "test_signatures.py::test_buffer_method[cudf-__dlpack_device__]",
     # https://github.com/vaexio/vaex/issues/2083
     "test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-pandas]",
     # TODO: triage
@@ -63,6 +60,7 @@ def pytest_configure(config):
 ci_skip_ids = [
     # https://github.com/rapidsai/cudf/issues/11332
     "test_column_object.py::test_describe_categorical[cudf]",
+    "test_column_object.py::test_describe_categorical_on_categorical[cudf]",
     # https://github.com/vaexio/vaex/issues/2118
     # https://github.com/vaexio/vaex/issues/2139
     "test_column_object.py::test_dtype[vaex]",

diff --git a/tests/wrappers.py b/tests/wrappers.py
@@ -1,6 +1,4 @@
-import re
 import string
-from copy import copy
 from functools import partial
 from typing import Any, Callable, Dict, List, NamedTuple, Set, Tuple
 from unittest.mock import MagicMock
@@ -28,10 +26,14 @@ class LibraryInfo(NamedTuple):
     mock_to_toplevel: Callable[[MockDataFrame], TopLevelDataFrame]
     from_dataframe: Callable[[TopLevelDataFrame], DataFrame]
     frame_equal: Callable[[TopLevelDataFrame, DataFrame], bool]
-    supported_dtypes: Set[NominalDtype] = set(NominalDtype)
+    excluded_dtypes: Set[NominalDtype] = set()
     allow_zero_cols: bool = True
     allow_zero_rows: bool = True
 
+    @property
+    def supported_dtypes(self) -> Set[NominalDtype]:
+        return set(NominalDtype) ^ self.excluded_dtypes
+
     def mock_to_interchange(self, mock_dataframe: MockDataFrame) -> DataFrame:
         toplevel_df = self.mock_to_toplevel(mock_dataframe)
         return toplevel_df.__dataframe__()
@@ -174,7 +176,7 @@ def vaex_frame_equal(df1, df2) -> bool:
         mock_to_toplevel=mock_to_vaex_df,
         from_dataframe=vaex_from_dataframe,
         frame_equal=vaex_frame_equal,
-        supported_dtypes=set(NominalDtype) ^ {NominalDtype.DATETIME64NS},
+        excluded_dtypes={NominalDtype.DATETIME64NS},
         # https://github.com/vaexio/vaex/issues/2094
         allow_zero_cols=False,
         allow_zero_rows=False,
@@ -246,8 +248,7 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool:
         mock_to_toplevel=mock_to_modin_df,
         from_dataframe=modin_from_dataframe,
         frame_equal=modin_frame_equal,
-        supported_dtypes=set(NominalDtype)
-        ^ {
+        excluded_dtypes={
             NominalDtype.DATETIME64NS,
             # https://github.com/modin-project/modin/issues/4654
             NominalDtype.UTF8,
@@ -261,35 +262,6 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool:
 
 
 def make_cudf_libinfo() -> LibraryInfo:
-    # ethereal hacks! ----------------------------------------------------------
-    try:
-        import pandas
-        import pyarrow
-        from pandas._libs.tslibs.parsing import guess_datetime_format
-        from pandas.core.tools import datetimes
-        from pyarrow.lib import ArrowKeyError
-    except ImportError:
-        pass
-    else:
-        old_register_extension_type = copy(pyarrow.register_extension_type)
-        r_existing_ext_type_msg = re.compile(
-            "A type extension with name pandas.[a-z_]+ already defined"
-        )
-
-        def register_extension_type(*a, **kw):
-            try:
-                old_register_extension_type(*a, **kw)
-            except ArrowKeyError as e:
-                if r_existing_ext_type_msg.fullmatch(str(e)):
-                    pass
-                else:
-                    raise e
-
-        setattr(pyarrow, "register_extension_type", register_extension_type)
-        setattr(datetimes, "_guess_datetime_format", guess_datetime_format)
-        setattr(pandas, "__version__", "1.4.3")
-    # ------------------------------------------------------------ end of hacks.
-
     import cudf
     from cudf.core.df_protocol import from_dataframe as cudf_from_dataframe
 
@@ -317,8 +289,7 @@ def mock_to_cudf_df(mock_df: MockDataFrame) -> cudf.DataFrame:
         mock_to_toplevel=mock_to_cudf_df,
         from_dataframe=cudf_from_dataframe,
         frame_equal=lambda df1, df2: df1.equals(df2),  # NaNs considered equal
-        supported_dtypes=set(NominalDtype)
-        ^ {
+        excluded_dtypes={
             NominalDtype.DATETIME64NS,
             # https://github.com/rapidsai/cudf/issues/11308
             NominalDtype.UTF8,
@@ -475,7 +446,7 @@ def pl_frame_equal(df1: pl.DataFrame, df2: pl.DataFrame) -> bool:
         from_dataframe=pl_from_dataframe,
         frame_equal=pl_frame_equal,
         # TODO: support testing categoricals
-        supported_dtypes=set(NominalDtype) ^ {NominalDtype.CATEGORY},
+        excluded_dtypes={NominalDtype.CATEGORY},
         # https://github.com/pola-rs/polars/issues/8884
         allow_zero_cols=False,
     )