Skip to content

Commit

Permalink
Merge pull request #23 from data-apis/cudf-updates
Browse files Browse the repository at this point in the history
cuDF updates
  • Loading branch information
honno committed May 31, 2023
2 parents 99235d6 + c4c2f53 commit a774210
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 41 deletions.
4 changes: 1 addition & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ def pytest_configure(config):


ci_xfail_ids = [
# https://github.com/rapidsai/cudf/issues/11320
"test_signatures.py::test_buffer_method[cudf-__dlpack__]",
"test_signatures.py::test_buffer_method[cudf-__dlpack_device__]",
# https://github.com/vaexio/vaex/issues/2083
"test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-pandas]",
# TODO: triage
Expand All @@ -63,6 +60,7 @@ def pytest_configure(config):
ci_skip_ids = [
# https://github.com/rapidsai/cudf/issues/11332
"test_column_object.py::test_describe_categorical[cudf]",
"test_column_object.py::test_describe_categorical_on_categorical[cudf]",
# https://github.com/vaexio/vaex/issues/2118
# https://github.com/vaexio/vaex/issues/2139
"test_column_object.py::test_dtype[vaex]",
Expand Down
47 changes: 9 additions & 38 deletions tests/wrappers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import re
import string
from copy import copy
from functools import partial
from typing import Any, Callable, Dict, List, NamedTuple, Set, Tuple
from unittest.mock import MagicMock
Expand Down Expand Up @@ -28,10 +26,14 @@ class LibraryInfo(NamedTuple):
mock_to_toplevel: Callable[[MockDataFrame], TopLevelDataFrame]
from_dataframe: Callable[[TopLevelDataFrame], DataFrame]
frame_equal: Callable[[TopLevelDataFrame, DataFrame], bool]
supported_dtypes: Set[NominalDtype] = set(NominalDtype)
excluded_dtypes: Set[NominalDtype] = set()
allow_zero_cols: bool = True
allow_zero_rows: bool = True

@property
def supported_dtypes(self) -> Set[NominalDtype]:
    """Return every ``NominalDtype`` member not listed in ``excluded_dtypes``."""
    # Plain set difference, not symmetric difference (``^``): with ``^`` any
    # element of ``excluded_dtypes`` that is not a NominalDtype member would
    # end up in the result, i.e. be reported as *supported*.
    return set(NominalDtype) - self.excluded_dtypes

def mock_to_interchange(self, mock_dataframe: MockDataFrame) -> DataFrame:
toplevel_df = self.mock_to_toplevel(mock_dataframe)
return toplevel_df.__dataframe__()
Expand Down Expand Up @@ -174,7 +176,7 @@ def vaex_frame_equal(df1, df2) -> bool:
mock_to_toplevel=mock_to_vaex_df,
from_dataframe=vaex_from_dataframe,
frame_equal=vaex_frame_equal,
supported_dtypes=set(NominalDtype) ^ {NominalDtype.DATETIME64NS},
excluded_dtypes={NominalDtype.DATETIME64NS},
# https://github.com/vaexio/vaex/issues/2094
allow_zero_cols=False,
allow_zero_rows=False,
Expand Down Expand Up @@ -246,8 +248,7 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool:
mock_to_toplevel=mock_to_modin_df,
from_dataframe=modin_from_dataframe,
frame_equal=modin_frame_equal,
supported_dtypes=set(NominalDtype)
^ {
excluded_dtypes={
NominalDtype.DATETIME64NS,
# https://github.com/modin-project/modin/issues/4654
NominalDtype.UTF8,
Expand All @@ -261,35 +262,6 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool:


def make_cudf_libinfo() -> LibraryInfo:
# ethereal hacks! ----------------------------------------------------------
try:
import pandas
import pyarrow
from pandas._libs.tslibs.parsing import guess_datetime_format
from pandas.core.tools import datetimes
from pyarrow.lib import ArrowKeyError
except ImportError:
pass
else:
old_register_extension_type = copy(pyarrow.register_extension_type)
r_existing_ext_type_msg = re.compile(
"A type extension with name pandas.[a-z_]+ already defined"
)

def register_extension_type(*a, **kw):
try:
old_register_extension_type(*a, **kw)
except ArrowKeyError as e:
if r_existing_ext_type_msg.fullmatch(str(e)):
pass
else:
raise e

setattr(pyarrow, "register_extension_type", register_extension_type)
setattr(datetimes, "_guess_datetime_format", guess_datetime_format)
setattr(pandas, "__version__", "1.4.3")
# ------------------------------------------------------------ end of hacks.

import cudf
from cudf.core.df_protocol import from_dataframe as cudf_from_dataframe

Expand Down Expand Up @@ -317,8 +289,7 @@ def mock_to_cudf_df(mock_df: MockDataFrame) -> cudf.DataFrame:
mock_to_toplevel=mock_to_cudf_df,
from_dataframe=cudf_from_dataframe,
frame_equal=lambda df1, df2: df1.equals(df2), # NaNs considered equal
supported_dtypes=set(NominalDtype)
^ {
excluded_dtypes={
NominalDtype.DATETIME64NS,
# https://github.com/rapidsai/cudf/issues/11308
NominalDtype.UTF8,
Expand Down Expand Up @@ -475,7 +446,7 @@ def pl_frame_equal(df1: pl.DataFrame, df2: pl.DataFrame) -> bool:
from_dataframe=pl_from_dataframe,
frame_equal=pl_frame_equal,
# TODO: support testing categoricals
supported_dtypes=set(NominalDtype) ^ {NominalDtype.CATEGORY},
excluded_dtypes={NominalDtype.CATEGORY},
# https://github.com/pola-rs/polars/issues/8884
allow_zero_cols=False,
)
Expand Down

0 comments on commit a774210

Please sign in to comment.