diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py
index 7fc3ff32b..e37203479 100644
--- a/tests/integration/fixtures.py
+++ b/tests/integration/fixtures.py
@@ -106,6 +106,11 @@ def gpu_long_table(long_table):
     return cudf.from_pandas(long_table) if cudf else None
 
 
+@pytest.fixture()
+def gpu_string_table(string_table):
+    return cudf.from_pandas(string_table) if cudf else None
+
+
 @pytest.fixture()
 def c(
     df_simple,
@@ -120,6 +125,7 @@ def c(
     gpu_user_table_1,
     gpu_df,
     gpu_long_table,
+    gpu_string_table,
 ):
     dfs = {
         "df_simple": df_simple,
@@ -134,6 +140,7 @@ def c(
         "gpu_user_table_1": gpu_user_table_1,
         "gpu_df": gpu_df,
         "gpu_long_table": gpu_long_table,
+        "gpu_string_table": gpu_string_table,
     }
 
     # Lazy import, otherwise the pytest framework has problems
diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index ad7a3fb3e..dea6d5604 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -119,19 +119,24 @@ def test_random(c, df):
     result_df = result_df.compute()
 
 
-def test_not(c, string_table):
+@pytest.mark.parametrize(
+    "input_table",
+    ["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),],
+)
+def test_not(c, input_table, request):
+    string_table = request.getfixturevalue(input_table)
     df = c.sql(
-        """
+        f"""
     SELECT
         *
-    FROM string_table
+    FROM {input_table}
     WHERE NOT a LIKE '%normal%'
     """
     )
     df = df.compute()
 
     expected_df = string_table[~string_table.a.str.contains("normal")]
-    assert_frame_equal(df, expected_df)
+    dd.assert_eq(df, expected_df)
 
 
 def test_operators(c, df):
@@ -169,19 +174,41 @@ def test_operators(c, df):
     assert_frame_equal(result_df, expected_df)
 
 
-def test_like(c, string_table):
+@pytest.mark.parametrize(
+    "input_table,gpu",
+    [
+        ("string_table", False),
+        pytest.param(
+            "gpu_string_table",
+            True,
+            marks=(
+                pytest.mark.gpu,
+                pytest.mark.xfail(
+                    reason="Failing due to cuDF bug https://github.com/rapidsai/cudf/issues/9434"
+                ),
+            ),
+        ),
+    ],
+)
+def test_like(c, input_table, gpu, request):
+    string_table = request.getfixturevalue(input_table)
+    if gpu:
+        xd = pytest.importorskip("cudf")
+    else:
+        xd = pd
+
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a SIMILAR TO '%n[a-z]rmal st_i%'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[0]])
+    dd.assert_eq(df, string_table.iloc[[0]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%n[a-z]rmal st_i%'
     """
     ).compute()
@@ -189,42 +216,42 @@ def test_like(c, string_table):
     assert len(df) == 0
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[1]])
+    dd.assert_eq(df, string_table.iloc[[1]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
         """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[2]])
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[2]])
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%_' ESCAPE 'r'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table)
+    dd.assert_eq(df, string_table)
 
-    string_table2 = pd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
+    string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
     c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
     df = c.sql(
         """
@@ -233,7 +260,7 @@ def test_like(c, string_table):
     """
     ).compute()
 
-    assert_frame_equal(df, string_table2.iloc[[1]])
+    dd.assert_eq(df, string_table2.iloc[[1]])
 
 
 def test_null(c):
@@ -406,9 +433,15 @@ def test_subqueries(c, user_table_1, user_table_2):
     )
 
 
-def test_string_functions(c):
+@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
+def test_string_functions(c, gpu):
+    if gpu:
+        input_table = "gpu_string_table"
+    else:
+        input_table = "string_table"
+
     df = c.sql(
-        """
+        f"""
         SELECT
             a || 'hello' || a AS a,
             CONCAT(a, 'hello', a) as b,
@@ -432,10 +465,14 @@ def test_string_functions(c):
             INITCAP(UPPER(a)) AS t,
             INITCAP(LOWER(a)) AS u
         FROM
-            string_table
+            {input_table}
         """
     ).compute()
 
+    if gpu:
+        df = df.to_pandas()
+        df = df.astype({"c": "int64", "f": "int64", "g": "int64"})
+
     expected_df = pd.DataFrame(
         {
             "a": ["a normal stringhelloa normal string"],
diff --git a/tests/integration/test_show.py b/tests/integration/test_show.py
index eb9c18337..893c91738 100644
--- a/tests/integration/test_show.py
+++ b/tests/integration/test_show.py
@@ -55,6 +55,7 @@ def test_tables(c):
                 "gpu_user_table_1",
                 "gpu_df",
                 "gpu_long_table",
+                "gpu_string_table",
             ]
         }
     )