dask-contrib · galipremsagar · Oct 27, 2021 · Oct 13, 2021 · Oct 13, 2021 · Oct 13, 2021
@@ -106,6 +106,11 @@ def gpu_long_table(long_table):
     return cudf.from_pandas(long_table) if cudf else None
 
 
+@pytest.fixture()
+def gpu_string_table(string_table):
+    return cudf.from_pandas(string_table) if cudf else None
+
+
 @pytest.fixture()
 def c(
     df_simple,
@@ -120,6 +125,7 @@ def c(
     gpu_user_table_1,
     gpu_df,
     gpu_long_table,
+    gpu_string_table,
 ):
     dfs = {
         "df_simple": df_simple,
@@ -134,6 +140,7 @@ def c(
         "gpu_user_table_1": gpu_user_table_1,
         "gpu_df": gpu_df,
         "gpu_long_table": gpu_long_table,
+        "gpu_string_table": gpu_string_table,
     }
 
     # Lazy import, otherwise the pytest framework has problems

@@ -119,19 +119,24 @@ def test_random(c, df):
     result_df = result_df.compute()
 
 
-def test_not(c, string_table):
+@pytest.mark.parametrize(
+    "input_table",
+    ["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),],
+)
+def test_not(c, input_table, request):
+    string_table = request.getfixturevalue(input_table)
     df = c.sql(
-        """
+        f"""
     SELECT
         *
-    FROM string_table
+    FROM {input_table}
     WHERE NOT a LIKE '%normal%'
     """
     )
     df = df.compute()
 
     expected_df = string_table[~string_table.a.str.contains("normal")]
-    assert_frame_equal(df, expected_df)
+    dd.assert_eq(df, expected_df)
 
 
 def test_operators(c, df):
@@ -169,62 +174,84 @@ def test_operators(c, df):
     assert_frame_equal(result_df, expected_df)
 
 
-def test_like(c, string_table):
+@pytest.mark.parametrize(
+    "input_table,gpu",
+    [
+        ("string_table", False),
+        pytest.param(
+            "gpu_string_table",
+            True,
+            marks=(
+                pytest.mark.gpu,
+                pytest.mark.xfail(
+                    reason="Failing due to cuDF bug https://github.com/rapidsai/cudf/issues/9434"
+                ),
+            ),
+        ),
+    ],
+)
+def test_like(c, input_table, gpu, request):
+    string_table = request.getfixturevalue(input_table)
+    if gpu:
+        xd = pytest.importorskip("cudf")
+    else:
+        xd = pd
+
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a SIMILAR TO '%n[a-z]rmal st_i%'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[0]])
+    dd.assert_eq(df, string_table.iloc[[0]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%n[a-z]rmal st_i%'
     """
     ).compute()
 
     assert len(df) == 0
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[1]])
+    dd.assert_eq(df, string_table.iloc[[1]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
         """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[2]])
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table.iloc[[2]])
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%_' ESCAPE 'r'
     """
     ).compute()
 
-    assert_frame_equal(df, string_table)
+    dd.assert_eq(df, string_table)
 
-    string_table2 = pd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
+    string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
     c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
     df = c.sql(
         """
@@ -233,7 +260,7 @@ def test_like(c, string_table):
     """
     ).compute()
 
-    assert_frame_equal(df, string_table2.iloc[[1]])
+    dd.assert_eq(df, string_table2.iloc[[1]])
 
 
 def test_null(c):
@@ -406,9 +433,15 @@ def test_subqueries(c, user_table_1, user_table_2):
     )
 
 
-def test_string_functions(c):
+@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
+def test_string_functions(c, gpu):
+    if gpu:
+        input_table = "gpu_string_table"
+    else:
+        input_table = "string_table"
+
     df = c.sql(
-        """
+        f"""
         SELECT
             a || 'hello' || a AS a,
             CONCAT(a, 'hello', a) as b,
@@ -432,10 +465,14 @@ def test_string_functions(c):
             INITCAP(UPPER(a)) AS t,
             INITCAP(LOWER(a)) AS u
         FROM
-            string_table
+            {input_table}
         """
     ).compute()
 
+    if gpu:
+        df = df.to_pandas()
+        df = df.astype({"c": "int64", "f": "int64", "g": "int64"})
+
     expected_df = pd.DataFrame(
         {
             "a": ["a normal stringhelloa normal string"],

@@ -55,6 +55,7 @@ def test_tables(c):
                 "gpu_user_table_1",
                 "gpu_df",
                 "gpu_long_table",
+                "gpu_string_table",
             ]
         }
     )