From 331c3720f6fceccfb822f404038e09c49e4f32ef Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 10:19:55 -0700
Subject: [PATCH 1/8] Add fixture for gpu_string_table

---
 tests/integration/fixtures.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py
index 7fc3ff32b..e37203479 100644
--- a/tests/integration/fixtures.py
+++ b/tests/integration/fixtures.py
@@ -106,6 +106,11 @@ def gpu_long_table(long_table):
     return cudf.from_pandas(long_table) if cudf else None
 
 
+@pytest.fixture()
+def gpu_string_table(string_table):
+    return cudf.from_pandas(string_table) if cudf else None
+
+
 @pytest.fixture()
 def c(
     df_simple,
@@ -120,6 +125,7 @@ def c(
     gpu_user_table_1,
     gpu_df,
     gpu_long_table,
+    gpu_string_table,
 ):
     dfs = {
         "df_simple": df_simple,
@@ -134,6 +140,7 @@ def c(
         "gpu_user_table_1": gpu_user_table_1,
         "gpu_df": gpu_df,
         "gpu_long_table": gpu_long_table,
+        "gpu_string_table": gpu_string_table,
     }
 
     # Lazy import, otherwise the pytest framework has problems

From f5e534607bdcd148b9f3ff37cb9e2e7e302ec1be Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 10:22:36 -0700
Subject: [PATCH 2/8] Add GPU parametrization to test_string_functions

---
 tests/integration/test_rex.py  | 16 +++++++++++++---
 tests/integration/test_show.py |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index ad7a3fb3e..52bcd250d 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -406,9 +406,15 @@ def test_subqueries(c, user_table_1, user_table_2):
     )
 
 
-def test_string_functions(c):
+@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
+def test_string_functions(c, gpu):
+    if gpu:
+        input_table = "gpu_string_table"
+    else:
+        input_table = "string_table"
+
     df = c.sql(
-        """
+        f"""
         SELECT
             a || 'hello' || a AS a,
             CONCAT(a, 'hello', a) as b,
@@ -432,10 +438,14 @@ def test_string_functions(c):
             INITCAP(UPPER(a)) AS t,
             INITCAP(LOWER(a)) AS u
         FROM
-            string_table
+            {input_table}
         """
     ).compute()
 
+    if gpu:
+        df = df.to_pandas()
+        df = df.astype({"c": "int64", "f": "int64", "g": "int64"})
+
     expected_df = pd.DataFrame(
         {
             "a": ["a normal stringhelloa normal string"],
diff --git a/tests/integration/test_show.py b/tests/integration/test_show.py
index eb9c18337..893c91738 100644
--- a/tests/integration/test_show.py
+++ b/tests/integration/test_show.py
@@ -55,6 +55,7 @@ def test_tables(c):
                 "gpu_user_table_1",
                 "gpu_df",
                 "gpu_long_table",
+                "gpu_string_table",
             ]
         }
     )

From e5d2ed67ea0ab02e32adeb44ce76451f94890137 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 10:48:40 -0700
Subject: [PATCH 3/8] Trigger Build


From a3c6c653b626cbe68900bd000d8b2415d15a9753 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 16:23:13 -0700
Subject: [PATCH 4/8] Add gpu option for test_like and test_not string tests

---
 tests/integration/test_rex.py | 94 ++++++++++++++++++++++++-----------
 1 file changed, 65 insertions(+), 29 deletions(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index 52bcd250d..3086e3b7f 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -119,16 +119,23 @@ def test_random(c, df):
     result_df = result_df.compute()
 
 
-def test_not(c, string_table):
+@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
+def test_not(c, string_table, gpu):
+    if gpu:
+        input_table = "gpu_string_table"
+    else:
+        input_table = "string_table"
     df = c.sql(
-        """
+        f"""
     SELECT
         *
-    FROM string_table
+    FROM {input_table}
     WHERE NOT a LIKE '%normal%'
     """
     )
     df = df.compute()
+    if gpu:
+        df = df.to_pandas()
 
     expected_df = string_table[~string_table.a.str.contains("normal")]
     assert_frame_equal(df, expected_df)
@@ -169,19 +176,42 @@ def test_operators(c, df):
     assert_frame_equal(result_df, expected_df)
 
 
-def test_like(c, string_table):
+@pytest.mark.parametrize(
+    "gpu",
+    [
+        False,
+        pytest.param(
+            True,
+            marks=(
+                pytest.mark.gpu,
+                pytest.mark.xfail(
+                    reason="a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' results in an incorrect output with cuDF"
+                ),
+            ),
+        ),
+    ],
+)
+def test_like(c, string_table, gpu):
+    if gpu:
+        input_table = "gpu_string_table"
+        xd = pytest.importorskip("cudf")
+    else:
+        input_table = "string_table"
+        xd = pd
+
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a SIMILAR TO '%n[a-z]rmal st_i%'
     """
     ).compute()
-
+    if gpu:
+        df = df.to_pandas()
     assert_frame_equal(df, string_table.iloc[[0]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%n[a-z]rmal st_i%'
     """
     ).compute()
@@ -189,50 +219,56 @@ def test_like(c, string_table):
     assert len(df) == 0
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä'
     """
     ).compute()
-
+    if gpu:
+        df = df.to_pandas()
     assert_frame_equal(df, string_table.iloc[[1]])
 
-    df = c.sql(
-        """
-        SELECT * FROM string_table
-        WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
-        """
-    ).compute()
-
-    assert_frame_equal(df, string_table.iloc[[2]])
+    # df = c.sql(
+    #     f"""
+    #     SELECT * FROM {input_table}
+    #     WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
+    #     """
+    # ).compute()
+    # if gpu:
+    #     df = df.to_pandas()
+    # assert_frame_equal(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r'
     """
     ).compute()
-
+    if gpu:
+        df = df.to_pandas()
     assert_frame_equal(df, string_table.iloc[[2]])
 
     df = c.sql(
-        """
-        SELECT * FROM string_table
+        f"""
+        SELECT * FROM {input_table}
         WHERE a LIKE '%_' ESCAPE 'r'
     """
     ).compute()
-
+    if gpu:
+        df = df.to_pandas()
     assert_frame_equal(df, string_table)
 
-    string_table2 = pd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
+    string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
     c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
     df = c.sql(
-        """
+        f"""
         SELECT * FROM string_table2
         WHERE b LIKE 'b'
     """
     ).compute()
-
+    if gpu:
+        df = df.to_pandas()
+        string_table2 = string_table2.to_pandas()
     assert_frame_equal(df, string_table2.iloc[[1]])
 
 
From 05b0c5d31824103aaa93713b6b27a7add5b63d26 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 16:27:43 -0700
Subject: [PATCH 5/8] Remove unneeded f-string prefix to fix flake8 issue

---
 tests/integration/test_rex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index 3086e3b7f..3917ab98c 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -261,7 +261,7 @@ def test_like(c, string_table, gpu):
     string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
     c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
     df = c.sql(
-        f"""
+        """
         SELECT * FROM string_table2
         WHERE b LIKE 'b'
     """

From c378b174035d49f12fbe6fd923e7807292c6c3c1 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 16:33:57 -0700
Subject: [PATCH 6/8] Undo accidental commenting out of one test

---
 tests/integration/test_rex.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index 3917ab98c..28a0b2d74 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -228,15 +228,15 @@ def test_like(c, string_table, gpu):
         df = df.to_pandas()
     assert_frame_equal(df, string_table.iloc[[1]])
 
-    # df = c.sql(
-    #     f"""
-    #     SELECT * FROM {input_table}
-    #     WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
-    #     """
-    # ).compute()
-    # if gpu:
-    #     df = df.to_pandas()
-    # assert_frame_equal(df, string_table.iloc[[2]])
+    df = c.sql(
+        f"""
+        SELECT * FROM {input_table}
+        WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
+        """
+    ).compute()
+    if gpu:
+        df = df.to_pandas()
+    assert_frame_equal(df, string_table.iloc[[2]])
 
     df = c.sql(
         f"""

From 12825c395fdca5f426b5c9c88a706c83a2f464d3 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Wed, 13 Oct 2021 17:26:38 -0700
Subject: [PATCH 7/8] Update test_like xfail reason with relevant cudf issue

---
 tests/integration/test_rex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index 28a0b2d74..b5205b625 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -185,7 +185,7 @@ def test_operators(c, df):
             marks=(
                 pytest.mark.gpu,
                 pytest.mark.xfail(
-                    reason="a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' results in an incorrect output with cuDF"
+                    reason="Failing due to cuDF bug https://github.com/rapidsai/cudf/issues/9434"
                 ),
             ),
         ),

From d66c1fcc7a0dc700f03429c0c2bac4a2b0665d8d Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Thu, 21 Oct 2021 15:53:56 -0700
Subject: [PATCH 8/8] Update gpu test params and equality check

---
 tests/integration/test_rex.py | 57 +++++++++++++++--------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index b5205b625..dea6d5604 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -119,12 +119,12 @@ def test_random(c, df):
     result_df = result_df.compute()
 
 
-@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
-def test_not(c, string_table, gpu):
-    if gpu:
-        input_table = "gpu_string_table"
-    else:
-        input_table = "string_table"
+@pytest.mark.parametrize(
+    "input_table",
+    ["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),],
+)
+def test_not(c, input_table, request):
+    string_table = request.getfixturevalue(input_table)
     df = c.sql(
         f"""
     SELECT
@@ -134,11 +134,9 @@ def test_not(c, string_table, gpu):
     """
     )
     df = df.compute()
-    if gpu:
-        df = df.to_pandas()
 
     expected_df = string_table[~string_table.a.str.contains("normal")]
-    assert_frame_equal(df, expected_df)
+    dd.assert_eq(df, expected_df)
 
 
 def test_operators(c, df):
@@ -177,10 +175,11 @@ def test_operators(c, df):
 
 
 @pytest.mark.parametrize(
-    "gpu",
+    "input_table,gpu",
     [
-        False,
+        ("string_table", False),
         pytest.param(
+            "gpu_string_table",
             True,
             marks=(
                 pytest.mark.gpu,
@@ -191,12 +190,11 @@ def test_operators(c, df):
         ),
     ],
 )
-def test_like(c, string_table, gpu):
+def test_like(c, input_table, gpu, request):
+    string_table = request.getfixturevalue(input_table)
     if gpu:
-        input_table = "gpu_string_table"
         xd = pytest.importorskip("cudf")
     else:
-        input_table = "string_table"
         xd = pd
 
     df = c.sql(
@@ -205,9 +203,8 @@ def test_like(c, string_table, gpu):
         WHERE a SIMILAR TO '%n[a-z]rmal st_i%'
     """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-    assert_frame_equal(df, string_table.iloc[[0]])
+
+    dd.assert_eq(df, string_table.iloc[[0]])
 
     df = c.sql(
         f"""
@@ -224,9 +221,8 @@ def test_like(c, string_table, gpu):
         WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä'
     """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-    assert_frame_equal(df, string_table.iloc[[1]])
+
+    dd.assert_eq(df, string_table.iloc[[1]])
 
     df = c.sql(
         f"""
@@ -234,9 +230,8 @@ def test_like(c, string_table, gpu):
         WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
         """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-    assert_frame_equal(df, string_table.iloc[[2]])
+
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
         f"""
@@ -244,9 +239,8 @@ def test_like(c, string_table, gpu):
         WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r'
     """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-    assert_frame_equal(df, string_table.iloc[[2]])
+
+    dd.assert_eq(df, string_table.iloc[[2]])
 
     df = c.sql(
         f"""
@@ -254,9 +248,8 @@ def test_like(c, string_table, gpu):
         WHERE a LIKE '%_' ESCAPE 'r'
     """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-    assert_frame_equal(df, string_table)
+
+    dd.assert_eq(df, string_table)
 
     string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
     c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
@@ -266,10 +259,8 @@ def test_like(c, string_table, gpu):
         WHERE b LIKE 'b'
     """
     ).compute()
-    if gpu:
-        df = df.to_pandas()
-        string_table2 = string_table2.to_pandas()
-    assert_frame_equal(df, string_table2.iloc[[1]])
+
+    dd.assert_eq(df, string_table2.iloc[[1]])
 
 
 def test_null(c):