From 331c3720f6fceccfb822f404038e09c49e4f32ef Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 10:19:55 -0700 Subject: [PATCH 1/8] Add fixture for gpu_string_table --- tests/integration/fixtures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 7fc3ff32b..e37203479 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -106,6 +106,11 @@ def gpu_long_table(long_table): return cudf.from_pandas(long_table) if cudf else None +@pytest.fixture() +def gpu_string_table(string_table): + return cudf.from_pandas(string_table) if cudf else None + + @pytest.fixture() def c( df_simple, @@ -120,6 +125,7 @@ def c( gpu_user_table_1, gpu_df, gpu_long_table, + gpu_string_table, ): dfs = { "df_simple": df_simple, @@ -134,6 +140,7 @@ def c( "gpu_user_table_1": gpu_user_table_1, "gpu_df": gpu_df, "gpu_long_table": gpu_long_table, + "gpu_string_table": gpu_string_table, } # Lazy import, otherwise the pytest framework has problems From f5e534607bdcd148b9f3ff37cb9e2e7e302ec1be Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 10:22:36 -0700 Subject: [PATCH 2/8] Add string_function tests for gpu_dfs --- tests/integration/test_rex.py | 16 +++++++++++++--- tests/integration/test_show.py | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index ad7a3fb3e..52bcd250d 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -406,9 +406,15 @@ def test_subqueries(c, user_table_1, user_table_2): ) -def test_string_functions(c): +@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) +def test_string_functions(c, gpu): + if gpu: + input_table = "gpu_string_table" + else: + input_table = "string_table" + df = c.sql( - """ + f""" SELECT a || 'hello' || a AS a, CONCAT(a, 'hello', a) as b, @@ -432,10 +438,14 @@ def test_string_functions(c): INITCAP(UPPER(a)) AS t, INITCAP(LOWER(a)) AS u FROM - string_table + {input_table} """ ).compute() + if gpu: + df = df.to_pandas() + df = df.astype({"c": "int64", "f": "int64", "g": "int64"}) + expected_df = pd.DataFrame( { "a": ["a normal stringhelloa normal string"], diff --git a/tests/integration/test_show.py b/tests/integration/test_show.py index eb9c18337..893c91738 100644 --- a/tests/integration/test_show.py +++ b/tests/integration/test_show.py @@ -55,6 +55,7 @@ def test_tables(c): "gpu_user_table_1", "gpu_df", "gpu_long_table", + "gpu_string_table", ] } ) From e5d2ed67ea0ab02e32adeb44ce76451f94890137 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 10:48:40 -0700 Subject: [PATCH 3/8] Trigger Build From a3c6c653b626cbe68900bd000d8b2415d15a9753 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 16:23:13 -0700 Subject: [PATCH 4/8] Add gpu option for test_like, test_not string tests --- tests/integration/test_rex.py | 94 ++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index 52bcd250d..3086e3b7f 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -119,16 +119,23 @@ def test_random(c, df): result_df = result_df.compute() -def test_not(c, string_table): +@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) +def test_not(c, string_table, gpu): + if gpu: + input_table = "gpu_string_table" + else: + input_table = "string_table" df = c.sql( - """ + f""" SELECT * - FROM string_table + FROM {input_table} WHERE NOT a LIKE '%normal%' """ ) df = df.compute() + if gpu: + df = df.to_pandas() expected_df = string_table[~string_table.a.str.contains("normal")] assert_frame_equal(df, expected_df) @@ -169,19 +176,42 @@ def test_operators(c, df): assert_frame_equal(result_df, expected_df) -def test_like(c, string_table): +@pytest.mark.parametrize( + "gpu", + [ + False, + pytest.param( + True, + marks=( + pytest.mark.gpu, + pytest.mark.xfail( + reason="a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' results in an incorrect output with cuDF" + ), + ), + ), + ], +) +def test_like(c, string_table, gpu): + if gpu: + input_table = "gpu_string_table" + xd = pytest.importorskip("cudf") + else: + input_table = "string_table" + xd = pd + df = c.sql( - """ - SELECT * FROM string_table + f""" + SELECT * FROM {input_table} WHERE a SIMILAR TO '%n[a-z]rmal st_i%' """ ).compute() - + if gpu: + df = df.to_pandas() assert_frame_equal(df, string_table.iloc[[0]]) df = c.sql( - """ - SELECT * FROM string_table + f""" + SELECT * FROM {input_table} WHERE a LIKE '%n[a-z]rmal st_i%' """ ).compute() @@ -189,50 +219,56 @@ def test_like(c, string_table): assert len(df) == 0 df = c.sql( - """ - SELECT * FROM string_table + f""" + SELECT * FROM {input_table} WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä' """ ).compute() - + if gpu: + df = df.to_pandas() assert_frame_equal(df, string_table.iloc[[1]]) - df = c.sql( - """ - SELECT * FROM string_table - WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' - """ - ).compute() - - assert_frame_equal(df, string_table.iloc[[2]]) + # df = c.sql( + # f""" + # SELECT * FROM {input_table} + # WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' + # """ + # ).compute() + # if gpu: + # df = df.to_pandas() + # assert_frame_equal(df, string_table.iloc[[2]]) df = c.sql( - """ - SELECT * FROM string_table + f""" + SELECT * FROM {input_table} WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r' """ ).compute() - + if gpu: + df = df.to_pandas() assert_frame_equal(df, string_table.iloc[[2]]) df = c.sql( - """ - SELECT * FROM string_table + f""" + SELECT * FROM {input_table} WHERE a LIKE '%_' ESCAPE 'r' """ ).compute() - + if gpu: + df = df.to_pandas() assert_frame_equal(df, string_table) - string_table2 = pd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]}) + string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]}) c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2") df = c.sql( - """ + f""" SELECT * FROM string_table2 WHERE b LIKE 'b' """ ).compute() - + if gpu: + df = df.to_pandas() + string_table2 = string_table2.to_pandas() assert_frame_equal(df, string_table2.iloc[[1]]) From 05b0c5d31824103aaa93713b6b27a7add5b63d26 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 16:27:43 -0700 Subject: [PATCH 5/8] remove unneeded fstring, fixes flake8 issue --- tests/integration/test_rex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index 3086e3b7f..3917ab98c 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -261,7 +261,7 @@ def test_like(c, string_table, gpu): string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]}) c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2") df = c.sql( - f""" + """ SELECT * FROM string_table2 WHERE b LIKE 'b' """ From c378b174035d49f12fbe6fd923e7807292c6c3c1 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 16:33:57 -0700 Subject: [PATCH 6/8] Undo accidental commenting out of one test --- tests/integration/test_rex.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index 3917ab98c..28a0b2d74 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -228,15 +228,15 @@ def test_like(c, string_table, gpu): df = df.to_pandas() assert_frame_equal(df, string_table.iloc[[1]]) - # df = c.sql( - # f""" - # SELECT * FROM {input_table} - # WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' - # """ - # ).compute() - # if gpu: - # df = df.to_pandas() - # assert_frame_equal(df, string_table.iloc[[2]]) + df = c.sql( + f""" + SELECT * FROM {input_table} + WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' + """ + ).compute() + if gpu: + df = df.to_pandas() + assert_frame_equal(df, string_table.iloc[[2]]) df = c.sql( f""" From 12825c395fdca5f426b5c9c88a706c83a2f464d3 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Wed, 13 Oct 2021 17:26:38 -0700 Subject: [PATCH 7/8] Update test_like xfail reason with relevant cudf issue --- tests/integration/test_rex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index 28a0b2d74..b5205b625 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -185,7 +185,7 @@ def test_operators(c, df): marks=( pytest.mark.gpu, pytest.mark.xfail( - reason="a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' results in an incorrect output with cuDF" + reason="Failing due to cuDF bug https://github.com/rapidsai/cudf/issues/9434" ), ), ), From d66c1fcc7a0dc700f03429c0c2bac4a2b0665d8d Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Thu, 21 Oct 2021 15:53:56 -0700 Subject: [PATCH 8/8] Update gpu test params and equality check --- tests/integration/test_rex.py | 57 +++++++++++++++-------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index b5205b625..dea6d5604 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -119,12 +119,12 @@ def test_random(c, df): result_df = result_df.compute() -@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) -def test_not(c, string_table, gpu): - if gpu: - input_table = "gpu_string_table" - else: - input_table = "string_table" +@pytest.mark.parametrize( + "input_table", + ["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),], +) +def test_not(c, input_table, request): + string_table = request.getfixturevalue(input_table) df = c.sql( f""" SELECT @@ -134,11 +134,9 @@ def test_not(c, string_table, gpu): """ ) df = df.compute() - if gpu: - df = df.to_pandas() expected_df = string_table[~string_table.a.str.contains("normal")] - assert_frame_equal(df, expected_df) + dd.assert_eq(df, expected_df) def test_operators(c, df): @@ -177,10 +175,11 @@ def test_operators(c, df): @pytest.mark.parametrize( - "gpu", + "input_table,gpu", [ - False, + ("string_table", False), pytest.param( + "gpu_string_table", True, marks=( pytest.mark.gpu, @@ -191,12 +190,11 @@ def test_operators(c, df): ), ], ) -def test_like(c, string_table, gpu): +def test_like(c, input_table, gpu, request): + string_table = request.getfixturevalue(input_table) if gpu: - input_table = "gpu_string_table" xd = pytest.importorskip("cudf") else: - input_table = "string_table" xd = pd df = c.sql( @@ -205,9 +203,8 @@ def test_like(c, string_table, gpu): WHERE a SIMILAR TO '%n[a-z]rmal st_i%' """ ).compute() - if gpu: - df = df.to_pandas() - assert_frame_equal(df, string_table.iloc[[0]]) + + dd.assert_eq(df, string_table.iloc[[0]]) df = c.sql( f""" @@ -224,9 +221,8 @@ def test_like(c, string_table, gpu): WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä' """ ).compute() - if gpu: - df = df.to_pandas() - assert_frame_equal(df, string_table.iloc[[1]]) + + dd.assert_eq(df, string_table.iloc[[1]]) df = c.sql( f""" @@ -234,9 +230,8 @@ def test_like(c, string_table, gpu): WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r' """ ).compute() - if gpu: - df = df.to_pandas() - assert_frame_equal(df, string_table.iloc[[2]]) + + dd.assert_eq(df, string_table.iloc[[2]]) df = c.sql( f""" @@ -244,9 +239,8 @@ def test_like(c, string_table, gpu): WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r' """ ).compute() - if gpu: - df = df.to_pandas() - assert_frame_equal(df, string_table.iloc[[2]]) + + dd.assert_eq(df, string_table.iloc[[2]]) df = c.sql( f""" @@ -254,9 +248,8 @@ def test_like(c, string_table, gpu): WHERE a LIKE '%_' ESCAPE 'r' """ ).compute() - if gpu: - df = df.to_pandas() - assert_frame_equal(df, string_table) + + dd.assert_eq(df, string_table) string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]}) c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2") @@ -266,10 +259,8 @@ def test_like(c, string_table, gpu): WHERE b LIKE 'b' """ ).compute() - if gpu: - df = df.to_pandas() - string_table2 = string_table2.to_pandas() - assert_frame_equal(df, string_table2.iloc[[1]]) + + dd.assert_eq(df, string_table2.iloc[[1]]) def test_null(c):