Skip to content
7 changes: 7 additions & 0 deletions tests/integration/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ def gpu_long_table(long_table):
return cudf.from_pandas(long_table) if cudf else None


@pytest.fixture()
def gpu_string_table(string_table):
    """Return ``string_table`` converted with ``cudf.from_pandas``.

    Yields ``None`` instead when the module-level ``cudf`` name is falsy
    (presumably because the optional cudf import did not succeed -- confirm
    against the import at the top of this file).
    """
    if not cudf:
        return None
    return cudf.from_pandas(string_table)


@pytest.fixture()
def c(
df_simple,
Expand All @@ -120,6 +125,7 @@ def c(
gpu_user_table_1,
gpu_df,
gpu_long_table,
gpu_string_table,
):
dfs = {
"df_simple": df_simple,
Expand All @@ -134,6 +140,7 @@ def c(
"gpu_user_table_1": gpu_user_table_1,
"gpu_df": gpu_df,
"gpu_long_table": gpu_long_table,
"gpu_string_table": gpu_string_table,
}

# Lazy import, otherwise the pytest framework has problems
Expand Down
91 changes: 64 additions & 27 deletions tests/integration/test_rex.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,24 @@ def test_random(c, df):
result_df = result_df.compute()


def test_not(c, string_table):
@pytest.mark.parametrize(
"input_table",
["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),],
)
def test_not(c, input_table, request):
string_table = request.getfixturevalue(input_table)
df = c.sql(
"""
f"""
SELECT
*
FROM string_table
FROM {input_table}
WHERE NOT a LIKE '%normal%'
"""
)
df = df.compute()

expected_df = string_table[~string_table.a.str.contains("normal")]
assert_frame_equal(df, expected_df)
dd.assert_eq(df, expected_df)


def test_operators(c, df):
Expand Down Expand Up @@ -169,62 +174,84 @@ def test_operators(c, df):
assert_frame_equal(result_df, expected_df)


def test_like(c, string_table):
@pytest.mark.parametrize(
"input_table,gpu",
[
("string_table", False),
pytest.param(
"gpu_string_table",
True,
marks=(
pytest.mark.gpu,
pytest.mark.xfail(
reason="Failing due to cuDF bug https://github.com/rapidsai/cudf/issues/9434"
),
),
),
],
)
def test_like(c, input_table, gpu, request):
string_table = request.getfixturevalue(input_table)
if gpu:
xd = pytest.importorskip("cudf")
else:
xd = pd

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a SIMILAR TO '%n[a-z]rmal st_i%'
"""
).compute()

assert_frame_equal(df, string_table.iloc[[0]])
dd.assert_eq(df, string_table.iloc[[0]])

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a LIKE '%n[a-z]rmal st_i%'
"""
).compute()

assert len(df) == 0

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a LIKE 'Ä%Ä_Ä%' ESCAPE 'Ä'
"""
).compute()

assert_frame_equal(df, string_table.iloc[[1]])
dd.assert_eq(df, string_table.iloc[[1]])

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a SIMILAR TO '^|()-*r[r]$' ESCAPE 'r'
"""
).compute()

assert_frame_equal(df, string_table.iloc[[2]])
dd.assert_eq(df, string_table.iloc[[2]])

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a LIKE '^|()-*r[r]$' ESCAPE 'r'
"""
).compute()

assert_frame_equal(df, string_table.iloc[[2]])
dd.assert_eq(df, string_table.iloc[[2]])

df = c.sql(
"""
SELECT * FROM string_table
f"""
SELECT * FROM {input_table}
WHERE a LIKE '%_' ESCAPE 'r'
"""
).compute()

assert_frame_equal(df, string_table)
dd.assert_eq(df, string_table)

string_table2 = pd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
string_table2 = xd.DataFrame({"b": ["a", "b", None, pd.NA, float("nan")]})
c.register_dask_table(dd.from_pandas(string_table2, npartitions=1), "string_table2")
df = c.sql(
"""
Expand All @@ -233,7 +260,7 @@ def test_like(c, string_table):
"""
).compute()

assert_frame_equal(df, string_table2.iloc[[1]])
dd.assert_eq(df, string_table2.iloc[[1]])


def test_null(c):
Expand Down Expand Up @@ -406,9 +433,15 @@ def test_subqueries(c, user_table_1, user_table_2):
)


def test_string_functions(c):
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_string_functions(c, gpu):
if gpu:
input_table = "gpu_string_table"
else:
input_table = "string_table"

df = c.sql(
"""
f"""
SELECT
a || 'hello' || a AS a,
CONCAT(a, 'hello', a) as b,
Expand All @@ -432,10 +465,14 @@ def test_string_functions(c):
INITCAP(UPPER(a)) AS t,
INITCAP(LOWER(a)) AS u
FROM
string_table
{input_table}
"""
).compute()

if gpu:
df = df.to_pandas()
df = df.astype({"c": "int64", "f": "int64", "g": "int64"})

expected_df = pd.DataFrame(
{
"a": ["a normal stringhelloa normal string"],
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_tables(c):
"gpu_user_table_1",
"gpu_df",
"gpu_long_table",
"gpu_string_table",
]
}
)
Expand Down