Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
- "3.13"
- "3.14"
toolchain:
- "stable"

Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ substrait = ["dep:datafusion-substrait"]

[dependencies]
tokio = { version = "1.47", features = ["macros", "rt", "rt-multi-thread", "sync"] }
pyo3 = { version = "0.25", features = ["extension-module", "abi3", "abi3-py39"] }
pyo3 = { version = "0.25", features = ["extension-module", "abi3", "abi3-py310"] }
pyo3-async-runtimes = { version = "0.25", features = ["tokio-runtime"]}
pyo3-log = "0.12.4"
arrow = { version = "56", features = ["pyarrow"] }
Expand Down
16 changes: 11 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ name = "datafusion"
description = "Build and run queries against data"
readme = "README.md"
license = { file = "LICENSE.txt" }
requires-python = ">=3.9"
requires-python = ">=3.10"
keywords = ["datafusion", "dataframe", "rust", "query-engine"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
Expand All @@ -35,15 +35,19 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python",
"Programming Language :: Rust",
]
dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
dependencies = [
"pyarrow>=11.0.0;python_version<'3.14'",
"pyarrow>=22.0.0;python_version>='3.14'",
"typing-extensions;python_version<'3.13'"
]
dynamic = ["version"]

[project.urls]
Expand Down Expand Up @@ -147,8 +151,10 @@ ignore-words-list = [
[dependency-groups]
dev = [
"maturin>=1.8.1",
"numpy>1.25.0",
"pre-commit>=4.0.0",
"numpy>1.25.0;python_version<'3.14'",
"numpy>=2.3.2;python_version>='3.14'",
"pre-commit>=4.3.0",
"pyyaml>=6.0.3",
"pytest>=7.4.4",
"pytest-asyncio>=0.23.3",
"ruff>=0.9.1",
Expand Down
4 changes: 2 additions & 2 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ def with_columns(
if isinstance(expr, str):
expressions.append(self.parse_sql_expr(expr).expr)
elif isinstance(expr, Iterable) and not isinstance(
expr, (Expr, str, bytes, bytearray)
expr, Expr | str | bytes | bytearray
):
expressions.extend(
[
Expand Down Expand Up @@ -639,7 +639,7 @@ def aggregate(
"""
group_by_list = (
list(group_by)
if isinstance(group_by, Sequence) and not isinstance(group_by, (Expr, str))
if isinstance(group_by, Sequence) and not isinstance(group_by, Expr | str)
else [group_by]
)
aggs_list = (
Expand Down
6 changes: 3 additions & 3 deletions python/datafusion/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def _iter(
) -> Iterable[expr_internal.Expr]:
for expr in items:
if isinstance(expr, Iterable) and not isinstance(
expr, (Expr, str, bytes, bytearray)
expr, Expr | str | bytes | bytearray
):
# Treat string-like objects as atomic to surface standard errors
yield from _iter(expr)
Expand Down Expand Up @@ -308,7 +308,7 @@ def expr_list_to_raw_expr_list(
expr_list: Optional[list[Expr] | Expr],
) -> Optional[list[expr_internal.Expr]]:
"""Convert a sequence of expressions or column names to raw expressions."""
if isinstance(expr_list, (Expr, str)):
if isinstance(expr_list, Expr | str):
expr_list = [expr_list]
if expr_list is None:
return None
Expand All @@ -326,7 +326,7 @@ def sort_list_to_raw_sort_list(
sort_list: Optional[_typing.Union[Sequence[SortKey], SortKey]],
) -> Optional[list[expr_internal.SortExpr]]:
"""Helper function to return an optional sort list to raw variant."""
if isinstance(sort_list, (Expr, SortExpr, str)):
if isinstance(sort_list, Expr | SortExpr | str):
sort_list = [sort_list]
if sort_list is None:
return None
Expand Down
10 changes: 5 additions & 5 deletions python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ def test_array_functions(stmt, py_expr):

col = column("arr")
query_result = df.select(stmt(col)).collect()[0].column(0)
for a, b in zip(query_result, py_expr(data)):
for a, b in zip(query_result, py_expr(data), strict=False):
np.testing.assert_array_almost_equal(
np.array(a.as_py(), dtype=float), np.array(b, dtype=float)
)
Expand All @@ -582,7 +582,7 @@ def test_array_function_flatten():
stmt = f.flatten(literal(data))
py_expr = [py_flatten(data)]
query_result = df.select(stmt).collect()[0].column(0)
for a, b in zip(query_result, py_expr):
for a, b in zip(query_result, py_expr, strict=False):
np.testing.assert_array_almost_equal(
np.array(a.as_py(), dtype=float), np.array(b, dtype=float)
)
Expand All @@ -600,7 +600,7 @@ def test_array_function_cardinality():

query_result = df.select(stmt).collect()[0].column(0)

for a, b in zip(query_result, py_expr):
for a, b in zip(query_result, py_expr, strict=False):
np.testing.assert_array_equal(
np.array([a.as_py()], dtype=int), np.array([b], dtype=int)
)
Expand Down Expand Up @@ -631,7 +631,7 @@ def test_make_array_functions(make_func):
]

query_result = df.select(stmt).collect()[0].column(0)
for a, b in zip(query_result, py_expr):
for a, b in zip(query_result, py_expr, strict=False):
np.testing.assert_array_equal(
np.array(a.as_py(), dtype=str), np.array(b, dtype=str)
)
Expand Down Expand Up @@ -664,7 +664,7 @@ def test_array_function_obj_tests(stmt, py_expr):
batch = pa.RecordBatch.from_arrays([np.array(data, dtype=object)], names=["arr"])
df = ctx.create_dataframe([[batch]])
query_result = np.array(df.select(stmt).collect()[0].column(0))
for a, b in zip(query_result, py_expr(data)):
for a, b in zip(query_result, py_expr(data), strict=False):
assert a == b


Expand Down
13 changes: 8 additions & 5 deletions python/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty
result = pa.Table.from_batches(result)

rd = result.to_pydict()
assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1}
assert dict(zip(rd["grp"], rd["cnt"], strict=False)) == {"a": 3, "b": 1}


@pytest.mark.parametrize("path_to_str", [True, False])
Expand Down Expand Up @@ -340,7 +340,10 @@ def test_execute(ctx, tmp_path):
result_values.extend(pydict["cnt"])

result_keys, result_values = (
list(t) for t in zip(*sorted(zip(result_keys, result_values)))
list(t)
for t in zip(
*sorted(zip(result_keys, result_values, strict=False)), strict=False
)
)

assert result_keys == [1, 2, 3, 11, 12]
Expand Down Expand Up @@ -467,7 +470,7 @@ def test_simple_select(ctx, tmp_path, arr):
# In DF 43.0.0 we now default to having BinaryView and StringView
# so the array that is saved to the parquet is slightly different
# than the array read. Convert to values for comparison.
if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)):
if isinstance(result, pa.BinaryViewArray | pa.StringViewArray):
arr = arr.tolist()
result = result.tolist()

Expand Down Expand Up @@ -524,12 +527,12 @@ def test_register_listing_table(
result = pa.Table.from_batches(result)

rd = result.to_pydict()
assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2}
assert dict(zip(rd["grp"], rd["count"], strict=False)) == {"a": 5, "b": 2}

result = ctx.sql(
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date='2020-10-05' GROUP BY grp" # noqa: E501
).collect()
result = pa.Table.from_batches(result)

rd = result.to_pydict()
assert dict(zip(rd["grp"], rd["count"])) == {"a": 3, "b": 2}
assert dict(zip(rd["grp"], rd["count"], strict=False)) == {"a": 3, "b": 2}
Loading