From 6340d0872e63099534171a43296d8cf646fe6612 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 2 May 2024 14:03:49 -0400 Subject: [PATCH 1/6] Run the python test suite when building for Pyodide --- .github/workflows/Pyodide.yml | 34 +++++++----- .../extensions/test_extensions_loading.py | 7 +++ .../fast/api/test_connection_interrupt.py | 5 ++ .../tests/fast/api/test_query_interrupt.py | 5 ++ .../pythonpkg/tests/fast/api/test_read_csv.py | 53 ++++++++++++------- .../fast/pandas/test_df_object_resolution.py | 5 ++ .../tests/fast/pandas/test_timedelta.py | 3 +- .../tests/fast/pandas/test_timestamp.py | 7 ++- .../fast/relational_api/test_rapi_query.py | 9 ++-- .../tests/fast/spark/test_spark_union.py | 18 ++----- .../tests/fast/test_alex_multithread.py | 10 +++- .../pythonpkg/tests/fast/test_memory_leaks.py | 5 +- .../pythonpkg/tests/fast/test_multithread.py | 22 ++++---- .../fast/test_relation_dependency_leak.py | 6 ++- 14 files changed, 119 insertions(+), 70 deletions(-) diff --git a/.github/workflows/Pyodide.yml b/.github/workflows/Pyodide.yml index 160bcc759f5..c7b6da90b69 100644 --- a/.github/workflows/Pyodide.yml +++ b/.github/workflows/Pyodide.yml @@ -44,8 +44,10 @@ jobs: version: - python: "3.10" pyodide-build: "0.22.1" + node: "16" - python: "3.11" pyodide-build: "0.25.1" + node: "18" steps: - uses: actions/checkout@v4 with: @@ -76,22 +78,30 @@ jobs: CFLAGS: "-fexceptions" LDFLAGS: "-fexceptions" - - name: smoke test duckdb on pyodide + - name: install node + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.version.node }} + + - name: create pyodide environment + run: pyodide venv .venv-pyodide + + - name: install deps into environment run: | - pyodide venv .venv-pyodide source .venv-pyodide/bin/activate - pip install ./tools/pythonpkg/dist/*.whl - - python -V + pip install pytest numpy pandas mypy - python < str: instant = f""" diff --git a/tools/pythonpkg/tests/fast/pandas/test_timestamp.py b/tools/pythonpkg/tests/fast/pandas/test_timestamp.py index 51641e281a3..0a580025fa6 100644 --- a/tools/pythonpkg/tests/fast/pandas/test_timestamp.py +++ b/tools/pythonpkg/tests/fast/pandas/test_timestamp.py @@ -1,8 +1,9 @@ import duckdb -import os import datetime +import os import pytest import pandas as pd +import platform from conftest import pandas_2_or_higher @@ -64,6 +65,10 @@ def test_timestamp_timedelta(self): df_from_duck = duckdb.from_df(df).df() assert df_from_duck.equals(df) + @pytest.mark.xfail( + condition=platform.system() == "Emscripten" and os.environ.get("TZ") != "UTC", + reason="time zones other than UTC don't seem to work on Pyodide", + ) def test_timestamp_timezone(self, duckdb_cursor): rel = duckdb_cursor.query( """ diff --git a/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py b/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py index 5fdbae2f9f6..fd90ecbf96e 100644 --- a/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py +++ b/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py @@ -1,5 +1,7 @@ import duckdb import pytest +import platform +import sys @pytest.fixture() @@ -117,11 +119,12 @@ def test_query_non_select_result(self, duckdb_cursor): def test_replacement_scan_recursion(self, duckdb_cursor): depth_limit = 1000 - import sys - if sys.platform.startswith('win'): - # With the default we reach a stack overflow in the CI + if sys.platform.startswith('win') or platform.system() == "Emscripten": + # With the default we reach a stack overflow in the CI for windows + # and also outside of it for Pyodide depth_limit = 250 + duckdb_cursor.execute(f"SET max_expression_depth TO {depth_limit}") rel = duckdb_cursor.sql('select 42') rel = duckdb_cursor.sql('select * from rel') diff --git a/tools/pythonpkg/tests/fast/spark/test_spark_union.py b/tools/pythonpkg/tests/fast/spark/test_spark_union.py index 7801dea4ada..2399785fafa 100644 --- a/tools/pythonpkg/tests/fast/spark/test_spark_union.py +++ b/tools/pythonpkg/tests/fast/spark/test_spark_union.py @@ -1,21 +1,10 @@ +import platform import pytest _ = pytest.importorskip("duckdb.experimental.spark") -from duckdb.experimental.spark.sql.types import ( - LongType, - StructType, - BooleanType, - StructField, - StringType, - IntegerType, - LongType, - Row, - ArrayType, - MapType, -) -from duckdb.experimental.spark.sql.functions import col, struct, when, lit, array_contains -from duckdb.experimental.spark.sql.functions import sum, avg, max, min, mean, count +from duckdb.experimental.spark.sql.types import Row +from duckdb.experimental.spark.sql.functions import col @pytest.fixture @@ -65,6 +54,7 @@ def test_merge_with_union(self, df, df2): res2 = unionDF.collect() assert res == res2 + @pytest.mark.xfail(condition=platform.system() == "Emscripten", reason="Broken on Pyodide") def test_merge_without_duplicates(self, df, df2): # 'sort' has been added to make the result deterministic disDF = df.union(df2).distinct().sort(col("employee_name")) diff --git a/tools/pythonpkg/tests/fast/test_alex_multithread.py b/tools/pythonpkg/tests/fast/test_alex_multithread.py index 765b8f0ab2b..92768ec0c9a 100644 --- a/tools/pythonpkg/tests/fast/test_alex_multithread.py +++ b/tools/pythonpkg/tests/fast/test_alex_multithread.py @@ -1,10 +1,16 @@ +import platform import duckdb from threading import Thread, current_thread -import pandas as pd -import os import pytest +pytestmark = pytest.mark.xfail( + condition=platform.system() == "Emscripten", + reason="Emscripten builds cannot use threads", + raises=RuntimeError, +) + + @pytest.fixture(scope="session") def tmp_database(tmp_path_factory): database = tmp_path_factory.mktemp("databases", numbered=True) / "tmp.duckdb" diff --git a/tools/pythonpkg/tests/fast/test_memory_leaks.py b/tools/pythonpkg/tests/fast/test_memory_leaks.py index 228a8eace27..1d4c126603e 100644 --- a/tools/pythonpkg/tests/fast/test_memory_leaks.py +++ b/tools/pythonpkg/tests/fast/test_memory_leaks.py @@ -1,9 +1,10 @@ import gc -import duckdb import pytest -import os, psutil +import os import pandas as pd +psutil = pytest.importorskip("psutil") + @pytest.fixture def check_leaks(): diff --git a/tools/pythonpkg/tests/fast/test_multithread.py b/tools/pythonpkg/tests/fast/test_multithread.py index 195b1d1e454..c4fd0837255 100644 --- a/tools/pythonpkg/tests/fast/test_multithread.py +++ b/tools/pythonpkg/tests/fast/test_multithread.py @@ -1,3 +1,4 @@ +import platform import duckdb import pytest import threading @@ -7,12 +8,11 @@ import os from typing import List -try: - import pyarrow as pa - can_run = True -except ImportError: - can_run = False +pytestmark = pytest.mark.xfail( + condition=platform.system() == "Emscripten", + reason="Emscripten builds cannot use threads", +) def connect_duck(duckdb_conn): @@ -415,15 +415,13 @@ def test_fetchdfchunk(self, duckdb_cursor, pandas): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_fetcharrow(self, duckdb_cursor, pandas): - if not can_run: - return + pytest.importorskip('pyarrow') duck_threads = DuckDBThreaded(10, fetch_arrow_query, pandas) duck_threads.multithread_test() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_fetch_record_batch(self, duckdb_cursor, pandas): - if not can_run: - return + pytest.importorskip('pyarrow') duck_threads = DuckDBThreaded(10, fetch_record_batch_query, pandas) duck_threads.multithread_test() @@ -449,8 +447,7 @@ def test_df_unregister(self, duckdb_cursor, pandas): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_arrow_register_unregister(self, duckdb_cursor, pandas): - if not can_run: - return + pytest.importorskip('pyarrow') duck_threads = DuckDBThreaded(10, arrow_register_unregister, pandas) duck_threads.multithread_test() @@ -481,8 +478,7 @@ def test_from_DF(self, duckdb_cursor, pandas): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_from_arrow(self, duckdb_cursor, pandas): - if not can_run: - return + pytest.importorskip('pyarrow') duck_threads = DuckDBThreaded(10, from_arrow, pandas) duck_threads.multithread_test() diff --git a/tools/pythonpkg/tests/fast/test_relation_dependency_leak.py b/tools/pythonpkg/tests/fast/test_relation_dependency_leak.py index 9763d486afd..ca5057047e5 100644 --- a/tools/pythonpkg/tests/fast/test_relation_dependency_leak.py +++ b/tools/pythonpkg/tests/fast/test_relation_dependency_leak.py @@ -1,6 +1,5 @@ -import duckdb import numpy as np -import os, psutil +import os import pytest try: @@ -12,6 +11,9 @@ from conftest import NumpyPandas, ArrowPandas +psutil = pytest.importorskip("psutil") + + def check_memory(function_to_check, pandas, duckdb_cursor): process = psutil.Process(os.getpid()) mem_usage = process.memory_info().rss / (10**9) From 2b8a86adbfe1e7ac976a5aaba9dee667fc09832d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 7 May 2024 12:26:00 -0400 Subject: [PATCH 2/6] xfail with MemoryError on Emscripten tests --- tools/pythonpkg/tests/fast/test_relation.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/pythonpkg/tests/fast/test_relation.py b/tools/pythonpkg/tests/fast/test_relation.py index 5c2bda13637..4e2d0384d54 100644 --- a/tools/pythonpkg/tests/fast/test_relation.py +++ b/tools/pythonpkg/tests/fast/test_relation.py @@ -1,5 +1,6 @@ import duckdb import numpy as np +import platform import tempfile import os import pandas as pd @@ -366,7 +367,13 @@ def test_relation_print(self): 2048, 5000, 1000000, - 10000000, + pytest.param( + 10000000, + marks=pytest.mark.xfail( + condition=platform.system() == "Emscripten", + raises=MemoryError, + ), + ), ], ) def test_materialized_relation(self, duckdb_cursor, num_rows): From 438e9d0715bd6b5352759fa370f7c208acdca2f3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 7 May 2024 12:52:48 -0400 Subject: [PATCH 3/6] xfail requires a reason when a condition is used --- tools/pythonpkg/tests/fast/test_relation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/pythonpkg/tests/fast/test_relation.py b/tools/pythonpkg/tests/fast/test_relation.py index 4e2d0384d54..90d9f8ba96a 100644 --- a/tools/pythonpkg/tests/fast/test_relation.py +++ b/tools/pythonpkg/tests/fast/test_relation.py @@ -372,6 +372,7 @@ def test_relation_print(self): marks=pytest.mark.xfail( condition=platform.system() == "Emscripten", raises=MemoryError, + reason="Emscripten/Pyodide builds run out of memory at this scale" ), ), ], From e386ebd8a395e8c9e7d17e5abea9ccfef0762fab Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 7 May 2024 13:16:13 -0400 Subject: [PATCH 4/6] Style nit --- tools/pythonpkg/tests/fast/test_relation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pythonpkg/tests/fast/test_relation.py b/tools/pythonpkg/tests/fast/test_relation.py index 90d9f8ba96a..a0ed600a5b7 100644 --- a/tools/pythonpkg/tests/fast/test_relation.py +++ b/tools/pythonpkg/tests/fast/test_relation.py @@ -372,7 +372,7 @@ def test_relation_print(self): marks=pytest.mark.xfail( condition=platform.system() == "Emscripten", raises=MemoryError, - reason="Emscripten/Pyodide builds run out of memory at this scale" + reason="Emscripten/Pyodide builds run out of memory at this scale", ), ), ], From a5acba86b04588727f43f42572dc5cc0fc6ca91c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 7 May 2024 13:17:09 -0400 Subject: [PATCH 5/6] better job name --- .github/workflows/Pyodide.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Pyodide.yml b/.github/workflows/Pyodide.yml index c7b6da90b69..c856becf238 100644 --- a/.github/workflows/Pyodide.yml +++ b/.github/workflows/Pyodide.yml @@ -96,7 +96,7 @@ jobs: source .venv-pyodide/bin/activate pip install ./tools/pythonpkg/dist/*.whl - - name: run fast tests pyodide + - name: run tests using pyodide run: | source .venv-pyodide/bin/activate python -m pytest ./tools/pythonpkg/tests From 15472d155b4ca7df2930c39e0e993c95b0553f6d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 7 May 2024 16:29:16 -0400 Subject: [PATCH 6/6] Locally import pyarrow --- tools/pythonpkg/tests/fast/test_multithread.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/pythonpkg/tests/fast/test_multithread.py b/tools/pythonpkg/tests/fast/test_multithread.py index c4fd0837255..1ffdfc25600 100644 --- a/tools/pythonpkg/tests/fast/test_multithread.py +++ b/tools/pythonpkg/tests/fast/test_multithread.py @@ -251,6 +251,7 @@ def df_unregister(duckdb_conn, queue, pandas): def arrow_register_unregister(duckdb_conn, queue, pandas): # Get a new connection + pa = pytest.importorskip('pyarrow') duckdb_conn = duckdb.connect() arrow_tbl = pa.Table.from_pydict({'my_column': pa.array([1, 2, 3, 4, 5], type=pa.int64())}) try: @@ -317,6 +318,7 @@ def from_df(duckdb_conn, queue, pandas): def from_arrow(duckdb_conn, queue, pandas): # Get a new connection + pa = pytest.importorskip('pyarrow') duckdb_conn = duckdb.connect() arrow_tbl = pa.Table.from_pydict({'my_column': pa.array([1, 2, 3, 4, 5], type=pa.int64())}) try: