From 699100f17ce8df99da36a943f4f7d86ce5eca74a Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:24:10 +0000
Subject: [PATCH 1/7] test: add pytest-xdist (multi-process), pytest-randomly
 (random order) and a default per-test timeout

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index bcbb24f6..fd34ccc0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -227,6 +227,8 @@ test = [ # dependencies used for running tests
     "pytest",
     "pytest-reraise",
     "pytest-timeout",
+    "pytest-xdist", # runs tests in parallel worker processes when invoked with `-n <num_workers>` or `-n auto`
+    "pytest-randomly", # shuffles test order to expose hidden inter-test dependencies; active once installed (opt out with `-p no:randomly`)
     "mypy",
     "coverage",
     "gcovr; python_version < '3.14'",
@@ -306,6 +308,7 @@ filterwarnings = [
     "ignore:distutils Version classes are deprecated:DeprecationWarning",
     "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning",
 ]
+timeout = 600  # per-test limit in seconds (pytest-timeout), so a hanging test is aborted instead of stalling the run
 
 [tool.coverage.run]
 branch = true

From 5bac3795b7723dd90568e33ca07a16906d26ca67 Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:26:49 +0000
Subject: [PATCH 2/7] tests: add a fixture for default_connection to avoid
 leakage across tests

---
 tests/conftest.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 5e297aee..27f1f8f8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -267,10 +267,18 @@ def spark():
 
 
 @pytest.fixture(scope='function')
-def duckdb_cursor():
-    connection = duckdb.connect('')
-    yield connection
-    connection.close()
+def duckdb_cursor(tmp_path):
+    with duckdb.connect(tmp_path / "mytest") as connection:
+        yield connection
+
+
+@pytest.fixture(scope='function')
+def default_con():
+    # Close the current default connection first so each test gets a fresh one and no state leaks between tests.
+    # NOTE: not thread-safe - this closes and reopens the module-level default connection.
+    duckdb.default_connection().close()
+    with duckdb.default_connection() as conn:
+        yield conn
 
 
 @pytest.fixture(scope='function')

From 3922560fa651d60e1ec52d3ea316f799f036322f Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:27:39 +0000
Subject: [PATCH 3/7] tests: use a tmp_path fixture to isolate test data

---
 tests/fast/api/test_to_csv.py         | 182 ++++++++++++++------------
 tests/fast/test_many_con_same_file.py |  29 ++--
 tests/slow/test_h2oai_arrow.py        |  16 +--
 3 files changed, 113 insertions(+), 114 deletions(-)

diff --git a/tests/fast/api/test_to_csv.py b/tests/fast/api/test_to_csv.py
index e48ae1b8..8a791c14 100644
--- a/tests/fast/api/test_to_csv.py
+++ b/tests/fast/api/test_to_csv.py
@@ -1,5 +1,4 @@
 import duckdb
-import tempfile
 import os
 import pandas._testing as tm
 import datetime
@@ -10,63 +9,63 @@
 
 class TestToCSV(object):
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_basic_to_csv(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_basic_to_csv(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
 
         rel.to_csv(temp_file_name)
 
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_sep(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_sep(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
 
         rel.to_csv(temp_file_name, sep=',')
 
-        csv_rel = duckdb.read_csv(temp_file_name, sep=',')
+        csv_rel = default_con.read_csv(temp_file_name, sep=',')
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_na_rep(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_na_rep(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
 
         rel.to_csv(temp_file_name, na_rep="test")
 
-        csv_rel = duckdb.read_csv(temp_file_name, na_values="test")
+        csv_rel = default_con.read_csv(temp_file_name, na_values="test")
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_header(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_header(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
 
         rel.to_csv(temp_file_name)
 
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_quotechar(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_quotechar(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': ["\'a,b,c\'", None, "hello", "bye"], 'b': [45, 234, 234, 2]})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
 
         rel.to_csv(temp_file_name, quotechar='\'', sep=',')
 
-        csv_rel = duckdb.read_csv(temp_file_name, sep=',', quotechar='\'')
+        csv_rel = default_con.read_csv(temp_file_name, sep=',', quotechar='\'')
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_escapechar(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_escapechar(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame(
             {
                 "c_bool": [True, False],
@@ -75,97 +74,102 @@ def test_to_csv_escapechar(self, pandas):
                 "c_string": ["a", "b,c"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, quotechar='"', escapechar='!')
-        csv_rel = duckdb.read_csv(temp_file_name, quotechar='"', escapechar='!')
+        csv_rel = default_con.read_csv(temp_file_name, quotechar='"', escapechar='!')
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_date_format(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_date_format(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame(getTimeSeriesData())
         dt_index = df.index
         df = pandas.DataFrame({"A": dt_index, "B": dt_index.shift(1)}, index=dt_index)
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, date_format="%Y%m%d")
 
-        csv_rel = duckdb.read_csv(temp_file_name, date_format="%Y%m%d")
+        csv_rel = default_con.read_csv(temp_file_name, date_format="%Y%m%d")
 
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_timestamp_format(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_timestamp_format(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         data = [datetime.time(hour=23, minute=1, second=34, microsecond=234345)]
         df = pandas.DataFrame({'0': pandas.Series(data=data, dtype='object')})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, timestamp_format='%m/%d/%Y')
 
-        csv_rel = duckdb.read_csv(temp_file_name, timestamp_format='%m/%d/%Y')
+        csv_rel = default_con.read_csv(temp_file_name, timestamp_format='%m/%d/%Y')
 
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_quoting_off(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_quoting_off(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, quoting=None)
 
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_quoting_on(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_quoting_on(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, quoting="force")
 
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_quoting_quote_all(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_quoting_quote_all(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, quoting=csv.QUOTE_ALL)
 
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_encoding_incorrect(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_encoding_incorrect(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         with pytest.raises(
             duckdb.InvalidInputException, match="Invalid Input Error: The only supported encoding option is 'UTF8"
         ):
             rel.to_csv(temp_file_name, encoding="nope")
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_encoding_correct(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_encoding_correct(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, encoding="UTF-8")
-        csv_rel = duckdb.read_csv(temp_file_name)
+        csv_rel = default_con.read_csv(temp_file_name)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_compression_gzip(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_compression_gzip(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, compression="gzip")
-        csv_rel = duckdb.read_csv(temp_file_name, compression="gzip")
+        csv_rel = default_con.read_csv(temp_file_name, compression="gzip")
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_partition(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_partition(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category": ['a', 'a', 'b', 'b'],
@@ -175,9 +179,9 @@ def test_to_csv_partition(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, header=True, partition_by=["c_category"])
-        csv_rel = duckdb.sql(
+        csv_rel = default_con.sql(
             f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
         )
         expected = [
@@ -190,8 +194,9 @@ def test_to_csv_partition(self, pandas):
         assert csv_rel.execute().fetchall() == expected
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_partition_with_columns_written(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_partition_with_columns_written(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category": ['a', 'a', 'b', 'b'],
@@ -201,17 +206,18 @@ def test_to_csv_partition_with_columns_written(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
-        res = duckdb.sql("FROM rel order by all")
+        rel = default_con.from_df(df)
+        res = default_con.sql("FROM rel order by all")
         rel.to_csv(temp_file_name, header=True, partition_by=["c_category"], write_partition_columns=True)
-        csv_rel = duckdb.sql(
+        csv_rel = default_con.sql(
             f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
         )
         assert res.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_overwrite(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_overwrite(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category_1": ['a', 'a', 'b', 'b'],
@@ -222,10 +228,10 @@ def test_to_csv_overwrite(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"])  # csv to be overwritten
         rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"], overwrite=True)
-        csv_rel = duckdb.sql(
+        csv_rel = default_con.sql(
             f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
         )
         # When partition columns are read from directory names, column order become different from original
@@ -238,8 +244,9 @@ def test_to_csv_overwrite(self, pandas):
         assert csv_rel.execute().fetchall() == expected
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_overwrite_with_columns_written(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_overwrite_with_columns_written(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category_1": ['a', 'a', 'b', 'b'],
@@ -250,22 +257,23 @@ def test_to_csv_overwrite_with_columns_written(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(
             temp_file_name, header=True, partition_by=["c_category_1"], write_partition_columns=True
         )  # csv to be overwritten
         rel.to_csv(
             temp_file_name, header=True, partition_by=["c_category_1"], overwrite=True, write_partition_columns=True
         )
-        csv_rel = duckdb.sql(
+        csv_rel = default_con.sql(
             f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
         )
-        res = duckdb.sql("FROM rel order by all")
+        res = default_con.sql("FROM rel order by all")
         assert res.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_overwrite_not_enabled(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_overwrite_not_enabled(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category_1": ['a', 'a', 'b', 'b'],
@@ -276,15 +284,16 @@ def test_to_csv_overwrite_not_enabled(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"])
         with pytest.raises(duckdb.IOException, match="OVERWRITE"):
             rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"])
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_per_thread_output(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
-        num_threads = duckdb.sql("select current_setting('threads')").fetchone()[0]
+    def test_to_csv_per_thread_output(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
+        num_threads = default_con.sql("select current_setting('threads')").fetchone()[0]
         print('num_threads:', num_threads)
         df = pandas.DataFrame(
             {
@@ -295,14 +304,15 @@ def test_to_csv_per_thread_output(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, header=True, per_thread_output=True)
-        csv_rel = duckdb.read_csv(f'{temp_file_name}/*.csv', header=True)
+        csv_rel = default_con.read_csv(f'{temp_file_name}/*.csv', header=True)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_to_csv_use_tmp_file(self, pandas):
-        temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+    def test_to_csv_use_tmp_file(self, pandas, tmp_path, default_con):
+        temp_file_name = str(tmp_path / "test.csv")
+
         df = pandas.DataFrame(
             {
                 "c_category_1": ['a', 'a', 'b', 'b'],
@@ -313,8 +323,8 @@ def test_to_csv_use_tmp_file(self, pandas):
                 "c_string": ["a", "b,c", "e", "f"],
             }
         )
-        rel = duckdb.from_df(df)
+        rel = default_con.from_df(df)
         rel.to_csv(temp_file_name, header=True)  # csv to be overwritten
         rel.to_csv(temp_file_name, header=True, use_tmp_file=True)
-        csv_rel = duckdb.read_csv(temp_file_name, header=True)
+        csv_rel = default_con.read_csv(temp_file_name, header=True)
         assert rel.execute().fetchall() == csv_rel.execute().fetchall()
diff --git a/tests/fast/test_many_con_same_file.py b/tests/fast/test_many_con_same_file.py
index 6b7362a6..fd825c76 100644
--- a/tests/fast/test_many_con_same_file.py
+++ b/tests/fast/test_many_con_same_file.py
@@ -10,29 +10,20 @@ def get_tables(con):
     return tbls
 
 
-def test_multiple_writes():
-    try:
-        os.remove("test.db")
-    except:
-        pass
-    con1 = duckdb.connect("test.db")
-    con2 = duckdb.connect("test.db")
+def test_multiple_writes(tmp_path):
+    con1 = duckdb.connect(tmp_path / "test.db")
+    con2 = duckdb.connect(tmp_path / "test.db")
     con1.execute("CREATE TABLE foo1 as SELECT 1 as a, 2 as b")
     con2.execute("CREATE TABLE bar1 as SELECT 2 as a, 3 as b")
     con2.close()
     con1.close()
-    con3 = duckdb.connect("test.db")
+    con3 = duckdb.connect(tmp_path / "test.db")
     tbls = get_tables(con3)
     assert tbls == ['bar1', 'foo1']
     del con1
     del con2
     del con3
 
-    try:
-        os.remove("test.db")
-    except:
-        pass
-
 
 def test_multiple_writes_memory():
     con1 = duckdb.connect()
@@ -64,23 +55,23 @@ def test_multiple_writes_named_memory():
     del con3
 
 
-def test_diff_config():
-    con1 = duckdb.connect("test.db", False)
+def test_diff_config(tmp_path):
+    con1 = duckdb.connect(tmp_path / "test.db", False)
     with pytest.raises(
         duckdb.ConnectionException,
         match="Can't open a connection to same database file with a different configuration than existing connections",
     ):
-        con2 = duckdb.connect("test.db", True)
+        con2 = duckdb.connect(tmp_path / "test.db", True)
     con1.close()
     del con1
 
 
-def test_diff_config_extended():
-    con1 = duckdb.connect("test.db", config={'null_order': 'NULLS FIRST'})
+def test_diff_config_extended(tmp_path):
+    con1 = duckdb.connect(tmp_path / "test.db", config={'null_order': 'NULLS FIRST'})
     with pytest.raises(
         duckdb.ConnectionException,
         match="Can't open a connection to same database file with a different configuration than existing connections",
     ):
-        con2 = duckdb.connect("test.db")
+        con2 = duckdb.connect(tmp_path / "test.db")
     con1.close()
     del con1
diff --git a/tests/slow/test_h2oai_arrow.py b/tests/slow/test_h2oai_arrow.py
index 40bde07b..7ff37d01 100644
--- a/tests/slow/test_h2oai_arrow.py
+++ b/tests/slow/test_h2oai_arrow.py
@@ -194,8 +194,10 @@ def test_join(self, threads, function, large_data):
 
 
 @fixture(scope="module")
-def arrow_dataset_register():
+def arrow_dataset_register(tmp_path_factory):
     """Single fixture to download files and register them on the given connection"""
+    temp_dir = tmp_path_factory.mktemp("h2oai_data")
+
     session = requests.Session()
     retries = urllib3_util.Retry(
         allowed_methods={'GET'},  # only retry on GETs (all we do)
@@ -212,19 +214,15 @@ def arrow_dataset_register():
         respect_retry_after_header=True,  # respect Retry-After headers
     )
     session.mount('https://', requests_adapters.HTTPAdapter(max_retries=retries))
-    saved_filenames = set()
 
     def _register(url, filename, con, tablename):
+        file_path = temp_dir / filename
         r = session.get(url)
-        with open(filename, 'wb') as f:
-            f.write(r.content)
-        con.register(tablename, read_csv(filename))
-        saved_filenames.add(filename)
+        file_path.write_bytes(r.content)
+        con.register(tablename, read_csv(str(file_path)))
 
     yield _register
 
-    for filename in saved_filenames:
-        os.remove(filename)
     session.close()
 
 
@@ -269,4 +267,4 @@ def group_by_data(arrow_dataset_register):
         "x",
     )
     yield con
-    con.close()
+    con.close()
\ No newline at end of file

From 5ca7b584194932cab69bd0006a15b890652583d8 Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:28:07 +0000
Subject: [PATCH 4/7] tests: use a "clean" default_connection due to tests
 contaminating each other

---
 tests/fast/api/test_duckdb_connection.py | 119 +++++++++++------------
 1 file changed, 59 insertions(+), 60 deletions(-)

diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py
index 4cb565c1..6ebd948e 100644
--- a/tests/fast/api/test_duckdb_connection.py
+++ b/tests/fast/api/test_duckdb_connection.py
@@ -24,23 +24,23 @@ def tmp_database(tmp_path_factory):
 # wrapped by the 'duckdb' module, to execute with the 'default_connection'
 class TestDuckDBConnection(object):
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_append(self, pandas):
-        duckdb.execute("Create table integers (i integer)")
+    def test_append(self, pandas, default_con):
+        default_con.execute("Create table integers (i integer)")
         df_in = pandas.DataFrame(
             {
                 'numbers': [1, 2, 3, 4, 5],
             }
         )
-        duckdb.append('integers', df_in)
-        assert duckdb.execute('select count(*) from integers').fetchone()[0] == 5
+        default_con.append('integers', df_in)
+        assert default_con.execute('select count(*) from integers').fetchone()[0] == 5
         # cleanup
-        duckdb.execute("drop table integers")
+        default_con.execute("drop table integers")
 
-    def test_default_connection_from_connect(self):
-        duckdb.sql('create or replace table connect_default_connect (i integer)')
+    def test_default_connection_from_connect(self, default_con):
+        default_con.sql('create or replace table connect_default_connect (i integer)')
         con = duckdb.connect(':default:')
         con.sql('select i from connect_default_connect')
-        duckdb.sql('drop table connect_default_connect')
+        default_con.sql('drop table connect_default_connect')
         with pytest.raises(duckdb.Error):
             con.sql('select i from connect_default_connect')
 
@@ -57,31 +57,31 @@ def test_arrow(self):
 
     def test_begin_commit(self):
         duckdb.begin()
-        duckdb.execute("create table tbl as select 1")
+        duckdb.execute("create table tbl_1 as select 1")
         duckdb.commit()
-        res = duckdb.table("tbl")
-        duckdb.execute("drop table tbl")
+        res = duckdb.table("tbl_1")
+        duckdb.execute("drop table tbl_1")
 
-    def test_begin_rollback(self):
-        duckdb.begin()
-        duckdb.execute("create table tbl as select 1")
-        duckdb.rollback()
+    def test_begin_rollback(self, default_con):
+        default_con.begin()
+        default_con.execute("create table tbl_1rb as select 1")
+        default_con.rollback()
         with pytest.raises(duckdb.CatalogException):
             # Table does not exist
-            res = duckdb.table("tbl")
+            res = default_con.table("tbl_1rb")
 
-    def test_cursor(self):
-        duckdb.execute("create table tbl as select 3")
+    def test_cursor(self, default_con):
+        default_con.execute("create table tbl_3 as select 3")
         duckdb_cursor = duckdb.cursor()
-        res = duckdb_cursor.table("tbl").fetchall()
+        res = duckdb_cursor.table("tbl_3").fetchall()
         assert res == [(3,)]
-        duckdb_cursor.execute("drop table tbl")
+        duckdb_cursor.execute("drop table tbl_3")
         with pytest.raises(duckdb.CatalogException):
             # 'tbl' no longer exists
-            duckdb.table("tbl")
+            default_con.table("tbl_3")
 
-    def test_cursor_lifetime(self):
-        con = duckdb.connect()
+    def test_cursor_lifetime(self, default_con):
+        con = default_con
 
         def use_cursors():
             cursors = []
@@ -103,12 +103,12 @@ def test_df(self):
         assert res == ref
 
     def test_duplicate(self):
-        duckdb.execute("create table tbl as select 5")
+        duckdb.execute("create table tbl_5 as select 5")
         dup_conn = duckdb.duplicate()
-        dup_conn.table("tbl").fetchall()
-        duckdb.execute("drop table tbl")
+        dup_conn.table("tbl_5").fetchall()
+        duckdb.execute("drop table tbl_5")
         with pytest.raises(duckdb.CatalogException):
-            dup_conn.table("tbl").fetchall()
+            dup_conn.table("tbl_5").fetchall()
 
     def test_readonly_properties(self):
         duckdb.execute("select 42")
@@ -123,11 +123,11 @@ def test_execute(self):
     def test_executemany(self):
         # executemany does not keep an open result set
         # TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results?
-        duckdb.execute("create table tbl (i integer, j varchar)")
-        duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')])
-        res = duckdb.table("tbl").fetchall()
+        duckdb.execute("create table tbl_many (i integer, j varchar)")
+        duckdb.executemany("insert into tbl_many VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')])
+        res = duckdb.table("tbl_many").fetchall()
         assert res == [(5, 'test'), (2, 'duck'), (42, 'quack')]
-        duckdb.execute("drop table tbl")
+        duckdb.execute("drop table tbl_many")
 
     def test_pystatement(self):
         with pytest.raises(duckdb.ParserException, match='seledct'):
@@ -163,8 +163,8 @@ def test_pystatement(self):
             duckdb.execute(statements[0])
         assert duckdb.execute(statements[0], {'1': 42}).fetchall() == [(42,)]
 
-        duckdb.execute("create table tbl(a integer)")
-        statements = duckdb.extract_statements('insert into tbl select $1')
+        duckdb.execute("create table tbl_a(a integer)")
+        statements = duckdb.extract_statements('insert into tbl_a select $1')
         assert statements[0].expected_result_type == [
             duckdb.ExpectedResultType.CHANGED_ROWS,
             duckdb.ExpectedResultType.QUERY_RESULT,
@@ -174,23 +174,23 @@ def test_pystatement(self):
         ):
             duckdb.executemany(statements[0])
         duckdb.executemany(statements[0], [(21,), (22,), (23,)])
-        assert duckdb.table('tbl').fetchall() == [(21,), (22,), (23,)]
-        duckdb.execute("drop table tbl")
+        assert duckdb.table('tbl_a').fetchall() == [(21,), (22,), (23,)]
+        duckdb.execute("drop table tbl_a")
 
     def test_fetch_arrow_table(self):
         # Needed for 'fetch_arrow_table'
         pyarrow = pytest.importorskip("pyarrow")
 
-        duckdb.execute("Create Table test (a integer)")
+        duckdb.execute("Create Table test_arrow_tble (a integer)")
 
         for i in range(1024):
             for j in range(2):
-                duckdb.execute("Insert Into test values ('" + str(i) + "')")
-        duckdb.execute("Insert Into test values ('5000')")
-        duckdb.execute("Insert Into test values ('6000')")
+                duckdb.execute("Insert Into test_arrow_tble values ('" + str(i) + "')")
+        duckdb.execute("Insert Into test_arrow_tble values ('5000')")
+        duckdb.execute("Insert Into test_arrow_tble values ('6000')")
         sql = '''
         SELECT  a, COUNT(*) AS repetitions
-        FROM    test
+        FROM    test_arrow_tble
         GROUP BY a
         '''
 
@@ -200,7 +200,7 @@ def test_fetch_arrow_table(self):
 
         arrow_df = arrow_table.to_pandas()
         assert result_df['repetitions'].sum() == arrow_df['repetitions'].sum()
-        duckdb.execute("drop table test")
+        duckdb.execute("drop table test_arrow_tble")
 
     def test_fetch_df(self):
         ref = [([1, 2, 3],)]
@@ -210,22 +210,22 @@ def test_fetch_df(self):
         assert res == ref
 
     def test_fetch_df_chunk(self):
-        duckdb.execute("CREATE table t as select range a from range(3000);")
-        query = duckdb.execute("SELECT a FROM t")
+        duckdb.execute("CREATE table t_df_chunk as select range a from range(3000);")
+        query = duckdb.execute("SELECT a FROM t_df_chunk")
         cur_chunk = query.fetch_df_chunk()
         assert cur_chunk['a'][0] == 0
         assert len(cur_chunk) == 2048
         cur_chunk = query.fetch_df_chunk()
         assert cur_chunk['a'][0] == 2048
         assert len(cur_chunk) == 952
-        duckdb.execute("DROP TABLE t")
+        duckdb.execute("DROP TABLE t_df_chunk")
 
     def test_fetch_record_batch(self):
         # Needed for 'fetch_arrow_table'
         pyarrow = pytest.importorskip("pyarrow")
 
-        duckdb.execute("CREATE table t as select range a from range(3000);")
-        duckdb.execute("SELECT a FROM t")
+        duckdb.execute("CREATE table t_record_batch as select range a from range(3000);")
+        duckdb.execute("SELECT a FROM t_record_batch")
         record_batch_reader = duckdb.fetch_record_batch(1024)
         chunk = record_batch_reader.read_all()
         assert len(chunk) == 3000
@@ -286,13 +286,13 @@ def test_query(self):
     def test_register(self):
         assert None != duckdb.register
 
-    def test_register_relation(self):
-        con = duckdb.connect()
+    def test_register_relation(self, default_con):
+        con = default_con
         rel = con.sql('select [5,4,3]')
-        con.register("relation", rel)
+        con.register("relation_rr", rel)
 
-        con.sql("create table tbl as select * from relation")
-        assert con.table('tbl').fetchall() == [([5, 4, 3],)]
+        con.sql("create table tbl_reg_rel as select * from relation_rr")
+        assert con.table('tbl_reg_rel').fetchall() == [([5, 4, 3],)]
 
     def test_unregister_problematic_behavior(self, duckdb_cursor):
         # We have a VIEW called 'vw' in the Catalog
@@ -314,10 +314,10 @@ def test_unregister_problematic_behavior(self, duckdb_cursor):
         assert duckdb_cursor.execute("select * from vw").fetchone() == (0,)
 
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_relation_out_of_scope(self, pandas):
+    def test_relation_out_of_scope(self, pandas, default_con):
         def temporary_scope():
             # Create a connection, we will return this
-            con = duckdb.connect()
+            con = default_con
             # Create a dataframe
             df = pandas.DataFrame({'a': [1, 2, 3]})
             # The dataframe has to be registered as well
@@ -333,8 +333,8 @@ def temporary_scope():
 
     def test_table(self):
         con = duckdb.connect()
-        con.execute("create table tbl as select 1")
-        assert [(1,)] == con.table("tbl").fetchall()
+        con.execute("create table tbl_test_table as select 1")
+        assert [(1,)] == con.table("tbl_test_table").fetchall()
 
     def test_table_function(self):
         assert None != duckdb.table_function
@@ -356,16 +356,15 @@ def test_close(self):
     def test_interrupt(self):
         assert None != duckdb.interrupt
 
-    def test_wrap_shadowing(self):
+    def test_wrap_shadowing(self, default_con):
         pd = NumpyPandas()
-        import duckdb
 
         df = pd.DataFrame({"a": [1, 2, 3]})
-        res = duckdb.sql("from df").fetchall()
+        res = default_con.sql("from df").fetchall()
         assert res == [(1,), (2,), (3,)]
 
-    def test_wrap_coverage(self):
-        con = duckdb.default_connection
+    def test_wrap_coverage(self, default_con):
+        con = default_con
 
         # Skip all of the initial __xxxx__ methods
         connection_methods = dir(con)

From a9b8ba27c98e4ccce7c36040bdaef0ec36b111dd Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:28:33 +0000
Subject: [PATCH 5/7] tests: move 10M row test to tests/slow - takes 1min

---
 tests/fast/test_relation.py      |  8 --------
 tests/slow/test_relation_slow.py | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 tests/slow/test_relation_slow.py

diff --git a/tests/fast/test_relation.py b/tests/fast/test_relation.py
index 8e68c149..2d9b3b4b 100644
--- a/tests/fast/test_relation.py
+++ b/tests/fast/test_relation.py
@@ -1,6 +1,5 @@
 import duckdb
 import numpy as np
-import platform
 import tempfile
 import os
 import pandas as pd
@@ -527,13 +526,6 @@ def test_relation_print(self):
             2048,
             5000,
             1000000,
-            pytest.param(
-                10000000,
-                marks=pytest.mark.skipif(
-                    condition=platform.system() == "Emscripten",
-                    reason="Emscripten/Pyodide builds run out of memory at this scale, and error might not thrown reliably",
-                ),
-            ),
         ],
     )
     def test_materialized_relation(self, duckdb_cursor, num_rows):
diff --git a/tests/slow/test_relation_slow.py b/tests/slow/test_relation_slow.py
new file mode 100644
index 00000000..cd892985
--- /dev/null
+++ b/tests/slow/test_relation_slow.py
@@ -0,0 +1,20 @@
+import platform
+import pytest
+
+
+class TestRelationSlow(object):
+    @pytest.mark.skipif(
+        condition=platform.system() == "Emscripten",
+        reason="Emscripten/Pyodide builds run out of memory at this scale, and error might not thrown reliably",
+    )
+    def test_materialized_relation_large(self, duckdb_cursor):
+        """Run the fast suite's materialized-relation test with 10M rows; kept in tests/slow due to its 1+ minute runtime."""
+        # Append tests/fast to sys.path so the fast suite's test_relation module can be imported from here
+        import sys
+        import os
+        sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'fast'))
+        from test_relation import TestRelation
+
+        # Reuse the fast test body directly, passing the row count that was removed from its parametrize list
+        test_instance = TestRelation()
+        test_instance.test_materialized_relation(duckdb_cursor, 10000000)
\ No newline at end of file

From 8b228be3cf269bc3247d9e7304c6d7c5215478a7 Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:29:19 +0000
Subject: [PATCH 6/7] tests: query interrupt was both slow and possibly
 incorrect - add a timeout and use pytest.raises with a more interruptible
 query

---
 tests/fast/api/test_query_interrupt.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/tests/fast/api/test_query_interrupt.py b/tests/fast/api/test_query_interrupt.py
index 6334e475..86274e7f 100644
--- a/tests/fast/api/test_query_interrupt.py
+++ b/tests/fast/api/test_query_interrupt.py
@@ -1,35 +1,31 @@
 import duckdb
 import time
 import pytest
-
 import platform
 import threading
 import _thread as thread
 
 
 def send_keyboard_interrupt():
-    # Wait a little, so we're sure the 'execute' has started
     time.sleep(0.1)
-    # Send an interrupt to the main thread
     thread.interrupt_main()
 
 
 class TestQueryInterruption(object):
+    
     @pytest.mark.xfail(
         condition=platform.system() == "Emscripten",
         reason="Emscripten builds cannot use threads",
     )
-    def test_query_interruption(self):
+    @pytest.mark.timeout(15)
+    def test_keyboard_interruption(self):
         con = duckdb.connect()
         thread = threading.Thread(target=send_keyboard_interrupt)
         # Start the thread
         thread.start()
         try:
-            res = con.execute('select count(*) from range(100000000000)').fetchall()
-        except RuntimeError:
-            # If this is not reached, we could not cancel the query before it completed
-            # indicating that the query interruption functionality is broken
-            assert True
-        except KeyboardInterrupt:
-            pytest.fail()
-        thread.join()
+            with pytest.raises((KeyboardInterrupt, RuntimeError)):
+                res = con.execute('select * from range(100000) t1,range(100000) t2').fetchall()
+        finally:
+            # Ensure the thread completes regardless of what happens
+            thread.join()

From ca15b9ee55624c2cc5f003eb6181161c756b4a30 Mon Sep 17 00:00:00 2001
From: Paul Timmins <paul@iqmo.com>
Date: Fri, 19 Sep 2025 14:32:46 +0000
Subject: [PATCH 7/7] ci: multiprocessing testing w/ xdist -n 2

---
 .github/workflows/packaging_wheels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml
index ea13b674..5f1a1494 100644
--- a/.github/workflows/packaging_wheels.yml
+++ b/.github/workflows/packaging_wheels.yml
@@ -57,7 +57,7 @@ jobs:
         uv export --only-group test --no-emit-project --output-file pylock.toml --directory {project} &&
         uv pip install -r pylock.toml
       CIBW_TEST_COMMAND: >
-        uv run -v pytest ${{ inputs.testsuite == 'fast' && './tests/fast' || './tests' }} --verbose --ignore=./tests/stubs
+        uv run -v pytest -n 2 ${{ inputs.testsuite == 'fast' && './tests/fast' || './tests' }} --verbose --ignore=./tests/stubs
 
     steps:
       - name: Checkout DuckDB Python