Add support for Python 3.11 (#9708)
graingert committed Dec 16, 2022
1 parent 0d8e12b commit 936d9f7
Showing 6 changed files with 96 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/additional.yml
@@ -65,7 +65,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout source
uses: actions/checkout@v3.2.0
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -23,7 +23,7 @@ jobs:
fail-fast: false
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
exclude:
- os: "macos-latest"
python-version: "3.8"
72 changes: 72 additions & 0 deletions continuous_integration/environment-3.11.yaml
@@ -0,0 +1,72 @@
# This job includes coverage
name: test-environment
channels:
- conda-forge
- nodefaults
dependencies:
# required dependencies
- python=3.11
- packaging
- numpy
- pandas
# test dependencies
- pre-commit
- pytest
- pytest-cov
- pytest-rerunfailures
- pytest-timeout
- pytest-xdist
- moto
- flask
- fastparquet>=0.8.0
- h5py
- pytables
# - zarr
# `tiledb-py=0.17.5` led to strange segfaults in CI; however, 0.18 is needed for 3.11
# https://github.com/dask/dask/pull/9569
# - tiledb-py # crashes on Python 3.11
# - pyspark
# - tiledb>=2.5.0 # crashes on Python 3.11
- xarray
- fsspec
- sqlalchemy>=1.4.0
- pyarrow>=10
- coverage
- jsonschema
# # other -- IO
- boto3
- botocore
# Temporary restriction until https://github.com/dask/distributed/issues/7173 is resolved
- bokeh
- httpretty
- aiohttp
# # Need recent version of s3fs to support newer aiobotocore versions
# # https://github.com/dask/s3fs/issues/514
- s3fs>=2021.8.0
- click
- cloudpickle
- crick
- cytoolz
- distributed
- ipython
- ipycytoscape
- lz4
# https://github.com/numba/numba/issues/8304
# - numba # not supported on 3.11
- partd
- psutil
- requests
- scikit-image
- scikit-learn
- scipy
- toolz
- python-snappy
# - sparse needs numba
- cachey
- python-graphviz
- python-xxhash
- mmh3
- jinja2
- pip
- pip:
- git+https://github.com/dask/distributed
18 changes: 18 additions & 0 deletions dask/dataframe/io/tests/test_hdf.py
@@ -5,9 +5,11 @@
import numpy as np
import pandas as pd
import pytest
from packaging.version import Version

import dask
import dask.dataframe as dd
from dask.compatibility import _PY_VERSION
from dask.dataframe._compat import tm
from dask.dataframe.optimize import optimize_dataframe_getitem
from dask.dataframe.utils import assert_eq
@@ -46,6 +48,10 @@ def test_to_hdf():
tm.assert_frame_equal(df, out[:])


@pytest.mark.skipif(
_PY_VERSION >= Version("3.11"),
reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
)
def test_to_hdf_multiple_nodes():
pytest.importorskip("tables")
df = pd.DataFrame(
@@ -388,6 +394,10 @@ def test_to_hdf_link_optimizations():
assert dependency_depth(d.dask) == 2 + a.npartitions


@pytest.mark.skipif(
_PY_VERSION >= Version("3.11"),
reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
)
@pytest.mark.slow
def test_to_hdf_lock_delays():
pytest.importorskip("tables")
@@ -478,6 +488,10 @@ def test_to_hdf_exceptions():
a.to_hdf(hdf, "/data_*_*")


@pytest.mark.skipif(
_PY_VERSION >= Version("3.11"),
reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
)
@pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"])
@pytest.mark.parametrize("npartitions", [1, 4, 10])
def test_to_hdf_schedulers(scheduler, npartitions):
@@ -679,6 +693,10 @@ def test_read_hdf_multiply_open():
dd.read_hdf(fn, "/data", chunksize=2, mode="r")


@pytest.mark.skipif(
_PY_VERSION >= Version("3.11"),
reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
)
def test_read_hdf_multiple():
pytest.importorskip("tables")
df = pd.DataFrame(
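For context on the guard used in the new skipif markers above: `_PY_VERSION` is imported from `dask.compatibility` at the top of the file and compared against `Version("3.11")`, so the PyTables-backed tests run only on interpreters older than 3.11, where https://github.com/PyTables/PyTables/issues/977 does not apply. The definition of `_PY_VERSION` is not part of this diff; a minimal sketch, assuming it is simply the running interpreter's version parsed with `packaging`, could look like:

# Hypothetical sketch of how dask/compatibility.py might define _PY_VERSION
# (the real definition is not shown in this commit)
import sys
from packaging.version import Version

_PY_VERSION = Version(".".join(map(str, sys.version_info[:3])))  # e.g. Version("3.11.1")

With a constant like this, `_PY_VERSION >= Version("3.11")` is True on 3.11 and newer and False on 3.8–3.10, which is exactly the condition the new markers need.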
3 changes: 3 additions & 0 deletions dask/dataframe/io/tests/test_parquet.py
@@ -661,6 +661,7 @@ def write_partition(df, i):
assert_eq(df, ddf2, check_index=False)


@PYARROW_MARK
@pytest.mark.xfail(
not PANDAS_GT_130,
reason=(
@@ -3005,6 +3006,7 @@ def test_chunksize_aggregate_files(tmpdir, write_engine, read_engine, aggregate_
assert_eq(df1[["c", "d"]], df2[["c", "d"]], check_index=False)


@PYARROW_MARK
@pytest.mark.parametrize("metadata", [True, False])
@pytest.mark.parametrize("chunksize", [None, 1024, 4096, "1MiB"])
def test_chunksize(tmpdir, chunksize, engine, metadata):
@@ -3998,6 +4000,7 @@ def test_metadata_task_size(tmpdir, engine, write_metadata_file, metadata_task_s
assert_eq(ddf2b, ddf2c)


@PYARROW_MARK
@pytest.mark.parametrize("partition_on", ("b", None))
def test_extra_file(tmpdir, engine, partition_on):
# Check that read_parquet can handle spark output
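The `@PYARROW_MARK` decorator added to these three parquet tests is defined elsewhere in `test_parquet.py` and is not shown in this commit; it presumably marks tests that require pyarrow so they are skipped when it is unavailable. A minimal sketch under that assumption:

# Hypothetical sketch of a PYARROW_MARK-style skip marker
# (the real definition lives in test_parquet.py and is not part of this diff)
import pytest

try:
    import pyarrow  # noqa: F401
    _HAVE_PYARROW = True
except ImportError:
    _HAVE_PYARROW = False

PYARROW_MARK = pytest.mark.skipif(not _HAVE_PYARROW, reason="pyarrow not installed")

Applied as a decorator, such a marker keeps the pyarrow-only tests from failing hard in environments that do not ship pyarrow.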
1 change: 1 addition & 0 deletions setup.py
@@ -83,6 +83,7 @@
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering",
"Topic :: System :: Distributed Computing",
],
Expand Down
