diff --git a/.github/workflows/build_and_deploy_docs.yaml b/.github/workflows/build_and_deploy_docs.yaml
index d27b45dc10..3d74ba874d 100644
--- a/.github/workflows/build_and_deploy_docs.yaml
+++ b/.github/workflows/build_and_deploy_docs.yaml
@@ -10,7 +10,7 @@ on:
 
 env:
   NODE_VERSION: 20
-  PYTHON_VERSION: 3.13
+  PYTHON_VERSION: 3.14
 
 jobs:
   build_and_deploy_docs:
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 3683e87c8e..acf286c69c 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -47,13 +47,13 @@ jobs:
     name: Lint check
     uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   type_check:
     name: Type check
     uses: apify/workflows/.github/workflows/python_type_check.yaml@main
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   unit_tests:
     name: Unit tests
@@ -61,7 +61,7 @@ jobs:
     secrets:
       httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   update_changelog:
     name: Update changelog
diff --git a/.github/workflows/run_code_checks.yaml b/.github/workflows/run_code_checks.yaml
index ca0493882f..6bf008115e 100644
--- a/.github/workflows/run_code_checks.yaml
+++ b/.github/workflows/run_code_checks.yaml
@@ -21,13 +21,13 @@ jobs:
     name: Lint check
     uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   type_check:
     name: Type check
     uses: apify/workflows/.github/workflows/python_type_check.yaml@main
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   unit_tests:
     name: Unit tests
@@ -35,7 +35,7 @@ jobs:
     secrets:
       httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
     with:
-      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
+      python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
 
   docs_check:
     name: Docs check
diff --git a/.github/workflows/templates_e2e_tests.yaml b/.github/workflows/templates_e2e_tests.yaml
index 3cce8c527d..414cfab65c 100644
--- a/.github/workflows/templates_e2e_tests.yaml
+++ b/.github/workflows/templates_e2e_tests.yaml
@@ -7,7 +7,7 @@ on:
 
 env:
   NODE_VERSION: 22
-  PYTHON_VERSION: 3.13
+  PYTHON_VERSION: 3.14
 
 jobs:
   end_to_end_tests:
diff --git a/pyproject.toml b/pyproject.toml
index d730539850..f0c58e9719 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Topic :: Software Development :: Libraries",
 ]
 keywords = [
@@ -35,7 +36,7 @@ keywords = [
 dependencies = [
     "cachetools>=5.5.0",
     "colorama>=0.4.0",
-    "impit>=0.6.1",
+    "impit>=0.8.0",
     "more-itertools>=10.2.0",
     "protego>=0.5.0",
     "psutil>=6.0.0",
@@ -73,7 +74,7 @@ otel = [
 ]
 sql_postgres = [
     "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
-    "asyncpg>=0.24.0"
+    "asyncpg>=0.24.0; python_version < '3.14'" # TODO: https://github.com/apify/crawlee-python/issues/1555
 ]
 sql_sqlite = [
     "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
diff --git a/src/crawlee/storage_clients/_sql/_storage_client.py b/src/crawlee/storage_clients/_sql/_storage_client.py
index 57607d1f74..d324a17a86 100644
--- a/src/crawlee/storage_clients/_sql/_storage_client.py
+++ b/src/crawlee/storage_clients/_sql/_storage_client.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import sys
 import warnings
 from datetime import timedelta
 from pathlib import Path
@@ -268,6 +269,14 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine:
                 'Unsupported database. Supported: sqlite, postgresql. Consider using a different database.'
             )
 
+        # TODO: https://github.com/apify/crawlee-python/issues/1555
+        if 'postgresql' in connection_string and sys.version_info >= (3, 14):
+            raise ValueError(
+                'SqlStorageClient cannot use PostgreSQL with Python 3.14 '
+                'due to asyncpg compatibility limitations. '
+                'Please use Python 3.13 or earlier, or switch to SQLite.'
+            )
+
         self._engine = create_async_engine(
             connection_string,
             future=True,
diff --git a/tests/unit/_utils/test_system.py b/tests/unit/_utils/test_system.py
index 4e147d9c80..a9675af224 100644
--- a/tests/unit/_utils/test_system.py
+++ b/tests/unit/_utils/test_system.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import sys
-from multiprocessing import Barrier, Process, Value, synchronize
+from multiprocessing import get_context, synchronize
 from multiprocessing.shared_memory import SharedMemory
 from typing import TYPE_CHECKING
 
@@ -38,7 +38,9 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
     equal to additional_memory_size_estimate_per_unshared_memory_child where the additional shared memory is exactly
     the same as the unshared memory.
     """
-    estimated_memory_expectation = Value('b', False)  # noqa: FBT003 # Common usage pattern for multiprocessing.Value
+
+    ctx = get_context('fork')
+    estimated_memory_expectation = ctx.Value('b', False)  # noqa: FBT003 # Common usage pattern for multiprocessing.Value
 
     def parent_process() -> None:
         extra_memory_size = 1024 * 1024 * 100  # 100 MB
@@ -70,8 +72,8 @@ def get_additional_memory_estimation_while_running_processes(
         *, target: Callable, count: int = 1, use_shared_memory: bool = False
     ) -> float:
         processes = []
-        ready = Barrier(parties=count + 1)
-        measured = Barrier(parties=count + 1)
+        ready = ctx.Barrier(parties=count + 1)
+        measured = ctx.Barrier(parties=count + 1)
         shared_memory: None | SharedMemory = None
 
         memory_before = get_memory_info().current_size
@@ -83,7 +85,7 @@ def get_additional_memory_estimation_while_running_processes(
             extra_args = []
 
         for _ in range(count):
-            p = Process(target=target, args=[ready, measured, *extra_args])
+            p = ctx.Process(target=target, args=[ready, measured, *extra_args])
             p.start()
             processes.append(p)
 
@@ -129,7 +131,7 @@ def get_additional_memory_estimation_while_running_processes(
            f'{memory_estimation_difference_ratio=}'
         )
 
-    process = Process(target=parent_process)
+    process = ctx.Process(target=parent_process)
     process.start()
     process.join()
 
diff --git a/tests/unit/storage_clients/_redis/test_redis_rq_client.py b/tests/unit/storage_clients/_redis/test_redis_rq_client.py
index 3f878ea981..c343c18e33 100644
--- a/tests/unit/storage_clients/_redis/test_redis_rq_client.py
+++ b/tests/unit/storage_clients/_redis/test_redis_rq_client.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import json
+import sys
 from typing import TYPE_CHECKING
 
 import pytest
@@ -25,6 +26,10 @@ async def rq_client(
     suppress_user_warning: None,  # noqa: ARG001
 ) -> AsyncGenerator[RedisRequestQueueClient, None]:
     """A fixture for a Redis RQ client."""
+    # TODO: https://github.com/apify/crawlee-python/issues/1554
+    if request.param == 'bloom' and sys.platform == 'win32' and sys.version_info >= (3, 14):
+        pytest.skip('Bloom filters not supported on Windows with Python 3.14 and fakeredis')
+
     client = await RedisStorageClient(redis=redis_client, queue_dedup_strategy=request.param).create_rq_client(
         name='test_request_queue'
     )
diff --git a/uv.lock b/uv.lock
index 7b96a10a94..300c6a871b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -733,7 +733,7 @@ adaptive-crawler = [
 all = [
     { name = "aiosqlite" },
     { name = "apify-fingerprint-datapoints" },
-    { name = "asyncpg" },
+    { name = "asyncpg", marker = "python_full_version < '3.14'" },
     { name = "beautifulsoup4", extra = ["lxml"] },
     { name = "browserforge" },
     { name = "cookiecutter" },
@@ -796,7 +796,7 @@ redis = [
     { name = "redis", extra = ["hiredis"] },
 ]
 sql-postgres = [
-    { name = "asyncpg" },
+    { name = "asyncpg", marker = "python_full_version < '3.14'" },
     { name = "sqlalchemy", extra = ["asyncio"] },
 ]
 sql-sqlite = [
@@ -835,7 +835,7 @@ requires-dist = [
     { name = "apify-fingerprint-datapoints", marker = "extra == 'adaptive-crawler'", specifier = ">=0.0.2" },
     { name = "apify-fingerprint-datapoints", marker = "extra == 'httpx'", specifier = ">=0.0.2" },
     { name = "apify-fingerprint-datapoints", marker = "extra == 'playwright'", specifier = ">=0.0.2" },
-    { name = "asyncpg", marker = "extra == 'sql-postgres'", specifier = ">=0.24.0" },
+    { name = "asyncpg", marker = "python_full_version < '3.14' and extra == 'sql-postgres'", specifier = ">=0.24.0" },
     { name = "beautifulsoup4", extras = ["lxml"], marker = "extra == 'beautifulsoup'", specifier = ">=4.12.0" },
     { name = "browserforge", marker = "extra == 'adaptive-crawler'", specifier = ">=1.2.3" },
     { name = "browserforge", marker = "extra == 'httpx'", specifier = ">=1.2.3" },
@@ -847,7 +847,7 @@ requires-dist = [
     { name = "curl-cffi", marker = "extra == 'curl-impersonate'", specifier = ">=0.9.0" },
     { name = "html5lib", marker = "extra == 'beautifulsoup'", specifier = ">=1.0" },
     { name = "httpx", extras = ["brotli", "http2", "zstd"], marker = "extra == 'httpx'", specifier = ">=0.27.0" },
-    { name = "impit", specifier = ">=0.6.1" },
+    { name = "impit", specifier = ">=0.8.0" },
     { name = "inquirer", marker = "extra == 'cli'", specifier = ">=3.3.0" },
     { name = "jaro-winkler", marker = "extra == 'adaptive-crawler'", specifier = ">=2.0.3" },
     { name = "more-itertools", specifier = ">=10.2.0" },