diff --git a/pyproject.toml b/pyproject.toml index b760628ee7..e0821e85fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "more-itertools>=10.2.0", "protego>=0.5.0", "psutil>=6.0.0", - "pydantic-settings>=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0", + "pydantic-settings>=2.12.0", "pydantic>=2.11.0", "pyee>=9.0.0", "tldextract>=5.1.0", diff --git a/src/crawlee/configuration.py b/src/crawlee/configuration.py index c0ee228e9d..d6dc6b071e 100644 --- a/src/crawlee/configuration.py +++ b/src/crawlee/configuration.py @@ -28,7 +28,9 @@ class Configuration(BaseSettings): Settings can also be configured via environment variables, prefixed with `CRAWLEE_`. """ - model_config = SettingsConfigDict(validate_by_name=True, validate_by_alias=True) + # TODO: https://github.com/pydantic/pydantic-settings/issues/706 + # Use `SettingsConfigDict(validate_by_name=True, validate_by_alias=True)` when issue is resolved. + model_config = SettingsConfigDict(populate_by_name=True) internal_timeout: Annotated[timedelta | None, Field(alias='crawlee_internal_timeout')] = None """Timeout for the internal asynchronous operations.""" diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index b75daa9e54..c97ca2a1e1 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,6 +1,3 @@ -# TODO: Update crawlee_storage_dir args once the Pydantic bug is fixed -# https://github.com/apify/crawlee-python/issues/146 - from __future__ import annotations import logging diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py index 49a2dcffba..f128b17397 100644 --- a/tests/unit/crawlers/_basic/test_basic_crawler.py +++ b/tests/unit/crawlers/_basic/test_basic_crawler.py @@ -1102,7 +1102,7 @@ async def test_services_crawlers_can_use_different_services() -> None: async def test_crawler_uses_default_storages(tmp_path: Path) -> None: configuration = Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), purge_on_start=True, ) service_locator.set_configuration(configuration) @@ -1120,7 +1120,7 @@ async def test_crawler_uses_default_storages(tmp_path: Path) -> None: async def test_crawler_can_use_other_storages(tmp_path: Path) -> None: configuration = Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), purge_on_start=True, ) service_locator.set_configuration(configuration) @@ -1148,11 +1148,11 @@ async def test_crawler_can_use_other_storages_of_same_type(tmp_path: Path) -> No } configuration_a = Configuration( - crawlee_storage_dir=str(a_path), # type: ignore[call-arg] + storage_dir=str(a_path), purge_on_start=True, ) configuration_b = Configuration( - crawlee_storage_dir=str(b_path), # type: ignore[call-arg] + storage_dir=str(b_path), purge_on_start=True, ) @@ -1652,7 +1652,7 @@ async def _run_crawler(requests: list[str], storage_dir: str) -> StatisticsState Must be defined like this to be pickable for ProcessPoolExecutor.""" service_locator.set_configuration( Configuration( - crawlee_storage_dir=storage_dir, # type: ignore[call-arg] + storage_dir=storage_dir, purge_on_start=False, ) ) diff --git a/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py b/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py index fdf8a80cd6..3276ba2f0b 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py @@ -20,7 +20,7 @@ @pytest.fixture def configuration(tmp_path: Path) -> Configuration: return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py b/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py index 9e5f9c59f0..5f2ae15da0 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py @@ -20,7 +20,7 @@ @pytest.fixture def configuration(tmp_path: Path) -> Configuration: return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py index 2b0af2651d..ddb5e22331 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py @@ -20,7 +20,7 @@ @pytest.fixture def configuration(tmp_path: Path) -> Configuration: return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storage_clients/_sql/test_sql_dataset_client.py b/tests/unit/storage_clients/_sql/test_sql_dataset_client.py index 6b94e146d3..5ad4448d4c 100644 --- a/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_dataset_client.py @@ -25,7 +25,7 @@ def configuration(tmp_path: Path) -> Configuration: """Temporary configuration for tests.""" return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storage_clients/_sql/test_sql_kvs_client.py b/tests/unit/storage_clients/_sql/test_sql_kvs_client.py index 6bd02df750..89ecc891c4 100644 --- a/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_kvs_client.py @@ -27,7 +27,7 @@ def configuration(tmp_path: Path) -> Configuration: """Temporary configuration for tests.""" return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storage_clients/_sql/test_sql_rq_client.py b/tests/unit/storage_clients/_sql/test_sql_rq_client.py index 8885f3cf88..c98b7a1fc0 100644 --- a/tests/unit/storage_clients/_sql/test_sql_rq_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_rq_client.py @@ -28,7 +28,7 @@ def configuration(tmp_path: Path) -> Configuration: """Temporary configuration for tests.""" return Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) diff --git a/tests/unit/storages/test_dataset.py b/tests/unit/storages/test_dataset.py index 1f1bedbe26..f2e28d6ad1 100644 --- a/tests/unit/storages/test_dataset.py +++ b/tests/unit/storages/test_dataset.py @@ -1,6 +1,3 @@ -# TODO: Update crawlee_storage_dir args once the Pydantic bug is fixed -# https://github.com/apify/crawlee-python/issues/146 - from __future__ import annotations import json @@ -492,7 +489,7 @@ async def test_export_with_multiple_kwargs(dataset: Dataset, tmp_path: Path) -> custom_dir_name = 'some_dir' custom_dir = tmp_path / custom_dir_name custom_dir.mkdir() - target_configuration = Configuration(crawlee_storage_dir=str(custom_dir)) # type: ignore[call-arg] + target_configuration = Configuration(storage_dir=str(custom_dir)) # Set expected values expected_exported_data = f'{json.dumps([{"some key": "some data"}])}' diff --git a/tests/unit/storages/test_key_value_store.py b/tests/unit/storages/test_key_value_store.py index 0611e26b62..cb2948f242 100644 --- a/tests/unit/storages/test_key_value_store.py +++ b/tests/unit/storages/test_key_value_store.py @@ -1,6 +1,3 @@ -# TODO: Update crawlee_storage_dir args once the Pydantic bug is fixed -# https://github.com/apify/crawlee-python/issues/146 - from __future__ import annotations import json @@ -1119,7 +1116,7 @@ async def test_get_auto_saved_value_various_global_clients( """Ensure that persistence is working for all clients regardless of what is set in service locator.""" service_locator.set_configuration( Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), purge_on_start=True, ) ) diff --git a/tests/unit/storages/test_request_queue.py b/tests/unit/storages/test_request_queue.py index 25d9d9a294..d3fb91c77f 100644 --- a/tests/unit/storages/test_request_queue.py +++ b/tests/unit/storages/test_request_queue.py @@ -1,6 +1,3 @@ -# TODO: Update crawlee_storage_dir args once the Pydantic bug is fixed -# https://github.com/apify/crawlee-python/issues/146 - from __future__ import annotations import asyncio diff --git a/tests/unit/storages/test_storage_instance_manager.py b/tests/unit/storages/test_storage_instance_manager.py index 736e145f72..765b3c6e8c 100644 --- a/tests/unit/storages/test_storage_instance_manager.py +++ b/tests/unit/storages/test_storage_instance_manager.py @@ -22,10 +22,7 @@ def storage_type(request: pytest.FixtureRequest) -> type[Storage]: async def test_unique_storage_by_storage_client(tmp_path: Path, storage_type: type[Storage]) -> None: - config = Configuration( - purge_on_start=True, - ) - config.storage_dir = str(tmp_path) + config = Configuration(purge_on_start=True, storage_dir=str(tmp_path)) storage_1 = await storage_type.open(storage_client=MemoryStorageClient(), configuration=config) storage_2 = await storage_type.open(storage_client=FileSystemStorageClient(), configuration=config) @@ -33,10 +30,7 @@ async def test_unique_storage_by_storage_client(tmp_path: Path, storage_type: ty async def test_same_storage_when_different_client(tmp_path: Path, storage_type: type[Storage]) -> None: - config = Configuration( - purge_on_start=True, - ) - config.storage_dir = str(tmp_path) + config = Configuration(purge_on_start=True, storage_dir=str(tmp_path)) storage_1 = await storage_type.open(storage_client=MemoryStorageClient(), configuration=config) storage_2 = await storage_type.open(storage_client=MemoryStorageClient(), configuration=config) @@ -44,10 +38,7 @@ async def test_same_storage_when_different_client(tmp_path: Path, storage_type: async def test_unique_storage_by_storage_type(tmp_path: Path) -> None: - config = Configuration( - purge_on_start=True, - ) - config.storage_dir = str(tmp_path) + config = Configuration(purge_on_start=True, storage_dir=str(tmp_path)) storage_client = MemoryStorageClient() kvs = await KeyValueStore.open(storage_client=storage_client, configuration=config) @@ -71,11 +62,9 @@ async def test_unique_storage_by_unique_cache_key_different_path(tmp_path: Path, path_1.mkdir() path_2.mkdir() - config_1 = Configuration() - config_1.storage_dir = str(path_1) + config_1 = Configuration(storage_dir=str(path_1)) - config_2 = Configuration() - config_2.storage_dir = str(path_2) + config_2 = Configuration(storage_dir=str(path_2)) storage_client = FileSystemStorageClient() @@ -87,11 +76,9 @@ async def test_unique_storage_by_unique_cache_key_different_path(tmp_path: Path, async def test_unique_storage_by_unique_cache_key_same_path(tmp_path: Path, storage_type: type[Storage]) -> None: """Test that StorageInstanceManager support unique cache key. Different configs with same storage_dir create same storage.""" - config_1 = Configuration() - config_1.storage_dir = str(tmp_path) + config_1 = Configuration(storage_dir=str(tmp_path)) - config_2 = Configuration() - config_2.storage_dir = str(tmp_path) + config_2 = Configuration(storage_dir=str(tmp_path)) storage_client = FileSystemStorageClient() diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py index 4be4309247..e93a78c23a 100644 --- a/tests/unit/test_configuration.py +++ b/tests/unit/test_configuration.py @@ -1,6 +1,3 @@ -# TODO: Update crawlee_storage_dir args once the Pydantic bug is fixed -# https://github.com/apify/crawlee-python/issues/146 - from __future__ import annotations from typing import TYPE_CHECKING @@ -38,11 +35,7 @@ def test_global_configuration_works_reversed() -> None: async def test_storage_not_persisted_when_non_persistable_storage_used(tmp_path: Path, server_url: URL) -> None: """Make the Crawler use MemoryStorageClient which can't persist state.""" - service_locator.set_configuration( - Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] - ) - ) + service_locator.set_configuration(Configuration(storage_dir=str(tmp_path))) crawler = HttpCrawler(storage_client=MemoryStorageClient()) @crawler.router.default_handler @@ -62,9 +55,7 @@ async def test_storage_persisted_with_explicit_statistics_with_persistable_stora """Make the Crawler use MemoryStorageClient which can't persist state, but pass explicit statistics to it which will use global FileSystemStorageClient() that can persist state.""" - configuration = Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] - ) + configuration = Configuration(storage_dir=str(tmp_path)) service_locator.set_configuration(configuration) service_locator.set_storage_client(FileSystemStorageClient()) @@ -85,7 +76,7 @@ async def default_handler(context: HttpCrawlingContext) -> None: async def test_storage_persisted_when_enabled(tmp_path: Path, server_url: URL) -> None: configuration = Configuration( - crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg] + storage_dir=str(tmp_path), ) storage_client = FileSystemStorageClient() diff --git a/uv.lock b/uv.lock index 552284e60f..7175522f45 100644 --- a/uv.lock +++ b/uv.lock @@ -858,7 +858,7 @@ requires-dist = [ { name = "protego", specifier = ">=0.5.0" }, { name = "psutil", specifier = ">=6.0.0" }, { name = "pydantic", specifier = ">=2.11.0" }, - { name = "pydantic-settings", specifier = ">=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0" }, + { name = "pydantic-settings", specifier = ">=2.12.0" }, { name = "pyee", specifier = ">=9.0.0" }, { name = "rich", marker = "extra == 'cli'", specifier = ">=13.9.0" }, { name = "scikit-learn", marker = "extra == 'adaptive-crawler'", specifier = ">=1.6.0" }, @@ -2697,16 +2697,16 @@ wheels = [ [[package]] name = "pydantic-settings" -version = "2.11.0" +version = "2.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/c5/dbbc27b814c71676593d1c3f718e6cd7d4f00652cefa24b75f7aa3efb25e/pydantic_settings-2.11.0.tar.gz", hash = "sha256:d0e87a1c7d33593beb7194adb8470fc426e95ba02af83a0f23474a04c9a08180", size = 188394, upload-time = "2025-09-24T14:19:11.764Z" } +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/d6/887a1ff844e64aa823fb4905978d882a633cfe295c32eacad582b78a7d8b/pydantic_settings-2.11.0-py3-none-any.whl", hash = "sha256:fe2cea3413b9530d10f3a5875adffb17ada5c1e1bab0b2885546d7310415207c", size = 48608, upload-time = "2025-09-24T14:19:10.015Z" }, + { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, ] [[package]]