From a401bac8da8888b0de51e780f438c2c6858f3577 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 17 Oct 2025 16:03:56 +0200 Subject: [PATCH] Delegate public url related functionality to client --- pyproject.toml | 2 +- .../storage_clients/_apify/_dataset_client.py | 41 ++++---------- .../_apify/_key_value_store_client.py | 56 +++++-------------- .../_apify/_request_queue_client.py | 27 +-------- src/apify/storage_clients/_apify/_utils.py | 27 +++++++++ uv.lock | 8 +-- 6 files changed, 59 insertions(+), 102 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 36135563..90f8261e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ keywords = [ "scraping", ] dependencies = [ - "apify-client>=2.0.0,<3.0.0", + "apify-client>=2.2.0,<3.0.0", "apify-shared>=2.0.0,<3.0.0", "crawlee>=1.0.2,<2.0.0", "cachetools>=5.5.0", diff --git a/src/apify/storage_clients/_apify/_dataset_client.py b/src/apify/storage_clients/_apify/_dataset_client.py index 8b6f3e11..6a30bfbc 100644 --- a/src/apify/storage_clients/_apify/_dataset_client.py +++ b/src/apify/storage_clients/_apify/_dataset_client.py @@ -1,19 +1,19 @@ from __future__ import annotations import asyncio +import warnings from logging import getLogger from typing import TYPE_CHECKING, Any from typing_extensions import override -from apify_client import ApifyClientAsync from crawlee._utils.byte_size import ByteSize from crawlee._utils.file import json_dumps from crawlee.storage_clients._base import DatasetClient from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata from crawlee.storages import Dataset -from ._utils import AliasResolver +from ._utils import AliasResolver, create_apify_client if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -52,12 +52,17 @@ def __init__( self._api_client = api_client """The Apify dataset client for API operations.""" - self._api_public_base_url = api_public_base_url - """The public base URL for accessing the key-value store records.""" - self._lock = lock """A lock to ensure that only one operation is performed at a time.""" + if api_public_base_url: + # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635 + warnings.warn( + 'api_public_base_url argument is deprecated and will be removed in version 4.0.0', + DeprecationWarning, + stacklevel=2, + ) + @override async def get_metadata(self) -> DatasetMetadata: metadata = await self._api_client.get() @@ -99,29 +104,7 @@ async def open( if sum(1 for param in [id, name, alias] if param is not None) > 1: raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.') - token = configuration.token - if not token: - raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).') - - api_url = configuration.api_base_url - if not api_url: - raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).') - - api_public_base_url = configuration.api_public_base_url - if not api_public_base_url: - raise ValueError( - 'Apify storage client requires a valid API public base URL in Configuration ' - f'(api_public_base_url={api_public_base_url}).' - ) - - # Create Apify client with the provided token and API URL. - apify_client_async = ApifyClientAsync( - token=token, - api_url=api_url, - max_retries=8, - min_delay_between_retries_millis=500, - timeout_secs=360, - ) + apify_client_async = create_apify_client(configuration) apify_datasets_client = apify_client_async.datasets() # Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed @@ -178,7 +161,7 @@ async def open( return cls( api_client=apify_dataset_client, - api_public_base_url=api_public_base_url, + api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635 lock=asyncio.Lock(), ) diff --git a/src/apify/storage_clients/_apify/_key_value_store_client.py b/src/apify/storage_clients/_apify/_key_value_store_client.py index 79215ba2..8d698a80 100644 --- a/src/apify/storage_clients/_apify/_key_value_store_client.py +++ b/src/apify/storage_clients/_apify/_key_value_store_client.py @@ -1,20 +1,18 @@ from __future__ import annotations import asyncio +import warnings from logging import getLogger from typing import TYPE_CHECKING, Any from typing_extensions import override -from yarl import URL -from apify_client import ApifyClientAsync from crawlee.storage_clients._base import KeyValueStoreClient from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata from crawlee.storages import KeyValueStore from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage -from ._utils import AliasResolver -from apify._crypto import create_hmac_signature +from ._utils import AliasResolver, create_apify_client if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -43,12 +41,17 @@ def __init__( self._api_client = api_client """The Apify KVS client for API operations.""" - self._api_public_base_url = api_public_base_url - """The public base URL for accessing the key-value store records.""" - self._lock = lock """A lock to ensure that only one operation is performed at a time.""" + if api_public_base_url: + # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635 + warnings.warn( + 'api_public_base_url argument is deprecated and will be removed in version 4.0.0', + DeprecationWarning, + stacklevel=2, + ) + @override async def get_metadata(self) -> ApifyKeyValueStoreMetadata: metadata = await self._api_client.get() @@ -90,29 +93,7 @@ async def open( if sum(1 for param in [id, name, alias] if param is not None) > 1: raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.') - token = configuration.token - if not token: - raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).') - - api_url = configuration.api_base_url - if not api_url: - raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).') - - api_public_base_url = configuration.api_public_base_url - if not api_public_base_url: - raise ValueError( - 'Apify storage client requires a valid API public base URL in Configuration ' - f'(api_public_base_url={api_public_base_url}).' - ) - - # Create Apify client with the provided token and API URL. - apify_client_async = ApifyClientAsync( - token=token, - api_url=api_url, - max_retries=8, - min_delay_between_retries_millis=500, - timeout_secs=360, - ) + apify_client_async = create_apify_client(configuration) apify_kvss_client = apify_client_async.key_value_stores() # Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to @@ -170,7 +151,7 @@ async def open( return cls( api_client=apify_kvs_client, - api_public_base_url=api_public_base_url, + api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635 lock=asyncio.Lock(), ) @@ -251,15 +232,4 @@ async def get_public_url(self, key: str) -> str: Returns: A public URL that can be used to access the value of the given key in the KVS. """ - if self._api_client.resource_id is None: - raise ValueError('resource_id cannot be None when generating a public URL') - - public_url = ( - URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key - ) - metadata = await self.get_metadata() - - if metadata.url_signing_secret_key is not None: - public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key)) - - return str(public_url) + return await self._api_client.get_record_public_url(key=key) diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py index 88b65542..74c48cde 100644 --- a/src/apify/storage_clients/_apify/_request_queue_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_client.py @@ -5,7 +5,6 @@ from typing_extensions import override -from apify_client import ApifyClientAsync from crawlee._utils.crypto import crypto_random_object_id from crawlee.storage_clients._base import RequestQueueClient from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata @@ -14,7 +13,7 @@ from ._models import ApifyRequestQueueMetadata, RequestQueueStats from ._request_queue_shared_client import ApifyRequestQueueSharedClient from ._request_queue_single_client import ApifyRequestQueueSingleClient -from ._utils import AliasResolver +from ._utils import AliasResolver, create_apify_client if TYPE_CHECKING: from collections.abc import Sequence @@ -228,29 +227,7 @@ async def open( if sum(1 for param in [id, name, alias] if param is not None) > 1: raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.') - token = configuration.token - if not token: - raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).') - - api_url = configuration.api_base_url - if not api_url: - raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).') - - api_public_base_url = configuration.api_public_base_url - if not api_public_base_url: - raise ValueError( - 'Apify storage client requires a valid API public base URL in Configuration ' - f'(api_public_base_url={api_public_base_url}).' - ) - - # Create Apify client with the provided token and API URL. - apify_client_async = ApifyClientAsync( - token=token, - api_url=api_url, - max_retries=8, - min_delay_between_retries_millis=500, - timeout_secs=360, - ) + apify_client_async = create_apify_client(configuration) apify_rqs_client = apify_client_async.request_queues() # Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to diff --git a/src/apify/storage_clients/_apify/_utils.py b/src/apify/storage_clients/_apify/_utils.py index eee87367..0648d3f5 100644 --- a/src/apify/storage_clients/_apify/_utils.py +++ b/src/apify/storage_clients/_apify/_utils.py @@ -192,3 +192,30 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> # Truncate the key to the desired length return url_safe_key[:request_id_length] + + +def create_apify_client(configuration: Configuration) -> ApifyClientAsync: + """Create and return an ApifyClientAsync instance using the provided configuration.""" + if not configuration.token: + raise ValueError(f'Apify storage client requires a valid token in Configuration (token={configuration.token}).') + + api_url = configuration.api_base_url + if not api_url: + raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).') + + api_public_base_url = configuration.api_public_base_url + if not api_public_base_url: + raise ValueError( + 'Apify storage client requires a valid API public base URL in Configuration ' + f'(api_public_base_url={api_public_base_url}).' + ) + + # Create Apify client with the provided token and API URL. + return ApifyClientAsync( + token=configuration.token, + api_url=api_url, + api_public_url=api_public_base_url, + max_retries=8, + min_delay_between_retries_millis=500, + timeout_secs=360, + ) diff --git a/uv.lock b/uv.lock index 84827538..68c16845 100644 --- a/uv.lock +++ b/uv.lock @@ -73,7 +73,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "apify-client", specifier = ">=2.0.0,<3.0.0" }, + { name = "apify-client", specifier = ">=2.2.0,<3.0.0" }, { name = "apify-shared", specifier = ">=2.0.0,<3.0.0" }, { name = "cachetools", specifier = ">=5.5.0" }, { name = "crawlee", specifier = ">=1.0.2,<2.0.0" }, @@ -112,7 +112,7 @@ dev = [ [[package]] name = "apify-client" -version = "2.1.0" +version = "2.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apify-shared" }, @@ -120,9 +120,9 @@ dependencies = [ { name = "impit" }, { name = "more-itertools" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b7/e5/0527749e9748faeb19ed38b05b1618507beacbc885711b9156b28966aea3/apify_client-2.1.0.tar.gz", hash = "sha256:328bc76eda161bed7211be6e4915833ebd56c87c11f623ab5276c5d718f970c8", size = 361484, upload-time = "2025-09-15T08:43:07.947Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/c3/52e441203019b492c7c9f14bd64361c72143bfb66209b742903f95e9012a/apify_client-2.2.0.tar.gz", hash = "sha256:5ac6a0d463b84a4c6785edb5d205ba34443919f87f859493205c8871e9aad9b9", size = 382205, upload-time = "2025-10-13T13:03:19.235Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/59/c7ec8577bb41bc33f4fa43f674f80aa3eb44e399fe448a8ba39d76ea75e4/apify_client-2.1.0-py3-none-any.whl", hash = "sha256:515a16b16ea3dba9cf05973a0900bb1ae8951c0154d1566c83ac6544d61af16c", size = 85485, upload-time = "2025-09-15T08:43:06.525Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ab/39acf2b24cb55e3202e2c1b6d34c34e16de647e14eee8b532c4fa467cd98/apify_client-2.2.0-py3-none-any.whl", hash = "sha256:8b70983fbf52790d9fbd6b567f5e93e084b2b3cdfa5e1f2af1122be37442d787", size = 85913, upload-time = "2025-10-13T13:03:17.24Z" }, ] [[package]]