From 3b88c3269b4c103339271e4ab1b0c8b056c54163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Nesveda?= Date: Wed, 8 Feb 2023 13:54:52 +0100 Subject: [PATCH] Allow overriding `Configuration` values in constructor, unify setting its properties --- src/apify/_utils.py | 30 ++++++- src/apify/config.py | 93 +++++++++++++++++----- src/apify/consts.py | 7 ++ tests/unit/actor/test_actor_env_helpers.py | 7 +- 4 files changed, 111 insertions(+), 26 deletions(-) diff --git a/src/apify/_utils.py b/src/apify/_utils.py index b584e4f4..9eb1e69c 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -31,10 +31,12 @@ from .consts import ( _BOOL_ENV_VARS_TYPE, _DATETIME_ENV_VARS_TYPE, + _FLOAT_ENV_VARS_TYPE, _INTEGER_ENV_VARS_TYPE, _STRING_ENV_VARS_TYPE, BOOL_ENV_VARS, DATETIME_ENV_VARS, + FLOAT_ENV_VARS, INTEGER_ENV_VARS, REQUEST_ID_LENGTH, ApifyEnvVars, @@ -113,6 +115,16 @@ def _fetch_and_parse_env_var(env_var: _DATETIME_ENV_VARS_TYPE, default: datetime ... +@overload +def _fetch_and_parse_env_var(env_var: _FLOAT_ENV_VARS_TYPE) -> Optional[float]: # noqa: U100 + ... + + +@overload +def _fetch_and_parse_env_var(env_var: _FLOAT_ENV_VARS_TYPE, default: float) -> float: # noqa: U100 + ... + + @overload def _fetch_and_parse_env_var(env_var: _INTEGER_ENV_VARS_TYPE) -> Optional[int]: # noqa: U100 ... 
@@ -147,11 +159,16 @@ def _fetch_and_parse_env_var(env_var: Any, default: Any = None) -> Any: if env_var in BOOL_ENV_VARS: return _maybe_parse_bool(val) + if env_var in FLOAT_ENV_VARS: + parsed_float = _maybe_parse_float(val) + if parsed_float is None: + return default + return parsed_float if env_var in INTEGER_ENV_VARS: - res = _maybe_parse_int(val) - if res is None: + parsed_int = _maybe_parse_int(val) + if parsed_int is None: return default - return res + return parsed_int if env_var in DATETIME_ENV_VARS: return _maybe_parse_datetime(val) return val @@ -185,6 +202,13 @@ def _maybe_parse_datetime(val: str) -> Union[datetime, str]: return val +def _maybe_parse_float(val: str) -> Optional[float]: + try: + return float(val) + except ValueError: + return None + + def _maybe_parse_int(val: str) -> Optional[int]: try: return int(val) diff --git a/src/apify/config.py b/src/apify/config.py index 55bf5e15..c59bdd10 100644 --- a/src/apify/config.py +++ b/src/apify/config.py @@ -13,48 +13,97 @@ class Configuration: _default_instance: Optional['Configuration'] = None - def __init__(self) -> None: - """Create a `Configuration` instance.""" + def __init__( + self, + *, + api_base_url: Optional[str] = None, + api_public_base_url: Optional[str] = None, + container_port: Optional[int] = None, + container_url: Optional[str] = None, + default_dataset_id: Optional[str] = None, + default_key_value_store_id: Optional[str] = None, + default_request_queue_id: Optional[str] = None, + input_key: Optional[str] = None, + max_used_cpu_ratio: Optional[float] = None, + metamorph_after_sleep_millis: Optional[int] = None, + persist_state_interval_millis: Optional[int] = None, + persist_storage: Optional[bool] = None, + proxy_hostname: Optional[str] = None, + proxy_password: Optional[str] = None, + proxy_port: Optional[int] = None, + proxy_status_url: Optional[str] = None, + purge_on_start: Optional[bool] = None, + token: Optional[str] = None, + system_info_interval_millis: Optional[int] = None, 
+ ) -> None: + """Create a `Configuration` instance. + + All the parameters are loaded by default from environment variables when running on the Apify platform. + You can override them here in the Configuration constructor, which might be useful for local testing of your actors. + + Args: + api_base_url (str, optional): The URL of the Apify API. + This is the URL actually used for connecting to the API, so it can contain an IP address when running in a container on the platform. + api_public_base_url (str, optional): The public URL of the Apify API. + This will always contain the public URL of the API, even when running in a container on the platform. + Useful for generating shareable URLs to key-value store records or datasets. + container_port (int, optional): The port on which the container can listen for HTTP requests. + container_url (str, optional): The URL on which the container can listen for HTTP requests. + default_dataset_id (str, optional): The ID of the default dataset for the actor. + default_key_value_store_id (str, optional): The ID of the default key-value store for the actor. + default_request_queue_id (str, optional): The ID of the default request queue for the actor. + input_key (str, optional): The key of the input record in the actor's default key-value store. + max_used_cpu_ratio (float, optional): The CPU usage above which the SYSTEM_INFO event will report the CPU is overloaded. + metamorph_after_sleep_millis (int, optional): How long should the actor sleep after calling metamorph. + persist_state_interval_millis (int, optional): How often should the actor emit the PERSIST_STATE event. + persist_storage (bool, optional): Whether the actor should persist its used storages to the filesystem when running locally. + proxy_hostname (str, optional): The hostname of Apify Proxy. + proxy_password (str, optional): The password for Apify Proxy. + proxy_port (int, optional): The port of Apify Proxy. 
+ proxy_status_url (str, optional): The URL on which the Apify Proxy status page is available. + purge_on_start (bool, optional): Whether the actor should purge its default storages on startup, when running locally. + token (str, optional): The API token for the Apify API this actor should use. + system_info_interval_millis (int, optional): How often should the actor emit the SYSTEM_INFO event when running locally. + """ self.actor_build_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_BUILD_ID) self.actor_build_number = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_BUILD_NUMBER) self.actor_events_ws_url = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_EVENTS_WS_URL) self.actor_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_ID) self.actor_run_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_RUN_ID) self.actor_task_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_TASK_ID) - self.api_base_url = _fetch_and_parse_env_var(ApifyEnvVars.API_BASE_URL, 'https://api.apify.com') - self.api_public_base_url = _fetch_and_parse_env_var(ApifyEnvVars.API_PUBLIC_BASE_URL, 'https://api.apify.com') + self.api_base_url = api_base_url or _fetch_and_parse_env_var(ApifyEnvVars.API_BASE_URL, 'https://api.apify.com') + self.api_public_base_url = api_public_base_url or _fetch_and_parse_env_var(ApifyEnvVars.API_PUBLIC_BASE_URL, 'https://api.apify.com') self.chrome_executable_path = _fetch_and_parse_env_var(ApifyEnvVars.CHROME_EXECUTABLE_PATH) - self.container_port = _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_PORT, 4321) - self.container_url = _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_URL, 'http://localhost:4321') + self.container_port = container_port if container_port is not None else _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_PORT, 4321) + self.container_url = container_url or _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_URL, 'http://localhost:4321') self.dedicated_cpus = _fetch_and_parse_env_var(ApifyEnvVars.DEDICATED_CPUS) self.default_browser_path = 
_fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_BROWSER_PATH) - self.default_dataset_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_DATASET_ID, 'default') - self.default_key_value_store_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_KEY_VALUE_STORE_ID, 'default') - self.default_request_queue_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_REQUEST_QUEUE_ID, 'default') + self.default_dataset_id = default_dataset_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_DATASET_ID, 'default') + self.default_key_value_store_id = default_key_value_store_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_KEY_VALUE_STORE_ID, 'default') + self.default_request_queue_id = default_request_queue_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_REQUEST_QUEUE_ID, 'default') self.disable_browser_sandbox = _fetch_and_parse_env_var(ApifyEnvVars.DISABLE_BROWSER_SANDBOX, False) self.headless = _fetch_and_parse_env_var(ApifyEnvVars.HEADLESS, True) - self.input_key = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_KEY, 'INPUT') + self.input_key = input_key or _fetch_and_parse_env_var(ApifyEnvVars.INPUT_KEY, 'INPUT') self.input_secrets_private_key_file = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE) self.input_secrets_private_key_passphrase = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE) self.is_at_home = _fetch_and_parse_env_var(ApifyEnvVars.IS_AT_HOME, False) + self.max_used_cpu_ratio = max_used_cpu_ratio if max_used_cpu_ratio is not None else _fetch_and_parse_env_var(ApifyEnvVars.MAX_USED_CPU_RATIO, 0.95) # noqa: E501 self.memory_mbytes = _fetch_and_parse_env_var(ApifyEnvVars.MEMORY_MBYTES) self.meta_origin = _fetch_and_parse_env_var(ApifyEnvVars.META_ORIGIN) - self.metamorph_after_sleep_millis = _fetch_and_parse_env_var(ApifyEnvVars.METAMORPH_AFTER_SLEEP_MILLIS, 300000) - self.persist_state_interval_millis = _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, 60000) - self.persist_storage = _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STORAGE) - 
self.proxy_hostname = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_HOSTNAME, 'proxy.apify.com') - self.proxy_password = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PASSWORD) - self.proxy_port = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PORT, 8000) - self.proxy_status_url = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_STATUS_URL, 'http://proxy.apify.com') - self.purge_on_start = _fetch_and_parse_env_var(ApifyEnvVars.PURGE_ON_START, True) + self.metamorph_after_sleep_millis = metamorph_after_sleep_millis if metamorph_after_sleep_millis is not None else _fetch_and_parse_env_var(ApifyEnvVars.METAMORPH_AFTER_SLEEP_MILLIS, 300000) # noqa: E501 + self.persist_state_interval_millis = persist_state_interval_millis if persist_state_interval_millis is not None else _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, 60000) # noqa: E501 + self.persist_storage = persist_storage if persist_storage is not None else _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STORAGE) + self.proxy_hostname = proxy_hostname or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_HOSTNAME, 'proxy.apify.com') + self.proxy_password = proxy_password or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PASSWORD) + self.proxy_port = proxy_port if proxy_port is not None else _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PORT, 8000) + self.proxy_status_url = proxy_status_url or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_STATUS_URL, 'http://proxy.apify.com') + self.purge_on_start = purge_on_start if purge_on_start is not None else _fetch_and_parse_env_var(ApifyEnvVars.PURGE_ON_START, True) self.started_at = _fetch_and_parse_env_var(ApifyEnvVars.STARTED_AT) self.timeout_at = _fetch_and_parse_env_var(ApifyEnvVars.TIMEOUT_AT) - self.token = _fetch_and_parse_env_var(ApifyEnvVars.TOKEN) + self.token = token or _fetch_and_parse_env_var(ApifyEnvVars.TOKEN) self.user_id = _fetch_and_parse_env_var(ApifyEnvVars.USER_ID) self.xvfb = _fetch_and_parse_env_var(ApifyEnvVars.XVFB, False) - self.system_info_interval_millis = _fetch_and_parse_env_var(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, 60000) - - self.max_used_cpu_ratio = 0.95 + self.system_info_interval_millis = system_info_interval_millis if system_info_interval_millis is not None else
_fetch_and_parse_env_var(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, 60000) # noqa: E501 @classmethod def _get_default_instance(cls) -> 'Configuration': diff --git a/src/apify/consts.py b/src/apify/consts.py index 68ef8a65..ef8ad587 100644 --- a/src/apify/consts.py +++ b/src/apify/consts.py @@ -48,6 +48,7 @@ class ApifyEnvVars(str, Enum): LOCAL_STORAGE_DIR = 'APIFY_LOCAL_STORAGE_DIR' LOG_FORMAT = 'APIFY_LOG_FORMAT' LOG_LEVEL = 'APIFY_LOG_LEVEL' + MAX_USED_CPU_RATIO = 'APIFY_MAX_USED_CPU_RATIO' MEMORY_MBYTES = 'APIFY_MEMORY_MBYTES' META_ORIGIN = 'APIFY_META_ORIGIN' PERSIST_STORAGE = 'APIFY_PERSIST_STORAGE' @@ -83,6 +84,12 @@ class ApifyEnvVars(str, Enum): INTEGER_ENV_VARS: List[_INTEGER_ENV_VARS_TYPE] = list(get_args(_INTEGER_ENV_VARS_TYPE)) +_FLOAT_ENV_VARS_TYPE = Literal[ + ApifyEnvVars.MAX_USED_CPU_RATIO, +] + +FLOAT_ENV_VARS: List[_FLOAT_ENV_VARS_TYPE] = list(get_args(_FLOAT_ENV_VARS_TYPE)) + _BOOL_ENV_VARS_TYPE = Literal[ ApifyEnvVars.DISABLE_BROWSER_SANDBOX, ApifyEnvVars.DISABLE_OUTDATED_WARNING, diff --git a/tests/unit/actor/test_actor_env_helpers.py b/tests/unit/actor/test_actor_env_helpers.py index 50139768..bfd1c0c5 100644 --- a/tests/unit/actor/test_actor_env_helpers.py +++ b/tests/unit/actor/test_actor_env_helpers.py @@ -6,7 +6,7 @@ import pytest from apify import Actor -from apify.consts import BOOL_ENV_VARS, DATETIME_ENV_VARS, INTEGER_ENV_VARS, STRING_ENV_VARS, ApifyEnvVars +from apify.consts import BOOL_ENV_VARS, DATETIME_ENV_VARS, FLOAT_ENV_VARS, INTEGER_ENV_VARS, STRING_ENV_VARS, ApifyEnvVars class TestIsAtHome: @@ -31,6 +31,11 @@ async def test_get_env_use_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> No expected_get_env[int_get_env_var] = random.randint(1, 99999) monkeypatch.setenv(int_env_var, f'{expected_get_env[int_get_env_var]}') + for float_env_var in FLOAT_ENV_VARS: + float_get_env_var = float_env_var.name.lower() + expected_get_env[float_get_env_var] = random.random() + monkeypatch.setenv(float_env_var, f'{expected_get_env[float_get_env_var]}') + for 
bool_env_var in BOOL_ENV_VARS: bool_get_env_var = bool_env_var.name.lower() expected_get_env[bool_get_env_var] = random.choice([True, False])