diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9f8e0da..3fcde7d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -41,5 +41,5 @@ jobs: - name: Lint run: uv run pylint $(git ls-files '*.py') - #- name: Run tests - # run: uv run pytest tests/ -v + - name: Run tests + run: uv run pytest tests/ -v diff --git a/README.md b/README.md index 0058725..a9ae5e9 100644 --- a/README.md +++ b/README.md @@ -159,10 +159,13 @@ cd web-hacker uv venv --prompt web-hacker source .venv/bin/activate # Windows: .venv\\Scripts\\activate -# 3) Install in editable mode via uv (pip-compatible interface) +# 3) Install exactly what the lockfile specifies +uv sync + +# 4) Install in editable mode via uv (pip-compatible interface) uv pip install -e . -# 4) Configure environment +# 5) Configure environment cp .env.example .env # then edit values # or set directly export OPENAI_API_KEY="sk-..." @@ -304,7 +307,7 @@ Use the **routine-discovery pipeline** to analyze captured data and synthesize a **Linux/macOS (bash):** ```bash python scripts/discover_routines.py \ - --task "recover the api endpoints for searching for trains and their prices" \ + --task "Recover API endpoints for searching for trains and their prices" \ --cdp-captures-dir ./cdp_captures \ --output-dir ./routine_discovery_output \ --llm-model gpt-5 diff --git a/scripts/browser_monitor.py b/scripts/browser_monitor.py index af8f474..3c3fb04 100644 --- a/scripts/browser_monitor.py +++ b/scripts/browser_monitor.py @@ -12,11 +12,12 @@ import shutil import sys +from src.config import Config from src.cdp.cdp_session import CDPSession from src.data_models.network import ResourceType from src.cdp.tab_managements import cdp_new_tab, dispose_context -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) # ---- Configuration ---- @@ -381,7 +382,7 @@ def main(): 
logger.info(f"│ └── response_body.[ext]") logger.info(f"└── storage/") logger.info(f" └── events.jsonl") - logger.info() + logger.info("\n") logger.info(f"Session complete! Check {args.output_dir} for all outputs.") except Exception as e: diff --git a/scripts/discover_routines.py b/scripts/discover_routines.py index 6f6fec4..d9e852f 100644 --- a/scripts/discover_routines.py +++ b/scripts/discover_routines.py @@ -1,4 +1,6 @@ """ +scripts/discover_routines.py + Script for discovering routines from the network transactions. """ @@ -13,7 +15,7 @@ from src.routine_discovery.agent import RoutineDiscoveryAgent from src.routine_discovery.context_manager import ContextManager -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/scripts/execute_routine.py b/scripts/execute_routine.py index d059173..173822f 100644 --- a/scripts/execute_routine.py +++ b/scripts/execute_routine.py @@ -18,10 +18,11 @@ import json import logging +from src.config import Config from src.cdp.routine_execution import execute_routine from src.data_models.production_routine import Routine -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/cdp/cdp_session.py b/src/cdp/cdp_session.py index cfe75ad..a8667a4 100644 --- a/src/cdp/cdp_session.py +++ b/src/cdp/cdp_session.py @@ -10,10 +10,11 @@ import threading import time +from src.config import Config from src.cdp.network_monitor import NetworkMonitor from src.cdp.storage_monitor import StorageMonitor -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/cdp/network_monitor.py b/src/cdp/network_monitor.py index 506d091..8a83959 
100644 --- a/src/cdp/network_monitor.py +++ b/src/cdp/network_monitor.py @@ -28,6 +28,7 @@ from fnmatch import fnmatch from typing import Any +from src.config import Config from src.utils.cdp_utils import ( build_pair_dir, get_set_cookie_values, @@ -37,7 +38,7 @@ from src.data_models.network import Stage -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/cdp/routine_execution.py b/src/cdp/routine_execution.py index 3d50c01..f7473f9 100644 --- a/src/cdp/routine_execution.py +++ b/src/cdp/routine_execution.py @@ -1,3 +1,9 @@ +""" +src/cdp/routine_execution.py + +Execute a routine using Chrome DevTools Protocol. +""" + import json import logging import random @@ -9,6 +15,7 @@ import requests import websocket +from src.config import Config from src.data_models.production_routine import ( Routine, Endpoint, @@ -18,7 +25,7 @@ RoutineSleepOperation, ) -logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/config.py b/src/config.py index a64eff1..eccd14f 100644 --- a/src/config.py +++ b/src/config.py @@ -4,6 +4,7 @@ Centralized environment variable configuration. """ +import logging import os from typing import Any @@ -11,12 +12,24 @@ load_dotenv() +# configure httpx logger to suppress verbose HTTP logs +logging.getLogger("httpx").setLevel(logging.WARNING) + class Config(): """ Centralized configuration for environment variables. 
""" + # logging configuration + LOG_LEVEL: int = logging.getLevelNamesMapping().get( + os.getenv("LOG_LEVEL", "INFO").upper(), + logging.INFO + ) + LOG_DATE_FORMAT: str = os.getenv("LOG_DATE_FORMAT", "%Y-%m-%d %H:%M:%S") + LOG_FORMAT: str = os.getenv("LOG_FORMAT", "[%(asctime)s] %(levelname)s:%(name)s:%(message)s") + + # API keys OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY") @classmethod diff --git a/src/data_models/production_routine.py b/src/data_models/production_routine.py index ee11c94..5c86a69 100644 --- a/src/data_models/production_routine.py +++ b/src/data_models/production_routine.py @@ -1,3 +1,9 @@ +""" +src/data_models/production_routine.py + +Production routine data models. +""" + import re import time import uuid @@ -84,6 +90,7 @@ class BuiltinParameter(BaseModel): description="Function to generate the builtin parameter value" ) + BUILTIN_PARAMETERS = [ BuiltinParameter( name="uuid", @@ -101,7 +108,6 @@ class BuiltinParameter(BaseModel): class Parameter(BaseModel): """ Parameter model with comprehensive validation and type information. 
- Fields: name (str): Parameter name (must be valid Python identifier) type (ParameterType): Parameter data type @@ -117,13 +123,12 @@ class Parameter(BaseModel): enum_values (list[str] | None): Allowed values for enum type format (str | None): Format specification (e.g., 'YYYY-MM-DD') """ - + # reserved prefixes: names that cannot be used at the beginning of a parameter name RESERVED_PREFIXES: ClassVar[list[str]] = [ "sessionStorage", "localStorage", "cookie", "meta", "uuid", "epoch_milliseconds" ] - - + name: str = Field(..., description="Parameter name (must be valid Python identifier)") type: ParameterType = Field( default=ParameterType.STRING, @@ -179,7 +184,7 @@ def validate_name(cls, v): """Ensure parameter name is a valid Python identifier and not reserved.""" if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', v): raise ValueError(f"Parameter name '{v}' is not a valid Python identifier") - + # Check for reserved prefixes for prefix in cls.RESERVED_PREFIXES: if v.startswith(prefix): @@ -187,16 +192,15 @@ def validate_name(cls, v): f"Parameter name '{v}' cannot start with '{prefix}'. 
" f"Reserved prefixes: {cls.RESERVED_PREFIXES}" ) - + return v - @field_validator('type') - @classmethod - def validate_type_consistency(cls, v, info): + @model_validator(mode='after') + def validate_type_consistency(self) -> 'Parameter': """Validate type-specific constraints are consistent.""" - if v == ParameterType.ENUM and not info.data.get('enum_values'): + if self.type == ParameterType.ENUM and not self.enum_values: raise ValueError("enum_values must be provided for enum type") - return v + return self @field_validator('default') @classmethod @@ -226,7 +230,6 @@ def validate_default_type(cls, v, info): else: raise ValueError(f"Default value {v} is not a valid boolean value") raise ValueError(f"Default value {v} cannot be converted to boolean") - return v @field_validator('examples') @@ -267,7 +270,6 @@ def validate_examples_type(cls, v, info): return validated_examples - class HTTPMethod(StrEnum): """ Supported HTTP methods for API endpoints. @@ -319,7 +321,6 @@ class RoutineOperationTypes(StrEnum): RETURN = "return" - class RoutineOperation(BaseModel): """ Base class for routine operations. @@ -441,31 +442,47 @@ def validate_parameter_usage(self) -> 'Routine': and no undefined parameters are used. Raises ValueError if unused parameters are found or undefined parameters are used. """ + # Check 0: Ensure name and description fields don't contain parameter placeholders + # These are metadata fields and should not have interpolation patterns + param_pattern = r'\{\{([^}]*)\}\}' + # check in Routine.name + name_matches = re.findall(param_pattern, self.name) + if name_matches: + raise ValueError( + f"Parameter placeholders found in routine name '{self.name}': {name_matches}. " + "The 'name' field is a metadata field and should not contain parameter placeholders like {{param}}." 
+ ) + # check in Routine.description + description_matches = re.findall(param_pattern, self.description) + if description_matches: + raise ValueError( + f"Parameter placeholders found in routine description: {description_matches}. " + "The 'description' field is a metadata field and should not contain parameter placeholders like {{param}}." + ) + # list of builtin parameter names builtin_parameter_names = [builtin_parameter.name for builtin_parameter in BUILTIN_PARAMETERS] - + # Convert the entire routine to JSON string for searching routine_json = self.model_dump_json() # Extract all parameter names defined_parameters = {param.name for param in self.parameters} - # Find all parameter usages in the JSON: *"{{*}}"* - # Match quoted placeholders: "{{param}}" or \"{{param}}\" (escaped quotes in JSON strings) - # \"{{param}}\" in JSON string means "{{param}}" in actual value - # Pattern REQUIRES quotes (either " or \") immediately before {{ and after }} - param_pattern = r'(?:"|\\")\{\{([^}"]*)\}\}(?:"|\\")' + # Find all parameter usages in the JSON: {{*}} + # Match placeholders anywhere: {{param}} + # This matches parameters whether they're standalone quoted values or embedded in strings + param_pattern = r'\{\{([^}]*)\}\}' matches = re.findall(param_pattern, routine_json) - + # track used parameters used_parameters = set() - + # iterate over all parameter usages for match in matches: - # clean the match (already extracted the content between braces) match = match.strip() - + # if the parameter name contains a colon, it is a storage parameter if ":" in match: kind, path = [p.strip() for p in match.split(":", 1)] @@ -484,7 +501,7 @@ def validate_parameter_usage(self) -> 'Routine': if unused_parameters: raise ValueError( f"Unused parameters found in routine '{self.name}': {list(unused_parameters)}. " - f"All defined parameters must be used somewhere in the routine operations." + "All defined parameters must be used somewhere in the routine operations." 
) # Check 2: No undefined parameters should be used @@ -492,7 +509,7 @@ def validate_parameter_usage(self) -> 'Routine': if undefined_parameters: raise ValueError( f"Undefined parameters found in routine '{self.name}': {list(undefined_parameters)}. " - f"All parameters used in the routine must be defined in parameters." + "All parameters used in the routine must be defined in parameters." ) return self diff --git a/src/routine_discovery/agent.py b/src/routine_discovery/agent.py index 6d4b763..f0eb4d2 100644 --- a/src/routine_discovery/agent.py +++ b/src/routine_discovery/agent.py @@ -12,6 +12,7 @@ from openai import OpenAI from pydantic import BaseModel, Field +from src.config import Config from src.routine_discovery.context_manager import ContextManager from src.utils.llm_utils import llm_parse_text_to_model, collect_text_from_response, manual_llm_parse_text_to_model from src.data_models.llm_responses import ( @@ -26,7 +27,7 @@ from src.data_models.dev_routine import Routine, RoutineFetchOperation from src.utils.exceptions import TransactionIdentificationFailedError -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/utils/data_utils.py b/src/utils/data_utils.py new file mode 100644 index 0000000..fd2bc77 --- /dev/null +++ b/src/utils/data_utils.py @@ -0,0 +1,82 @@ +""" +src/utils/data_utils.py + +Utility functions for loading data. +""" + +import datetime +import json +from decimal import Decimal +from pathlib import Path +from typing import Any + +from src.utils.exceptions import UnsupportedFileFormat + + +def load_data(file_path: Path) -> dict | list: + """ + Load data from a file. + Raises: + UnsupportedFileFormat: If the file is of an unsupported type. + Args: + file_path (Path): Path to the JSON file. + Returns: + dict | list: Data contained in file. 
+ """ + file_path_str = str(file_path) + if file_path_str.endswith(".json"): + with open(file_path_str, mode="r", encoding="utf-8") as data_file: + json_data = json.load(data_file) + return json_data + + raise UnsupportedFileFormat(f"No support for provided file type: {file_path_str}.") + + +def convert_floats_to_decimals(obj: Any) -> Any: + """ + Convert all float values in a JSON-like object to Decimal values. + Useful when putting or updating data into a DynamoDB table. + Parameters: + obj (Any): The object to convert. + Returns: + Any: The converted object. + """ + if isinstance(obj, float): + return Decimal(str(obj)) + elif isinstance(obj, dict): + return {k: convert_floats_to_decimals(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [convert_floats_to_decimals(i) for i in obj] + return obj + + +def convert_decimals_to_floats(obj: Any) -> Any: + """ + Convert all Decimal values in a JSON-like object to float values. + Useful when getting data from a DynamoDB table. + Parameters: + obj (Any): The object to convert. + Returns: + Any: The converted object. + """ + if isinstance(obj, Decimal): + return float(obj) + elif isinstance(obj, dict): + return {k: convert_decimals_to_floats(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [convert_decimals_to_floats(i) for i in obj] + return obj + + +def serialize_datetime(obj: Any) -> Any: + """ + Recursively convert datetime.datetime instances to ISO-8601 strings. + DynamoDB/Boto3 cannot accept raw datetimes. + """ + if isinstance(obj, dict): + return {k: serialize_datetime(v) for k, v in obj.items()} + if isinstance(obj, list): + return [serialize_datetime(v) for v in obj] + if isinstance(obj, datetime.datetime): + return obj.isoformat() + return obj diff --git a/src/utils/exceptions.py b/src/utils/exceptions.py index 53454a7..76228fa 100644 --- a/src/utils/exceptions.py +++ b/src/utils/exceptions.py @@ -4,6 +4,12 @@ Custom exceptions for the project. 
""" +class UnsupportedFileFormat(Exception): + """ + Raised when encountering an unsupported file type for some operation. + """ + + class ApiKeyNotFoundError(Exception): """ Raised when an API key is not found in the environment variables. diff --git a/src/utils/llm_utils.py b/src/utils/llm_utils.py index 0b2b166..eb6c826 100644 --- a/src/utils/llm_utils.py +++ b/src/utils/llm_utils.py @@ -12,9 +12,10 @@ from openai.types.responses import Response from pydantic import BaseModel +from src.config import Config from src.utils.exceptions import LLMStructuredOutputError -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c79c3cd --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,43 @@ +""" +tests/conftest.py + +Configuration for pytest. +""" + +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="session") +def tests_root() -> Path: + """ + Root directory for tests. + Returns: + Path to the tests directory. + """ + return Path(__file__).parent.resolve() + + +@pytest.fixture(scope="session") +def data_dir(tests_root: Path) -> Path: + """ + Directory containing test data files. + Returns: + Path to tests/data. + """ + d = tests_root / "data" + d.mkdir(parents=True, exist_ok=True) + return d + + +@pytest.fixture(scope="session") +def input_data_dir(data_dir: Path) -> Path: + """ + Directory containing input test data files. + Returns: + Path to tests/data/input. 
+ """ + d = data_dir / "input" + d.mkdir(parents=True, exist_ok=True) + return d diff --git a/tests/data/input/production_routine/parameter_invalid_enum_no_values.json b/tests/data/input/production_routine/parameter_invalid_enum_no_values.json new file mode 100644 index 0000000..2971226 --- /dev/null +++ b/tests/data/input/production_routine/parameter_invalid_enum_no_values.json @@ -0,0 +1,6 @@ +{ + "name": "choice", + "type": "enum", + "required": true, + "description": "This should fail because enum_values is missing" +} \ No newline at end of file diff --git a/tests/data/input/production_routine/parameter_invalid_reserved_name.json b/tests/data/input/production_routine/parameter_invalid_reserved_name.json new file mode 100644 index 0000000..69e8d8f --- /dev/null +++ b/tests/data/input/production_routine/parameter_invalid_reserved_name.json @@ -0,0 +1,6 @@ +{ + "name": "sessionStorage_data", + "type": "string", + "required": true, + "description": "This should fail due to reserved prefix" +} \ No newline at end of file diff --git a/tests/data/input/production_routine/parameter_valid_enum.json b/tests/data/input/production_routine/parameter_valid_enum.json new file mode 100644 index 0000000..a775478 --- /dev/null +++ b/tests/data/input/production_routine/parameter_valid_enum.json @@ -0,0 +1,9 @@ +{ + "name": "status", + "type": "enum", + "required": true, + "description": "Status of the item", + "default": "active", + "examples": ["active", "pending"], + "enum_values": ["active", "pending", "completed", "cancelled"] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/parameter_valid_integer.json b/tests/data/input/production_routine/parameter_valid_integer.json new file mode 100644 index 0000000..935596c --- /dev/null +++ b/tests/data/input/production_routine/parameter_valid_integer.json @@ -0,0 +1,10 @@ +{ + "name": "page_number", + "type": "integer", + "required": false, + "description": "Page number for pagination", + "default": 1, + 
"examples": [1, 2, 10, 100], + "min_value": 1, + "max_value": 1000 +} \ No newline at end of file diff --git a/tests/data/input/production_routine/parameter_valid_string.json b/tests/data/input/production_routine/parameter_valid_string.json new file mode 100644 index 0000000..d98e99b --- /dev/null +++ b/tests/data/input/production_routine/parameter_valid_string.json @@ -0,0 +1,11 @@ +{ + "name": "user_name", + "type": "string", + "required": true, + "description": "The username for authentication", + "default": "guest", + "examples": ["john_doe", "jane_smith"], + "min_length": 3, + "max_length": 50, + "pattern": "^[a-zA-Z0-9_]+$" +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_escaped_string_params.json b/tests/data/input/production_routine/routine_escaped_string_params.json new file mode 100644 index 0000000..ca640f4 --- /dev/null +++ b/tests/data/input/production_routine/routine_escaped_string_params.json @@ -0,0 +1,89 @@ +{ + "name": "routine_with_escaped_string_params", + "description": "A routine demonstrating proper escaping of string parameters vs non-string parameters. 
String params use escaped quotes in headers and body, while integers/numbers do not need escaping.", + "incognito": true, + "parameters": [ + { + "name": "api_key", + "type": "string", + "required": true, + "description": "API key for authentication (string, needs escaping)" + }, + { + "name": "search_query", + "type": "string", + "required": true, + "description": "Search query string (needs escaping)" + }, + { + "name": "user_agent", + "type": "string", + "required": false, + "description": "User agent string (needs escaping)", + "default": "Mozilla/5.0" + }, + { + "name": "page_size", + "type": "integer", + "required": false, + "description": "Number of results per page (integer, no escaping needed)", + "default": 10 + }, + { + "name": "timeout_ms", + "type": "integer", + "required": false, + "description": "Timeout in milliseconds (integer, no escaping needed)", + "default": 5000 + }, + { + "name": "price_threshold", + "type": "number", + "required": false, + "description": "Price threshold (number, no escaping needed)", + "default": 99.99 + } + ], + "operations": [ + { + "type": "navigate", + "url": "https://example.com/search?q={{search_query}}" + }, + { + "type": "sleep", + "timeout_seconds": 1.5 + }, + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/v1/search?query={{search_query}}&limit={{page_size}}", + "method": "POST", + "headers": { + "Authorization": "\"{{api_key}}\"", + "User-Agent": "\"{{user_agent}}\"", + "X-Search-Query": "\"{{search_query}}\"", + "X-Page-Size": "{{page_size}}", + "X-Timeout-Ms": "{{timeout_ms}}", + "Content-Type": "application/json" + }, + "body": { + "query": "\"{{search_query}}\"", + "api_key": "\"{{api_key}}\"", + "page_size": "{{page_size}}", + "timeout_ms": "{{timeout_ms}}", + "threshold": "{{price_threshold}}", + "metadata": { + "user_agent": "\"{{user_agent}}\"", + "timestamp": "{{epoch_milliseconds}}" + } + }, + "credentials": "include" + }, + "session_storage_key": "search_results" + }, + { + 
"type": "return", + "session_storage_key": "search_results" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_invalid_param_in_description.json b/tests/data/input/production_routine/routine_invalid_param_in_description.json new file mode 100644 index 0000000..0757a0c --- /dev/null +++ b/tests/data/input/production_routine/routine_invalid_param_in_description.json @@ -0,0 +1,26 @@ +{ + "name": "routine_with_invalid_description", + "description": "This routine has a {{param}} placeholder in the description which should fail validation", + "incognito": true, + "parameters": [ + { + "name": "query", + "type": "string", + "required": true, + "description": "Search query" + } + ], + "operations": [ + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/search?q={{query}}", + "method": "GET", + "headers": {}, + "body": {}, + "credentials": "include" + }, + "session_storage_key": "results" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_invalid_param_in_name.json b/tests/data/input/production_routine/routine_invalid_param_in_name.json new file mode 100644 index 0000000..8520d2e --- /dev/null +++ b/tests/data/input/production_routine/routine_invalid_param_in_name.json @@ -0,0 +1,26 @@ +{ + "name": "Routine for {{user_id}}", + "description": "This routine has a parameter placeholder in the name which should fail validation", + "incognito": true, + "parameters": [ + { + "name": "user_id", + "type": "string", + "required": true, + "description": "User ID" + } + ], + "operations": [ + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/users/{{user_id}}", + "method": "GET", + "headers": {}, + "body": {}, + "credentials": "include" + }, + "session_storage_key": "user_data" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_invalid_unused_param.json 
b/tests/data/input/production_routine/routine_invalid_unused_param.json new file mode 100644 index 0000000..19d002f --- /dev/null +++ b/tests/data/input/production_routine/routine_invalid_unused_param.json @@ -0,0 +1,32 @@ +{ + "name": "routine_with_unused_param", + "description": "This routine should fail because it has an unused parameter", + "incognito": true, + "parameters": [ + { + "name": "used_param", + "type": "string", + "required": true, + "description": "This is used" + }, + { + "name": "unused_param", + "type": "string", + "required": true, + "description": "This is NOT used and should cause validation error" + } + ], + "operations": [ + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/search?q={{used_param}}", + "method": "GET", + "headers": {}, + "body": {}, + "credentials": "include" + }, + "session_storage_key": "results" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_url_params_only.json b/tests/data/input/production_routine/routine_url_params_only.json new file mode 100644 index 0000000..6efc0f0 --- /dev/null +++ b/tests/data/input/production_routine/routine_url_params_only.json @@ -0,0 +1,54 @@ +{ + "name": "url_params_only_routine", + "description": "A routine where all parameters are used only in URLs (not in headers or body)", + "incognito": true, + "parameters": [ + { + "name": "user_id", + "type": "string", + "required": true, + "description": "User ID for the request" + }, + { + "name": "page", + "type": "integer", + "required": false, + "description": "Page number for pagination", + "default": 1 + }, + { + "name": "filter", + "type": "string", + "required": false, + "description": "Filter criteria", + "default": "active" + } + ], + "operations": [ + { + "type": "navigate", + "url": "https://example.com/users/{{user_id}}" + }, + { + "type": "sleep", + "timeout_seconds": 1.0 + }, + { + "type": "fetch", + "endpoint": { + "url": 
"https://api.example.com/users/{{user_id}}/items?page={{page}}&filter={{filter}}", + "method": "GET", + "headers": { + "Content-Type": "application/json" + }, + "body": {}, + "credentials": "include" + }, + "session_storage_key": "user_items" + }, + { + "type": "return", + "session_storage_key": "user_items" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_valid_complex.json b/tests/data/input/production_routine/routine_valid_complex.json new file mode 100644 index 0000000..2b9527e --- /dev/null +++ b/tests/data/input/production_routine/routine_valid_complex.json @@ -0,0 +1,57 @@ +{ + "name": "complex_routine", + "description": "A complex routine with multiple parameters and operations", + "incognito": false, + "parameters": [ + { + "name": "user_id", + "type": "string", + "required": true, + "description": "User ID" + }, + { + "name": "api_token", + "type": "string", + "required": true, + "description": "API authentication token" + }, + { + "name": "limit", + "type": "integer", + "required": false, + "description": "Result limit", + "default": 10 + } + ], + "operations": [ + { + "type": "navigate", + "url": "https://example.com/user/{{user_id}}" + }, + { + "type": "sleep", + "timeout_seconds": 2.5 + }, + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/users/{{user_id}}/data?limit={{limit}}", + "method": "POST", + "headers": { + "Authorization": "Bearer {{api_token}}", + "Content-Type": "application/json" + }, + "body": { + "user_id": "{{user_id}}", + "timestamp": "{{epoch_milliseconds}}" + }, + "credentials": "include" + }, + "session_storage_key": "user_data" + }, + { + "type": "return", + "session_storage_key": "user_data" + } + ] +} \ No newline at end of file diff --git a/tests/data/input/production_routine/routine_valid_simple.json b/tests/data/input/production_routine/routine_valid_simple.json new file mode 100644 index 0000000..5cb7a22 --- /dev/null +++ 
b/tests/data/input/production_routine/routine_valid_simple.json @@ -0,0 +1,34 @@ +{ + "name": "simple_search_routine", + "description": "A simple routine that searches for items", + "incognito": true, + "parameters": [ + { + "name": "query", + "type": "string", + "required": true, + "description": "Search query" + } + ], + "operations": [ + { + "type": "navigate", + "url": "https://example.com/search" + }, + { + "type": "fetch", + "endpoint": { + "url": "https://api.example.com/search?q={{query}}", + "method": "GET", + "headers": {}, + "body": {}, + "credentials": "include" + }, + "session_storage_key": "search_results" + }, + { + "type": "return", + "session_storage_key": "search_results" + } + ] +} \ No newline at end of file diff --git a/tests/unit/test_production_routine.py b/tests/unit/test_production_routine.py new file mode 100644 index 0000000..096cf20 --- /dev/null +++ b/tests/unit/test_production_routine.py @@ -0,0 +1,1036 @@ +""" +tests/unit/test_production_routine.py + +Unit tests for production routine data models. +""" + +import re +import time +from pathlib import Path + +import pytest +from pydantic import ValidationError + +from src.utils.data_utils import load_data +from src.data_models.production_routine import ( + ResourceBase, + Routine, + Parameter, + ParameterType, + Endpoint, + HTTPMethod, + CREDENTIALS, + RoutineNavigateOperation, + RoutineSleepOperation, + RoutineFetchOperation, + RoutineReturnOperation, +) + + +def _make_basic_routine( + parameters: list[Parameter], + operations: list, + name: str = "test_routine", + description: str = "Test routine", +) -> Routine: + """ + Helper to create a basic routine for testing. + Args: + parameters: List of parameters to include. + operations: List of operations to include. + name: Routine name. + description: Routine description. + Returns: + Routine instance. 
+ """ + return Routine( + name=name, + description=description, + parameters=parameters, + operations=operations, + ) + + +def _make_fetch_operation( + url: str = "https://api.example.com/endpoint", + headers: dict | None = None, + body: dict | None = None, + session_storage_key: str = "result", +) -> RoutineFetchOperation: + """ + Helper to create a fetch operation. + Args: + url: Endpoint URL. + headers: Request headers. + body: Request body. + session_storage_key: Session storage key for result. + Returns: + RoutineFetchOperation instance. + """ + return RoutineFetchOperation( + endpoint=Endpoint( + url=url, + method=HTTPMethod.POST, + headers=headers or {}, + body=body or {}, + credentials=CREDENTIALS.INCLUDE, + ), + session_storage_key=session_storage_key, + ) + + +class TestResourceBase: + """Tests for ResourceBase class.""" + + def test_id_generation_format(self) -> None: + """ID should be generated in format ClassName_uuid.""" + + class TestResource(ResourceBase): + pass + + resource = TestResource() + # check format: ClassName_uuid + assert resource.id.startswith("TestResource_") + # check uuid portion is valid + uuid_part = resource.id.split("_", 1)[1] + uuid_pattern = r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$' + assert re.match(uuid_pattern, uuid_part) + + def test_different_subclasses_different_ids(self) -> None: + """Different subclasses should generate IDs with their own class names.""" + + class ResourceA(ResourceBase): + pass + + class ResourceB(ResourceBase): + pass + + a = ResourceA() + b = ResourceB() + + assert a.id.startswith("ResourceA_") + assert b.id.startswith("ResourceB_") + + def test_created_at_timestamp(self) -> None: + """created_at should be a valid unix timestamp.""" + + class TestResource(ResourceBase): + pass + + before = int(time.time()) + resource = TestResource() + after = int(time.time()) + + assert before <= resource.created_at <= after + assert isinstance(resource.created_at, int) + + def 
test_updated_at_timestamp(self) -> None: + """updated_at should be a valid unix timestamp.""" + + class TestResource(ResourceBase): + pass + + before = int(time.time()) + resource = TestResource() + after = int(time.time()) + + assert before <= resource.updated_at <= after + assert isinstance(resource.updated_at, int) + + def test_resource_type_property(self) -> None: + """resource_type property should return class name.""" + + class MyCustomResource(ResourceBase): + pass + + resource = MyCustomResource() + assert resource.resource_type == "MyCustomResource" + + def test_custom_id_provided(self) -> None: + """Should accept custom ID if provided.""" + + class TestResource(ResourceBase): + pass + + custom_id = "TestResource_custom-123" + resource = TestResource(id=custom_id) + assert resource.id == custom_id + + def test_custom_timestamps_provided(self) -> None: + """Should accept custom timestamps if provided.""" + + class TestResource(ResourceBase): + pass + + created = 1609459200 # 2021-01-01 + updated = 1640995200 # 2022-01-01 + resource = TestResource(created_at=created, updated_at=updated) + assert resource.created_at == created + assert resource.updated_at == updated + + def test_serialization(self) -> None: + """ResourceBase instances should be serializable.""" + + class _TestResource(ResourceBase): + name: str + + resource = _TestResource(name="test") + data = resource.model_dump() + + assert "id" in data + assert "created_at" in data + assert "updated_at" in data + assert data["name"] == "test" + + +class TestParameter: + """Tests for Parameter class.""" + + def test_valid_string_parameter(self, input_data_dir: Path) -> None: + """Valid string parameter should be created successfully.""" + data = load_data(input_data_dir / "production_routine" / "parameter_valid_string.json") + param = Parameter(**data) + + assert param.name == "user_name" + assert param.type == ParameterType.STRING + assert param.required is True + assert param.default == "guest" + assert 
len(param.examples) == 2 + + def test_valid_integer_parameter(self, input_data_dir: Path) -> None: + """Valid integer parameter should be created successfully.""" + data = load_data(input_data_dir / "production_routine" / "parameter_valid_integer.json") + param = Parameter(**data) + + assert param.name == "page_number" + assert param.type == ParameterType.INTEGER + assert param.required is False + assert param.default == 1 + assert param.min_value == 1 + assert param.max_value == 1000 + + def test_valid_enum_parameter(self, input_data_dir: Path) -> None: + """Valid enum parameter should be created successfully.""" + data = load_data(input_data_dir / "production_routine" / "parameter_valid_enum.json") + param = Parameter(**data) + + assert param.name == "status" + assert param.type == ParameterType.ENUM + assert param.enum_values == ["active", "pending", "completed", "cancelled"] + assert param.default == "active" + + @pytest.mark.parametrize("invalid_name", [ + "123invalid", # starts with number + "invalid-name", # contains hyphen + "invalid name", # contains space + "invalid.name", # contains dot + ]) + def test_invalid_parameter_names(self, invalid_name: str) -> None: + """Invalid parameter names should raise ValidationError.""" + with pytest.raises(ValidationError) as exc_info: + Parameter(name=invalid_name, description="test") + + error_msg = str(exc_info.value) + assert "not a valid Python identifier" in error_msg + + @pytest.mark.parametrize("reserved_prefix", [ + "sessionStorage", + "localStorage", + "cookie", + "meta", + "uuid", + "epoch_milliseconds", + ]) + def test_reserved_prefix_names(self, reserved_prefix: str) -> None: + """Parameter names with reserved prefixes should raise ValidationError.""" + invalid_name = f"{reserved_prefix}_data" + with pytest.raises(ValidationError) as exc_info: + Parameter(name=invalid_name, description="test") + + error_msg = str(exc_info.value) + assert f"cannot start with '{reserved_prefix}'" in error_msg + + def 
test_invalid_enum_without_values(self, input_data_dir: Path) -> None: + """Enum parameter without enum_values should raise ValidationError.""" + data = load_data(input_data_dir / "production_routine" / "parameter_invalid_enum_no_values.json") + with pytest.raises(ValidationError) as exc_info: + Parameter(**data) + + error_msg = str(exc_info.value) + assert "enum_values must be provided" in error_msg + + def test_default_value_type_conversion_integer(self) -> None: + """Default value should be converted to correct type for INTEGER.""" + param = Parameter( + name="count", + description="Count", + type=ParameterType.INTEGER, + default="42" + ) + assert param.default == 42 + assert isinstance(param.default, int) + + def test_default_value_type_conversion_number(self) -> None: + """Default value should be converted to correct type for NUMBER.""" + param = Parameter( + name="price", + description="Price", + type=ParameterType.NUMBER, + default="19.99" + ) + assert param.default == 19.99 + assert isinstance(param.default, float) + + @pytest.mark.parametrize("bool_value,expected", [ + ("true", True), + ("True", True), + ("1", True), + ("yes", True), + ("on", True), + ("false", False), + ("False", False), + ("0", False), + ("no", False), + ("off", False), + ]) + def test_default_value_boolean_conversion(self, bool_value: str, expected: bool) -> None: + """Boolean default values should be converted correctly.""" + param = Parameter( + name="enabled", + description="Enabled flag", + type=ParameterType.BOOLEAN, + default=bool_value + ) + assert param.default == expected + + def test_invalid_default_value_for_integer(self) -> None: + """Invalid default value for INTEGER should raise ValidationError.""" + with pytest.raises(ValidationError) as exc_info: + Parameter( + name="count", + description="Count", + type=ParameterType.INTEGER, + default="not_a_number" + ) + + error_msg = str(exc_info.value) + assert "cannot be converted to integer" in error_msg + + def 
test_examples_type_conversion_integer(self) -> None: + """Examples should be converted to correct type for INTEGER.""" + param = Parameter( + name="count", + description="Count", + type=ParameterType.INTEGER, + examples=["1", "2", "3"] + ) + assert param.examples == [1, 2, 3] + assert all(isinstance(ex, int) for ex in param.examples) + + def test_examples_type_conversion_number(self) -> None: + """Examples should be converted to correct type for NUMBER.""" + param = Parameter( + name="price", + description="Price", + type=ParameterType.NUMBER, + examples=["10.5", "20.99", "5"] + ) + assert param.examples == [10.5, 20.99, 5.0] + assert all(isinstance(ex, float) for ex in param.examples) + + def test_invalid_examples_for_integer(self) -> None: + """Invalid examples for INTEGER should raise ValidationError.""" + with pytest.raises(ValidationError) as exc_info: + Parameter( + name="count", + description="Count", + type=ParameterType.INTEGER, + examples=["1", "invalid", "3"] + ) + error_msg = str(exc_info.value) + assert "cannot be converted to integer" in error_msg + + def test_parameter_with_all_fields(self) -> None: + """Parameter with all optional fields should work.""" + param = Parameter( + name="advanced_param", + description="Advanced parameter", + type=ParameterType.STRING, + required=False, + default="default_value", + examples=["example1", "example2"], + min_length=5, + max_length=100, + pattern=r"^[a-z]+$", + format="lowercase" + ) + assert param.name == "advanced_param" + assert param.min_length == 5 + assert param.max_length == 100 + assert param.pattern == r"^[a-z]+$" + assert param.format == "lowercase" + + +class TestRoutineParameterValidation: + """Tests for Routine.validate_parameter_usage method.""" + + def test_parameter_in_url_query_string(self) -> None: + """Parameter used in URL query string should be valid.""" + params = [ + Parameter(name="query", description="Search query", type=ParameterType.STRING), + ] + ops = [ + 
_make_fetch_operation(url="https://api.example.com/search?q={{query}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_in_url_path(self) -> None: + """Parameter used in URL path should be valid.""" + params = [ + Parameter(name="user_id", description="User ID", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com/users/{{user_id}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_multiple_parameters_in_url(self) -> None: + """Multiple parameters in same URL should be valid.""" + params = [ + Parameter(name="query", description="Search query", type=ParameterType.STRING), + Parameter(name="limit", description="Result limit", type=ParameterType.INTEGER), + Parameter(name="offset", description="Result offset", type=ParameterType.INTEGER), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com/search?q={{query}}&limit={{limit}}&offset={{offset}}" + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_in_headers(self) -> None: + """Parameter used in headers should be valid.""" + params = [ + Parameter(name="api_token", description="API token", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + headers={"Authorization": "Bearer {{api_token}}"} + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_in_body(self) -> None: + """Parameter used in request body should be valid.""" + params = [ + Parameter(name="username", description="Username", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + body={"username": "{{username}}"} + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def 
test_parameter_in_nested_body(self) -> None: + """Parameter used in nested request body should be valid.""" + params = [ + Parameter(name="email", description="Email", type=ParameterType.EMAIL), + ] + ops = [ + _make_fetch_operation( + body={"user": {"contact": {"email": "{{email}}"}}} + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_in_navigate_url(self) -> None: + """Parameter used in navigate operation URL should be valid.""" + params = [ + Parameter(name="page_id", description="Page ID", type=ParameterType.STRING), + ] + ops = [ + RoutineNavigateOperation(url="https://example.com/page/{{page_id}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_used_multiple_times(self) -> None: + """Same parameter used multiple times should be valid.""" + params = [ + Parameter(name="user_id", description="User ID", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com/users/{{user_id}}/profile", + headers={"X-User-ID": "{{user_id}}"}, + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_all_parameters_used_across_operations(self) -> None: + """All parameters used across different operations should be valid.""" + params = [ + Parameter(name="search_term", description="Search term", type=ParameterType.STRING), + Parameter(name="result_id", description="Result ID", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com/search?q={{search_term}}", + session_storage_key="search_results", + ), + RoutineSleepOperation(timeout_seconds=1.0), + _make_fetch_operation( + url="https://api.example.com/items/{{result_id}}", + session_storage_key="item_details", + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name 
== "test_routine" + + def test_uuid_builtin(self) -> None: + """Using {{uuid}} builtin should not require parameter definition.""" + ops = [ + _make_fetch_operation( + url="https://api.example.com/create", + body={"id": "{{uuid}}"}, + ), + ] + routine = _make_basic_routine(parameters=[], operations=ops) + assert routine.name == "test_routine" + + def test_epoch_milliseconds_builtin(self) -> None: + """Using {{epoch_milliseconds}} builtin should not require parameter definition.""" + ops = [ + _make_fetch_operation( + url="https://api.example.com/events", + body={"timestamp": "{{epoch_milliseconds}}"}, + ), + ] + routine = _make_basic_routine(parameters=[], operations=ops) + assert routine.name == "test_routine" + + def test_builtin_with_regular_params(self) -> None: + """Builtins mixed with regular parameters should work.""" + params = [ + Parameter(name="event_name", description="Event name", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com/events", + body={ + "id": "{{uuid}}", + "name": "{{event_name}}", + "timestamp": "{{epoch_milliseconds}}", + }, + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + @pytest.mark.parametrize("storage_type,path", [ + ("sessionStorage", "user_data"), + ("localStorage", "settings.theme"), + ("cookie", "auth_token"), + ("meta", "page.title"), + ]) + def test_storage_parameter_types(self, storage_type: str, path: str) -> None: + """Storage parameters should not require parameter definition.""" + ops = [ + _make_fetch_operation( + body={"value": f"{{{{{storage_type}:{path}}}}}"} + ), + ] + routine = _make_basic_routine(parameters=[], operations=ops) + assert routine.name == "test_routine" + + def test_storage_with_regular_params(self) -> None: + """Storage parameters mixed with regular parameters should work.""" + params = [ + Parameter(name="new_value", description="New value", type=ParameterType.STRING), + ] + ops = [ 
+ _make_fetch_operation( + body={ + "old_value": "{{sessionStorage:cached_data}}", + "new_value": "{{new_value}}", + "timestamp": "{{epoch_milliseconds}}", + } + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_storage_in_url(self) -> None: + """Storage parameter in URL should work.""" + ops = [ + _make_fetch_operation( + url="https://api.example.com/data?token={{cookie:session_id}}" + ), + ] + routine = _make_basic_routine(parameters=[], operations=ops) + assert routine.name == "test_routine" + + def test_unused_parameter(self) -> None: + """Defined but unused parameter should raise ValidationError.""" + params = [ + Parameter(name="unused_param", description="Unused", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com/endpoint"), + ] + with pytest.raises(ValidationError) as exc_info: + _make_basic_routine(parameters=params, operations=ops) + + error_msg = str(exc_info.value) + assert "Unused parameters" in error_msg + assert "unused_param" in error_msg + + def test_multiple_unused_parameters(self) -> None: + """Multiple unused parameters should be reported.""" + params = [ + Parameter(name="unused_one", description="Unused 1", type=ParameterType.STRING), + Parameter(name="unused_two", description="Unused 2", type=ParameterType.STRING), + Parameter(name="used_param", description="Used", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{used_param}}"), + ] + with pytest.raises(ValidationError) as exc_info: + _make_basic_routine(parameters=params, operations=ops) + + error_msg = str(exc_info.value) + assert "Unused parameters" in error_msg + assert "unused_one" in error_msg or "unused_two" in error_msg + + def test_undefined_parameter(self) -> None: + """Used but undefined parameter should raise ValidationError.""" + ops = [ + 
_make_fetch_operation(url="https://api.example.com?q={{undefined_param}}"), + ] + with pytest.raises(ValidationError) as exc_info: + _make_basic_routine(parameters=[], operations=ops) + + error_msg = str(exc_info.value) + assert "Undefined parameters" in error_msg + assert "undefined_param" in error_msg + + def test_multiple_undefined_parameters(self) -> None: + """Multiple undefined parameters should be reported.""" + ops = [ + _make_fetch_operation( + url="https://api.example.com?q={{param_one}}&limit={{param_two}}" + ), + ] + with pytest.raises(ValidationError) as exc_info: + _make_basic_routine(parameters=[], operations=ops) + + error_msg = str(exc_info.value) + assert "Undefined parameters" in error_msg + assert "param_one" in error_msg or "param_two" in error_msg + + def test_both_unused_and_undefined(self) -> None: + """Having both unused and undefined parameters should fail.""" + params = [ + Parameter(name="defined_unused", description="Defined but unused", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{undefined_used}}"), + ] + with pytest.raises(ValidationError) as exc_info: + _make_basic_routine(parameters=params, operations=ops) + + error_msg = str(exc_info.value) + # should report unused first (based on validation order) + assert "Unused parameters" in error_msg or "Undefined parameters" in error_msg + + def test_empty_parameters_empty_operations(self) -> None: + """Routine with no parameters and no operations should be valid.""" + routine = _make_basic_routine(parameters=[], operations=[]) + assert routine.name == "test_routine" + + def test_parameter_in_return_operation(self) -> None: + """Parameters cannot be in return operations (only storage keys).""" + params = [ + Parameter(name="query", description="Query", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com?q={{query}}", + session_storage_key="result", + ), + 
RoutineReturnOperation(session_storage_key="result"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_with_whitespace_in_placeholder(self) -> None: + """Parameter with whitespace in placeholder should be handled.""" + params = [ + Parameter(name="query", description="Query", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{ query }}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_similar_parameter_names(self) -> None: + """Similar parameter names should be distinguished.""" + params = [ + Parameter(name="user", description="User", type=ParameterType.STRING), + Parameter(name="user_id", description="User ID", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com?user={{user}}&id={{user_id}}" + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_parameter_value_containing_braces(self) -> None: + """Regular parameters should not conflict with storage syntax.""" + params = [ + Parameter(name="data", description="Data", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(body={"value": "{{data}}"}), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + def test_complex_nested_structure(self) -> None: + """Parameters in complex nested structures should be found.""" + params = [ + Parameter(name="token", description="Token", type=ParameterType.STRING), + Parameter(name="user_id", description="User ID", type=ParameterType.STRING), + Parameter(name="action", description="Action", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation( + url="https://api.example.com/users/{{user_id}}/{{action}}", + headers={ + "Authorization": "Bearer {{token}}", + 
"X-Request-ID": "{{uuid}}", + }, + body={ + "metadata": { + "user": {"id": "{{user_id}}"}, + "action": {"type": "{{action}}"}, + "timestamp": "{{epoch_milliseconds}}", + "session": "{{sessionStorage:current_session}}", + } + }, + ), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.name == "test_routine" + + +class TestRoutine: + """Tests for Routine class (general functionality).""" + + def test_valid_simple_routine_from_json(self, input_data_dir: Path) -> None: + """Valid simple routine should be loaded from JSON.""" + data = load_data(input_data_dir / "production_routine" / "routine_valid_simple.json") + routine = Routine(**data) + + assert routine.name == "simple_search_routine" + assert routine.incognito is True + assert len(routine.parameters) == 1 + assert len(routine.operations) == 3 + assert routine.parameters[0].name == "query" + + def test_valid_complex_routine_from_json(self, input_data_dir: Path) -> None: + """Valid complex routine should be loaded from JSON.""" + data = load_data(input_data_dir / "production_routine" / "routine_valid_complex.json") + routine = Routine(**data) + + assert routine.name == "complex_routine" + assert routine.incognito is False + assert len(routine.parameters) == 3 + assert len(routine.operations) == 4 + + # verify parameter names + param_names = {p.name for p in routine.parameters} + assert param_names == {"user_id", "api_token", "limit"} + + def test_invalid_routine_unused_param_from_json(self, input_data_dir: Path) -> None: + """Routine with unused parameter should fail validation.""" + data = load_data(input_data_dir / "production_routine" / "routine_invalid_unused_param.json") + with pytest.raises(ValidationError) as exc_info: + Routine(**data) + + error_msg = str(exc_info.value) + assert "Unused parameters" in error_msg + assert "unused_param" in error_msg + + def test_invalid_routine_param_in_description_from_json(self, input_data_dir: Path) -> None: + """Routine with parameter 
placeholder in description should fail validation.""" + data = load_data(input_data_dir / "production_routine" / "routine_invalid_param_in_description.json") + with pytest.raises(ValidationError) as exc_info: + Routine(**data) + + error_msg = str(exc_info.value) + assert "Parameter placeholders found in routine description" in error_msg + assert "param" in error_msg + assert "metadata field" in error_msg + + def test_invalid_routine_param_in_name_from_json(self, input_data_dir: Path) -> None: + """Routine with parameter placeholder in name should fail validation.""" + data = load_data(input_data_dir / "production_routine" / "routine_invalid_param_in_name.json") + with pytest.raises(ValidationError) as exc_info: + Routine(**data) + + error_msg = str(exc_info.value) + assert "Parameter placeholders found in routine name" in error_msg + assert "user_id" in error_msg + assert "metadata field" in error_msg + + def test_routine_with_url_params_only_from_json(self, input_data_dir: Path) -> None: + """Routine with parameters used only in URLs should be valid.""" + data = load_data(input_data_dir / "production_routine" / "routine_url_params_only.json") + routine = Routine(**data) + + assert routine.name == "url_params_only_routine" + assert len(routine.parameters) == 3 + + # verify all three parameters + param_names = {p.name for p in routine.parameters} + assert param_names == {"user_id", "page", "filter"} + + # verify parameters are used in URLs (check navigate operation) + assert "{{user_id}}" in routine.operations[0].url + + # verify parameters are used in fetch URL + fetch_op = routine.operations[2] + assert isinstance(fetch_op, RoutineFetchOperation) + assert "{{user_id}}" in fetch_op.endpoint.url + assert "{{page}}" in fetch_op.endpoint.url + assert "{{filter}}" in fetch_op.endpoint.url + + # verify NO parameters in headers (only static header) + assert fetch_op.endpoint.headers == {"Content-Type": "application/json"} + + # verify empty body (no parameters) + assert 
fetch_op.endpoint.body == {} + + def test_routine_with_escaped_string_params_from_json(self, input_data_dir: Path) -> None: + """Routine with properly escaped string parameters should be valid.""" + data = load_data(input_data_dir / "production_routine" / "routine_escaped_string_params.json") + routine = Routine(**data) + + assert routine.name == "routine_with_escaped_string_params" + assert len(routine.parameters) == 6 + + # verify all parameters + param_names = {p.name for p in routine.parameters} + assert param_names == {"api_key", "search_query", "user_agent", "page_size", "timeout_ms", "price_threshold"} + + # verify parameter types + param_types = {p.name: p.type for p in routine.parameters} + assert param_types["api_key"] == ParameterType.STRING + assert param_types["search_query"] == ParameterType.STRING + assert param_types["user_agent"] == ParameterType.STRING + assert param_types["page_size"] == ParameterType.INTEGER + assert param_types["timeout_ms"] == ParameterType.INTEGER + assert param_types["price_threshold"] == ParameterType.NUMBER + + # get the fetch operation + fetch_op = routine.operations[2] + assert isinstance(fetch_op, RoutineFetchOperation) + + # verify STRING parameters are ESCAPED in headers with \"{{param}}\" + assert fetch_op.endpoint.headers["Authorization"] == '"{{api_key}}"' + assert fetch_op.endpoint.headers["User-Agent"] == '"{{user_agent}}"' + assert fetch_op.endpoint.headers["X-Search-Query"] == '"{{search_query}}"' + + # verify NON-STRING parameters are NOT ESCAPED in headers (just {{param}}) + assert fetch_op.endpoint.headers["X-Page-Size"] == "{{page_size}}" + assert fetch_op.endpoint.headers["X-Timeout-Ms"] == "{{timeout_ms}}" + + # verify STRING parameters are ESCAPED in body with \"{{param}}\" + assert fetch_op.endpoint.body["query"] == '"{{search_query}}"' + assert fetch_op.endpoint.body["api_key"] == '"{{api_key}}"' + assert fetch_op.endpoint.body["metadata"]["user_agent"] == '"{{user_agent}}"' + + # verify NON-STRING 
parameters are NOT ESCAPED in body (just {{param}}) + assert fetch_op.endpoint.body["page_size"] == "{{page_size}}" + assert fetch_op.endpoint.body["timeout_ms"] == "{{timeout_ms}}" + assert fetch_op.endpoint.body["threshold"] == "{{price_threshold}}" + + # verify builtin parameters (not escaped) + assert fetch_op.endpoint.body["metadata"]["timestamp"] == "{{epoch_milliseconds}}" + + # verify parameters in URL (no escaping needed in URLs) + assert "{{search_query}}" in fetch_op.endpoint.url + assert "{{page_size}}" in fetch_op.endpoint.url + + def test_yahoo_finance_routine_from_discovery_output(self) -> None: + """Real-world example: Yahoo Finance routine with escaped string headers.""" + # load the actual routine from routine_discovery_output + routine_path = Path("/home/ec2-user/web-hacker/routine_discovery_output/routine.json") + if not routine_path.exists(): + pytest.skip("Yahoo Finance routine not found in routine_discovery_output") + + data = load_data(routine_path) + routine = Routine(**data) + + assert routine.name == "Yahoo Finance - Search Ticker" + assert len(routine.parameters) == 6 + + # verify all parameters are string or integer types + param_types = {p.name: p.type for p in routine.parameters} + assert param_types["query"] == ParameterType.STRING + assert param_types["lang"] == ParameterType.STRING + assert param_types["region"] == ParameterType.STRING + assert param_types["quotesCount"] == ParameterType.INTEGER + assert param_types["newsCount"] == ParameterType.INTEGER + assert param_types["listsCount"] == ParameterType.INTEGER + + # get the fetch operation + fetch_op = routine.operations[2] + assert isinstance(fetch_op, RoutineFetchOperation) + + # verify STRING parameters are ESCAPED in x-param headers + assert fetch_op.endpoint.headers["x-param-query"] == '"{{query}}"' + assert fetch_op.endpoint.headers["x-param-lang"] == '"{{lang}}"' + assert fetch_op.endpoint.headers["x-param-region"] == '"{{region}}"' + + # verify INTEGER parameters are 
ESCAPED in x-param headers (note: even integers get escaped in this pattern) + assert fetch_op.endpoint.headers["x-param-quotesCount"] == '"{{quotesCount}}"' + assert fetch_op.endpoint.headers["x-param-newsCount"] == '"{{newsCount}}"' + assert fetch_op.endpoint.headers["x-param-listsCount"] == '"{{listsCount}}"' + + # verify parameters are in URL (no escaping in URLs) + assert "{{query}}" in fetch_op.endpoint.url + assert "{{lang}}" in fetch_op.endpoint.url + assert "{{region}}" in fetch_op.endpoint.url + assert "{{quotesCount}}" in fetch_op.endpoint.url + assert "{{newsCount}}" in fetch_op.endpoint.url + assert "{{listsCount}}" in fetch_op.endpoint.url + + def test_routine_inherits_from_resource_base(self) -> None: + """Routine should inherit ResourceBase functionality.""" + params = [ + Parameter(name="test_param", description="Test", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{test_param}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + + # check ResourceBase fields + assert routine.id.startswith("Routine_") + assert isinstance(routine.created_at, int) + assert isinstance(routine.updated_at, int) + assert routine.resource_type == "Routine" + + def test_routine_default_incognito_true(self) -> None: + """Routine should default to incognito=True.""" + params = [ + Parameter(name="test_param", description="Test", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{test_param}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + assert routine.incognito is True + + def test_routine_with_all_operation_types(self) -> None: + """Routine with all operation types should work.""" + params = [ + Parameter(name="query", description="Query", type=ParameterType.STRING), + ] + ops = [ + RoutineNavigateOperation(url="https://example.com/search"), + RoutineSleepOperation(timeout_seconds=2.0), + _make_fetch_operation( + 
url="https://api.example.com/search?q={{query}}", + session_storage_key="results" + ), + RoutineReturnOperation(session_storage_key="results"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + + assert len(routine.operations) == 4 + assert routine.operations[0].type == "navigate" + assert routine.operations[1].type == "sleep" + assert routine.operations[2].type == "fetch" + assert routine.operations[3].type == "return" + + def test_routine_serialization(self) -> None: + """Routine should be serializable to dict/JSON.""" + params = [ + Parameter(name="query", description="Query", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{query}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + + # to dict + data = routine.model_dump() + assert data["name"] == "test_routine" + assert len(data["parameters"]) == 1 + assert len(data["operations"]) == 1 + + # to JSON + json_str = routine.model_dump_json() + assert isinstance(json_str, str) + assert "test_routine" in json_str + + def test_routine_deserialization(self) -> None: + """Routine should be deserializable from dict.""" + params = [ + Parameter(name="query", description="Query", type=ParameterType.STRING), + ] + ops = [ + _make_fetch_operation(url="https://api.example.com?q={{query}}"), + ] + routine1 = _make_basic_routine(parameters=params, operations=ops) + + # serialize then deserialize + data = routine1.model_dump() + routine2 = Routine(**data) + + assert routine2.name == routine1.name + assert len(routine2.parameters) == len(routine1.parameters) + assert len(routine2.operations) == len(routine1.operations) + + def test_routine_with_no_parameters(self) -> None: + """Routine with no parameters should work if none are used.""" + ops = [ + RoutineNavigateOperation(url="https://example.com"), + RoutineSleepOperation(timeout_seconds=1.0), + _make_fetch_operation( + url="https://api.example.com/data", + body={"id": 
"{{uuid}}"}, + session_storage_key="data" + ), + ] + routine = _make_basic_routine(parameters=[], operations=ops) + assert len(routine.parameters) == 0 + assert len(routine.operations) == 3 + + def test_routine_operation_discriminator(self) -> None: + """Operations should be correctly discriminated by type.""" + params = [ + Parameter(name="url_param", description="URL param", type=ParameterType.STRING), + ] + ops = [ + RoutineNavigateOperation(url="https://example.com/{{url_param}}"), + ] + routine = _make_basic_routine(parameters=params, operations=ops) + + # should be correctly typed + assert isinstance(routine.operations[0], RoutineNavigateOperation) + assert routine.operations[0].type == "navigate" diff --git a/uv.lock b/uv.lock index a92f644..0e470ff 100644 --- a/uv.lock +++ b/uv.lock @@ -395,7 +395,7 @@ wheels = [ [[package]] name = "openai" -version = "2.6.1" +version = "2.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -407,9 +407,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/44/303deb97be7c1c9b53118b52825cbd1557aeeff510f3a52566b1fa66f6a2/openai-2.6.1.tar.gz", hash = "sha256:27ae704d190615fca0c0fc2b796a38f8b5879645a3a52c9c453b23f97141bb49", size = 593043, upload-time = "2025-10-24T13:29:52.79Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/a2/f4023c1e0c868a6a5854955b3374f17153388aed95e835af114a17eac95b/openai-2.7.1.tar.gz", hash = "sha256:df4d4a3622b2df3475ead8eb0fbb3c27fd1c070fa2e55d778ca4f40e0186c726", size = 595933, upload-time = "2025-11-04T06:07:23.069Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/0e/331df43df633e6105ff9cf45e0ce57762bd126a45ac16b25a43f6738d8a2/openai-2.6.1-py3-none-any.whl", hash = "sha256:904e4b5254a8416746a2f05649594fa41b19d799843cd134dac86167e094edef", size = 1005551, upload-time = "2025-10-24T13:29:50.973Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/74/6bfc3adc81f6c2cea4439f2a734c40e3a420703bbcdc539890096a732bbd/openai-2.7.1-py3-none-any.whl", hash = "sha256:2f2530354d94c59c614645a4662b9dab0a5b881c5cd767a8587398feac0c9021", size = 1008780, upload-time = "2025-11-04T06:07:20.818Z" }, ] [[package]]