Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 36 additions & 9 deletions web_hacker/cdp/routine_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def _get_browser_websocket_url(remote_debugging_address: str) -> str:
except Exception as e:
raise RuntimeError(f"Failed to get browser WebSocket URL: {e}")


def _generate_fetch_js(
fetch_url: str,
headers: dict,
Expand Down Expand Up @@ -133,16 +132,29 @@ def _generate_fetch_js(
" }",
" }",
"",
" const PLACEHOLDER = /\\\"?\\{\\{\\s*(sessionStorage|localStorage|cookie|meta)\\s*:\\s*([^}]+?)\\s*\\}\\}\\\"?/g;",
" const PLACEHOLDER = /\\\"?\\{\\{\\s*(sessionStorage|localStorage|cookie|meta|windowProperty)\\s*:\\s*([^}]+?)\\s*\\}\\}\\\"?/g;",
" function getWindowProperty(path){",
" const parts = path.trim().split('.');",
" let obj = window;",
" for (const part of parts) {",
" if (obj == null || obj === undefined) return undefined;",
" obj = obj[part];",
" }",
" // Convert the final value to a string if it's not null/undefined",
" if (obj == null || obj === undefined) return undefined;",
" return obj;",
" }",
" function resolveOne(token){",
" const [lhs, rhs] = token.split('||');",
" const [kind, path] = lhs.split(':');",
" let val;",
" switch(kind){",
" switch(kind.trim()){",
" case 'sessionStorage': val = readStorage(window.sessionStorage, path.trim()); break;",
" case 'localStorage': val = readStorage(window.localStorage, path.trim()); break;",
" case 'cookie': val = getCookie(path.trim()); break;",
" case 'meta': val = getMeta(path.trim()); break;",
" case 'windowProperty': val = getWindowProperty(path.trim()); break;",
" default: val = undefined;",
" }",
" if ((val === undefined || val === null || val === '') && rhs){",
" if (rhs.trim() === 'uuid' && 'randomUUID' in crypto){",
Expand All @@ -168,10 +180,13 @@ def _generate_fetch_js(
" const isQuoted = startsWithEscaped && endsWithEscaped;",
" if (isQuoted) {",
" // Quoted: strings use raw value (no quotes), objects use JSON.stringify",
" return (typeof v === 'string') ? v : JSON.stringify(v);",
" if (typeof v === 'string') return v;",
" if (typeof v === 'object') return JSON.stringify(v);",
" return String(v);",
" } else {",
" // Unquoted: always stringify",
" return (typeof v === 'object') ? JSON.stringify(v) : String(v);",
" if (typeof v === 'object') return JSON.stringify(v);",
" return String(v);",
" }",
" });",
" }",
Expand Down Expand Up @@ -205,7 +220,20 @@ def _generate_fetch_js(
" // Resolve body (if any)",
" if (BODY_LITERAL !== null) {",
" const bodyVal = deepResolve(BODY_LITERAL);",
" if (typeof bodyVal === 'string' && bodyVal.trim().startsWith('{') && bodyVal.trim().endsWith('}')) {",
" ",
" // Check if content-type is application/x-www-form-urlencoded (after interpolation)",
" const contentType = headers['content-type'] || headers['Content-Type'] || '';",
" const isFormUrlEncoded = contentType.toLowerCase().includes('application/x-www-form-urlencoded');",
" ",
" if (isFormUrlEncoded && bodyVal && typeof bodyVal === 'object' && !Array.isArray(bodyVal)) {",
" // Convert object to URL-encoded string",
" const formData = Object.entries(bodyVal).map(([key, value]) => {",
" const encodedKey = encodeURIComponent(String(key));",
" const encodedValue = encodeURIComponent(String(value === null || value === undefined ? '' : value));",
" return `${encodedKey}=${encodedValue}`;",
" }).join('&');",
" opts.body = formData;",
" } else if (typeof bodyVal === 'string' && bodyVal.trim().startsWith('{') && bodyVal.trim().endsWith('}')) {",
" opts.body = bodyVal;",
" } else {",
" opts.body = JSON.stringify(bodyVal);",
Expand All @@ -224,7 +252,6 @@ def _generate_fetch_js(

return "\n".join(js_lines)


def _create_cdp_helpers(ws):
"""Create helper functions for CDP communication."""

Expand Down Expand Up @@ -383,11 +410,11 @@ def _execute_fetch_in_session(
# Prepare headers and body for injection
hdrs = headers or {}

# Serialize body to JS string literal
# Serialize body to JS string literal (conversion to form-urlencoded happens in JS after interpolation)
if body is None:
body_js_literal = "null"
elif isinstance(body, (dict, list)):
body_js_literal = json.dumps(body) # JS object, will be JSON.stringify'd in JS
body_js_literal = json.dumps(body) # JS object, will be processed in JS after interpolation
elif isinstance(body, bytes):
body_js_literal = json.dumps(body.decode("utf-8", errors="ignore"))
else:
Expand Down
14 changes: 10 additions & 4 deletions web_hacker/data_models/production_routine.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,13 @@ class Parameter(BaseModel):

# reserved prefixes: names that cannot be used at the beginning of a parameter name
RESERVED_PREFIXES: ClassVar[list[str]] = [
"sessionStorage", "localStorage", "cookie", "meta", "uuid", "epoch_milliseconds"
"sessionStorage",
"localStorage",
"cookie",
"meta",
"windowProperty",
"uuid",
"epoch_milliseconds",
]

name: str = Field(..., description="Parameter name (must be valid Python identifier)")
Expand Down Expand Up @@ -483,11 +489,11 @@ def validate_parameter_usage(self) -> 'Routine':
# clean the match (already extracted the content between braces)
match = match.strip()

# if the parameter name contains a colon, it is a storage parameter
# if the parameter name contains a colon, it is an application parameter
if ":" in match:
kind, path = [p.strip() for p in match.split(":", 1)]
assert kind in ["sessionStorage", "localStorage", "cookie", "meta"], f"Invalid prefix in parameter name: {kind}"
assert path, f"Path is required for sessionStorage, localStorage, cookie, and meta: {kind}:{path}"
assert kind in ["sessionStorage", "localStorage", "cookie", "meta", "windowProperty"], f"Invalid prefix in parameter name: {kind}"
assert path, f"Path is required for sessionStorage, localStorage, cookie, meta, and windowProperty: {kind}:{path}"
continue
# if the parameter name is a builtin parameter, add it to the used parameters
elif match in builtin_parameter_names:
Expand Down
5 changes: 2 additions & 3 deletions web_hacker/routine_discovery/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os

from openai import OpenAI
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, ConfigDict

from web_hacker.config import Config
from web_hacker.routine_discovery.context_manager import ContextManager
Expand Down Expand Up @@ -46,8 +46,7 @@ class RoutineDiscoveryAgent(BaseModel):
n_transaction_identification_attempts: int = 3
current_transaction_identification_attempt: int = 0

class Config:
arbitrary_types_allowed: bool = True
model_config = ConfigDict(arbitrary_types_allowed=True)

SYSTEM_PROMPT_IDENTIFY_TRANSACTIONS: str = f"""
You are a helpful assistant that is an expert in parsing network traffic.
Expand Down
6 changes: 2 additions & 4 deletions web_hacker/routine_discovery/context_manager.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import BaseModel, field_validator, Field
from pydantic import BaseModel, field_validator, Field, ConfigDict
from openai import OpenAI
import os
import json
Expand Down Expand Up @@ -26,9 +26,7 @@ class ContextManager(BaseModel):
cached_transaction_ids: list[str] | None = Field(default=None, exclude=True)
uploaded_transaction_ids: set[str] = Field(default_factory=set, exclude=True)

class Config:
arbitrary_types_allowed = True

model_config = ConfigDict(arbitrary_types_allowed=True)

@field_validator('transactions_dir', 'consolidated_transactions_path', 'storage_jsonl_path')
@classmethod
Expand Down