Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CODEOWNERS file
# This file defines ownership of the codebase.
# When a PR modifies files, GitHub will automatically request reviews from the listed owners.

# Owners for the entire repository
* @alex-w-99 @dimavrem22 @rayruizhiliao
239 changes: 146 additions & 93 deletions README.md

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions example_routines/amtrak_one_way_train_search_routine.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "fetch",
"endpoint": {
"description": "Amtrak station/location autocomplete. GET with query parameter searchTerm; returns JSON with autoCompleterResponse.autoCompleteList.",
"url": "https://www.amtrak.com/services/MapDataService/AutoCompleterArcgis/getResponseList?searchTerm={{origin}}",
"url": "https://www.amtrak.com/services/MapDataService/AutoCompleterArcgis/getResponseList?searchTerm=\"{{origin}}\"",
"method": "GET",
"headers": {"Accept": "application/json, text/plain, */*"},
"body": {},
Expand All @@ -21,7 +21,7 @@
"type": "fetch",
"endpoint": {
"description": "Amtrak station/location autocomplete. GET with query parameter searchTerm; returns JSON with autoCompleterResponse.autoCompleteList.",
"url": "https://www.amtrak.com/services/MapDataService/AutoCompleterArcgis/getResponseList?searchTerm={{destination}}",
"url": "https://www.amtrak.com/services/MapDataService/AutoCompleterArcgis/getResponseList?searchTerm=\"{{destination}}\"",
"method": "GET",
"headers": {"Accept": "application/json, text/plain, */*"},
"body": {},
Expand Down Expand Up @@ -55,7 +55,7 @@
{
"origin": {
"code": "{{sessionStorage:amtrak_autocomplete_stations_origin.autoCompleterResponse.autoCompleteList.0.stationCode}}",
"schedule": {"departureDateTime": "{{departureDate}}T00:00:00"}
"schedule": {"departureDateTime": "\"{{departureDate}}\"T00:00:00"}
},
"destination": {
"code": "{{sessionStorage:amtrak_autocomplete_stations_destination.autoCompleterResponse.autoCompleteList.0.stationCode}}"
Expand Down
9 changes: 0 additions & 9 deletions scripts/browser_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,9 @@ def setup_output_directory(output_dir, keep_output):
# Create organized subdirectories
network_dir = os.path.join(output_dir, "network")
storage_dir = os.path.join(output_dir, "storage")
interactions_dir = os.path.join(output_dir, "interactions")

os.makedirs(network_dir, exist_ok=True)
os.makedirs(storage_dir, exist_ok=True)
os.makedirs(interactions_dir, exist_ok=True)

# Create transactions directory for unified request/response storage
transactions_dir = os.path.join(network_dir, "transactions")
Expand All @@ -224,16 +222,12 @@ def setup_output_directory(output_dir, keep_output):
# Main directories
'network_dir': network_dir,
'storage_dir': storage_dir,
'interactions_dir': interactions_dir,
'transactions_dir': transactions_dir,


# Storage files
'storage_jsonl_path': os.path.join(storage_dir, "events.jsonl"),

# Interaction files
'interactions_jsonl_path': os.path.join(interactions_dir, "events.jsonl"),

# Summary file
'summary_path': os.path.join(output_dir, "session_summary.json")
}
Expand Down Expand Up @@ -266,9 +260,6 @@ def save_session_summary(paths, summary, args, start_time, end_time, created_tab
},
"storage": {
"events": paths['storage_jsonl_path']
},
"interactions": {
"events": paths['interactions_jsonl_path']
}
}
}
Expand Down
Empty file added src/__init__.py
Empty file.
74 changes: 61 additions & 13 deletions src/cdp/routine_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,20 @@ def _generate_fetch_js(
" // Simple tokens (computed locally, no source lookup)",
" function replaceSimpleTokens(str){",
" if (typeof str !== 'string') return str;",
" str = str.replace(/\\{\\{\\s*epoch_milliseconds\\s*\\}\\}/ig, () => String(Date.now()));",
" // Handle quoted and unquoted: \"{{epoch_milliseconds}}\" or {{epoch_milliseconds}}",
" str = str.replace(/\\\"?\\{\\{\\s*epoch_milliseconds\\s*\\}\\}\\\"?/g, () => String(Date.now()));",
" // Handle {{uuid}} - generate UUID using crypto.randomUUID() if available",
" str = str.replace(/\\\"?\\{\\{\\s*uuid\\s*\\}\\}\\\"?/g, () => {",
" if ('randomUUID' in crypto) {",
" return crypto.randomUUID();",
" }",
" // Fallback for browsers without crypto.randomUUID()",
" return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {",
" const r = Math.random() * 16 | 0;",
" const v = c === 'x' ? r : (r & 0x3 | 0x8);",
" return v.toString(16);",
" });",
" });",
" return str;",
" }",
"",
Expand Down Expand Up @@ -113,7 +126,7 @@ def _generate_fetch_js(
" }",
" }",
"",
" const PLACEHOLDER = /\\{\\{\\s*(sessionStorage|localStorage|cookie|meta)\\s*:\\s*([^}]+?)\\s*\\}\\}/g;",
" const PLACEHOLDER = /\\\"?\\{\\{\\s*(sessionStorage|localStorage|cookie|meta)\\s*:\\s*([^}]+?)\\s*\\}\\}\\\"?/g;",
" function resolveOne(token){",
" const [lhs, rhs] = token.split('||');",
" const [kind, path] = lhs.split(':');",
Expand All @@ -137,10 +150,22 @@ def _generate_fetch_js(
" function resolvePlaceholders(str){",
" if (typeof str !== 'string') return str;",
" str = replaceSimpleTokens(str);",
" // Follow test.py pattern: for quoted placeholders, strings use raw value, objects use JSON.stringify",
" return str.replace(PLACEHOLDER, (m, _k, inner) => {",
" const v = resolveOne(`${_k}:${inner}`);",
" if (v === undefined || v === null) return m;",
" return (typeof v === 'object') ? JSON.stringify(v) : String(v);",
" // Check if match was quoted - could be \"{{...}}\" or \\\"{{...}}\\\"",
" // Check for escaped quote \\\" at start/end, or simple quote \"",
" const startsWithEscaped = m.startsWith('\\\\\"') || m.startsWith('\"');",
" const endsWithEscaped = m.endsWith('\\\\\"') || (m.endsWith('\"') && m.length > 2);",
" const isQuoted = startsWithEscaped && endsWithEscaped;",
" if (isQuoted) {",
" // Quoted: strings use raw value (no quotes), objects use JSON.stringify",
" return (typeof v === 'string') ? v : JSON.stringify(v);",
" } else {",
" // Unquoted: always stringify",
" return (typeof v === 'object') ? JSON.stringify(v) : String(v);",
" }",
" });",
" }",
"",
Expand Down Expand Up @@ -344,7 +369,7 @@ def _execute_fetch_in_session(
body_str = json.dumps(endpoint.body) # convert body from dict to str
body_str_interpolated = _apply_params(body_str, parameters_dict)
body = json.loads(body_str_interpolated) # convert body from str to dict

# Prepare headers and body for injection
hdrs = headers or {}

Expand Down Expand Up @@ -402,6 +427,11 @@ def _apply_params(text: str, parameters_dict: dict | None) -> str:

Only replaces {{param}} where 'param' is in parameters_dict.
Leaves other placeholders like {{sessionStorage:...}} untouched.

Follows the pattern from test.py:
- For string values in quoted placeholders: insert raw string (no quotes)
- For non-string values in quoted placeholders: use json.dumps(value)
- For unquoted placeholders: use str(value)

Args:
text: Text containing parameter placeholders.
Expand All @@ -412,15 +442,33 @@ def _apply_params(text: str, parameters_dict: dict | None) -> str:
"""
if not text or not parameters_dict:
return text
pattern = (
r"\{\{\s*(" + "|".join(map(re.escape, parameters_dict.keys())) + r")\s*\}\}"
)

def repl(m):
key = m.group(1)
return str(parameters_dict.get(key, m.group(0)))

return re.sub(pattern, repl, text)

for key, value in parameters_dict.items():
# Compute replacement based on value type (following test.py pattern)
if isinstance(value, str):
literal = value # For strings, insert raw string (no quotes)
else:
literal = json.dumps(value) # For numbers/bools/null, use JSON encoding

escaped_key = re.escape(key)

# Pattern 1: Simple quoted placeholder "{{key}}" in JSON string
# Matches: "{{key}}" (when the JSON value itself is the string "{{key}}")
# Use regular string concatenation to avoid f-string brace escaping issues
simple_quoted = '"' + r'\{\{' + r'\s*' + escaped_key + r'\s*' + r'\}\}' + '"'
text = re.sub(simple_quoted, literal, text)

# Pattern 2: Escaped quote variant \"{{key}}\"
# In JSON string this appears as: \\"{{key}}\\"
# Use regular string concatenation to build pattern with proper escaping
double_escaped = r'\\"' + r'\{\{' + r'\s*' + escaped_key + r'\s*' + r'\}\}' + r'\\"'
text = re.sub(double_escaped, literal, text)

# Pattern 3: Bare placeholder {{key}} (unquoted, for URL params, etc.)
bare_pattern = r'\{\{' + r'\s*' + escaped_key + r'\s*' + r'\}\}'
text = re.sub(bare_pattern, str(value), text)

return text


def _generate_random_user_agent() -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/data_models/llm_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class TransactionIdentificationResponse(BaseModel):
Response from the LLM for identifying the network transaction that directly corresponds to
the user's requested task.
"""
transaction_id: str
transaction_id: str | None
description: str
url: str
method: Method
Expand Down
24 changes: 13 additions & 11 deletions src/data_models/production_routine.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,24 +448,27 @@ def validate_parameter_usage(self) -> 'Routine':
# Extract all parameter names
defined_parameters = {param.name for param in self.parameters}

# Find all parameter usages in the JSON: *{{*}}*
param_pattern = r'\{\{.*?\}\}'
# Find all parameter usages in the JSON: *"{{*}}"*
# Match quoted placeholders: "{{param}}" or \"{{param}}\" (escaped quotes in JSON strings)
# \"{{param}}\" in JSON string means "{{param}}" in actual value
# Pattern REQUIRES quotes (either " or \") immediately before {{ and after }}
param_pattern = r'(?:"|\\")\{\{([^}"]*)\}\}(?:"|\\")'
matches = re.findall(param_pattern, routine_json)

# track used parameters
used_parameters = set()

# iterate over all parameter usages
for match in matches:

# clean the match from the {{ and }}
match = match.strip()[2:-2].strip()
# clean the match (already extracted the content between braces)
match = match.strip()

# if the parameter name starts with a colon, it is a storage parameter
# if the parameter name contains a colon, it is a storage parameter
if ":" in match:
kind, path = [p.strip() for p in match.split(":", 1)]
assert kind in ["sessionStorage", "localStorage", "cookie"], f"Invalid prefix in parameter name: {kind}"
assert path, f"Path is required for sessionStorage, localStorage, and cookie: {kind}:{path}"
assert kind in ["sessionStorage", "localStorage", "cookie", "meta"], f"Invalid prefix in parameter name: {kind}"
assert path, f"Path is required for sessionStorage, localStorage, cookie, and meta: {kind}:{path}"
continue
# if the parameter name is a builtin parameter, add it to the used parameters
elif match in builtin_parameter_names:
Expand All @@ -490,5 +493,4 @@ def validate_parameter_usage(self) -> 'Routine':
f"All parameters used in the routine must be defined in parameters."
)

return self

return self
Loading