From 3416e9f6931996364b8fd6ab950e370eb9c37dd7 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 15:47:18 -0500 Subject: [PATCH 01/27] restructuring as pip installable sdk --- README.md | 80 +++++- pyproject.toml | 63 ++++- tests/unit/test_data_utils.py | 2 +- tests/unit/test_production_routine.py | 4 +- web_hacker/__init__.py | 89 +++++++ {src => web_hacker/cdp}/__init__.py | 0 {src => web_hacker}/cdp/cdp_session.py | 10 +- .../cdp/interaction_monitor.py | 6 +- {src => web_hacker}/cdp/network_monitor.py | 8 +- {src => web_hacker}/cdp/routine_execution.py | 6 +- {src => web_hacker}/cdp/storage_monitor.py | 2 +- {src => web_hacker}/cdp/tab_managements.py | 0 {src => web_hacker}/config.py | 2 +- .../data_models}/__init__.py | 0 .../data_models/dev_routine.py | 2 +- .../data_models/llm_responses.py | 4 +- {src => web_hacker}/data_models/network.py | 2 +- .../data_models/production_routine.py | 2 +- .../routine_discovery}/__init__.py | 0 .../routine_discovery/agent.py | 16 +- .../routine_discovery/context_manager.py | 2 +- .../scripts}/__init__.py | 0 .../scripts}/browser_monitor.py | 10 +- .../scripts}/discover_routines.py | 10 +- .../scripts}/execute_routine.py | 6 +- web_hacker/sdk/__init__.py | 16 ++ web_hacker/sdk/client.py | 161 ++++++++++++ web_hacker/sdk/discovery.py | 81 ++++++ web_hacker/sdk/execution.py | 52 ++++ web_hacker/sdk/monitor.py | 238 ++++++++++++++++++ {src => web_hacker}/utils/__init__.py | 0 {src => web_hacker}/utils/cdp_utils.py | 2 +- {src => web_hacker}/utils/data_utils.py | 4 +- {src => web_hacker}/utils/exceptions.py | 20 +- {src => web_hacker}/utils/llm_utils.py | 6 +- 35 files changed, 838 insertions(+), 68 deletions(-) create mode 100644 web_hacker/__init__.py rename {src => web_hacker/cdp}/__init__.py (100%) rename {src => web_hacker}/cdp/cdp_session.py (97%) rename {src => web_hacker}/cdp/interaction_monitor.py (99%) rename {src => web_hacker}/cdp/network_monitor.py (99%) rename {src => web_hacker}/cdp/routine_execution.py (99%) rename {src => web_hacker}/cdp/storage_monitor.py (99%) rename {src => web_hacker}/cdp/tab_managements.py (100%) rename {src => web_hacker}/config.py (98%) rename {src/cdp => web_hacker/data_models}/__init__.py (100%) rename {src => web_hacker}/data_models/dev_routine.py (99%) rename {src => web_hacker}/data_models/llm_responses.py (97%) rename {src => web_hacker}/data_models/network.py (94%) rename {src => web_hacker}/data_models/production_routine.py (99%) rename {src/data_models => web_hacker/routine_discovery}/__init__.py (100%) rename {src => web_hacker}/routine_discovery/agent.py (98%) rename {src => web_hacker}/routine_discovery/context_manager.py (99%) rename {src/routine_discovery => web_hacker/scripts}/__init__.py (100%) rename {scripts => web_hacker/scripts}/browser_monitor.py (98%) rename {scripts => web_hacker/scripts}/discover_routines.py (90%) rename {scripts => web_hacker/scripts}/execute_routine.py (93%) create mode 100644 web_hacker/sdk/__init__.py create mode 100644 web_hacker/sdk/client.py create mode 100644 web_hacker/sdk/discovery.py create mode 100644 web_hacker/sdk/execution.py create mode 100644 web_hacker/sdk/monitor.py rename {src => web_hacker}/utils/__init__.py (100%) rename {src => web_hacker}/utils/cdp_utils.py (99%) rename {src => web_hacker}/utils/data_utils.py (97%) rename {src => web_hacker}/utils/exceptions.py (63%) rename {src => web_hacker}/utils/llm_utils.py (97%) diff --git a/README.md b/README.md index 1d026e6..58a8a61 100644 --- 
a/README.md +++ b/README.md @@ -148,11 +148,77 @@ This substitutes parameter values and injects `auth_token` from cookies. The JSO - Python 3.12+ - Google Chrome (stable) -- [uv (Python package manager)](https://github.com/astral-sh/uv) +- [uv (Python package manager)](https://github.com/astral-sh/uv) (optional, for development) - macOS/Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh` - Windows (PowerShell): `iwr https://astral.sh/uv/install.ps1 -UseBasicParsing | iex` - OpenAI API key +## Installation + +### From PyPI (Recommended) + +```bash +pip install web-hacker +``` + +### From Source + +```bash +git clone https://github.com/VectorlyApp/web-hacker.git +cd web-hacker +pip install -e . +``` + +## Quick Start (SDK) + +The easiest way to use web-hacker is through the SDK: + +```python +from web_hacker import WebHacker + +# Initialize the SDK +hacker = WebHacker(openai_api_key="sk-...") + +# Monitor browser activity +with hacker.monitor_browser(output_dir="./captures"): + # Navigate to your target website and perform actions + # The SDK will capture all network traffic, storage, and interactions + pass + +# Discover a routine from captured data +routine = hacker.discover_routine( + task="Search for flights and get prices", + cdp_captures_dir="./captures" +) + +# Execute the discovered routine +result = hacker.execute_routine( + routine=routine, + parameters={ + "origin": "NYC", + "destination": "LAX", + "departureDate": "2026-03-22" + } +) + +print(result) +``` + +## CLI Usage + +The SDK also provides CLI commands: + +```bash +# Monitor browser +web-hacker-monitor --output-dir ./captures + +# Discover routines +web-hacker-discover --task "Search for flights" --cdp-captures-dir ./captures + +# Execute routine +web-hacker-execute --routine-path routine.json --parameters-path params.json +``` + ## Set up Your Environment πŸ”§ ### Linux @@ -277,7 +343,7 @@ Use the CDP browser monitor to block trackers and capture network, storage, and **Run this command to start monitoring:** ```bash -python scripts/browser_monitor.py --host 127.0.0.1 --port 9222 --output-dir ./cdp_captures --url about:blank --incognito +web-hacker-monitor --host 127.0.0.1 --port 9222 --output-dir ./cdp_captures --url about:blank --incognito ``` The script will open a new tab (starting at `about:blank`). Navigate to your target website, then manually perform the actions you want to automate (e.g., search, login, export report). Keep Chrome focused during this process. Press `Ctrl+C` and the script will consolidate transactions and produce a HAR automatically. 
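For reference, after you stop the monitor the capture directory ends up roughly like the sketch below. The layout is inferred from the monitor and discovery code elsewhere in this patch; the exact contents of `transactions/` vary per session:

```
cdp_captures/
β”œβ”€β”€ network/
β”‚   β”œβ”€β”€ transactions/                    # per request/response capture folders
β”‚   β”œβ”€β”€ consolidated_transactions.json   # written when monitoring stops
β”‚   └── network.har                      # HAR generated from the transactions
β”œβ”€β”€ storage/
β”‚   └── events.jsonl                     # cookie / storage change events
└── interaction/
    └── consolidated_interactions.json   # mouse and keyboard events
```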
@@ -313,7 +379,7 @@ Use the **routine-discovery pipeline** to analyze captured data and synthesize a **Linux/macOS (bash):** ```bash -python scripts/discover_routines.py \ +web-hacker-discover \ --task "Recover API endpoints for searching for trains and their prices" \ --cdp-captures-dir ./cdp_captures \ --output-dir ./routine_discovery_output \ @@ -323,7 +389,7 @@ python scripts/discover_routines.py \ **Windows (PowerShell):** ```powershell # Simple task (no quotes inside): -python scripts/discover_routines.py --task "Recover the API endpoints for searching for trains and their prices" --cdp-captures-dir ./cdp_captures --output-dir ./routine_discovery_output --llm-model gpt-5 +web-hacker-discover --task "Recover the API endpoints for searching for trains and their prices" --cdp-captures-dir ./cdp_captures --output-dir ./routine_discovery_output --llm-model gpt-5 ``` **Example tasks:** @@ -372,13 +438,13 @@ Run the example routine: ```bash # Using a parameters file: -python scripts/execute_routine.py \ +web-hacker-execute \ --routine-path example_routines/amtrak_one_way_train_search_routine.json \ --parameters-path example_routines/amtrak_one_way_train_search_input.json # Or pass parameters inline (JSON string): -python scripts/execute_routine.py \ +web-hacker-execute \ --routine-path example_routines/amtrak_one_way_train_search_routine.json \ --parameters-dict '{"origin": "BOS", "destination": "NYP", "departureDate": "2026-03-22"}' ``` @@ -386,7 +452,7 @@ python scripts/execute_routine.py \ Run a discovered routine: ```bash -python scripts/execute_routine.py \ +web-hacker-execute \ --routine-path routine_discovery_output/routine.json \ --parameters-path routine_discovery_output/test_parameters.json ``` diff --git a/pyproject.toml b/pyproject.toml index c39f07b..3fcac99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,16 +6,37 @@ build-backend = "hatchling.build" [project] name = "web-hacker" -version = "0.1.0" -description = " Reverse engineer any web app!" +version = "1.1.0" +description = "SDK for reverse engineering web apps - No API? No Problem!" 
readme = "README.md" -requires-python = ">=3.12.3,<3.13" # pinning to 3.12.x +requires-python = ">=3.12.3,<3.13" +license = {text = "Apache-2.0"} +authors = [ + {name = "Vectorly", email = "contact@vectorly.app"} +] +keywords = [ + "web-scraping", + "automation", + "cdp", + "chrome-devtools", + "api-discovery", + "reverse-engineering", + "browser-automation", + "sdk", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Internet :: WWW/HTTP :: Browsers", + "Topic :: Software Development :: Testing", +] dependencies = [ - "ipykernel>=6.29.5", "openai>=2.6.1", "pydantic>=2.11.4", - "pylint>=3.0.0", - "pytest>=8.3.5", "python-dotenv>=1.2.1", "requests>=2.31.0", "websockets>=15.0.1", @@ -23,5 +44,33 @@ dependencies = [ "beautifulsoup4>=4.14.2", ] +[project.optional-dependencies] +dev = [ + "ipykernel>=6.29.5", + "pylint>=3.0.0", + "pytest>=8.3.5", +] + +[project.scripts] +web-hacker-monitor = "web_hacker.scripts.browser_monitor:main" +web-hacker-discover = "web_hacker.scripts.discover_routines:main" +web-hacker-execute = "web_hacker.scripts.execute_routine:main" + +[project.urls] +Homepage = "https://www.vectorly.app" +Documentation = "https://github.com/VectorlyApp/web-hacker#readme" +Repository = "https://github.com/VectorlyApp/web-hacker" +Issues = "https://github.com/VectorlyApp/web-hacker/issues" + [tool.hatch.build.targets.wheel] -packages = ["src"] +packages = ["web_hacker"] + +[tool.hatch.build.targets.sdist] +include = [ + "/web_hacker", + "/tests", + "/example_routines", + "README.md", + "LICENSE", + "pyproject.toml", +] diff --git a/tests/unit/test_data_utils.py b/tests/unit/test_data_utils.py index 9af614b..b445df5 100644 --- a/tests/unit/test_data_utils.py +++ b/tests/unit/test_data_utils.py @@ -6,7 +6,7 @@ import pytest -from src.utils.data_utils import get_text_from_html +from web_hacker.utils.data_utils import get_text_from_html class TestGetTextFromHtml: diff --git a/tests/unit/test_production_routine.py b/tests/unit/test_production_routine.py index 98c9346..75460c4 100644 --- a/tests/unit/test_production_routine.py +++ b/tests/unit/test_production_routine.py @@ -11,8 +11,8 @@ import pytest from pydantic import ValidationError -from src.utils.data_utils import load_data -from src.data_models.production_routine import ( +from web_hacker.utils.data_utils import load_data +from web_hacker.data_models.production_routine import ( ResourceBase, Routine, Parameter, diff --git a/web_hacker/__init__.py b/web_hacker/__init__.py new file mode 100644 index 0000000..d2ba5b7 --- /dev/null +++ b/web_hacker/__init__.py @@ -0,0 +1,89 @@ +""" +Web Hacker SDK - Reverse engineer any web app! 
+ +Usage: + from web_hacker import WebHacker + + # Monitor browser activity + hacker = WebHacker() + with hacker.monitor_browser(output_dir="./captures"): + # User performs actions in browser + pass + + # Discover routines + routine = hacker.discover_routine( + task="Search for flights", + cdp_captures_dir="./captures" + ) + + # Execute routines + result = hacker.execute_routine( + routine=routine, + parameters={"origin": "NYC", "destination": "LAX"} + ) +""" + +__version__ = "1.1.0" + +# Public API - High-level interface +from .sdk import WebHacker, BrowserMonitor, RoutineDiscovery, RoutineExecutor + +# Data models - for advanced users +from .data_models.production_routine import ( + Routine, + Parameter, + RoutineOperation, + RoutineNavigateOperation, + RoutineFetchOperation, + RoutineReturnOperation, + RoutineSleepOperation, + Endpoint, +) + +# Exceptions +from .utils.exceptions import ( + WebHackerError, + ApiKeyNotFoundError, + RoutineExecutionError, + BrowserConnectionError, + TransactionIdentificationFailedError, + LLMStructuredOutputError, + UnsupportedFileFormat, +) + +# Core modules (for advanced usage) +from . import cdp +from . import data_models +from . import routine_discovery +from . import utils + +__all__ = [ + # High-level API + "WebHacker", + "BrowserMonitor", + "RoutineDiscovery", + "RoutineExecutor", + # Data models + "Routine", + "Parameter", + "RoutineOperation", + "RoutineNavigateOperation", + "RoutineFetchOperation", + "RoutineReturnOperation", + "RoutineSleepOperation", + "Endpoint", + # Exceptions + "WebHackerError", + "ApiKeyNotFoundError", + "RoutineExecutionError", + "BrowserConnectionError", + "TransactionIdentificationFailedError", + "LLMStructuredOutputError", + "UnsupportedFileFormat", + # Core modules + "cdp", + "data_models", + "routine_discovery", + "utils", +] + diff --git a/src/__init__.py b/web_hacker/cdp/__init__.py similarity index 100% rename from src/__init__.py rename to web_hacker/cdp/__init__.py diff --git a/src/cdp/cdp_session.py b/web_hacker/cdp/cdp_session.py similarity index 97% rename from src/cdp/cdp_session.py rename to web_hacker/cdp/cdp_session.py index 8acd671..fef8fa3 100644 --- a/src/cdp/cdp_session.py +++ b/web_hacker/cdp/cdp_session.py @@ -1,5 +1,5 @@ """ -src/cdp/cdp_session.py +web_hacker/cdp/cdp_session.py CDP Session management for web scraping with Chrome DevTools Protocol. """ @@ -11,10 +11,10 @@ import threading import time -from src.config import Config -from src.cdp.network_monitor import NetworkMonitor -from src.cdp.storage_monitor import StorageMonitor -from src.cdp.interaction_monitor import InteractionMonitor +from web_hacker.config import Config +from web_hacker.cdp.network_monitor import NetworkMonitor +from web_hacker.cdp.storage_monitor import StorageMonitor +from web_hacker.cdp.interaction_monitor import InteractionMonitor logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/cdp/interaction_monitor.py b/web_hacker/cdp/interaction_monitor.py similarity index 99% rename from src/cdp/interaction_monitor.py rename to web_hacker/cdp/interaction_monitor.py index 531cb70..e9aa8b0 100644 --- a/src/cdp/interaction_monitor.py +++ b/web_hacker/cdp/interaction_monitor.py @@ -1,5 +1,5 @@ """ -src/cdp/interaction_monitor.py +web_hacker/cdp/interaction_monitor.py Interaction monitoring for CDP β€” tracks mouse and keyboard events with element details. 
""" @@ -10,8 +10,8 @@ import json from collections import defaultdict -from src.config import Config -from src.utils.cdp_utils import write_jsonl, write_json_file +from web_hacker.config import Config +from web_hacker.utils.cdp_utils import write_jsonl, write_json_file logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/cdp/network_monitor.py b/web_hacker/cdp/network_monitor.py similarity index 99% rename from src/cdp/network_monitor.py rename to web_hacker/cdp/network_monitor.py index 8a83959..d7bd9ad 100644 --- a/src/cdp/network_monitor.py +++ b/web_hacker/cdp/network_monitor.py @@ -1,5 +1,5 @@ """ -src/cdp/network_monitor.py +web_hacker/cdp/network_monitor.py Network monitoring via CDP β€” *minimal*, non‑blocking, and reliable. @@ -28,14 +28,14 @@ from fnmatch import fnmatch from typing import Any -from src.config import Config -from src.utils.cdp_utils import ( +from web_hacker.config import Config +from web_hacker.utils.cdp_utils import ( build_pair_dir, get_set_cookie_values, write_json_file, write_jsonl, ) -from src.data_models.network import Stage +from web_hacker.data_models.network import Stage logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) diff --git a/src/cdp/routine_execution.py b/web_hacker/cdp/routine_execution.py similarity index 99% rename from src/cdp/routine_execution.py rename to web_hacker/cdp/routine_execution.py index 01dbab2..1f039e4 100644 --- a/src/cdp/routine_execution.py +++ b/web_hacker/cdp/routine_execution.py @@ -1,5 +1,5 @@ """ -src/cdp/routine_execution.py +web_hacker/cdp/routine_execution.py Execute a routine using Chrome DevTools Protocol. """ @@ -15,8 +15,8 @@ import requests import websocket -from src.config import Config -from src.data_models.production_routine import ( +from web_hacker.config import Config +from web_hacker.data_models.production_routine import ( Routine, Endpoint, RoutineFetchOperation, diff --git a/src/cdp/storage_monitor.py b/web_hacker/cdp/storage_monitor.py similarity index 99% rename from src/cdp/storage_monitor.py rename to web_hacker/cdp/storage_monitor.py index df01687..898db62 100644 --- a/src/cdp/storage_monitor.py +++ b/web_hacker/cdp/storage_monitor.py @@ -5,7 +5,7 @@ """ import time -from src.utils.cdp_utils import write_jsonl +from web_hacker.utils.cdp_utils import write_jsonl class StorageMonitor: diff --git a/src/cdp/tab_managements.py b/web_hacker/cdp/tab_managements.py similarity index 100% rename from src/cdp/tab_managements.py rename to web_hacker/cdp/tab_managements.py diff --git a/src/config.py b/web_hacker/config.py similarity index 98% rename from src/config.py rename to web_hacker/config.py index eccd14f..7ec9440 100644 --- a/src/config.py +++ b/web_hacker/config.py @@ -1,5 +1,5 @@ """ -src/config.py +web_hacker/config.py Centralized environment variable configuration. 
""" diff --git a/src/cdp/__init__.py b/web_hacker/data_models/__init__.py similarity index 100% rename from src/cdp/__init__.py rename to web_hacker/data_models/__init__.py diff --git a/src/data_models/dev_routine.py b/web_hacker/data_models/dev_routine.py similarity index 99% rename from src/data_models/dev_routine.py rename to web_hacker/data_models/dev_routine.py index 4cc93fe..4a6339c 100644 --- a/src/data_models/dev_routine.py +++ b/web_hacker/data_models/dev_routine.py @@ -1,5 +1,5 @@ """ -src/data_models/dev_routine.py +web_hacker/data_models/dev_routine.py """ import re diff --git a/src/data_models/llm_responses.py b/web_hacker/data_models/llm_responses.py similarity index 97% rename from src/data_models/llm_responses.py rename to web_hacker/data_models/llm_responses.py index 715732f..8d67306 100644 --- a/src/data_models/llm_responses.py +++ b/web_hacker/data_models/llm_responses.py @@ -1,5 +1,5 @@ """ -src/data_models/llm_responses.py +web_hacker/data_models/llm_responses.py LLM response data models. """ @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field -from src.data_models.network import Method +from web_hacker.data_models.network import Method class ConfidenceLevel(StrEnum): diff --git a/src/data_models/network.py b/web_hacker/data_models/network.py similarity index 94% rename from src/data_models/network.py rename to web_hacker/data_models/network.py index c1c0c4a..064df18 100644 --- a/src/data_models/network.py +++ b/web_hacker/data_models/network.py @@ -1,5 +1,5 @@ """ -src/data_models/network.py +web_hacker/data_models/network.py Network data models. """ diff --git a/src/data_models/production_routine.py b/web_hacker/data_models/production_routine.py similarity index 99% rename from src/data_models/production_routine.py rename to web_hacker/data_models/production_routine.py index 04ac801..49789bf 100644 --- a/src/data_models/production_routine.py +++ b/web_hacker/data_models/production_routine.py @@ -1,5 +1,5 @@ """ -src/data_models/production_routine.py +web_hacker/data_models/production_routine.py Production routine data models. """ diff --git a/src/data_models/__init__.py b/web_hacker/routine_discovery/__init__.py similarity index 100% rename from src/data_models/__init__.py rename to web_hacker/routine_discovery/__init__.py diff --git a/src/routine_discovery/agent.py b/web_hacker/routine_discovery/agent.py similarity index 98% rename from src/routine_discovery/agent.py rename to web_hacker/routine_discovery/agent.py index 73fb078..d928831 100644 --- a/src/routine_discovery/agent.py +++ b/web_hacker/routine_discovery/agent.py @@ -1,5 +1,5 @@ """ -src/routine_discovery/agent.py +web_hacker/routine_discovery/agent.py Agent for discovering routines from the network transactions. 
""" @@ -12,10 +12,10 @@ from openai import OpenAI from pydantic import BaseModel, Field -from src.config import Config -from src.routine_discovery.context_manager import ContextManager -from src.utils.llm_utils import llm_parse_text_to_model, collect_text_from_response, manual_llm_parse_text_to_model -from src.data_models.llm_responses import ( +from web_hacker.config import Config +from web_hacker.routine_discovery.context_manager import ContextManager +from web_hacker.utils.llm_utils import llm_parse_text_to_model, collect_text_from_response, manual_llm_parse_text_to_model +from web_hacker.data_models.llm_responses import ( TransactionIdentificationResponse, ExtractedVariableResponse, TransactionConfirmationResponse, @@ -23,9 +23,9 @@ ResolvedVariableResponse, TestParametersResponse ) -from src.data_models.production_routine import Routine as ProductionRoutine -from src.data_models.dev_routine import Routine, RoutineFetchOperation -from src.utils.exceptions import TransactionIdentificationFailedError +from web_hacker.data_models.production_routine import Routine as ProductionRoutine +from web_hacker.data_models.dev_routine import Routine, RoutineFetchOperation +from web_hacker.utils.exceptions import TransactionIdentificationFailedError logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/src/routine_discovery/context_manager.py b/web_hacker/routine_discovery/context_manager.py similarity index 99% rename from src/routine_discovery/context_manager.py rename to web_hacker/routine_discovery/context_manager.py index 6a429de..72112c8 100644 --- a/src/routine_discovery/context_manager.py +++ b/web_hacker/routine_discovery/context_manager.py @@ -5,7 +5,7 @@ import time import shutil -from src.utils.data_utils import get_text_from_html +from web_hacker.utils.data_utils import get_text_from_html class ContextManager(BaseModel): diff --git a/src/routine_discovery/__init__.py b/web_hacker/scripts/__init__.py similarity index 100% rename from src/routine_discovery/__init__.py rename to web_hacker/scripts/__init__.py diff --git a/scripts/browser_monitor.py b/web_hacker/scripts/browser_monitor.py similarity index 98% rename from scripts/browser_monitor.py rename to web_hacker/scripts/browser_monitor.py index 842c7e7..688ed5e 100644 --- a/scripts/browser_monitor.py +++ b/web_hacker/scripts/browser_monitor.py @@ -1,5 +1,5 @@ """ -src/scripts/browser_monitor.py +web_hacker/scripts/browser_monitor.py CDP-based web scraper that blocks trackers and captures network requests. 
""" @@ -12,10 +12,10 @@ import shutil import sys -from src.config import Config -from src.cdp.cdp_session import CDPSession -from src.data_models.network import ResourceType -from src.cdp.tab_managements import cdp_new_tab, dispose_context +from web_hacker.config import Config +from web_hacker.cdp.cdp_session import CDPSession +from web_hacker.data_models.network import ResourceType +from web_hacker.cdp.tab_managements import cdp_new_tab, dispose_context logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/scripts/discover_routines.py b/web_hacker/scripts/discover_routines.py similarity index 90% rename from scripts/discover_routines.py rename to web_hacker/scripts/discover_routines.py index d9e852f..b4c807c 100644 --- a/scripts/discover_routines.py +++ b/web_hacker/scripts/discover_routines.py @@ -1,5 +1,5 @@ """ -src/scripts/discover_routines.py +web_hacker/scripts/discover_routines.py Script for discovering routines from the network transactions. """ @@ -10,10 +10,10 @@ from openai import OpenAI -from src.config import Config -from src.utils.exceptions import ApiKeyNotFoundError -from src.routine_discovery.agent import RoutineDiscoveryAgent -from src.routine_discovery.context_manager import ContextManager +from web_hacker.config import Config +from web_hacker.utils.exceptions import ApiKeyNotFoundError +from web_hacker.routine_discovery.agent import RoutineDiscoveryAgent +from web_hacker.routine_discovery.context_manager import ContextManager logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/scripts/execute_routine.py b/web_hacker/scripts/execute_routine.py similarity index 93% rename from scripts/execute_routine.py rename to web_hacker/scripts/execute_routine.py index 173822f..fecd7b2 100644 --- a/scripts/execute_routine.py +++ b/web_hacker/scripts/execute_routine.py @@ -18,9 +18,9 @@ import json import logging -from src.config import Config -from src.cdp.routine_execution import execute_routine -from src.data_models.production_routine import Routine +from web_hacker.config import Config +from web_hacker.cdp.routine_execution import execute_routine +from web_hacker.data_models.production_routine import Routine logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) diff --git a/web_hacker/sdk/__init__.py b/web_hacker/sdk/__init__.py new file mode 100644 index 0000000..6a7c8a3 --- /dev/null +++ b/web_hacker/sdk/__init__.py @@ -0,0 +1,16 @@ +""" +Web Hacker SDK - High-level API for web automation. +""" + +from .client import WebHacker +from .monitor import BrowserMonitor +from .discovery import RoutineDiscovery +from .execution import RoutineExecutor + +__all__ = [ + "WebHacker", + "BrowserMonitor", + "RoutineDiscovery", + "RoutineExecutor", +] + diff --git a/web_hacker/sdk/client.py b/web_hacker/sdk/client.py new file mode 100644 index 0000000..5d9ef33 --- /dev/null +++ b/web_hacker/sdk/client.py @@ -0,0 +1,161 @@ +""" +High-level WebHacker client for easy SDK usage. 
+""" + +from typing import Optional, Dict, Any +from pathlib import Path +from openai import OpenAI + +from ..config import Config +from ..utils.exceptions import ApiKeyNotFoundError +from .monitor import BrowserMonitor +from .discovery import RoutineDiscovery +from .execution import RoutineExecutor +from ..data_models.production_routine import Routine + + +class WebHacker: + """ + Main SDK client for Web Hacker. + + Provides a simple, high-level interface for monitoring browsers, + discovering routines, and executing automation. + + Example: + >>> hacker = WebHacker(openai_api_key="sk-...") + >>> with hacker.monitor_browser(output_dir="./captures"): + ... # User performs actions in browser + ... pass + >>> routine = hacker.discover_routine( + ... task="Search for flights", + ... cdp_captures_dir="./captures" + ... ) + >>> result = hacker.execute_routine( + ... routine=routine, + ... parameters={"origin": "NYC", "destination": "LAX"} + ... ) + """ + + def __init__( + self, + openai_api_key: Optional[str] = None, + remote_debugging_address: str = "http://127.0.0.1:9222", + llm_model: str = "gpt-5", + ): + """ + Initialize WebHacker client. + + Args: + openai_api_key: OpenAI API key. If None, uses OPENAI_API_KEY env var. + remote_debugging_address: Chrome debugging server address. + llm_model: LLM model to use for routine discovery. + """ + self.openai_api_key = openai_api_key or Config.OPENAI_API_KEY + if not self.openai_api_key: + raise ApiKeyNotFoundError("OpenAI API key is required") + + self.client = OpenAI(api_key=self.openai_api_key) + self.remote_debugging_address = remote_debugging_address + self.llm_model = llm_model + + self._monitor = None + self._discovery = None + self._executor = None + + def monitor_browser( + self, + output_dir: str = "./cdp_captures", + url: str = "about:blank", + incognito: bool = True, + block_patterns: Optional[list[str]] = None, + capture_resources: Optional[set] = None, + **kwargs + ) -> BrowserMonitor: + """ + Start monitoring browser activity. + + Args: + output_dir: Directory to save captured data. + url: Initial URL to navigate to. + incognito: Whether to use incognito mode. + block_patterns: URL patterns to block (trackers, ads, etc.). + capture_resources: Resource types to capture. + **kwargs: Additional options passed to BrowserMonitor. + + Returns: + BrowserMonitor instance for controlling the monitoring session. + """ + self._monitor = BrowserMonitor( + remote_debugging_address=self.remote_debugging_address, + output_dir=output_dir, + url=url, + incognito=incognito, + block_patterns=block_patterns, + capture_resources=capture_resources, + **kwargs + ) + return self._monitor + + def discover_routine( + self, + task: str, + cdp_captures_dir: str = "./cdp_captures", + output_dir: str = "./routine_discovery_output", + llm_model: Optional[str] = None, + ) -> Routine: + """ + Discover a routine from captured browser data. + + Args: + task: Description of the task to automate. + cdp_captures_dir: Directory containing CDP captures. + output_dir: Directory to save discovery results. + llm_model: LLM model to use (overrides default). + + Returns: + Discovered Routine object. 
+ """ + self._discovery = RoutineDiscovery( + client=self.client, + task=task, + cdp_captures_dir=cdp_captures_dir, + output_dir=output_dir, + llm_model=llm_model or self.llm_model, + ) + return self._discovery.run() + + def execute_routine( + self, + routine: Routine, + parameters: Dict[str, Any], + timeout: float = 180.0, + wait_after_navigate_sec: float = 3.0, + close_tab_when_done: bool = True, + incognito: bool = False, + ) -> Dict[str, Any]: + """ + Execute a routine with given parameters. + + Args: + routine: Routine to execute. + parameters: Parameters for the routine. + timeout: Operation timeout in seconds. + wait_after_navigate_sec: Wait time after navigation. + close_tab_when_done: Whether to close tab when finished. + incognito: Whether to use incognito mode. + + Returns: + Result dictionary with "ok" status and "result" data. + """ + self._executor = RoutineExecutor( + remote_debugging_address=self.remote_debugging_address, + ) + return self._executor.execute( + routine=routine, + parameters=parameters, + timeout=timeout, + wait_after_navigate_sec=wait_after_navigate_sec, + close_tab_when_done=close_tab_when_done, + incognito=incognito, + ) + diff --git a/web_hacker/sdk/discovery.py b/web_hacker/sdk/discovery.py new file mode 100644 index 0000000..bbce698 --- /dev/null +++ b/web_hacker/sdk/discovery.py @@ -0,0 +1,81 @@ +""" +Routine discovery SDK wrapper. +""" + +from pathlib import Path +from typing import Optional +import os +from openai import OpenAI + +from ..routine_discovery.agent import RoutineDiscoveryAgent +from ..routine_discovery.context_manager import ContextManager +from ..data_models.production_routine import Routine + + +class RoutineDiscovery: + """ + High-level interface for discovering routines. + + Example: + >>> discovery = RoutineDiscovery( + ... client=openai_client, + ... task="Search for flights", + ... cdp_captures_dir="./captures" + ... ) + >>> routine = discovery.run() + """ + + def __init__( + self, + client: OpenAI, + task: str, + cdp_captures_dir: str = "./cdp_captures", + output_dir: str = "./routine_discovery_output", + llm_model: str = "gpt-5", + ): + self.client = client + self.task = task + self.cdp_captures_dir = cdp_captures_dir + self.output_dir = output_dir + self.llm_model = llm_model + + self.agent: Optional[RoutineDiscoveryAgent] = None + self.context_manager: Optional[ContextManager] = None + + def run(self) -> Routine: + """ + Run routine discovery and return the discovered routine. + + Returns: + Discovered Routine object. 
+ """ + # Create output directory + os.makedirs(self.output_dir, exist_ok=True) + + # Initialize context manager + self.context_manager = ContextManager( + client=self.client, + tmp_dir=str(Path(self.output_dir) / "tmp"), + transactions_dir=str(Path(self.cdp_captures_dir) / "network" / "transactions"), + consolidated_transactions_path=str(Path(self.cdp_captures_dir) / "network" / "consolidated_transactions.json"), + storage_jsonl_path=str(Path(self.cdp_captures_dir) / "storage" / "events.jsonl"), + ) + self.context_manager.make_vectorstore() + + # Initialize and run agent + self.agent = RoutineDiscoveryAgent( + client=self.client, + context_manager=self.context_manager, + task=self.task, + llm_model=self.llm_model, + output_dir=self.output_dir, + ) + self.agent.run() + + # Load and return the discovered routine + routine_path = Path(self.output_dir) / "routine.json" + if not routine_path.exists(): + raise FileNotFoundError(f"Routine not found at {routine_path}") + + return Routine.model_validate_json(routine_path.read_text()) + diff --git a/web_hacker/sdk/execution.py b/web_hacker/sdk/execution.py new file mode 100644 index 0000000..466ce16 --- /dev/null +++ b/web_hacker/sdk/execution.py @@ -0,0 +1,52 @@ +""" +Routine execution SDK wrapper. +""" + +from typing import Dict, Any +from ..cdp.routine_execution import execute_routine +from ..data_models.production_routine import Routine + + +class RoutineExecutor: + """ + High-level interface for executing routines. + + Example: + >>> executor = RoutineExecutor() + >>> result = executor.execute( + ... routine=routine, + ... parameters={"origin": "NYC", "destination": "LAX"} + ... ) + """ + + def __init__( + self, + remote_debugging_address: str = "http://127.0.0.1:9222", + ): + self.remote_debugging_address = remote_debugging_address + + def execute( + self, + routine: Routine, + parameters: Dict[str, Any], + timeout: float = 180.0, + wait_after_navigate_sec: float = 3.0, + close_tab_when_done: bool = True, + incognito: bool = False, + ) -> Dict[str, Any]: + """ + Execute a routine. + + Returns: + Result dictionary with "ok" status and "result" data. + """ + return execute_routine( + routine=routine, + parameters_dict=parameters, + remote_debugging_address=self.remote_debugging_address, + timeout=timeout, + wait_after_navigate_sec=wait_after_navigate_sec, + close_tab_when_done=close_tab_when_done, + incognito=incognito, + ) + diff --git a/web_hacker/sdk/monitor.py b/web_hacker/sdk/monitor.py new file mode 100644 index 0000000..9e0987e --- /dev/null +++ b/web_hacker/sdk/monitor.py @@ -0,0 +1,238 @@ +""" +Browser monitoring SDK wrapper. +""" + +from typing import Optional, Set +from pathlib import Path +import logging +import sys +import time +import threading + +from ..cdp.cdp_session import CDPSession +from ..cdp.tab_managements import cdp_new_tab, dispose_context +from ..data_models.network import ResourceType +from ..utils.exceptions import BrowserConnectionError + +logger = logging.getLogger(__name__) + + +class BrowserMonitor: + """ + High-level interface for monitoring browser activity. + + Example: + >>> monitor = BrowserMonitor(output_dir="./captures") + >>> with monitor: + ... # User performs actions in browser + ... 
pass + >>> summary = monitor.get_summary() + """ + + def __init__( + self, + remote_debugging_address: str = "http://127.0.0.1:9222", + output_dir: str = "./cdp_captures", + url: str = "about:blank", + incognito: bool = True, + block_patterns: Optional[list[str]] = None, + capture_resources: Optional[Set] = None, + create_tab: bool = True, + clear_cookies: bool = False, + clear_storage: bool = False, + ): + self.remote_debugging_address = remote_debugging_address + self.output_dir = output_dir + self.url = url + self.incognito = incognito + self.block_patterns = block_patterns + self.capture_resources = capture_resources or { + ResourceType.XHR, + ResourceType.FETCH, + ResourceType.DOCUMENT, + ResourceType.SCRIPT, + ResourceType.IMAGE, + ResourceType.MEDIA + } + self.create_tab = create_tab + self.clear_cookies = clear_cookies + self.clear_storage = clear_storage + + self.session: Optional[CDPSession] = None + self.context_id: Optional[str] = None + self.created_tab = False + self.start_time: Optional[float] = None + self._run_thread: Optional[threading.Thread] = None + self._stop_event = threading.Event() + + def start(self) -> None: + """Start monitoring session.""" + self.start_time = time.time() + + # Create output directory structure + paths = { + "output_dir": self.output_dir, + "network_dir": str(Path(self.output_dir) / "network"), + "transactions_dir": str(Path(self.output_dir) / "network" / "transactions"), + "storage_dir": str(Path(self.output_dir) / "storage"), + "interaction_dir": str(Path(self.output_dir) / "interaction"), + } + + # Create directories + for path in paths.values(): + Path(path).mkdir(parents=True, exist_ok=True) + + # Get or create browser tab + if self.create_tab: + try: + target_id, browser_context_id, ws = cdp_new_tab( + remote_debugging_address=self.remote_debugging_address, + incognito=self.incognito, + url=self.url, + ) + self.context_id = browser_context_id + self.created_tab = True + ws_url = ws + except Exception as e: + raise BrowserConnectionError(f"Failed to create browser tab: {e}") + else: + # Connect to existing browser + try: + import requests + ver = requests.get(f"{self.remote_debugging_address}/json/version", timeout=5) + ver.raise_for_status() + data = ver.json() + ws_url = data.get("webSocketDebuggerUrl") + if not ws_url: + raise BrowserConnectionError("Could not get WebSocket URL from browser") + except Exception as e: + raise BrowserConnectionError(f"Failed to connect to browser: {e}") + + # Initialize CDP session + self.session = CDPSession( + ws_url=ws_url, + output_dir=paths["network_dir"], # Use network directory for response bodies + paths=paths, + capture_resources=self.capture_resources, + block_patterns=self.block_patterns or [], + clear_cookies=self.clear_cookies, + clear_storage=self.clear_storage, + ) + + self.session.setup_cdp(self.url if self.create_tab else None) + + # Start the monitoring loop in a separate thread + self._stop_event.clear() + self._run_thread = threading.Thread(target=self._run_monitoring_loop, daemon=True) + self._run_thread.start() + + logger.info(f"Browser monitoring started. 
Output directory: {self.output_dir}") + + def _run_monitoring_loop(self): + """Run the monitoring loop in a separate thread.""" + if not self.session: + return + + try: + import json + + # Set a timeout on the websocket to allow checking stop event + if hasattr(self.session.ws, 'settimeout'): + self.session.ws.settimeout(1.0) + + while not self._stop_event.is_set(): + try: + msg = json.loads(self.session.ws.recv()) + self.session.handle_message(msg) + except Exception as e: + if self._stop_event.is_set(): + break + # Check if it's a timeout (which is expected) + if "timed out" in str(e).lower() or "timeout" in str(e).lower(): + continue + logger.warning(f"Error in monitoring loop: {e}") + break + except KeyboardInterrupt: + pass + finally: + # Final cookie sync + try: + if self.session: + self.session.storage_monitor.monitor_cookie_changes(self.session) + except: + pass + + # Consolidate transactions + try: + if self.session: + consolidated_path = f"{self.output_dir}/network/consolidated_transactions.json" + self.session.network_monitor.consolidate_transactions(consolidated_path) + except: + pass + + # Generate HAR file + try: + if self.session: + har_path = f"{self.output_dir}/network/network.har" + self.session.network_monitor.generate_har_from_transactions(har_path, "Web Hacker Session") + except: + pass + + # Consolidate interactions + try: + if self.session: + interaction_dir = self.session.paths.get('interaction_dir', f"{self.output_dir}/interaction") + consolidated_interactions_path = str(Path(interaction_dir) / "consolidated_interactions.json") + self.session.interaction_monitor.consolidate_interactions(consolidated_interactions_path) + except: + pass + + def stop(self) -> dict: + """Stop monitoring and return summary.""" + if not self.session: + return {} + + # Signal stop + self._stop_event.set() + + # Wait for thread to finish (with timeout) + if self._run_thread and self._run_thread.is_alive(): + self._run_thread.join(timeout=5.0) + + # Close WebSocket + try: + if self.session.ws: + self.session.ws.close() + except Exception as e: + logger.warning(f"Error closing WebSocket: {e}") + + summary = self.get_summary() + + # Cleanup + if self.created_tab and self.context_id: + try: + dispose_context(self.remote_debugging_address, self.context_id) + except Exception as e: + logger.warning(f"Could not dispose browser context: {e}") + + end_time = time.time() + summary["duration"] = end_time - (self.start_time or end_time) + + logger.info("Browser monitoring stopped.") + return summary + + def get_summary(self) -> dict: + """Get current monitoring summary without stopping.""" + if not self.session: + return {} + return self.session.get_monitoring_summary() if self.session else {} + + def __enter__(self): + """Context manager entry.""" + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.stop() + diff --git a/src/utils/__init__.py b/web_hacker/utils/__init__.py similarity index 100% rename from src/utils/__init__.py rename to web_hacker/utils/__init__.py diff --git a/src/utils/cdp_utils.py b/web_hacker/utils/cdp_utils.py similarity index 99% rename from src/utils/cdp_utils.py rename to web_hacker/utils/cdp_utils.py index 3acc00c..ebe714a 100644 --- a/src/utils/cdp_utils.py +++ b/web_hacker/utils/cdp_utils.py @@ -1,5 +1,5 @@ """ -src/utils/cdp_utils.py +web_hacker/utils/cdp_utils.py Utility functions for CDP use. 
""" diff --git a/src/utils/data_utils.py b/web_hacker/utils/data_utils.py similarity index 97% rename from src/utils/data_utils.py rename to web_hacker/utils/data_utils.py index 12da914..adacfcd 100644 --- a/src/utils/data_utils.py +++ b/web_hacker/utils/data_utils.py @@ -1,5 +1,5 @@ """ -src/utils/data_utils.py +web_hacker/utils/data_utils.py Utility functions for loading data. """ @@ -12,7 +12,7 @@ from typing import Any, Union from bs4 import BeautifulSoup -from src.utils.exceptions import UnsupportedFileFormat +from web_hacker.utils.exceptions import UnsupportedFileFormat def load_data(file_path: Path) -> Union[dict, list]: diff --git a/src/utils/exceptions.py b/web_hacker/utils/exceptions.py similarity index 63% rename from src/utils/exceptions.py rename to web_hacker/utils/exceptions.py index 76228fa..b164dc3 100644 --- a/src/utils/exceptions.py +++ b/web_hacker/utils/exceptions.py @@ -1,5 +1,5 @@ """ -src/utils/exceptions.py +web_hacker/utils/exceptions.py Custom exceptions for the project. """ @@ -27,3 +27,21 @@ class TransactionIdentificationFailedError(Exception): Exception raised when the agent fails to identify a network transaction that corresponds to the user's requested task after exhausting all attempts. """ + + +class BrowserConnectionError(Exception): + """ + Exception raised when unable to connect to the browser or create a browser tab. + """ + + +class RoutineExecutionError(Exception): + """ + Exception raised when routine execution fails. + """ + + +class WebHackerError(Exception): + """ + Base exception for all Web Hacker errors. + """ diff --git a/src/utils/llm_utils.py b/web_hacker/utils/llm_utils.py similarity index 97% rename from src/utils/llm_utils.py rename to web_hacker/utils/llm_utils.py index eb6c826..425007e 100644 --- a/src/utils/llm_utils.py +++ b/web_hacker/utils/llm_utils.py @@ -1,5 +1,5 @@ """ -src/utils/llm_utils.py +web_hacker/utils/llm_utils.py Utility functions for LLM API calls. """ @@ -12,8 +12,8 @@ from openai.types.responses import Response from pydantic import BaseModel -from src.config import Config -from src.utils.exceptions import LLMStructuredOutputError +from web_hacker.config import Config +from web_hacker.utils.exceptions import LLMStructuredOutputError logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) From 8457d30518add8ab9261f93e669ed886a20383a2 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:11:36 -0500 Subject: [PATCH 02/27] install dev dependencies --- .github/workflows/tests.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3fcde7d..6f36baf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml') }} - name: Install dependencies - run: uv sync + run: uv sync --extra dev - name: Lint run: uv run pylint $(git ls-files '*.py') diff --git a/pyproject.toml b/pyproject.toml index 3fcac99..a68bdf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "hatchling.build" [project] name = "web-hacker" version = "1.1.0" -description = "SDK for reverse engineering web apps - No API? No Problem!" 
+description = "SDK for reverse engineering web apps" readme = "README.md" requires-python = ">=3.12.3,<3.13" license = {text = "Apache-2.0"} From bffa37ee665ed1b1ca2b94eef3e0db5939a1fb49 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:19:08 -0500 Subject: [PATCH 03/27] fix: entry point for execute_routines --- web_hacker/scripts/execute_routine.py | 29 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/web_hacker/scripts/execute_routine.py b/web_hacker/scripts/execute_routine.py index fecd7b2..06d5215 100644 --- a/web_hacker/scripts/execute_routine.py +++ b/web_hacker/scripts/execute_routine.py @@ -26,7 +26,21 @@ logger = logging.getLogger(__name__) -def main(routine_path: str, parameters_path: str | None = None, parameters_dict: dict | None = None): +def main(routine_path: str | None = None, parameters_path: str | None = None, parameters_dict: str | None = None): + """ + Main function for executing a routine. + Can be called with arguments (for direct execution) or without (for CLI entry point). + """ + # If called as CLI entry point, parse arguments + if routine_path is None: + parser = argparse.ArgumentParser(description="Execute a routine") + parser.add_argument("--routine-path", type=str, required=True, help="Path to the routine JSON file") + parser.add_argument("--parameters-path", type=str, required=False, help="Path to the parameters JSON file") + parser.add_argument("--parameters-dict", type=str, required=False, help="Dictionary of parameters") + args = parser.parse_args() + routine_path = args.routine_path + parameters_path = args.parameters_path + parameters_dict = args.parameters_dict # ensure only one of parameters_path or parameters_dict is provided if parameters_path and parameters_dict: @@ -34,9 +48,9 @@ def main(routine_path: str, parameters_path: str | None = None, parameters_dict: # Load routine data if parameters_path: - parameters_dict = json.load(open(parameters_path)) + parameters_dict_parsed = json.load(open(parameters_path)) elif parameters_dict: - parameters_dict = json.loads(parameters_dict) + parameters_dict_parsed = json.loads(parameters_dict) else: raise ValueError("Either --parameters-path or --parameters-dict must be provided") @@ -48,7 +62,7 @@ def main(routine_path: str, parameters_path: str | None = None, parameters_dict: try: result = execute_routine( routine=routine, - parameters_dict=parameters_dict, + parameters_dict=parameters_dict_parsed, timeout=60.0, wait_after_navigate_sec=3.0, close_tab_when_done=False, @@ -61,9 +75,4 @@ def main(routine_path: str, parameters_path: str | None = None, parameters_dict: if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Execute a routine") - parser.add_argument("--routine-path", type=str, required=True, help="Path to the routine JSON file") - parser.add_argument("--parameters-path", type=str, required=False, help="Path to the parameters JSON file") - parser.add_argument("--parameters-dict", type=str, required=False, help="Dictionary of parameters") - args = parser.parse_args() - main(args.routine_path, args.parameters_path, args.parameters_dict) + main() From 42afe3d9e8fdde873ca422cb6e921db57992b182 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:48:34 -0500 Subject: [PATCH 04/27] update readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 58a8a61..853213a 100644 
--- a/README.md +++ b/README.md @@ -157,7 +157,14 @@ This substitutes parameter values and injects `auth_token` from cookies. The JSO ### From PyPI (Recommended) +**Note:** We recommend using a virtual environment to avoid dependency conflicts. + ```bash +# Create and activate a virtual environment +python3.12 -m venv web-hacker-env +source web-hacker-env/bin/activate # On Windows: web-hacker-env\Scripts\activate + +# Install web-hacker pip install web-hacker ``` From 4e82bbe88875247e586baa4e49dfcdbc3369ba1d Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:56:11 -0500 Subject: [PATCH 05/27] update readme --- README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 853213a..f1cfcab 100644 --- a/README.md +++ b/README.md @@ -161,10 +161,25 @@ This substitutes parameter values and injects `auth_token` from cookies. The JSO ```bash # Create and activate a virtual environment -python3.12 -m venv web-hacker-env +# Option 1: Using uv (recommended - handles Python version automatically) +uv venv web-hacker-env source web-hacker-env/bin/activate # On Windows: web-hacker-env\Scripts\activate +uv pip install web-hacker -# Install web-hacker +# Option 2: Using python3 (if Python 3.12+ is your default) +python3 -m venv web-hacker-env +source web-hacker-env/bin/activate # On Windows: web-hacker-env\Scripts\activate +pip install web-hacker + +# Option 3: Using pyenv (if you need a specific Python version) +pyenv install 3.12.3 # if not already installed +pyenv local 3.12.3 +python -m venv web-hacker-env +source web-hacker-env/bin/activate # On Windows: web-hacker-env\Scripts\activate +pip install web-hacker + +# Troubleshooting: If pip is not found, recreate the venv or use: +python -m ensurepip --upgrade # Install pip in the venv pip install web-hacker ``` From efa52ee3c2861fd6ad6fcd55c8f15351a5af3e2f Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 18:24:12 -0500 Subject: [PATCH 06/27] include a quickstart script --- README.md | 31 ++++++++ scripts/quickstart.sh | 164 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 scripts/quickstart.sh diff --git a/README.md b/README.md index f1cfcab..a26fdf1 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,29 @@ web-hacker-discover --task "Search for flights" --cdp-captures-dir ./captures web-hacker-execute --routine-path routine.json --parameters-path params.json ``` +## Quickstart (Easiest Way) πŸš€ + +The fastest way to get started is using the quickstart script, which automates the entire workflow: + +```bash +# Make sure web-hacker is installed +pip install web-hacker + +# Set your OpenAI API key +export OPENAI_API_KEY="sk-..." + +# Run the quickstart script +./scripts/quickstart.sh +``` + +The quickstart script will: +0. βœ… Automatically launch Chrome in debug mode +1. πŸ“Š Start browser monitoring (you perform actions) +2. πŸ€– Discover routines from captured data +3. πŸ“ Show you how to execute the discovered routine + +**Note:** The quickstart script is included in the repository. If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/scripts/quickstart.sh). + ## Set up Your Environment πŸ”§ ### Linux @@ -291,6 +314,8 @@ $env:OPENAI_API_KEY="sk-..." 
## Launch Chrome in Debug Mode 🐞 +> πŸ’‘ **Tip:** The [quickstart script](#quickstart-easiest-way-πŸš€) automatically launches Chrome for you. You only need to follow these manual instructions if you're not using the quickstart script. + ### Instructions for MacOS ``` @@ -353,6 +378,12 @@ The reverse engineering process follows a simple three-step workflow: 2. **Discover** β€” Let the AI agent analyze the captured data and generate a reusable Routine 3. **Execute** β€” Run the discovered Routine with different parameters to automate the task +### Quick Start (Recommended) + +**Easiest way:** Use the quickstart script (see [Quickstart](#quickstart-easiest-way-πŸš€) above) which automates everything. + +### Manual Workflow + Each step is detailed below. Start by ensuring Chrome is running in debug mode (see [Launch Chrome in Debug Mode](#launch-chrome-in-debug-mode-🐞) above). ### 0. Legal & Privacy Notice ⚠️ diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh new file mode 100644 index 0000000..248a646 --- /dev/null +++ b/scripts/quickstart.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Quickstart script: Full workflow for web-hacker +# This script guides you through: Launch Chrome β†’ Monitor β†’ Discover β†’ Execute + +set -e + +# Colors for output +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +PORT=9222 +OUTPUT_DIR="./cdp_captures" +ROUTINE_OUTPUT="./routine_discovery_output" + +echo -e "${BLUE}╔════════════════════════════════════════════════════════════╗${NC}" +echo -e "${BLUE}β•‘ Web Hacker - Quickstart Workflow β•‘${NC}" +echo -e "${BLUE}β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•${NC}" +echo "" + +# Step 1: Launch Chrome +echo -e "${GREEN}Step 1: Launching Chrome in debug mode...${NC}" + +CHROME_USER_DIR="$HOME/tmp/chrome" +mkdir -p "$CHROME_USER_DIR" + +# Detect Chrome path +if [[ "$OSTYPE" == "darwin"* ]]; then + CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" +elif [[ "$OSTYPE" == "linux-gnu"* ]]; then + CHROME_PATH=$(which google-chrome 2>/dev/null || which chromium-browser 2>/dev/null || which chromium 2>/dev/null) +else + CHROME_PATH="" +fi + +# Check if Chrome is already running +if curl -s "http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then + echo -e "${GREEN}βœ… Chrome is already running in debug mode on port $PORT${NC}" +else + # Try to launch Chrome + CHROME_FOUND=false + if [[ "$OSTYPE" == "darwin"* ]] && [[ -f "$CHROME_PATH" ]]; then + CHROME_FOUND=true + elif [[ "$OSTYPE" == "linux-gnu"* ]] && command -v "$CHROME_PATH" > /dev/null 2>&1; then + CHROME_FOUND=true + fi + + if [[ "$CHROME_FOUND" == "true" ]]; then + echo "πŸš€ Launching Chrome..." + "$CHROME_PATH" \ + --remote-debugging-address=127.0.0.1 \ + --remote-debugging-port=$PORT \ + --user-data-dir="$CHROME_USER_DIR" \ + --remote-allow-origins=* \ + --no-first-run \ + --no-default-browser-check \ + > /dev/null 2>&1 & + + CHROME_PID=$! + + # Wait for Chrome to be ready + echo "⏳ Waiting for Chrome to start..." + for i in {1..10}; do + if curl -s "http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then + echo -e "${GREEN}βœ… Chrome is ready!${NC}" + break + fi + sleep 1 + done + + if ! 
curl -s "http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then + echo -e "${YELLOW}⚠️ Chrome failed to start automatically.${NC}" + kill $CHROME_PID 2>/dev/null || true + echo " Please launch Chrome manually with:" + echo " --remote-debugging-port=$PORT" + echo "" + read -p "Press Enter when Chrome is running in debug mode..." + fi + else + echo -e "${YELLOW}⚠️ Chrome not found automatically.${NC}" + echo " Please launch Chrome manually with:" + echo " --remote-debugging-port=$PORT" + echo "" + read -p "Press Enter when Chrome is running in debug mode..." + fi +fi + +echo "" + +# Step 2: Monitor +echo -e "${GREEN}Step 2: Starting browser monitoring...${NC}" +echo -e "${YELLOW}πŸ“‹ Instructions:${NC}" +echo " 1. A new Chrome tab will open" +echo " 2. Navigate to your target website" +echo " 3. Perform the actions you want to automate (search, login, etc.)" +echo " 4. Press Ctrl+C when you're done" +echo "" +read -p "Press Enter to start monitoring..." + +echo "" +echo "πŸš€ Starting monitor (press Ctrl+C when done)..." +web-hacker-monitor \ + --host 127.0.0.1 \ + --port $PORT \ + --output-dir "$OUTPUT_DIR" \ + --url about:blank \ + --incognito || { + echo "" + echo -e "${YELLOW}⚠️ Monitoring stopped.${NC}" +} + +echo "" + +# Step 3: Discover +if [[ ! -d "$OUTPUT_DIR" ]] || [[ -z "$(ls -A $OUTPUT_DIR/network/transactions 2>/dev/null)" ]]; then + echo -e "${YELLOW}⚠️ No capture data found. Skipping discovery step.${NC}" + echo " Make sure you performed actions during monitoring." + exit 0 +fi + +echo -e "${GREEN}Step 3: Discovering routine from captured data...${NC}" +echo -e "${YELLOW}πŸ“‹ Enter a description of what you want to automate:${NC}" +echo " Example: 'Search for flights and get prices'" +read -p " Task: " TASK + +if [[ -z "$TASK" ]]; then + echo -e "${YELLOW}⚠️ No task provided. Skipping discovery.${NC}" + exit 0 +fi + +echo "" +echo "πŸ€– Running routine discovery agent..." +web-hacker-discover \ + --task "$TASK" \ + --cdp-captures-dir "$OUTPUT_DIR" \ + --output-dir "$ROUTINE_OUTPUT" \ + --llm-model gpt-5 + +echo "" + +# Step 4: Execute (optional) +if [[ ! -f "$ROUTINE_OUTPUT/routine.json" ]]; then + echo -e "${YELLOW}⚠️ Routine not found at $ROUTINE_OUTPUT/routine.json${NC}" + exit 0 +fi + +echo -e "${GREEN}Step 4: Ready to execute routine!${NC}" +echo "" +echo "βœ… Routine discovered successfully!" 
+echo " Location: $ROUTINE_OUTPUT/routine.json" +echo "" +echo -e "${YELLOW}To execute the routine, run:${NC}" +echo " web-hacker-execute \\" +echo " --routine-path $ROUTINE_OUTPUT/routine.json \\" +if [[ -f "$ROUTINE_OUTPUT/test_parameters.json" ]]; then + echo " --parameters-path $ROUTINE_OUTPUT/test_parameters.json" +else + echo " --parameters-dict '{\"param1\": \"value1\", \"param2\": \"value2\"}'" +fi +echo "" +echo -e "${BLUE}πŸ’‘ Tip: Review $ROUTINE_OUTPUT/routine.json before executing${NC}" + From 54e288c2f4d797414ee7e54f3ca464e4cb2c2132 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 17 Nov 2025 18:35:02 -0500 Subject: [PATCH 07/27] update readme --- README.md | 183 ++++++++++++++++-------------------------------------- 1 file changed, 54 insertions(+), 129 deletions(-) diff --git a/README.md b/README.md index a26fdf1..8badc0a 100644 --- a/README.md +++ b/README.md @@ -183,62 +183,26 @@ python -m ensurepip --upgrade # Install pip in the venv pip install web-hacker ``` -### From Source +### From Source (Development) + +For development or if you want the latest code: ```bash +# Clone the repository git clone https://github.com/VectorlyApp/web-hacker.git cd web-hacker -pip install -e . -``` - -## Quick Start (SDK) - -The easiest way to use web-hacker is through the SDK: - -```python -from web_hacker import WebHacker - -# Initialize the SDK -hacker = WebHacker(openai_api_key="sk-...") - -# Monitor browser activity -with hacker.monitor_browser(output_dir="./captures"): - # Navigate to your target website and perform actions - # The SDK will capture all network traffic, storage, and interactions - pass - -# Discover a routine from captured data -routine = hacker.discover_routine( - task="Search for flights and get prices", - cdp_captures_dir="./captures" -) -# Execute the discovered routine -result = hacker.execute_routine( - routine=routine, - parameters={ - "origin": "NYC", - "destination": "LAX", - "departureDate": "2026-03-22" - } -) - -print(result) -``` - -## CLI Usage - -The SDK also provides CLI commands: - -```bash -# Monitor browser -web-hacker-monitor --output-dir ./captures +# Create and activate virtual environment +python3 -m venv web-hacker-env +source web-hacker-env/bin/activate # On Windows: web-hacker-env\Scripts\activate -# Discover routines -web-hacker-discover --task "Search for flights" --cdp-captures-dir ./captures +# Install in editable mode +pip install -e . -# Execute routine -web-hacker-execute --routine-path routine.json --parameters-path params.json +# Or using uv (faster) +uv venv web-hacker-env +source web-hacker-env/bin/activate +uv pip install -e . ``` ## Quickstart (Easiest Way) πŸš€ @@ -257,117 +221,78 @@ export OPENAI_API_KEY="sk-..." ``` The quickstart script will: -0. βœ… Automatically launch Chrome in debug mode -1. πŸ“Š Start browser monitoring (you perform actions) -2. πŸ€– Discover routines from captured data -3. πŸ“ Show you how to execute the discovered routine +1. βœ… Automatically launch Chrome in debug mode +2. πŸ“Š Start browser monitoring (you perform actions) +3. πŸ€– Discover routines from captured data +4. πŸ“ Show you how to execute the discovered routine **Note:** The quickstart script is included in the repository. If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/scripts/quickstart.sh). 
-## Set up Your Environment πŸ”§ - -### Linux - -```bash -# 1) Clone and enter the repo -git clone https://github.com/VectorlyApp/web-hacker.git -cd web-hacker - -# 2) Create & activate virtual environment (uv) -uv venv --prompt web-hacker -source .venv/bin/activate # Windows: .venv\\Scripts\\activate - -# 3) Install exactly what lockfile says -uv sync - -# 4) Install in editable mode via uv (pip-compatible interface) -uv pip install -e . - -# 5) Configure environment -cp .env.example .env # then edit values -# or set directly -export OPENAI_API_KEY="sk-..." -``` - -### Windows - -```powershell -# 1) Clone and enter the repo -git clone https://github.com/VectorlyApp/web-hacker.git -cd web-hacker - -# 2) Install uv (if not already installed) -iwr https://astral.sh/uv/install.ps1 -UseBasicParsing | iex - -# 3) Create & activate virtual environment (uv) -uv venv --prompt web-hacker -.venv\Scripts\activate - -# 4) Install in editable mode via uv (pip-compatible interface) -uv pip install -e . - -# 5) Configure environment -copy .env.example .env # then edit values -# or set directly -$env:OPENAI_API_KEY="sk-..." -``` - ## Launch Chrome in Debug Mode 🐞 -> πŸ’‘ **Tip:** The [quickstart script](#quickstart-easiest-way-πŸš€) automatically launches Chrome for you. You only need to follow these manual instructions if you're not using the quickstart script. +> πŸ’‘ **Tip:** The [quickstart script](#quickstart-easiest-way-πŸš€) automatically launches Chrome for you. You only need these manual instructions if you're not using the quickstart script. -### Instructions for MacOS +### macOS -``` -# You should see JSON containing a webSocketDebuggerUrl like: -# ws://127.0.0.1:9222/devtools/browser/*************************************# Create temporary chrome user directory -mkdir $HOME/tmp -mkdir $HOME/tmp/chrome +```bash +# Create temporary Chrome user directory +mkdir -p $HOME/tmp/chrome -# Launch Chrome app in debug mode (this exposes websocket for controlling and monitoring the browser) +# Launch Chrome in debug mode "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ --remote-debugging-address=127.0.0.1 \ --remote-debugging-port=9222 \ --user-data-dir="$HOME/tmp/chrome" \ - '--remote-allow-origins=*' \ + --remote-allow-origins=* \ --no-first-run \ --no-default-browser-check - -# Verify chrome is running in debug mode +# Verify Chrome is running curl http://127.0.0.1:9222/json/version - -# You should see JSON containing a webSocketDebuggerUrl like: -# ws://127.0.0.1:9222/devtools/browser/************************************* ``` -### Instructions for Windows +### Windows -``` +```powershell # Create temporary Chrome user directory -New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\\tmp\\chrome" | Out-Null +New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\tmp\chrome" | Out-Null -# Locate Chrome (adjust path if Chrome is installed elsewhere) -$chrome = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" +# Locate Chrome +$chrome = "C:\Program Files\Google\Chrome\Application\chrome.exe" if (!(Test-Path $chrome)) { - $chrome = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe" + $chrome = "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" } -# Launch Chrome in debug mode (exposes DevTools WebSocket) +# Launch Chrome in debug mode & $chrome ` --remote-debugging-address=127.0.0.1 ` --remote-debugging-port=9222 ` - --user-data-dir="$env:USERPROFILE\\tmp\\chrome" ` + --user-data-dir="$env:USERPROFILE\tmp\chrome" ` --remote-allow-origins=* ` 
--no-first-run ` --no-default-browser-check - -# Verify Chrome is running in debug mode +# Verify Chrome is running (Invoke-WebRequest http://127.0.0.1:9222/json/version).Content +``` -# You should see JSON containing a webSocketDebuggerUrl like: -# ws://127.0.0.1:9222/devtools/browser/************************************* +### Linux + +```bash +# Create temporary Chrome user directory +mkdir -p $HOME/tmp/chrome + +# Launch Chrome in debug mode (adjust path if needed) +google-chrome \ + --remote-debugging-address=127.0.0.1 \ + --remote-debugging-port=9222 \ + --user-data-dir="$HOME/tmp/chrome" \ + --remote-allow-origins=* \ + --no-first-run \ + --no-default-browser-check + +# Verify Chrome is running +curl http://127.0.0.1:9222/json/version ``` ## HACK (reverse engineer) WEB APPS πŸ‘¨πŸ»β€πŸ’» @@ -380,9 +305,9 @@ The reverse engineering process follows a simple three-step workflow: ### Quick Start (Recommended) -**Easiest way:** Use the quickstart script (see [Quickstart](#quickstart-easiest-way-πŸš€) above) which automates everything. +**Easiest way:** Use the [quickstart script](#quickstart-easiest-way-πŸš€) which automates the entire workflow. -### Manual Workflow +### Manual Workflow (Step-by-Step) Each step is detailed below. Start by ensuring Chrome is running in debug mode (see [Launch Chrome in Debug Mode](#launch-chrome-in-debug-mode-🐞) above). From a4462d1a688dd5d5eded4857d342f9507935e523 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Tue, 18 Nov 2025 12:55:02 -0500 Subject: [PATCH 08/27] use built-in collection types --- web_hacker/sdk/execution.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web_hacker/sdk/execution.py b/web_hacker/sdk/execution.py index 466ce16..cb57028 100644 --- a/web_hacker/sdk/execution.py +++ b/web_hacker/sdk/execution.py @@ -2,7 +2,7 @@ Routine execution SDK wrapper. """ -from typing import Dict, Any +from typing import Any from ..cdp.routine_execution import execute_routine from ..data_models.production_routine import Routine @@ -28,12 +28,12 @@ def __init__( def execute( self, routine: Routine, - parameters: Dict[str, Any], + parameters: dict[str, Any], timeout: float = 180.0, wait_after_navigate_sec: float = 3.0, close_tab_when_done: bool = True, incognito: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Execute a routine. From 50b8e4d8cf9f87f8f19ec9ecc4f79fe702421756 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Tue, 18 Nov 2025 13:49:53 -0500 Subject: [PATCH 09/27] add a quickstart python script --- README.md | 4 +- scripts/quickstart.py | 280 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 2 deletions(-) create mode 100755 scripts/quickstart.py diff --git a/README.md b/README.md index 8badc0a..927b8ba 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ pip install web-hacker export OPENAI_API_KEY="sk-..." # Run the quickstart script -./scripts/quickstart.sh +python scripts/quickstart.py ``` The quickstart script will: @@ -226,7 +226,7 @@ The quickstart script will: 3. πŸ€– Discover routines from captured data 4. πŸ“ Show you how to execute the discovered routine -**Note:** The quickstart script is included in the repository. If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/scripts/quickstart.sh). +**Note:** The quickstart script is included in the repository. 
If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/scripts/quickstart.py). ## Launch Chrome in Debug Mode 🐞 diff --git a/scripts/quickstart.py b/scripts/quickstart.py new file mode 100755 index 0000000..65810ff --- /dev/null +++ b/scripts/quickstart.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +""" +Quickstart script: Full workflow for web-hacker +This script guides you through: Launch Chrome β†’ Monitor β†’ Discover β†’ Execute +""" + +import os +import sys +import time +import platform +import subprocess +import shutil +from pathlib import Path +from typing import Optional + +try: + import requests +except ImportError: + print("Error: 'requests' package is required. Install it with: pip install requests") + sys.exit(1) + +# Colors for output (ANSI codes work on modern Windows 10+ terminals) +GREEN = '\033[0;32m' +YELLOW = '\033[1;33m' +BLUE = '\033[0;34m' +NC = '\033[0m' # No Color + +# Configuration +PORT = 9222 +OUTPUT_DIR = Path("./cdp_captures") +ROUTINE_OUTPUT = Path("./routine_discovery_output") + + +def print_colored(text: str, color: str = NC) -> None: + """Print colored text.""" + print(f"{color}{text}{NC}") + + +def check_chrome_running(port: int) -> bool: + """Check if Chrome is already running in debug mode.""" + try: + response = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=1) + return response.status_code == 200 + except (requests.RequestException, requests.Timeout): + return False + + +def find_chrome_path() -> Optional[str]: + """Find Chrome executable path based on OS.""" + system = platform.system() + + if system == "Darwin": # macOS + chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + if os.path.isfile(chrome_path): + return chrome_path + elif system == "Linux": + # Try common Linux Chrome/Chromium names + for name in ["google-chrome", "chromium-browser", "chromium", "chrome"]: + chrome_path = shutil.which(name) + if chrome_path: + return chrome_path + elif system == "Windows": + # Common Windows Chrome locations + possible_paths = [ + os.path.expandvars(r"%ProgramFiles%\Google\Chrome\Application\chrome.exe"), + os.path.expandvars(r"%ProgramFiles(x86)%\Google\Chrome\Application\chrome.exe"), + os.path.expandvars(r"%LocalAppData%\Google\Chrome\Application\chrome.exe"), + ] + for path in possible_paths: + if os.path.isfile(path): + return path + # Try to find in PATH + chrome_path = shutil.which("chrome") or shutil.which("google-chrome") + if chrome_path: + return chrome_path + + return None + + +def launch_chrome(port: int) -> Optional[subprocess.Popen]: + """Launch Chrome in debug mode.""" + chrome_path = find_chrome_path() + + if not chrome_path: + print_colored("⚠️ Chrome not found automatically.", YELLOW) + print(" Please launch Chrome manually with:") + print(f" --remote-debugging-port={port}") + print() + input("Press Enter when Chrome is running in debug mode...") + return None + + # Create user data directory + if platform.system() == "Windows": + chrome_user_dir = os.path.expandvars(r"%USERPROFILE%\tmp\chrome") + else: + chrome_user_dir = os.path.expanduser("~/tmp/chrome") + + os.makedirs(chrome_user_dir, exist_ok=True) + + # Build Chrome arguments + chrome_args = [ + chrome_path, + f"--remote-debugging-address=127.0.0.1", + f"--remote-debugging-port={port}", + f"--user-data-dir={chrome_user_dir}", + "--remote-allow-origins=*", + "--no-first-run", + "--no-default-browser-check", + ] + + # Launch Chrome + print("πŸš€ Launching Chrome...") + 
try: + # On Windows, use CREATE_NEW_PROCESS_GROUP to detach + creation_flags = 0 + if platform.system() == "Windows": + creation_flags = subprocess.CREATE_NEW_PROCESS_GROUP + + process = subprocess.Popen( + chrome_args, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + creationflags=creation_flags, + ) + + # Wait for Chrome to be ready + print("⏳ Waiting for Chrome to start...") + for _ in range(10): + if check_chrome_running(port): + print_colored("βœ… Chrome is ready!", GREEN) + return process + time.sleep(1) + + # Chrome didn't start in time + print_colored("⚠️ Chrome failed to start automatically.", YELLOW) + try: + process.terminate() + time.sleep(0.5) + process.kill() + except Exception: + pass + + print(" Please launch Chrome manually with:") + print(f" --remote-debugging-port={port}") + print() + input("Press Enter when Chrome is running in debug mode...") + return None + + except Exception as e: + print_colored(f"⚠️ Error launching Chrome: {e}", YELLOW) + print(" Please launch Chrome manually with:") + print(f" --remote-debugging-port={port}") + print() + input("Press Enter when Chrome is running in debug mode...") + return None + + +def run_command(cmd: list[str], description: str) -> bool: + """Run a command and return True if successful.""" + try: + result = subprocess.run(cmd, check=True) + return result.returncode == 0 + except subprocess.CalledProcessError: + return False + except KeyboardInterrupt: + print() + print_colored("⚠️ Command interrupted.", YELLOW) + return False + except FileNotFoundError: + print_colored(f"⚠️ Command not found: {cmd[0]}", YELLOW) + print(" Make sure web-hacker is installed: pip install -e .") + return False + + +def main(): + """Main workflow.""" + print_colored("╔════════════════════════════════════════════════════════════╗", BLUE) + print_colored("β•‘ Web Hacker - Quickstart Workflow β•‘", BLUE) + print_colored("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•", BLUE) + print() + + # Step 1: Launch Chrome + print_colored("Step 1: Launching Chrome in debug mode...", GREEN) + + chrome_process = None + if check_chrome_running(PORT): + print_colored(f"βœ… Chrome is already running in debug mode on port {PORT}", GREEN) + else: + chrome_process = launch_chrome(PORT) + + print() + + # Step 2: Monitor + print_colored("Step 2: Starting browser monitoring...", GREEN) + print_colored("πŸ“‹ Instructions:", YELLOW) + print(" 1. A new Chrome tab will open") + print(" 2. Navigate to your target website") + print(" 3. Perform the actions you want to automate (search, login, etc.)") + print(" 4. Press Ctrl+C when you're done") + print() + input("Press Enter to start monitoring...") + + print() + print("πŸš€ Starting monitor (press Ctrl+C when done)...") + + monitor_cmd = [ + "web-hacker-monitor", + "--host", "127.0.0.1", + "--port", str(PORT), + "--output-dir", str(OUTPUT_DIR), + "--url", "about:blank", + "--incognito", + ] + + run_command(monitor_cmd, "monitoring") + print() + + # Step 3: Discover + transactions_dir = OUTPUT_DIR / "network" / "transactions" + if not OUTPUT_DIR.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): + print_colored("⚠️ No capture data found. 
Skipping discovery step.", YELLOW) + print(" Make sure you performed actions during monitoring.") + return + + print_colored("Step 3: Discovering routine from captured data...", GREEN) + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) + print(" Example: 'Search for flights and get prices'") + task = input(" Task: ").strip() + + if not task: + print_colored("⚠️ No task provided. Skipping discovery.", YELLOW) + return + + print() + print("πŸ€– Running routine discovery agent...") + + discover_cmd = [ + "web-hacker-discover", + "--task", task, + "--cdp-captures-dir", str(OUTPUT_DIR), + "--output-dir", str(ROUTINE_OUTPUT), + "--llm-model", "gpt-5", + ] + + run_command(discover_cmd, "discovery") + print() + + # Step 4: Execute (optional) + routine_file = ROUTINE_OUTPUT / "routine.json" + if not routine_file.exists(): + print_colored(f"⚠️ Routine not found at {routine_file}", YELLOW) + return + + print_colored("Step 4: Ready to execute routine!", GREEN) + print() + print("βœ… Routine discovered successfully!") + print(f" Location: {routine_file}") + print() + print_colored("To execute the routine, run:", YELLOW) + print(" web-hacker-execute \\") + print(f" --routine-path {routine_file} \\") + + test_params_file = ROUTINE_OUTPUT / "test_parameters.json" + if test_params_file.exists(): + print(f" --parameters-path {test_params_file}") + else: + print(" --parameters-dict '{\"param1\": \"value1\", \"param2\": \"value2\"}'") + + print() + print_colored(f"πŸ’‘ Tip: Review {routine_file} before executing", BLUE) + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print() + print_colored("⚠️ Interrupted by user.", YELLOW) + sys.exit(0) + From f4ba5c276978b47d4bc319456520ca758fd69e90 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Tue, 18 Nov 2025 15:27:21 -0500 Subject: [PATCH 10/27] remove quickstart bash script --- scripts/quickstart.sh | 164 ------------------------------------------ 1 file changed, 164 deletions(-) delete mode 100644 scripts/quickstart.sh diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh deleted file mode 100644 index 248a646..0000000 --- a/scripts/quickstart.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash -# Quickstart script: Full workflow for web-hacker -# This script guides you through: Launch Chrome β†’ Monitor β†’ Discover β†’ Execute - -set -e - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Configuration -PORT=9222 -OUTPUT_DIR="./cdp_captures" -ROUTINE_OUTPUT="./routine_discovery_output" - -echo -e "${BLUE}╔════════════════════════════════════════════════════════════╗${NC}" -echo -e "${BLUE}β•‘ Web Hacker - Quickstart Workflow β•‘${NC}" -echo -e "${BLUE}β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•${NC}" -echo "" - -# Step 1: Launch Chrome -echo -e "${GREEN}Step 1: Launching Chrome in debug mode...${NC}" - -CHROME_USER_DIR="$HOME/tmp/chrome" -mkdir -p "$CHROME_USER_DIR" - -# Detect Chrome path -if [[ "$OSTYPE" == "darwin"* ]]; then - CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" -elif [[ "$OSTYPE" == "linux-gnu"* ]]; then - CHROME_PATH=$(which google-chrome 2>/dev/null || which chromium-browser 2>/dev/null || which chromium 2>/dev/null) -else - CHROME_PATH="" -fi - -# Check if Chrome is already running -if curl -s 
"http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then - echo -e "${GREEN}βœ… Chrome is already running in debug mode on port $PORT${NC}" -else - # Try to launch Chrome - CHROME_FOUND=false - if [[ "$OSTYPE" == "darwin"* ]] && [[ -f "$CHROME_PATH" ]]; then - CHROME_FOUND=true - elif [[ "$OSTYPE" == "linux-gnu"* ]] && command -v "$CHROME_PATH" > /dev/null 2>&1; then - CHROME_FOUND=true - fi - - if [[ "$CHROME_FOUND" == "true" ]]; then - echo "πŸš€ Launching Chrome..." - "$CHROME_PATH" \ - --remote-debugging-address=127.0.0.1 \ - --remote-debugging-port=$PORT \ - --user-data-dir="$CHROME_USER_DIR" \ - --remote-allow-origins=* \ - --no-first-run \ - --no-default-browser-check \ - > /dev/null 2>&1 & - - CHROME_PID=$! - - # Wait for Chrome to be ready - echo "⏳ Waiting for Chrome to start..." - for i in {1..10}; do - if curl -s "http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then - echo -e "${GREEN}βœ… Chrome is ready!${NC}" - break - fi - sleep 1 - done - - if ! curl -s "http://127.0.0.1:$PORT/json/version" > /dev/null 2>&1; then - echo -e "${YELLOW}⚠️ Chrome failed to start automatically.${NC}" - kill $CHROME_PID 2>/dev/null || true - echo " Please launch Chrome manually with:" - echo " --remote-debugging-port=$PORT" - echo "" - read -p "Press Enter when Chrome is running in debug mode..." - fi - else - echo -e "${YELLOW}⚠️ Chrome not found automatically.${NC}" - echo " Please launch Chrome manually with:" - echo " --remote-debugging-port=$PORT" - echo "" - read -p "Press Enter when Chrome is running in debug mode..." - fi -fi - -echo "" - -# Step 2: Monitor -echo -e "${GREEN}Step 2: Starting browser monitoring...${NC}" -echo -e "${YELLOW}πŸ“‹ Instructions:${NC}" -echo " 1. A new Chrome tab will open" -echo " 2. Navigate to your target website" -echo " 3. Perform the actions you want to automate (search, login, etc.)" -echo " 4. Press Ctrl+C when you're done" -echo "" -read -p "Press Enter to start monitoring..." - -echo "" -echo "πŸš€ Starting monitor (press Ctrl+C when done)..." -web-hacker-monitor \ - --host 127.0.0.1 \ - --port $PORT \ - --output-dir "$OUTPUT_DIR" \ - --url about:blank \ - --incognito || { - echo "" - echo -e "${YELLOW}⚠️ Monitoring stopped.${NC}" -} - -echo "" - -# Step 3: Discover -if [[ ! -d "$OUTPUT_DIR" ]] || [[ -z "$(ls -A $OUTPUT_DIR/network/transactions 2>/dev/null)" ]]; then - echo -e "${YELLOW}⚠️ No capture data found. Skipping discovery step.${NC}" - echo " Make sure you performed actions during monitoring." - exit 0 -fi - -echo -e "${GREEN}Step 3: Discovering routine from captured data...${NC}" -echo -e "${YELLOW}πŸ“‹ Enter a description of what you want to automate:${NC}" -echo " Example: 'Search for flights and get prices'" -read -p " Task: " TASK - -if [[ -z "$TASK" ]]; then - echo -e "${YELLOW}⚠️ No task provided. Skipping discovery.${NC}" - exit 0 -fi - -echo "" -echo "πŸ€– Running routine discovery agent..." -web-hacker-discover \ - --task "$TASK" \ - --cdp-captures-dir "$OUTPUT_DIR" \ - --output-dir "$ROUTINE_OUTPUT" \ - --llm-model gpt-5 - -echo "" - -# Step 4: Execute (optional) -if [[ ! -f "$ROUTINE_OUTPUT/routine.json" ]]; then - echo -e "${YELLOW}⚠️ Routine not found at $ROUTINE_OUTPUT/routine.json${NC}" - exit 0 -fi - -echo -e "${GREEN}Step 4: Ready to execute routine!${NC}" -echo "" -echo "βœ… Routine discovered successfully!" 
-echo " Location: $ROUTINE_OUTPUT/routine.json" -echo "" -echo -e "${YELLOW}To execute the routine, run:${NC}" -echo " web-hacker-execute \\" -echo " --routine-path $ROUTINE_OUTPUT/routine.json \\" -if [[ -f "$ROUTINE_OUTPUT/test_parameters.json" ]]; then - echo " --parameters-path $ROUTINE_OUTPUT/test_parameters.json" -else - echo " --parameters-dict '{\"param1\": \"value1\", \"param2\": \"value2\"}'" -fi -echo "" -echo -e "${BLUE}πŸ’‘ Tip: Review $ROUTINE_OUTPUT/routine.json before executing${NC}" - From fe5ed527bf8655818f0d179e47b828545cc808a5 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Tue, 18 Nov 2025 15:35:44 -0500 Subject: [PATCH 11/27] fix: box alignment --- scripts/quickstart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index 65810ff..9452f06 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -175,7 +175,7 @@ def run_command(cmd: list[str], description: str) -> bool: def main(): """Main workflow.""" print_colored("╔════════════════════════════════════════════════════════════╗", BLUE) - print_colored("β•‘ Web Hacker - Quickstart Workflow β•‘", BLUE) + print_colored("β•‘ Web Hacker - Quickstart Workflow β•‘", BLUE) print_colored("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•", BLUE) print() From 0d3c5e7098f90abbf951dbb7a38867ca271e415e Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Tue, 18 Nov 2025 18:36:32 -0500 Subject: [PATCH 12/27] improve step 1 of quickstart --- scripts/chrome-debug-mode-explanation.md | 18 +++ scripts/quickstart.py | 147 +++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 scripts/chrome-debug-mode-explanation.md diff --git a/scripts/chrome-debug-mode-explanation.md b/scripts/chrome-debug-mode-explanation.md new file mode 100644 index 0000000..0726e6d --- /dev/null +++ b/scripts/chrome-debug-mode-explanation.md @@ -0,0 +1,18 @@ +# Chrome Debug Mode is Active βœ… + +If you're seeing this page while running the quickstart script, **Chrome has been successfully launched in debug mode**. + +## Current Status + +- βœ… **Chrome is running in debug mode** +- ⏸️ **Monitoring has NOT started yet** + +## What's Next? + +Go back to your terminal and follow the prompts. The quickstart script will: +1. Start monitoring (Step 2) - a new tab will open for you to perform your actions +2. Discover routines (Step 3) - analyze what you captured +3. Show execution instructions (Step 4) + +You can close this tab and continue with the workflow in your terminal. 
πŸš€ + diff --git a/scripts/quickstart.py b/scripts/quickstart.py index 9452f06..36bf2a3 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -12,6 +12,7 @@ import shutil from pathlib import Path from typing import Optional +from urllib.parse import quote try: import requests @@ -45,6 +46,136 @@ def check_chrome_running(port: int) -> bool: return False +def open_url_in_chrome(port: int, url: str) -> bool: + """Navigate the existing Chrome tab to a URL using CDP.""" + try: + # Get list of existing tabs + tabs_response = requests.get(f"http://127.0.0.1:{port}/json", timeout=2) + if tabs_response.status_code != 200: + return False + + tabs = tabs_response.json() + if not tabs: + return False + + # Use the first available tab + first_tab = tabs[0] + target_id = first_tab.get("id") + if not target_id: + return False + + # Navigate the existing tab using WebSocket + try: + import websocket + import json + + # Get browser WebSocket URL (not the tab's) + version_response = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=2) + if version_response.status_code != 200: + return False + + browser_ws_url = version_response.json().get("webSocketDebuggerUrl") + if not browser_ws_url: + return False + + ws = websocket.create_connection(browser_ws_url, timeout=5) + try: + next_id = 1 + + # Attach to the target + attach_id = next_id + attach_msg = { + "id": attach_id, + "method": "Target.attachToTarget", + "params": {"targetId": target_id, "flatten": True} + } + ws.send(json.dumps(attach_msg)) + next_id += 1 + + # Read attach response (may need to skip event messages) + ws.settimeout(5) + session_id = None + while True: + try: + msg = json.loads(ws.recv()) + # Look for the response with matching ID + if msg.get("id") == attach_id: + if "error" in msg: + print_colored(f"⚠️ Attach error: {msg.get('error')}", YELLOW) + return False + if "result" in msg: + session_id = msg["result"].get("sessionId") + if session_id: + break + else: + print_colored(f"⚠️ No sessionId in attach response: {msg}", YELLOW) + return False + except websocket.WebSocketTimeoutException: + print_colored("⚠️ Timeout waiting for attach response", YELLOW) + return False + + if not session_id: + print_colored("⚠️ Failed to get session ID", YELLOW) + return False + + # Enable Page domain + enable_msg = { + "id": next_id, + "method": "Page.enable", + "sessionId": session_id + } + ws.send(json.dumps(enable_msg)) + next_id += 1 + + # Read enable response (skip if timeout) + ws.settimeout(1) + try: + while True: + msg = json.loads(ws.recv()) + if msg.get("id") == next_id - 1: + break + except websocket.WebSocketTimeoutException: + pass # Continue anyway + + # Navigate to URL + navigate_msg = { + "id": next_id, + "method": "Page.navigate", + "params": {"url": url}, + "sessionId": session_id + } + ws.send(json.dumps(navigate_msg)) + + # Wait briefly for navigate response + ws.settimeout(1) + try: + while True: + msg = json.loads(ws.recv()) + if msg.get("id") == next_id: + return True + if msg.get("error"): + return False + except websocket.WebSocketTimeoutException: + # Timeout is okay, navigation was sent + return True + finally: + ws.close() + except ImportError: + # websocket library not available - this shouldn't happen if web-hacker is installed + print_colored("⚠️ websocket library not available. 
Cannot navigate tab.", YELLOW) + return False + except Exception as e: + # Print error for debugging + print_colored(f"⚠️ Error navigating tab: {e}", YELLOW) + return False + except (requests.RequestException, requests.Timeout) as e: + print_colored(f"⚠️ Error connecting to Chrome: {e}", YELLOW) + return False + except Exception as e: + print_colored(f"⚠️ Unexpected error: {e}", YELLOW) + return False + + def find_chrome_path() -> Optional[str]: """Find Chrome executable path based on OS.""" system = platform.system() @@ -128,6 +259,15 @@ def launch_chrome(port: int) -> Optional[subprocess.Popen]: for _ in range(10): if check_chrome_running(port): print_colored("βœ… Chrome is ready!", GREEN) + # Give Chrome a moment to fully initialize tabs + time.sleep(0.5) + # Open documentation page explaining what's happening + doc_url = "https://github.com/VectorlyApp/web-hacker/blob/main/scripts/chrome-debug-mode-explanation.md" + print("πŸ“– Opening documentation page...") + if open_url_in_chrome(port, doc_url): + print_colored("βœ… Documentation page opened", GREEN) + else: + print_colored("⚠️ Could not open documentation page automatically. You can manually navigate to it.", YELLOW) return process time.sleep(1) @@ -185,6 +325,13 @@ def main(): chrome_process = None if check_chrome_running(PORT): print_colored(f"βœ… Chrome is already running in debug mode on port {PORT}", GREEN) + # Still open the documentation page if Chrome was already running + doc_url = "https://github.com/VectorlyApp/web-hacker/blob/main/scripts/chrome-debug-mode-explanation.md" + print("πŸ“– Opening documentation page...") + if open_url_in_chrome(PORT, doc_url): + print_colored("βœ… Documentation page opened", GREEN) + else: + print_colored("⚠️ Could not open documentation page automatically. You can manually navigate to it.", YELLOW) else: chrome_process = launch_chrome(PORT) From 40e3081de8332f001961cc1ddaa3e629b0da9ac3 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:10:18 -0500 Subject: [PATCH 13/27] remove error message for requests import --- scripts/quickstart.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index 36bf2a3..ab29aab 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -13,12 +13,7 @@ from pathlib import Path from typing import Optional from urllib.parse import quote - -try: - import requests -except ImportError: - print("Error: 'requests' package is required. Install it with: pip install requests") - sys.exit(1) +import requests # Colors for output (ANSI codes work on modern Windows 10+ terminals) GREEN = '\033[0;32m' From f5e73113d6d941a1335271e4bdf0f17377c499cc Mon Sep 17 00:00:00 2001 From: "Ruizhi (Ray) Liao" <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:11:26 -0500 Subject: [PATCH 14/27] Update scripts/quickstart.py Co-authored-by: Alex Wilcox <98042559+alex-w-99@users.noreply.github.com> --- scripts/quickstart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index ab29aab..3161142 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -340,7 +340,7 @@ def main(): print(" 3. Perform the actions you want to automate (search, login, etc.)") print(" 4. 
Press Ctrl+C when you're done") print() - input("Press Enter to start monitoring...") + input("Press Enter to open a new tab and start monitoring...") print() print("πŸš€ Starting monitor (press Ctrl+C when done)...") From 5c2f52c496875d18ddc58c09dc59d9138da04696 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:13:30 -0500 Subject: [PATCH 15/27] clarify log messaging --- web_hacker/cdp/cdp_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_hacker/cdp/cdp_session.py b/web_hacker/cdp/cdp_session.py index fef8fa3..bf0e5df 100644 --- a/web_hacker/cdp/cdp_session.py +++ b/web_hacker/cdp/cdp_session.py @@ -211,7 +211,7 @@ def run(self): msg = json.loads(self.ws.recv()) self.handle_message(msg) except KeyboardInterrupt: - logger.info("\nStopped.") + logger.info("\nStopped. Saving assets...") # Final cookie sync using native CDP (no delay needed) self.storage_monitor.monitor_cookie_changes(self) From 776534d75a7e54cc02f7f37f47faa0e3e9cd7412 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:26:27 -0500 Subject: [PATCH 16/27] improve task input experience --- scripts/quickstart.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index 3161142..a8857ba 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -367,11 +367,18 @@ def main(): print_colored("Step 3: Discovering routine from captured data...", GREEN) print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) print(" Example: 'Search for flights and get prices'") - task = input(" Task: ").strip() + print(" (Press Ctrl+C to exit)") - if not task: - print_colored("⚠️ No task provided. Skipping discovery.", YELLOW) - return + task = "" + while not task: + try: + task = input(" Task: ").strip() + if not task: + print_colored("⚠️ Task cannot be empty. 
Please enter a description (or Ctrl+C to exit).", YELLOW) + except KeyboardInterrupt: + print() + print_colored("⚠️ Discovery cancelled by user.", YELLOW) + return print() print("πŸ€– Running routine discovery agent...") From 39e9511b8ad2104afabc56d24d0cbe348a1f0968 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 12:52:42 -0500 Subject: [PATCH 17/27] remove unused import --- scripts/quickstart.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index a8857ba..d49a70e 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -12,7 +12,6 @@ import shutil from pathlib import Path from typing import Optional -from urllib.parse import quote import requests # Colors for output (ANSI codes work on modern Windows 10+ terminals) From 5eeb1004b9e4429d581e4106301c833e64bdee92 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:06:54 -0500 Subject: [PATCH 18/27] rename vars --- scripts/quickstart.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index d49a70e..77084ba 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -22,8 +22,8 @@ # Configuration PORT = 9222 -OUTPUT_DIR = Path("./cdp_captures") -ROUTINE_OUTPUT = Path("./routine_discovery_output") +CDP_CAPTURES_DIR = Path("./cdp_captures") +DISCOVERY_OUTPUT_DIR = Path("./routine_discovery_output") def print_colored(text: str, color: str = NC) -> None: @@ -348,7 +348,7 @@ def main(): "web-hacker-monitor", "--host", "127.0.0.1", "--port", str(PORT), - "--output-dir", str(OUTPUT_DIR), + "--output-dir", str(CDP_CAPTURES_DIR), "--url", "about:blank", "--incognito", ] @@ -357,8 +357,8 @@ def main(): print() # Step 3: Discover - transactions_dir = OUTPUT_DIR / "network" / "transactions" - if not OUTPUT_DIR.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): + transactions_dir = CDP_CAPTURES_DIR / "network" / "transactions" + if not CDP_CAPTURES_DIR.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): print_colored("⚠️ No capture data found. 
Skipping discovery step.", YELLOW) print(" Make sure you performed actions during monitoring.") return @@ -385,8 +385,8 @@ def main(): discover_cmd = [ "web-hacker-discover", "--task", task, - "--cdp-captures-dir", str(OUTPUT_DIR), - "--output-dir", str(ROUTINE_OUTPUT), + "--cdp-captures-dir", str(CDP_CAPTURES_DIR), + "--output-dir", str(DISCOVERY_OUTPUT_DIR), "--llm-model", "gpt-5", ] @@ -394,7 +394,7 @@ def main(): print() # Step 4: Execute (optional) - routine_file = ROUTINE_OUTPUT / "routine.json" + routine_file = DISCOVERY_OUTPUT_DIR / "routine.json" if not routine_file.exists(): print_colored(f"⚠️ Routine not found at {routine_file}", YELLOW) return @@ -408,7 +408,7 @@ def main(): print(" web-hacker-execute \\") print(f" --routine-path {routine_file} \\") - test_params_file = ROUTINE_OUTPUT / "test_parameters.json" + test_params_file = DISCOVERY_OUTPUT_DIR / "test_parameters.json" if test_params_file.exists(): print(f" --parameters-path {test_params_file}") else: From a7eeab4e13d9959eea4c4003518f9d09eb52fa32 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 14:27:00 -0500 Subject: [PATCH 19/27] allow user to skip steps --- scripts/quickstart.py | 155 ++++++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 51 deletions(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index 77084ba..b0e1e46 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -308,6 +308,9 @@ def run_command(cmd: list[str], description: str) -> bool: def main(): """Main workflow.""" + # Use local variable that can be updated + cdp_captures_dir = CDP_CAPTURES_DIR + print_colored("╔════════════════════════════════════════════════════════════╗", BLUE) print_colored("β•‘ Web Hacker - Quickstart Workflow β•‘", BLUE) print_colored("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•", BLUE) @@ -333,68 +336,118 @@ def main(): # Step 2: Monitor print_colored("Step 2: Starting browser monitoring...", GREEN) - print_colored("πŸ“‹ Instructions:", YELLOW) - print(" 1. A new Chrome tab will open") - print(" 2. Navigate to your target website") - print(" 3. Perform the actions you want to automate (search, login, etc.)") - print(" 4. Press Ctrl+C when you're done") - print() - input("Press Enter to open a new tab and start monitoring...") - - print() - print("πŸš€ Starting monitor (press Ctrl+C when done)...") - - monitor_cmd = [ - "web-hacker-monitor", - "--host", "127.0.0.1", - "--port", str(PORT), - "--output-dir", str(CDP_CAPTURES_DIR), - "--url", "about:blank", - "--incognito", - ] - run_command(monitor_cmd, "monitoring") - print() + skip = input(" Skip monitoring step? (y/n): ").strip().lower() + if skip == 'y': + new_dir = input(f" Enter CDP captures directory path [Press Enter to use: {CDP_CAPTURES_DIR.resolve()}]: ").strip() + if new_dir: + cdp_captures_dir = Path(new_dir) + print_colored(f"βœ… Using CDP captures directory: {cdp_captures_dir}", GREEN) + print_colored("⏭️ Skipping monitoring step.", GREEN) + print() + else: + print_colored("πŸ“‹ Instructions:", YELLOW) + print(" 1. A new Chrome tab will open") + print(" 2. Navigate to your target website") + print(" 3. Perform the actions you want to automate (search, login, etc.)") + print(" 4. 
Press Ctrl+C when you're done") + print() + input("Press Enter to open a new tab and start monitoring...") + + print() + print("πŸš€ Starting monitor (press Ctrl+C when done)...") + + monitor_cmd = [ + "web-hacker-monitor", + "--host", "127.0.0.1", + "--port", str(PORT), + "--output-dir", str(cdp_captures_dir), + "--url", "about:blank", + "--incognito", + ] + + run_command(monitor_cmd, "monitoring") + print() # Step 3: Discover - transactions_dir = CDP_CAPTURES_DIR / "network" / "transactions" - if not CDP_CAPTURES_DIR.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): + transactions_dir = cdp_captures_dir / "network" / "transactions" + if not cdp_captures_dir.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): print_colored("⚠️ No capture data found. Skipping discovery step.", YELLOW) print(" Make sure you performed actions during monitoring.") return - print_colored("Step 3: Discovering routine from captured data...", GREEN) - print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) - print(" Example: 'Search for flights and get prices'") - print(" (Press Ctrl+C to exit)") + # Check if routine already exists + routine_file = DISCOVERY_OUTPUT_DIR / "routine.json" + has_existing_routine = routine_file.exists() - task = "" - while not task: - try: - task = input(" Task: ").strip() - if not task: - print_colored("⚠️ Task cannot be empty. Please enter a description (or Ctrl+C to exit).", YELLOW) - except KeyboardInterrupt: + if has_existing_routine: + print_colored(f"πŸ“ Found existing routine at {routine_file}", YELLOW) + skip = input(" Skip discovery? (y/n): ").strip().lower() + if skip == 'y': + print_colored("⏭️ Skipping discovery step.", GREEN) print() - print_colored("⚠️ Discovery cancelled by user.", YELLOW) - return - - print() - print("πŸ€– Running routine discovery agent...") - - discover_cmd = [ - "web-hacker-discover", - "--task", task, - "--cdp-captures-dir", str(CDP_CAPTURES_DIR), - "--output-dir", str(DISCOVERY_OUTPUT_DIR), - "--llm-model", "gpt-5", - ] - - run_command(discover_cmd, "discovery") - print() + else: + print_colored("Step 3: Discovering routine from captured data...", GREEN) + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) + print(" Example: 'Search for flights and get prices'") + print(" (Press Ctrl+C to exit)") + + task = "" + while not task: + try: + task = input(" Task: ").strip() + if not task: + print_colored("⚠️ Task cannot be empty. Please enter a description (or Ctrl+C to exit).", YELLOW) + except KeyboardInterrupt: + print() + print_colored("⚠️ Discovery cancelled by user.", YELLOW) + return + + print() + print("πŸ€– Running routine discovery agent...") + + discover_cmd = [ + "web-hacker-discover", + "--task", task, + "--cdp-captures-dir", str(cdp_captures_dir), + "--output-dir", str(DISCOVERY_OUTPUT_DIR), + "--llm-model", "gpt-5", + ] + + run_command(discover_cmd, "discovery") + print() + else: + print_colored("Step 3: Discovering routine from captured data...", GREEN) + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) + print(" Example: 'Search for flights and get prices'") + print(" (Press Ctrl+C to exit)") + + task = "" + while not task: + try: + task = input(" Task: ").strip() + if not task: + print_colored("⚠️ Task cannot be empty. 
Please enter a description (or Ctrl+C to exit).", YELLOW) + except KeyboardInterrupt: + print() + print_colored("⚠️ Discovery cancelled by user.", YELLOW) + return + + print() + print("πŸ€– Running routine discovery agent...") + + discover_cmd = [ + "web-hacker-discover", + "--task", task, + "--cdp-captures-dir", str(cdp_captures_dir), + "--output-dir", str(DISCOVERY_OUTPUT_DIR), + "--llm-model", "gpt-5", + ] + + run_command(discover_cmd, "discovery") + print() # Step 4: Execute (optional) - routine_file = DISCOVERY_OUTPUT_DIR / "routine.json" if not routine_file.exists(): print_colored(f"⚠️ Routine not found at {routine_file}", YELLOW) return From 09b41472fe617e753957c2aeb126ef22b8b5c2ed Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 14:56:09 -0500 Subject: [PATCH 20/27] ask the user whether to rm existing data --- scripts/quickstart.py | 64 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/scripts/quickstart.py b/scripts/quickstart.py index b0e1e46..ee4ea0a 100755 --- a/scripts/quickstart.py +++ b/scripts/quickstart.py @@ -308,8 +308,9 @@ def run_command(cmd: list[str], description: str) -> bool: def main(): """Main workflow.""" - # Use local variable that can be updated + # Use local variables that can be updated cdp_captures_dir = CDP_CAPTURES_DIR + discovery_output_dir = DISCOVERY_OUTPUT_DIR print_colored("╔════════════════════════════════════════════════════════════╗", BLUE) print_colored("β•‘ Web Hacker - Quickstart Workflow β•‘", BLUE) @@ -346,6 +347,21 @@ def main(): print_colored("⏭️ Skipping monitoring step.", GREEN) print() else: + # Check if directory exists and has content before running monitoring + if cdp_captures_dir.exists() and any(cdp_captures_dir.iterdir()): + print_colored(f"⚠️ Directory {cdp_captures_dir} already exists and contains files.", YELLOW) + confirm = input(" Remove existing data before monitoring? (Data may be overwritten if not removed) (y/n): ").strip().lower() + if confirm == 'y': + # Remove all data but keep the directory + for item in cdp_captures_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + print_colored(f"βœ… Cleared data in {cdp_captures_dir}", GREEN) + else: + print_colored(f"⚠️ Keeping existing data in {cdp_captures_dir}", YELLOW) + print_colored("πŸ“‹ Instructions:", YELLOW) print(" 1. A new Chrome tab will open") print(" 2. 
Navigate to your target website") @@ -376,8 +392,14 @@ def main(): print(" Make sure you performed actions during monitoring.") return + print_colored("Step 3: Discovering routine from captured data...", GREEN) + new_output_dir = input(f" Enter discovery output directory path [Press Enter to use: {DISCOVERY_OUTPUT_DIR.resolve()}]: ").strip() + if new_output_dir: + discovery_output_dir = Path(new_output_dir) + print_colored(f"βœ… Using discovery output directory: {discovery_output_dir}", GREEN) + # Check if routine already exists - routine_file = DISCOVERY_OUTPUT_DIR / "routine.json" + routine_file = discovery_output_dir / "routine.json" has_existing_routine = routine_file.exists() if has_existing_routine: @@ -387,7 +409,21 @@ def main(): print_colored("⏭️ Skipping discovery step.", GREEN) print() else: - print_colored("Step 3: Discovering routine from captured data...", GREEN) + # Check if directory exists and has content before running discovery + if discovery_output_dir.exists() and any(discovery_output_dir.iterdir()): + print_colored(f"⚠️ Directory {discovery_output_dir} already exists and contains files.", YELLOW) + confirm = input(" Remove existing data before discovery? (Data may be overwritten if not removed) (y/n): ").strip().lower() + if confirm == 'y': + # Remove all data but keep the directory + for item in discovery_output_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + print_colored(f"βœ… Cleared data in {discovery_output_dir}", GREEN) + else: + print_colored(f"⚠️ Keeping existing data in {discovery_output_dir}", YELLOW) + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) print(" Example: 'Search for flights and get prices'") print(" (Press Ctrl+C to exit)") @@ -410,14 +446,28 @@ def main(): "web-hacker-discover", "--task", task, "--cdp-captures-dir", str(cdp_captures_dir), - "--output-dir", str(DISCOVERY_OUTPUT_DIR), + "--output-dir", str(discovery_output_dir), "--llm-model", "gpt-5", ] run_command(discover_cmd, "discovery") print() else: - print_colored("Step 3: Discovering routine from captured data...", GREEN) + # Check if directory exists and has content before running discovery + if discovery_output_dir.exists() and any(discovery_output_dir.iterdir()): + print_colored(f"⚠️ Directory {discovery_output_dir} already exists and contains files.", YELLOW) + confirm = input(" Remove existing data before discovery? 
(Data may be overwritten if not removed) (y/n): ").strip().lower() + if confirm == 'y': + # Remove all data but keep the directory + for item in discovery_output_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + print_colored(f"βœ… Cleared data in {discovery_output_dir}", GREEN) + else: + print_colored(f"⚠️ Keeping existing data in {discovery_output_dir}", YELLOW) + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) print(" Example: 'Search for flights and get prices'") print(" (Press Ctrl+C to exit)") @@ -440,7 +490,7 @@ def main(): "web-hacker-discover", "--task", task, "--cdp-captures-dir", str(cdp_captures_dir), - "--output-dir", str(DISCOVERY_OUTPUT_DIR), + "--output-dir", str(discovery_output_dir), "--llm-model", "gpt-5", ] @@ -461,7 +511,7 @@ def main(): print(" web-hacker-execute \\") print(f" --routine-path {routine_file} \\") - test_params_file = DISCOVERY_OUTPUT_DIR / "test_parameters.json" + test_params_file = discovery_output_dir / "test_parameters.json" if test_params_file.exists(): print(f" --parameters-path {test_params_file}") else: From 38b36b0626fa31f799df331658bd10379ebaeb81 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 14:59:46 -0500 Subject: [PATCH 21/27] move quickstart.py --- README.md | 4 ++-- scripts/quickstart.py => quickstart.py | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename scripts/quickstart.py => quickstart.py (100%) diff --git a/README.md b/README.md index 927b8ba..6730c4b 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ pip install web-hacker export OPENAI_API_KEY="sk-..." # Run the quickstart script -python scripts/quickstart.py +python quickstart.py ``` The quickstart script will: @@ -226,7 +226,7 @@ The quickstart script will: 3. πŸ€– Discover routines from captured data 4. πŸ“ Show you how to execute the discovered routine -**Note:** The quickstart script is included in the repository. If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/scripts/quickstart.py). +**Note:** The quickstart script is included in the repository. If you installed from PyPI, you can download it from the [GitHub repository](https://github.com/VectorlyApp/web-hacker/blob/main/quickstart.py). ## Launch Chrome in Debug Mode 🐞 diff --git a/scripts/quickstart.py b/quickstart.py similarity index 100% rename from scripts/quickstart.py rename to quickstart.py From 7e847fd87bea55833e8be790e8c9c8c0abd3395d Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:11:53 -0500 Subject: [PATCH 22/27] update open_url_in_chrome messaging --- quickstart.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/quickstart.py b/quickstart.py index ee4ea0a..3e4d372 100755 --- a/quickstart.py +++ b/quickstart.py @@ -257,11 +257,10 @@ def launch_chrome(port: int) -> Optional[subprocess.Popen]: time.sleep(0.5) # Open documentation page explaining what's happening doc_url = "https://github.com/VectorlyApp/web-hacker/blob/main/scripts/chrome-debug-mode-explanation.md" - print("πŸ“– Opening documentation page...") if open_url_in_chrome(port, doc_url): print_colored("βœ… Documentation page opened", GREEN) else: - print_colored("⚠️ Could not open documentation page automatically. You can manually navigate to it.", YELLOW) + print_colored("⚠️ Could not navigate Chrome tab. 
There may be an issue with the Chrome connection.", YELLOW) return process time.sleep(1) @@ -325,11 +324,10 @@ def main(): print_colored(f"βœ… Chrome is already running in debug mode on port {PORT}", GREEN) # Still open the documentation page if Chrome was already running doc_url = "https://github.com/VectorlyApp/web-hacker/blob/main/scripts/chrome-debug-mode-explanation.md" - print("πŸ“– Opening documentation page...") if open_url_in_chrome(PORT, doc_url): print_colored("βœ… Documentation page opened", GREEN) else: - print_colored("⚠️ Could not open documentation page automatically. You can manually navigate to it.", YELLOW) + print_colored("⚠️ Could not navigate Chrome tab. There may be an issue with the Chrome connection.", YELLOW) else: chrome_process = launch_chrome(PORT) From 53e4c325849b14b15287ab71a14d3223e295c206 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:20:17 -0500 Subject: [PATCH 23/27] do not open the documentation page if chrome was already running --- quickstart.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/quickstart.py b/quickstart.py index 3e4d372..5c6645b 100755 --- a/quickstart.py +++ b/quickstart.py @@ -322,12 +322,6 @@ def main(): chrome_process = None if check_chrome_running(PORT): print_colored(f"βœ… Chrome is already running in debug mode on port {PORT}", GREEN) - # Still open the documentation page if Chrome was already running - doc_url = "https://github.com/VectorlyApp/web-hacker/blob/main/scripts/chrome-debug-mode-explanation.md" - if open_url_in_chrome(PORT, doc_url): - print_colored("βœ… Documentation page opened", GREEN) - else: - print_colored("⚠️ Could not navigate Chrome tab. There may be an issue with the Chrome connection.", YELLOW) else: chrome_process = launch_chrome(PORT) From cf953dd498cd6e58b56c9b1e2da080f435e0cac8 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:25:37 -0500 Subject: [PATCH 24/27] move imports --- quickstart.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/quickstart.py b/quickstart.py index 5c6645b..e841428 100755 --- a/quickstart.py +++ b/quickstart.py @@ -10,9 +10,11 @@ import platform import subprocess import shutil +import json from pathlib import Path from typing import Optional import requests +import websocket # Colors for output (ANSI codes work on modern Windows 10+ terminals) GREEN = '\033[0;32m' @@ -60,9 +62,6 @@ def open_url_in_chrome(port: int, url: str) -> bool: # Navigate the existing tab using WebSocket try: - import websocket - import json - # Get browser WebSocket URL (not the tab's) version_response = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=2) if version_response.status_code != 200: From b5dfdf17d91d2aa2a419c14008ca2bbca1938eae Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:43:32 -0500 Subject: [PATCH 25/27] graceful exit --- quickstart.py | 135 +++++++++++++++++++--------------- web_hacker/cdp/cdp_session.py | 115 +++++++++++++++++++++++------ web_hacker/sdk/monitor.py | 89 ++++++++++++++-------- 3 files changed, 224 insertions(+), 115 deletions(-) diff --git a/quickstart.py b/quickstart.py index e841428..b75b04a 100755 --- a/quickstart.py +++ b/quickstart.py @@ -377,28 +377,87 @@ def main(): print() # Step 3: Discover + print_colored("Step 3: Discovering routine from captured data...", GREEN) + + # Check if capture data exists first 
transactions_dir = cdp_captures_dir / "network" / "transactions" if not cdp_captures_dir.exists() or not transactions_dir.exists() or not any(transactions_dir.iterdir()): print_colored("⚠️ No capture data found. Skipping discovery step.", YELLOW) print(" Make sure you performed actions during monitoring.") return - print_colored("Step 3: Discovering routine from captured data...", GREEN) - new_output_dir = input(f" Enter discovery output directory path [Press Enter to use: {DISCOVERY_OUTPUT_DIR.resolve()}]: ").strip() - if new_output_dir: - discovery_output_dir = Path(new_output_dir) - print_colored(f"βœ… Using discovery output directory: {discovery_output_dir}", GREEN) - - # Check if routine already exists - routine_file = discovery_output_dir / "routine.json" - has_existing_routine = routine_file.exists() - - if has_existing_routine: - print_colored(f"πŸ“ Found existing routine at {routine_file}", YELLOW) - skip = input(" Skip discovery? (y/n): ").strip().lower() - if skip == 'y': - print_colored("⏭️ Skipping discovery step.", GREEN) - print() + skip = input(" Skip discovery step? (y/n): ").strip().lower() + if skip == 'y': + # Use default directory when skipping - user can specify routine path in step 4 if needed + discovery_output_dir = DISCOVERY_OUTPUT_DIR + print_colored("⏭️ Skipping discovery step.", GREEN) + print_colored(f" Using default discovery output directory: {discovery_output_dir.resolve()}", GREEN) + print() + + # Set routine_file for step 4 even if skipped + routine_file = discovery_output_dir / "routine.json" + else: + new_output_dir = input(f" Enter discovery output directory path [Press Enter to use: {DISCOVERY_OUTPUT_DIR.resolve()}]: ").strip() + if new_output_dir: + discovery_output_dir = Path(new_output_dir) + print_colored(f"βœ… Using discovery output directory: {discovery_output_dir}", GREEN) + else: + discovery_output_dir = DISCOVERY_OUTPUT_DIR + + # Check if routine already exists + routine_file = discovery_output_dir / "routine.json" + has_existing_routine = routine_file.exists() + + if has_existing_routine: + print_colored(f"πŸ“ Found existing routine at {routine_file}", YELLOW) + overwrite = input(" Overwrite existing routine? (y/n): ").strip().lower() + if overwrite != 'y': + print_colored("⏭️ Keeping existing routine. Skipping discovery step.", GREEN) + print() + else: + # Check if directory exists and has content before running discovery + if discovery_output_dir.exists() and any(discovery_output_dir.iterdir()): + print_colored(f"⚠️ Directory {discovery_output_dir} already exists and contains files.", YELLOW) + confirm = input(" Remove existing data before discovery? (Data may be overwritten if not removed) (y/n): ").strip().lower() + if confirm == 'y': + # Remove all data but keep the directory + for item in discovery_output_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + print_colored(f"βœ… Cleared data in {discovery_output_dir}", GREEN) + else: + print_colored(f"⚠️ Keeping existing data in {discovery_output_dir}", YELLOW) + + print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) + print(" Example: 'Search for flights and get prices'") + print(" (Press Ctrl+C to exit)") + + task = "" + while not task: + try: + task = input(" Task: ").strip() + if not task: + print_colored("⚠️ Task cannot be empty. 
Please enter a description (or Ctrl+C to exit).", YELLOW) + except KeyboardInterrupt: + print() + print_colored("⚠️ Discovery cancelled by user.", YELLOW) + return + + print() + print("πŸ€– Running routine discovery agent...") + + discover_cmd = [ + "web-hacker-discover", + "--task", task, + "--cdp-captures-dir", str(cdp_captures_dir), + "--output-dir", str(discovery_output_dir), + "--llm-model", "gpt-5", + ] + + run_command(discover_cmd, "discovery") + print() else: # Check if directory exists and has content before running discovery if discovery_output_dir.exists() and any(discovery_output_dir.iterdir()): @@ -443,50 +502,6 @@ def main(): run_command(discover_cmd, "discovery") print() - else: - # Check if directory exists and has content before running discovery - if discovery_output_dir.exists() and any(discovery_output_dir.iterdir()): - print_colored(f"⚠️ Directory {discovery_output_dir} already exists and contains files.", YELLOW) - confirm = input(" Remove existing data before discovery? (Data may be overwritten if not removed) (y/n): ").strip().lower() - if confirm == 'y': - # Remove all data but keep the directory - for item in discovery_output_dir.iterdir(): - if item.is_file(): - item.unlink() - elif item.is_dir(): - shutil.rmtree(item) - print_colored(f"βœ… Cleared data in {discovery_output_dir}", GREEN) - else: - print_colored(f"⚠️ Keeping existing data in {discovery_output_dir}", YELLOW) - - print_colored("πŸ“‹ Enter a description of what you want to automate:", YELLOW) - print(" Example: 'Search for flights and get prices'") - print(" (Press Ctrl+C to exit)") - - task = "" - while not task: - try: - task = input(" Task: ").strip() - if not task: - print_colored("⚠️ Task cannot be empty. Please enter a description (or Ctrl+C to exit).", YELLOW) - except KeyboardInterrupt: - print() - print_colored("⚠️ Discovery cancelled by user.", YELLOW) - return - - print() - print("πŸ€– Running routine discovery agent...") - - discover_cmd = [ - "web-hacker-discover", - "--task", task, - "--cdp-captures-dir", str(cdp_captures_dir), - "--output-dir", str(discovery_output_dir), - "--llm-model", "gpt-5", - ] - - run_command(discover_cmd, "discovery") - print() # Step 4: Execute (optional) if not routine_file.exists(): diff --git a/web_hacker/cdp/cdp_session.py b/web_hacker/cdp/cdp_session.py index bf0e5df..5250d79 100644 --- a/web_hacker/cdp/cdp_session.py +++ b/web_hacker/cdp/cdp_session.py @@ -42,6 +42,10 @@ def __init__( self.clear_cookies = clear_cookies self.clear_storage = clear_storage + # Connection state tracking + self._connection_lost = False + self._connection_lost_lock = threading.Lock() + # Response tracking for synchronous commands self.pending_responses = {} self.response_lock = threading.Lock() @@ -67,8 +71,15 @@ def __init__( def send(self, method, params=None): """Send CDP command and return sequence ID.""" + if self._connection_lost: + raise ConnectionError("WebSocket connection is closed") self.seq += 1 - self.ws.send(json.dumps({"id": self.seq, "method": method, "params": params or {}})) + try: + self.ws.send(json.dumps({"id": self.seq, "method": method, "params": params or {}})) + except (websocket.WebSocketConnectionClosedException, OSError, ConnectionError) as e: + with self._connection_lost_lock: + self._connection_lost = True + raise ConnectionError(f"WebSocket connection lost: {e}") return self.seq def send_and_wait(self, method, params=None, timeout=10): @@ -100,6 +111,14 @@ def send_and_wait(self, method, params=None, timeout=10): def setup_cdp(self, 
navigate_to=None): """Setup CDP domains and configuration.""" + # Enable Target domain to receive target lifecycle events (if on browser WebSocket) + # Note: This may not work on tab WebSockets, but that's okay - we'll catch disconnections + try: + self.send("Target.setDiscoverTargets", {"discover": True}) + except Exception as e: + # This is expected to fail on tab WebSockets - Target domain is browser-level + logger.debug(f"Could not enable Target domain (expected on tab WebSockets): {e}") + # Enable basic domains self.send("Page.enable") self.send("Runtime.enable") @@ -148,6 +167,14 @@ def setup_cdp(self, navigate_to=None): def handle_message(self, msg): """Handle incoming CDP message by delegating to appropriate monitors.""" + # Check for target lifecycle events (tab closure) + method = msg.get("method") + if method == "Target.targetDestroyed": + logger.info("Tab was closed. Connection will be lost. Saving assets...") + with self._connection_lost_lock: + self._connection_lost = True + return + # Try network monitor first if self.network_monitor.handle_network_message(msg, self): return @@ -202,44 +229,84 @@ def _handle_command_reply(self, msg): return False - def run(self): - """Main message processing loop.""" - logger.info("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.") + def _generate_assets(self): + """Generate all monitoring assets. Works even if connection is lost.""" + try: + # Final cookie sync using native CDP (only if connection is still alive) + if not self._connection_lost: + try: + self.storage_monitor.monitor_cookie_changes(self) + except Exception as e: + logger.debug(f"Could not sync cookies (connection may be lost): {e}") + except Exception as e: + logger.debug(f"Error in cookie sync: {e}") + # Consolidate all transactions into a single JSON file (works with cached data) try: - while True: - msg = json.loads(self.ws.recv()) - self.handle_message(msg) - except KeyboardInterrupt: - logger.info("\nStopped. 
Saving assets...") - # Final cookie sync using native CDP (no delay needed) - self.storage_monitor.monitor_cookie_changes(self) - - # Consolidate all transactions into a single JSON file consolidated_path = f"{self.output_dir}/consolidated_transactions.json" self.network_monitor.consolidate_transactions(consolidated_path) - - # Generate HAR file from consolidated transactions + except Exception as e: + logger.warning(f"Could not consolidate transactions: {e}") + + # Generate HAR file from consolidated transactions (works with cached data) + try: har_path = f"{self.output_dir}/network.har" self.network_monitor.generate_har_from_transactions(har_path, "Web Hacker Session") - - # Consolidate all interactions into a single JSON file + except Exception as e: + logger.warning(f"Could not generate HAR file: {e}") + + # Consolidate all interactions into a single JSON file (works with cached data) + try: interaction_dir = self.paths.get('interaction_dir', f"{self.output_dir}/interaction") consolidated_interactions_path = os.path.join(interaction_dir, "consolidated_interactions.json") self.interaction_monitor.consolidate_interactions(consolidated_interactions_path) + except Exception as e: + logger.warning(f"Could not consolidate interactions: {e}") + + def run(self): + """Main message processing loop.""" + logger.info("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.") + + try: + while True: + try: + msg = json.loads(self.ws.recv()) + self.handle_message(msg) + + # Check if connection was lost due to tab closure + if self._connection_lost: + logger.info("Tab closed. Saving assets...") + break + except (websocket.WebSocketConnectionClosedException, OSError, ConnectionError) as e: + # WebSocket connection lost (tab closed, browser closed, etc.) + logger.info(f"Connection lost: {e}. Saving assets...") + with self._connection_lost_lock: + self._connection_lost = True + break + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse message: {e}") + continue + except KeyboardInterrupt: + logger.info("\nStopped by user. 
Saving assets...") finally: + # Always generate assets, even if connection is lost + self._generate_assets() + + # Close WebSocket if still open try: - self.ws.close() - except: + if self.ws and not self._connection_lost: + self.ws.close() + except Exception: pass def get_monitoring_summary(self): """Get summary of all monitoring activities.""" - # Trigger final cookie check using native CDP (no delay needed) - try: - self.storage_monitor.monitor_cookie_changes(self) - except: - pass + # Trigger final cookie check using native CDP (only if connection is still alive) + if not self._connection_lost: + try: + self.storage_monitor.monitor_cookie_changes(self) + except Exception as e: + logger.debug(f"Could not sync cookies for summary: {e}") storage_summary = self.storage_monitor.get_storage_summary() network_summary = self.network_monitor.get_network_summary() diff --git a/web_hacker/sdk/monitor.py b/web_hacker/sdk/monitor.py index 9e0987e..3e813de 100644 --- a/web_hacker/sdk/monitor.py +++ b/web_hacker/sdk/monitor.py @@ -135,6 +135,7 @@ def _run_monitoring_loop(self): try: import json + import websocket # Set a timeout on the websocket to allow checking stop event if hasattr(self.session.ws, 'settimeout'): @@ -144,6 +145,17 @@ def _run_monitoring_loop(self): try: msg = json.loads(self.session.ws.recv()) self.session.handle_message(msg) + + # Check if connection was lost due to tab closure + if self.session._connection_lost: + logger.info("Tab closed during monitoring. Saving assets...") + break + except (websocket.WebSocketConnectionClosedException, OSError, ConnectionError) as e: + if self._stop_event.is_set(): + break + # Connection lost (tab closed, browser closed, etc.) + logger.info(f"Connection lost: {e}. Saving assets...") + break except Exception as e: if self._stop_event.is_set(): break @@ -155,37 +167,52 @@ def _run_monitoring_loop(self): except KeyboardInterrupt: pass finally: - # Final cookie sync - try: - if self.session: - self.session.storage_monitor.monitor_cookie_changes(self.session) - except: - pass - - # Consolidate transactions - try: - if self.session: - consolidated_path = f"{self.output_dir}/network/consolidated_transactions.json" - self.session.network_monitor.consolidate_transactions(consolidated_path) - except: - pass - - # Generate HAR file - try: - if self.session: - har_path = f"{self.output_dir}/network/network.har" - self.session.network_monitor.generate_har_from_transactions(har_path, "Web Hacker Session") - except: - pass - - # Consolidate interactions - try: - if self.session: - interaction_dir = self.session.paths.get('interaction_dir', f"{self.output_dir}/interaction") - consolidated_interactions_path = str(Path(interaction_dir) / "consolidated_interactions.json") - self.session.interaction_monitor.consolidate_interactions(consolidated_interactions_path) - except: - pass + # Always generate assets, even if connection is lost + # Use the session's asset generation method if available, otherwise do it manually + if self.session: + try: + # Use the session's built-in asset generation + if hasattr(self.session, '_generate_assets'): + self.session._generate_assets() + else: + # Fallback to manual asset generation + self._generate_assets_manual() + except Exception as e: + logger.warning(f"Error generating assets: {e}") + + def _generate_assets_manual(self): + """Manual asset generation fallback.""" + if not self.session: + return + + # Final cookie sync (only if connection is still alive) + try: + if not self.session._connection_lost: + 
self.session.storage_monitor.monitor_cookie_changes(self.session) + except Exception as e: + logger.debug(f"Could not sync cookies: {e}") + + # Consolidate transactions (works with cached data) + try: + consolidated_path = f"{self.output_dir}/network/consolidated_transactions.json" + self.session.network_monitor.consolidate_transactions(consolidated_path) + except Exception as e: + logger.warning(f"Could not consolidate transactions: {e}") + + # Generate HAR file (works with cached data) + try: + har_path = f"{self.output_dir}/network/network.har" + self.session.network_monitor.generate_har_from_transactions(har_path, "Web Hacker Session") + except Exception as e: + logger.warning(f"Could not generate HAR file: {e}") + + # Consolidate interactions (works with cached data) + try: + interaction_dir = self.session.paths.get('interaction_dir', f"{self.output_dir}/interaction") + consolidated_interactions_path = str(Path(interaction_dir) / "consolidated_interactions.json") + self.session.interaction_monitor.consolidate_interactions(consolidated_interactions_path) + except Exception as e: + logger.warning(f"Could not consolidate interactions: {e}") def stop(self) -> dict: """Stop monitoring and return summary.""" From 14b5d783256de7ef8db8ea271afc98a4b711f19e Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Thu, 20 Nov 2025 12:41:28 -0500 Subject: [PATCH 26/27] enable quickstart script to close chrome --- quickstart.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/quickstart.py b/quickstart.py index b75b04a..95ea96f 100755 --- a/quickstart.py +++ b/quickstart.py @@ -11,6 +11,8 @@ import subprocess import shutil import json +import atexit +import signal from pathlib import Path from typing import Optional import requests @@ -27,6 +29,9 @@ CDP_CAPTURES_DIR = Path("./cdp_captures") DISCOVERY_OUTPUT_DIR = Path("./routine_discovery_output") +# Global variable to track Chrome process for cleanup +_chrome_process: Optional[subprocess.Popen] = None + def print_colored(text: str, color: str = NC) -> None: """Print colored text.""" @@ -287,6 +292,55 @@ def launch_chrome(port: int) -> Optional[subprocess.Popen]: return None +def cleanup_chrome(process: Optional[subprocess.Popen], port: int) -> None: + """Clean up Chrome process if it was launched by this script.""" + if process is None: + return + + try: + # Check if Chrome is still running on the port + if not check_chrome_running(port): + return + + print() + print_colored("🧹 Cleaning up Chrome...", YELLOW) + + # Try graceful termination first + try: + if platform.system() == "Windows": + # On Windows with CREATE_NEW_PROCESS_GROUP, we need to kill the process group + process.terminate() + time.sleep(1) + if process.poll() is None: + process.kill() + else: + process.terminate() + time.sleep(1) + if process.poll() is None: + process.kill() + + # Wait a bit for Chrome to close + process.wait(timeout=3) + print_colored("βœ… Chrome closed successfully", GREEN) + except subprocess.TimeoutExpired: + # Force kill if it didn't terminate + try: + process.kill() + process.wait(timeout=2) + print_colored("βœ… Chrome force-closed", GREEN) + except Exception: + pass + except Exception as e: + # Process might already be dead + if process.poll() is not None: + print_colored("βœ… Chrome already closed", GREEN) + else: + print_colored(f"⚠️ Error closing Chrome: {e}", YELLOW) + except Exception: + # Silently fail during cleanup + pass + + def run_command(cmd: list[str], 
description: str) -> bool: """Run a command and return True if successful.""" try: @@ -306,6 +360,8 @@ def run_command(cmd: list[str], description: str) -> bool: def main(): """Main workflow.""" + global _chrome_process + # Use local variables that can be updated cdp_captures_dir = CDP_CAPTURES_DIR discovery_output_dir = DISCOVERY_OUTPUT_DIR @@ -323,6 +379,19 @@ def main(): print_colored(f"βœ… Chrome is already running in debug mode on port {PORT}", GREEN) else: chrome_process = launch_chrome(PORT) + # Store globally for cleanup + _chrome_process = chrome_process + # Register cleanup function if we launched Chrome + if chrome_process is not None: + atexit.register(cleanup_chrome, chrome_process, PORT) + # Also register signal handlers for graceful shutdown + def signal_handler(signum, frame): + cleanup_chrome(chrome_process, PORT) + sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) + # SIGTERM may not be available on all platforms + if hasattr(signal, 'SIGTERM'): + signal.signal(signal.SIGTERM, signal_handler) print() @@ -376,6 +445,14 @@ def main(): run_command(monitor_cmd, "monitoring") print() + # Close Chrome before Step 3 if we launched it + if chrome_process is not None: + cleanup_chrome(chrome_process, PORT) + atexit.unregister(cleanup_chrome) + chrome_process = None + _chrome_process = None + print() + # Step 3: Discover print_colored("Step 3: Discovering routine from captured data...", GREEN) @@ -533,5 +610,8 @@ def main(): except KeyboardInterrupt: print() print_colored("⚠️ Interrupted by user.", YELLOW) + # Clean up Chrome if we launched it + if _chrome_process is not None: + cleanup_chrome(_chrome_process, PORT) sys.exit(0) From da0522eec660645a0670bb8d27ff3407a3a1c9ac Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Thu, 20 Nov 2025 12:45:30 -0500 Subject: [PATCH 27/27] give user high-level context before starting --- quickstart.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/quickstart.py b/quickstart.py index 95ea96f..c0b9fc9 100755 --- a/quickstart.py +++ b/quickstart.py @@ -371,6 +371,17 @@ def main(): print_colored("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•", BLUE) print() + # Pipeline overview + print_colored("Web-hacker Pipeline:", BLUE) + print() + print(" Step 1: Launch Chrome in debug mode") + print(" Step 2: Monitor browser interactions") + print(" Step 3: Discover web action routine") + print(" Step 4 (optional): Test routine execution") + print() + input("Press Enter to start: ") + print() + # Step 1: Launch Chrome print_colored("Step 1: Launching Chrome in debug mode...", GREEN)
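
The last three patches share one theme: shut down cleanly no matter how the run ends. The CDP session flushes its captured assets when the tab or WebSocket disappears, and quickstart.py tears down the Chrome process it launched via an `atexit` hook plus signal handlers. Below is a minimal, self-contained sketch of that cleanup pattern, assuming a generic long-running child process in place of Chrome; the `cleanup` and `handle_signal` names are illustrative and not part of the patches.

```python
# Sketch of the cleanup pattern used in patch 26 (illustrative, not repository code):
# register an atexit hook and signal handlers so a child process launched by a
# quickstart-style script is always terminated, even on Ctrl+C or SIGTERM.
import atexit
import signal
import subprocess
import sys
import time
from typing import Optional


def cleanup(process: Optional[subprocess.Popen]) -> None:
    """Terminate the child gracefully, escalating to kill if it does not exit."""
    if process is None or process.poll() is not None:
        return  # never launched, or already exited
    process.terminate()
    try:
        process.wait(timeout=3)
    except subprocess.TimeoutExpired:
        process.kill()


def main() -> None:
    # Hypothetical long-running child; the real script launches Chrome here.
    child = subprocess.Popen([sys.executable, "-c", "import time; time.sleep(3600)"])

    # atexit covers normal returns and uncaught exceptions.
    atexit.register(cleanup, child)

    def handle_signal(signum, frame):
        # Signal handlers cover Ctrl+C and external termination.
        cleanup(child)
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_signal)
    if hasattr(signal, "SIGTERM"):  # SIGTERM is not available on every platform
        signal.signal(signal.SIGTERM, handle_signal)

    time.sleep(5)  # stand-in for the interactive monitoring/discovery workflow


if __name__ == "__main__":
    main()
```

Registering both the `atexit` hook and the signal handlers is deliberate: `atexit` handles normal exits and uncaught exceptions, the handlers catch Ctrl+C and termination signals, and because `cleanup` is idempotent it is safe for both paths to fire on the same process.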