From 286145d462779038982fb8ce7cbb10c46e827e31 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sat, 18 Oct 2025 21:45:49 +0200 Subject: [PATCH 1/5] refactor: Improve browser configuration with stealth mode and support for multiple engines Signed-off-by: Eden Reich --- Dockerfile | 22 ++++++++++++++++++--- config/config.go | 2 ++ go.mod | 3 ++- go.sum | 6 ++++-- internal/playwright/playwright.go | 33 ++++++++++++++++++++++++++++++- 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4fee79c..927a144 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,6 +46,7 @@ RUN apt-get update && apt-get install -y \ ca-certificates \ tzdata \ curl \ + xvfb \ && rm -rf /var/lib/apt/lists/* WORKDIR /root/ @@ -57,8 +58,8 @@ COPY --from=builder /go/bin/playwright /usr/local/bin/playwright # Copy agent card COPY --from=builder /app/.well-known ./.well-known -# Install only Chromium browser with dependencies -RUN playwright install --with-deps chromium +# Install Chromium and Firefox browsers with dependencies +RUN playwright install --with-deps chromium firefox # Expose port EXPOSE 8080 @@ -66,6 +67,21 @@ EXPOSE 8080 # Set environment variables ENV A2A_SERVER_PORT=8080 ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright +ENV DISPLAY=:99 + +# Create startup script +RUN echo '#!/bin/bash\n\ +set -e\n\ +\n\ +# Start Xvfb in the background with disabled access control\n\ +Xvfb :99 -screen 0 1920x1080x24 -ac &\n\ +\n\ +# Wait a moment for Xvfb to start\n\ +sleep 1\n\ +\n\ +# Start the main application\n\ +exec ./main\n\ +' > /usr/local/bin/start.sh && chmod +x /usr/local/bin/start.sh # Run the application -CMD ["./main"] +CMD ["/usr/local/bin/start.sh"] diff --git a/config/config.go b/config/config.go index 82f40bb..c4ee091 100644 --- a/config/config.go +++ b/config/config.go @@ -24,6 +24,7 @@ type Config struct { type BrowserConfig struct { Args string `env:"ARGS,default=[--disable-blink-features=AutomationControlled 
--disable-features=VizDisplayCompositor --no-first-run --disable-default-apps --disable-extensions --disable-plugins --disable-sync --disable-translate --hide-scrollbars --mute-audio --no-zygote --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-ipc-flooding-protection]"` DataDir string `env:"DATA_DIR,default=/tmp/playwright/artifacts"` + Engine string `env:"ENGINE,default=chromium"` HeaderAccept string `env:"HEADER_ACCEPT,default=text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"` HeaderAcceptEncoding string `env:"HEADER_ACCEPT_ENCODING,default=gzip, deflate, br"` HeaderAcceptLanguage string `env:"HEADER_ACCEPT_LANGUAGE,default=en-US,en;q=0.9"` @@ -31,6 +32,7 @@ type BrowserConfig struct { HeaderDnt string `env:"HEADER_DNT,default=1"` HeaderUpgradeInsecureRequests string `env:"HEADER_UPGRADE_INSECURE_REQUESTS,default=1"` Headless bool `env:"HEADLESS,default=true"` + StealthMode bool `env:"STEALTH_MODE,default=false"` UserAgent string `env:"USER_AGENT,default=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"` ViewportHeight string `env:"VIEWPORT_HEIGHT,default=1080"` ViewportWidth string `env:"VIEWPORT_WIDTH,default=1920"` diff --git a/go.mod b/go.mod index fecf410..1b828bb 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.25 require ( github.com/inference-gateway/adk v0.15.2 + github.com/jonfriesen/playwright-go-stealth v0.0.2 github.com/playwright-community/playwright-go v0.5200.1 github.com/sethvargo/go-envconfig v1.3.0 github.com/stretchr/testify v1.10.0 @@ -65,7 +66,7 @@ require ( go.opentelemetry.io/otel/sdk v1.36.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect go.opentelemetry.io/otel/trace v1.36.0 // indirect - go.uber.org/multierr v1.10.0 // indirect + go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.13.0 // 
indirect golang.org/x/crypto v0.38.0 // indirect golang.org/x/net v0.40.0 // indirect diff --git a/go.sum b/go.sum index 5303cfc..0a4b1ea 100644 --- a/go.sum +++ b/go.sum @@ -69,6 +69,8 @@ github.com/inference-gateway/adk v0.15.2 h1:DaMiXlXUeY4/LidlhWWO6xWHNIsH+vCxifvi github.com/inference-gateway/adk v0.15.2/go.mod h1:Eh91HM5d3R0I5OOAh3YNUqZCJBBdGPHrKBALnVL8dl0= github.com/inference-gateway/sdk v1.10.0 h1:88m1XTS5J7Q9+sFaKXKHAPXdDpji6SASXVWz2pe8ZFk= github.com/inference-gateway/sdk v1.10.0/go.mod h1:3TTD7Kbr7FRt+9ZbCPAm3u0tXUIWx7flZuwrRgZgrdk= +github.com/jonfriesen/playwright-go-stealth v0.0.2 h1:xf/2ICKJRerk3po4XhxOYoDi/DvATjhsouG/ZekG2aE= +github.com/jonfriesen/playwright-go-stealth v0.0.2/go.mod h1:genxteWiUTS6fdIQkPFBWtJ85BfA2YZW1OeS7BSX9Uo= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= @@ -159,8 +161,8 @@ go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKr go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= -go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/arch v0.13.0 h1:KCkqVVV1kGg0X87TFysjCJ8MxtZEIU4Ja/yXGeoECdA= diff --git a/internal/playwright/playwright.go 
b/internal/playwright/playwright.go index 9d69050..9bc67be 100644 --- a/internal/playwright/playwright.go +++ b/internal/playwright/playwright.go @@ -10,6 +10,7 @@ import ( "time" config "github.com/inference-gateway/browser-agent/config" + stealth "github.com/jonfriesen/playwright-go-stealth" zap "go.uber.org/zap" "github.com/playwright-community/playwright-go" @@ -87,8 +88,18 @@ func NewBrowserConfigFromConfig(cfg *config.Config) *BrowserConfig { args = append(args, configArgs...) } + engine := Chromium + switch strings.ToLower(cfg.Browser.Engine) { + case "firefox": + engine = Firefox + case "webkit": + engine = WebKit + default: + engine = Chromium + } + return &BrowserConfig{ - Engine: Chromium, + Engine: engine, Headless: cfg.Browser.Headless, Timeout: 30 * time.Second, ViewportWidth: width, @@ -247,6 +258,14 @@ func (p *playwrightImpl) LaunchBrowser(ctx context.Context, config *BrowserConfi return nil, fmt.Errorf("failed to create page: %w", err) } + if p.config.Browser.StealthMode { + if err := stealth.Inject(page); err != nil { + p.logger.Warn("failed to inject stealth script", zap.Error(err)) + } else { + p.logger.Info("stealth mode enabled - stealth script injected") + } + } + sessionID := fmt.Sprintf("session_%d", time.Now().UnixNano()) session := &BrowserSession{ ID: sessionID, @@ -373,6 +392,14 @@ func (p *playwrightImpl) GetOrCreateDefaultSession(ctx context.Context) (*Browse return nil, fmt.Errorf("failed to create page: %w", err) } + if p.config.Browser.StealthMode { + if err := stealth.Inject(page); err != nil { + p.logger.Warn("failed to inject stealth script", zap.Error(err)) + } else { + p.logger.Info("stealth mode enabled - stealth script injected") + } + } + session := &BrowserSession{ ID: DefaultSessionID, Browser: browser, @@ -804,6 +831,8 @@ func (p *playwrightImpl) GetConfig() *config.Config { // createContextOptions creates browser context options from configuration func (p *playwrightImpl) createContextOptions(browserConfig 
*BrowserConfig) playwright.BrowserNewContextOptions { + storagePath := p.config.Browser.DataDir + "/browser-state" + return playwright.BrowserNewContextOptions{ Viewport: &playwright.Size{ Width: browserConfig.ViewportWidth, @@ -818,6 +847,8 @@ func (p *playwrightImpl) createContextOptions(browserConfig *BrowserConfig) play "Connection": p.config.Browser.HeaderConnection, "Upgrade-Insecure-Requests": p.config.Browser.HeaderUpgradeInsecureRequests, }, + StorageStatePath: &storagePath, + AcceptDownloads: playwright.Bool(false), JavaScriptEnabled: playwright.Bool(true), BypassCSP: playwright.Bool(true), } From 6196406a0f0893dcbea8fc13f6cfff6ea7c4cf74 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sat, 18 Oct 2025 22:42:37 +0200 Subject: [PATCH 2/5] docs: Update Dockerfile and configuration for browser selection and Xvfb support Signed-off-by: Eden Reich --- Dockerfile | 57 +++++++++-------- README.md | 46 ++++++++++++-- agent.yaml | 5 ++ config/config.go | 5 +- docker-entrypoint.sh | 60 ++++++++++++++++++ example/Dockerfile.vnc | 29 +++++++++ example/README.md | 119 ++++++++++++++++++++++++++++++++++-- example/docker-compose.yaml | 30 +++++++++ 8 files changed, 317 insertions(+), 34 deletions(-) create mode 100644 docker-entrypoint.sh create mode 100644 example/Dockerfile.vnc diff --git a/Dockerfile b/Dockerfile index 927a144..e4006aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ FROM golang:1.25-alpine AS builder # Build arguments for version injection -ARG VERSION="0.3.0" +ARG VERSION="0.4.1" ARG AGENT_NAME="browser-agent" ARG AGENT_DESCRIPTION="AI agent for browser automation and web testing using Playwright" @@ -41,12 +41,17 @@ RUN CGO_ENABLED=0 GOOS=linux go build \ # Stage 2: Final image with browser dependencies FROM ubuntu:24.04 -# Install system dependencies and browsers +# Build arguments for browser selection +ARG BROWSER_ENGINE=chromium + +# Install system dependencies +# Note: x11-utils added for xdpyinfo (Xvfb health check) RUN apt-get update 
&& apt-get install -y \ ca-certificates \ tzdata \ curl \ xvfb \ + x11-utils \ && rm -rf /var/lib/apt/lists/* WORKDIR /root/ @@ -55,11 +60,22 @@ WORKDIR /root/ COPY --from=builder /app/main . COPY --from=builder /go/bin/playwright /usr/local/bin/playwright -# Copy agent card +# Copy agent card and entrypoint script COPY --from=builder /app/.well-known ./.well-known - -# Install Chromium and Firefox browsers with dependencies -RUN playwright install --with-deps chromium firefox +COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh + +# Install browsers based on build argument +# Supports: chromium, firefox, webkit, or "all" for multiple browsers +RUN if [ "$BROWSER_ENGINE" = "all" ]; then \ + playwright install --with-deps chromium firefox webkit; \ + elif [ "$BROWSER_ENGINE" = "firefox" ]; then \ + playwright install --with-deps firefox; \ + elif [ "$BROWSER_ENGINE" = "webkit" ]; then \ + playwright install --with-deps webkit; \ + else \ + playwright install --with-deps chromium; \ + fi # Expose port EXPOSE 8080 @@ -67,21 +83,14 @@ EXPOSE 8080 # Set environment variables ENV A2A_SERVER_PORT=8080 ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright -ENV DISPLAY=:99 - -# Create startup script -RUN echo '#!/bin/bash\n\ -set -e\n\ -\n\ -# Start Xvfb in the background with disabled access control\n\ -Xvfb :99 -screen 0 1920x1080x24 -ac &\n\ -\n\ -# Wait a moment for Xvfb to start\n\ -sleep 1\n\ -\n\ -# Start the main application\n\ -exec ./main\n\ -' > /usr/local/bin/start.sh && chmod +x /usr/local/bin/start.sh - -# Run the application -CMD ["/usr/local/bin/start.sh"] + +# Browser configuration defaults (can be overridden at runtime) +ENV BROWSER_ENGINE=chromium +ENV BROWSER_HEADLESS=true +ENV BROWSER_STEALTH_MODE=false +ENV BROWSER_XVFB_ENABLED=false +ENV BROWSER_XVFB_DISPLAY=:99 +ENV BROWSER_XVFB_SCREEN_RESOLUTION=1920x1080x24 + +# Run the application via entrypoint script +ENTRYPOINT 
["/usr/local/bin/docker-entrypoint.sh"] diff --git a/README.md b/README.md index 2e35e63..ec2f676 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,18 @@ A production-ready [Agent-to-Agent (A2A)](https://github.com/inference-gateway/a ## Quick Start ```bash -# Run the agent +# Run the agent locally go run . -# Or with Docker +# Or with Docker (Chromium only - smallest image) docker build -t browser-agent . docker run -p 8080:8080 browser-agent + +# Build with specific browser engine +docker build --build-arg BROWSER_ENGINE=firefox -t browser-agent:firefox . + +# Run with Xvfb enabled (for extensions or specific rendering features) +docker run -p 8080:8080 -e BROWSER_XVFB_ENABLED=true browser-agent ``` ## Features @@ -62,6 +68,7 @@ The following custom configuration variables are available: |----------|----------|-------------|---------| | **Browser** | `BROWSER_ARGS` | Args configuration | `[--disable-blink-features=AutomationControlled --disable-features=VizDisplayCompositor --no-first-run --disable-default-apps --disable-extensions --disable-plugins --disable-sync --disable-translate --hide-scrollbars --mute-audio --no-zygote --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-ipc-flooding-protection]` | | **Browser** | `BROWSER_DATA_DIR` | Data_dir configuration | `/tmp/playwright/artifacts` | +| **Browser** | `BROWSER_ENGINE` | Engine configuration | `chromium` | | **Browser** | `BROWSER_HEADER_ACCEPT` | Header_accept configuration | `text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7` | | **Browser** | `BROWSER_HEADER_ACCEPT_ENCODING` | Header_accept_encoding configuration | `gzip, deflate, br` | | **Browser** | `BROWSER_HEADER_ACCEPT_LANGUAGE` | Header_accept_language configuration | `en-US,en;q=0.9` | @@ -69,9 +76,13 @@ The following custom configuration variables are available: | **Browser** | 
`BROWSER_HEADER_DNT` | Header_dnt configuration | `1` | | **Browser** | `BROWSER_HEADER_UPGRADE_INSECURE_REQUESTS` | Header_upgrade_insecure_requests configuration | `1` | | **Browser** | `BROWSER_HEADLESS` | Headless configuration | `true` | +| **Browser** | `BROWSER_STEALTH_MODE` | Stealth_mode configuration | `false` | | **Browser** | `BROWSER_USER_AGENT` | User_agent configuration | `Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36` | | **Browser** | `BROWSER_VIEWPORT_HEIGHT` | Viewport_height configuration | `1080` | | **Browser** | `BROWSER_VIEWPORT_WIDTH` | Viewport_width configuration | `1920` | +| **Browser** | `BROWSER_XVFB_DISPLAY` | Xvfb_display configuration | `:99` | +| **Browser** | `BROWSER_XVFB_ENABLED` | Xvfb_enabled configuration | `false` | +| **Browser** | `BROWSER_XVFB_SCREEN_RESOLUTION` | Xvfb_screen_resolution configuration | `1920x1080x24` | | Category | Variable | Description | Default | |----------|----------|-------------|---------| @@ -158,10 +169,10 @@ docker run --rm -it --network host ghcr.io/inference-gateway/a2a-debugger:latest ### Docker -The Docker image can be built with custom version information using build arguments: +The Docker image can be built with custom version information and browser selection using build arguments: ```bash -# Build with default values from ADL +# Build with default values from ADL (Chromium only) docker build -t browser-agent . # Build with custom version information @@ -170,15 +181,42 @@ docker build \ --build-arg AGENT_NAME="My Custom Agent" \ --build-arg AGENT_DESCRIPTION="Custom agent description" \ -t browser-agent:1.2.3 . + +# Build with specific browser engine +docker build --build-arg BROWSER_ENGINE=firefox -t browser-agent:firefox . + +# Build with all browsers (larger image) +docker build --build-arg BROWSER_ENGINE=all -t browser-agent:all . 
``` **Available Build Arguments:** - `VERSION` - Agent version (default: `0.4.1`) - `AGENT_NAME` - Agent name (default: `browser-agent`) - `AGENT_DESCRIPTION` - Agent description (default: `AI agent for browser automation and web testing using Playwright`) +- `BROWSER_ENGINE` - Browser to install (`chromium`, `firefox`, `webkit`, or `all`) (default: `chromium`) These values are embedded into the binary at build time using linker flags, making them accessible at runtime without requiring environment variables. +#### Xvfb Configuration + +By default, the browser runs in native headless mode. For cases requiring a virtual display (e.g., extensions, specific rendering features), you can enable Xvfb: + +```bash +# Run with Xvfb enabled +docker run -p 8080:8080 \ + -e BROWSER_XVFB_ENABLED=true \ + browser-agent + +# Customize Xvfb display settings +docker run -p 8080:8080 \ + -e BROWSER_XVFB_ENABLED=true \ + -e BROWSER_XVFB_DISPLAY=:99 \ + -e BROWSER_XVFB_SCREEN_RESOLUTION=1920x1080x24 \ + browser-agent +``` + +**Security Note:** Xvfb is started without the `-ac` flag, so X server access control remains enabled. The X server also uses `-nolisten tcp` to prevent network access.
+ ## License MIT License - see LICENSE file for details diff --git a/agent.yaml b/agent.yaml index 388083c..3df7142 100644 --- a/agent.yaml +++ b/agent.yaml @@ -12,6 +12,8 @@ spec: config: browser: headless: true + engine: "chromium" + stealth_mode: false user_agent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" viewport_width: 1920 viewport_height: 1080 @@ -22,6 +24,9 @@ spec: header_connection: "keep-alive" header_upgrade_insecure_requests: "1" data_dir: "/tmp/playwright/artifacts" + xvfb_enabled: false + xvfb_display: ":99" + xvfb_screen_resolution: "1920x1080x24" args: - "--disable-blink-features=AutomationControlled" - "--disable-features=VizDisplayCompositor" diff --git a/config/config.go b/config/config.go index c4ee091..0cb0485 100644 --- a/config/config.go +++ b/config/config.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.8. DO NOT EDIT. +// Code generated by ADL CLI vdev. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. 
@@ -36,4 +36,7 @@ type BrowserConfig struct { UserAgent string `env:"USER_AGENT,default=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"` ViewportHeight string `env:"VIEWPORT_HEIGHT,default=1080"` ViewportWidth string `env:"VIEWPORT_WIDTH,default=1920"` + XvfbDisplay string `env:"XVFB_DISPLAY,default=:99"` + XvfbEnabled bool `env:"XVFB_ENABLED,default=false"` + XvfbScreenResolution string `env:"XVFB_SCREEN_RESOLUTION,default=1920x1080x24"` } diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000..e83108f --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,60 @@ +#!/bin/bash +set -e + +# Configuration from environment variables +XVFB_ENABLED="${BROWSER_XVFB_ENABLED:-false}" +XVFB_DISPLAY="${BROWSER_XVFB_DISPLAY:-:99}" +XVFB_SCREEN="${BROWSER_XVFB_SCREEN_RESOLUTION:-1920x1080x24}" + +# Function to check if Xvfb is ready +wait_for_xvfb() { + local max_attempts=10 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + if xdpyinfo -display "$XVFB_DISPLAY" >/dev/null 2>&1; then + echo "Xvfb is ready on display $XVFB_DISPLAY" + return 0 + fi + attempt=$((attempt + 1)) + sleep 0.5 + done + + echo "Warning: Xvfb failed to start within timeout" + return 1 +} + +# Start Xvfb if enabled +if [ "$XVFB_ENABLED" = "true" ]; then + echo "Starting Xvfb on display $XVFB_DISPLAY with screen resolution $XVFB_SCREEN" + + # Start Xvfb without -ac flag for security + # Use -nolisten tcp to prevent network access + Xvfb "$XVFB_DISPLAY" -screen 0 "$XVFB_SCREEN" -nolisten tcp & + XVFB_PID=$! 
+ + # Wait for Xvfb to be ready + if wait_for_xvfb; then + export DISPLAY="$XVFB_DISPLAY" + echo "Xvfb started successfully (PID: $XVFB_PID)" + else + echo "Error: Xvfb failed to start properly" + kill "$XVFB_PID" 2>/dev/null || true + exit 1 + fi + + # Trap to cleanup Xvfb on exit + trap "echo 'Stopping Xvfb...'; kill $XVFB_PID 2>/dev/null || true" EXIT +else + echo "Xvfb disabled, using native headless mode" +fi + +# Log configuration +echo "Browser configuration:" +echo " Engine: ${BROWSER_ENGINE:-chromium}" +echo " Headless: ${BROWSER_HEADLESS:-true}" +echo " Stealth Mode: ${BROWSER_STEALTH_MODE:-false}" +echo " Xvfb Enabled: $XVFB_ENABLED" + +# Start the main application +exec ./main diff --git a/example/Dockerfile.vnc b/example/Dockerfile.vnc new file mode 100644 index 0000000..e53b789 --- /dev/null +++ b/example/Dockerfile.vnc @@ -0,0 +1,29 @@ +FROM alpine:latest + +# Install x11vnc +RUN apk add --no-cache x11vnc + +# Create a startup script that connects to the shared X display +COPY <<'EOF' /usr/local/bin/start-vnc.sh +#!/bin/sh +set -e + +echo "Waiting for X display :99 to be available..." +echo "Checking /tmp/.X11-unix directory..." +ls -la /tmp/.X11-unix/ 2>/dev/null || echo "Directory not found or empty" + +while true; do + if [ -S /tmp/.X11-unix/X99 ]; then + echo "X11 socket found, attempting to start x11vnc..." + x11vnc -display :99 -forever -shared -passwd password -rfbport 5900 -listen 0.0.0.0 -noshm && break + else + echo "Waiting for /tmp/.X11-unix/X99 socket... 
($(date))" + ls -la /tmp/.X11-unix/ 2>/dev/null || echo "Directory still empty" + fi + sleep 5 +done +EOF + +RUN chmod +x /usr/local/bin/start-vnc.sh + +ENTRYPOINT ["/usr/local/bin/start-vnc.sh"] diff --git a/example/README.md b/example/README.md index a20fcc7..c33f377 100644 --- a/example/README.md +++ b/example/README.md @@ -1,22 +1,54 @@ -# Example Playwright Automation Script +# Browser Agent Example -This script demonstrates how to use the Playwright automation framework to perform basic browser actions such as navigating to a webpage, filling out a form, and taking a screenshot. +This example demonstrates how to use the browser-agent for AI-powered browser automation using Playwright. The agent can navigate webpages, fill forms, take screenshots, extract data, and more. +## Prerequisites -Configure the environment variables as needed: +Configure the environment variables: ```bash cp .env.example .env ``` -** Add at least two providers, in this example Google and DeepSeek. +**Note:** Add at least two LLM provider API keys (e.g., Google and DeepSeek) in the `.env` file. -First bring up all the containers: +## Quick Start + +### Headless Mode (Default) + +Start all containers in headless mode (fastest, most secure): ```bash docker compose up --build ``` +### Headed Mode with VNC (Visual Debugging) + +To view the browser in real-time via VNC: + +1. **Update docker-compose.yaml agent service:** + ```yaml + BROWSER_HEADLESS: false + BROWSER_XVFB_ENABLED: true + BROWSER_STEALTH_MODE: true # Optional: helps avoid bot detection + ``` + +2. **Start with VNC profile:** + ```bash + docker compose --profile vnc up --build + ``` + +3. 
**Connect to VNC:** + ```bash + # macOS + open vnc://localhost:5900 + # Password: password + + # Or use any VNC client: localhost:5900 + ``` + +## Usage + Go into the CLI for convenience: ```bash @@ -54,3 +86,80 @@ Finally clean up: ```bash docker compose down ``` + +## Configuration Options + +### Browser Modes + +The browser-agent supports different operational modes: + +**Headless Production Mode (Default):** +- `BROWSER_HEADLESS: true` +- `BROWSER_XVFB_ENABLED: false` +- `BROWSER_STEALTH_MODE: false` +- Fastest, most secure, lowest resource usage +- Best for production/CI/CD + +**Headed Mode with VNC (Development):** +- `BROWSER_HEADLESS: false` +- `BROWSER_XVFB_ENABLED: true` +- `BROWSER_STEALTH_MODE: true` +- Visual browser viewing via VNC +- Best for development, debugging, demos + +**Headless with Extensions:** +- `BROWSER_HEADLESS: true` +- `BROWSER_XVFB_ENABLED: true` +- Required for browser extensions +- Specific rendering features + +### Browser Engines + +You can choose different browser engines by modifying the build args and environment: + +```yaml +build: + args: + BROWSER_ENGINE: firefox # chromium (default), firefox, webkit, or all +environment: + BROWSER_ENGINE: firefox +``` + +### Xvfb Configuration + +When Xvfb is enabled, you can customize: + +```yaml +BROWSER_XVFB_DISPLAY: ":99" # X11 display number +BROWSER_XVFB_SCREEN_RESOLUTION: "1920x1080x24" # Resolution and color depth +``` + +**Security Note:** Xvfb is configured without the `-ac` flag (access control enabled) and uses `-nolisten tcp` to prevent remote network access. + +## Troubleshooting + +### VNC Connection Issues + +If VNC doesn't connect: + +1. Check Xvfb is enabled: + ```bash + docker compose exec agent env | grep XVFB + ``` + +2. Check X11 socket exists: + ```bash + docker compose exec agent ls -la /tmp/.X11-unix/ + ``` + +3. 
Check VNC logs: + ```bash + docker compose logs browser-vnc + ``` + +### Browser Not Starting + +Check agent logs for errors: +```bash +docker compose logs agent +``` diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml index 6fe0526..133dfa5 100644 --- a/example/docker-compose.yaml +++ b/example/docker-compose.yaml @@ -4,12 +4,18 @@ services: build: context: .. dockerfile: Dockerfile + args: + BROWSER_ENGINE: chromium container_name: agent ports: - 8081:8081 volumes: - ./artifacts:/tmp/artifacts + - x11-socket:/tmp/.X11-unix environment: + BROWSER_ENGINE: chromium + BROWSER_HEADLESS: true + BROWSER_STEALTH_MODE: false BROWSER_USER_AGENT: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" BROWSER_VIEWPORT_WIDTH: "1920" BROWSER_VIEWPORT_HEIGHT: "1080" @@ -35,6 +41,9 @@ services: --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-ipc-flooding-protection + BROWSER_XVFB_ENABLED: false + BROWSER_XVFB_DISPLAY: ":99" + BROWSER_XVFB_SCREEN_RESOLUTION: "1920x1080x24" A2A_PORT: 8080 A2A_DEBUG: true A2A_AGENT_URL: http://localhost:8080 @@ -135,6 +144,27 @@ services: networks: - a2a-network + browser-vnc: + build: + context: . 
+ dockerfile: Dockerfile.vnc + container_name: browser-vnc + environment: + DISPLAY: :99 + volumes: + - x11-socket:/tmp/.X11-unix + ports: + - "5900:5900" + depends_on: + - agent + networks: + - a2a-network + profiles: + - vnc + networks: a2a-network: driver: bridge + +volumes: + x11-socket: From 9dedfbc1bb5e4c6f8bb063affed13d358940629c Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sat, 18 Oct 2025 22:48:43 +0200 Subject: [PATCH 3/5] chore: Update ADL CLI version to 0.23.9 in generated files Signed-off-by: Eden Reich --- .github/workflows/cd.yml | 2 +- .github/workflows/ci.yml | 2 +- .releaserc.yaml | 2 +- CLAUDE.md | 4 ++-- Taskfile.yml | 2 +- config/config.go | 2 +- example/README.md | 19 +++++++++++++++++++ internal/logger/logger.go | 2 +- main.go | 2 +- 9 files changed, 28 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index fff038c..1ca982e 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.8. DO NOT EDIT. +# Code generated by ADL CLI v0.23.9. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a010280..be9cf0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.8. DO NOT EDIT. +# Code generated by ADL CLI v0.23.9. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.releaserc.yaml b/.releaserc.yaml index 0eafc62..edad485 100644 --- a/.releaserc.yaml +++ b/.releaserc.yaml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.8. DO NOT EDIT. +# Code generated by ADL CLI v0.23.9. DO NOT EDIT. 
# This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/CLAUDE.md b/CLAUDE.md index e6e538a..5461c71 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ browser-agent is an A2A (Agent-to-Agent) server implementing the [A2A Protocol]( ### ADL-Generated Structure -The codebase is generated using ADL CLI 0.23.8 and follows a strict generation pattern: +The codebase is generated using ADL CLI 0.23.9 and follows a strict generation pattern: - **Generated Files**: Marked with `DO NOT EDIT` headers - manual changes will be overwritten - **Configuration Source**: `agent.yaml` - defines agent capabilities, skills, and metadata - **Server Implementation**: Built on the ADK (Agent Development Kit) framework from `github.com/inference-gateway/adk` @@ -117,7 +117,7 @@ Activate with: `flox activate` (if Flox is installed) - **Generated Files**: Never manually edit files with "DO NOT EDIT" headers - **Configuration Changes**: Always modify `agent.yaml` and regenerate -- **ADL Version**: Ensure ADL CLI 0.23.8 or compatible version for regeneration +- **ADL Version**: Ensure ADL CLI 0.23.9 or compatible version for regeneration - **Port Configuration**: Default 8080, configurable via `A2A_PORT` or `A2A_SERVER_PORT` ## Debugging Tips diff --git a/Taskfile.yml b/Taskfile.yml index e4d9a93..101d473 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.8. DO NOT EDIT. +# Code generated by ADL CLI v0.23.9. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/config/config.go b/config/config.go index 0cb0485..29a8564 100644 --- a/config/config.go +++ b/config/config.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI vdev. DO NOT EDIT. +// Code generated by ADL CLI v0.23.9. DO NOT EDIT. 
// This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/example/README.md b/example/README.md index c33f377..1e2ad6f 100644 --- a/example/README.md +++ b/example/README.md @@ -125,6 +125,25 @@ environment: BROWSER_ENGINE: firefox ``` +Or build directly with docker: + +```bash +# Build with default browser (chromium) +docker build -t browser-agent .. + +# Build with specific browser engine +docker build --build-arg BROWSER_ENGINE=firefox -t browser-agent:firefox .. + +# Build with all browsers (larger image) +docker build --build-arg BROWSER_ENGINE=all -t browser-agent:all .. +``` + +**Available Build Arguments:** +- `VERSION` - Agent version (default: from agent.yaml) +- `AGENT_NAME` - Agent name (default: from agent.yaml) +- `AGENT_DESCRIPTION` - Agent description (default: from agent.yaml) +- `BROWSER_ENGINE` - Browser to install (`chromium`, `firefox`, `webkit`, or `all`) (default: `chromium`) + ### Xvfb Configuration When Xvfb is enabled, you can customize: diff --git a/internal/logger/logger.go b/internal/logger/logger.go index cd9f319..f9724a4 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.8. DO NOT EDIT. +// Code generated by ADL CLI v0.23.9. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/main.go b/main.go index da0ed86..ce21f45 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.8. DO NOT EDIT. +// Code generated by ADL CLI v0.23.9. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. 
From 808e11e875a58ab514feb9cd87c92eb8a8e83b21 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sat, 18 Oct 2025 23:00:17 +0200 Subject: [PATCH 4/5] refactor: Update environment variable configuration for browser agent in .env and docker-compose files Signed-off-by: Eden Reich --- example/.env.example | 4 ++++ example/README.md | 14 +++++++------- example/docker-compose.yaml | 10 ++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/example/.env.example b/example/.env.example index 4ad0c5b..d8fe201 100644 --- a/example/.env.example +++ b/example/.env.example @@ -3,5 +3,9 @@ DEEPSEEK_API_KEY= GOOGLE_API_KEY= # Agent +BROWSER_ENGINE=chromium +BROWSER_HEADLESS=false +BROWSER_XVFB_ENABLED=true +BROWSER_STEALTH_MODE=true A2A_AGENT_CLIENT_PROVIDER=deepseek A2A_AGENT_CLIENT_MODEL=deepseek-chat diff --git a/example/README.md b/example/README.md index 1e2ad6f..6ea4c7a 100644 --- a/example/README.md +++ b/example/README.md @@ -26,16 +26,16 @@ docker compose up --build To view the browser in real-time via VNC: -1. **Update docker-compose.yaml agent service:** - ```yaml - BROWSER_HEADLESS: false - BROWSER_XVFB_ENABLED: true - BROWSER_STEALTH_MODE: true # Optional: helps avoid bot detection +1. **Update .env file:** + ```bash + BROWSER_HEADLESS=false + BROWSER_XVFB_ENABLED=true + BROWSER_STEALTH_MODE=true # Optional: helps avoid bot detection ``` -2. **Start with VNC profile:** +2. **Start with VNC:** ```bash - docker compose --profile vnc up --build + docker compose up --build ``` 3. 
**Connect to VNC:** diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml index 133dfa5..5cd5dbf 100644 --- a/example/docker-compose.yaml +++ b/example/docker-compose.yaml @@ -13,9 +13,9 @@ services: - ./artifacts:/tmp/artifacts - x11-socket:/tmp/.X11-unix environment: - BROWSER_ENGINE: chromium - BROWSER_HEADLESS: true - BROWSER_STEALTH_MODE: false + BROWSER_ENGINE: ${BROWSER_ENGINE} + BROWSER_HEADLESS: ${BROWSER_HEADLESS} + BROWSER_STEALTH_MODE: ${BROWSER_STEALTH_MODE} BROWSER_USER_AGENT: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" BROWSER_VIEWPORT_WIDTH: "1920" BROWSER_VIEWPORT_HEIGHT: "1080" @@ -41,7 +41,7 @@ services: --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-ipc-flooding-protection - BROWSER_XVFB_ENABLED: false + BROWSER_XVFB_ENABLED: ${BROWSER_XVFB_ENABLED} BROWSER_XVFB_DISPLAY: ":99" BROWSER_XVFB_SCREEN_RESOLUTION: "1920x1080x24" A2A_PORT: 8080 @@ -159,8 +159,6 @@ services: - agent networks: - a2a-network - profiles: - - vnc networks: a2a-network: From 3ec6fae50b5d6f869ee5b6e2343c643c332408cb Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sun, 19 Oct 2025 01:19:56 +0200 Subject: [PATCH 5/5] docs: Improve README with example prompts and automation capabilities; update demo site with pagination and modal features Signed-off-by: Eden Reich --- example/README.md | 37 +++- example/demo-site/index.html | 328 ++++++++++++++++++++++++++++-- example/docker-compose.yaml | 7 +- internal/playwright/playwright.go | 17 +- 4 files changed, 353 insertions(+), 36 deletions(-) diff --git a/example/README.md b/example/README.md index 6ea4c7a..4b02df2 100644 --- a/example/README.md +++ b/example/README.md @@ -55,19 +55,50 @@ Go into the CLI for convenience: docker compose run --rm cli ``` -Ask the following: +### Example Prompts +The demo site includes several features for testing automation capabilities: + +#### Basic Screenshot ```text Please visit 
http://demo-site which is running locally and take a screenshot of the homepage. Use the agent. ``` You would see the CLI (A2A agent client) submitting a task to the A2A agent server and the screenshot will appear in the `screenshots` directory since it's mounted as a volume. +#### Data Extraction with Pagination +```text +Please visit http://demo-site which is running locally and collect all of the prices from all pages, write them to a CSV file. Use the agent. +``` + +The demo site has 24 products across 4 pages. The agent will: +- Navigate through all pages using the pagination buttons +- Extract product names and prices from each page +- Write the complete data to a CSV file in the artifacts directory + +#### Pop-up Handling +```text +Please visit http://demo-site?popup=true and dismiss the special offer pop-up, then collect all product prices from the first page. Use the agent. +``` + +The agent will: +- Navigate to the demo site with the popup parameter +- Wait for the pop-up modal to appear +- Click the "Dismiss Offer" button to close it +- Extract the product data from the page + +**Note:** The pop-up only appears when the URL includes the `?popup=true` query parameter or the `#popup` fragment, making it easy to test with or without this challenge. + +#### Complex Multi-step Task ```text -Please visit http://demo-site which is running locally and collect all of the prices, write them to a CSV file. Use the agent. +Please visit http://demo-site?popup=true, close any pop-ups, navigate through all 4 pages of products, extract all product names and prices, and save them to a CSV file with columns: Product Name, Price, Page Number. Use the agent. ``` -You would see the CLI (A2A agent client) submitting a task to the A2A agent server and the csv file with all of the prices of the website will appear inside of the artifacts directory. 
+This demonstrates the agent's ability to: +- Handle intrusive modals that require specific button clicks +- Navigate multi-page content using pagination +- Extract structured data across multiple pages +- Format and save data to files Check the logs to see that the browser indeed went to the demo site and took a screenshot: diff --git a/example/demo-site/index.html b/example/demo-site/index.html index ded1895..652dd33 100644 --- a/example/demo-site/index.html +++ b/example/demo-site/index.html @@ -65,6 +65,63 @@ font-weight: bold; color: #2c3e50; } + .pagination { + display: flex; + justify-content: center; + align-items: center; + gap: 20px; + margin: 30px 0; + padding: 20px; + } + .pagination-btn { + min-width: 100px; + } + .pagination-btn:disabled { + background-color: #95a5a6; + cursor: not-allowed; + } + #page-info { + font-weight: bold; + color: #2c3e50; + } + .pagination-info { + text-align: center; + margin: 10px 0 20px 0; + color: #7f8c8d; + font-size: 0.95em; + } + .modal { + display: none; + position: fixed; + z-index: 1000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0,0,0,0.8); + } + .modal-content { + background-color: #fefefe; + margin: 15% auto; + padding: 30px; + border: 3px solid #e74c3c; + border-radius: 8px; + width: 80%; + max-width: 500px; + box-shadow: 0 8px 16px rgba(0,0,0,0.5); + position: relative; + } + .modal h2 { + margin-top: 0; + color: #2c3e50; + } + .modal p { + color: #555; + line-height: 1.6; + } + .modal .button { + margin-top: 15px; + } @@ -75,26 +132,185 @@

Demo Store

Featured Products

- -
-

Wireless Headphones

-

Premium quality wireless headphones with noise cancellation.

-
$199.99
- + +
+ Showing 1-6 of 24 products
-
-

Smart Watch

-

Advanced fitness tracking and notifications on your wrist.

-
$299.99
- +
+
+

Wireless Headphones

+

Premium quality wireless headphones with noise cancellation.

+
$199.99
+ +
+ +
+

Smart Watch

+

Advanced fitness tracking and notifications on your wrist.

+
$299.99
+ +
+ +
+

Laptop Stand

+

Ergonomic aluminum laptop stand for better workspace setup.

+
$49.99
+ +
+ +
+

USB-C Hub

+

Multi-port USB-C hub with HDMI, USB 3.0, and SD card reader.

+
$79.99
+ +
+ +
+

Mechanical Keyboard

+

RGB backlit mechanical keyboard with blue switches.

+
$129.99
+ +
+ +
+

Wireless Mouse

+

Ergonomic wireless mouse with precision tracking.

+
$39.99
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
-
-

Laptop Stand

-

Ergonomic aluminum laptop stand for better workspace setup.

-
$49.99
- +
@@ -117,15 +333,15 @@

Newsletter Signup



- +

- +

- +
@@ -133,7 +349,54 @@

Newsletter Signup

+ + + \ No newline at end of file diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml index 5cd5dbf..d5c10ee 100644 --- a/example/docker-compose.yaml +++ b/example/docker-compose.yaml @@ -7,8 +7,6 @@ services: args: BROWSER_ENGINE: chromium container_name: agent - ports: - - 8081:8081 volumes: - ./artifacts:/tmp/artifacts - x11-socket:/tmp/.X11-unix @@ -55,7 +53,6 @@ services: A2A_AGENT_CARD_FILE_PATH: .well-known/agent-card.json A2A_AGENT_CLIENT_PROVIDER: ${A2A_AGENT_CLIENT_PROVIDER} A2A_AGENT_CLIENT_MODEL: ${A2A_AGENT_CLIENT_MODEL} - A2A_AGENT_CLIENT_API_KEY: "" A2A_AGENT_CLIENT_BASE_URL: http://inference-gateway:8080/v1 A2A_AGENT_CLIENT_TIMEOUT: 30s A2A_AGENT_CLIENT_MAX_RETRIES: 3 @@ -113,7 +110,7 @@ services: INFER_AGENT_MODEL: deepseek/deepseek-chat INFER_A2A_AGENTS: | http://agent:8080 - INFER_DOWNLOAD_DIR: /tmp/downloads + INFER_A2A_DOWNLOAD_ARTIFACTS_DOWNLOAD_DIR: /tmp/downloads command: - chat networks: @@ -137,8 +134,6 @@ services: demo-site: image: nginx:alpine - ports: - - "8090:80" volumes: - ./demo-site:/usr/share/nginx/html:ro networks: diff --git a/internal/playwright/playwright.go b/internal/playwright/playwright.go index 9bc67be..bf33649 100644 --- a/internal/playwright/playwright.go +++ b/internal/playwright/playwright.go @@ -88,7 +88,7 @@ func NewBrowserConfigFromConfig(cfg *config.Config) *BrowserConfig { args = append(args, configArgs...) 
} - engine := Chromium + var engine BrowserEngine switch strings.ToLower(cfg.Browser.Engine) { case "firefox": engine = Firefox @@ -831,9 +831,7 @@ func (p *playwrightImpl) GetConfig() *config.Config { // createContextOptions creates browser context options from configuration func (p *playwrightImpl) createContextOptions(browserConfig *BrowserConfig) playwright.BrowserNewContextOptions { - storagePath := p.config.Browser.DataDir + "/browser-state" - - return playwright.BrowserNewContextOptions{ + contextOptions := playwright.BrowserNewContextOptions{ Viewport: &playwright.Size{ Width: browserConfig.ViewportWidth, Height: browserConfig.ViewportHeight, @@ -847,9 +845,18 @@ func (p *playwrightImpl) createContextOptions(browserConfig *BrowserConfig) play "Connection": p.config.Browser.HeaderConnection, "Upgrade-Insecure-Requests": p.config.Browser.HeaderUpgradeInsecureRequests, }, - StorageStatePath: &storagePath, AcceptDownloads: playwright.Bool(false), JavaScriptEnabled: playwright.Bool(true), BypassCSP: playwright.Bool(true), } + + storagePath := p.config.Browser.DataDir + "/browser-state" + if _, err := os.Stat(storagePath); err == nil { + contextOptions.StorageStatePath = &storagePath + p.logger.Debug("using existing storage state", zap.String("path", storagePath)) + } else { + p.logger.Debug("storage state file not found, creating fresh browser context", zap.String("path", storagePath)) + } + + return contextOptions }