From cf19a72dc0b77868a3f29e8791a15b3c8740a44b Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 17:41:44 +0530 Subject: [PATCH 1/3] feat: add separate browser service --- Dockerfile.backend | 3 - ENVEXAMPLE | 5 + browser/.dockerignore | 9 ++ browser/Dockerfile | 30 +++++ browser/package.json | 21 ++++ browser/server.ts | 92 +++++++++++++++ browser/tsconfig.json | 24 ++++ docker-compose.yml | 36 ++++++ maxun-core/package.json | 8 +- maxun-core/src/interpret.ts | 6 +- maxun-core/src/types/workflow.ts | 2 +- package.json | 8 +- server/src/api/record.ts | 4 - .../browser-management/browserConnection.ts | 111 ++++++++++++++++++ .../classes/RemoteBrowser.ts | 31 +---- server/src/markdownify/scrape.ts | 6 +- server/src/pgboss-worker.ts | 2 +- server/src/routes/proxy.ts | 10 +- server/src/routes/record.ts | 3 - server/src/routes/storage.ts | 3 - server/src/types/index.ts | 2 +- .../workflow-management/classes/Generator.ts | 2 +- .../classes/Interpreter.ts | 2 +- .../workflow-management/scheduler/index.ts | 5 +- server/src/workflow-management/selector.ts | 2 +- 25 files changed, 355 insertions(+), 72 deletions(-) create mode 100644 browser/.dockerignore create mode 100644 browser/Dockerfile create mode 100644 browser/package.json create mode 100644 browser/server.ts create mode 100644 browser/tsconfig.json create mode 100644 server/src/browser-management/browserConnection.ts diff --git a/Dockerfile.backend b/Dockerfile.backend index 8a5fc23ee..85ee4b83c 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -18,9 +18,6 @@ COPY server/tsconfig.json ./server/ # Install dependencies RUN npm install --legacy-peer-deps -# Install Playwright browsers and dependencies -RUN npx playwright install --with-deps chromium - # Create the Chromium data directory with necessary permissions RUN mkdir -p /tmp/chromium-data-dir && \ chmod -R 777 /tmp/chromium-data-dir diff --git a/ENVEXAMPLE b/ENVEXAMPLE index db461f556..b8881b369 100644 --- a/ENVEXAMPLE +++ b/ENVEXAMPLE 
@@ -38,3 +38,8 @@ AIRTABLE_REDIRECT_URI=http://localhost:8080/auth/airtable/callback # Telemetry Settings - Please keep it enabled. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. MAXUN_TELEMETRY=true + +# Browser service connection settings (Playwright WebSocket port, health-check port, hostname) +BROWSER_WS_PORT=3001 +BROWSER_HEALTH_PORT=3002 +BROWSER_WS_HOST=browser \ No newline at end of file diff --git a/browser/.dockerignore b/browser/.dockerignore new file mode 100644 index 000000000..44f5e86e6 --- /dev/null +++ b/browser/.dockerignore @@ -0,0 +1,9 @@ +node_modules +npm-debug.log +.env +.git +.gitignore +dist +*.ts +!*.d.ts +tsconfig.json diff --git a/browser/Dockerfile b/browser/Dockerfile new file mode 100644 index 000000000..9f2ea8385 --- /dev/null +++ b/browser/Dockerfile @@ -0,0 +1,30 @@ +FROM mcr.microsoft.com/playwright:v1.57.0-jammy + +WORKDIR /app + +# Copy package files +COPY browser/package*.json ./ + +# Install dependencies (npm install rather than npm ci: this patch ships no package-lock.json for the service, and npm ci refuses to run without one) +RUN npm install + +# Copy TypeScript source and config +COPY browser/server.ts ./ +COPY browser/tsconfig.json ./ + +# Build TypeScript +RUN npm run build + +# Accept build arguments for ports (with defaults) +ARG BROWSER_WS_PORT=3001 +ARG BROWSER_HEALTH_PORT=3002 + +# Set as environment variables +ENV BROWSER_WS_PORT=${BROWSER_WS_PORT} +ENV BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT} + +# Expose ports dynamically based on build args +EXPOSE ${BROWSER_WS_PORT} ${BROWSER_HEALTH_PORT} + +# Start the browser service (run compiled JS) +CMD ["node", "dist/server.js"] diff --git a/browser/package.json b/browser/package.json new file mode 100644 index 000000000..8aaf0d25b --- /dev/null +++ b/browser/package.json @@ -0,0 +1,21 @@ +{ + "name": "maxun-browser-service", + "version": "1.0.0", + "description": "Browser service that exposes Playwright browsers via WebSocket with stealth plugins", + "main": "dist/server.js", + "scripts": { + "build": "tsc", + "start": "node dist/server.js", + "dev": "ts-node server.ts" + }, + 
"dependencies": { + "playwright": "1.57.0", + "playwright-extra": "^4.3.6", + "puppeteer-extra-plugin-stealth": "^2.11.2" + }, + "devDependencies": { + "@types/node": "^22.7.9", + "typescript": "^5.0.0", + "ts-node": "^10.9.2" + } +} \ No newline at end of file diff --git a/browser/server.ts b/browser/server.ts new file mode 100644 index 000000000..2a70beef2 --- /dev/null +++ b/browser/server.ts @@ -0,0 +1,92 @@ +import { chromium } from 'playwright-extra'; +import stealthPlugin from 'puppeteer-extra-plugin-stealth'; +import http from 'http'; +import type { BrowserServer } from 'playwright'; + +// Apply stealth plugin to chromium +chromium.use(stealthPlugin()); + +let browserServer: BrowserServer | null = null; + +// Configurable ports with defaults +const BROWSER_WS_PORT = parseInt(process.env.BROWSER_WS_PORT || '3001', 10); +const BROWSER_HEALTH_PORT = parseInt(process.env.BROWSER_HEALTH_PORT || '3002', 10); + +async function start(): Promise { + console.log('Starting Maxun Browser Service...'); + console.log(`WebSocket port: ${BROWSER_WS_PORT}`); + console.log(`Health check port: ${BROWSER_HEALTH_PORT}`); + + try { + // Launch browser server that exposes WebSocket endpoint + browserServer = await chromium.launchServer({ + headless: true, + args: [ + '--disable-blink-features=AutomationControlled', + '--disable-web-security', + '--disable-features=IsolateOrigins,site-per-process', + '--disable-site-isolation-trials', + '--disable-extensions', + '--no-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--force-color-profile=srgb', + '--force-device-scale-factor=2', + '--ignore-certificate-errors', + '--mute-audio' + ], + port: BROWSER_WS_PORT, + }); + + console.log(`✅ Browser WebSocket endpoint ready: ${browserServer.wsEndpoint()}`); + console.log(`✅ Stealth plugin enabled`); + + // Health check HTTP server + const healthServer = http.createServer((req, res) => { + if (req.url === '/health') { + res.writeHead(200, { 'Content-Type': 'application/json' }); 
+ res.end(JSON.stringify({ + status: 'healthy', + wsEndpoint: browserServer?.wsEndpoint(), + wsPort: BROWSER_WS_PORT, + healthPort: BROWSER_HEALTH_PORT, + timestamp: new Date().toISOString() + })); + } else if (req.url === '/') { + res.writeHead(200, { 'Content-Type': 'text/plain' }); + res.end(`Maxun Browser Service\nWebSocket: ${browserServer?.wsEndpoint()}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`); + } else { + res.writeHead(404); + res.end('Not Found'); + } + }); + + healthServer.listen(BROWSER_HEALTH_PORT, () => { + console.log(`✅ Health check server running on port ${BROWSER_HEALTH_PORT}`); + console.log('Browser service is ready to accept connections!'); + }); + } catch (error) { + console.error('❌ Failed to start browser service:', error); + process.exit(1); + } +} + +// Graceful shutdown +async function shutdown(): Promise { + console.log('Shutting down browser service...'); + if (browserServer) { + try { + await browserServer.close(); + console.log('Browser server closed'); + } catch (error) { + console.error('Error closing browser server:', error); + } + } + process.exit(0); +} + +process.on('SIGTERM', shutdown); +process.on('SIGINT', shutdown); + +// Start the service +start().catch(console.error); diff --git a/browser/tsconfig.json b/browser/tsconfig.json new file mode 100644 index 000000000..a1a171bf1 --- /dev/null +++ b/browser/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "lib": [ + "ES2020" + ], + "outDir": "./dist", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "moduleResolution": "node" + }, + "include": [ + "server.ts" + ], + "exclude": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index c1b4302e8..dbb147b79 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,6 +74,42 @@ 
services: depends_on: - backend + browser: + build: + context: . + dockerfile: browser/Dockerfile + args: + BROWSER_WS_PORT: ${BROWSER_WS_PORT:-3001} + BROWSER_HEALTH_PORT: ${BROWSER_HEALTH_PORT:-3002} + ports: + - "${BROWSER_WS_PORT:-3001}:${BROWSER_WS_PORT:-3001}" + - "${BROWSER_HEALTH_PORT:-3002}:${BROWSER_HEALTH_PORT:-3002}" + environment: + - NODE_ENV=production + - DEBUG=pw:browser* + - BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001} + - BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 10s + deploy: + resources: + limits: + memory: 2G + cpus: '1.5' + reservations: + memory: 1G + cpus: '1.0' + security_opt: + - seccomp:unconfined + shm_size: 2gb + cap_add: + - SYS_ADMIN + volumes: postgres_data: minio_data: \ No newline at end of file diff --git a/maxun-core/package.json b/maxun-core/package.json index 5506f3608..21b51e37e 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -31,10 +31,10 @@ "license": "AGPL-3.0-or-later", "dependencies": { "@cliqz/adblocker-playwright": "^1.31.3", + "@types/node": "22.7.9", "cross-fetch": "^4.0.0", "joi": "^17.6.0", - "playwright": "^1.20.1", - "playwright-extra": "^4.3.6", - "puppeteer-extra-plugin-stealth": "^2.11.2" + "playwright-core": "1.57.0", + "turndown": "^7.2.2" } -} +} \ No newline at end of file diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index a34777d89..1dcc08494 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,5 +1,5 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ -import { ElementHandle, Page, PageScreenshotOptions } from 'playwright'; +import { ElementHandle, Page, PageScreenshotOptions } from 'playwright-core'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import path from 'path'; @@ -144,7 
+144,7 @@ export default class Interpreter extends EventEmitter { private async applyAdBlocker(page: Page): Promise { if (this.blocker) { try { - await this.blocker.enableBlockingInPage(page); + await this.blocker.enableBlockingInPage(page as any); } catch (err) { this.log(`Ad-blocker operation failed:`, Level.ERROR); } @@ -154,7 +154,7 @@ export default class Interpreter extends EventEmitter { private async disableAdBlocker(page: Page): Promise { if (this.blocker) { try { - await this.blocker.disableBlockingInPage(page); + await this.blocker.disableBlockingInPage(page as any); } catch (err) { this.log(`Ad-blocker operation failed:`, Level.ERROR); } diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts index 08b76ef99..912780093 100644 --- a/maxun-core/src/types/workflow.ts +++ b/maxun-core/src/types/workflow.ts @@ -1,4 +1,4 @@ -import { Page } from 'playwright'; +import { Page } from 'playwright-core'; import { naryOperators, unaryOperators, operators, meta, } from './logic'; diff --git a/package.json b/package.json index 65a2f87ae..46eb302a8 100644 --- a/package.json +++ b/package.json @@ -60,12 +60,8 @@ "pg": "^8.13.0", "pg-boss": "^10.1.6", "pkce-challenge": "^4.1.0", - "playwright": "^1.48.2", - "playwright-extra": "^4.3.6", + "playwright-core": "1.57.0", "posthog-node": "^4.2.1", - "prismjs": "^1.28.0", - "puppeteer-extra-plugin-recaptcha": "^3.6.8", - "puppeteer-extra-plugin-stealth": "^2.11.2", "react": "^18.0.0", "react-dom": "^18.0.0", "react-highlight": "0.15.0", @@ -142,4 +138,4 @@ "vite": "^5.4.10", "zod": "^3.25.62" } -} +} \ No newline at end of file diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 7c665001e..5bcf41e7d 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -1,6 +1,4 @@ import { Router, Request, Response } from 'express'; -import { chromium } from "playwright-extra"; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { requireAPIKey } from 
"../middlewares/api"; import Robot from "../models/Robot"; import Run from "../models/Run"; @@ -20,8 +18,6 @@ import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-managem import { sendWebhook } from "../routes/webhook"; import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape'; -chromium.use(stealthPlugin()); - const router = Router(); const formatRecording = (recordingData: any) => { diff --git a/server/src/browser-management/browserConnection.ts b/server/src/browser-management/browserConnection.ts new file mode 100644 index 000000000..16d48c9e2 --- /dev/null +++ b/server/src/browser-management/browserConnection.ts @@ -0,0 +1,118 @@ +import { chromium } from 'playwright-core'; +import type { Browser } from 'playwright-core'; +import logger from '../logger'; + +/** + * Configuration for connection retry logic + */ +const CONNECTION_CONFIG = { + maxRetries: 3, + retryDelay: 2000, + connectionTimeout: 30000, +}; + +/** + * Get the WebSocket endpoint from the browser service health check. + * The host of the advertised endpoint is replaced with BROWSER_WS_HOST: the service + * reports the address as it sees itself (presumably localhost inside its own container), + * which is not reachable from another container or machine. + * @returns Promise<string> - The WebSocket endpoint URL with browser ID + */ +async function getBrowserServiceEndpoint(): Promise<string> { + const healthPort = process.env.BROWSER_HEALTH_PORT || '3002'; + const healthHost = process.env.BROWSER_WS_HOST || 'localhost'; + const healthEndpoint = `http://${healthHost}:${healthPort}/health`; + + try { + logger.debug(`Fetching WebSocket endpoint from: ${healthEndpoint}`); + const response = await fetch(healthEndpoint); + const data = await response.json(); + + if (data.status === 'healthy' && data.wsEndpoint) { + // Keep the port and path (browser ID) but connect to the host we reached for the health check + const wsUrl = new URL(data.wsEndpoint); + wsUrl.hostname = healthHost; + const wsEndpoint = wsUrl.toString(); + logger.debug(`Got WebSocket endpoint: ${wsEndpoint}`); + return wsEndpoint; + } + + throw new Error('Health check did not return a valid wsEndpoint'); + } catch (error: any) { + logger.error(`Failed to fetch endpoint from health check: ${error.message}`); + throw new Error( + `Browser service is not accessible at ${healthEndpoint}. 
` + + `Make sure the browser service is running (docker-compose up browser)` + ); + } +} + +/** + * Connect to the remote browser service with retry logic + * @param retries - Number of connection attempts (default: 3) + * @returns Promise - Connected browser instance + * @throws Error if connection fails after all retries + */ +export async function connectToRemoteBrowser(retries?: number): Promise { + const maxRetries = retries ?? CONNECTION_CONFIG.maxRetries; + const wsEndpoint = await getBrowserServiceEndpoint(); + + logger.info(`Connecting to browser service at ${wsEndpoint}...`); + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + logger.debug(`Connection attempt ${attempt}/${maxRetries}`); + + const browser = await chromium.connect(wsEndpoint, { + timeout: CONNECTION_CONFIG.connectionTimeout, + }); + + logger.info(`Successfully connected to browser service`); + return browser; + } catch (error: any) { + logger.warn( + `Connection attempt ${attempt}/${maxRetries} failed: ${error.message}` + ); + + if (attempt === maxRetries) { + logger.error( + `Failed to connect to browser service after ${maxRetries} attempts` + ); + throw new Error( + `Failed to connect to browser service at ${wsEndpoint}: ${error.message}` + ); + } + + logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`); + await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay)); + } + } + + throw new Error('Failed to connect to browser service'); +} + +/** + * Check if browser service is healthy + * @returns Promise - true if service is healthy + */ +export async function checkBrowserServiceHealth(): Promise { + try { + const healthPort = process.env.BROWSER_HEALTH_PORT || '3002'; + const healthHost = process.env.BROWSER_WS_HOST || 'localhost'; + const healthEndpoint = `http://${healthHost}:${healthPort}/health`; + + const response = await fetch(healthEndpoint); + const data = await response.json(); + + if (data.status === 'healthy') { + 
logger.info('Browser service health check passed'); + return true; + } + + logger.warn('Browser service health check failed:', data); + return false; + } catch (error: any) { + logger.error('Browser service health check error:', error.message); + return false; + } +} diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index c88ba068b..f182d1974 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -2,11 +2,9 @@ import { Page, Browser, CDPSession, - BrowserContext, -} from 'playwright'; + BrowserContext +} from 'playwright-core'; import { Socket } from "socket.io"; -import { chromium } from 'playwright-extra'; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import sharp from 'sharp'; @@ -16,6 +14,7 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; import { getInjectableScript } from 'idcac-playwright'; +import { connectToRemoteBrowser } from '../browserConnection'; declare global { interface Window { @@ -83,8 +82,6 @@ interface ProcessedSnapshot { }; } -chromium.use(stealthPlugin()); - const MEMORY_CONFIG = { gcInterval: 20000, // Check memory more frequently (20s instead of 60s) maxHeapSize: 1536 * 1024 * 1024, // 1.5GB @@ -567,23 +564,7 @@ export class RemoteBrowser { while (!success && retryCount < MAX_RETRIES) { try { - this.browser = (await chromium.launch({ - headless: true, - args: [ - "--disable-blink-features=AutomationControlled", - "--disable-web-security", - "--disable-features=IsolateOrigins,site-per-process", - "--disable-site-isolation-trials", - "--disable-extensions", - "--no-sandbox", - "--disable-dev-shm-usage", 
- "--disable-gpu", - "--force-color-profile=srgb", - "--force-device-scale-factor=2", - "--ignore-certificate-errors", - "--mute-audio" - ], - })); + this.browser = await connectToRemoteBrowser(); if (!this.browser || this.browser.isConnected() === false) { throw new Error('Browser failed to launch or is not connected'); @@ -683,9 +664,9 @@ export class RemoteBrowser { try { const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']); - await blocker.enableBlockingInPage(this.currentPage); + await blocker.enableBlockingInPage(this.currentPage as any); this.client = await this.currentPage.context().newCDPSession(this.currentPage); - await blocker.disableBlockingInPage(this.currentPage); + await blocker.disableBlockingInPage(this.currentPage as any); console.log('Adblocker initialized'); } catch (error: any) { console.warn('Failed to initialize adblocker, continuing without it:', error.message); diff --git a/server/src/markdownify/scrape.ts b/server/src/markdownify/scrape.ts index 6821bfdb7..dfcd4b391 100644 --- a/server/src/markdownify/scrape.ts +++ b/server/src/markdownify/scrape.ts @@ -1,4 +1,4 @@ -import { chromium } from "playwright"; +import { connectToRemoteBrowser } from "../browser-management/browserConnection"; import { parseMarkdown } from "./markdown"; /** @@ -6,7 +6,7 @@ import { parseMarkdown } from "./markdown"; * returns clean Markdown using parser. */ export async function convertPageToMarkdown(url: string): Promise { - const browser = await chromium.launch(); + const browser = await connectToRemoteBrowser(); const page = await browser.newPage(); await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); @@ -61,7 +61,7 @@ export async function convertPageToMarkdown(url: string): Promise { * returns clean HTML. 
*/ export async function convertPageToHTML(url: string): Promise { - const browser = await chromium.launch(); + const browser = await connectToRemoteBrowser(); const page = await browser.newPage(); await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index f5d719b46..8f5c03dbe 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -13,7 +13,7 @@ import { WorkflowFile } from 'maxun-core'; import Run from './models/Run'; import Robot from './models/Robot'; import { browserPool } from './server'; -import { Page } from 'playwright'; +import { Page } from 'playwright-core'; import { capture } from './utils/analytics'; import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet'; import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable'; diff --git a/server/src/routes/proxy.ts b/server/src/routes/proxy.ts index 5cdca0fcf..52cced533 100644 --- a/server/src/routes/proxy.ts +++ b/server/src/routes/proxy.ts @@ -1,10 +1,8 @@ import { Router, Request, Response } from 'express'; -import { chromium } from 'playwright-extra'; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; +import { connectToRemoteBrowser } from '../browser-management/browserConnection'; import User from '../models/User'; import { encrypt, decrypt } from '../utils/auth'; import { requireSignIn } from '../middlewares/auth'; -chromium.use(stealthPlugin()); export const router = Router(); @@ -86,11 +84,9 @@ router.get('/test', requireSignIn, async (req: Request, res: Response) => { }), }; - const browser = await chromium.launch({ - headless: true, - proxy: proxyOptions, - args:["--ignore-certificate-errors"] - }); - const page = await browser.newPage(); + const browser = await connectToRemoteBrowser(); + // The shared browser is not launched with the user's proxy, so apply it on a dedicated context; otherwise this test never exercises the proxy + const context = await browser.newContext({ proxy: proxyOptions }); + const page = await context.newPage(); await page.goto('https://example.com'); await browser.close(); diff --git 
a/server/src/routes/record.ts b/server/src/routes/record.ts index 8a5898113..bd00930aa 100644 --- a/server/src/routes/record.ts +++ b/server/src/routes/record.ts @@ -13,14 +13,11 @@ import { destroyRemoteBrowser, canCreateBrowserInState, } from '../browser-management/controller'; -import { chromium } from 'playwright-extra'; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import logger from "../logger"; import { requireSignIn } from '../middlewares/auth'; import { pgBoss } from '../pgboss-worker'; export const router = Router(); -chromium.use(stealthPlugin()); export interface AuthenticatedRequest extends Request { user?: any; diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 8451c7205..181a5dc28 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -1,8 +1,6 @@ import { Router } from 'express'; import logger from "../logger"; import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller"; -import { chromium } from 'playwright-extra'; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { browserPool } from "../server"; import { v4 as uuid } from "uuid"; import moment from 'moment-timezone'; @@ -18,7 +16,6 @@ import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker'; import { pgBoss, registerWorkerForQueue, registerAbortWorkerForQueue } from '../pgboss-worker'; -chromium.use(stealthPlugin()); export const router = Router(); diff --git a/server/src/types/index.ts b/server/src/types/index.ts index 75aac8029..45b21ca49 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -1,4 +1,4 @@ -import {BrowserType, LaunchOptions} from "playwright"; +import {BrowserType, LaunchOptions} from "playwright-core"; /** * Interpreter settings properties including recording parameters. 
diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 57a30863b..d4e6aea2a 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -2,7 +2,7 @@ import { Action, ActionType, Coordinates, TagName, DatePickerEventData } from ". import { WhereWhatPair, WorkflowFile } from 'maxun-core'; import logger from "../../logger"; import { Socket } from "socket.io"; -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { getElementInformation, getRect, diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index 8a73a25fb..397192994 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -1,7 +1,7 @@ import Interpreter, { WorkflowFile } from "maxun-core"; import logger from "../../logger"; import { Socket } from "socket.io"; -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { InterpreterSettings } from "../../types"; import { decrypt } from "../../utils/auth"; import Run from "../../models/Run"; diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 470cdacb3..a3571520e 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -1,6 +1,4 @@ import { v4 as uuid } from "uuid"; -import { chromium } from 'playwright-extra'; -import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { io, Socket } from "socket.io-client"; import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller'; import logger from '../../logger'; @@ -12,11 +10,10 @@ import { getDecryptedProxyConfig } from "../../routes/proxy"; import { BinaryOutputService } from "../../storage/mino"; import { 
capture } from "../../utils/analytics"; import { WorkflowFile } from "maxun-core"; -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { sendWebhook } from "../../routes/webhook"; import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable"; import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape"; -chromium.use(stealthPlugin()); async function createWorkflowAndStoreMetadata(id: string, userId: string) { try { diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 43c1d2f89..b5f8051f7 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -1,4 +1,4 @@ -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { Coordinates } from "../types"; import { WhereWhatPair, WorkflowFile } from "maxun-core"; import logger from "../logger"; From 3018d7dc3ac64e5b34d6a814e06dc5dd26c52844 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 17:55:50 +0530 Subject: [PATCH 2/3] feat: launch local browser fallback --- .../browser-management/browserConnection.ts | 107 +++++++++++++----- 1 file changed, 76 insertions(+), 31 deletions(-) diff --git a/server/src/browser-management/browserConnection.ts b/server/src/browser-management/browserConnection.ts index 16d48c9e2..5e43b3f57 100644 --- a/server/src/browser-management/browserConnection.ts +++ b/server/src/browser-management/browserConnection.ts @@ -41,47 +41,92 @@ async function getBrowserServiceEndpoint(): Promise { } /** - * Connect to the remote browser service with retry logic + * Launch a local browser as fallback when browser service is unavailable + * @returns Promise - Locally launched browser instance + */ +async function launchLocalBrowser(): Promise { + logger.warn('Attempting to launch local browser'); + logger.warn('Note: This requires Chromium binaries to be installed (npx playwright install 
chromium)'); + + try { + const browser = await chromium.launch({ + headless: true, + args: [ + '--disable-blink-features=AutomationControlled', + '--disable-web-security', + '--disable-features=IsolateOrigins,site-per-process', + '--disable-site-isolation-trials', + '--disable-extensions', + '--no-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--force-color-profile=srgb', + '--force-device-scale-factor=2', + '--ignore-certificate-errors', + '--mute-audio' + ], + }); + + logger.info('Successfully launched local browser'); + return browser; + } catch (error: any) { + logger.error(`Failed to launch local browser: ${error.message}`); + throw new Error( + `Could not launch local browser. ` + + `Please either:\n` + + ` 1. Start the browser service: docker-compose up browser\n` + + ` 2. Install Chromium binaries: npx playwright@1.57.0 install chromium` + ); + } +} + +/** + * Connect to the remote browser service with retry logic, with fallback to local browser * @param retries - Number of connection attempts (default: 3) - * @returns Promise - Connected browser instance - * @throws Error if connection fails after all retries + * @returns Promise - Connected browser instance (remote or local) + * @throws Error if both remote connection and local launch fail */ export async function connectToRemoteBrowser(retries?: number): Promise { const maxRetries = retries ?? 
CONNECTION_CONFIG.maxRetries; - const wsEndpoint = await getBrowserServiceEndpoint(); - - logger.info(`Connecting to browser service at ${wsEndpoint}...`); - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - logger.debug(`Connection attempt ${attempt}/${maxRetries}`); - - const browser = await chromium.connect(wsEndpoint, { - timeout: CONNECTION_CONFIG.connectionTimeout, - }); + try { + const wsEndpoint = await getBrowserServiceEndpoint(); + logger.info(`Connecting to browser service at ${wsEndpoint}...`); + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + logger.debug(`Connection attempt ${attempt}/${maxRetries}`); + + const browser = await chromium.connect(wsEndpoint, { + timeout: CONNECTION_CONFIG.connectionTimeout, + }); + + logger.info('Successfully connected to browser service'); + return browser; + } catch (error: any) { + logger.warn( + `Connection attempt ${attempt}/${maxRetries} failed: ${error.message}` + ); - logger.info(`Successfully connected to browser service`); - return browser; - } catch (error: any) { - logger.warn( - `Connection attempt ${attempt}/${maxRetries} failed: ${error.message}` - ); + if (attempt === maxRetries) { + logger.error( + `Failed to connect to browser service after ${maxRetries} attempts` + ); + throw new Error(`Remote connection failed: ${error.message}`); + } - if (attempt === maxRetries) { - logger.error( - `Failed to connect to browser service after ${maxRetries} attempts` - ); - throw new Error( - `Failed to connect to browser service at ${wsEndpoint}: ${error.message}` - ); + logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`); + await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay)); } - - logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`); - await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay)); } - } - throw new Error('Failed to connect to browser service'); + throw new Error('Failed 
to connect to browser service'); + } catch (error: any) { + logger.warn(`Browser service connection failed: ${error.message}`); + logger.warn('Falling back to local browser launch...'); + + return await launchLocalBrowser(); + } } /** From 7f97401b8b407ce488e901f59a8e98c3a60e31e6 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 17:58:55 +0530 Subject: [PATCH 3/3] chore: move to pw-core --- server/src/browser-management/inputHandlers.ts | 2 +- src/shared/types.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/browser-management/inputHandlers.ts b/server/src/browser-management/inputHandlers.ts index 3e58664e1..31a2bfcb2 100644 --- a/server/src/browser-management/inputHandlers.ts +++ b/server/src/browser-management/inputHandlers.ts @@ -8,7 +8,7 @@ import logger from "../logger"; import { Coordinates, ScrollDeltas, KeyboardInput, DatePickerEventData } from '../types'; import { browserPool } from "../server"; import { WorkflowGenerator } from "../workflow-management/classes/Generator"; -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { throttle } from "../../../src/helpers/inputHelpers"; import { CustomActions } from "../../../src/shared/types"; import { WhereWhatPair } from "maxun-core"; diff --git a/src/shared/types.ts b/src/shared/types.ts index 0a259dea8..d29075510 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -1,5 +1,5 @@ import { WorkflowFile } from "maxun-core"; -import { Locator } from "playwright"; +import { Locator } from "playwright-core"; export type Workflow = WorkflowFile["workflow"];