diff --git a/.github/workflows/publish-mcp.yml b/.github/workflows/publish-mcp.yml new file mode 100644 index 0000000..1ce9df9 --- /dev/null +++ b/.github/workflows/publish-mcp.yml @@ -0,0 +1,50 @@ +name: Publish to MCP Registry + +on: + push: + tags: ["v*"] + +jobs: + publish: + runs-on: ubuntu-latest + permissions: + id-token: write # Required for OIDC authentication + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - name: Setup pnpm + uses: pnpm/action-setup@v3 + with: + version: 10.12.4 + + - name: Setup Node.js + uses: actions/setup-node@v5 + with: + node-version: "lts/*" + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Run tests + run: pnpm test --if-present + + - name: Build package + run: pnpm build --if-present + + - name: Publish to npm + run: pnpm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + - name: Install MCP Publisher + run: | + curl -L "https://github.com/modelcontextprotocol/registry/releases/download/latest/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz mcp-publisher + + - name: Login to MCP Registry + run: ./mcp-publisher login github-oidc + + - name: Publish to MCP Registry + run: ./mcp-publisher publish diff --git a/CHANGELOG.md b/CHANGELOG.md index c0f5bb5..7fcd1e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # @browserbasehq/mcp-server-browserbase +## 2.2.0 + +### Minor Changes + +- Remove multi-session tools, remove prompts and sampling, simplify tool descriptions for better context, add support for the `GOOGLE_API_KEY` environment variable as a fallback model API key, upgrade to the latest version of Stagehand (2.5.2), and replace the custom `AvailableModelSchema` with the model schema exported by Stagehand.
+ ## 2.1.3 ### Patch Changes diff --git a/README.md b/README.md index 16c5925..de26e95 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![smithery badge](https://smithery.ai/badge/@browserbasehq/mcp-browserbase)](https://smithery.ai/server/@browserbasehq/mcp-browserbase) -![cover](assets/cover-mcp.png) +![cover](assets/cover.png) [The Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you're building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. @@ -19,7 +19,6 @@ This server provides cloud browser automation capabilities using [Browserbase](h | Model Flexibility | Supports multiple models (OpenAI, Claude, Gemini, and more) | | Vision Support | Use annotated screenshots for complex DOMs | | Session Management | Create, manage, and close browser sessions | -| Multi-Session | Run multiple browser sessions in parallel | ## How to Setup @@ -194,7 +193,7 @@ The Browserbase MCP server accepts the following command-line flags: | `--cookies [json]` | JSON array of cookies to inject into the browser | | `--browserWidth ` | Browser viewport width (default: 1024) | | `--browserHeight ` | Browser viewport height (default: 768) | -| `--modelName ` | The model to use for Stagehand (default: google/gemini-2.0-flash) | +| `--modelName ` | The model to use for Stagehand (default: gemini-2.0-flash) | | `--modelApiKey ` | API key for the custom model provider (required when using custom models) | | `--experimental` | Enable experimental features (default: false) | @@ -386,13 +385,11 @@ The server provides access to screenshot resources: - **AI-Powered Automation**: Natural language commands for web interactions - **Multi-Model Support**: Works with OpenAI, Claude, Gemini, and more -- **Advanced Session Management**: Single 
and multi-session support for parallel browser automation - **Screenshot Capture**: Full-page and element-specific screenshots - **Data Extraction**: Intelligent content extraction from web pages - **Proxy Support**: Enterprise-grade proxy capabilities - **Stealth Mode**: Advanced anti-detection features - **Context Persistence**: Maintain authentication and state across sessions -- **Parallel Workflows**: Run multiple browser sessions simultaneously for complex automation tasks For more information about the Model Context Protocol, visit: diff --git a/assets/browserbase-demo.png b/assets/browserbase-demo.png deleted file mode 100644 index 56b0423..0000000 Binary files a/assets/browserbase-demo.png and /dev/null differ diff --git a/assets/cover-mcp.png b/assets/cover-mcp.png deleted file mode 100644 index cfc04a9..0000000 Binary files a/assets/cover-mcp.png and /dev/null differ diff --git a/assets/session.png b/assets/session.png deleted file mode 100644 index 56b0423..0000000 Binary files a/assets/session.png and /dev/null differ diff --git a/config.d.ts b/config.d.ts index cadc16d..1b85dcc 100644 --- a/config.d.ts +++ b/config.d.ts @@ -1,5 +1,5 @@ import type { Cookie } from "playwright-core"; -import type { AvailableModelSchema } from "./src/types/models.js"; +import type { AvailableModelSchema } from "@browserbasehq/stagehand"; export type Config = { /** @@ -96,12 +96,12 @@ export type Config = { * The Model that Stagehand uses * Available models: OpenAI, Claude, Gemini, Cerebras, Groq, and other providers * - * @default "google/gemini-2.0-flash" + * @default "gemini-2.0-flash" */ - modelName?: AvailableModelSchema; + modelName?: z.infer; /** * API key for the custom model provider - * Required when using a model other than the default google/gemini-2.0-flash + * Required when using a model other than the default gemini-2.0-flash */ modelApiKey?: string; /** diff --git a/evals/mcp-eval-basic.config.json b/evals/mcp-eval-basic.config.json index f59c29d..084d10d 
100644 --- a/evals/mcp-eval-basic.config.json +++ b/evals/mcp-eval-basic.config.json @@ -76,22 +76,6 @@ "browserbase_session_close" ] }, - { - "name": "multi-session-test", - "description": "Test multi-session browser management", - "steps": [ - { - "user": "Create a multi-session browser named 'test-session', list all sessions, navigate to https://example.com in that session, and close the session", - "expectedState": "closed" - } - ], - "expectTools": [ - "multi_browserbase_stagehand_session_create", - "multi_browserbase_stagehand_session_list", - "multi_browserbase_stagehand_navigate_session", - "multi_browserbase_stagehand_session_close" - ] - }, { "name": "form-interaction-test", "description": "Test form filling and submission capabilities", diff --git a/evals/mcp-eval-minimal.config.json b/evals/mcp-eval-minimal.config.json index 983c14b..81c5783 100644 --- a/evals/mcp-eval-minimal.config.json +++ b/evals/mcp-eval-minimal.config.json @@ -52,29 +52,6 @@ "browserbase_session_close" ] }, - { - "name": "smoke-test-multi-session", - "description": "Quick test to verify multi-session functionality", - "steps": [ - { - "user": "Create a browser session named 'test-session'", - "expectedState": "session created" - }, - { - "user": "List active sessions", - "expectedState": "test-session" - }, - { - "user": "Close the test session", - "expectedState": "closed session" - } - ], - "expectTools": [ - "multi_browserbase_stagehand_session_create", - "multi_browserbase_stagehand_session_list", - "multi_browserbase_stagehand_session_close" - ] - }, { "name": "smoke-test-url-tools", "description": "Quick test to verify URL retrieval tools work", diff --git a/evals/mcp-eval.config.json b/evals/mcp-eval.config.json index f59c29d..799b0b5 100644 --- a/evals/mcp-eval.config.json +++ b/evals/mcp-eval.config.json @@ -76,22 +76,6 @@ "browserbase_session_close" ] }, - { - "name": "multi-session-test", - "description": "Test multi-session browser management", - "steps": [ - { - 
"user": "Create a multi-session browser named 'test-session', list all sessions, navigate to https://example.com in that session, and close the session", - "expectedState": "closed" - } - ], - "expectTools": [ - "multi_browserbase_stagehand_session_create", - "multi_browserbase_stagehand_session_list", - "multi_browserbase_stagehand_navigate_session", - "multi_browserbase_stagehand_session_close" - ] - }, { "name": "form-interaction-test", "description": "Test form filling and submission capabilities", @@ -121,22 +105,6 @@ "browserbase_session_create", "browserbase_stagehand_navigate" ] - }, - { - "name": "url-retrieval-test", - "description": "Test URL retrieval functionality", - "steps": [ - { - "user": "Create a browser session, navigate to https://example.com, get the current URL to verify navigation, and close the session", - "expectedState": "https://example.com" - } - ], - "expectTools": [ - "browserbase_session_create", - "browserbase_stagehand_navigate", - "browserbase_stagehand_get_url", - "browserbase_session_close" - ] } ] } diff --git a/package.json b/package.json index e0b2cea..a8532a3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@browserbasehq/mcp-server-browserbase", - "version": "2.1.3", + "version": "2.2.0", "description": "MCP server for AI web browser automation using Browserbase and Stagehand", "mcpName": "io.github.browserbase/mcp-server-browserbase", "license": "Apache-2.0", @@ -24,7 +24,7 @@ ], "scripts": { "build": "tsc && shx chmod +x dist/*.js", - "prepare": "husky && npm run build", + "prepare": "husky && pnpm build", "watch": "tsc --watch", "smithery": "npx @smithery/cli dev src/index.ts", "inspector": "npx @modelcontextprotocol/inspector build/index.js", @@ -46,7 +46,7 @@ }, "dependencies": { "@browserbasehq/sdk": "^2.6.0", - "@browserbasehq/stagehand": "^2.5.0", + "@browserbasehq/stagehand": "^2.5.2", "@mcp-ui/server": "^5.10.0", "@modelcontextprotocol/sdk": "^1.13.1", "commander": "^14.0.0", diff --git 
a/pnpm-lock.yaml b/pnpm-lock.yaml index 70ba7cf..b8587ca 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,8 +11,8 @@ importers: specifier: ^2.6.0 version: 2.6.0 "@browserbasehq/stagehand": - specifier: ^2.5.0 - version: 2.5.0(bufferutil@4.0.9)(deepmerge@4.3.1)(dotenv@16.6.1)(react@19.1.0)(zod@3.25.76) + specifier: ^2.5.2 + version: 2.5.2(bufferutil@4.0.9)(deepmerge@4.3.1)(dotenv@16.6.1)(react@19.1.0)(zod@3.25.76) "@mcp-ui/server": specifier: ^5.10.0 version: 5.10.0 @@ -250,15 +250,16 @@ packages: integrity: sha512-83iXP5D7xMm8Wyn66TUaUrgoByCmAJuoMoZQI3sGg3JAiMlTfnCIMqyVBoNSaItaPIkaCnrsj6LiusmXV2X9YA==, } - "@browserbasehq/stagehand@2.5.0": + "@browserbasehq/stagehand@2.5.2": resolution: { - integrity: sha512-Cannwg1WfpiJGDMdPGyx4i59SPBEV9ctTVZctGn+XVbP/ymwcQVFsyYYwSV0cK8rOk90TZK80TkMiuQGpfCF6g==, + integrity: sha512-Awo4IRYtlY+jrGpzyAY5+nqPurp87Ou2qGortNYf8hlR+KB+cI7sgIS7SfCbldkMyGO++uXUHTXkyfNutNuo7w==, } + hasBin: true peerDependencies: deepmerge: ^4.3.1 dotenv: ^16.4.5 - zod: ">=3.25.0 <4.1.0" + zod: ">=3.25.0 <3.25.68" "@changesets/apply-release-plan@7.0.12": resolution: @@ -668,12 +669,17 @@ packages: } engines: { node: ^18.18.0 || ^20.9.0 || >=21.1.0 } - "@google/genai@0.8.0": + "@google/genai@1.24.0": resolution: { - integrity: sha512-Zs+OGyZKyMbFofGJTR9/jTQSv8kITh735N3tEuIZj4VlMQXTC0soCFahysJ9NaeenRlD7xGb6fyqmX+FwrpU6Q==, + integrity: sha512-e3jZF9Dx3dDaDCzygdMuYByHI2xJZ0PaD3r2fRgHZe2IOwBnmJ/Tu5Lt/nefTCxqr1ZnbcbQK9T13d8U/9UMWg==, } - engines: { node: ">=18.0.0" } + engines: { node: ">=20.0.0" } + peerDependencies: + "@modelcontextprotocol/sdk": ^1.11.4 + peerDependenciesMeta: + "@modelcontextprotocol/sdk": + optional: true "@humanfs/core@0.19.1": resolution: @@ -4794,11 +4800,11 @@ snapshots: transitivePeerDependencies: - encoding - "@browserbasehq/stagehand@2.5.0(bufferutil@4.0.9)(deepmerge@4.3.1)(dotenv@16.6.1)(react@19.1.0)(zod@3.25.76)": + 
"@browserbasehq/stagehand@2.5.2(bufferutil@4.0.9)(deepmerge@4.3.1)(dotenv@16.6.1)(react@19.1.0)(zod@3.25.76)": dependencies: "@anthropic-ai/sdk": 0.39.0 "@browserbasehq/sdk": 2.6.0 - "@google/genai": 0.8.0(bufferutil@4.0.9) + "@google/genai": 1.24.0(@modelcontextprotocol/sdk@1.17.5)(bufferutil@4.0.9) "@modelcontextprotocol/sdk": 1.17.5 ai: 4.3.19(react@19.1.0)(zod@3.25.76) deepmerge: 4.3.1 @@ -5098,10 +5104,12 @@ snapshots: "@eslint/core": 0.15.1 levn: 0.4.1 - "@google/genai@0.8.0(bufferutil@4.0.9)": + "@google/genai@1.24.0(@modelcontextprotocol/sdk@1.17.5)(bufferutil@4.0.9)": dependencies: google-auth-library: 9.15.1 ws: 8.18.3(bufferutil@4.0.9) + optionalDependencies: + "@modelcontextprotocol/sdk": 1.17.5 transitivePeerDependencies: - bufferutil - encoding diff --git a/server.json b/server.json index a7b7aa9..3d50a85 100644 --- a/server.json +++ b/server.json @@ -7,13 +7,13 @@ "url": "https://github.com/browserbase/mcp-server-browserbase", "source": "github" }, - "version": "2.1.3", + "version": "2.2.0", "packages": [ { "registry_type": "npm", "registry_base_url": "https://registry.npmjs.org", "identifier": "@browserbasehq/mcp-server-browserbase", - "version": "2.1.1", + "version": "2.2.0", "transport": { "type": "stdio" }, @@ -44,7 +44,7 @@ { "registry_type": "oci", "identifier": "browserbasehq/mcp-server-browserbase", - "version": "2.1.3", + "version": "2.2.0", "runtime_hint": "docker", "environment_variables": [ { diff --git a/src/config.ts b/src/config.ts index 2dd7e9e..3ceb662 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,6 +1,7 @@ import type { Cookie } from "playwright-core"; import type { Config } from "../config.d.ts"; -import type { AvailableModel } from "./types/models.js"; +import { z } from "zod"; +import { AvailableModelSchema } from "@browserbasehq/stagehand"; export type ToolCapability = "core" | string; @@ -15,7 +16,7 @@ export type CLIOptions = { cookies?: Cookie[]; browserWidth?: number; browserHeight?: number; - modelName?: typeof 
AvailableModel; + modelName?: z.infer; modelApiKey?: string; keepAlive?: boolean; experimental?: boolean; @@ -35,7 +36,7 @@ const defaultConfig: Config = { browserHeight: 768, }, cookies: undefined, - modelName: "google/gemini-2.0-flash", // Default Model + modelName: "gemini-2.0-flash", // Default Model }; // Resolve final configuration by merging defaults, file config, and CLI options @@ -46,7 +47,8 @@ export async function resolveConfig(cliOptions: CLIOptions): Promise { // --- Add Browserbase Env Vars --- if (!mergedConfig.modelApiKey) { - mergedConfig.modelApiKey = process.env.GEMINI_API_KEY; + mergedConfig.modelApiKey = + process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY; } // -------------------------------- diff --git a/src/context.ts b/src/context.ts index 3b2abcc..9fe3daa 100644 --- a/src/context.ts +++ b/src/context.ts @@ -3,30 +3,51 @@ import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import type { Config } from "../config.d.ts"; import { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { listResources, readResource } from "./mcp/resources.js"; -import { getSession, defaultSessionId } from "./sessionManager.js"; +import { SessionManager } from "./sessionManager.js"; import type { MCPTool, BrowserSession } from "./types/types.js"; +/** + * MCP Server Context + * + * Central controller that connects the MCP server infrastructure with browser automation capabilities, + * managing server instances, browser sessions, tool execution, and resource access. 
+ */ + export class Context { public readonly config: Config; private server: Server; - public currentSessionId: string = defaultSessionId; + private sessionManager: SessionManager; + + // currentSessionId is a getter that delegates to SessionManager to ensure synchronization + // This prevents desync between Context and SessionManager session tracking + public get currentSessionId(): string { + return this.sessionManager.getActiveSessionId(); + } - constructor(server: Server, config: Config) { + constructor(server: Server, config: Config, contextId?: string) { this.server = server; this.config = config; + this.sessionManager = new SessionManager(contextId); } public getServer(): Server { return this.server; } + public getSessionManager(): SessionManager { + return this.sessionManager; + } + /** * Gets the Stagehand instance for the current session from SessionManager */ public async getStagehand( sessionId: string = this.currentSessionId, ): Promise { - const session = await getSession(sessionId, this.config); + const session = await this.sessionManager.getSession( + sessionId, + this.config, + ); if (!session) { throw new Error(`No session found for ID: ${sessionId}`); } @@ -35,7 +56,10 @@ export class Context { public async getActivePage(): Promise { // Get page from session manager - const session = await getSession(this.currentSessionId, this.config); + const session = await this.sessionManager.getSession( + this.currentSessionId, + this.config, + ); if (session && session.page && !session.page.isClosed()) { return session.page; } @@ -46,7 +70,7 @@ export class Context { public async getActiveBrowser( createIfMissing: boolean = true, ): Promise { - const session = await getSession( + const session = await this.sessionManager.getSession( this.currentSessionId, this.config, createIfMissing, diff --git a/src/index.ts b/src/index.ts index ffd4a1c..b59473b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,8 @@ import * as dotenv from "dotenv"; 
dotenv.config(); +import { randomUUID } from "crypto"; + import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import type { MCPToolsArray } from "./types/types.js"; @@ -8,16 +10,13 @@ import type { MCPToolsArray } from "./types/types.js"; import { Context } from "./context.js"; import type { Config } from "../config.d.ts"; import { TOOLS } from "./tools/index.js"; -import { AvailableModelSchema } from "./types/models.js"; -import { PROMPTS, getPrompt } from "./mcp/prompts.js"; +import { AvailableModelSchema } from "@browserbasehq/stagehand"; import { RESOURCE_TEMPLATES } from "./mcp/resources.js"; import { ListResourcesRequestSchema, ReadResourceRequestSchema, ListResourceTemplatesRequestSchema, - ListPromptsRequestSchema, - GetPromptRequestSchema, } from "@modelcontextprotocol/sdk/types.js"; const cookieSchema = z.object({ @@ -95,13 +94,13 @@ export const configSchema = z }) .optional(), modelName: AvailableModelSchema.optional().describe( - "The model to use for Stagehand (default: google/gemini-2.0-flash)", + "The model to use for Stagehand (default: gemini-2.0-flash)", ), // Already an existing Zod Enum modelApiKey: z .string() .optional() .describe( - "API key for the custom model provider. Required when using a model other than the default google/gemini-2.0-flash", + "API key for the custom model provider. 
Required when using a model other than the default gemini-2.0-flash", ), experimental: z .boolean() @@ -111,7 +110,7 @@ export const configSchema = z .refine( (data) => { // If a non-default model is explicitly specified, API key is required - if (data.modelName && data.modelName !== "google/gemini-2.0-flash") { + if (data.modelName && data.modelName !== "gemini-2.0-flash") { return data.modelApiKey !== undefined && data.modelApiKey.length > 0; } return true; @@ -133,7 +132,7 @@ export default function ({ config }: { config: z.infer }) { const server = new McpServer({ name: "Browserbase MCP Server", - version: "2.0.0", + version: "2.2.0", description: "Cloud browser automation server powered by Browserbase and Stagehand. Enables LLMs to navigate websites, interact with elements, extract data, and capture screenshots using natural language commands.", capabilities: { @@ -141,27 +140,20 @@ export default function ({ config }: { config: z.infer }) { subscribe: true, listChanged: true, }, - prompts: { - listChanged: true, - }, - sampling: {}, }, }); const internalConfig: Config = config as Config; // Create the context, passing server instance and config - const context = new Context(server.server, internalConfig); + const contextId = randomUUID(); + const context = new Context(server.server, internalConfig, contextId); server.server.registerCapabilities({ resources: { subscribe: true, listChanged: true, }, - prompts: { - listChanged: true, - }, - sampling: {}, }); // Add resource handlers @@ -183,16 +175,6 @@ export default function ({ config }: { config: z.infer }) { }, ); - // Add prompt handlers - server.server.setRequestHandler(ListPromptsRequestSchema, async () => { - return { prompts: PROMPTS }; - }); - - server.server.setRequestHandler(GetPromptRequestSchema, async (request) => { - const prompt = getPrompt(request.params.name); - return prompt; - }); - const tools: MCPToolsArray = [...TOOLS]; // Register each tool with the Smithery server diff --git 
a/src/mcp/prompts.ts b/src/mcp/prompts.ts deleted file mode 100644 index ffd079a..0000000 --- a/src/mcp/prompts.ts +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Prompts module for the Browserbase MCP server - * Contains prompts definitions and handlers for prompt-related requests - * Docs: https://modelcontextprotocol.io/docs/concepts/prompts - */ - -// Define the prompts -export const PROMPTS = [ - { - name: "browserbase_system", - description: - "System prompt defining the scope and capabilities of Browserbase MCP server", - arguments: [], - }, - { - name: "multi_session_guidance", - description: - "Guidance on when and how to use multi-session browser automation", - arguments: [], - }, - { - name: "stagehand_usage", - description: - "Guidelines on how to use Stagehand's act, observe, and extract utilities effectively", - arguments: [], - }, -]; - -/** - * Get a prompt by name - * @param name The name of the prompt to retrieve - * @returns The prompt definition or throws an error if not found - */ -export function getPrompt(name: string) { - if (name === "browserbase_system") { - return { - description: "System prompt for Browserbase MCP server capabilities", - messages: [ - { - role: "system", - content: { - type: "text", - text: `You have access to a powerful browser automation server via Browserbase MCP. 
This server provides: - -CAPABILITIES: -- Cloud browser automation using Browserbase infrastructure -- AI-powered web interactions via Stagehand -- Parallel browser sessions for concurrent tasks -- Advanced stealth mode for anti-detection -- Proxy support for geo-location and privacy -- Context persistence for maintaining authentication -- Screenshot capture and visual analysis -- Structured data extraction from any webpage - -TOOL SELECTION GUIDE: -For SINGLE browser tasks: Use "browserbase_session_create" then regular tools -For MULTIPLE browser tasks: Use "multi_browserbase_stagehand_session_create" then session-specific tools - -MULTI-SESSION INDICATORS - Use multi-session tools when you see: -- "parallel", "multiple", "simultaneously", "concurrent" -- "different accounts", "A/B test", "compare" -- "multiple sites", "batch processing" -- Any task requiring more than one browser instance - -MULTI-SESSION WORKFLOW: -1. Create sessions: "multi_browserbase_stagehand_session_create" (give descriptive names) -2. Track sessions: "multi_browserbase_stagehand_session_list" -3. Use session tools: "multi_browserbase_stagehand_navigate_session", etc. -4. Cleanup: "multi_browserbase_stagehand_session_close" - -BEST PRACTICES: -- Use descriptive session names for easier tracking -- Always close sessions when done to free resources -- Take screenshots for visual confirmation or debugging -- Each session maintains independent state and authentication -- No need to create backup sessions - sessions are reliable and persistent - -When using this server, think of it as controlling real browsers in the cloud. 
You can navigate, click, type, extract data, and capture screenshots just like a human would, but with the precision and scale of automation.`, - }, - }, - ], - }; - } - - if (name === "multi_session_guidance") { - return { - description: "Comprehensive guidance on multi-session browser automation", - messages: [ - { - role: "system", - content: { - type: "text", - text: `Multi-Session Browser Automation Guidance - -WHEN TO USE MULTI-SESSION TOOLS: -- Parallel data collection from multiple websites -- A/B testing with different user flows -- Authentication with multiple user accounts simultaneously -- Cross-site operations requiring coordination -- Load testing or performance simulation -- Any task requiring more than one browser instance - -TOOL NAMING PATTERNS: -- Session Management: "multi_browserbase_stagehand_session_*" -- Browser Actions: "multi_browserbase_stagehand_*_session" - -RECOMMENDED WORKFLOW: -1. Create sessions: "multi_browserbase_stagehand_session_create" (give each a descriptive name) -2. List sessions: "multi_browserbase_stagehand_session_list" (to track active sessions) -3. Use session-specific tools: "multi_browserbase_stagehand_navigate_session", "multi_browserbase_stagehand_act_session", etc. -4. 
Clean up: "multi_browserbase_stagehand_session_close" when done - -IMPORTANT RULES: -- Always use session-specific tools (with "_session" suffix) when working with multiple sessions -- Each session maintains independent cookies, authentication, and browser state -- Always close sessions when finished to free resources -- Use descriptive session names for easier tracking -- No need to create backup sessions - sessions are reliable and persistent - -SINGLE VS MULTI-SESSION: -- Single: "browserbase_session_create" → "browserbase_stagehand_navigate" -- Multi: "multi_browserbase_stagehand_session_create" → "multi_browserbase_stagehand_navigate_session"`, - }, - }, - ], - }; - } - - if (name === "stagehand_usage") { - return { - description: - "Guidelines on how to use Stagehand's act, observe, and extract utilities effectively", - messages: [ - { - role: "system", - content: { - type: "text", - text: `Stagehand Usage Guidelines - -OVERVIEW: -Stagehand extends Playwright with natural-language helpers (act, observe, extract) available via stagehand.page. - -INITIALISE: -import { Stagehand } from "@browserbasehq/stagehand"; -const stagehand = new Stagehand(StagehandConfig); -await stagehand.init(); -const { page, context } = stagehand; - -ACT: -- Invoke atomic, single-step actions in plain language: page.act("Click the sign in button"); -- Avoid multi-step instructions such as "Type in the search bar and hit enter". -- Cache observe results and pass them to act whenever possible to avoid DOM drift. - -OBSERVE: -- Plan before acting: const [action] = await page.observe("Click the sign in button"); -- The returned ObserveResult array can be fed directly into page.act(action). - -EXTRACT: -- Always call page.extract({ instruction, schema }) with a strict Zod schema. -- For URLs use z.string().url(); for arrays wrap them in an object property. 
-Example: -const data = await page.extract({ - instruction: "extract the text inside all buttons", - schema: z.object({ text: z.array(z.string()) }), -}); - -AGENT: -Use stagehand.agent for autonomous multi-step tasks. - -BEST PRACTICES: -- Keep actions atomic and specific. -- Cache observe results to stabilise interactions. -- Prefer explicit schemas to guarantee correct extraction. -- Use observe to verify actions before invoking act. -- Treat Stagehand as controlling real browsers – navigate, click, type, and extract exactly as a user would, but with automation scale.`, - }, - }, - ], - }; - } - - throw new Error(`Invalid prompt name: ${name}`); -} diff --git a/src/mcp/sampling.ts b/src/mcp/sampling.ts index d1e0b79..ec17b26 100644 --- a/src/mcp/sampling.ts +++ b/src/mcp/sampling.ts @@ -32,173 +32,3 @@ export type SamplingMessage = { mimeType?: string; }; }; - -/** - * Pre-built sampling templates for common browser automation scenarios - */ -export const SAMPLING_TEMPLATES = { - /** - * Analyze a page to determine what actions are available - */ - analyzePageActions: ( - pageContent: string, - screenshot?: string, - ): SamplingMessage[] => [ - { - role: "user", - content: { - type: "text", - text: `Analyze this webpage and identify the main interactive elements and possible actions. - -Page content: -${pageContent} - -Please list: -1. Main navigation elements -2. Forms and input fields -3. Buttons and clickable elements -4. Key information displayed -5. Suggested next actions for common automation tasks`, - }, - }, - ...(screenshot - ? 
[ - { - role: "user" as const, - content: { - type: "image" as const, - data: screenshot, - mimeType: "image/png", - }, - }, - ] - : []), - ], - - /** - * Determine next steps in a multi-step process - */ - determineNextStep: ( - currentState: string, - goal: string, - ): SamplingMessage[] => [ - { - role: "user", - content: { - type: "text", - text: `Current state of the browser automation: -${currentState} - -Goal: ${goal} - -What should be the next action to take? Consider: -1. Are we on the right page? -2. What elements need to be interacted with? -3. Is there any data to extract first? -4. Are there any errors or blockers visible? - -Provide a specific, actionable next step.`, - }, - }, - ], - - /** - * Extract structured data from a page - */ - extractStructuredData: ( - pageContent: string, - dataSchema: string, - ): SamplingMessage[] => [ - { - role: "user", - content: { - type: "text", - text: `Extract structured data from this webpage according to the schema. - -Page content: -${pageContent} - -Expected data schema: -${dataSchema} - -Return the extracted data as valid JSON matching the schema. If any fields cannot be found, use null.`, - }, - }, - ], - - /** - * Handle error or unexpected state - */ - handleError: (error: string, pageState: string): SamplingMessage[] => [ - { - role: "user", - content: { - type: "text", - text: `The browser automation encountered an error: - -Error: ${error} - -Current page state: -${pageState} - -Suggest how to recover from this error: -1. What might have caused this? -2. What alternative actions could be taken? -3. 
Should we retry, navigate elsewhere, or try a different approach?`, - }, - }, - ], - - /** - * Interpret complex UI patterns - */ - interpretUI: (screenshot: string, instruction: string): SamplingMessage[] => [ - { - role: "user", - content: { - type: "text", - text: `Analyze this screenshot and help with: ${instruction}`, - }, - }, - { - role: "user", - content: { - type: "image", - data: screenshot, - mimeType: "image/png", - }, - }, - ], -}; - -/** - * Helper function to create a sampling request structure - * This shows what a sampling request would look like when sent to the client - */ -export function createSamplingRequest( - messages: SamplingMessage[], - options?: { - systemPrompt?: string; - temperature?: number; - maxTokens?: number; - includeContext?: "none" | "thisServer" | "allServers"; - }, -) { - return { - method: "sampling/createMessage", - params: { - messages, - systemPrompt: - options?.systemPrompt || - "You are an expert browser automation assistant helping to analyze web pages and determine optimal automation strategies.", - temperature: options?.temperature || 0.7, - maxTokens: options?.maxTokens || 1000, - includeContext: options?.includeContext || "thisServer", - modelPreferences: { - hints: [{ name: "claude-3" }, { name: "gpt-4" }], - intelligencePriority: 0.8, - speedPriority: 0.2, - }, - }, - }; -} diff --git a/src/program.ts b/src/program.ts index 875e7fe..09a25b9 100644 --- a/src/program.ts +++ b/src/program.ts @@ -6,7 +6,6 @@ import { fileURLToPath } from "url"; import createServerFunction from "./index.js"; import { ServerList } from "./server.js"; import { startHttpTransport, startStdioTransport } from "./transport.js"; -import * as stagehandStore from "./stagehandStore.js"; import { resolveConfig } from "./config.js"; @@ -59,7 +58,7 @@ program .option("--browserHeight ", "Browser height to use for the browser.") .option( "--modelName ", - "The model to use for Stagehand (default: google/gemini-2.0-flash)", + "The model to use for 
Stagehand (default: gemini-2.0-flash)", ) .option( "--modelApiKey ", @@ -85,7 +84,8 @@ function setupExitWatchdog(serverList: ServerList) { const handleExit = async () => { setTimeout(() => process.exit(0), 15000); try { - await Promise.all([stagehandStore.removeAll(), serverList.closeAll()]); + // SessionManager within each server handles session cleanup + await serverList.closeAll(); } catch (error) { console.error("Error during cleanup:", error); } diff --git a/src/sessionManager.ts b/src/sessionManager.ts index 5093a7e..65aead7 100644 --- a/src/sessionManager.ts +++ b/src/sessionManager.ts @@ -1,417 +1,571 @@ -import { Page, BrowserContext } from "@browserbasehq/stagehand"; +import { BrowserContext, Stagehand } from "@browserbasehq/stagehand"; import type { Config } from "../config.d.ts"; import type { Cookie } from "playwright-core"; -import { createStagehandInstance } from "./stagehandStore.js"; import { clearScreenshotsForSession } from "./mcp/resources.js"; -import type { BrowserSession } from "./types/types.js"; +import type { BrowserSession, CreateSessionParams } from "./types/types.js"; +import { randomUUID } from "crypto"; -// Global state for managing browser sessions -const browsers = new Map(); +/** + * Create a configured Stagehand instance + * This is used internally by SessionManager to initialize browser sessions + */ -// Keep track of the default session explicitly -let defaultBrowserSession: BrowserSession | null = null; +export const createStagehandInstance = async ( + config: Config, + params: CreateSessionParams = {}, + sessionId: string, +): Promise => { + const apiKey = params.apiKey || config.browserbaseApiKey; + const projectId = params.projectId || config.browserbaseProjectId; -// Define a specific ID for the default session -export const defaultSessionId = `browserbase_session_main_${Date.now()}`; + if (!apiKey || !projectId) { + throw new Error("Browserbase API Key and Project ID are required"); + } -// Keep track of the active 
session ID. Defaults to the main session. -let activeSessionId: string = defaultSessionId; + const stagehand = new Stagehand({ + env: "BROWSERBASE", + apiKey, + projectId, + modelName: params.modelName || config.modelName || "gemini-2.0-flash", + modelClientOptions: { + apiKey: + config.modelApiKey || + process.env.GEMINI_API_KEY || + process.env.GOOGLE_API_KEY, + }, + ...(params.browserbaseSessionID && { + browserbaseSessionID: params.browserbaseSessionID, + }), + experimental: config.experimental ?? false, + browserbaseSessionCreateParams: { + projectId, + proxies: config.proxies, + keepAlive: config.keepAlive ?? false, + browserSettings: { + viewport: { + width: config.viewPort?.browserWidth ?? 1024, + height: config.viewPort?.browserHeight ?? 768, + }, + context: config.context?.contextId + ? { + id: config.context?.contextId, + persist: config.context?.persist ?? true, + } + : undefined, + advancedStealth: config.advancedStealth ?? undefined, + }, + userMetadata: { + mcp: "true", + }, + }, + logger: (logLine) => { + console.error(`Stagehand[${sessionId}]: ${logLine.message}`); + }, + }); -/** - * Sets the active session ID. - * @param id The ID of the session to set as active. - */ -export function setActiveSessionId(id: string): void { - if (browsers.has(id) || id === defaultSessionId) { - activeSessionId = id; - } else { - process.stderr.write( - `[SessionManager] WARN - Set active session failed for non-existent ID: ${id}\n`, - ); - } -} + await stagehand.init(); + return stagehand; +}; /** - * Gets the active session ID. - * @returns The active session ID. + * SessionManager manages browser sessions and tracks active/default sessions. 
+ * + * Session ID Strategy: + * - Default session: Uses generated ID with timestamp and UUID for uniqueness + * - User sessions: Uses raw sessionId provided by user (no suffix added) + * - All sessions stored in this.browsers Map with their internal ID as key + * + * Note: Context.currentSessionId is a getter that delegates to this.getActiveSessionId() + * to ensure session tracking stays synchronized. */ -export function getActiveSessionId(): string { - return activeSessionId; -} -/** - * Adds cookies to a browser context - * @param context Playwright browser context - * @param cookies Array of cookies to add - */ -export async function addCookiesToContext( - context: BrowserContext, - cookies: Cookie[], -): Promise { - if (!cookies || cookies.length === 0) { - return; +export class SessionManager { + private browsers: Map; + private defaultBrowserSession: BrowserSession | null; + private readonly defaultSessionId: string; + private activeSessionId: string; + // Mutex to prevent race condition when multiple calls try to create default session simultaneously + private defaultSessionCreationPromise: Promise | null = null; + // Track sessions currently being cleaned up to prevent concurrent cleanup + private cleaningUpSessions: Set = new Set(); + + constructor(contextId?: string) { + this.browsers = new Map(); + this.defaultBrowserSession = null; + const uniqueId = randomUUID(); + this.defaultSessionId = `browserbase_session_${contextId || "default"}_${Date.now()}_${uniqueId}`; + this.activeSessionId = this.defaultSessionId; } - try { - process.stderr.write( - `[SessionManager] Adding ${cookies.length} cookies to browser context\n`, - ); - await context.addCookies(cookies); - process.stderr.write( - `[SessionManager] Successfully added cookies to browser context\n`, - ); - } catch (error) { - process.stderr.write( - `[SessionManager] Error adding cookies to browser context: ${ - error instanceof Error ? 
error.message : String(error) - }\n`, - ); + getDefaultSessionId(): string { + return this.defaultSessionId; } -} -// Function to create a new Browserbase session using Stagehand -export async function createNewBrowserSession( - newSessionId: string, - config: Config, - resumeSessionId?: string, -): Promise { - if (!config.browserbaseApiKey) { - throw new Error("Browserbase API Key is missing in the configuration."); + /** + * Sets the active session ID. + * @param id The ID of the session to set as active. + */ + setActiveSessionId(id: string): void { + if (this.browsers.has(id)) { + this.activeSessionId = id; + } else if (id === this.defaultSessionId) { + // Allow setting to default ID even if session doesn't exist yet + // (it will be created on first use via ensureDefaultSessionInternal) + this.activeSessionId = id; + } else { + process.stderr.write( + `[SessionManager] WARN - Set active session failed for non-existent ID: ${id}\n`, + ); + } } - if (!config.browserbaseProjectId) { - throw new Error("Browserbase Project ID is missing in the configuration."); + + /** + * Gets the active session ID. + * @returns The active session ID. + */ + getActiveSessionId(): string { + return this.activeSessionId; } - try { - process.stderr.write( - `[SessionManager] ${resumeSessionId ? 
"Resuming" : "Creating"} Stagehand session ${newSessionId}...\n`, - ); + /** + * Adds cookies to a browser context + * @param context Playwright browser context + * @param cookies Array of cookies to add + */ + async addCookiesToContext( + context: BrowserContext, + cookies: Cookie[], + ): Promise { + if (!cookies || cookies.length === 0) { + return; + } - // Create and initialize Stagehand instance using shared function - const stagehand = await createStagehandInstance( - config, - { - ...(resumeSessionId && { browserbaseSessionID: resumeSessionId }), - }, - newSessionId, - ); + try { + process.stderr.write( + `[SessionManager] Adding ${cookies.length} cookies to browser context\n`, + ); - // Get the page and browser from Stagehand - const page = stagehand.page as unknown as Page; - const browser = page.context().browser(); + // Injecting cookies into the Browser Context + await context.addCookies(cookies); + process.stderr.write( + `[SessionManager] Successfully added cookies to browser context\n`, + ); + } catch (error) { + process.stderr.write( + `[SessionManager] Error adding cookies to browser context: ${ + error instanceof Error ? error.message : String(error) + }\n`, + ); + } + } - if (!browser) { - throw new Error("Failed to get browser from Stagehand page context"); + /** + * Creates a new Browserbase session using Stagehand. 
+ * @param newSessionId - Internal session ID for tracking in SessionManager + * @param config - Configuration object + * @param resumeSessionId - Optional Browserbase session ID to resume/reuse + */ + async createNewBrowserSession( + newSessionId: string, + config: Config, + resumeSessionId?: string, + ): Promise { + if (!config.browserbaseApiKey) { + throw new Error("Browserbase API Key is missing in the configuration."); + } + if (!config.browserbaseProjectId) { + throw new Error( + "Browserbase Project ID is missing in the configuration.", + ); } - const browserbaseSessionId = stagehand.browserbaseSessionID; + try { + process.stderr.write( + `[SessionManager] ${resumeSessionId ? "Resuming" : "Creating"} Stagehand session ${newSessionId}...\n`, + ); - process.stderr.write( - `[SessionManager] Stagehand initialized with Browserbase session: ${browserbaseSessionId}\n`, - ); - process.stderr.write( - `[SessionManager] Browserbase Live Debugger URL: https://www.browserbase.com/sessions/${browserbaseSessionId}\n`, - ); + // Create and initialize Stagehand instance using shared function + const stagehand = await createStagehandInstance( + config, + { + ...(resumeSessionId && { browserbaseSessionID: resumeSessionId }), + }, + newSessionId, + ); - // Set up disconnect handler - browser.on("disconnected", () => { - process.stderr.write(`[SessionManager] Disconnected: ${newSessionId}\n`); - browsers.delete(newSessionId); - if (defaultBrowserSession && defaultBrowserSession.browser === browser) { - process.stderr.write( - `[SessionManager] Disconnected (default): ${newSessionId}\n`, - ); - defaultBrowserSession = null; + // Get the page and browser from Stagehand + const page = stagehand.page; + const browser = page.context().browser(); + + if (!browser) { + throw new Error("Failed to get browser from Stagehand page context"); } - if ( - activeSessionId === newSessionId && - newSessionId !== defaultSessionId - ) { - process.stderr.write( - `[SessionManager] WARN - Active 
session disconnected, resetting to default: ${newSessionId}\n`, + + const browserbaseSessionId = stagehand.browserbaseSessionID; + + if (!browserbaseSessionId) { + throw new Error( + "Browserbase session ID is required but was not returned by Stagehand", ); - setActiveSessionId(defaultSessionId); } - // Purge any screenshots associated with both internal and Browserbase IDs - try { - clearScreenshotsForSession(newSessionId); - const bbId = browserbaseSessionId; - if (bbId) { - clearScreenshotsForSession(bbId); - } - } catch (err) { + process.stderr.write( + `[SessionManager] Stagehand initialized with Browserbase session: ${browserbaseSessionId}\n`, + ); + process.stderr.write( + `[SessionManager] Browserbase Live Debugger URL: https://www.browserbase.com/sessions/${browserbaseSessionId}\n`, + ); + + // Set up disconnect handler + browser.on("disconnected", () => { process.stderr.write( - `[SessionManager] WARN - Failed to clear screenshots on disconnect for ${newSessionId}: ${ - err instanceof Error ? 
err.message : String(err) - }\n`, + `[SessionManager] Disconnected: ${newSessionId}\n`, ); - } - }); + this.browsers.delete(newSessionId); + if ( + this.defaultBrowserSession && + this.defaultBrowserSession.browser === browser + ) { + process.stderr.write( + `[SessionManager] Disconnected (default): ${newSessionId}\n`, + ); + this.defaultBrowserSession = null; + // Reset active session to default ID since default session needs recreation + this.setActiveSessionId(this.defaultSessionId); + } + if ( + this.activeSessionId === newSessionId && + newSessionId !== this.defaultSessionId + ) { + process.stderr.write( + `[SessionManager] WARN - Active session disconnected, resetting to default: ${newSessionId}\n`, + ); + this.setActiveSessionId(this.defaultSessionId); + } - // Add cookies to the context if they are provided in the config - if ( - config.cookies && - Array.isArray(config.cookies) && - config.cookies.length > 0 - ) { - await addCookiesToContext( - page.context() as BrowserContext, - config.cookies, - ); - } + // Purge any screenshots associated with both internal and Browserbase IDs + try { + clearScreenshotsForSession(newSessionId); + const bbId = browserbaseSessionId; + if (bbId) { + clearScreenshotsForSession(bbId); + } + } catch (err) { + process.stderr.write( + `[SessionManager] WARN - Failed to clear screenshots on disconnect for ${newSessionId}: ${ + err instanceof Error ? 
err.message : String(err) + }\n`, + ); + } + }); - const sessionObj: BrowserSession = { - browser, - page, - sessionId: browserbaseSessionId!, - stagehand, - }; + // Add cookies to the context if they are provided in the config + if ( + config.cookies && + Array.isArray(config.cookies) && + config.cookies.length > 0 + ) { + await this.addCookiesToContext( + page.context() as BrowserContext, + config.cookies, + ); + } - browsers.set(newSessionId, sessionObj); + const sessionObj: BrowserSession = { + browser, + page, + sessionId: browserbaseSessionId, + stagehand, + }; - if (newSessionId === defaultSessionId) { - defaultBrowserSession = sessionObj; - } + this.browsers.set(newSessionId, sessionObj); - setActiveSessionId(newSessionId); - process.stderr.write( - `[SessionManager] Session created and active: ${newSessionId}\n`, - ); + if (newSessionId === this.defaultSessionId) { + this.defaultBrowserSession = sessionObj; + } - return sessionObj; - } catch (creationError) { - const errorMessage = - creationError instanceof Error - ? creationError.message - : String(creationError); - process.stderr.write( - `[SessionManager] Creating session ${newSessionId} failed: ${errorMessage}\n`, - ); - throw new Error( - `Failed to create/connect session ${newSessionId}: ${errorMessage}`, - ); - } -} + this.setActiveSessionId(newSessionId); + process.stderr.write( + `[SessionManager] Session created and active: ${newSessionId}\n`, + ); -async function closeBrowserGracefully( - session: BrowserSession | undefined | null, - sessionIdToLog: string, -): Promise { - // Close Stagehand instance which handles browser cleanup - if (session?.stagehand) { - try { + return sessionObj; + } catch (creationError) { + const errorMessage = + creationError instanceof Error + ? 
creationError.message + : String(creationError); process.stderr.write( - `[SessionManager] Closing Stagehand for session: ${sessionIdToLog}\n`, + `[SessionManager] Creating session ${newSessionId} failed: ${errorMessage}\n`, + ); + throw new Error( + `Failed to create/connect session ${newSessionId}: ${errorMessage}`, ); - await session.stagehand.close(); + } + } + + private async closeBrowserGracefully( + session: BrowserSession | undefined | null, + sessionIdToLog: string, + ): Promise { + // Check if this session is already being cleaned up + if (this.cleaningUpSessions.has(sessionIdToLog)) { process.stderr.write( - `[SessionManager] Successfully closed Stagehand and browser for session: ${sessionIdToLog}\n`, + `[SessionManager] Session ${sessionIdToLog} is already being cleaned up, skipping.\n`, ); - // After close, purge any screenshots associated with both internal and Browserbase IDs - try { - clearScreenshotsForSession(sessionIdToLog); - const bbId = session?.stagehand?.browserbaseSessionID; - if (bbId) { - clearScreenshotsForSession(bbId); + return; + } + + // Mark session as being cleaned up + this.cleaningUpSessions.add(sessionIdToLog); + + try { + // Close Stagehand instance which handles browser cleanup + if (session?.stagehand) { + try { + process.stderr.write( + `[SessionManager] Closing Stagehand for session: ${sessionIdToLog}\n`, + ); + await session.stagehand.close(); + process.stderr.write( + `[SessionManager] Successfully closed Stagehand and browser for session: ${sessionIdToLog}\n`, + ); + // After close, purge any screenshots associated with both internal and Browserbase IDs + try { + clearScreenshotsForSession(sessionIdToLog); + const bbId = session?.stagehand?.browserbaseSessionID; + if (bbId) { + clearScreenshotsForSession(bbId); + } + } catch (err) { + process.stderr.write( + `[SessionManager] WARN - Failed to clear screenshots after close for ${sessionIdToLog}: ${ + err instanceof Error ? 
err.message : String(err) + }\n`, + ); + } + } catch (closeError) { + process.stderr.write( + `[SessionManager] WARN - Error closing Stagehand for session ${sessionIdToLog}: ${ + closeError instanceof Error + ? closeError.message + : String(closeError) + }\n`, + ); } - } catch (err) { - process.stderr.write( - `[SessionManager] WARN - Failed to clear screenshots after close for ${sessionIdToLog}: ${ - err instanceof Error ? err.message : String(err) - }\n`, - ); } - } catch (closeError) { - process.stderr.write( - `[SessionManager] WARN - Error closing Stagehand for session ${sessionIdToLog}: ${ - closeError instanceof Error ? closeError.message : String(closeError) - }\n`, - ); + } finally { + // Always remove from cleanup tracking set + this.cleaningUpSessions.delete(sessionIdToLog); } } -} -// Internal function to ensure default session -export async function ensureDefaultSessionInternal( - config: Config, -): Promise { - const sessionId = defaultSessionId; - let needsReCreation = false; + // Internal function to ensure default session + // Uses a mutex pattern to prevent race conditions when multiple calls happen concurrently + async ensureDefaultSessionInternal(config: Config): Promise { + // If a creation is already in progress, wait for it instead of starting a new one + if (this.defaultSessionCreationPromise) { + process.stderr.write( + `[SessionManager] Default session creation already in progress, waiting...\n`, + ); + return await this.defaultSessionCreationPromise; + } - if (!defaultBrowserSession) { - needsReCreation = true; - process.stderr.write( - `[SessionManager] Default session ${sessionId} not found, creating.\n`, - ); - } else if ( - !defaultBrowserSession.browser.isConnected() || - defaultBrowserSession.page.isClosed() - ) { - needsReCreation = true; - process.stderr.write( - `[SessionManager] Default session ${sessionId} is stale, recreating.\n`, - ); - await closeBrowserGracefully(defaultBrowserSession, sessionId); - defaultBrowserSession = 
null; - browsers.delete(sessionId); - } + const sessionId = this.defaultSessionId; + let needsReCreation = false; - if (needsReCreation) { - try { - defaultBrowserSession = await createNewBrowserSession(sessionId, config); - return defaultBrowserSession; - } catch (creationError) { - // Error during initial creation or recreation + if (!this.defaultBrowserSession) { + needsReCreation = true; process.stderr.write( - `[SessionManager] Initial/Recreation attempt for default session ${sessionId} failed. Error: ${ - creationError instanceof Error - ? creationError.message - : String(creationError) - }\n`, + `[SessionManager] Default session ${sessionId} not found, creating.\n`, ); - // Attempt one more time after a failure + } else if ( + !this.defaultBrowserSession.browser.isConnected() || + this.defaultBrowserSession.page.isClosed() + ) { + needsReCreation = true; process.stderr.write( - `[SessionManager] Retrying creation of default session ${sessionId} after error...\n`, + `[SessionManager] Default session ${sessionId} is stale, recreating.\n`, ); + await this.closeBrowserGracefully(this.defaultBrowserSession, sessionId); + this.defaultBrowserSession = null; + this.browsers.delete(sessionId); + } + + if (needsReCreation) { + // Set the mutex promise before starting creation + this.defaultSessionCreationPromise = (async () => { + try { + this.defaultBrowserSession = await this.createNewBrowserSession( + sessionId, + config, + ); + return this.defaultBrowserSession; + } catch (creationError) { + // Error during initial creation or recreation + process.stderr.write( + `[SessionManager] Initial/Recreation attempt for default session ${sessionId} failed. Error: ${ + creationError instanceof Error + ? 
creationError.message + : String(creationError) + }\n`, + ); + // Attempt one more time after a failure + process.stderr.write( + `[SessionManager] Retrying creation of default session ${sessionId} after error...\n`, + ); + try { + this.defaultBrowserSession = await this.createNewBrowserSession( + sessionId, + config, + ); + return this.defaultBrowserSession; + } catch (retryError) { + const finalErrorMessage = + retryError instanceof Error + ? retryError.message + : String(retryError); + process.stderr.write( + `[SessionManager] Failed to recreate default session ${sessionId} after retry: ${finalErrorMessage}\n`, + ); + throw new Error( + `Failed to ensure default session ${sessionId} after initial error and retry: ${finalErrorMessage}`, + ); + } + } finally { + // Clear the mutex after creation completes or fails + this.defaultSessionCreationPromise = null; + } + })(); + + return await this.defaultSessionCreationPromise; + } + + // If we reached here, the existing default session is considered okay. + this.setActiveSessionId(sessionId); // Ensure default is marked active + return this.defaultBrowserSession!; // Non-null assertion: logic ensures it's not null here + } + + // Get a specific session by ID + async getSession( + sessionId: string, + config: Config, + createIfMissing: boolean = true, + ): Promise { + if (sessionId === this.defaultSessionId && createIfMissing) { try { - defaultBrowserSession = await createNewBrowserSession( - sessionId, - config, - ); - return defaultBrowserSession; - } catch (retryError) { - const finalErrorMessage = - retryError instanceof Error ? 
retryError.message : String(retryError); + return await this.ensureDefaultSessionInternal(config); + } catch { process.stderr.write( - `[SessionManager] Failed to recreate default session ${sessionId} after retry: ${finalErrorMessage}\n`, - ); - throw new Error( - `Failed to ensure default session ${sessionId} after initial error and retry: ${finalErrorMessage}`, + `[SessionManager] Failed to get default session due to error in ensureDefaultSessionInternal for ${sessionId}. See previous messages for details.\n`, ); + return null; } } - } - // If we reached here, the existing default session is considered okay. - setActiveSessionId(sessionId); // Ensure default is marked active - return defaultBrowserSession!; // Non-null assertion: logic ensures it's not null here -} + // For non-default sessions + process.stderr.write(`[SessionManager] Getting session: ${sessionId}\n`); + const sessionObj = this.browsers.get(sessionId); -// Get a specific session by ID -export async function getSession( - sessionId: string, - config: Config, - createIfMissing: boolean = true, -): Promise { - if (sessionId === defaultSessionId && createIfMissing) { - try { - return await ensureDefaultSessionInternal(config); - } catch { + if (!sessionObj) { process.stderr.write( - `[SessionManager] Failed to get default session due to error in ensureDefaultSessionInternal for ${sessionId}. 
See previous messages for details.\n`, + `[SessionManager] WARN - Session not found in map: ${sessionId}\n`, ); - return null; // Or rethrow if getSession failing for default is critical + return null; } - } - // For non-default sessions - process.stderr.write(`[SessionManager] Getting session: ${sessionId}\n`); - const sessionObj = browsers.get(sessionId); + // Validate the found session + if (!sessionObj.browser.isConnected() || sessionObj.page.isClosed()) { + process.stderr.write( + `[SessionManager] WARN - Found session ${sessionId} is stale, removing.\n`, + ); + await this.closeBrowserGracefully(sessionObj, sessionId); + this.browsers.delete(sessionId); + if (this.activeSessionId === sessionId) { + process.stderr.write( + `[SessionManager] WARN - Invalidated active session ${sessionId}, resetting to default.\n`, + ); + this.setActiveSessionId(this.defaultSessionId); + } + return null; + } - if (!sessionObj) { + // Session appears valid, make it active + this.setActiveSessionId(sessionId); process.stderr.write( - `[SessionManager] WARN - Session not found in map: ${sessionId}\n`, + `[SessionManager] Using valid session: ${sessionId}\n`, ); - return null; + return sessionObj; } - // Validate the found session - if (!sessionObj.browser.isConnected() || sessionObj.page.isClosed()) { + /** + * Clean up a session by closing the browser and removing it from tracking. + * This method handles both closing Stagehand and cleanup, and is idempotent. 
+ * + * @param sessionId The session ID to clean up + */ + async cleanupSession(sessionId: string): Promise { process.stderr.write( - `[SessionManager] WARN - Found session ${sessionId} is stale, removing.\n`, + `[SessionManager] Cleaning up session: ${sessionId}\n`, ); - await closeBrowserGracefully(sessionObj, sessionId); - browsers.delete(sessionId); - if (activeSessionId === sessionId) { - process.stderr.write( - `[SessionManager] WARN - Invalidated active session ${sessionId}, resetting to default.\n`, - ); - setActiveSessionId(defaultSessionId); - } - return null; - } - // Session appears valid, make it active - setActiveSessionId(sessionId); - process.stderr.write(`[SessionManager] Using valid session: ${sessionId}\n`); - return sessionObj; -} - -/** - * Clean up a session by removing it from tracking. - * This is called after a browser is closed to ensure proper cleanup. - * @param sessionId The session ID to clean up - */ -export async function cleanupSession(sessionId: string): Promise { - process.stderr.write(`[SessionManager] Cleaning up session: ${sessionId}\n`); + // Get the session to close it gracefully + const session = this.browsers.get(sessionId); + if (session) { + await this.closeBrowserGracefully(session, sessionId); + } - // Get the session to close it gracefully - const session = browsers.get(sessionId); - if (session) { - await closeBrowserGracefully(session, sessionId); - } + // Remove from browsers map + this.browsers.delete(sessionId); - // Remove from browsers map - browsers.delete(sessionId); + // Always purge screenshots for this (internal) session id + try { + clearScreenshotsForSession(sessionId); + } catch (err) { + process.stderr.write( + `[SessionManager] WARN - Failed to clear screenshots during cleanup for ${sessionId}: ${ + err instanceof Error ? 
err.message : String(err) + }\n`, + ); + } - // Always purge screenshots for this (internal) session id - try { - clearScreenshotsForSession(sessionId); - } catch (err) { - process.stderr.write( - `[SessionManager] WARN - Failed to clear screenshots during cleanup for ${sessionId}: ${ - err instanceof Error ? err.message : String(err) - }\n`, - ); - } + // Clear default session reference if this was the default + if (sessionId === this.defaultSessionId && this.defaultBrowserSession) { + this.defaultBrowserSession = null; + } - // Clear default session reference if this was the default - if (sessionId === defaultSessionId && defaultBrowserSession) { - defaultBrowserSession = null; + // Reset active session to default if this was the active one + if (this.activeSessionId === sessionId) { + process.stderr.write( + `[SessionManager] Cleaned up active session ${sessionId}, resetting to default.\n`, + ); + this.setActiveSessionId(this.defaultSessionId); + } } - // Reset active session to default if this was the active one - if (activeSessionId === sessionId) { - process.stderr.write( - `[SessionManager] Cleaned up active session ${sessionId}, resetting to default.\n`, - ); - setActiveSessionId(defaultSessionId); - } -} + // Function to close all managed browser sessions gracefully + async closeAllSessions(): Promise { + process.stderr.write(`[SessionManager] Closing all sessions...\n`); + const closePromises: Promise[] = []; + for (const [id, session] of this.browsers.entries()) { + process.stderr.write(`[SessionManager] Closing session: ${id}\n`); + closePromises.push( + // Use the helper for consistent logging/error handling + this.closeBrowserGracefully(session, id), + ); + } + try { + await Promise.all(closePromises); + } catch { + // Individual errors are caught and logged by closeBrowserGracefully + process.stderr.write( + `[SessionManager] WARN - Some errors occurred during batch session closing. 
See individual messages.\n`, + ); + } -// Function to close all managed browser sessions gracefully -export async function closeAllSessions(): Promise { - process.stderr.write(`[SessionManager] Closing all sessions...\n`); - const closePromises: Promise[] = []; - for (const [id, session] of browsers.entries()) { - process.stderr.write(`[SessionManager] Closing session: ${id}\n`); - closePromises.push( - // Use the helper for consistent logging/error handling - closeBrowserGracefully(session, id), - ); + this.browsers.clear(); + this.defaultBrowserSession = null; + this.setActiveSessionId(this.defaultSessionId); // Reset active session to default + process.stderr.write(`[SessionManager] All sessions closed and cleared.\n`); } - try { - await Promise.all(closePromises); - } catch { - // Individual errors are caught and logged by closeBrowserGracefully - process.stderr.write( - `[SessionManager] WARN - Some errors occurred during batch session closing. See individual messages.\n`, - ); - } - - browsers.clear(); - defaultBrowserSession = null; - setActiveSessionId(defaultSessionId); // Reset active session to default - process.stderr.write(`[SessionManager] All sessions closed and cleared.\n`); } diff --git a/src/stagehandStore.ts b/src/stagehandStore.ts deleted file mode 100644 index fd644d1..0000000 --- a/src/stagehandStore.ts +++ /dev/null @@ -1,213 +0,0 @@ -import { randomUUID } from "crypto"; -import { Stagehand, Page } from "@browserbasehq/stagehand"; -import { StagehandSession, CreateSessionParams } from "./types/types.js"; -import type { Config } from "../config.d.ts"; -import { clearScreenshotsForSession } from "./mcp/resources.js"; - -// Store for all active sessions -const store = new Map(); - -/** - * Create a configured Stagehand instance - */ -export const createStagehandInstance = async ( - config: Config, - params: CreateSessionParams = {}, - sessionId: string, -): Promise => { - const apiKey = params.apiKey || config.browserbaseApiKey; - const 
projectId = params.projectId || config.browserbaseProjectId; - - if (!apiKey || !projectId) { - throw new Error("Browserbase API Key and Project ID are required"); - } - - const stagehand = new Stagehand({ - env: "BROWSERBASE", - apiKey, - projectId, - modelName: - params.modelName || config.modelName || "google/gemini-2.0-flash", - modelClientOptions: { - apiKey: config.modelApiKey || process.env.GEMINI_API_KEY, - }, - ...(params.browserbaseSessionID && { - browserbaseSessionID: params.browserbaseSessionID, - }), - experimental: config.experimental ?? false, - browserbaseSessionCreateParams: { - projectId, - proxies: config.proxies, - keepAlive: config.keepAlive ?? false, - browserSettings: { - viewport: { - width: config.viewPort?.browserWidth ?? 1024, - height: config.viewPort?.browserHeight ?? 768, - }, - context: config.context?.contextId - ? { - id: config.context?.contextId, - persist: config.context?.persist ?? true, - } - : undefined, - advancedStealth: config.advancedStealth ?? 
undefined, - }, - userMetadata: { - mcp: "true", - }, - }, - logger: (logLine) => { - console.error(`Stagehand[${sessionId}]: ${logLine.message}`); - }, - }); - - await stagehand.init(); - return stagehand; -}; - -/** - * Create a new Stagehand session - */ -export const create = async ( - config: Config, - params: CreateSessionParams = {}, -): Promise => { - // Global ID, must be 100% Unique - const id = randomUUID() + "_" + config.browserbaseProjectId; - - process.stderr.write(`[StagehandStore] Creating new session ${id}...\n`); - - const stagehand = await createStagehandInstance(config, params, id); - - const page = stagehand.page as unknown as Page; - const browser = page.context().browser(); - - if (!browser) { - throw new Error("Failed to get browser from Stagehand page context"); - } - - const session: StagehandSession = { - id, - stagehand, - page, - browser, - created: Date.now(), - metadata: { - ...params.meta, - bbSessionId: stagehand.browserbaseSessionID, - }, - }; - - store.set(id, session); - - process.stderr.write( - `[StagehandStore] Session created: ${id} (BB: ${stagehand.browserbaseSessionID})\n`, - ); - process.stderr.write( - `[StagehandStore] Live debugger: https://www.browserbase.com/sessions/${stagehand.browserbaseSessionID}\n`, - ); - - // Set up disconnect handler - const disconnectHandler = () => { - process.stderr.write(`[StagehandStore] Session disconnected: ${id}\n`); - store.delete(id); - // Purge by internal store ID and Browserbase session ID - try { - clearScreenshotsForSession(id); - const bbId = session.metadata?.bbSessionId; - if (bbId) { - clearScreenshotsForSession(bbId); - } - } catch { - process.stderr.write( - `[StagehandStore] Error clearing screenshots for session ${id}\n`, - ); - } - }; - - browser.on("disconnected", disconnectHandler); - - // Store the handler for cleanup - session.metadata = { - ...session.metadata, - disconnectHandler, - }; - - return session; -}; - -/** - * Get a session by ID - */ -export const get = 
(id: string): StagehandSession | null => { - return store.get(id) ?? null; -}; - -/** - * List all active sessions - */ -export const list = (): StagehandSession[] => { - return Array.from(store.values()); -}; - -/** - * Remove and close a session - */ -export const remove = async (id: string): Promise => { - const session = store.get(id); - if (!session) { - process.stderr.write( - `[StagehandStore] Session not found for removal: ${id}\n`, - ); - return; - } - - process.stderr.write(`[StagehandStore] Removing session: ${id}\n`); - - try { - if (session.metadata?.disconnectHandler) { - session.browser.off("disconnected", session.metadata.disconnectHandler); - } - - await session.stagehand.close(); - process.stderr.write(`[StagehandStore] Session closed: ${id}\n`); - // Purge by internal store ID and Browserbase session ID - try { - clearScreenshotsForSession(id); - const bbId = session.metadata?.bbSessionId; - if (bbId) { - clearScreenshotsForSession(bbId); - } - } catch { - process.stderr.write( - `[StagehandStore] Error clearing screenshots for session ${id}\n`, - ); - } - } catch (error) { - process.stderr.write( - `[StagehandStore] Error closing session ${id}: ${ - error instanceof Error ? 
error.message : String(error) - }\n`, - ); - } finally { - store.delete(id); - } -}; - -/** - * Remove all sessions - */ -export const removeAll = async (): Promise => { - process.stderr.write( - `[StagehandStore] Removing all ${store.size} sessions...\n`, - ); - await Promise.all(list().map((s) => remove(s.id))); - process.stderr.write(`[StagehandStore] All sessions removed\n`); -}; - -/** - * Get store size - */ -export const size = (): number => { - return store.size; -}; diff --git a/src/tools/act.ts b/src/tools/act.ts index 09d5aab..3957708 100644 --- a/src/tools/act.ts +++ b/src/tools/act.ts @@ -3,23 +3,25 @@ import type { Tool, ToolSchema, ToolResult } from "./tool.js"; import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; +/** + * Stagehand Act + * Docs: https://docs.stagehand.dev/basics/act + * + * This tool is used to perform actions on a web page. + */ + const ActInputSchema = z.object({ - action: z - .string() - .describe( - "The action to perform. Should be as atomic and specific as possible, " + - "i.e. 'Click the sign in button' or 'Type 'hello' into the search input'. AVOID actions that are more than one " + - "step, i.e. 'Order me pizza' or 'Send an email to Paul asking him to call me'. The instruction should be just as specific as possible, " + - "and have a strong correlation to the text on the page. If unsure, use observe before using act.", - ), + action: z.string().describe( + `The action to perform. Should be as atomic and specific as possible, + i.e. 'Click the sign in button' or 'Type 'hello' into the search input'.`, + ), variables: z .object({}) .optional() .describe( - "Variables used in the action template. ONLY use variables if you're dealing " + - "with sensitive data or dynamic content. For example, if you're logging in to a website, " + - "you can use a variable for the password. When using variables, you MUST have the variable " + - 'key in the action template. 
For example: {"action": "Fill in the password", "variables": {"password": "123456"}}', + `Variables used in the action template. ONLY use variables if you're dealing + with sensitive data or dynamic content. When using variables, you MUST have the variable + key in the action template. ie: {"action": "Fill in the password", "variables": {"password": "123456"}}`, ), }); @@ -27,11 +29,7 @@ type ActInput = z.infer; const actSchema: ToolSchema = { name: "browserbase_stagehand_act", - description: - "Performs an action on a web page element. Act actions should be as atomic and " + - 'specific as possible, i.e. "Click the sign in button" or "Type \'hello\' into the search input". ' + - 'AVOID actions that are more than one step, i.e. "Order me pizza" or "Send an email to Paul ' + - 'asking him to call me".', + description: `Perform a single action on the page (e.g., click, type).`, inputSchema: ActInputSchema, }; diff --git a/src/tools/extract.ts b/src/tools/extract.ts index b371cd3..ffd649d 100644 --- a/src/tools/extract.ts +++ b/src/tools/extract.ts @@ -3,31 +3,29 @@ import type { Tool, ToolSchema, ToolResult } from "./tool.js"; import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; +/** + * Stagehand Extract + * Docs: https://docs.stagehand.dev/basics/extract + * + * This tool is used to extract structured information and text content from a web page. + * + * We currently don't support the client providing a zod schema for the extraction. + */ + const ExtractInputSchema = z.object({ - instruction: z - .string() - .describe( - "The specific instruction for what information to extract from the current page. " + - "Be as detailed and specific as possible about what you want to extract. For example: " + - "'Extract all product names and prices from the listing page' or 'Get the article title, " + - "author, and publication date from this blog post'. 
The more specific your instruction, " + - "the better the extraction results will be. Avoid vague instructions like 'get everything' " + - "or 'extract the data'. Instead, be explicit about the exact elements, text, or information you need.", - ), + instruction: z.string().describe( + `The specific instruction for what information to extract from the current page. + Be as detailed and specific as possible about what you want to extract. For example: + 'Extract all product names and prices from the listing page'.The more specific your instruction, + the better the extraction results will be.`, + ), }); type ExtractInput = z.infer; const extractSchema: ToolSchema = { name: "browserbase_stagehand_extract", - description: - "Extracts structured information and text content from the current web page based on specific instructions " + - "and a defined schema. This tool is ideal for scraping data, gathering information, or pulling specific " + - "content from web pages. Use this tool when you need to get text content, data, or information from a page " + - "rather than interacting with elements. For interactive elements like buttons, forms, or clickable items, " + - "use the observe tool instead. The extraction works best when you provide clear, specific instructions " + - "about what to extract and a well-defined JSON schema for the expected output format. 
This ensures " + - "the extracted data is properly structured and usable.", + description: `Extract structured data or text from the current page using an instruction.`, inputSchema: ExtractInputSchema, }; @@ -38,7 +36,6 @@ async function handleExtract( const action = async (): Promise => { try { const stagehand = await context.getStagehand(); - const extraction = await stagehand.page.extract(params.instruction); return { diff --git a/src/tools/index.ts b/src/tools/index.ts index d39cd80..865d9fa 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -5,17 +5,6 @@ import observeTool from "./observe.js"; import screenshotTool from "./screenshot.js"; import sessionTools from "./session.js"; import getUrlTool from "./url.js"; -import { - createSessionTool, - listSessionsTool, - closeSessionTool, - navigateWithSessionTool, - actWithSessionTool, - extractWithSessionTool, - observeWithSessionTool, - getUrlWithSessionTool, - getAllUrlsWithSessionTool, -} from "./multiSession.js"; // Export individual tools export { default as navigateTool } from "./navigate.js"; @@ -26,22 +15,8 @@ export { default as screenshotTool } from "./screenshot.js"; export { default as sessionTools } from "./session.js"; export { default as getUrlTool } from "./url.js"; -// Multi-session tools array -export const multiSessionTools = [ - createSessionTool, - listSessionsTool, - closeSessionTool, - navigateWithSessionTool, - actWithSessionTool, - extractWithSessionTool, - observeWithSessionTool, - getUrlWithSessionTool, - getAllUrlsWithSessionTool, -]; - // Export all tools as array export const TOOLS = [ - ...multiSessionTools, ...sessionTools, navigateTool, actTool, diff --git a/src/tools/multiSession.ts b/src/tools/multiSession.ts deleted file mode 100644 index 0a71860..0000000 --- a/src/tools/multiSession.ts +++ /dev/null @@ -1,269 +0,0 @@ -import { z } from "zod"; -import { Browserbase } from "@browserbasehq/sdk"; -import { - defineTool, - type Tool, - type ToolResult, - type InputType, -} 
from "./tool.js"; -import * as stagehandStore from "../stagehandStore.js"; -import { CreateSessionParams } from "../types/types.js"; -import type { Context } from "../context.js"; -import navigateTool from "./navigate.js"; -import actTool from "./act.js"; -import extractTool from "./extract.js"; -import observeTool from "./observe.js"; -import getUrlTool, { getAllUrlsTool } from "./url.js"; - -/** - * Creates a session-aware version of an existing tool - * This wraps the original tool's handler to work with a specific session - */ -function createMultiSessionAwareTool( - originalTool: Tool, - options: { - namePrefix?: string; - nameSuffix?: string; - } = {}, -): Tool { - const { namePrefix = "", nameSuffix = "_session" } = options; - - // Create new input schema that includes sessionId - const originalSchema = originalTool.schema.inputSchema; - let newInputSchema: z.ZodSchema; - - if (originalSchema instanceof z.ZodObject) { - // If it's a ZodObject, we can spread its shape - newInputSchema = z.object({ - sessionId: z.string().describe("The session ID to use"), - ...originalSchema.shape, - }); - } else { - // For other schema types, create an intersection - newInputSchema = z.intersection( - z.object({ sessionId: z.string().describe("The session ID to use") }), - originalSchema, - ); - } - - return defineTool({ - capability: originalTool.capability, - schema: { - name: `${namePrefix}${originalTool.schema.name}${nameSuffix}`, - description: `${originalTool.schema.description} (for a specific session)`, - inputSchema: newInputSchema, - }, - handle: async ( - context: Context, - params: z.infer, - ): Promise => { - const { sessionId, ...originalParams } = params; - - // Get the session - const session = stagehandStore.get(sessionId); - if (!session) { - throw new Error(`Session ${sessionId} not found`); - } - - // Create a temporary context that points to the specific session - const sessionContext = Object.create(context); - sessionContext.currentSessionId = - 
session.metadata?.bbSessionId || sessionId; - sessionContext.getStagehand = async () => session.stagehand; - sessionContext.getActivePage = async () => session.page; - sessionContext.getActiveBrowser = async () => session.browser; - - // Call the original tool's handler with the session-specific context - return originalTool.handle(sessionContext, originalParams); - }, - }); -} - -// Create session tool -export const createSessionTool = defineTool({ - capability: "create_session", - schema: { - name: "multi_browserbase_stagehand_session_create", - description: - "Create parallel browser session for multi-session workflows. Use this when you need multiple browser instances running simultaneously: parallel data scraping, concurrent automation, A/B testing, multiple user accounts, cross-site operations, batch processing, or any task requiring more than one browser. Creates an isolated browser session with independent cookies, authentication, and state. Always pair with session-specific tools (those ending with '_session'). Perfect for scaling automation tasks that require multiple browsers working in parallel.", - inputSchema: z.object({ - name: z - .string() - .optional() - .describe( - "Highly recommended: Descriptive name for tracking multiple sessions (e.g. 'amazon-scraper', 'user-login-flow', 'checkout-test-1'). Makes debugging and session management much easier!", - ), - browserbaseSessionID: z - .string() - .optional() - .describe( - "Resume an existing Browserbase session by providing its session ID. Use this to continue work in a previously created browser session that may have been paused or disconnected.", - ), - }), - }, - handle: async ( - context: Context, - { name, browserbaseSessionID }, - ): Promise => { - try { - const params: CreateSessionParams = { - browserbaseSessionID, - meta: name ? 
{ name } : undefined, - }; - - const session = await stagehandStore.create(context.config, params); - - const bbSessionId = session.metadata?.bbSessionId; - if (!bbSessionId) { - throw new Error("No Browserbase session ID available"); - } - - // Get the debug URL using Browserbase SDK - const bb = new Browserbase({ - apiKey: context.config.browserbaseApiKey, - }); - const debugUrl = (await bb.sessions.debug(bbSessionId)) - .debuggerFullscreenUrl; - - return { - action: async () => ({ - content: [ - { - type: "text", - text: `Created session ${session.id}${name ? ` (${name})` : ""}\nBrowserbase session: ${bbSessionId}\nBrowserbase Live Session View URL: https://www.browserbase.com/sessions/${bbSessionId}\nBrowserbase Live Debugger URL: ${debugUrl}`, - }, - ], - }), - waitForNetwork: false, - }; - } catch (error) { - const errorMessage = - error instanceof Error ? error.message : String(error); - throw new Error( - `Failed to create browser session: ${errorMessage}. Please check your Browserbase credentials and try again.`, - ); - } - }, -}); - -// List sessions tool -export const listSessionsTool = defineTool({ - capability: "list_sessions", - schema: { - name: "multi_browserbase_stagehand_session_list", - description: - "ONLY WORKS WITH MULTI-SESSION TOOLS! Track all parallel sessions: Critical tool for multi-session management! Shows all active browser sessions with their IDs, names, ages, and Browserbase session IDs. Use this frequently to monitor your parallel automation workflows, verify sessions are running, and get session IDs for session-specific tools. 
Essential for debugging and resource management in complex multi-browser scenarios.", - inputSchema: z.object({}), - }, - handle: async (): Promise => { - const sessions = stagehandStore.list(); - - if (sessions.length === 0) { - return { - action: async () => ({ - content: [ - { - type: "text", - text: "No active sessions", - }, - ], - }), - waitForNetwork: false, - }; - } - - const sessionInfo = sessions.map((s) => ({ - id: s.id, - name: s.metadata?.name, - browserbaseSessionId: s.metadata?.bbSessionId, - created: new Date(s.created).toISOString(), - age: Math.floor((Date.now() - s.created) / 1000), - })); - - return { - action: async () => ({ - content: [ - { - type: "text", - text: `Active sessions (${sessions.length}):\n${sessionInfo - .map( - (s) => - `- ${s.id}${s.name ? ` (${s.name})` : ""} - BB: ${s.browserbaseSessionId} - Age: ${s.age}s`, - ) - .join("\n")}`, - }, - ], - }), - waitForNetwork: false, - }; - }, -}); - -// Close session tool -export const closeSessionTool = defineTool({ - capability: "close_session", - schema: { - name: "multi_browserbase_stagehand_session_close", - description: - "Cleanup parallel session for multi-session workflows. Properly terminates a browser session, ends the Browserbase session, and frees cloud resources. Always use this when finished with a session to avoid resource waste and billing charges. Critical for responsible multi-session automation - each unclosed session continues consuming resources!", - inputSchema: z.object({ - sessionId: z - .string() - .describe( - "Exact session ID to close (get from 'multi_browserbase_stagehand_session_list'). 
Double-check this ID - once closed, the session cannot be recovered!", - ), - }), - }, - handle: async (_context: Context, { sessionId }): Promise => { - const session = stagehandStore.get(sessionId); - if (!session) { - throw new Error(`Session ${sessionId} not found`); - } - - await stagehandStore.remove(sessionId); - - return { - action: async () => ({ - content: [ - { - type: "text", - text: `Closed session ${sessionId}`, - }, - ], - }), - waitForNetwork: false, - }; - }, -}); - -// Create multi-session-aware versions of the core tools -export const navigateWithSessionTool = createMultiSessionAwareTool( - navigateTool, - { - namePrefix: "multi_", - nameSuffix: "_session", - }, -); - -export const actWithSessionTool = createMultiSessionAwareTool(actTool, { - namePrefix: "multi_", - nameSuffix: "_session", -}); - -export const extractWithSessionTool = createMultiSessionAwareTool(extractTool, { - namePrefix: "multi_", - nameSuffix: "_session", -}); - -export const observeWithSessionTool = createMultiSessionAwareTool(observeTool, { - namePrefix: "multi_", - nameSuffix: "_session", -}); - -export const getUrlWithSessionTool = createMultiSessionAwareTool(getUrlTool, { - namePrefix: "multi_", - nameSuffix: "_session", -}); - -// This wraps getAllUrlsTool which doesn't need sessionId -export const getAllUrlsWithSessionTool = getAllUrlsTool; diff --git a/src/tools/navigate.ts b/src/tools/navigate.ts index 6a659b1..5992994 100644 --- a/src/tools/navigate.ts +++ b/src/tools/navigate.ts @@ -11,8 +11,8 @@ type NavigateInput = z.infer; const navigateSchema: ToolSchema = { name: "browserbase_stagehand_navigate", - description: - "Navigate to a URL in the browser. Only use this tool with URLs you're confident will work and stay up to date. Otherwise, use https://google.com as the starting point", + description: `Navigate to a URL in the browser. Only use this tool with URLs you're confident will work and be up to date. 
+ Otherwise, use https://google.com as the starting point`, inputSchema: NavigateInputSchema, }; diff --git a/src/tools/observe.ts b/src/tools/observe.ts index 4470214..583b21f 100644 --- a/src/tools/observe.ts +++ b/src/tools/observe.ts @@ -3,25 +3,31 @@ import type { Tool, ToolSchema, ToolResult } from "./tool.js"; import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; +/** + * Stagehand Observe + * Docs: https://docs.stagehand.dev/basics/observe + * + * This tool is used to observe and identify specific interactive elements on a web page. + * You can optionally choose to have the observe tool return an action to perform on the element. + */ + const ObserveInputSchema = z.object({ - instruction: z - .string() - .describe( - "Detailed instruction for what specific elements or components to observe on the web page. " + - "This instruction must be extremely specific and descriptive. For example: 'Find the red login button " + - "in the top right corner', 'Locate the search input field with placeholder text', or 'Identify all " + - "clickable product cards on the page'. The more specific and detailed your instruction, the better " + - "the observation results will be. Avoid generic instructions like 'find buttons' or 'see elements'. " + - "Instead, describe the visual characteristics, location, text content, or functionality of the elements " + - "you want to observe. This tool is designed to help you identify interactive elements that you can " + - "later use with the act tool for performing actions like clicking, typing, or form submission.", - ), + instruction: z.string().describe( + `Detailed instruction for what specific elements or components to observe on the web page. + This instruction must be extremely specific and descriptive. For example: 'Find the red login button + in the top right corner', 'Locate the search input field with placeholder text', or 'Identify all + clickable product cards on the page'. 
The more specific and detailed your instruction, the better + the observation results will be. Avoid generic instructions like 'find buttons' or 'see elements'. + Instead, describe the visual characteristics, location, text content, or functionality of the elements + you want to observe. This tool is designed to help you identify interactive elements that you can + later use with the act tool for performing actions like clicking, typing, or form submission.`, + ), returnAction: z .boolean() .optional() .describe( - "Whether to return the action to perform on the element. If true, the action will be returned as a string. " + - "If false, the action will not be returned.", + `Whether to return the action to perform on the element. If true, the action will be returned as a string. + If false, the action will not be returned.`, ), }); @@ -29,15 +35,7 @@ type ObserveInput = z.infer; const observeSchema: ToolSchema = { name: "browserbase_stagehand_observe", - description: - "Observes and identifies specific interactive elements on the current web page that can be used for subsequent actions. " + - "This tool is specifically designed for finding actionable (interactable) elements such as buttons, links, form fields, " + - "dropdowns, checkboxes, and other UI components that you can interact with. Use this tool when you need to locate " + - "elements before performing actions with the act tool. DO NOT use this tool for extracting text content or data - " + - "use the extract tool instead for that purpose. The observe tool returns detailed information about the identified " + - "elements including their properties, location, and interaction capabilities. This information can then be used " + - "to craft precise actions. The more specific your observation instruction, the more accurate the element identification " + - "will be. 
Think of this as your 'eyes' on the page to find exactly what you need to interact with.", + description: `Find interactive elements on the page from an instruction; optionally return an action.`, inputSchema: ObserveInputSchema, }; diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 48a568c..69e06e4 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -4,6 +4,13 @@ import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; import { registerScreenshot } from "../mcp/resources.js"; +/** + * Screenshot + * Docs: https://playwright.dev/docs/screenshots + * + * This tool is used to take a screenshot of the current page. + */ + const ScreenshotInputSchema = z.object({ name: z.string().optional().describe("The name of the screenshot"), }); @@ -12,8 +19,7 @@ type ScreenshotInput = z.infer; const screenshotSchema: ToolSchema = { name: "browserbase_screenshot", - description: - "Takes a screenshot of the current page. Use this tool to learn where you are on the page when controlling the browser with Stagehand. 
Only use this tool when the other tools are not sufficient to get the information you need.", + description: `Capture a full-page screenshot and return it (and save as a resource).`, inputSchema: ScreenshotInputSchema, }; @@ -28,8 +34,9 @@ async function handleScreenshot( throw new Error("No active page available"); } + // We're taking a full page screenshot to give context of the entire page, similar to a snapshot const screenshotBuffer = await page.screenshot({ - fullPage: false, + fullPage: true, }); // Convert buffer to base64 string and store in memory @@ -40,7 +47,8 @@ async function handleScreenshot( .replace(/:/g, "-")}` : `screenshot-${new Date().toISOString().replace(/:/g, "-")}` + context.config.browserbaseProjectId; - // Associate with current session id and store in memory + + // Associate with current mcp session id and store in memory /src/mcp/resources.ts const sessionId = context.currentSessionId; registerScreenshot(sessionId, name, screenshotBase64); diff --git a/src/tools/session.ts b/src/tools/session.ts index 680b518..a3f9c2a 100644 --- a/src/tools/session.ts +++ b/src/tools/session.ts @@ -4,21 +4,12 @@ import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; import { Browserbase } from "@browserbasehq/sdk"; import { createUIResource } from "@mcp-ui/server"; - -// Import SessionManager functions -import { - createNewBrowserSession, - defaultSessionId, - ensureDefaultSessionInternal, - cleanupSession, - getSession, -} from "../sessionManager.js"; import type { BrowserSession } from "../types/types.js"; import { TextContent } from "@modelcontextprotocol/sdk/types.js"; // --- Tool: Create Session --- const CreateSessionInputSchema = z.object({ - // Keep sessionId optional, but clarify its role + // Keep sessionId optional sessionId: z .string() .optional() @@ -31,7 +22,7 @@ type CreateSessionInput = z.infer; const createSessionSchema: ToolSchema = { name: "browserbase_session_create", description: - 
"Create or reuse a single cloud browser session using Browserbase with fully initialized Stagehand. WARNING: This tool is for SINGLE browser workflows only. If you need multiple browser sessions running simultaneously (parallel scraping, A/B testing, multiple accounts), use 'multi_browserbase_stagehand_session_create' instead. This creates one browser session with all configuration flags (proxies, stealth, viewport, cookies, etc.) and initializes Stagehand to work with that session. Updates the active session.", + "Create or reuse a Browserbase browser session and set it as active.", inputSchema: CreateSessionInputSchema, }; @@ -42,28 +33,33 @@ async function handleCreateSession( ): Promise { const action = async (): Promise => { try { + const sessionManager = context.getSessionManager(); const config = context.config; // Get config from context let targetSessionId: string; + // Session ID Strategy: Use raw sessionId for both internal tracking and Browserbase operations + // Default session uses generated ID with timestamp/UUID, user sessions use provided ID as-is if (params.sessionId) { - const projectId = config.browserbaseProjectId || ""; - targetSessionId = `${params.sessionId}_${projectId}`; + targetSessionId = params.sessionId; process.stderr.write( - `[tool.createSession] Attempting to create/assign session with specified ID: ${targetSessionId}`, + `[tool.createSession] Attempting to create/assign session with specified ID: ${targetSessionId}\n`, ); } else { - targetSessionId = defaultSessionId; + targetSessionId = sessionManager.getDefaultSessionId(); } let session: BrowserSession; + const defaultSessionId = sessionManager.getDefaultSessionId(); if (targetSessionId === defaultSessionId) { - session = await ensureDefaultSessionInternal(config); + session = await sessionManager.ensureDefaultSessionInternal(config); } else { // When user provides a sessionId, we want to resume that Browserbase session - session = await createNewBrowserSession( - 
targetSessionId, + // Note: targetSessionId is used for internal tracking in SessionManager + // while params.sessionId is the Browserbase session ID to resume + session = await sessionManager.createNewBrowserSession( + targetSessionId, // Internal session ID for tracking config, - params.sessionId, + params.sessionId, // Browserbase session ID to resume ); } @@ -79,7 +75,8 @@ async function handleCreateSession( ); } - context.currentSessionId = targetSessionId; + // Note: No need to set context.currentSessionId - SessionManager handles this + // and context.currentSessionId is a getter that delegates to SessionManager const bb = new Browserbase({ apiKey: config.browserbaseApiKey, }); @@ -92,17 +89,6 @@ async function handleCreateSession( } const debugUrl = (await bb.sessions.debug(browserbaseSessionId)) .debuggerFullscreenUrl; - process.stderr.write( - `[tool.connected] Successfully connected to Browserbase session. Internal ID: ${targetSessionId}, Actual ID: ${browserbaseSessionId}`, - ); - - process.stderr.write( - `[SessionManager] Browserbase Live Session View URL: https://www.browserbase.com/sessions/${browserbaseSessionId}`, - ); - - process.stderr.write( - `[SessionManager] Browserbase Live Debugger URL: ${debugUrl}`, - ); return { content: [ @@ -125,7 +111,7 @@ async function handleCreateSession( const errorMessage = error instanceof Error ? 
error.message : String(error); process.stderr.write( - `[tool.createSession] Action failed: ${errorMessage}`, + `[tool.createSession] Action failed: ${errorMessage}\n`, ); // Re-throw to be caught by Context.run's error handling for actions throw new Error(`Failed to create Browserbase session: ${errorMessage}`); @@ -152,21 +138,23 @@ const CloseSessionInputSchema = z.object({}); const closeSessionSchema: ToolSchema = { name: "browserbase_session_close", description: - "Closes the current Browserbase session by properly shutting down the Stagehand instance, which handles browser cleanup and terminates the session recording.", + "Close the current Browserbase session and reset the active context.", inputSchema: CloseSessionInputSchema, }; async function handleCloseSession(context: Context): Promise { const action = async (): Promise => { - // Store the current session ID before it's potentially changed. + // Store the current session ID before cleanup const previousSessionId = context.currentSessionId; - let stagehandClosedSuccessfully = false; - let stagehandCloseErrorMessage = ""; + let cleanupSuccessful = false; + let cleanupErrorMessage = ""; - // Step 1: Attempt to get the session and close Stagehand + // Step 1: Get session info before cleanup let browserbaseSessionId: string | undefined; + const sessionManager = context.getSessionManager(); + try { - const session = await getSession( + const session = await sessionManager.getSession( previousSessionId, context.config, false, @@ -176,66 +164,50 @@ async function handleCloseSession(context: Context): Promise { // Store the actual Browserbase session ID for the replay URL browserbaseSessionId = session.sessionId; - process.stderr.write( - `[tool.closeSession] Attempting to close Stagehand for session: ${previousSessionId || "default"} (Browserbase ID: ${browserbaseSessionId})`, - ); - - // Use Stagehand's close method which handles browser cleanup properly - await session.stagehand.close(); - 
stagehandClosedSuccessfully = true; - - process.stderr.write( - `[tool.closeSession] Stagehand and browser connection for session (${previousSessionId}) closed successfully.`, - ); - - // Clean up the session from tracking - await cleanupSession(previousSessionId); - - if (browserbaseSessionId) { - process.stderr.write( - `[tool.closeSession] View session replay at https://www.browserbase.com/sessions/${browserbaseSessionId}`, - ); - } + // cleanupSession handles both closing Stagehand and cleanup (idempotent) + await sessionManager.cleanupSession(previousSessionId); + cleanupSuccessful = true; } else { process.stderr.write( - `[tool.closeSession] No Stagehand instance found for session: ${previousSessionId || "default/unknown"}`, + `[tool.closeSession] No session found for ID: ${previousSessionId || "default/unknown"}\n`, ); } } catch (error: unknown) { - stagehandCloseErrorMessage = + cleanupErrorMessage = error instanceof Error ? error.message : String(error); process.stderr.write( - `[tool.closeSession] Error retrieving or closing Stagehand (session ID was ${previousSessionId || "default/unknown"}): ${stagehandCloseErrorMessage}`, + `[tool.closeSession] Error cleaning up session (ID was ${previousSessionId || "default/unknown"}): ${cleanupErrorMessage}\n`, ); } - // Step 2: Always reset the context's current session ID to default - const oldContextSessionId = context.currentSessionId; - context.currentSessionId = defaultSessionId; + // Step 2: SessionManager automatically resets to default on cleanup + // Context.currentSessionId getter will reflect the new active session + const oldContextSessionId = previousSessionId; process.stderr.write( - `[tool.closeSession] Session context reset to default. Previous context session ID was ${oldContextSessionId || "default/unknown"}.`, + `[tool.closeSession] Session context reset to default. 
Previous context session ID was ${oldContextSessionId || "default/unknown"}.\n`, ); // Step 3: Determine the result message - if (stagehandCloseErrorMessage && !stagehandClosedSuccessfully) { + const defaultSessionId = sessionManager.getDefaultSessionId(); + if (cleanupErrorMessage && !cleanupSuccessful) { throw new Error( - `Failed to close the Stagehand session (session ID in context was ${previousSessionId || "default/unknown"}). Error: ${stagehandCloseErrorMessage}. Session context has been reset to default.`, + `Failed to cleanup session (session ID was ${previousSessionId || "default/unknown"}). Error: ${cleanupErrorMessage}. Session context has been reset to default.`, ); } - if (stagehandClosedSuccessfully) { - let successMessage = `Browserbase session (${previousSessionId || "default"}) closed successfully via Stagehand. Context reset to default.`; + if (cleanupSuccessful) { + let successMessage = `Browserbase session (${previousSessionId || "default"}) closed successfully. Context reset to default.`; if (browserbaseSessionId && previousSessionId !== defaultSessionId) { successMessage += ` View replay at https://www.browserbase.com/sessions/${browserbaseSessionId}`; } return { content: [{ type: "text", text: successMessage }] }; } - // No Stagehand instance was found + // No session was found let infoMessage = - "No active Stagehand session found to close. Session context has been reset to default."; + "No active session found to close. Session context has been reset to default."; if (previousSessionId && previousSessionId !== defaultSessionId) { - infoMessage = `No active Stagehand session found for session ID '${previousSessionId}'. The context has been reset to default.`; + infoMessage = `No active session found for session ID '${previousSessionId}'. 
The context has been reset to default.`; } return { content: [{ type: "text", text: infoMessage }] }; }; diff --git a/src/tools/url.ts b/src/tools/url.ts index 2d1bf0b..84988bb 100644 --- a/src/tools/url.ts +++ b/src/tools/url.ts @@ -2,20 +2,21 @@ import { z } from "zod"; import type { Tool, ToolSchema, ToolResult } from "./tool.js"; import type { Context } from "../context.js"; import type { ToolActionResult } from "../types/types.js"; -import * as stagehandStore from "../stagehandStore.js"; + +/** + * Stagehand Get URL + * + * This tool is used to get the current URL of the browser page. + */ // Empty schema since getting URL doesn't require any input const GetUrlInputSchema = z.object({}); -type GetUrlInput = z.infer; -// Schema for getting all session URLs -const GetAllUrlsInputSchema = z.object({}); -type GetAllUrlsInput = z.infer; +type GetUrlInput = z.infer; const getUrlSchema: ToolSchema = { name: "browserbase_stagehand_get_url", - description: - "Gets the current URL of the browser page. Returns the complete URL including protocol, domain, path, and any query parameters or fragments.", + description: "Return the current page URL (full URL with query/fragment).", inputSchema: GetUrlInputSchema, }; @@ -57,73 +58,4 @@ const getUrlTool: Tool = { handle: handleGetUrl, }; -// Schema for getting all session URLs -const getAllUrlsSchema: ToolSchema = { - name: "browserbase_stagehand_get_all_urls", - description: - "Gets the current URLs of all active browser sessions. 
Returns a mapping of session IDs to their current URLs.", - inputSchema: GetAllUrlsInputSchema, -}; - -async function handleGetAllUrls( - // eslint-disable-next-line @typescript-eslint/no-unused-vars - context: Context, - // eslint-disable-next-line @typescript-eslint/no-unused-vars - params: GetAllUrlsInput, -): Promise { - const action = async (): Promise => { - try { - const sessions = stagehandStore.list(); - - if (sessions.length === 0) { - return { - content: [ - { - type: "text", - text: "No active sessions found", - }, - ], - }; - } - - // Collect URLs from all sessions - const sessionUrls: Record = {}; - - for (const session of sessions) { - try { - const url = session.page.url(); - sessionUrls[session.id] = url; - } catch (error) { - // If we can't get URL for a session, mark it as error - sessionUrls[session.id] = - ``; - } - } - - return { - content: [ - { - type: "text", - text: JSON.stringify(sessionUrls, null, 2), - }, - ], - }; - } catch (error) { - const errorMsg = error instanceof Error ? 
error.message : String(error); - throw new Error(`Failed to get session URLs: ${errorMsg}`); - } - }; - - return { - action, - waitForNetwork: false, - }; -} - -export const getAllUrlsTool: Tool = { - capability: "core", - schema: getAllUrlsSchema, - handle: handleGetAllUrls, -}; - export default getUrlTool; diff --git a/src/transport.ts b/src/transport.ts index da39c1b..957ecb5 100644 --- a/src/transport.ts +++ b/src/transport.ts @@ -13,11 +13,16 @@ export async function startStdioTransport( ) { // Check if we're using the default model without an API key if (config) { - const modelName = config.modelName || "google/gemini-2.0-flash"; - const hasModelApiKey = config.modelApiKey || process.env.GEMINI_API_KEY; + const modelName = config.modelName || "gemini-2.0-flash"; + const hasModelApiKey = + config.modelApiKey || + process.env.GEMINI_API_KEY || + process.env.GOOGLE_API_KEY; if (modelName.includes("gemini") && !hasModelApiKey) { - console.error(`Need to set GEMINI_API_KEY in your environment variables`); + console.error( + `Need to set GEMINI_API_KEY or GOOGLE_API_KEY in your environment variables`, + ); } } @@ -43,12 +48,11 @@ async function handleStreamable( } if (req.method === "POST") { + const sessionId = crypto.randomUUID(); const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: () => crypto.randomUUID(), - onsessioninitialized: (sessionId) => { - sessions.set(sessionId, transport); - }, + sessionIdGenerator: () => sessionId, }); + sessions.set(sessionId, transport); transport.onclose = () => { if (transport.sessionId) sessions.delete(transport.sessionId); }; @@ -66,6 +70,7 @@ export function startHttpTransport( hostname: string | undefined, serverList: ServerList, ) { + // In-memory Map of SHTTP sessions const streamableSessions = new Map(); const httpServer = http.createServer(async (req, res) => { if (!req.url) { diff --git a/src/types/models.ts b/src/types/models.ts deleted file mode 100644 index fa02563..0000000 --- 
a/src/types/models.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { z } from "zod"; - -export const AvailableModel = [ - "openai/gpt-4.1", - "openai/gpt-4.1-mini", - "openai/gpt-4.1-nano", - "openai/o4-mini", - "openai/o3", - "openai/o3-mini", - "openai/o1", - "openai/o1-mini", - "openai/gpt-4o", - "openai/gpt-4o-mini", - "openai/gpt-4o-2024-08-06", - "openai/gpt-4.5-preview", - "openai/o1-preview", - "anthropic/claude-3-5-sonnet-latest", - "anthropic/claude-3-5-sonnet-20241022", - "anthropic/claude-3-5-sonnet-20240620", - "anthropic/claude-3-7-sonnet-latest", - "anthropic/claude-3-7-sonnet-20250219", - "cerebras/cerebras-llama-3.3-70b", - "cerebras/cerebras-llama-3.1-8b", - "groq/groq-llama-3.3-70b-versatile", - "groq/groq-llama-3.3-70b-specdec", - "google/gemini-1.5-flash", - "google/gemini-1.5-pro", - "google/gemini-1.5-flash-8b", - "google/gemini-2.0-flash-lite", - "google/gemini-2.0-flash", - "google/gemini-2.5-flash-preview-04-17", - "google/gemini-2.5-pro-preview-03-25", -] as const; - -export const AvailableModelSchema = z.enum(AvailableModel);