Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# @browserbasehq/mcp-server-browserbase

## 2.4.0

### Minor Changes

- feat: add Stagehand agent tool (`browserbase_stagehand_agent`)

## 2.3.0

### Minor Changes
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@browserbasehq/mcp-server-browserbase",
"version": "2.3.0",
"version": "2.4.0",
"description": "MCP server for AI web browser automation using Browserbase and Stagehand",
"mcpName": "io.github.browserbase/mcp-server-browserbase",
"license": "Apache-2.0",
Expand Down
4 changes: 2 additions & 2 deletions src/sessionManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ export const createStagehandInstance = async (
keepAlive: config.keepAlive ?? false,
browserSettings: {
viewport: {
width: config.viewPort?.browserWidth ?? 1024,
height: config.viewPort?.browserHeight ?? 768,
width: config.viewPort?.browserWidth ?? 1288,
height: config.viewPort?.browserHeight ?? 711,
},
context: config.context?.contextId
? {
Expand Down
83 changes: 83 additions & 0 deletions src/tools/agent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { z } from "zod";
import type { Tool, ToolSchema, ToolResult } from "./tool.js";
import type { Context } from "../context.js";
import type { ToolActionResult } from "../types/types.js";

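/**
 * Stagehand Agent
 * Docs: https://docs.stagehand.dev/basics/agent
 *
 * This tool uses Gemini Computer Use to autonomously complete web-based tasks.
 * The agent navigates, interacts with pages, and completes the task described
 * in the prompt.
 *
 * The Google API key is read from the first of these environment variables
 * that is set: GEMINI_API_KEY, GOOGLE_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY.
 */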

/**
 * Human-readable guidance attached to the `prompt` field of the agent input.
 * Kept as a standalone constant so the schema definition itself stays compact.
 */
const agentPromptDescription = `The task prompt describing what you want the sub-agent to accomplish.
Be clear and specific about the goal. For example:
'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.
The agent will autonomously navigate and interact with web pages to complete this task.`;

/**
 * Zod schema for the agent tool input: an object with a single string
 * `prompt` field carrying the task instruction.
 */
const AgentInputSchema = z.object({
  prompt: z.string().describe(agentPromptDescription),
});

/** Input type of the agent tool handler, inferred from `AgentInputSchema`. */
type AgentInput = z.infer<typeof AgentInputSchema>;

/**
 * Tool schema for the agent tool: its registered name
 * (`browserbase_stagehand_agent`), the description text, and the Zod schema
 * used for its input.
 */
const agentSchema: ToolSchema<typeof AgentInputSchema> = {
name: "browserbase_stagehand_agent",
description: `Execute a task autonomously using Gemini Computer Use agent. The agent will navigate and interact with web pages to complete the given task.`,
inputSchema: AgentInputSchema,
};

/** Gemini Computer Use model the Stagehand agent is created with. */
const AGENT_MODEL_NAME = "google/gemini-2.5-computer-use-preview-10-2025";

/** Step limit passed to `agent.execute` as `maxSteps`. */
const AGENT_MAX_STEPS = 20;

/** Environment variables checked, in priority order, for the Google API key. */
const AGENT_API_KEY_ENV_VARS = [
  "GEMINI_API_KEY",
  "GOOGLE_API_KEY",
  "GOOGLE_GENERATIVE_AI_API_KEY",
] as const;

/**
 * Returns the first non-empty value among `AGENT_API_KEY_ENV_VARS`,
 * or `undefined` when none of them is set.
 */
function resolveAgentApiKey(): string | undefined {
  for (const name of AGENT_API_KEY_ENV_VARS) {
    const value = process.env[name];
    // Truthiness check on purpose: an empty string is treated as "not set",
    // matching the `||` chain this helper replaces.
    if (value) {
      return value;
    }
  }
  return undefined;
}

/**
 * Builds the tool result for the Stagehand agent tool.
 *
 * The returned `action` creates a computer-use (`cua: true`) Stagehand agent
 * and executes `params.prompt` as its instruction, limited to
 * `AGENT_MAX_STEPS` steps. The agent's final `message` is returned as a single
 * text content item.
 *
 * @param context - Provides the Stagehand instance via `getStagehand()`.
 * @param params - Validated tool input; `prompt` is the task instruction.
 * @returns The deferred `action` plus `waitForNetwork: false`.
 * @throws The `action` rejects with an `Error` prefixed
 *   "Failed to execute agent task: " when no API key is configured or when
 *   acquiring Stagehand / running the agent fails.
 */
async function handleAgent(
  context: Context,
  params: AgentInput,
): Promise<ToolResult> {
  const action = async (): Promise<ToolActionResult> => {
    try {
      // Check the key before asking the context for a Stagehand instance, so
      // a misconfigured environment fails immediately with an actionable
      // message instead of an opaque provider error later on.
      const apiKey = resolveAgentApiKey();
      if (!apiKey) {
        throw new Error(
          `No Google API key found. Set one of: ${AGENT_API_KEY_ENV_VARS.join(", ")}`,
        );
      }

      const stagehand = await context.getStagehand();

      const agent = stagehand.agent({
        cua: true,
        model: {
          modelName: AGENT_MODEL_NAME,
          apiKey,
        },
      });

      // Execute the task
      const result = await agent.execute({
        instruction: params.prompt,
        maxSteps: AGENT_MAX_STEPS,
      });

      return {
        content: [
          {
            type: "text",
            text: `${result.message}`,
          },
        ],
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to execute agent task: ${errorMsg}`);
    }
  };

  return {
    action,
    waitForNetwork: false,
  };
}

/**
 * Tool definition for the Stagehand agent: pairs `agentSchema` with the
 * `handleAgent` handler and declares the `"core"` capability.
 */
const agentTool: Tool<typeof AgentInputSchema> = {
capability: "core",
schema: agentSchema,
handle: handleAgent,
};

// Default export so the tools index can import it as `agentTool`.
export default agentTool;
3 changes: 3 additions & 0 deletions src/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import observeTool from "./observe.js";
import screenshotTool from "./screenshot.js";
import sessionTools from "./session.js";
import getUrlTool from "./url.js";
import agentTool from "./agent.js";

// Export individual tools
export { default as navigateTool } from "./navigate.js";
Expand All @@ -14,6 +15,7 @@ export { default as observeTool } from "./observe.js";
export { default as screenshotTool } from "./screenshot.js";
export { default as sessionTools } from "./session.js";
export { default as getUrlTool } from "./url.js";
export { default as agentTool } from "./agent.js";

// Export all tools as array
export const TOOLS = [
Expand All @@ -24,6 +26,7 @@ export const TOOLS = [
observeTool,
screenshotTool,
getUrlTool,
agentTool,
];

// Alias for the session tools imported from "./session.js", exported under a
// second name.
export const sessionManagementTools = sessionTools;