Skip to content

Commit b4d1d16

Browse files
authored
Merge pull request #130 from browserbase/kylejeong/gro-585-agent-tool-for-mcp-server
feat: adding agent tool for mcp server
2 parents f841909 + 0e302a7 commit b4d1d16

File tree

5 files changed

+95
-3
lines changed

5 files changed

+95
-3
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# @browserbasehq/mcp-server-browserbase
22

3+
## 2.4.0
4+
5+
### Minor Changes
6+
7+
- feat: adding stagehand agent tool
8+
39
## 2.3.0
410

511
### Minor Changes

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@browserbasehq/mcp-server-browserbase",
3-
"version": "2.3.0",
3+
"version": "2.4.0",
44
"description": "MCP server for AI web browser automation using Browserbase and Stagehand",
55
"mcpName": "io.github.browserbase/mcp-server-browserbase",
66
"license": "Apache-2.0",

src/sessionManager.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ export const createStagehandInstance = async (
4747
keepAlive: config.keepAlive ?? false,
4848
browserSettings: {
4949
viewport: {
50-
width: config.viewPort?.browserWidth ?? 1024,
51-
height: config.viewPort?.browserHeight ?? 768,
50+
width: config.viewPort?.browserWidth ?? 1288,
51+
height: config.viewPort?.browserHeight ?? 711,
5252
},
5353
context: config.context?.contextId
5454
? {

src/tools/agent.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { z } from "zod";
2+
import type { Tool, ToolSchema, ToolResult } from "./tool.js";
3+
import type { Context } from "../context.js";
4+
import type { ToolActionResult } from "../types/types.js";
5+
6+
/**
7+
* Stagehand Agent
8+
* Docs: https://docs.stagehand.dev/basics/agent
9+
*
10+
* This tool uses Gemini Computer Use to autonomously complete web-based tasks.
11+
* The agent will navigate, interact, and complete the task described in the prompt.
12+
*/
13+
14+
/** Description text attached to the `prompt` field of the agent tool input. */
const agentPromptDescription = `The task prompt describing what you want the sub-agent to accomplish.
Be clear and specific about the goal. For example:
'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.
The agent will autonomously navigate and interact with web pages to complete this task.`;

/** Input accepted by the agent tool: a single free-form task prompt. */
const AgentInputSchema = z.object({
  prompt: z.string().describe(agentPromptDescription),
});

/** Static type inferred from {@link AgentInputSchema}. */
type AgentInput = z.infer<typeof AgentInputSchema>;
24+
25+
/** Tool schema for the agent tool: its name, description, and input schema. */
const agentSchema: ToolSchema<typeof AgentInputSchema> = {
  name: "browserbase_stagehand_agent",
  description:
    "Execute a task autonomously using Gemini Computer Use agent. The agent will navigate and interact with web pages to complete the given task.",
  inputSchema: AgentInputSchema,
};
30+
31+
/**
 * Builds the deferred action for the `browserbase_stagehand_agent` tool.
 *
 * The returned action resolves a Google API key from the environment, obtains
 * the Stagehand instance from `context`, creates an agent configured with the
 * Gemini computer-use model, and runs `params.prompt` as the agent
 * instruction with a cap of 20 steps.
 *
 * @param context - Server context; only `getStagehand()` is used here.
 * @param params - Tool input; `prompt` is forwarded as the agent instruction.
 * @returns A tool result with `waitForNetwork: false` and an `action` that
 *   resolves to a single text content item holding the agent's final message.
 * @throws The returned `action` rejects with an `Error` whose message starts
 *   with "Failed to execute agent task: " when no API key is configured or
 *   when obtaining Stagehand / running the agent fails.
 */
async function handleAgent(
  context: Context,
  params: AgentInput,
): Promise<ToolResult> {
  const action = async (): Promise<ToolActionResult> => {
    try {
      // Any ONE of these variables may carry the key; they are checked in this
      // order. `||` (not `??`) is deliberate so an empty string falls through
      // to the next candidate.
      const apiKey =
        process.env.GEMINI_API_KEY ||
        process.env.GOOGLE_API_KEY ||
        process.env.GOOGLE_GENERATIVE_AI_API_KEY;

      // Fail fast with an actionable message instead of handing
      // `apiKey: undefined` to the agent and failing later inside the client.
      // Done before requesting the Stagehand instance so a misconfigured
      // environment does no further work.
      if (!apiKey) {
        throw new Error(
          "Missing Google API key: set GEMINI_API_KEY, GOOGLE_API_KEY, or GOOGLE_GENERATIVE_AI_API_KEY",
        );
      }

      const stagehand = await context.getStagehand();

      // NOTE(review): `cua: true` presumably selects Stagehand's computer-use
      // agent mode — confirm against the Stagehand agent docs.
      const agent = stagehand.agent({
        cua: true,
        model: {
          modelName: "google/gemini-2.5-computer-use-preview-10-2025",
          apiKey,
        },
      });

      // Execute the task; maxSteps bounds how long the agent may run.
      const result = await agent.execute({
        instruction: params.prompt,
        maxSteps: 20,
      });

      return {
        content: [
          {
            type: "text",
            text: `${result.message}`,
          },
        ],
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to execute agent task: ${errorMsg}`);
    }
  };

  return {
    action,
    waitForNetwork: false,
  };
}
76+
77+
/** Agent tool definition: pairs the agent schema with its handler under the "core" capability. */
const agentTool: Tool<typeof AgentInputSchema> = {
  capability: "core",
  schema: agentSchema,
  handle: handleAgent,
};

export default agentTool;

src/tools/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import observeTool from "./observe.js";
55
import screenshotTool from "./screenshot.js";
66
import sessionTools from "./session.js";
77
import getUrlTool from "./url.js";
8+
import agentTool from "./agent.js";
89

910
// Export individual tools
1011
export { default as navigateTool } from "./navigate.js";
@@ -14,6 +15,7 @@ export { default as observeTool } from "./observe.js";
1415
export { default as screenshotTool } from "./screenshot.js";
1516
export { default as sessionTools } from "./session.js";
1617
export { default as getUrlTool } from "./url.js";
18+
export { default as agentTool } from "./agent.js";
1719

1820
// Export all tools as array
1921
export const TOOLS = [
@@ -24,6 +26,7 @@ export const TOOLS = [
2426
observeTool,
2527
screenshotTool,
2628
getUrlTool,
29+
agentTool,
2730
];
2831

2932
export const sessionManagementTools = sessionTools;

0 commit comments

Comments
 (0)