From 42f3b206897380a49ed39277acbbafbf53259225 Mon Sep 17 00:00:00 2001
From: Kylejeong2 <kylejeong21@gmail.com>
Date: Mon, 10 Nov 2025 15:27:02 -0800
Subject: [PATCH 1/4] feat: adding stagehand agent tool

---
 src/tools/agent.ts | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 src/tools/index.ts |  3 ++
 2 files changed, 94 insertions(+)
 create mode 100644 src/tools/agent.ts
diff --git a/src/tools/agent.ts b/src/tools/agent.ts
new file mode 100644
index 0000000..0cbb002
--- /dev/null
+++ b/src/tools/agent.ts
@@ -0,0 +1,91 @@
+import { z } from "zod";
+import type { Tool, ToolSchema, ToolResult } from "./tool.js";
+import type { Context } from "../context.js";
+import type { ToolActionResult } from "../types/types.js";
+
+/**
+ * Stagehand Agent
+ * Docs: https://docs.stagehand.dev/basics/agent
+ *
+ * This tool uses Gemini Computer Use to autonomously complete web-based tasks.
+ * The agent will navigate, interact, and complete the task described in the prompt.
+ */
+
+const AgentInputSchema = z.object({
+  prompt: z.string().describe(
+    `The task prompt describing what you want the agent to accomplish.
+    Be clear and specific about the goal. For example:
+    'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.
+    The agent will autonomously navigate and interact with web pages to complete this task.`,
+  ),
+});
+
+type AgentInput = z.infer<typeof AgentInputSchema>;
+
+const agentSchema: ToolSchema<typeof AgentInputSchema> = {
+  name: "browserbase_stagehand_agent",
+  description: `Execute a task autonomously using Gemini Computer Use agent. The agent will navigate and interact with web pages to complete the given task.`,
+  inputSchema: AgentInputSchema,
+};
+
+async function handleAgent(
+  context: Context,
+  params: AgentInput,
+): Promise<ToolResult> {
+  const action = async (): Promise<ToolActionResult> => {
+    try {
+      const stagehand = await context.getStagehand();
+
+      // Requires a Gemini API key in one of the environment variables read below
+      const agent = stagehand.agent({
+        cua: true,
+        model: {
+          modelName: "google/gemini-2.5-computer-use-preview-10-2025",
+          apiKey:
+            process.env.GOOGLE_GENERATIVE_AI_API_KEY ||
+            process.env.GOOGLE_API_KEY ||
+            process.env.GEMINI_API_KEY,
+        },
+      });
+
+      // Execute the task
+      const result = await agent.execute({
+        instruction: params.prompt,
+        maxSteps: 20,
+      });
+
+      // Format response with both steps and result
+      // The result structure may vary, so we handle it flexibly
+      const resultData = result as unknown as Record<string, unknown>;
+      const response = {
+        result: resultData.result || result,
+        steps: resultData.steps || resultData.trace || [],
+      };
+
+      return {
+        content: [
+          {
+            type: "text",
+            text: `Agent execution completed:\n${JSON.stringify(response, null, 2)}`,
+          },
+        ],
+      };
+    } catch (error) {
+      const errorMsg = error instanceof Error ? error.message : String(error);
+      throw new Error(`Failed to execute agent task: ${errorMsg}`);
+    }
+  };
+
+  return {
+    action,
+    waitForNetwork: false,
+  };
+}
+
+const agentTool: Tool<typeof AgentInputSchema> = {
+  capability: "core",
+  schema: agentSchema,
+  handle: handleAgent,
+};
+
+export default agentTool;
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 865d9fa..f0a19da 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -5,6 +5,7 @@ import observeTool from "./observe.js";
 import screenshotTool from "./screenshot.js";
 import sessionTools from "./session.js";
 import getUrlTool from "./url.js";
+import agentTool from "./agent.js";
 
 // Export individual tools
 export { default as navigateTool } from "./navigate.js";
@@ -14,6 +15,7 @@ export { default as observeTool } from "./observe.js";
 export { default as screenshotTool } from "./screenshot.js";
 export { default as sessionTools } from "./session.js";
 export { default as getUrlTool } from "./url.js";
+export { default as agentTool } from "./agent.js";
 
 // Export all tools as array
 export const TOOLS = [
@@ -24,6 +26,7 @@ export const TOOLS = [
   observeTool,
   screenshotTool,
   getUrlTool,
+  agentTool,
 ];
 
 export const sessionManagementTools = sessionTools;

From 15fb63f8fa085fa93c7eab19f229d3b801b93ea0 Mon Sep 17 00:00:00 2001
From: Kylejeong2 <kylejeong21@gmail.com>
Date: Mon, 10 Nov 2025 15:27:56 -0800
Subject: [PATCH 2/4] changesets

---
 CHANGELOG.md | 6 ++++++
 package.json | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f71440a..4c958af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # @browserbasehq/mcp-server-browserbase
 
+## 2.4.0
+
+### Minor Changes
+
+- feat: adding stagehand agent tool
+
 ## 2.3.0
 
 ### Minor Changes
diff --git a/package.json b/package.json
index fb43c7c..4539004 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@browserbasehq/mcp-server-browserbase",
-  "version": "2.3.0",
+  "version": "2.4.0",
   "description": "MCP server for AI web browser automation using Browserbase and Stagehand",
   "mcpName": "io.github.browserbase/mcp-server-browserbase",
   "license": "Apache-2.0",

From 9a962982ca42e52772a292bfdadd41cc10670949 Mon Sep 17 00:00:00 2001
From: Kylejeong2 <kylejeong21@gmail.com>
Date: Mon, 10 Nov 2025 15:31:23 -0800
Subject: [PATCH 3/4] update default viewport size + change agent tool to only
 return the result

---
 src/sessionManager.ts |  4 ++--
 src/tools/agent.ts    | 10 +---------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/sessionManager.ts b/src/sessionManager.ts
index e6aacc9..602f869 100644
--- a/src/sessionManager.ts
+++ b/src/sessionManager.ts
@@ -47,8 +47,8 @@ export const createStagehandInstance = async (
       keepAlive: config.keepAlive ?? false,
       browserSettings: {
         viewport: {
-          width: config.viewPort?.browserWidth ?? 1024,
-          height: config.viewPort?.browserHeight ?? 768,
+          width: config.viewPort?.browserWidth ?? 1288,
+          height: config.viewPort?.browserHeight ?? 711,
         },
         context: config.context?.contextId
           ? {
diff --git a/src/tools/agent.ts b/src/tools/agent.ts
index 0cbb002..9ce0319 100644
--- a/src/tools/agent.ts
+++ b/src/tools/agent.ts
@@ -54,19 +54,11 @@ async function handleAgent(
         maxSteps: 20,
       });
 
-      // Format response with both steps and result
-      // The result structure may vary, so we handle it flexibly
-      const resultData = result as unknown as Record<string, unknown>;
-      const response = {
-        result: resultData.result || result,
-        steps: resultData.steps || resultData.trace || [],
-      };
-
       return {
         content: [
           {
             type: "text",
-            text: `Agent execution completed:\n${JSON.stringify(response, null, 2)}`,
+            text: `Agent execution completed:\n${JSON.stringify(result, null, 2)}`,
           },
         ],
       };

From 0e302a76464521c550944a0bf47824536fd566e8 Mon Sep 17 00:00:00 2001
From: Kylejeong2 <kylejeong21@gmail.com>
Date: Mon, 10 Nov 2025 15:36:15 -0800
Subject: [PATCH 4/4] have agent only return result.message

---
 src/tools/agent.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tools/agent.ts b/src/tools/agent.ts
index 9ce0319..e333079 100644
--- a/src/tools/agent.ts
+++ b/src/tools/agent.ts
@@ -13,7 +13,7 @@ import type { ToolActionResult } from "../types/types.js";
 
 const AgentInputSchema = z.object({
   prompt: z.string().describe(
-    `The task prompt describing what you want the agent to accomplish.
+    `The task prompt describing what you want the sub-agent to accomplish.
     Be clear and specific about the goal. For example:
     'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.
     The agent will autonomously navigate and interact with web pages to complete this task.`,
@@ -42,9 +42,9 @@ async function handleAgent(
         model: {
           modelName: "google/gemini-2.5-computer-use-preview-10-2025",
           apiKey:
-            process.env.GOOGLE_GENERATIVE_AI_API_KEY ||
+            process.env.GEMINI_API_KEY ||
             process.env.GOOGLE_API_KEY ||
-            process.env.GEMINI_API_KEY,
+            process.env.GOOGLE_GENERATIVE_AI_API_KEY,
         },
       });
 
@@ -58,7 +58,7 @@ async function handleAgent(
         content: [
           {
             type: "text",
-            text: `Agent execution completed:\n${JSON.stringify(result, null, 2)}`,
+            text: `${result.message}`,
           },
         ],
       };