|
| 1 | +import { z } from "zod"; |
| 2 | +import type { Tool, ToolSchema, ToolResult } from "./tool.js"; |
| 3 | +import type { Context } from "../context.js"; |
| 4 | +import type { ToolActionResult } from "../types/types.js"; |
| 5 | + |
| 6 | +/** |
| 7 | + * Stagehand Agent |
| 8 | + * Docs: https://docs.stagehand.dev/basics/agent |
| 9 | + * |
| 10 | + * This tool uses Gemini Computer Use to autonomously complete web-based tasks. |
| 11 | + * The agent will navigate, interact, and complete the task described in the prompt. |
| 12 | + */ |
| 13 | + |
/**
 * Input accepted by the agent tool: a single free-form task prompt.
 *
 * The description is assembled from one-sentence pieces joined by single
 * spaces so the text exposed at runtime contains no source-code indentation
 * or embedded line breaks (which a multi-line template literal would leak).
 */
const AgentInputSchema = z.object({
  prompt: z
    .string()
    .describe(
      [
        "The task prompt describing what you want the sub-agent to accomplish.",
        "Be clear and specific about the goal. For example:",
        "'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.",
        "The agent will autonomously navigate and interact with web pages to complete this task.",
      ].join(" "),
    ),
});

/** Parsed shape of the tool input, derived from `AgentInputSchema`. */
type AgentInput = z.infer<typeof AgentInputSchema>;
| 24 | + |
/**
 * Schema descriptor for the agent tool: its registered name, the
 * human-readable description, and the Zod schema used for its input.
 */
const agentSchema: ToolSchema<typeof AgentInputSchema> = {
  name: "browserbase_stagehand_agent",
  description:
    "Execute a task autonomously using Gemini Computer Use agent. " +
    "The agent will navigate and interact with web pages to complete the given task.",
  inputSchema: AgentInputSchema,
};
| 30 | + |
/** Model identifier handed to Stagehand when creating the computer-use agent. */
const AGENT_MODEL_NAME = "google/gemini-2.5-computer-use-preview-10-2025";

/** Maximum number of steps the agent may take for a single task. */
const AGENT_MAX_STEPS = 20;

/**
 * Picks the API key for the agent model from the environment.
 *
 * Checked in order: GEMINI_API_KEY, GOOGLE_API_KEY,
 * GOOGLE_GENERATIVE_AI_API_KEY. `||` (not `??`) is deliberate so that a
 * variable set to an empty string falls through to the next candidate.
 */
function resolveAgentApiKey(): string | undefined {
  return (
    process.env.GEMINI_API_KEY ||
    process.env.GOOGLE_API_KEY ||
    process.env.GOOGLE_GENERATIVE_AI_API_KEY
  );
}

/**
 * Builds the tool result for an agent request.
 *
 * Nothing is executed here; the returned `action` closure does the work when
 * invoked: it obtains the Stagehand instance from `context`, creates a
 * computer-use agent, runs `params.prompt` as the instruction (capped at
 * AGENT_MAX_STEPS steps) and returns the agent's final message as text content.
 *
 * @param context - Provides the Stagehand instance via `getStagehand()`.
 * @param params - Validated tool input; `prompt` is the task to perform.
 * @returns The deferred `action` together with `waitForNetwork: false`.
 * @throws From `action`: an Error prefixed with "Failed to execute agent
 *   task: " whose `cause` is the originally thrown value.
 */
async function handleAgent(
  context: Context,
  params: AgentInput,
): Promise<ToolResult> {
  const action = async (): Promise<ToolActionResult> => {
    try {
      const stagehand = await context.getStagehand();

      const agent = stagehand.agent({
        cua: true,
        model: {
          modelName: AGENT_MODEL_NAME,
          apiKey: resolveAgentApiKey(),
        },
      });

      const result = await agent.execute({
        instruction: params.prompt,
        maxSteps: AGENT_MAX_STEPS,
      });

      return {
        content: [
          {
            type: "text",
            text: `${result.message}`,
          },
        ],
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);
      // Attach the original error so its stack and type stay reachable after
      // wrapping; Object.assign avoids depending on ES2022 ErrorOptions.
      throw Object.assign(
        new Error(`Failed to execute agent task: ${errorMsg}`),
        { cause: error },
      );
    }
  };

  return {
    action,
    waitForNetwork: false,
  };
}
| 76 | + |
/**
 * Tool definition for the agent: pairs `agentSchema` with the `handleAgent`
 * handler under the "core" capability.
 */
const agentTool: Tool<typeof AgentInputSchema> = {
  capability: "core",
  schema: agentSchema,
  handle: handleAgent,
};

export default agentTool;
0 commit comments