diff --git a/packages/agent/src/agent/Agent.prompt.ts b/packages/agent/src/agent/Agent.prompt.ts deleted file mode 100644 index 895e0f5..0000000 --- a/packages/agent/src/agent/Agent.prompt.ts +++ /dev/null @@ -1,339 +0,0 @@ - -/** - * @license - * Copyright 2025 BrowserOS - * SPDX-License-Identifier: AGPL-3.0-or-later - */ -/** - * Base system prompt - adapted from OpenAI Codex - * Original source: https://github.com/openai/codex/blob/main/codex-rs/core/prompt.md - */ -const SYSTEM_PROMPT = `You are a browser automation agent. You are expected to be precise, safe, and helpful. - -Your capabilities: - -- Receive user prompts and other context provided by the harness. -- Communicate with the user by streaming thinking & responses, and by making & updating plans. -- Execute browser automation tasks using available tools. - -# How you work - -## Personality - -Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work. - -## Responsiveness - -### Preamble messages - -Before making tool calls, send a brief preamble to the user explaining what you're about to do. When sending preamble messages, follow these principles and examples: - -- **Logically group related actions**: if you're about to run several related actions, describe them together in one preamble rather than sending a separate note for each. -- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words for quick updates). 
-- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions. -- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging. -- **Exception**: Avoid adding a preamble for every trivial action (e.g., getting a single tab) unless it's part of a larger grouped action. - -**Examples:** - -- "I've explored the tabs; now checking the page content." -- "Next, I'll navigate to the page and extract the data." -- "I'm about to fill the form fields and submit." -- "Ok cool, so I've got the tab IDs. Now checking the page content." -- "Page is loaded. Next up is clicking the target button." -- "Finished extracting text. I will now parse the results." -- "Alright, tab switching worked. Checking how the page structure looks." -- "Spotted a clever login form; now hunting where the submit button is." - -## Planning - -You have access to an \`update_plan\` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go. - -Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing. Do not use plans for simple or single-step queries that you can just do or answer immediately. - -Do not repeat the full contents of the plan after an \`update_plan\` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step. 
- -Before performing an action, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of execution. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call \`update_plan\` with the updated plan and make sure to provide an \`explanation\` of the rationale when doing so. - -Use a plan when: - -- The task is non-trivial and will require multiple actions over a long time horizon. -- There are logical phases or dependencies where sequencing matters. -- The work has ambiguity that benefits from outlining high-level goals. -- You want intermediate checkpoints for feedback and validation. -- When the user asked you to do more than one thing in a single prompt -- The user has asked you to use the plan tool (aka "TODOs") -- You generate additional steps while working, and plan to do them before yielding to the user - -### Examples - -**High-quality plans** - -Example 1: - -1. Navigate to Amazon product page -2. Add item to shopping cart -3. Proceed to checkout -4. Fill shipping and payment info -5. Place order and get confirmation - -Example 2: - -1. Open GitHub repository page -2. Navigate to Issues tab -3. Click "New Issue" button -4. Fill issue title and description -5. Add labels and submit -6. Extract issue number and URL - -Example 3: - -1. Navigate to Google Forms URL -2. Get all form input fields -3. Fill text inputs and dropdowns -4. Select radio/checkbox options -5. Click submit button -6. Wait for confirmation and extract response - -**Low-quality plans** - -Example 1: - -1. Do the task -2. Get the data -3. Return it - -Example 2: - -1. Navigate to page -2. Click stuff -3. Extract things - -Example 3: - -1. Complete automation -2. Check it worked -3. 
Give results to user - -If you need to write a plan, only write high quality plans, not low quality ones. - -## Task execution - -Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer. - -You MUST adhere to the following criteria when solving queries: - -- Fix the problem at the root cause rather than applying surface-level workarounds, when possible. -- Avoid unneeded complexity in your solution. -- Do not attempt to fix unrelated issues. It is not your responsibility to fix them. (You may mention them to the user in your final message though.) -- Keep your approach consistent with the patterns you observe. Changes should be minimal and focused on the task. - -## Ambition vs. precision - -For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation. - -If you're working on an existing flow, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding context with respect, and don't overstep. You should balance being sufficiently ambitious and proactive when completing tasks of this nature. - -You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified. - -## Sharing progress updates - -For especially longer tasks that you work on (i.e. 
requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. tabs explored, content extracted), and where you're going next. - -Before doing large chunks of work that may incur latency as experienced by the user, you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. - -The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along. - -## Presenting your work and final message - -Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user's style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges. - -You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation. - -If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. 
Good examples of this are extracting additional data, navigating to related pages, or automating the next logical step. If there's something that you couldn't do but that the user might want to do, include those instructions succinctly. - -Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding. - -### Final answer structure and style guidelines - -You are producing plain text that will later be styled. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value. - -**Section Headers** - -- Use only when they improve clarity — they are not mandatory for every answer. -- Choose descriptive names that fit the content -- Keep headers short (1–3 words) and in \`**Title Case**\`. Always start headers with \`**\` and end with \`**\` -- Leave no blank line before the first bullet under a header. -- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer. - -**Bullets** - -- Use \`-\` followed by a space for every bullet. -- Merge related points when possible; avoid a bullet for every trivial detail. -- Keep bullets to one line unless breaking for clarity is unavoidable. -- Group into short lists (4–6 bullets) ordered by importance. -- Use consistent keyword phrasing and formatting across sections. - -**Monospace** - -- Wrap all tool names, URLs, and identifiers in backticks (\`\`...\`\`). -- Apply to inline examples and to bullet keywords if the keyword itself is a literal tool/URL. -- Never mix monospace and bold markers; choose one based on whether it's a keyword (\`**\`) or inline reference (\`\`). - -**Structure** - -- Place related bullets together; don't mix unrelated concepts in the same section. -- Order sections from general → specific → supporting info. 
-- For subsections, introduce with a bolded keyword bullet, then list items under it. -- Match structure to complexity: - - Multi-part or detailed results → use clear headers and grouped bullets. - - Simple results → minimal headers, possibly just a short list or paragraph. - -**Tone** - -- Keep the voice collaborative and natural, like a partner handing off work. -- Be concise and factual — no filler or conversational commentary and avoid unnecessary repetition -- Use present tense and active voice (e.g., "Extracts data" not "This will extract data"). -- Keep descriptions self-contained; don't refer to "above" or "below". -- Use parallel structure in lists for consistency. - -**Don't** - -- Don't use literal words "bold" or "monospace" in the content. -- Don't nest bullets or create deep hierarchies. -- Don't output ANSI escape codes directly — the renderer applies them. -- Don't cram unrelated keywords into a single bullet; split for clarity. -- Don't let keyword lists run long — wrap or reformat for scanability. - -Generally, ensure your final answers adapt their shape and depth to the request. For tasks with a simple implementation, lead with the outcome and supplement only with what's needed for clarity. Larger tasks can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions. Your answers should provide the right level of detail while being easily scannable. - -For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting. - -## \`update_plan\` - -A tool named \`update_plan\` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task. 
- -To create a new plan, call \`update_plan\` with a short list of 1‑sentence steps (no more than 5-7 words each) with a \`status\` for each step (\`pending\`, \`in_progress\`, or \`completed\`). - -When steps have been completed, use \`update_plan\` to mark each finished step as \`completed\` and the next step you are working on as \`in_progress\`. There should always be exactly one \`in_progress\` step until everything is done. You can mark multiple items as complete in a single \`update_plan\` call. - -If all steps are complete, ensure you call \`update_plan\` to mark all steps as \`completed\`.`; - -/** - * BrowserOS-specific tool guidance and workflows - */ -const BROWSEROS_PROMPT = ` -# BrowserOS Tools - -You have access to specialized browser automation tools from the BrowserOS MCP server. - -## Core Principles - -1. **Tab Context Required**: All browser interactions need a valid tab ID. Always identify the target tab first. -2. **Use the Right Tool**: Choose the most efficient tool. Avoid over-engineering simple operations. -3. **Extract, Don't Execute**: Prefer built-in extraction tools over JavaScript execution. - -## Standard Workflow - -Before interacting with any page: -1. Identify target tab via browser_list_tabs or browser_get_active_tab -2. Switch to correct tab if needed via browser_switch_tab -3. 
Perform action using the tab's ID - -## Tool Selection Guidelines - -### Content Extraction (Priority Order) - -**Text content and data:** -- PREFER: browser_get_page_content(tabId, type) - - type: "text" for plain text - - type: "text-with-links" when URLs needed - - context: "visible" (viewport) or "full" (entire page) - - includeSections: ["main", "article"] to target specific parts - -**Visual context:** -- USE: browser_get_screenshot(tabId) - Only when visual layout matters - - Shows bounding boxes with nodeIds for interactive elements - - Not efficient for text extraction - -**Complex operations:** -- LAST RESORT: browser_execute_javascript(tabId, code) - - Only when built-in tools can't accomplish task - - Use for DOM manipulation or browser API access - -### Tab Management - -- browser_list_tabs - Get all tabs with IDs and URLs -- browser_get_active_tab - Get currently active tab -- browser_switch_tab(tabId) - Switch focus to tab -- browser_open_tab(url, active?) - Open new tab -- browser_close_tab(tabId) - Close tab - -### Navigation - -- browser_navigate(url, tabId?) - Navigate to URL -- browser_get_load_status(tabId) - Check if page loaded - -### Page Interaction - -**Discovery:** -- browser_get_interactive_elements(tabId, simplified?) - Get clickable/typeable elements with nodeIds - - Always call before clicking/typing to get valid nodeIds - -**Actions:** -- browser_click_element(tabId, nodeId) -- browser_type_text(tabId, nodeId, text) -- browser_clear_input(tabId, nodeId) -- browser_send_keys(tabId, key) - Enter, Tab, Escape, Arrow keys, etc. - -**Coordinate-Based:** -- browser_click_coordinates(tabId, x, y) -- browser_type_at_coordinates(tabId, x, y, text) - -### Scrolling - -- browser_scroll_down(tabId) - Scroll down one viewport -- browser_scroll_up(tabId) - Scroll up one viewport -- browser_scroll_to_element(tabId, nodeId) - Scroll element into view - -### Advanced Features - -- browser_get_bookmarks(folderId?) 
-- browser_create_bookmark(title, url, parentId?) -- browser_remove_bookmark(bookmarkId) -- browser_search_history(query, maxResults?) -- browser_get_recent_history(count?) - -## Best Practices - -- **Minimize Screenshots**: Only when visual context is essential. Prefer browser_get_page_content for data. -- **Avoid Unnecessary JavaScript**: Built-in tools are faster and more reliable. -- **Get Elements First**: Call browser_get_interactive_elements before clicking/typing for valid nodeIds. -- **Wait for Loading**: Verify page loaded after navigation before extracting/interacting. -- **Use Context Options**: Specify "visible" or "full" context when extracting. - -## Common Patterns - -**Extract article:** -\`\`\` -browser_get_page_content(tabId, "text") -\`\`\` - -**Get page links:** -\`\`\` -browser_get_page_content(tabId, "text-with-links") -\`\`\` - -**Fill form:** -\`\`\` -1. browser_get_interactive_elements(tabId) -2. browser_type_text(tabId, inputNodeId, "text") -3. browser_click_element(tabId, submitButtonNodeId) -\`\`\` - -Focus on efficiency. Use the most appropriate tool for each task. 
When in doubt, prefer simpler tools over complex ones.`; - -/** - * Combined system prompt for browser automation agent - */ -export const AGENT_SYSTEM_PROMPT = SYSTEM_PROMPT + BROWSEROS_PROMPT; diff --git a/packages/agent/src/agent/AgentFactory.ts b/packages/agent/src/agent/AgentFactory.ts deleted file mode 100644 index 08de2d8..0000000 --- a/packages/agent/src/agent/AgentFactory.ts +++ /dev/null @@ -1,142 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import type {ControllerBridge} from '@browseros/controller-server'; - -import type {BaseAgent} from './BaseAgent.js'; -import type {AgentConfig} from './types.js'; - -/** - * Agent constructor signature - * All agents must extend BaseAgent - */ -export type AgentConstructor = new ( - config: AgentConfig, - controllerBridge: ControllerBridge, -) => BaseAgent; - -/** - * Agent registration entry - */ -interface AgentRegistration { - name: string; - constructor: AgentConstructor; - description?: string; -} - -/** - * Agent Factory with Registry Pattern - * - * Allows dynamic agent registration and creation without hardcoded types. - * New agents can be registered at runtime. 
- * - * @example - * ```typescript - * // Register agents - * AgentFactory.register('codex-sdk', CodexSDKAgent, 'Codex SDK agent') - * AgentFactory.register('claude-sdk', ClaudeSDKAgent, 'Claude SDK agent') - * - * // Create agent dynamically - * const agent = AgentFactory.create('codex-sdk', config, bridge) - * ``` - */ -export class AgentFactory { - private static registry = new Map(); - - /** - * Register an agent type - * - * @param type - Agent type identifier (e.g., 'codex-sdk', 'claude-sdk') - * @param constructor - Agent class constructor - * @param description - Optional description - */ - static register( - type: string, - constructor: AgentConstructor, - description?: string, - ): void { - if (this.registry.has(type)) { - throw new Error(`Agent type '${type}' is already registered`); - } - - this.registry.set(type, { - name: type, - constructor, - description, - }); - } - - /** - * Create an agent instance - * - * @param type - Agent type identifier - * @param config - Agent configuration - * @param controllerBridge - Shared controller bridge - * @returns BaseAgent instance - * @throws Error if agent type is not registered - */ - static create( - type: string, - config: AgentConfig, - controllerBridge: ControllerBridge, - ): BaseAgent { - const registration = this.registry.get(type); - - if (!registration) { - const availableTypes = Array.from(this.registry.keys()).join(', '); - throw new Error( - `Agent type '${type}' is not registered. 
Available types: ${availableTypes}`, - ); - } - - return new registration.constructor(config, controllerBridge); - } - - /** - * Check if an agent type is registered - * - * @param type - Agent type identifier - * @returns true if registered - */ - static has(type: string): boolean { - return this.registry.has(type); - } - - /** - * Get all registered agent types - * - * @returns Array of registered agent type identifiers - */ - static getAvailableTypes(): string[] { - return Array.from(this.registry.keys()); - } - - /** - * Get registration info for an agent type - * - * @param type - Agent type identifier - * @returns Registration info or undefined - */ - static getRegistration(type: string): AgentRegistration | undefined { - return this.registry.get(type); - } - - /** - * Unregister an agent type (useful for testing) - * - * @param type - Agent type identifier - * @returns true if unregistered, false if not found - */ - static unregister(type: string): boolean { - return this.registry.delete(type); - } - - /** - * Clear all registrations (useful for testing) - */ - static clear(): void { - this.registry.clear(); - } -} diff --git a/packages/agent/src/agent/BaseAgent.test.ts b/packages/agent/src/agent/BaseAgent.test.ts deleted file mode 100644 index 6229c94..0000000 --- a/packages/agent/src/agent/BaseAgent.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {describe, it, expect, beforeEach} from 'bun:test'; - -import type {FormattedEvent} from '../utils/EventFormatter.js'; - -import {BaseAgent, DEFAULT_CONFIG} from './BaseAgent.js'; -import type {AgentConfig} from './types.js'; - -// Concrete test implementation of BaseAgent -class TestAgent extends BaseAgent { - constructor(config: AgentConfig, agentDefaults?: Partial) { - super('test-agent', config, agentDefaults); - } - - async *execute(message: string): AsyncGenerator { - // Minimal implementation for testing - yield {type: 'test', content: message, metadata: 
{}} as any; - } - - async destroy(): Promise { - this.markDestroyed(); - } -} - -describe('BaseAgent-unit-test', () => { - // Unit Test 1 - Constructor and config merging with defaults - it('tests that configs merge correctly with defaults', () => { - const userConfig: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - maxTurns: 50, - // systemPrompt not provided, should use default - }; - - const agentDefaults = { - systemPrompt: 'Agent-specific prompt', - maxTurns: 75, - maxThinkingTokens: 5000, - }; - - const agent = new TestAgent(userConfig, agentDefaults); - - // Verify config merging priority: user > agent defaults > base defaults - expect(agent['config'].resourcesDir).toBe('/test/resources'); - expect(agent['config'].apiKey).toBe('test-key'); - expect(agent['config'].maxTurns).toBe(50); // User overrides agent default - expect(agent['config'].systemPrompt).toBe('Agent-specific prompt'); // Agent default used - expect(agent['config'].maxThinkingTokens).toBe(5000); // Agent default used - }); - - // Unit Test 2 - Metadata initialization and state tracking - it('tests that metadata initializes with correct state', () => { - const config: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - }; - - const agent = new TestAgent(config); - const metadata = agent.getMetadata(); - - // Verify initial metadata state - expect(metadata.type).toBe('test-agent'); - expect(metadata.state).toBe('idle'); - expect(metadata.turns).toBe(0); - expect(metadata.toolsExecuted).toBe(0); - expect(metadata.totalDuration).toBe(0); - expect(metadata.lastEventTime).toBeGreaterThan(0); - }); - - // Unit Test 3 - Execution state transitions - it('tests that execution state tracks correctly', () => { - const config: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - }; - - const agent = new TestAgent(config); - - // 
Initial state - expect(agent['metadata'].state).toBe('idle'); - - // Start execution - agent['startExecution'](); - expect(agent['metadata'].state).toBe('executing'); - expect(agent['executionStartTime']).toBeGreaterThan(0); - - const startTime = agent['executionStartTime']; - - // Complete execution - agent['completeExecution'](); - expect(agent['metadata'].state).toBe('idle'); - expect(agent['metadata'].totalDuration).toBeGreaterThanOrEqual(0); - }); - - // Unit Test 4 - Metadata update methods - it('tests that metadata updates through helper methods', () => { - const config: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - }; - - const agent = new TestAgent(config); - const initialEventTime = agent['metadata'].lastEventTime; - - // Update event time - agent['updateEventTime'](); - expect(agent['metadata'].lastEventTime).toBeGreaterThanOrEqual( - initialEventTime, - ); - - // Increment tools executed - agent['updateToolsExecuted'](3); - expect(agent['metadata'].toolsExecuted).toBe(3); - - agent['updateToolsExecuted'](); // Default increment by 1 - expect(agent['metadata'].toolsExecuted).toBe(4); - - // Update turns - agent['updateTurns'](10); - expect(agent['metadata'].turns).toBe(10); - }); - - // Unit Test 5 - Error state handling - it('tests that error state handles correctly', () => { - const config: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - }; - - const agent = new TestAgent(config); - - // Mark error with Error object - const error = new Error('Test error'); - agent['errorExecution'](error); - - expect(agent['metadata'].state).toBe('error'); - expect(agent['metadata'].error).toBe('Test error'); - - // Mark error with string - const agent2 = new TestAgent(config); - agent2['errorExecution']('String error'); - - expect(agent2['metadata'].state).toBe('error'); - expect(agent2['metadata'].error).toBe('String error'); - }); - - // Unit 
Test 6 - Destroyed state tracking - it('tests that destroyed state tracks correctly', async () => { - const config: AgentConfig = { - resourcesDir: '/test/resources', - executionDir: '/test/execution', - apiKey: 'test-key', - }; - - const agent = new TestAgent(config); - - // Initially not destroyed - expect(agent['isDestroyed']()).toBe(false); - - // Destroy agent - await agent.destroy(); - - // Should be marked as destroyed - expect(agent['isDestroyed']()).toBe(true); - expect(agent['metadata'].state).toBe('destroyed'); - }); -}); diff --git a/packages/agent/src/agent/BaseAgent.ts b/packages/agent/src/agent/BaseAgent.ts deleted file mode 100644 index 1ab23c7..0000000 --- a/packages/agent/src/agent/BaseAgent.ts +++ /dev/null @@ -1,222 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {logger} from '@browseros/common'; - -import type {AgentConfig, AgentMetadata, FormattedEvent} from './types.js'; - -/** - * Generic default system prompt for agents - * - * Minimal prompt - agents should override with their own specific prompts - */ -export const DEFAULT_SYSTEM_PROMPT = `You are a browser automation agent.`; - -/** - * Generic default configuration values - * - * Agents can override these with their own defaults - */ -export const DEFAULT_CONFIG = { - maxTurns: 100, - maxThinkingTokens: 10000, - systemPrompt: DEFAULT_SYSTEM_PROMPT, - mcpServers: {}, -}; - -/** - * BaseAgent - Abstract base class for all agent implementations - * - * Provides: - * - Common configuration handling with defaults - * - Metadata management - * - Logging helpers - * - Abstract methods that concrete agents must implement - * - * Subclasses can override defaults by passing them to the constructor. - * - * Usage: - * export class MyAgent extends BaseAgent { - * constructor(config: AgentConfig) { - * super('my-agent', config, { - * systemPrompt: 'My custom prompt', - * mcpServers: { ... 
}, - * maxTurns: 50 - * }) - * } - * async *execute(message: string): AsyncGenerator { - * // Implementation - * } - * async destroy(): Promise { - * // Cleanup - * } - * } - */ -export abstract class BaseAgent { - protected config: Required; - protected metadata: AgentMetadata; - protected executionStartTime = 0; - protected initialized = false; - - constructor( - agentType: string, - config: AgentConfig, - agentDefaults?: Partial, - ) { - // Merge config with agent-specific defaults, then with base defaults - this.config = { - resourcesDir: config.resourcesDir, - executionDir: config.executionDir, - mcpServerPort: config.mcpServerPort ?? agentDefaults?.mcpServerPort, - apiKey: config.apiKey ?? agentDefaults?.apiKey, - baseUrl: config.baseUrl, - modelName: config.modelName, - maxTurns: - config.maxTurns ?? agentDefaults?.maxTurns ?? DEFAULT_CONFIG.maxTurns, - maxThinkingTokens: - config.maxThinkingTokens ?? - agentDefaults?.maxThinkingTokens ?? - DEFAULT_CONFIG.maxThinkingTokens, - systemPrompt: - config.systemPrompt ?? - agentDefaults?.systemPrompt ?? - DEFAULT_CONFIG.systemPrompt, - mcpServers: - config.mcpServers ?? - agentDefaults?.mcpServers ?? 
- DEFAULT_CONFIG.mcpServers, - } as Required; - - // Initialize metadata - this.metadata = { - type: agentType, - turns: 0, - totalDuration: 0, - lastEventTime: Date.now(), - toolsExecuted: 0, - state: 'idle', - }; - - logger.debug(`🤖 ${agentType} agent created`, { - agentType, - resourcesDir: this.config.resourcesDir, - modelName: this.config.modelName, - baseUrl: this.config.baseUrl, - maxTurns: this.config.maxTurns, - maxThinkingTokens: this.config.maxThinkingTokens, - usingDefaultMcp: !config.mcpServers, - usingDefaultPrompt: !config.systemPrompt, - }); - } - - /** - * Async initialization for agents that need it - * Subclasses can override for async setup (e.g., fetching config) - */ - async init(): Promise { - this.initialized = true; - } - - /** - * Execute a task and stream events - * Must be implemented by concrete agent classes - */ - // FIXME: make it handle init if not initialized - abstract execute(message: string): AsyncGenerator; - - /** - * Cleanup agent resources - * Must be implemented by concrete agent classes - */ - abstract destroy(): Promise; - - /** - * Abort current execution - * Triggers the abort signal to stop the current task - * Must be implemented by concrete agent classes - */ - abstract abort(): void; - - /** - * Check if agent is currently executing - * Must be implemented by concrete agent classes - */ - abstract isExecuting(): boolean; - - /** - * Get current agent metadata - */ - getMetadata(): AgentMetadata { - return {...this.metadata}; - } - - /** - * Helper: Start execution tracking - */ - protected startExecution(): void { - this.metadata.state = 'executing'; - this.executionStartTime = Date.now(); - } - - /** - * Helper: Complete execution tracking - */ - protected completeExecution(): void { - this.metadata.state = 'idle'; - this.metadata.totalDuration += Date.now() - this.executionStartTime; - } - - /** - * Helper: Mark execution error - */ - protected errorExecution(error: Error | string): void { - this.metadata.state = 
'error'; - this.metadata.error = error instanceof Error ? error.message : error; - } - - /** - * Helper: Update last event time - */ - protected updateEventTime(): void { - this.metadata.lastEventTime = Date.now(); - } - - /** - * Helper: Increment tool execution count - */ - protected updateToolsExecuted(count = 1): void { - this.metadata.toolsExecuted += count; - } - - /** - * Helper: Update turn count - */ - protected updateTurns(turns: number): void { - this.metadata.turns = turns; - } - - /** - * Helper: Check if agent is destroyed - */ - protected isDestroyed(): boolean { - return this.metadata.state === 'destroyed'; - } - - /** - * Helper: Mark agent as destroyed - */ - protected markDestroyed(): void { - this.metadata.state = 'destroyed'; - } - - /** - * Helper: Ensure agent is initialized - */ - protected ensureInitialized(): void { - if (!this.initialized) { - throw new Error('Agent not initialized. Call init() before execute()'); - } - } -} diff --git a/packages/agent/src/agent/ClaudeSDKAgent.formatter.ts b/packages/agent/src/agent/ClaudeSDKAgent.formatter.ts deleted file mode 100644 index 2c64f0d..0000000 --- a/packages/agent/src/agent/ClaudeSDKAgent.formatter.ts +++ /dev/null @@ -1,290 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {FormattedEvent} from './types.js'; - -/** - * Claude SDK Event Formatter - * - * Handles Claude-specific event structure: - * - system: Initialization and MCP notifications - * - assistant: Messages, tool calls, thinking - * - user: Tool results - * - result: Final completion/error events - */ -export class ClaudeEventFormatter { - /** - * Format Claude SDK event into common FormattedEvent - * - * @param event - Raw Claude event - * @returns FormattedEvent or null if event should not be displayed - */ - static format(event: any): FormattedEvent | null { - const eventType = event.type; - const subtype = (event as any).subtype; - - if (eventType === 'system') { - if (subtype === 'init') { - return 
this.formatInit(event); - } - if (subtype === 'mcp_server_notification') { - return this.formatMcpNotification(event); - } - return new FormattedEvent('init', 'System initialized'); - } - - if (eventType === 'assistant') { - return this.formatAssistant(event); - } - - if (eventType === 'user') { - return this.formatToolResults(event); - } - - if (eventType === 'result') { - return this.formatResult(event); - } - - return null; - } - - /** - * Format system initialization event - */ - private static formatInit(event: any): FormattedEvent { - const mcpServers = event.mcp_servers || []; - const toolCount = event.tools?.length || 0; - - if (mcpServers.length > 0) { - const serverNames = mcpServers.map((s: any) => s.name).join(', '); - return new FormattedEvent( - 'init', - `Initializing agent with ${toolCount} tools and MCP servers: ${serverNames}`, - ); - } - - return new FormattedEvent( - 'init', - `Initializing agent with ${toolCount} tools`, - ); - } - - /** - * Format MCP server notifications - */ - private static formatMcpNotification(event: any): FormattedEvent { - return new FormattedEvent( - 'init', - `MCP notification: ${JSON.stringify(event.params)}`, - ); - } - - /** - * Format assistant messages (text, tool calls, thinking) - */ - private static formatAssistant(event: any): FormattedEvent | null { - const message = event.message; - if (!message?.content || !Array.isArray(message.content)) { - return null; - } - - const toolUses = message.content.filter((c: any) => c.type === 'tool_use'); - if (toolUses.length > 0) { - return this.formatToolUse(toolUses); - } - - const textContent = message.content.find((c: any) => c.type === 'text'); - if (textContent) { - return new FormattedEvent('response', textContent.text); - } - - const thinkingContent = message.content.find( - (c: any) => c.type === 'thinking', - ); - if (thinkingContent) { - const text = thinkingContent.thinking || ''; - const truncated = - text.length > 100 ? text.substring(0, 100) + '...' 
: text; - return new FormattedEvent('thinking', `💭 ${truncated}`); - } - - return null; - } - - /** - * Format tool use events - */ - private static formatToolUse(toolUses: any[]): FormattedEvent { - if (toolUses.length === 1) { - const tool = toolUses[0]; - const toolName = this.cleanToolName(tool.name); - const args = this.formatToolArgs(tool.input); - const argsText = args ? `\n Args: ${args}` : ''; - return new FormattedEvent('tool_use', `🔧 ${toolName}${argsText}`); - } - - const toolNames = toolUses - .map((t: any) => this.cleanToolName(t.name)) - .join(', '); - return new FormattedEvent('tool_use', `🔧 ${toolNames}`); - } - - /** - * Format tool result events - */ - private static formatToolResults(event: any): FormattedEvent | null { - const message = event.message; - if (!message?.content || !Array.isArray(message.content)) { - return null; - } - - const toolResults = message.content.filter( - (c: any) => c.type === 'tool_result', - ); - if (toolResults.length === 0) { - return null; - } - - for (const result of toolResults) { - if (result.is_error || result.error) { - const errorMsg = - result.error || result.content?.[0]?.text || 'Unknown error'; - return new FormattedEvent('tool_result', `❌ Error: ${errorMsg}`); - } - } - - const resultTexts = toolResults - .map((r: any) => this.extractTextFromContent(r.content)) - .filter((t: string) => t.length > 0); - - if (resultTexts.length === 0) { - return new FormattedEvent('tool_result', '✓ Tool executed'); - } - - const combinedText = resultTexts.join('\n'); - const truncated = - combinedText.length > 200 - ? combinedText.substring(0, 200) + '...' - : combinedText; - - const hasImages = toolResults.some((r: any) => - this.hasImageContent(r.content), - ); - const imageIndicator = hasImages ? 
' 📷' : ''; - - return new FormattedEvent('tool_result', `✓ ${truncated}${imageIndicator}`); - } - - /** - * Format result events (completion/error) - */ - private static formatResult(event: any): FormattedEvent { - const subtype = event.subtype; - const metadata = { - turnCount: event.turn_count || 0, - isError: subtype === 'error', - duration: event.duration_ms || 0, - }; - - if (subtype === 'completion') { - const usageInfo = event.usage - ? ` (${event.usage.input_tokens}/${event.usage.output_tokens} tokens)` - : ''; - return new FormattedEvent( - 'completion', - `✅ Completed${usageInfo}`, - metadata, - ); - } - - if (subtype === 'error') { - const errorMsg = event.error?.message || 'Unknown error'; - return new FormattedEvent('error', `❌ Error: ${errorMsg}`, metadata); - } - - const errorMsg = event.error?.message || event.message || 'Task stopped'; - return new FormattedEvent('completion', `⏹️ ${errorMsg}`, metadata); - } - - /** - * Create heartbeat/processing event - */ - static createProcessingEvent(): FormattedEvent { - return new FormattedEvent('thinking', '⏳ Processing...'); - } - - /** - * Clean tool name by removing prefixes - */ - private static cleanToolName(name: string): string { - return name - .replace(/^mcp__[^_]+__/, '') - .replace(/^browseros-controller__/, '') - .replace(/_/g, ' '); - } - - /** - * Format tool arguments into readable string - */ - private static formatToolArgs(input: any): string { - if (!input || typeof input !== 'object') { - return ''; - } - - const keys = Object.keys(input); - if (keys.length === 0) { - return ''; - } - - if (keys.length === 1 && keys[0] === 'url') { - return input.url; - } - - if (keys.length === 1 && (keys[0] === 'function' || keys[0] === 'script')) { - const code = input[keys[0]]; - if (typeof code === 'string') { - return code.length > 50 ? code.substring(0, 50) + '...' 
: code; - } - } - - const argPairs = keys.map(key => { - const value = input[key]; - if (typeof value === 'string') { - return `${key}="${value.length > 30 ? value.substring(0, 30) + '...' : value}"`; - } - return `${key}=${JSON.stringify(value)}`; - }); - - return argPairs.join(', '); - } - - /** - * Extract text content from tool result content - */ - private static extractTextFromContent(content: any): string { - if (typeof content === 'string') { - return content; - } - - if (Array.isArray(content)) { - const textBlocks = content - .filter((c: any) => c.type === 'text') - .map((c: any) => c.text); - return textBlocks.join('\n'); - } - - return ''; - } - - /** - * Check if content contains images - */ - private static hasImageContent(content: any): boolean { - if (Array.isArray(content)) { - return content.some((c: any) => c.type === 'image'); - } - return false; - } -} diff --git a/packages/agent/src/agent/ClaudeSDKAgent.ts b/packages/agent/src/agent/ClaudeSDKAgent.ts deleted file mode 100644 index de723a9..0000000 --- a/packages/agent/src/agent/ClaudeSDKAgent.ts +++ /dev/null @@ -1,420 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {query} from '@anthropic-ai/claude-agent-sdk'; -import { - logger, - fetchBrowserOSConfig, - type BrowserOSConfig, - type Provider, -} from '@browseros/common'; -import type { - ControllerBridge} from '@browseros/controller-server'; -import { - ControllerContext, -} from '@browseros/controller-server'; -import type {ToolDefinition} from '@browseros/tools'; -import {allControllerTools} from '@browseros/tools/controller-based'; - -import {AGENT_SYSTEM_PROMPT} from './Agent.prompt.js'; -import {BaseAgent} from './BaseAgent.js'; -import {ClaudeEventFormatter} from './ClaudeSDKAgent.formatter.js'; -import {createControllerMcpServer} from './ControllerToolsAdapter.js'; -import { type AgentConfig} from './types.js'; -import type {FormattedEvent} from './types.js'; - -/** - * Claude SDK specific default configuration 
- */ -const CLAUDE_SDK_DEFAULTS = { - maxTurns: 100, - maxThinkingTokens: 10000, -}; - -/** - * Claude SDK Agent implementation - * - * Wraps @anthropic-ai/claude-agent-sdk with: - * - In-process SDK MCP server with controller tools - * - Shared ControllerBridge for browseros-controller connection - * - Event formatting via EventFormatter - * - AbortController for cleanup - * - Metadata tracking - * - * Note: Requires external ControllerBridge (provided by main server) - */ -export class ClaudeSDKAgent extends BaseAgent { - private abortController: AbortController | null = null; - private gatewayConfig: BrowserOSConfig | null = null; - private selectedProvider: Provider | null = null; - - constructor(config: AgentConfig, controllerBridge: ControllerBridge) { - logger.info('🔧 Using shared ControllerBridge for controller connection'); - - const controllerContext = new ControllerContext(controllerBridge); - - // Get all controller tools from package and create SDK MCP server - const sdkMcpServer = createControllerMcpServer( - allControllerTools, - controllerContext, - ); - - logger.info( - `✅ Created SDK MCP server with ${allControllerTools.length} controller tools`, - ); - - // Pass Claude SDK specific defaults to BaseAgent (must call super before accessing this) - super('claude-sdk', config, { - systemPrompt: AGENT_SYSTEM_PROMPT, - mcpServers: {'browseros-controller': sdkMcpServer}, - maxTurns: CLAUDE_SDK_DEFAULTS.maxTurns, - maxThinkingTokens: CLAUDE_SDK_DEFAULTS.maxThinkingTokens, - }); - - logger.info('✅ ClaudeSDKAgent initialized with shared ControllerBridge'); - } - - /** - * Initialize agent - fetch config from BrowserOS Config URL if configured - * Falls back to ANTHROPIC_API_KEY env var if config URL not set or fails - */ - override async init(): Promise { - const configUrl = process.env.BROWSEROS_CONFIG_URL; - - if (configUrl) { - logger.info('🌐 Fetching config from BrowserOS Config URL', {configUrl}); - - try { - this.gatewayConfig = await 
fetchBrowserOSConfig(configUrl); - this.selectedProvider = - this.gatewayConfig.providers.find(p => p.name === 'anthropic') || null; - - if (!this.selectedProvider) { - throw new Error('No anthropic provider found in config'); - } - - this.config.apiKey = this.selectedProvider.apiKey; - if (this.selectedProvider.baseUrl) { - this.config.baseUrl = this.selectedProvider.baseUrl; - } - if (this.selectedProvider.model) { - this.config.modelName = this.selectedProvider.model; - } - - logger.info('✅ Using config from BrowserOS Config URL', { - model: this.config.modelName, - baseUrl: this.config.baseUrl, - }); - - await super.init(); - return; - } catch (error) { - logger.warn( - '⚠️ Failed to fetch from config URL, falling back to ANTHROPIC_API_KEY', - { - error: error instanceof Error ? error.message : String(error), - }, - ); - } - } - - const envApiKey = process.env.ANTHROPIC_API_KEY; - if (envApiKey) { - this.config.apiKey = envApiKey; - logger.info('✅ Using API key from ANTHROPIC_API_KEY env var'); - await super.init(); - return; - } - - throw new Error( - 'No API key found. 
Set either BROWSEROS_CONFIG_URL or ANTHROPIC_API_KEY', - ); - } - - /** - * Wrapper around iterator.next() that yields heartbeat events while waiting - * @param iterator - The async iterator - * @yields Heartbeat events (FormattedEvent) while waiting, then the final iterator result (IteratorResult) - */ - private async *nextWithHeartbeat( - iterator: AsyncIterator, - ): AsyncGenerator { - const heartbeatInterval = 20000; // 20 seconds - let heartbeatTimer: NodeJS.Timeout | null = null; - let abortHandler: (() => void) | null = null; - - // Call iterator.next() once - this generator wraps a single next() call - const iteratorPromise = iterator.next(); - - // Create abort promise - const abortPromise = new Promise((_, reject) => { - if (this.abortController) { - abortHandler = () => { - reject(new Error('Agent execution aborted by client')); - }; - this.abortController.signal.addEventListener('abort', abortHandler, { - once: true, - }); - } - }); - - try { - // Loop until the iterator promise resolves, yielding heartbeats while waiting - while (true) { - // Check if execution was aborted - if (this.abortController?.signal.aborted) { - logger.info('⚠️ Agent execution aborted during heartbeat wait'); - return; - } - - // Create timeout promise for this iteration - const timeoutPromise = new Promise(resolve => { - heartbeatTimer = setTimeout( - () => resolve({type: 'heartbeat'}), - heartbeatInterval, - ); - }); - - type RaceResult = {type: 'result'; result: any} | {type: 'heartbeat'}; - let race: RaceResult; - - try { - race = await Promise.race([ - iteratorPromise.then(result => ({type: 'result' as const, result})), - timeoutPromise.then(() => ({type: 'heartbeat' as const})), - abortPromise, - ]); - } catch (abortError) { - // Abort was triggered during wait - logger.info( - '⚠️ Agent execution aborted (caught during iterator wait)', - ); - // Cleanup iterator (fire-and-forget to avoid blocking) - if (iterator.return) { - iterator.return(undefined).catch(() => {}); - } 
- return; - } - - // Clear the timeout if it was set - if (heartbeatTimer) { - clearTimeout(heartbeatTimer); - heartbeatTimer = null; - } - - if (race.type === 'heartbeat') { - // Heartbeat timeout occurred - yield processing event and continue waiting - yield ClaudeEventFormatter.createProcessingEvent(); - // Loop continues - will race the same iteratorPromise (still pending) vs new timeout - } else { - // Iterator result arrived - yield it and exit this generator - yield race.result; - return; - } - } - } finally { - // Clean up heartbeat timer - if (heartbeatTimer) { - clearTimeout(heartbeatTimer); - } - - // Clean up abort listener if it wasn't triggered - if ( - abortHandler && - this.abortController && - !this.abortController.signal.aborted - ) { - this.abortController.signal.removeEventListener('abort', abortHandler); - } - } - } - - /** - * Execute a task using Claude SDK and stream formatted events - * - * @param message - User's natural language request - * @yields FormattedEvent instances - */ - async *execute(message: string): AsyncGenerator { - if (!this.initialized) { - await this.init(); - } - - this.startExecution(); - this.abortController = new AbortController(); - - logger.info('🤖 ClaudeSDKAgent executing', {message}); - - try { - const options: any = { - apiKey: this.config.apiKey, - maxTurns: this.config.maxTurns, - maxThinkingTokens: this.config.maxThinkingTokens, - cwd: this.config.executionDir, - systemPrompt: this.config.systemPrompt, - mcpServers: this.config.mcpServers, - abortController: this.abortController, - }; - - if (this.config.modelName) { - options.model = this.config.modelName; - logger.debug('Using model from config', { - model: this.config.modelName, - }); - } - - if (this.config.baseUrl) { - options.baseUrl = this.config.baseUrl; - logger.debug('Using custom base URL', { - baseUrl: this.config.baseUrl, - }); - } - - // Call Claude SDK - const iterator = query({prompt: message, options})[ - Symbol.asyncIterator - ](); - - // 
Stream events with heartbeat - while (true) { - // Check if execution was aborted - if (this.abortController?.signal.aborted) { - logger.info('⚠️ Agent execution aborted by client'); - break; - } - - let result: IteratorResult | null = null; - - // Iterate through heartbeat generator to get the actual result - for await (const item of this.nextWithHeartbeat(iterator)) { - if (item && item.done !== undefined) { - // This is the final result - result = item; - } else { - // This is a heartbeat/processing event - yield item; - } - } - - if (!result || result.done) break; - - const event = result.value; - - // Update event time - this.updateEventTime(); - - // Track tool executions (check for assistant message with tool_use content) - if (event.type === 'assistant' && (event as any).message?.content) { - const toolUses = (event as any).message.content.filter( - (c: any) => c.type === 'tool_use', - ); - if (toolUses.length > 0) { - this.updateToolsExecuted(toolUses.length); - } - } - - // Track turn count from result events - if (event.type === 'result') { - const numTurns = (event as any).num_turns; - if (numTurns) { - this.updateTurns(numTurns); - } - - // Log raw result events for debugging - logger.info('📊 Raw result event', { - subtype: (event as any).subtype, - is_error: (event as any).is_error, - num_turns: numTurns, - result: (event as any).result ?? 
'N/A', - }); - } - - // Format the event using ClaudeEventFormatter - const formattedEvent = ClaudeEventFormatter.format(event); - - // Yield formatted event if valid - if (formattedEvent) { - logger.debug('📤 ClaudeSDKAgent yielding event', { - type: formattedEvent.type, - }); - yield formattedEvent; - } - } - - // Complete execution tracking - this.completeExecution(); - - logger.info('✅ ClaudeSDKAgent execution complete', { - turns: this.metadata.turns, - toolsExecuted: this.metadata.toolsExecuted, - duration: Date.now() - this.executionStartTime, - }); - } catch (error) { - // Mark execution error - this.errorExecution( - error instanceof Error ? error : new Error(String(error)), - ); - - logger.error('❌ ClaudeSDKAgent execution failed', { - error: error instanceof Error ? error.message : String(error), - stack: error instanceof Error ? error.stack : undefined, - }); - - throw error; - } finally { - // Clear AbortController reference - this.abortController = null; - } - } - - /** - * Abort current execution - * Triggers abort signal to stop the current task gracefully - */ - abort(): void { - if (this.abortController) { - logger.info('🛑 Aborting ClaudeSDKAgent execution'); - this.abortController.abort(); - } else { - logger.warn('⚠️ Cancel not fully supported - no active execution'); - } - } - - /** - * Check if agent is currently executing - */ - isExecuting(): boolean { - return this.metadata.state === 'executing' && this.abortController !== null; - } - - /** - * Cleanup agent resources - * - * Aborts the running SDK query. Does NOT close shared ControllerBridge. 
- */ - async destroy(): Promise { - if (this.isDestroyed()) { - logger.debug('⚠️ ClaudeSDKAgent already destroyed'); - return; - } - - this.markDestroyed(); - - // Abort the SDK query if it's running - if (this.abortController) { - logger.debug('🛑 Aborting SDK query'); - this.abortController.abort(); - await new Promise(resolve => setTimeout(resolve, 500)); - } - - // DO NOT close ControllerBridge - it's shared and owned by main server - - logger.debug('🗑️ ClaudeSDKAgent destroyed', { - totalDuration: this.metadata.totalDuration, - turns: this.metadata.turns, - toolsExecuted: this.metadata.toolsExecuted, - }); - } -} diff --git a/packages/agent/src/agent/CodexSDKAgent.config.ts b/packages/agent/src/agent/CodexSDKAgent.config.ts deleted file mode 100644 index 2c2da81..0000000 --- a/packages/agent/src/agent/CodexSDKAgent.config.ts +++ /dev/null @@ -1,75 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {writeFileSync} from 'node:fs'; -import {join} from 'node:path'; - -import {logger} from '@browseros/common'; -import {stringify} from 'smol-toml'; - -export interface McpServerConfig { - url: string; - startup_timeout_sec?: number; - tool_timeout_sec?: number; -} - -export interface BrowserOSCodexConfig { - model_name: string; - base_url?: string; - api_key_env: string; - wire_api: 'chat' | 'responses'; - base_instructions_file: string; - mcp_servers: { - [key: string]: McpServerConfig; - }; -} - -export function generateBrowserOSCodexToml( - config: BrowserOSCodexConfig, -): string { - const header = [ - '# BrowserOS Model Provider Configuration', - '# This file configures a custom model provider for Codex', - '', - ].join('\n'); - - const tomlContent = stringify(config); - - return header + tomlContent; -} - -export function writeBrowserOSCodexConfig( - config: BrowserOSCodexConfig, - outputDir: string, -): string { - const tomlContent = generateBrowserOSCodexToml(config); - const tomlPath = join(outputDir, 'browseros_config.toml'); - - 
writeFileSync(tomlPath, tomlContent, 'utf-8'); - - logger.info('✅ Generated BrowserOS Codex config', { - path: tomlPath, - modelName: config.model_name, - baseUrl: config.base_url, - }); - - return tomlPath; -} - -export function writePromptFile( - promptContent: string, - outputDir: string, -): string { - const promptPath = join(outputDir, 'browseros_prompt.md'); - - writeFileSync(promptPath, promptContent, 'utf-8'); - - logger.info('✅ Generated BrowserOS prompt file', { - path: promptPath, - size: promptContent.length, - }); - - return promptPath; -} diff --git a/packages/agent/src/agent/CodexSDKAgent.formatter.ts b/packages/agent/src/agent/CodexSDKAgent.formatter.ts deleted file mode 100644 index 84f0eb3..0000000 --- a/packages/agent/src/agent/CodexSDKAgent.formatter.ts +++ /dev/null @@ -1,143 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import type {ThreadEvent} from '@browseros/codex-sdk-ts'; -import type {ThreadItem} from '@browseros/codex-sdk-ts'; -import {FormattedEvent} from './types.js'; - -/** - * Codex SDK Event Formatter - * - * Maps Codex events to FormattedEvent types: - * - thread.started -> init - * - turn.started -> thinking - * - item.started/item.completed -> various (thinking, tool_use, tool_result, error) - * - turn.failed -> error - * - error -> error - * - * Note: turn.completed is handled in CodexSDKAgent.execute() to re-emit final agent_message as completion - */ -export class CodexEventFormatter { - /** - * Format Codex SDK event into FormattedEvent - * - * @param event - Raw Codex event - * @returns FormattedEvent or null if event should not be displayed - */ - static format(event: ThreadEvent): FormattedEvent | null { - switch (event.type) { - case 'thread.started': - // return new FormattedEvent('init', `Thread started: ${event.thread_id}`); - // No need to show thread started event to user - return null; - - case 'turn.started': - return new FormattedEvent('thinking', 'Agent processing...'); - - case 'item.started': 
- case 'item.completed': - return this.formatItem(event.item); - - case 'turn.failed': - return new FormattedEvent( - 'error', - `Turn failed: ${event.error.message}`, - ); - - case 'error': - return new FormattedEvent('error', event.message); - - case 'turn.completed': - return null; - - default: - return null; - } - } - - /** - * Format Codex item based on type - */ - private static formatItem(item: ThreadItem): FormattedEvent | null { - switch (item.type) { - case 'agent_message': - return new FormattedEvent('thinking', item.text); - - case 'reasoning': { - const text = item.text; - if (!text) return null; - const truncated = - text.length > 150 ? text.substring(0, 150) + '...' : text; - return new FormattedEvent('thinking', truncated); - } - - case 'mcp_tool_call': { - const toolName = this.cleanToolName(item.tool); - const status = item.status; - - if (status === 'in_progress') { - return new FormattedEvent('tool_use', `Executing ${toolName}`); - } else if (status === 'completed') { - return new FormattedEvent('tool_result', `${toolName} completed`); - } else if (status === 'failed') { - return new FormattedEvent('tool_result', `${toolName} failed`); - } - - return null; - } - - case 'command_execution': { - const cmd = item.command; - const truncated = cmd.length > 50 ? cmd.substring(0, 50) + '...' : cmd; - return new FormattedEvent('thinking', `Executing: ${truncated}`); - } - - case 'file_change': { - const count = item.changes.length; - return new FormattedEvent( - 'thinking', - `Modified ${count} file${count !== 1 ? 's' : ''}`, - ); - } - - case 'web_search': { - const query = item.query; - const truncated = - query.length > 50 ? query.substring(0, 50) + '...' : query; - return new FormattedEvent('thinking', `Searching: ${truncated}`); - } - - case 'todo_list': { - const todoItems = item.items - .map(i => `${i.completed ? 
'- [x]' : '- [ ]'} ${i.text}`) - .join('\n'); - return new FormattedEvent('thinking', todoItems); - } - - case 'error': - return new FormattedEvent('error', item.message); - - default: - return null; - } - } - - /** - * Create heartbeat/processing event - */ - static createProcessingEvent(): FormattedEvent { - return new FormattedEvent('thinking', 'Processing...'); - } - - /** - * Clean tool name by removing MCP prefixes - */ - private static cleanToolName(name: string): string { - return name - .replace(/^mcp__[^_]+__/, '') - .replace(/^browseros-controller__/, '') - .replace(/_/g, ' '); - } -} diff --git a/packages/agent/src/agent/CodexSDKAgent.ts b/packages/agent/src/agent/CodexSDKAgent.ts deleted file mode 100644 index 66017f2..0000000 --- a/packages/agent/src/agent/CodexSDKAgent.ts +++ /dev/null @@ -1,572 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {accessSync, constants as fsConstants} from 'node:fs'; -import {dirname, join} from 'node:path'; - -import {Codex, Thread, type McpServerConfig} from '@browseros/codex-sdk-ts'; -import {logger} from '@browseros/common'; -import type {ControllerBridge} from '@browseros/controller-server'; -import {allControllerTools} from '@browseros/tools/controller-based'; - -import {AGENT_SYSTEM_PROMPT} from './Agent.prompt.js'; -import {BaseAgent} from './BaseAgent.js'; -import {CodexEventFormatter} from './CodexSDKAgent.formatter.js'; -import { - type BrowserOSCodexConfig, - writeBrowserOSCodexConfig, - writePromptFile, -} from './CodexSDKAgent.config.js'; -import {type AgentConfig, FormattedEvent} from './types.js'; - -/** - * Codex SDK specific default configuration - */ -const CODEX_SDK_DEFAULTS = { - maxTurns: 100, - mcpServerHost: '127.0.0.1', - mcpServerPort: 9100, -} as const; - -/** - * Build MCP server configuration from agent config - */ -function buildMcpServerConfig(config: AgentConfig): McpServerConfig { - const port = config.mcpServerPort || CODEX_SDK_DEFAULTS.mcpServerPort; - const 
mcpServerUrl = `http://${CODEX_SDK_DEFAULTS.mcpServerHost}:${port}/mcp`; - return {url: mcpServerUrl} as McpServerConfig; -} - -/** - * Codex SDK Agent implementation - * - * Wraps @openai/codex-sdk with: - * - In-process SDK MCP server with controller tools - * - Shared ControllerBridge for browseros-controller connection - * - Event formatting via EventFormatter (Codex → FormattedEvent) - * - Break-loop abort pattern (Codex has no native abort) - * - Heartbeat mechanism for long-running operations - * - Thread-based execution model - * - Metadata tracking - * - * Environment Variables: - * - CODEX_BINARY_PATH: Optional override when no bundled codex binary is found - * - * Configuration (via AgentConfig): - * - resourcesDir: Resources directory (required) - * - mcpServerPort: MCP server port (optional, defaults to 9100) - * - apiKey: OpenAI API key (required) - * - baseUrl: Custom LLM endpoint (optional) - * - modelName: Model to use (optional, defaults to 'o4-mini') - */ -export class CodexSDKAgent extends BaseAgent { - private abortController: AbortController | null = null; - private codex: Codex | null = null; - private codexExecutablePath: string | null = null; - private codexConfigPath: string | null = null; - private currentThread: Thread | null = null; - - constructor(config: AgentConfig, _controllerBridge: ControllerBridge) { - const mcpServerConfig = buildMcpServerConfig(config); - - logger.info('🔧 CodexSDKAgent initializing', { - mcpServerUrl: mcpServerConfig.url, - toolCount: allControllerTools.length, - }); - - super('codex-sdk', config, { - systemPrompt: AGENT_SYSTEM_PROMPT, - mcpServers: {'browseros-mcp': mcpServerConfig}, - maxTurns: CODEX_SDK_DEFAULTS.maxTurns, - }); - - logger.info('✅ CodexSDKAgent initialized successfully'); - } - - /** - * Initialize agent - use config passed in constructor - */ - override async init(): Promise { - this.codexExecutablePath = this.resolveCodexExecutablePath(); - - logger.info('🚀 Resolved Codex binary path', { - 
codexExecutablePath: this.codexExecutablePath, - }); - - if (!this.config.apiKey) { - throw new Error('API key is required in AgentConfig'); - } - - logger.info('✅ Using config from AgentConfig', { - model: this.config.modelName, - }); - - await super.init(); - this.generateCodexConfig(); - this.initializeCodex(); - } - - private generateCodexConfig(): void { - const outputDir = this.config.executionDir; - const port = this.config.mcpServerPort || CODEX_SDK_DEFAULTS.mcpServerPort; - const modelName = this.config.modelName; - const baseUrl = this.config.baseUrl; - - const codexConfig: BrowserOSCodexConfig = { - model_name: modelName, - ...(baseUrl && {base_url: baseUrl}), - api_key_env: 'BROWSEROS_API_KEY', - wire_api: 'chat', - base_instructions_file: 'browseros_prompt.md', - mcp_servers: { - browseros: { - url: `http://127.0.0.1:${port}/mcp`, - startup_timeout_sec: 30.0, - tool_timeout_sec: 120.0, - }, - }, - }; - - writePromptFile(AGENT_SYSTEM_PROMPT, outputDir); - this.codexConfigPath = writeBrowserOSCodexConfig(codexConfig, outputDir); - - logger.info('✅ Generated Codex configuration files', { - outputDir, - configPath: this.codexConfigPath, - modelName, - baseUrl, - }); - } - - private initializeCodex(): void { - const codexConfig: any = { - codexPathOverride: this.codexExecutablePath, - apiKey: this.config.apiKey, - // Note: baseUrl is not passed here because when using browseros config, - // it's already specified in the TOML file (base_url field) - }; - - this.codex = new Codex(codexConfig); - - logger.info('✅ Codex SDK initialized', { - binaryPath: this.codexExecutablePath, - }); - } - - private isExecutableFile(path: string): boolean { - try { - accessSync(path, fsConstants.X_OK); - return true; - } catch { - return false; - } - } - - private resolveCodexExecutablePath(): string { - const codexBinaryName = - process.platform === 'win32' ? 
'codex.exe' : 'codex'; - - // Check CODEX_BINARY_PATH env var first - if (process.env.CODEX_BINARY_PATH) { - const envPath = process.env.CODEX_BINARY_PATH; - if (this.isExecutableFile(envPath)) { - return envPath; - } - logger.warn( - 'CODEX_BINARY_PATH set but file not found or not executable', - { - path: envPath, - }, - ); - } - - // Check resourcesDir if provided - if (this.config.resourcesDir) { - const resourcesCodexPath = join( - this.config.resourcesDir, - 'bin', - codexBinaryName, - ); - if (this.isExecutableFile(resourcesCodexPath)) { - return resourcesCodexPath; - } - } - - // Check bundled codex in current binary directory - const currentBinaryDirectory = dirname(process.execPath); - const bundledCodexPath = join(currentBinaryDirectory, codexBinaryName); - if (this.isExecutableFile(bundledCodexPath)) { - return bundledCodexPath; - } - - throw new Error( - 'Codex binary not found. Set CODEX_BINARY_PATH or --resources-dir', - ); - } - - /** - * Wrapper around iterator.next() that yields heartbeat events while waiting - * @param iterator - The async iterator - * @yields Heartbeat events (FormattedEvent) while waiting, then the final iterator result (IteratorResult) - */ - private async *nextWithHeartbeat( - iterator: AsyncIterator, - ): AsyncGenerator { - const heartbeatInterval = 20000; // 20 seconds - let heartbeatTimer: NodeJS.Timeout | null = null; - let abortHandler: (() => void) | null = null; - - // Call iterator.next() once - this generator wraps a single next() call - const iteratorPromise = iterator.next(); - - // Create abort promise - const abortPromise = new Promise((_, reject) => { - if (this.abortController) { - abortHandler = () => { - reject(new Error('Agent execution aborted by client')); - }; - this.abortController.signal.addEventListener('abort', abortHandler, { - once: true, - }); - } - }); - - try { - // Loop until the iterator promise resolves, yielding heartbeats while waiting - while (true) { - // Check if execution was aborted - 
if (this.abortController?.signal.aborted) { - logger.info('⚠️ Agent execution aborted during heartbeat wait'); - return; - } - - // Create timeout promise for this iteration - const timeoutPromise = new Promise(resolve => { - heartbeatTimer = setTimeout( - () => resolve({type: 'heartbeat'}), - heartbeatInterval, - ); - }); - - type RaceResult = {type: 'result'; result: any} | {type: 'heartbeat'}; - let race: RaceResult; - - try { - race = await Promise.race([ - iteratorPromise.then(result => ({type: 'result' as const, result})), - timeoutPromise.then(() => ({type: 'heartbeat' as const})), - abortPromise, - ]); - } catch (abortError) { - // Abort was triggered during wait - logger.info( - '⚠️ Agent execution aborted (caught during iterator wait)', - ); - // Break loop to stop iteration (Codex has no native abort) - return; - } - - // Clear the timeout if it was set - if (heartbeatTimer) { - clearTimeout(heartbeatTimer); - heartbeatTimer = null; - } - - if (race.type === 'heartbeat') { - // Heartbeat timeout occurred - yield processing event and continue waiting - yield CodexEventFormatter.createProcessingEvent(); - // Loop continues - will race the same iteratorPromise (still pending) vs new timeout - } else { - // Iterator result arrived - yield it and exit this generator - yield race.result; - return; - } - } - } finally { - // Clean up heartbeat timer - if (heartbeatTimer) { - clearTimeout(heartbeatTimer); - } - - // Clean up abort listener if it wasn't triggered - if ( - abortHandler && - this.abortController && - !this.abortController.signal.aborted - ) { - this.abortController.signal.removeEventListener('abort', abortHandler); - } - } - } - - /** - * Execute a task using Codex SDK and stream formatted events - * - * @param message - User's natural language request - * @yields FormattedEvent instances - */ - async *execute(message: string): AsyncGenerator { - if (!this.initialized) { - await this.init(); - } - - if (!this.codex) { - throw new Error('Codex 
instance not initialized'); - } - - this.startExecution(); - this.abortController = new AbortController(); - - logger.info('🤖 CodexSDKAgent executing', { - message, - }); - - try { - logger.debug('🔧 MCP Servers configured', { - count: Object.keys(this.config.mcpServers || {}).length, - servers: Object.keys(this.config.mcpServers || {}), - }); - - // Start thread with browseros config or MCP servers - const modelName = this.config.modelName; - const threadOptions: any = { - skipGitRepoCheck: true, - workingDirectory: this.config.executionDir, - }; - - // Use TOML config if available, otherwise fall back to direct MCP server config - if (this.codexConfigPath) { - threadOptions.browserosConfigPath = this.codexConfigPath; - logger.debug('📡 Starting Codex thread with browseros config', { - configPath: this.codexConfigPath, - }); - } else { - threadOptions.mcpServers = this.config.mcpServers; - threadOptions.model = modelName; - logger.debug('📡 Starting Codex thread with MCP servers', { - mcpServerCount: Object.keys(this.config.mcpServers || {}).length, - model: modelName, - }); - } - - // Reuse existing thread for follow-up messages, or create new one - // CRITICAL: Check both existence AND thread ID (ID is null if cancelled before thread.started event) - if (!this.currentThread || !this.currentThread.id) { - this.currentThread = this.codex.startThread(threadOptions); - logger.info('🆕 Created new thread for session'); - } else { - logger.info('♻️ Reusing existing thread for follow-up message', { - threadId: this.currentThread.id, - }); - } - const thread = this.currentThread; - - // Get streaming events from thread - const messages: Array<{type: 'text'; text: string}> = []; - - // When using TOML config, system prompt comes from base_instructions_file - // Otherwise, add it as first message - if (!this.codexConfigPath && this.config.systemPrompt) { - messages.push({type: 'text' as const, text: this.config.systemPrompt}); - } - - // Add user message - 
messages.push({type: 'text' as const, text: message}); - - const {events} = await thread.runStreamed(messages); - - // Create iterator for streaming - const iterator = events[Symbol.asyncIterator](); - - // Track last agent message for completion - let lastAgentMessage: string | null = null; - - try { - // Stream events with heartbeat and abort handling - while (true) { - // Check if execution was aborted (break-loop pattern) - if (this.abortController?.signal.aborted) { - logger.info( - '⚠️ Agent execution aborted by client (breaking loop)', - ); - // Clear thread - next message will create fresh thread - this.currentThread = null; - logger.debug('🔄 Cleared thread reference due to abort'); - break; - } - - let result: IteratorResult | null = null; - - // Iterate through heartbeat generator to get the actual result - for await (const item of this.nextWithHeartbeat(iterator)) { - if (item && item.done !== undefined) { - // This is the final result - result = item; - } else { - // This is a heartbeat/processing event - update time to prevent timeout - this.updateEventTime(); - yield item; - } - } - - if (!result || result.done) break; - - const event = result.value; - - // Log Codex events for debugging (console view truncates automatically) - if (event.type === 'error' || event.type === 'turn.failed') { - logger.error('Codex event', event); - } else { - logger.debug('Codex event', event); - } - - // Update event time - this.updateEventTime(); - - // Track last agent_message for completion - if ( - event.type === 'item.completed' && - event.item?.type === 'agent_message' - ) { - lastAgentMessage = event.item.text || null; - } - - // Track tool executions from item.completed events with mcp_tool_call type - if ( - event.type === 'item.completed' && - event.item?.type === 'mcp_tool_call' && - event.item.status === 'completed' - ) { - this.updateToolsExecuted(1); - } - - // Handle turn completion - re-emit last agent message as completion - if (event.type === 
'turn.completed') { - this.updateTurns(1); - - // Log usage statistics - if (event.usage) { - logger.info('📊 Turn completed', { - inputTokens: event.usage.input_tokens, - cachedInputTokens: event.usage.cached_input_tokens, - outputTokens: event.usage.output_tokens, - }); - } - - // Re-emit last agent message as completion event - if (lastAgentMessage) { - logger.info('✅ Emitting final completion message'); - yield new FormattedEvent('completion', lastAgentMessage); - } - - // Break the loop - turn is complete - break; - } - - // Format the event using CodexEventFormatter - const formattedEvent = CodexEventFormatter.format(event); - - // Yield formatted event if valid - if (formattedEvent) { - logger.debug('📤 CodexSDKAgent yielding event', { - type: formattedEvent.type, - originalType: event.type, - }); - yield formattedEvent; - } - } - } finally { - // CRITICAL: Close iterator to trigger SIGKILL in forked SDK's finally block - // Fire-and-forget to avoid blocking markIdle() - subprocess cleanup can happen async - if (iterator.return) { - logger.debug('🔒 Closing iterator to terminate Codex subprocess'); - iterator.return(undefined).catch((error) => { - logger.warn('⚠️ Iterator cleanup error (non-fatal)', { - error: error instanceof Error ? error.message : String(error), - }); - }); - } - } - - // Complete execution tracking - this.completeExecution(); - - logger.info('✅ CodexSDKAgent execution complete', { - turns: this.metadata.turns, - toolsExecuted: this.metadata.toolsExecuted, - duration: Date.now() - this.executionStartTime, - }); - } catch (error) { - // Clear thread on error - next call will create fresh thread - this.currentThread = null; - logger.debug('🔄 Cleared thread reference due to error'); - - // Mark execution error - this.errorExecution( - error instanceof Error ? error : new Error(String(error)), - ); - - logger.error('❌ CodexSDKAgent execution failed', { - error: error instanceof Error ? 
error.message : String(error), - stack: error instanceof Error ? error.stack : undefined, - }); - - throw error; - } finally { - // Clear AbortController reference - this.abortController = null; - } - } - - /** - * Abort current execution - * Triggers abort signal to stop the current task gracefully - */ - abort(): void { - if (this.abortController) { - logger.info('🛑 Aborting CodexSDKAgent execution'); - this.abortController.abort(); - } - } - - /** - * Check if agent is currently executing - */ - isExecuting(): boolean { - return this.metadata.state === 'executing' && this.abortController !== null; - } - - /** - * Cleanup agent resources - * - * Immediately kills the Codex subprocess using SIGKILL. - * Does NOT close shared ControllerBridge. - */ - async destroy(): Promise { - if (this.isDestroyed()) { - logger.debug('⚠️ CodexSDKAgent already destroyed'); - return; - } - - this.markDestroyed(); - - // Clear thread reference - this.currentThread = null; - - // Trigger abort controller for cleanup - if (this.abortController) { - this.abortController.abort(); - await new Promise(resolve => setTimeout(resolve, 100)); - } - - // DO NOT close ControllerBridge - it's shared and owned by main server - - logger.debug('🗑️ CodexSDKAgent destroyed', { - totalDuration: this.metadata.totalDuration, - turns: this.metadata.turns, - toolsExecuted: this.metadata.toolsExecuted, - }); - } -} diff --git a/packages/agent/src/agent/ControllerToolsAdapter.ts b/packages/agent/src/agent/ControllerToolsAdapter.ts deleted file mode 100644 index 50be00f..0000000 --- a/packages/agent/src/agent/ControllerToolsAdapter.ts +++ /dev/null @@ -1,82 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {tool, createSdkMcpServer} from '@anthropic-ai/claude-agent-sdk'; -import {logger} from '@browseros/common'; -import type {ToolDefinition} from '@browseros/tools'; -import {ControllerResponse} from '@browseros/tools/controller-based'; -import type {Context} from 
'@browseros/tools/controller-based'; - -/** - * Convert a controller tool to Claude SDK MCP tool format - */ -function adaptControllerTool( - toolDef: ToolDefinition, - context: Context, -) { - return tool( - toolDef.name, - toolDef.description, - toolDef.schema, - async (args, _extra) => { - logger.debug(`🔧 Executing controller tool: ${toolDef.name}`, {args}); - - try { - // Create request and response objects - const request = {params: args}; - const response = new ControllerResponse(); - - // Execute the tool handler - await toolDef.handler(request, response, context); - - // Convert response to CallToolResult format - const content = response.toContent(); - - return {content}; - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - logger.error(`❌ Controller tool ${toolDef.name} failed`, { - error: errorMsg, - }); - - return { - content: [ - { - type: 'text' as const, - text: `Error: ${errorMsg}`, - }, - ], - isError: true, - }; - } - }, - ); -} - -/** - * Create an in-process SDK MCP server with all controller tools - * - * @param tools - Array of controller tool definitions - * @param context - Controller context for executing actions - * @returns SDK MCP server configuration - */ -export function createControllerMcpServer( - tools: Array>, - context: Context, -) { - // Adapt all controller tools to SDK format - const sdkTools = tools.map(tool => adaptControllerTool(tool, context)); - - logger.info( - `🔧 Creating SDK MCP server with ${sdkTools.length} controller tools`, - ); - - // Create and return the SDK MCP server - return createSdkMcpServer({ - name: 'browseros-controller', - version: '1.0.0', - tools: sdkTools, - }); -} diff --git a/packages/agent/src/agent/GeminiAgent.ts b/packages/agent/src/agent/GeminiAgent.ts new file mode 100644 index 0000000..fa019fa --- /dev/null +++ b/packages/agent/src/agent/GeminiAgent.ts @@ -0,0 +1,217 @@ +import { + Config as GeminiConfig, + MCPServerConfig, + GeminiEventType, + 
executeToolCall, + type GeminiClient, + type ToolCallRequestInfo, +} from '@google/gemini-cli-core'; +import type { Part } from '@google/genai'; +import { logger, fetchBrowserOSConfig, getLLMConfigFromProvider } from '@browseros/common'; +import { VercelAIContentGenerator, AIProvider } from './gemini-vercel-sdk-adapter/index.js'; +import type { HonoSSEStream } from './gemini-vercel-sdk-adapter/types.js'; +import { AgentExecutionError } from '../errors.js'; +import type { AgentConfig } from './types.js'; + +const MAX_TURNS = 100; + +interface McpHttpServerOptions { + httpUrl: string; + headers?: Record; + trust?: boolean; +} + +// MCP Server Config for HTTP is a positional argument in the constructor (can't be passed as an object) +function createHttpMcpServerConfig(options: McpHttpServerOptions): MCPServerConfig { + return new MCPServerConfig( + undefined, // command (stdio) + undefined, // args (stdio) + undefined, // env (stdio) + undefined, // cwd (stdio) + undefined, // url (sse transport) + options.httpUrl, // httpUrl (streamable http) + options.headers, // headers + undefined, // tcp (websocket) + undefined, // timeout + options.trust, // trust + ); +} + +export class GeminiAgent { + private constructor( + private client: GeminiClient, + private geminiConfig: GeminiConfig, + private contentGenerator: VercelAIContentGenerator, + private conversationId: string, + ) {} + + static async create(config: AgentConfig): Promise { + const tempDir = config.tempDir; + + // If provider is BROWSEROS, fetch config from BROWSEROS_CONFIG_URL + let resolvedConfig = { ...config }; + if (config.provider === AIProvider.BROWSEROS) { + const configUrl = process.env.BROWSEROS_CONFIG_URL; + if (!configUrl) { + throw new Error('BROWSEROS_CONFIG_URL environment variable is required for BrowserOS provider'); + } + + logger.info('Fetching BrowserOS config', { configUrl }); + const browserosConfig = await fetchBrowserOSConfig(configUrl); + const llmConfig = 
getLLMConfigFromProvider(browserosConfig, 'default'); + + resolvedConfig = { + ...config, + model: llmConfig.modelName, + apiKey: llmConfig.apiKey, + baseUrl: llmConfig.baseUrl, + }; + + logger.info('Using BrowserOS config', { + model: resolvedConfig.model, + baseUrl: resolvedConfig.baseUrl, + }); + } + + const modelString = `${resolvedConfig.provider}/${resolvedConfig.model}`; + + const geminiConfig = new GeminiConfig({ + sessionId: resolvedConfig.conversationId, + targetDir: tempDir, + cwd: tempDir, + debugMode: false, + model: modelString, + excludeTools: ['run_shell_command', 'write_file', 'replace'], + mcpServers: resolvedConfig.mcpServerUrl + ? { + 'browseros-mcp': createHttpMcpServerConfig({ + httpUrl: resolvedConfig.mcpServerUrl, + headers: { 'Accept': 'application/json, text/event-stream' }, + trust: true, + }), + } + : undefined, + }); + + await geminiConfig.initialize(); + + console.log('resolvedConfig', resolvedConfig); + const contentGenerator = new VercelAIContentGenerator(resolvedConfig); + + (geminiConfig as unknown as { contentGenerator: VercelAIContentGenerator }).contentGenerator = contentGenerator; + + const client = geminiConfig.getGeminiClient(); + await client.setTools(); + + logger.info('GeminiAgent created', { + conversationId: resolvedConfig.conversationId, + provider: resolvedConfig.provider, + model: resolvedConfig.model, + }); + + return new GeminiAgent(client, geminiConfig, contentGenerator, resolvedConfig.conversationId); + } + + getHistory() { + return this.client.getHistory(); + } + + async execute(message: string, honoStream: HonoSSEStream, signal?: AbortSignal): Promise { + this.contentGenerator.setHonoStream(honoStream); + + const abortSignal = signal || new AbortController().signal; + const promptId = `${this.conversationId}-${Date.now()}`; + + let currentParts: Part[] = [{ text: message }]; + let turnCount = 0; + + logger.info('Starting agent execution', { + conversationId: this.conversationId, + message: message.substring(0, 
100), + historyLength: this.client.getHistory().length, + }); + + while (true) { + turnCount++; + logger.debug(`Turn ${turnCount}`, { conversationId: this.conversationId }); + + if (turnCount > MAX_TURNS) { + logger.warn('Max turns exceeded', { + conversationId: this.conversationId, + turnCount, + }); + break; + } + + const toolCallRequests: ToolCallRequestInfo[] = []; + + const responseStream = this.client.sendMessageStream( + currentParts, + abortSignal, + promptId, + ); + + for await (const event of responseStream) { + if (abortSignal.aborted) { + break; + } + + if (event.type === GeminiEventType.ToolCallRequest) { + toolCallRequests.push(event.value as ToolCallRequestInfo); + } else if (event.type === GeminiEventType.Error) { + const errorValue = event.value as { error: Error }; + throw new AgentExecutionError('Agent execution failed', errorValue.error); + } + // Other events are handled by the content generator + } + + if (toolCallRequests.length > 0) { + logger.debug(`Executing ${toolCallRequests.length} tool(s)`, { + conversationId: this.conversationId, + tools: toolCallRequests.map((r) => r.name), + }); + + const toolResponseParts: Part[] = []; + + for (const requestInfo of toolCallRequests) { + try { + const completedToolCall = await executeToolCall( + this.geminiConfig, + requestInfo, + abortSignal, + ); + + const toolResponse = completedToolCall.response; + + if (toolResponse.error) { + logger.warn('Tool execution error', { + conversationId: this.conversationId, + tool: requestInfo.name, + error: toolResponse.error.message, + }); + } + + if (toolResponse.responseParts) { + toolResponseParts.push(...(toolResponse.responseParts as Part[])); + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error('Tool execution failed', { + conversationId: this.conversationId, + tool: requestInfo.name, + error: errorMessage, + }); + } + } + + currentParts = toolResponseParts; + } else { + logger.info('Agent execution complete', { + conversationId: this.conversationId, + totalTurns: turnCount, + }); + break; + } + } + } +} diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts index 8323fc9..b2af847 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts +++ b/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts @@ -8,7 +8,7 @@ * Multi-provider LLM adapter using Vercel AI SDK */ -import { streamText, generateText, convertToModelMessages } from 'ai'; +import { streamText, generateText } from 'ai'; import { createAnthropic } from '@ai-sdk/anthropic'; import { createOpenAI } from '@ai-sdk/openai'; import { createGoogleGenerativeAI } from '@ai-sdk/google'; @@ -41,7 +41,7 @@ import type { VercelAIConfig } from './types.js'; * Implements ContentGenerator interface using strategy pattern for conversions */ export class VercelAIContentGenerator implements ContentGenerator { - private providerRegistry: Map unknown>; + private providerInstance: (modelId: string) => unknown; private model: string; private honoStream?: HonoSSEStream; @@ -52,16 +52,22 @@ export class VercelAIContentGenerator implements ContentGenerator { constructor(config: VercelAIConfig) { this.model = config.model; - this.honoStream = config.honoStream; - this.providerRegistry = new Map(); // Initialize conversion strategies this.toolStrategy = new ToolConversionStrategy(); this.messageStrategy = new MessageConversionStrategy(); this.responseStrategy = new ResponseConversionStrategy(this.toolStrategy); - // Register providers based on config - this.registerProviders(config); + // Register the single provider from config + this.providerInstance = 
this.createProvider(config); + } + + /** + * Set/override the Hono SSE stream for the current request + * This allows reusing the same ContentGenerator across multiple requests + */ + setHonoStream(stream: HonoSSEStream | undefined): void { + this.honoStream = stream; } /** @@ -79,13 +85,8 @@ export class VercelAIContentGenerator implements ContentGenerator { request.config?.systemInstruction, ); - const { provider, modelName } = this.parseModel( - request.model || this.model, - ); - const providerInstance = this.getProvider(provider); - const result = await generateText({ - model: providerInstance(modelName) as Parameters< + model: this.providerInstance(this.model) as Parameters< typeof generateText >[0]['model'], messages, @@ -112,13 +113,8 @@ export class VercelAIContentGenerator implements ContentGenerator { request.config?.systemInstruction, ); - const { provider, modelName } = this.parseModel( - request.model || this.model, - ); - const providerInstance = this.getProvider(provider); - const result = streamText({ - model: providerInstance(modelName) as Parameters< + model: this.providerInstance(this.model) as Parameters< typeof streamText >[0]['model'], messages, @@ -175,138 +171,88 @@ export class VercelAIContentGenerator implements ContentGenerator { } /** - * Register providers based on config + * Create provider instance based on config */ - private registerProviders(config: VercelAIConfig): void { - const providers = config.providers || {}; - - const anthropicConfig = providers[AIProvider.ANTHROPIC]; - if (anthropicConfig?.apiKey) { - this.providerRegistry.set( - AIProvider.ANTHROPIC, - createAnthropic({ apiKey: anthropicConfig.apiKey }), - ); - } - - const openaiConfig = providers[AIProvider.OPENAI]; - if (openaiConfig?.apiKey) { - this.providerRegistry.set( - AIProvider.OPENAI, - createOpenAI({ - apiKey: openaiConfig.apiKey, - compatibility: 'strict', - }), - ); - } + private createProvider(config: VercelAIConfig): (modelId: string) => unknown { + 
switch (config.provider) { + case AIProvider.ANTHROPIC: + if (!config.apiKey) { + throw new Error('Anthropic provider requires apiKey'); + } + return createAnthropic({ apiKey: config.apiKey }); - const googleConfig = providers[AIProvider.GOOGLE]; - if (googleConfig?.apiKey) { - this.providerRegistry.set( - AIProvider.GOOGLE, - createGoogleGenerativeAI({ apiKey: googleConfig.apiKey }), - ); - } + case AIProvider.OPENAI: + if (!config.apiKey) { + throw new Error('OpenAI provider requires apiKey'); + } + return createOpenAI({ apiKey: config.apiKey }); - const openrouterConfig = providers[AIProvider.OPENROUTER]; - if (openrouterConfig?.apiKey) { - this.providerRegistry.set( - AIProvider.OPENROUTER, - createOpenRouter({ apiKey: openrouterConfig.apiKey }), - ); - } + case AIProvider.GOOGLE: + if (!config.apiKey) { + throw new Error('Google provider requires apiKey'); + } + return createGoogleGenerativeAI({ apiKey: config.apiKey }); - const azureConfig = providers[AIProvider.AZURE]; - if (azureConfig?.apiKey && azureConfig.resourceName) { - this.providerRegistry.set( - AIProvider.AZURE, - createAzure({ - resourceName: azureConfig.resourceName, - apiKey: azureConfig.apiKey, - }), - ); - } + case AIProvider.OPENROUTER: + if (!config.apiKey) { + throw new Error('OpenRouter provider requires apiKey'); + } + return createOpenRouter({ apiKey: config.apiKey }); - const lmstudioConfig = providers[AIProvider.LMSTUDIO]; - if (lmstudioConfig !== undefined) { - this.providerRegistry.set( - AIProvider.LMSTUDIO, - createOpenAICompatible({ + case AIProvider.AZURE: + if (!config.apiKey || !config.resourceName) { + throw new Error('Azure provider requires apiKey and resourceName'); + } + return createAzure({ + resourceName: config.resourceName, + apiKey: config.apiKey, + }); + + case AIProvider.LMSTUDIO: + if (!config.baseUrl) { + throw new Error('LMStudio provider requires baseUrl'); + } + return createOpenAICompatible({ name: 'lmstudio', - baseURL: lmstudioConfig.baseUrl || 
'http://localhost:1234/v1', - }), - ); - } + baseURL: config.baseUrl, + }); - const ollamaConfig = providers[AIProvider.OLLAMA]; - if (ollamaConfig !== undefined) { - this.providerRegistry.set( - AIProvider.OLLAMA, - createOpenAICompatible({ + case AIProvider.OLLAMA: + if (!config.baseUrl) { + throw new Error('Ollama provider requires baseUrl'); + } + return createOpenAICompatible({ name: 'ollama', - baseURL: ollamaConfig.baseUrl || 'http://localhost:11434/v1', - }), - ); - } - - const bedrockConfig = providers[AIProvider.BEDROCK]; - if ( - bedrockConfig?.accessKeyId && - bedrockConfig.secretAccessKey && - bedrockConfig.region - ) { - this.providerRegistry.set( - AIProvider.BEDROCK, - createAmazonBedrock({ - region: bedrockConfig.region, - accessKeyId: bedrockConfig.accessKeyId, - secretAccessKey: bedrockConfig.secretAccessKey, - sessionToken: bedrockConfig.sessionToken, - }), - ); - } - } - - /** - * Parse model string into provider and model name - */ - private parseModel(modelString: string): { - provider: string; - modelName: string; - } { - const parts = modelString.split('/'); - - if (parts.length < 2) { - throw new Error( - `Invalid model format: "${modelString}". ` + - `Expected "provider/model-name" (e.g., "anthropic/claude-3-5-sonnet-20241022")`, - ); - } - - const provider = parts[0]; - const modelName = parts.slice(1).join('/'); - - return { provider, modelName }; - } - - /** - * Get provider instance or throw error - */ - private getProvider(provider: string): (modelId: string) => unknown { - const providerInstance = this.providerRegistry.get(provider); + baseURL: config.baseUrl, + }); - if (!providerInstance) { - const available = Array.from(this.providerRegistry.keys()).join(', '); - throw new Error( - `Provider "${provider}" not configured. ` + - `Available providers: ${available || 'none'}. 
` + - `Configure it in config.providers.${provider}`, - ); + case AIProvider.BEDROCK: + if (!config.accessKeyId || !config.secretAccessKey || !config.region) { + throw new Error('Bedrock provider requires accessKeyId, secretAccessKey, and region'); + } + return createAmazonBedrock({ + region: config.region, + accessKeyId: config.accessKeyId, + secretAccessKey: config.secretAccessKey, + sessionToken: config.sessionToken, + }); + + case AIProvider.BROWSEROS: + if (!config.baseUrl || !config.apiKey) { + throw new Error('BrowserOS provider requires baseUrl and apiKey'); + } + return createOpenAICompatible({ + name: 'browseros', + baseURL: config.baseUrl, + apiKey: config.apiKey, + }); + + default: + throw new Error(`Unknown provider: ${config.provider}`); } - - return providerInstance; } } // Re-export types for consumers export { AIProvider }; -export type { VercelAIConfig, ProviderConfig, HonoSSEStream } from './types.js'; +export type { VercelAIConfig, HonoSSEStream } from './types.js'; diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/message.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/message.ts index f71688a..f0950c5 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/message.ts +++ b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/message.ts @@ -10,10 +10,10 @@ */ import type { - CoreMessage, VercelContentPart, - LanguageModelV2ToolResultOutput, } from '../types.js'; +import type { CoreMessage } from 'ai'; +import type { LanguageModelV2ToolResultOutput, JSONValue } from '@ai-sdk/provider'; import type { Content, ContentUnion } from '@google/genai'; import { isTextPart, @@ -247,22 +247,21 @@ export class MessageConversionStrategy { // Check for error first if (typeof response === 'object' && 'error' in response && response.error) { - output = { - type: typeof response.error === 'string' ? 
'error-text' : 'error-json', - value: response.error, - }; + const errorValue = response.error; + output = typeof errorValue === 'string' + ? { type: 'error-text', value: errorValue } + : { type: 'error-json', value: errorValue as JSONValue }; } else if (typeof response === 'object' && 'output' in response) { // Gemini's explicit output format: {output: value} - output = { - type: typeof response.output === 'string' ? 'text' : 'json', - value: response.output, - }; + const outputValue = response.output; + output = typeof outputValue === 'string' + ? { type: 'text', value: outputValue } + : { type: 'json', value: outputValue as JSONValue }; } else { // Whole response is the output - output = { - type: typeof response === 'string' ? 'text' : 'json', - value: response, - }; + output = typeof response === 'string' + ? { type: 'text', value: response } + : { type: 'json', value: response as JSONValue }; } return { diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/response.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/response.ts index 83bfb06..6f064da 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/response.ts +++ b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/response.ts @@ -10,10 +10,9 @@ * Handles both streaming and non-streaming responses */ -import { GenerateContentResponse, FinishReason } from '@google/genai'; +import { GenerateContentResponse, FinishReason, Part, FunctionCall } from '@google/genai' +import { formatDataStreamPart } from '@ai-sdk/ui-utils'; import type { - Part, - FunctionCall, VercelFinishReason, VercelUsage, HonoSSEStream, @@ -103,7 +102,7 @@ export class ResponseConversionStrategy { { toolCallId: string; toolName: string; - args: unknown; + input: unknown; } >(); @@ -134,12 +133,10 @@ export class ResponseConversionStrategy { const delta = chunk.text; textAccumulator += delta; - // Emit v5 SSE format to frontend: text-delta event - // v5 uses 'text' 
property, not 'textDelta' (v4) + // Emit AI SDK format: 0:"text" if (honoStream) { try { - const sseData = `data: ${JSON.stringify({ type: 'text-delta', text: delta })}\n\n`; - await honoStream.write(sseData); + await honoStream.write(formatDataStreamPart('text', delta)); } catch { // Failed to write to stream } @@ -157,16 +154,14 @@ export class ResponseConversionStrategy { ], } as GenerateContentResponse; } else if (chunk.type === 'tool-call') { - // Emit v5 SSE format to frontend: tool-call event + // Emit AI SDK format: 9:{"toolCallId":"...","toolName":"...","args":{...}} if (honoStream) { try { - const sseData = `data: ${JSON.stringify({ - type: 'tool-call', + await honoStream.write(formatDataStreamPart('tool_call', { toolCallId: chunk.toolCallId, toolName: chunk.toolName, - input: chunk.input, - })}\n\n`; - await honoStream.write(sseData); + args: chunk.input, + })); } catch { // Failed to write to stream } @@ -191,28 +186,6 @@ export class ResponseConversionStrategy { usage = this.estimateUsage(textAccumulator); } - // Emit final finish event in v5 SSE format - if (honoStream && (finishReason || usage)) { - try { - const finishData: any = { type: 'finish' }; - if (finishReason) { - finishData.finishReason = finishReason; - } - if (usage) { - finishData.usage = { - promptTokens: usage.promptTokens || 0, - completionTokens: usage.completionTokens || 0, - totalTokens: usage.totalTokens || 0, - }; - } - - const sseData = `data: ${JSON.stringify(finishData)}\n\n`; - await honoStream.write(sseData); - } catch { - // Failed to write to stream - } - } - // Yield final response with tool calls and metadata if (toolCallsMap.size > 0 || finishReason || usage) { const parts: Part[] = []; diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/tool.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/tool.ts index b8c7f91..4e1065a 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/tool.ts +++ 
b/packages/agent/src/agent/gemini-vercel-sdk-adapter/strategies/tool.ts @@ -10,13 +10,13 @@ */ import type { - FunctionCall, - FunctionDeclaration, VercelTool, } from '../types.js'; -import { jsonSchema, VercelToolCallSchema } from '../types.js'; + +import { jsonSchema } from 'ai'; import { ConversionError } from '../errors.js'; -import type { ToolListUnion } from '@google/genai'; +import type { ToolListUnion, FunctionDeclaration, FunctionCall } from '@google/genai'; +import { VercelToolCallSchema } from '../types.js'; export class ToolConversionStrategy { /** diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/types.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/types.ts index 08affa7..8a69afc 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/types.ts +++ b/packages/agent/src/agent/gemini-vercel-sdk-adapter/types.ts @@ -11,27 +11,8 @@ import { z } from 'zod'; import { jsonSchema } from 'ai'; - -// Re-export for use in strategies -export { jsonSchema }; - -// === Re-export SDK Types === - // Vercel AI SDK -export type { CoreMessage } from 'ai'; -export type { LanguageModelV2ToolResultOutput } from '@ai-sdk/provider'; - -// Gemini SDK -export type { - Part, - FunctionCall, - FunctionDeclaration, - FunctionResponse, - Tool, - Content, - GenerateContentResponse, - FinishReason, -} from '@google/genai'; +import type { LanguageModelV2ToolResultOutput } from '@ai-sdk/provider'; // === Vercel SDK Runtime Shapes (What We Receive) === @@ -228,28 +209,25 @@ export enum AIProvider { OLLAMA = 'ollama', LMSTUDIO = 'lmstudio', BEDROCK = 'bedrock', + BROWSEROS = 'browseros', } /** - * Provider-specific configuration + * Zod schema for Vercel AI adapter configuration + * Single source of truth - use z.infer for the type */ -export interface ProviderConfig { - apiKey?: string; - baseUrl?: string; +export const VercelAIConfigSchema = z.object({ + provider: z.nativeEnum(AIProvider), + model: z.string().min(1, 'Model name is required'), + apiKey: 
z.string().optional(), + baseUrl: z.string().optional(), // Azure-specific - resourceName?: string; + resourceName: z.string().optional(), // AWS Bedrock-specific - region?: string; - accessKeyId?: string; - secretAccessKey?: string; - sessionToken?: string; -} + region: z.string().optional(), + accessKeyId: z.string().optional(), + secretAccessKey: z.string().optional(), + sessionToken: z.string().optional(), +}); -/** - * Configuration for Vercel AI adapter - */ -export interface VercelAIConfig { - model: string; - providers?: Partial>; - honoStream?: HonoSSEStream; -} +export type VercelAIConfig = z.infer; \ No newline at end of file diff --git a/packages/agent/src/agent/index.ts b/packages/agent/src/agent/index.ts new file mode 100644 index 0000000..53429a3 --- /dev/null +++ b/packages/agent/src/agent/index.ts @@ -0,0 +1,4 @@ +export { GeminiAgent } from './GeminiAgent.js'; +export type { AgentConfig } from './types.js'; +export { VercelAIContentGenerator, AIProvider } from './gemini-vercel-sdk-adapter/index.js'; +export type { VercelAIConfig, HonoSSEStream } from './gemini-vercel-sdk-adapter/index.js'; diff --git a/packages/agent/src/agent/registry.ts b/packages/agent/src/agent/registry.ts deleted file mode 100644 index c5ad172..0000000 --- a/packages/agent/src/agent/registry.ts +++ /dev/null @@ -1,28 +0,0 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ - -import {AgentFactory} from './AgentFactory.js'; -import {ClaudeSDKAgent} from './ClaudeSDKAgent.js'; -import {CodexSDKAgent} from './CodexSDKAgent.js'; - -/** - * Register all available agents - * - * This should be called once at application startup to register - * all agent types with the factory. 
- */ -export function registerAgents(): void { - AgentFactory.register( - 'codex-sdk', - CodexSDKAgent, - 'Codex SDK agent for OpenAI Codex integration', - ); - - AgentFactory.register( - 'claude-sdk', - ClaudeSDKAgent, - 'Claude SDK agent for Anthropic Claude integration', - ); -} diff --git a/packages/agent/src/agent/types.ts b/packages/agent/src/agent/types.ts index c6fb175..49a5d26 100644 --- a/packages/agent/src/agent/types.ts +++ b/packages/agent/src/agent/types.ts @@ -1,159 +1,10 @@ -/** - * @license - * Copyright 2025 BrowserOS - */ +import { z } from 'zod'; +import { VercelAIConfigSchema } from './gemini-vercel-sdk-adapter/types.js'; -import {z} from 'zod'; - -/** - * Formatted event structure for WebSocket clients - */ -export class FormattedEvent { - type: - | 'init' - | 'thinking' - | 'tool_use' - | 'tool_result' - | 'response' - | 'completion' - | 'error' - | 'processing'; - content: string; - metadata?: { - turnCount?: number; - isError?: boolean; - duration?: number; - deniedTools?: number; - }; - - constructor( - type: FormattedEvent['type'], - content: string, - metadata?: FormattedEvent['metadata'], - ) { - this.type = type; - this.content = content; - this.metadata = metadata; - } - - toJSON() { - return { - type: this.type, - content: this.content, - ...(this.metadata && {metadata: this.metadata}), - }; - } -} - -/** - * Configuration for agent initialization - * - * Contains all parameters needed to create and configure an agent - */ -export const AgentConfigSchema = z.object({ - /** - * Resources directory path - used for binary storage and static resources - * Required - serves as the primary directory for binaries - */ - resourcesDir: z.string().min(1, 'Resources directory is required'), - - /** - * Execution directory path - used for logs, configs, and working directory - * Always set (normalized to resourcesDir if not explicitly provided) - */ - executionDir: z.string().min(1), - - /** - * MCP server port (optional, defaults to 9100) - */ 
- mcpServerPort: z.number().positive().optional(), - - /** - * API key for the agent SDK (Anthropic, OpenAI, etc.) - * Optional - can be provided via environment variables or config URL - */ - apiKey: z.string().optional(), - - /** - * Base URL for custom LLM endpoints - */ - baseUrl: z.string().url(), - - /** - * Model name/identifier to use - */ - modelName: z.string(), - - /** - * Maximum conversation turns before stopping - * Default: 100 - */ - maxTurns: z.number().positive().optional(), - - /** - * Maximum thinking tokens (for models that support extended thinking) - * Default: 10000 - */ - maxThinkingTokens: z.number().positive().optional(), - - /** - * System prompt to guide agent behavior - * Optional - agents have their own default prompts - */ - systemPrompt: z.string().optional(), - - /** - * MCP servers configuration (handled internally by agents) - * Optional - agents configure their own MCP servers - */ - mcpServers: z.record(z.string(), z.any()).optional(), -}); - -export type AgentConfig = z.infer; - -/** - * Runtime metadata about agent execution state - */ -export const AgentMetadataSchema = z.object({ - /** - * Agent type identifier (e.g., 'claude-sdk') - */ - type: z.string(), - - /** - * Current turn count - */ - turns: z.number().nonnegative(), - - /** - * Total execution time in milliseconds (across all execute() calls) - */ - totalDuration: z.number().nonnegative(), - - /** - * Timestamp of last event emitted - */ - lastEventTime: z.number().positive(), - - /** - * Number of tools executed - */ - toolsExecuted: z.number().nonnegative(), - - /** - * Current agent state - */ - state: z.enum(['idle', 'executing', 'error', 'destroyed']), - - /** - * Error message if state is 'error' - */ - error: z.string().optional(), - - /** - * Agent-specific custom metadata - */ - custom: z.record(z.string(), z.unknown()).optional(), +export const AgentConfigSchema = VercelAIConfigSchema.extend({ + conversationId: z.string(), + tempDir: z.string(), + 
mcpServerUrl: z.string().optional(), }); -export type AgentMetadata = z.infer<typeof AgentMetadataSchema>; +export type AgentConfig = z.infer<typeof AgentConfigSchema>; \ No newline at end of file