From 2c2580bce63688e6eb78a5e36bb4713cb4abe00f Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Thu, 23 Oct 2025 13:31:31 -0700 Subject: [PATCH 1/7] fix cua example, remove root model --- examples/agent_example.py | 14 ++-- examples/agent_example_local.py | 102 ++++++++++++++++++++++++++++++ stagehand/agent/agent.py | 5 +- stagehand/agent/google_cua.py | 19 +++--- stagehand/agent/openai_cua.py | 8 +-- stagehand/handlers/cua_handler.py | 5 +- stagehand/types/agent.py | 28 ++++---- 7 files changed, 136 insertions(+), 45 deletions(-) create mode 100644 examples/agent_example_local.py diff --git a/examples/agent_example.py b/examples/agent_example.py index 5aca9c21..6cb3cb21 100644 --- a/examples/agent_example.py +++ b/examples/agent_example.py @@ -36,11 +36,8 @@ async def main(): # Build a unified configuration object for Stagehand config = StagehandConfig( env="BROWSERBASE", - # env="LOCAL", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="gpt-4o", - self_heal=True, system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, verbose=2, @@ -51,12 +48,11 @@ async def main(): # Initialize - this creates a new session automatically. 
console.print("\n🚀 [info]Initializing Stagehand...[/]") - await stagehand.init() - if stagehand.env == "BROWSERBASE": - console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") - console.print( - f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" - ) + await stagehand.init() + console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") + console.print( + f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" + ) console.print("\n▶️ [highlight] Navigating[/] to Google") await stagehand.page.goto("https://google.com/") diff --git a/examples/agent_example_local.py b/examples/agent_example_local.py new file mode 100644 index 00000000..7a725e4f --- /dev/null +++ b/examples/agent_example_local.py @@ -0,0 +1,102 @@ +import asyncio +import logging +import os + +from dotenv import load_dotenv +from rich.console import Console +from rich.panel import Panel +from rich.theme import Theme + +from stagehand import Stagehand, StagehandConfig, configure_logging + +# Create a custom theme for consistent styling +custom_theme = Theme( + { + "info": "cyan", + "success": "green", + "warning": "yellow", + "error": "red bold", + "highlight": "magenta", + "url": "blue underline", + } +) + +# Create a Rich console instance with our theme +console = Console(theme=custom_theme) + +load_dotenv() + +# Configure logging with the utility function +configure_logging( + level=logging.INFO, # Set to INFO for regular logs, DEBUG for detailed + quiet_dependencies=True, # Reduce noise from dependencies +) + +async def main(): + # Build a unified configuration object for Stagehand + config = StagehandConfig( + env="LOCAL", + system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", + model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + verbose=2, + ) + + # Create a Stagehand client using the
configuration object. + stagehand = Stagehand(config) + + # Initialize - this creates a new session automatically. + console.print("\n🚀 [info]Initializing Stagehand...[/]") + await stagehand.init() + + console.print("\n▶️ [highlight] Navigating[/] to Google") + await stagehand.page.goto("https://google.com/") + console.print("✅ [success]Navigated to Google[/]") + + console.print("\n▶️ [highlight] Using Agent to perform a task[/]: playing a game of 2048") + agent = stagehand.agent( + model="gemini-2.5-computer-use-preview-10-2025", + instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.", + options={"apiKey": os.getenv("GEMINI_API_KEY")} + ) + agent_result = await agent.execute( + instruction="Play a game of 2048", + max_steps=20, + auto_screenshot=True, + ) + + console.print(agent_result) + + console.print("📊 [info]Agent execution result:[/]") + console.print(f"🎯 Completed: [bold]{'Yes' if agent_result.completed else 'No'}[/]") + if agent_result.message: + console.print(f"💬 Message: [italic]{agent_result.message}[/]") + + if agent_result.actions: + console.print(f"🔄 Actions performed: [bold]{len(agent_result.actions)}[/]") + for i, action in enumerate(agent_result.actions): + action_type = action.type + + console.print(f" Action {i+1}: {action_type if action_type else 'Unknown'}") + + # For debugging, you can also print the full JSON + console.print("[dim]Full response JSON:[/]") + console.print_json(f"{agent_result.model_dump_json()}") + + # Close the session + console.print("\n⏹️ [warning]Closing session...[/]") + await stagehand.close() + console.print("✅ [success]Session closed successfully![/]") + console.rule("[bold]End of Example[/]") + + +if __name__ == "__main__": + # Add a fancy header + console.print( + "\n", + Panel( + "[light_gray]Stagehand 🤘 Agent Example[/]", +
border_style="green", + padding=(1, 10), + ), + ) + asyncio.run(main()) \ No newline at end of file diff --git a/stagehand/agent/agent.py b/stagehand/agent/agent.py index bcd506be..78faeb45 100644 --- a/stagehand/agent/agent.py +++ b/stagehand/agent/agent.py @@ -170,13 +170,10 @@ async def execute( f"Agent execution finished. Success: {agent_result.completed}. Message: {agent_result.message}", category="agent", ) - # To clean up pydantic model output - actions_repr = [action.root for action in agent_result.actions] self.logger.debug( - f"Agent actions: {actions_repr}", + f"Agent actions: {agent_result.actions}", category="agent", ) - agent_result.actions = actions_repr return agent_result else: agent_config_payload = self.config.model_dump( diff --git a/stagehand/agent/google_cua.py b/stagehand/agent/google_cua.py index 125fd0fc..4ac4dbc1 100644 --- a/stagehand/agent/google_cua.py +++ b/stagehand/agent/google_cua.py @@ -25,6 +25,7 @@ AgentResult, ) from .client import AgentClient +from pydantic import TypeAdapter load_dotenv() @@ -176,7 +177,7 @@ def _process_provider_response( and candidate.safety_ratings ): error_message += f" - Safety Ratings: {candidate.safety_ratings}" - self.logger.warning(error_message, category="agent") + self.logger.error(error_message, category="agent") return [], reasoning_text, True, error_message, [] if not function_call_parts: @@ -260,7 +261,7 @@ def _process_provider_response( "keys": [self.key_to_playwright("PageDown")], } else: - self.logger.warning( + self.logger.error( f"Unsupported scroll direction: {direction}", category="agent" ) return ( @@ -282,7 +283,7 @@ def _process_provider_response( elif direction in ("left", "right"): magnitude = self._normalize_coordinates(magnitude, 0)[0] else: - self.logger.warning( + self.logger.error( f"Unsupported scroll direction: {direction}", category="agent" ) return ( @@ -352,7 +353,7 @@ def _process_provider_response( "arguments": {"url": "https://www.google.com"}, } else: - 
self.logger.warning( + self.logger.error( f"Unsupported Gemini CUA function: {action_name}", category="agent" ) return ( @@ -367,13 +368,11 @@ def _process_provider_response( try: # Directly construct the AgentActionType using the payload. # Pydantic will use the 'type' field in action_payload_dict to discriminate the Union. - action_payload_for_agent_action_type = AgentActionType( - **action_payload_dict - ) + action_payload_for_agent_action_type = TypeAdapter(AgentActionType).validate_python(action_payload_dict) agent_action = AgentAction( action_type=action_type_str, # This should match the 'type' in action_payload_dict - action=action_payload_for_agent_action_type, # No RootModel wrapping if AgentActionType is the RootModel itself + action=action_payload_for_agent_action_type, reasoning=reasoning_text, status="tool_code", ) @@ -598,7 +597,7 @@ async def run_task( ) if not agent_action and not task_completed: - self.logger.warning( + self.logger.debug( "Model did not request an action and task not marked complete. 
Ending task.", category="agent", ) @@ -614,7 +613,7 @@ async def run_task( usage=usage_obj, ) - self.logger.warning("Max steps reached for Gemini CUA task.", category="agent") + self.logger.debug("Max steps reached for Gemini CUA task.", category="agent") usage_obj = { "input_tokens": total_input_tokens, "output_tokens": total_output_tokens, diff --git a/stagehand/agent/openai_cua.py b/stagehand/agent/openai_cua.py index a1b9db89..3ab29747 100644 --- a/stagehand/agent/openai_cua.py +++ b/stagehand/agent/openai_cua.py @@ -7,7 +7,7 @@ from openai import ( OpenAI as OpenAISDK, # Renamed to avoid conflict with a potential class name ) -from pydantic import BaseModel # Ensure BaseModel is imported for isinstance check +from pydantic import BaseModel, TypeAdapter # Ensure BaseModel is imported for isinstance check from ..handlers.cua_handler import CUAHandler from ..types.agent import ( @@ -175,8 +175,8 @@ def _process_provider_response( ) try: - action_payload = AgentActionType( - **computer_call_item.action.model_dump() + action_payload = TypeAdapter(AgentActionType).validate_python( + computer_call_item.action.model_dump() ) agent_action = AgentAction( action_type=computer_call_item.action.type, @@ -225,7 +225,7 @@ def _process_provider_response( function_action_payload = FunctionAction(type="function", name=function_call_item.name, arguments=arguments) # type: ignore agent_action = AgentAction( action_type="function", # Literal 'function' - action=AgentActionType(root=function_action_payload), + action=function_action_payload, reasoning=reasoning_text, # Reasoning applies to this action status=( function_call_item.status diff --git a/stagehand/handlers/cua_handler.py b/stagehand/handlers/cua_handler.py index 2708aa3d..e95c42ee 100644 --- a/stagehand/handlers/cua_handler.py +++ b/stagehand/handlers/cua_handler.py @@ -35,13 +35,12 @@ async def get_screenshot_base64(self) -> str: async def perform_action(self, action: AgentAction) -> ActionExecutionResult: """Execute a 
single action on the page.""" + specific_action_model = action.action self.logger.info( - f"Performing action: {action.action.root if action.action else ''}", + f"Performing action: {specific_action_model or ''}", category=StagehandFunctionName.AGENT, ) action_type = action.action_type - # action.action is the RootModel, action.action.root is the specific action model (e.g., ClickAction) - specific_action_model = action.action.root if action.action else None if not specific_action_model: self.logger.error( diff --git a/stagehand/types/agent.py b/stagehand/types/agent.py index 8f29d3c2..c5a6a614 100644 --- a/stagehand/types/agent.py +++ b/stagehand/types/agent.py @@ -1,6 +1,6 @@ from typing import Any, Literal, Optional, Union -from pydantic import BaseModel, RootModel +from pydantic import BaseModel class AgentConfig(BaseModel): @@ -96,20 +96,18 @@ class KeyAction(BaseModel): # From Anthropic text: str -AgentActionType = RootModel[ - Union[ - ClickAction, - DoubleClickAction, - TypeAction, - KeyPressAction, - ScrollAction, - DragAction, - MoveAction, - WaitAction, - ScreenshotAction, - FunctionAction, - KeyAction, - ] +AgentActionType = Union[ + ClickAction, + DoubleClickAction, + TypeAction, + KeyPressAction, + ScrollAction, + DragAction, + MoveAction, + WaitAction, + ScreenshotAction, + FunctionAction, + KeyAction, ] From a258f9f07060619e7ade98fc27f40c720eecb7bf Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Thu, 23 Oct 2025 15:46:16 -0700 Subject: [PATCH 2/7] formatting --- stagehand/agent/google_cua.py | 4 +++- stagehand/agent/openai_cua.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/stagehand/agent/google_cua.py b/stagehand/agent/google_cua.py index 4ac4dbc1..e41d4d49 100644 --- a/stagehand/agent/google_cua.py +++ b/stagehand/agent/google_cua.py @@ -368,7 +368,9 @@ def _process_provider_response( try: # Directly construct the AgentActionType using the payload. 
# Pydantic will use the 'type' field in action_payload_dict to discriminate the Union. - action_payload_for_agent_action_type = TypeAdapter(AgentActionType).validate_python(action_payload_dict) + action_payload_for_agent_action_type = TypeAdapter( + AgentActionType + ).validate_python(action_payload_dict) agent_action = AgentAction( action_type=action_type_str, # This should match the 'type' in action_payload_dict diff --git a/stagehand/agent/openai_cua.py b/stagehand/agent/openai_cua.py index 3ab29747..16506f5a 100644 --- a/stagehand/agent/openai_cua.py +++ b/stagehand/agent/openai_cua.py @@ -7,7 +7,10 @@ from openai import ( OpenAI as OpenAISDK, # Renamed to avoid conflict with a potential class name ) -from pydantic import BaseModel, TypeAdapter # Ensure BaseModel is imported for isinstance check +from pydantic import ( + BaseModel, + TypeAdapter, +) # Ensure BaseModel is imported for isinstance check from ..handlers.cua_handler import CUAHandler from ..types.agent import ( From 2f7a41e3ea150f80792d66edacac9758d6c35e3b Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Thu, 23 Oct 2025 15:52:41 -0700 Subject: [PATCH 3/7] so formatted --- stagehand/agent/google_cua.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stagehand/agent/google_cua.py b/stagehand/agent/google_cua.py index e41d4d49..fc46196a 100644 --- a/stagehand/agent/google_cua.py +++ b/stagehand/agent/google_cua.py @@ -14,6 +14,7 @@ GenerateContentConfig, Part, ) +from pydantic import TypeAdapter from ..handlers.cua_handler import CUAHandler from ..types.agent import ( @@ -25,7 +26,6 @@ AgentResult, ) from .client import AgentClient -from pydantic import TypeAdapter load_dotenv() From 744b78fb88c0f1aa88f3fd2c246259d34f4fd872 Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Thu, 23 Oct 2025 15:53:34 -0700 Subject: [PATCH 4/7] add changeset --- .changeset/arrogant-caiman-of-competence.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 
.changeset/arrogant-caiman-of-competence.md diff --git a/.changeset/arrogant-caiman-of-competence.md b/.changeset/arrogant-caiman-of-competence.md new file mode 100644 index 00000000..d9a26e7f --- /dev/null +++ b/.changeset/arrogant-caiman-of-competence.md @@ -0,0 +1,5 @@ +--- +"stagehand": patch +--- + +add local cua example, remove root model from types From 690a801bd114fd34ae072f20fdc9dede1189a18f Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Fri, 24 Oct 2025 09:34:06 -0700 Subject: [PATCH 5/7] fix anthropic cua and add self heal back to agent examples --- examples/agent_example.py | 1 + examples/agent_example_local.py | 1 + stagehand/agent/anthropic_cua.py | 190 ++++++++++++++++--------------- 3 files changed, 99 insertions(+), 93 deletions(-) diff --git a/examples/agent_example.py b/examples/agent_example.py index 6cb3cb21..4c8cf877 100644 --- a/examples/agent_example.py +++ b/examples/agent_example.py @@ -40,6 +40,7 @@ async def main(): project_id=os.getenv("BROWSERBASE_PROJECT_ID"), system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + self_heal=True, verbose=2, ) diff --git a/examples/agent_example_local.py b/examples/agent_example_local.py index 7a725e4f..5d160f54 100644 --- a/examples/agent_example_local.py +++ b/examples/agent_example_local.py @@ -38,6 +38,7 @@ async def main(): env="LOCAL", system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + self_heal=True, verbose=2, ) diff --git a/stagehand/agent/anthropic_cua.py b/stagehand/agent/anthropic_cua.py index f0d8b2a7..85401322 100644 --- a/stagehand/agent/anthropic_cua.py +++ b/stagehand/agent/anthropic_cua.py @@ -4,6 +4,7 @@ from anthropic import Anthropic, AnthropicError from dotenv import load_dotenv +from pydantic import TypeAdapter from ..handlers.cua_handler import
CUAHandler, StagehandFunctionName from ..types.agent import ( @@ -362,7 +363,7 @@ def _convert_tool_use_to_agent_action( ) return None - action_model_payload: Optional[AgentActionType] = None + action_payload_dict: Optional[dict[str, Any]] = None reasoning = tool_input.get("reasoning") try: @@ -375,52 +376,53 @@ def _convert_tool_use_to_agent_action( ) if action_type_str == "left_click": - action_model_payload = AgentActionType( - type="click", - x=x, - y=y, - button="left", - ) + action_payload_dict = { + "type": "click", + "x": x, + "y": y, + "button": "left", + } action_type_str = "click" # Normalize elif action_type_str == "right_click": - action_model_payload = AgentActionType( - type="click", - x=x, - y=y, - button="right", - ) + action_payload_dict = { + "type": "click", + "x": x, + "y": y, + "button": "right", + } action_type_str = "click" # Normalize elif action_type_str == "middle_click": - action_model_payload = AgentActionType( - type="click", - x=x, - y=y, - button="middle", - ) + action_payload_dict = { + "type": "click", + "x": x, + "y": y, + "button": "middle", + } action_type_str = "click" # Normalize elif action_type_str == "double_click": - action_model_payload = AgentActionType( - type="double_click", - x=x, - y=y, - ) + action_payload_dict = { + "type": "double_click", + "x": x, + "y": y, + } elif action_type_str == "triple_click": # Handle as double_click for now since we don't have a dedicated triple click - action_model_payload = AgentActionType( - type="double_click", - x=x, - y=y, - ) + action_payload_dict = { + "type": "double_click", + "x": x, + "y": y, + } action_type_str = "double_click" # Normalize elif action_type_str == "type": - action_model_payload = AgentActionType( - type="type", text=tool_input.get("text", "") - ) + action_payload_dict = { + "type": "type", + "text": tool_input.get("text", ""), + } elif action_type_str == "key": key_text = tool_input.get("text", "") @@ -429,10 +431,10 @@ def _convert_tool_use_to_agent_action( 
keys = [ self.key_to_playwright(k.strip()) for k in key_text.split("+") ] - action_model_payload = AgentActionType( - type="keypress", - keys=keys, - ) + action_payload_dict = { + "type": "keypress", + "keys": keys, + } action_type_str = "keypress" # Normalize elif action_type_str == "hold_key": @@ -446,10 +448,10 @@ def _convert_tool_use_to_agent_action( self.key_to_playwright(k.strip()) for k in key_text.split("+") ] # For now, handle as a regular keypress - action_model_payload = AgentActionType( - type="keypress", - keys=keys, - ) + action_payload_dict = { + "type": "keypress", + "keys": keys, + } action_type_str = "keypress" # Normalize elif action_type_str == "scroll": @@ -469,20 +471,20 @@ def _convert_tool_use_to_agent_action( elif scroll_direction == "left": scroll_x = -scroll_amount * scroll_multiplier - action_model_payload = AgentActionType( - type="scroll", - x=x or 0, # Default to 0 if none - y=y or 0, # Default to 0 if none - scroll_x=scroll_x, - scroll_y=scroll_y, - ) + action_payload_dict = { + "type": "scroll", + "x": x or 0, # Default to 0 if none + "y": y or 0, # Default to 0 if none + "scroll_x": scroll_x, + "scroll_y": scroll_y, + } elif action_type_str == "mouse_move": - action_model_payload = AgentActionType( - type="move", - x=x, - y=y, - ) + action_payload_dict = { + "type": "move", + "x": x, + "y": y, + } action_type_str = "move" # Normalize elif action_type_str == "left_click_drag": @@ -499,14 +501,13 @@ def _convert_tool_use_to_agent_action( and x is not None and y is not None ): - path_points = [ - Point(x=start_x, y=start_y), - Point(x=x, y=y), - ] - action_model_payload = AgentActionType( - type="drag", - path=path_points, - ) + action_payload_dict = { + "type": "drag", + "path": [ + {"x": start_x, "y": start_y}, + {"x": x, "y": y}, + ], + } action_type_str = "drag" # Normalize else: self.logger.error( @@ -517,54 +518,54 @@ def _convert_tool_use_to_agent_action( elif action_type_str == "left_mouse_down": # Currently not directly 
supported - handle as a click for now - action_model_payload = AgentActionType( - type="click", - x=x, - y=y, - button="left", - ) + action_payload_dict = { + "type": "click", + "x": x, + "y": y, + "button": "left", + } action_type_str = "click" # Normalize elif action_type_str == "left_mouse_up": # Currently not directly supported - handle as a click for now - action_model_payload = AgentActionType( - type="click", - x=x, - y=y, - button="left", - ) + action_payload_dict = { + "type": "click", + "x": x, + "y": y, + "button": "left", + } action_type_str = "click" # Normalize elif action_type_str == "wait": duration = tool_input.get("duration", 1) # Default 1 second # Convert seconds to milliseconds - action_model_payload = AgentActionType( - type="wait", - miliseconds=int(duration * 1000), - ) + action_payload_dict = { + "type": "wait", + "miliseconds": int(duration * 1000), + } elif action_type_str == "screenshot": - action_model_payload = AgentActionType( - type="screenshot", - ) + action_payload_dict = { + "type": "screenshot", + } elif action_type_str == "cursor_position": # This is a read operation, not directly supported # Return a no-op for now - action_model_payload = AgentActionType( - type="screenshot", # Use screenshot as a way to show cursor position - ) + action_payload_dict = { + "type": "screenshot", # Use screenshot as a way to show cursor position + } action_type_str = "screenshot" # Normalize elif action_type_str == "function": if tool_name == "goto": url = tool_input.get("url") if url: - action_model_payload = AgentActionType( - type="function", - name="goto", - arguments=FunctionArguments(url=url), - ) + action_payload_dict = { + "type": "function", + "name": "goto", + "arguments": {"url": url}, + } action_type_str = "function" else: self.logger.error( @@ -573,11 +574,11 @@ def _convert_tool_use_to_agent_action( ) return None elif tool_name == "navigate_back": - action_model_payload = AgentActionType( - type="function", - name="navigate_back", - 
arguments=FunctionArguments(), - ) + action_payload_dict = { + "type": "function", + "name": "navigate_back", + "arguments": None, + } action_type_str = "function" else: self.logger.error( @@ -586,7 +587,10 @@ def _convert_tool_use_to_agent_action( ) return None - if action_model_payload is not None: + if action_payload_dict is not None: + action_model_payload = TypeAdapter(AgentActionType).validate_python( + action_payload_dict + ) return AgentAction( action_type=action_type_str, action=action_model_payload, From a76eefeadc92c4b81effcc9b8b77fce272dc86e8 Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Fri, 24 Oct 2025 09:35:31 -0700 Subject: [PATCH 6/7] formatting --- stagehand/agent/anthropic_cua.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stagehand/agent/anthropic_cua.py b/stagehand/agent/anthropic_cua.py index 85401322..67d6a3a6 100644 --- a/stagehand/agent/anthropic_cua.py +++ b/stagehand/agent/anthropic_cua.py @@ -553,7 +553,9 @@ def _convert_tool_use_to_agent_action( # This is a read operation, not directly supported # Return a no-op for now action_payload_dict = { - "type": "screenshot", # Use screenshot as a way to show cursor position + "type": ( + "screenshot" + ), # Use screenshot as a way to show cursor position } action_type_str = "screenshot" # Normalize From 3d9d7dd574e41a6bc1c98bd665d4bd8fcfa2ce90 Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Fri, 24 Oct 2025 09:40:52 -0700 Subject: [PATCH 7/7] add back point and function argument types --- stagehand/agent/anthropic_cua.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/stagehand/agent/anthropic_cua.py b/stagehand/agent/anthropic_cua.py index 67d6a3a6..df6c7a23 100644 --- a/stagehand/agent/anthropic_cua.py +++ b/stagehand/agent/anthropic_cua.py @@ -501,12 +501,13 @@ def _convert_tool_use_to_agent_action( and x is not None and y is not None ): + path_points = [ + Point(x=start_x, y=start_y), + Point(x=x, y=y), + ] action_payload_dict = 
{ "type": "drag", - "path": [ - {"x": start_x, "y": start_y}, - {"x": x, "y": y}, - ], + "path": path_points, } action_type_str = "drag" # Normalize else: @@ -566,7 +567,7 @@ def _convert_tool_use_to_agent_action( action_payload_dict = { "type": "function", "name": "goto", - "arguments": {"url": url}, + "arguments": FunctionArguments(url=url), } action_type_str = "function" else: @@ -579,7 +580,7 @@ def _convert_tool_use_to_agent_action( action_payload_dict = { "type": "function", "name": "navigate_back", - "arguments": None, + "arguments": FunctionArguments(), } action_type_str = "function" else: