Skip to content

Commit 1d0577d

Browse files
authored
Support for new Gemini Computer Use Models (#213)
* cheetah updates
* model updates
* changeset
* update viewport
1 parent 3bcdd05 commit 1d0577d

File tree

9 files changed: +701 additions, -11 deletions (lines changed)

9 files changed

+701
-11
lines changed

.changeset/horned-giga-cockatoo.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"stagehand": patch
3+
---
4+
5+
Added support for Gemini Computer Use models

examples/agent_example.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ async def main():
4343
self_heal=True,
4444
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
4545
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
46-
verbose=1,
46+
verbose=2,
4747
)
4848

4949
# Create a Stagehand client using the configuration object.
@@ -64,9 +64,9 @@ async def main():
6464

6565
console.print("\n▶️ [highlight] Using Agent to perform a task[/]: playing a game of 2048")
6666
agent = stagehand.agent(
67-
model="computer-use-preview",
67+
model="gemini-2.5-computer-use-preview-10-2025",
6868
instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.",
69-
options={"apiKey": os.getenv("MODEL_API_KEY")}
69+
options={"apiKey": os.getenv("GEMINI_API_KEY")}
7070
)
7171
agent_result = await agent.execute(
7272
instruction="Play a game of 2048",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ description = "Python SDK for Stagehand"
99
readme = "README.md"
1010
classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent",]
1111
requires-python = ">=3.9"
12-
dependencies = [ "httpx>=0.24.0", "python-dotenv>=1.0.0", "pydantic>=1.10.0", "playwright>=1.42.1", "requests>=2.31.0", "browserbase>=1.4.0", "rich>=13.7.0", "openai>=1.99.6", "anthropic>=0.51.0", "litellm>=1.72.0,<1.75.0", "nest-asyncio>=1.6.0",]
12+
dependencies = [ "httpx>=0.24.0", "python-dotenv>=1.0.0", "pydantic>=1.10.0", "playwright>=1.42.1", "requests>=2.31.0", "browserbase>=1.4.0", "rich>=13.7.0", "openai>=1.99.6", "anthropic>=0.51.0", "litellm>=1.72.0,<1.75.0", "nest-asyncio>=1.6.0", "google-genai>=1.40.0"]
1313
[[project.authors]]
1414
name = "Browserbase, Inc."
1515
email = "support@browserbase.com"

stagehand/agent/agent.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,24 @@
1313
)
1414
from .anthropic_cua import AnthropicCUAClient
1515
from .client import AgentClient
16+
from .google_cua import GoogleCUAClient
1617
from .openai_cua import OpenAICUAClient
1718

1819
MODEL_TO_CLIENT_CLASS_MAP: dict[str, type[AgentClient]] = {
19-
"computer-use-preview": OpenAICUAClient,
20+
"computer-use-preview-03-11": OpenAICUAClient,
2021
"claude-3-5-sonnet-latest": AnthropicCUAClient,
2122
"claude-3-7-sonnet-latest": AnthropicCUAClient,
2223
"claude-sonnet-4-20250514": AnthropicCUAClient,
24+
"claude-sonnet-4-5-20250929": AnthropicCUAClient,
25+
"gemini-2.5-computer-use-preview-10-2025": GoogleCUAClient,
2326
}
2427
MODEL_TO_PROVIDER_MAP: dict[str, AgentProvider] = {
25-
"computer-use-preview": AgentProvider.OPENAI,
28+
"computer-use-preview-03-11": AgentProvider.OPENAI,
2629
"claude-3-5-sonnet-20240620": AgentProvider.ANTHROPIC,
2730
"claude-3-7-sonnet-20250219": AgentProvider.ANTHROPIC,
2831
"claude-sonnet-4-20250514": AgentProvider.ANTHROPIC,
32+
"claude-sonnet-4-5-20250929": AgentProvider.ANTHROPIC,
33+
"gemini-2.5-computer-use-preview-10-2025": AgentProvider.GOOGLE,
2934
# Add more mappings as needed
3035
}
3136

stagehand/agent/anthropic_cua.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def __init__(
6262
)
6363

6464
dimensions = (
65-
(viewport["width"], viewport["height"]) if viewport else (1024, 768)
65+
(viewport["width"], viewport["height"]) if viewport else (1288, 711)
6666
) # Default dimensions
6767
if self.config:
6868
if hasattr(self.config, "display_width") and self.config.display_width is not None: # type: ignore

0 commit comments

Comments
 (0)