Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ Before running your script, make sure you have exported the necessary environmen
```bash
export BROWSERBASE_API_KEY="your-api-key"
export BROWSERBASE_PROJECT_ID="your-project-id"
export OPENAI_API_KEY="your-openai-api-key"
export MODEL_API_KEY="your-model-api-key" # API key for your model provider (e.g. OpenAI or Anthropic)
export STAGEHAND_SERVER_URL="url-of-stagehand-server"
```

Expand Down Expand Up @@ -158,7 +158,7 @@ For further examples, you can check out the scripts in the “examples/” direc
- `stagehand_server_url`: The Stagehand API server URL
- `browserbase_api_key`: Your BrowserBase API key (can also be set via BROWSERBASE_API_KEY environment variable)
- `browserbase_project_id`: Your BrowserBase project ID (can also be set via BROWSERBASE_PROJECT_ID environment variable)
- `openai_api_key`: Your OpenAI API key (can also be set via OPENAI_API_KEY environment variable)
- `model_api_key`: Your model provider's API key, e.g. for OpenAI or Anthropic (can also be set via MODEL_API_KEY environment variable)
- `verbose`: Verbosity level (default: 1)
- `model_name`: (optional) Model name to use for the conversation
- `dom_settle_timeout_ms`: (optional) Additional time for the DOM to settle
Expand Down
36 changes: 18 additions & 18 deletions examples/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
from dotenv import load_dotenv
from stagehand.client import Stagehand
from stagehand.config import StagehandConfig
from stagehand.schemas import ActOptions, ObserveOptions

load_dotenv()

Expand All @@ -14,21 +16,23 @@
)

async def main():

try:
# Create a Stagehand client - it will create a new session automatically
stagehand = Stagehand(
server_url=os.getenv("SERVER_URL"),
browserbase_api_key=os.getenv("BROWSERBASE_API_KEY"),
browserbase_project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
model_api_key=os.getenv("OPENAI_API_KEY"),
verbose=2,
model_name="gpt-4o", # optional - defaults to server's default
dom_settle_timeout_ms=3000, # optional - defaults to server's default
debug_dom=True, # optional - defaults to server's default
# Build a unified configuration object for Stagehand
config = StagehandConfig(
env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
debug_dom=True,
headless=False,
dom_settle_timeout_ms=3000,
model_name="gpt-4o-mini",
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}
)

# Initialize - this will create a new session since we didn't provide session_id
# Create a Stagehand client using the configuration object.
stagehand = Stagehand(config=config, server_url=os.getenv("SERVER_URL"), verbose=2)

# Initialize - this creates a new session automatically.
await stagehand.init()
print(f"Created new session with ID: {stagehand.session_id}")

Expand All @@ -40,32 +44,28 @@ async def main():
await stagehand.page.navigate("https://www.google.com")
print("Navigation complete with remote Playwright.")


print("EXAMPLE: Clicking on About link using local Playwright's get_by_role")
# Click on the "About" link using Playwright
await stagehand.page.get_by_role("link", name="About", exact=True).click()
print("Clicked on About link")

await asyncio.sleep(2)

await stagehand.page.navigate("https://www.google.com")

# Hosted Stagehand API - ACT to do something like 'search for openai'
await stagehand.page.act("search for openai")
await stagehand.page.act(ActOptions(action="search for openai"))

print("EXAMPLE: Find the XPATH of the button 'News' using Stagehand API")
xpaths = await stagehand.page.observe("find the button 'News'", only_visible=True)
xpaths = await stagehand.page.observe(ObserveOptions(instruction="find the button labeled 'News'", only_visible=True))
if len(xpaths) > 0:
element = xpaths[0]
print("EXAMPLE: Click on the button 'News' using local Playwright.")
await stagehand.page.click(element["selector"])
else:
print("No element found")


except Exception as e:
print(f"An error occurred in the example: {e}")


if __name__ == "__main__":
asyncio.run(main())
49 changes: 20 additions & 29 deletions examples/extract-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
from dotenv import load_dotenv
from stagehand import Stagehand
from stagehand.config import StagehandConfig
from stagehand.schemas import ExtractOptions
from pydantic import BaseModel

class ExtractSchema(BaseModel):
Expand All @@ -11,45 +13,34 @@ class ExtractSchema(BaseModel):
load_dotenv()

async def main():
# Create a Stagehand instance with automatic session creation
stagehand = Stagehand(
server_url=os.getenv("STAGEHAND_SERVER_URL"),
browserbase_api_key=os.getenv("BROWSERBASE_API_KEY"),
browserbase_project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
model_api_key=os.getenv("OPENAI_API_KEY"),
verbose=2,
model_name="gpt-4o", # optional - defaults to server's default
debug_dom=True, # optional - defaults to server's default
# Build a unified Stagehand configuration object
config = StagehandConfig(
env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
debug_dom=True,
headless=True,
model_name="gpt-4o",
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}
)

# Initialize - this will create a new session
# Create a Stagehand client using the configuration object.
stagehand = Stagehand(config=config, server_url=os.getenv("STAGEHAND_SERVER_URL"), verbose=2)

# Initialize - this creates a new session.
await stagehand.init()
print(f"Created new session with ID: {stagehand.session_id}")

try:

await stagehand.page.navigate("https://github.com/facebook/react")
print("Navigation complete.")

# Define schema for stars extraction
# extract_schema = {
# "type": "object",
# "properties": {
# "stars": {
# "type": "number",
# "description": "the number of stars for the project"
# }
# },
# "required": ["stars"]
# }

# we can either use a pydantic model or a json schema via dict
extract_schema = ExtractSchema

# Extract data using the schema
# Use the ExtractOptions Pydantic model to pass instruction and schema definition
data = await stagehand.page.extract(
instruction="Extract the number of stars for the project",
schema=extract_schema
ExtractOptions(
instruction="Extract the number of stars for the project",
schemaDefinition=ExtractSchema.model_json_schema()
)
)
print("\nExtracted stars:", data)

Expand Down
41 changes: 25 additions & 16 deletions examples/observe-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,51 @@
import os
from dotenv import load_dotenv
from stagehand import Stagehand
from stagehand.config import StagehandConfig
from stagehand.schemas import ObserveOptions

# Load environment variables from .env file
load_dotenv()

async def main():
# Create a Stagehand instance with automatic session creation
stagehand = Stagehand(
server_url=os.getenv("SERVER_URL"),
browserbase_api_key=os.getenv("BROWSERBASE_API_KEY"),
browserbase_project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
model_api_key=os.getenv("OPENAI_API_KEY"),
verbose=2,
model_name="gpt-4o-mini", # optional - defaults to server's default
debug_dom=True, # optional - defaults to server's default
# Build a unified Stagehand configuration object
config = StagehandConfig(
env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
debug_dom=True,
headless=True,
model_name="gpt-4o-mini",
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}
)

# Initialize - this will create a new session
# Create a Stagehand client using the configuration object.
stagehand = Stagehand(config=config, server_url=os.getenv("SERVER_URL"), verbose=2)

# Initialize - this creates a new session.
await stagehand.init()
print(f"Created new session with ID: {stagehand.session_id}")

try:
# Navigate to GitHub repository
# Navigate to the desired page
await stagehand.page.navigate("https://elpasotexas.ionwave.net/Login.aspx")
print("Navigation complete.")

# # Make observations about the site
activity = await stagehand.page.observe(
# Use ObserveOptions for detailed instructions
options = ObserveOptions(
instruction="find all the links on the page regarding the city of el paso",
only_visible=True # Use accessibility tree faster DOM parsing
only_visible=True
)
activity = await stagehand.page.observe(options)
print("\nObservations:", activity)
print("Length of observations:", len(activity))

print("Click on the first extracted element")
print(activity[0])
await stagehand.page.click(activity[0]["selector"])
if activity:
print(activity[0])
await stagehand.page.click(activity[0]["selector"])
else:
print("No elements found")

except Exception as e:
print(f"Error: {e}")
Expand Down
83 changes: 52 additions & 31 deletions stagehand/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from playwright.async_api import async_playwright
from .page import StagehandPage
from .utils import default_log_handler
from .config import StagehandConfig

load_dotenv()

Expand All @@ -19,15 +20,15 @@ class Stagehand:
Python client for interacting with a running Stagehand server and Browserbase remote headless browser.

Now supports automatically creating a new session if no session_id is provided.
You can also optionally provide modelName, domSettleTimeoutMs, verbose, and debugDom,
which will be sent to the server if a new session is created.
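You can also pass a StagehandConfig via the 'config' parameter to supply the Browserbase, session, and model settings in one place; server_url, verbose, on_log, and the httpx options are still passed as keyword arguments.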
"""

# Dictionary to store one lock per session_id
_session_locks = {}

def __init__(
self,
config: Optional[StagehandConfig] = None,
server_url: Optional[str] = None,
session_id: Optional[str] = None,
browserbase_api_key: Optional[str] = None,
Expand All @@ -42,36 +43,56 @@ def __init__(
timeout_settings: Optional[httpx.Timeout] = None,
):
"""
:param server_url: The running Stagehand server URL.
:param session_id: An existing Browserbase session ID (if you already have one).
:param browserbase_api_key: Your Browserbase API key.
:param browserbase_project_id: Your Browserbase project ID.
:param model_api_key: Your model API key (e.g. OpenAI, Anthropic, etc).
:param on_log: Async callback for log messages streamed from the server.
:param verbose: Verbosity level for console logs from this client.
:param model_name: Model name to use when creating a new session (e.g., "gpt-4o").
:param dom_settle_timeout_ms: Additional time for the DOM to settle.
:param debug_dom: Whether or not to enable DOM debug mode.
:param httpx_client: Optional custom httpx.AsyncClient instance.
:param timeout_settings: Optional custom timeout settings for httpx.
Initialize the Stagehand client.

Args:
config (Optional[StagehandConfig]): Optional configuration object encapsulating common parameters.
server_url (Optional[str]): The running Stagehand server URL.
session_id (Optional[str]): An existing Browserbase session ID.
browserbase_api_key (Optional[str]): Your Browserbase API key.
browserbase_project_id (Optional[str]): Your Browserbase project ID.
model_api_key (Optional[str]): Your model API key (e.g. OpenAI, Anthropic, etc.).
on_log (Optional[Callable[[Dict[str, Any]], Awaitable[None]]]): Async callback for log messages from the server.
verbose (int): Verbosity level for logs.
model_name (Optional[str]): Model name to use when creating a new session.
dom_settle_timeout_ms (Optional[int]): Additional time for the DOM to settle (in ms).
debug_dom (Optional[bool]): Whether to enable DOM debugging mode.
httpx_client (Optional[httpx.AsyncClient]): Optional custom httpx.AsyncClient instance.
timeout_settings (Optional[httpx.Timeout]): Optional custom timeout settings for httpx.
"""

self.server_url = server_url or os.getenv("STAGEHAND_SERVER_URL")
self.session_id = session_id
self.browserbase_api_key = browserbase_api_key or os.getenv("BROWSERBASE_API_KEY")
self.browserbase_project_id = browserbase_project_id or os.getenv("BROWSERBASE_PROJECT_ID")
self.model_api_key = model_api_key or os.getenv("OPENAI_API_KEY") # Fallback to OPENAI_API_KEY for backwards compatibility

if config:
self.browserbase_api_key = config.api_key or browserbase_api_key or os.getenv("BROWSERBASE_API_KEY")
self.browserbase_project_id = config.project_id or browserbase_project_id or os.getenv("BROWSERBASE_PROJECT_ID")
self.model_api_key = model_api_key or (
config.model_client_options.get("apiKey") if config.model_client_options else None
) or os.getenv("MODEL_API_KEY")
self.session_id = config.browserbase_session_id or session_id
self.model_name = config.model_name or model_name
self.dom_settle_timeout_ms = config.dom_settle_timeout_ms or dom_settle_timeout_ms
self.debug_dom = config.debug_dom if config.debug_dom is not None else debug_dom
self._custom_logger = config.logger # For future integration if needed
# Additional config parameters available for future use:
self.headless = config.headless
self.enable_caching = config.enable_caching
else:
self.browserbase_api_key = browserbase_api_key or os.getenv("BROWSERBASE_API_KEY")
self.browserbase_project_id = browserbase_project_id or os.getenv("BROWSERBASE_PROJECT_ID")
self.model_api_key = model_api_key or os.getenv("MODEL_API_KEY")
self.session_id = session_id
self.model_name = model_name
self.dom_settle_timeout_ms = dom_settle_timeout_ms
self.debug_dom = debug_dom

self.on_log = on_log
self.verbose = verbose
self.model_name = model_name
self.dom_settle_timeout_ms = dom_settle_timeout_ms
self.debug_dom = debug_dom
self.httpx_client = httpx_client
self.timeout_settings = timeout_settings or httpx.Timeout(
connect=90.0, # connection timeout
read=90.0, # read timeout
write=90.0, # write timeout
pool=90.0, # pool timeout
connect=90.0,
read=90.0,
write=90.0,
pool=90.0,
)
self.streamed_response = True # Default to True for streamed responses

Expand All @@ -82,16 +103,16 @@ def __init__(
self._playwright_page = None
self.page: Optional[StagehandPage] = None

self._initialized = False # Flag to track if we've already run init()
self._closed = False # Flag to track if we've closed
self._initialized = False # Flag to track if init() has run
self._closed = False # Flag to track if resources have been closed

# Validate essential fields if session_id was given
# Validate essential fields if session_id was provided
if self.session_id:
if not self.browserbase_api_key:
raise ValueError("browserbase_api_key is required (or set BROWSERBASE_API_KEY in env).")
if not self.browserbase_project_id:
raise ValueError("browserbase_project_id is required (or set BROWSERBASE_PROJECT_ID in env).")

def _get_lock_for_session(self) -> asyncio.Lock:
"""
Return an asyncio.Lock for this session. If one doesn't exist yet, create it.
Expand Down Expand Up @@ -288,7 +309,7 @@ async def _execute(self, method: str, payload: Dict[str, Any]) -> Any:
"x-bb-project-id": self.browserbase_project_id,
"Content-Type": "application/json",
"Connection": "keep-alive",
"x-streamed-response": str(self.streamed_response).lower()
"x-stream-response": str(self.streamed_response).lower()
}
if self.model_api_key:
headers["x-model-api-key"] = self.model_api_key
Expand Down
Loading