Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/tasty-teams-call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

fix: make act, extract, and observe respect user-defined timeout param
239 changes: 125 additions & 114 deletions packages/core/lib/v3/handlers/actHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { trimTrailingTextNode } from "../../utils";
import { v3Logger } from "../logger";
import { ActHandlerParams } from "../types/private/handlers";
import { ActResult, Action, V3FunctionName } from "../types/public/methods";
import { ActTimeoutError } from "../types/public/sdkErrors";
import {
captureHybridSnapshot,
diffCombinedTrees,
Expand All @@ -22,6 +23,7 @@ import {
performUnderstudyMethod,
waitForDomNetworkQuiet,
} from "./handlerUtils/actHandlerUtils";
import { createTimeoutGuard } from "./handlerUtils/timeoutGuard";

type ActInferenceElement = {
elementId?: string;
Expand Down Expand Up @@ -144,145 +146,144 @@ export class ActHandler {
const { instruction, page, variables, timeout, model } = params;

const llmClient = this.resolveLlmClient(model);
const effectiveTimeoutMs =
typeof timeout === "number" && timeout > 0 ? timeout : undefined;

const doObserveAndAct = async (): Promise<ActResult> => {
await waitForDomNetworkQuiet(
page.mainFrame(),
this.defaultDomSettleTimeoutMs,
);
const { combinedTree, combinedXpathMap } = await captureHybridSnapshot(
page,
{ experimental: true },
);

const actInstruction = buildActPrompt(
instruction,
Object.values(SupportedPlaywrightAction),
variables,
);
const ensureTimeRemaining = createTimeoutGuard(
effectiveTimeoutMs,
(ms) => new ActTimeoutError(ms),
);

const { action: firstAction, response: actInferenceResponse } =
await this.getActionFromLLM({
instruction: actInstruction,
domElements: combinedTree,
xpathMap: combinedXpathMap,
llmClient,
variables,
});
ensureTimeRemaining();
await waitForDomNetworkQuiet(
page.mainFrame(),
this.defaultDomSettleTimeoutMs,
);
ensureTimeRemaining();
const { combinedTree, combinedXpathMap } = await captureHybridSnapshot(
page,
{ experimental: true },
);

if (!firstAction) {
v3Logger({
category: "action",
message: "no actionable element returned by LLM",
level: 1,
});
return {
success: false,
message: "Failed to perform act: No action found",
actionDescription: instruction,
actions: [],
};
}
const actInstruction = buildActPrompt(
instruction,
Object.values(SupportedPlaywrightAction),
variables,
);

// First action (self-heal aware path)
const firstResult = await this.takeDeterministicAction(
firstAction,
page,
this.defaultDomSettleTimeoutMs,
ensureTimeRemaining();
const { action: firstAction, response: actInferenceResponse } =
await this.getActionFromLLM({
instruction: actInstruction,
domElements: combinedTree,
xpathMap: combinedXpathMap,
llmClient,
);

// If not two-step, return the first action result
if (actInferenceResponse?.twoStep !== true) {
return firstResult;
}
variables,
});

// Take a new focused snapshot and observe again
const {
combinedTree: combinedTree2,
combinedXpathMap: combinedXpathMap2,
} = await captureHybridSnapshot(page, {
experimental: true,
if (!firstAction) {
v3Logger({
category: "action",
message: "no actionable element returned by LLM",
level: 1,
});
return {
success: false,
message: "Failed to perform act: No action found",
actionDescription: instruction,
actions: [],
};
}

let diffedTree = diffCombinedTrees(combinedTree, combinedTree2);
if (!diffedTree.trim()) {
// Fallback: if no diff detected, use the fresh tree to avoid empty context
diffedTree = combinedTree2;
}
// First action (self-heal aware path)
ensureTimeRemaining();
const firstResult = await this.takeDeterministicAction(
firstAction,
page,
this.defaultDomSettleTimeoutMs,
llmClient,
ensureTimeRemaining,
);

const previousAction = `method: ${firstAction.method}, description: ${firstAction.description}, arguments: ${firstAction.arguments}`;

const stepTwoInstructions = buildStepTwoPrompt(
instruction,
previousAction,
Object.values(SupportedPlaywrightAction).filter(
(
action,
): action is Exclude<
SupportedPlaywrightAction,
SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN
> => action !== SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN,
),
variables,
);
// If not two-step, return the first action result
if (actInferenceResponse?.twoStep !== true) {
return firstResult;
}

const { action: secondAction } = await this.getActionFromLLM({
instruction: stepTwoInstructions,
domElements: diffedTree,
xpathMap: combinedXpathMap2,
llmClient,
variables,
// Take a new focused snapshot and observe again
ensureTimeRemaining();
const { combinedTree: combinedTree2, combinedXpathMap: combinedXpathMap2 } =
await captureHybridSnapshot(page, {
experimental: true,
});

if (!secondAction) {
// No second action found — return first result as-is
return firstResult;
}
let diffedTree = diffCombinedTrees(combinedTree, combinedTree2);
if (!diffedTree.trim()) {
// Fallback: if no diff detected, use the fresh tree to avoid empty context
diffedTree = combinedTree2;
}

const secondResult = await this.takeDeterministicAction(
secondAction,
page,
this.defaultDomSettleTimeoutMs,
llmClient,
);
const previousAction = `method: ${firstAction.method}, description: ${firstAction.description}, arguments: ${firstAction.arguments}`;

// Combine results
return {
success: firstResult.success && secondResult.success,
message: secondResult.success
? `${firstResult.message} → ${secondResult.message}`
: `${firstResult.message} → ${secondResult.message}`,
actionDescription: firstResult.actionDescription,
actions: [
...(firstResult.actions || []),
...(secondResult.actions || []),
],
};
};
const stepTwoInstructions = buildStepTwoPrompt(
instruction,
previousAction,
Object.values(SupportedPlaywrightAction).filter(
(
action,
): action is Exclude<
SupportedPlaywrightAction,
SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN
> => action !== SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN,
),
variables,
);

ensureTimeRemaining();
const { action: secondAction } = await this.getActionFromLLM({
instruction: stepTwoInstructions,
domElements: diffedTree,
xpathMap: combinedXpathMap2,
llmClient,
variables,
});

// Hard timeout for entire act() call → reject on timeout (align with extract/observe)
if (!timeout) {
return doObserveAndAct();
if (!secondAction) {
// No second action found — return first result as-is
return firstResult;
}

return await Promise.race([
doObserveAndAct(),
new Promise<ActResult>((_, reject) => {
setTimeout(
() => reject(new Error(`act() timed out after ${timeout}ms`)),
timeout,
);
}),
]);
ensureTimeRemaining();
const secondResult = await this.takeDeterministicAction(
secondAction,
page,
this.defaultDomSettleTimeoutMs,
llmClient,
ensureTimeRemaining,
);

// Combine results
return {
success: firstResult.success && secondResult.success,
message: secondResult.success
? `${firstResult.message} → ${secondResult.message}`
: `${firstResult.message} → ${secondResult.message}`,
actionDescription: firstResult.actionDescription,
actions: [
...(firstResult.actions || []),
...(secondResult.actions || []),
],
};
}

async takeDeterministicAction(
action: Action,
page: Page,
domSettleTimeoutMs?: number,
llmClientOverride?: LLMClient,
ensureTimeRemaining?: () => void,
): Promise<ActResult> {
ensureTimeRemaining?.();
const settleTimeout = domSettleTimeoutMs ?? this.defaultDomSettleTimeoutMs;
const effectiveClient = llmClientOverride ?? this.llmClient;
const method = action.method?.trim();
Expand All @@ -307,6 +308,7 @@ export class ActHandler {
const args = Array.isArray(action.arguments) ? action.arguments : [];

try {
ensureTimeRemaining?.();
await performUnderstudyMethod(
page,
page.mainFrame(),
Expand All @@ -329,6 +331,9 @@ export class ActHandler {
],
};
} catch (err) {
if (err instanceof ActTimeoutError) {
throw err;
}
const msg = err instanceof Error ? err.message : String(err);

// Attempt self-heal: rerun actInference and retry with updated selector
Expand Down Expand Up @@ -356,6 +361,7 @@ export class ActHandler {
: method;

// Take a fresh snapshot and ask for a new actionable element
ensureTimeRemaining?.();
const { combinedTree, combinedXpathMap } =
await captureHybridSnapshot(page, {
experimental: true,
Expand All @@ -367,6 +373,7 @@ export class ActHandler {
{},
);

ensureTimeRemaining?.();
const { action: fallbackAction, response: fallbackResponse } =
await this.getActionFromLLM({
instruction,
Expand All @@ -393,6 +400,7 @@ export class ActHandler {
newSelector = fallbackAction.selector;
}

ensureTimeRemaining?.();
await performUnderstudyMethod(
page,
page.mainFrame(),
Expand All @@ -416,6 +424,9 @@ export class ActHandler {
],
};
} catch (retryErr) {
if (retryErr instanceof ActTimeoutError) {
throw retryErr;
}
const retryMsg =
retryErr instanceof Error ? retryErr.message : String(retryErr);
return {
Expand Down
Loading