diff --git a/.changeset/tasty-teams-call.md b/.changeset/tasty-teams-call.md new file mode 100644 index 000000000..09741e001 --- /dev/null +++ b/.changeset/tasty-teams-call.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +fix: make act, extract, and observe respect user defined timeout param diff --git a/packages/core/lib/v3/handlers/actHandler.ts b/packages/core/lib/v3/handlers/actHandler.ts index 41f0afe9a..9ab37d21f 100644 --- a/packages/core/lib/v3/handlers/actHandler.ts +++ b/packages/core/lib/v3/handlers/actHandler.ts @@ -5,6 +5,7 @@ import { trimTrailingTextNode } from "../../utils"; import { v3Logger } from "../logger"; import { ActHandlerParams } from "../types/private/handlers"; import { ActResult, Action, V3FunctionName } from "../types/public/methods"; +import { ActTimeoutError } from "../types/public/sdkErrors"; import { captureHybridSnapshot, diffCombinedTrees, @@ -22,6 +23,7 @@ import { performUnderstudyMethod, waitForDomNetworkQuiet, } from "./handlerUtils/actHandlerUtils"; +import { createTimeoutGuard } from "./handlerUtils/timeoutGuard"; type ActInferenceElement = { elementId?: string; @@ -144,137 +146,134 @@ export class ActHandler { const { instruction, page, variables, timeout, model } = params; const llmClient = this.resolveLlmClient(model); + const effectiveTimeoutMs = + typeof timeout === "number" && timeout > 0 ? timeout : undefined; - const doObserveAndAct = async (): Promise => { - await waitForDomNetworkQuiet( - page.mainFrame(), - this.defaultDomSettleTimeoutMs, - ); - const { combinedTree, combinedXpathMap } = await captureHybridSnapshot( - page, - { experimental: true }, - ); - - const actInstruction = buildActPrompt( - instruction, - Object.values(SupportedPlaywrightAction), - variables, - ); + const ensureTimeRemaining = createTimeoutGuard( + effectiveTimeoutMs, + (ms) => new ActTimeoutError(ms), + ); - const { action: firstAction, response: actInferenceResponse } = - await this.getActionFromLLM({ - instruction: actInstruction, - domElements: combinedTree, - xpathMap: combinedXpathMap, - llmClient, - variables, - }); + ensureTimeRemaining(); + await waitForDomNetworkQuiet( + page.mainFrame(), + this.defaultDomSettleTimeoutMs, + ); + ensureTimeRemaining(); + const { combinedTree, combinedXpathMap } = await captureHybridSnapshot( + page, + { experimental: true }, + ); - if (!firstAction) { - v3Logger({ - category: "action", - message: "no actionable element returned by LLM", - level: 1, - }); - return { - success: false, - message: "Failed to perform act: No action found", - actionDescription: instruction, - actions: [], - }; - } + const actInstruction = buildActPrompt( + instruction, + Object.values(SupportedPlaywrightAction), + variables, + ); - // First action (self-heal aware path) - const firstResult = await this.takeDeterministicAction( - firstAction, - page, - this.defaultDomSettleTimeoutMs, + ensureTimeRemaining(); + const { action: firstAction, response: actInferenceResponse } = + await this.getActionFromLLM({ + instruction: actInstruction, + domElements: combinedTree, + xpathMap: combinedXpathMap, llmClient, - ); - - // If not two-step, return the first action result - if (actInferenceResponse?.twoStep !== true) { - return firstResult; - } + variables, + }); - // Take a new focused snapshot and observe again - const { - combinedTree: combinedTree2, - combinedXpathMap: combinedXpathMap2, - } = await captureHybridSnapshot(page, { - experimental: true, + if (!firstAction) { + v3Logger({ + category: "action", + message: "no actionable element returned by LLM", + level: 1, }); + return { + success: false, + message: "Failed to perform act: No action found", + actionDescription: instruction, + actions: [], + }; + } - let diffedTree = diffCombinedTrees(combinedTree, combinedTree2); - if (!diffedTree.trim()) { - // Fallback: if no diff detected, use the fresh tree to avoid empty context - diffedTree = combinedTree2; - } + // First action (self-heal aware path) + ensureTimeRemaining(); + const firstResult = await this.takeDeterministicAction( + firstAction, + page, + this.defaultDomSettleTimeoutMs, + llmClient, + ensureTimeRemaining, + ); - const previousAction = `method: ${firstAction.method}, description: ${firstAction.description}, arguments: ${firstAction.arguments}`; - - const stepTwoInstructions = buildStepTwoPrompt( - instruction, - previousAction, - Object.values(SupportedPlaywrightAction).filter( - ( - action, - ): action is Exclude< - SupportedPlaywrightAction, - SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN - > => action !== SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN, - ), - variables, - ); + // If not two-step, return the first action result + if (actInferenceResponse?.twoStep !== true) { + return firstResult; + } - const { action: secondAction } = await this.getActionFromLLM({ - instruction: stepTwoInstructions, - domElements: diffedTree, - xpathMap: combinedXpathMap2, - llmClient, - variables, + // Take a new focused snapshot and observe again + ensureTimeRemaining(); + const { combinedTree: combinedTree2, combinedXpathMap: combinedXpathMap2 } = + await captureHybridSnapshot(page, { + experimental: true, }); - if (!secondAction) { - // No second action found — return first result as-is - return firstResult; - } + let diffedTree = diffCombinedTrees(combinedTree, combinedTree2); + if (!diffedTree.trim()) { + // Fallback: if no diff detected, use the fresh tree to avoid empty context + diffedTree = combinedTree2; + } - const secondResult = await this.takeDeterministicAction( - secondAction, - page, - this.defaultDomSettleTimeoutMs, - llmClient, - ); + const previousAction = `method: ${firstAction.method}, description: ${firstAction.description}, arguments: ${firstAction.arguments}`; - // Combine results - return { - success: firstResult.success && secondResult.success, - message: secondResult.success - ? `${firstResult.message} → ${secondResult.message}` - : `${firstResult.message} → ${secondResult.message}`, - actionDescription: firstResult.actionDescription, - actions: [ - ...(firstResult.actions || []), - ...(secondResult.actions || []), - ], - }; - }; + const stepTwoInstructions = buildStepTwoPrompt( + instruction, + previousAction, + Object.values(SupportedPlaywrightAction).filter( + ( + action, + ): action is Exclude< + SupportedPlaywrightAction, + SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN + > => action !== SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN, + ), + variables, + ); + + ensureTimeRemaining(); + const { action: secondAction } = await this.getActionFromLLM({ + instruction: stepTwoInstructions, + domElements: diffedTree, + xpathMap: combinedXpathMap2, + llmClient, + variables, + }); - // Hard timeout for entire act() call → reject on timeout (align with extract/observe) - if (!timeout) { - return doObserveAndAct(); + if (!secondAction) { + // No second action found — return first result as-is + return firstResult; } - return await Promise.race([ - doObserveAndAct(), - new Promise((_, reject) => { - setTimeout( - () => reject(new Error(`act() timed out after ${timeout}ms`)), - timeout, - ); - }), - ]); + ensureTimeRemaining(); + const secondResult = await this.takeDeterministicAction( + secondAction, + page, + this.defaultDomSettleTimeoutMs, + llmClient, + ensureTimeRemaining, + ); + + // Combine results + return { + success: firstResult.success && secondResult.success, + message: secondResult.success + ? `${firstResult.message} → ${secondResult.message}` + : `${firstResult.message} → ${secondResult.message}`, + actionDescription: firstResult.actionDescription, + actions: [ + ...(firstResult.actions || []), + ...(secondResult.actions || []), + ], + }; } async takeDeterministicAction( @@ -282,7 +281,9 @@ export class ActHandler { page: Page, domSettleTimeoutMs?: number, llmClientOverride?: LLMClient, + ensureTimeRemaining?: () => void, ): Promise { + ensureTimeRemaining?.(); const settleTimeout = domSettleTimeoutMs ?? this.defaultDomSettleTimeoutMs; const effectiveClient = llmClientOverride ?? this.llmClient; const method = action.method?.trim(); @@ -307,6 +308,7 @@ export class ActHandler { const args = Array.isArray(action.arguments) ? action.arguments : []; try { + ensureTimeRemaining?.(); await performUnderstudyMethod( page, page.mainFrame(), @@ -329,6 +331,9 @@ export class ActHandler { ], }; } catch (err) { + if (err instanceof ActTimeoutError) { + throw err; + } const msg = err instanceof Error ? err.message : String(err); // Attempt self-heal: rerun actInference and retry with updated selector @@ -356,6 +361,7 @@ export class ActHandler { : method; // Take a fresh snapshot and ask for a new actionable element + ensureTimeRemaining?.(); const { combinedTree, combinedXpathMap } = await captureHybridSnapshot(page, { experimental: true, @@ -367,6 +373,7 @@ export class ActHandler { {}, ); + ensureTimeRemaining?.(); const { action: fallbackAction, response: fallbackResponse } = await this.getActionFromLLM({ instruction, @@ -393,6 +400,7 @@ export class ActHandler { newSelector = fallbackAction.selector; } + ensureTimeRemaining?.(); await performUnderstudyMethod( page, page.mainFrame(), @@ -416,6 +424,9 @@ export class ActHandler { ], }; } catch (retryErr) { + if (retryErr instanceof ActTimeoutError) { + throw retryErr; + } const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr); return { diff --git a/packages/core/lib/v3/handlers/extractHandler.ts b/packages/core/lib/v3/handlers/extractHandler.ts index 9def5154d..339f2c52b 100644 --- a/packages/core/lib/v3/handlers/extractHandler.ts +++ b/packages/core/lib/v3/handlers/extractHandler.ts @@ -19,7 +19,11 @@ import { ClientOptions, ModelConfiguration, } from "../types/public/model"; -import { StagehandInvalidArgumentError } from "../types/public/sdkErrors"; +import { + StagehandInvalidArgumentError, + ExtractTimeoutError, +} from "../types/public/sdkErrors"; +import { createTimeoutGuard } from "./handlerUtils/timeoutGuard"; import type { InferStagehandSchema, StagehandZodObject, @@ -106,146 +110,135 @@ export class ExtractHandler { const llmClient = this.resolveLlmClient(model); - const doExtract = async (): Promise< - InferStagehandSchema | { pageText: string } - > => { - // No-args → page text (parity with v2) - const noArgs = !instruction && !schema; - if (noArgs) { - const focusSelector = selector?.replace(/^xpath=/i, "") ?? ""; - const snap = await captureHybridSnapshot(page, { - experimental: this.experimental, - focusSelector: focusSelector || undefined, - }); - - const result = { pageText: snap.combinedTree }; - // Validate via the same schema used in v2 - return pageTextSchema.parse(result); - } - - if (!instruction && schema) { - throw new StagehandInvalidArgumentError( - "extract() requires an instruction when a schema is provided.", - ); - } - - const focusSelector = selector?.replace(/^xpath=/, "") ?? ""; - - // Build the hybrid snapshot (includes combinedTree; combinedUrlMap optional) - const { combinedTree, combinedUrlMap } = await captureHybridSnapshot( - page, - { - experimental: this.experimental, - focusSelector: focusSelector, - }, - ); - - v3Logger({ - category: "extraction", - message: "Starting extraction using a11y snapshot", - level: 1, - auxiliary: instruction - ? { instruction: { value: instruction, type: "string" } } - : undefined, + const effectiveTimeoutMs = + typeof timeout === "number" && timeout > 0 ? timeout : undefined; + const ensureTimeRemaining = createTimeoutGuard( + effectiveTimeoutMs, + (ms) => new ExtractTimeoutError(ms), + ); + + // No-args → page text (parity with v2) + const noArgs = !instruction && !schema; + if (noArgs) { + const focusSelector = selector?.replace(/^xpath=/i, "") ?? ""; + ensureTimeRemaining(); + const snap = await captureHybridSnapshot(page, { + experimental: this.experimental, + focusSelector: focusSelector || undefined, }); - // Normalize schema: if instruction provided without schema, use defaultExtractSchema - const baseSchema: StagehandZodSchema = (schema ?? - defaultExtractSchema) as StagehandZodSchema; - // Ensure we pass an object schema into inference; wrap non-object schemas - const isObjectSchema = getZodType(baseSchema) === "object"; - const WRAP_KEY = "value" as const; - const factory = getZFactory(baseSchema); - const objectSchema: StagehandZodObject = isObjectSchema - ? (baseSchema as StagehandZodObject) - : (factory.object({ - [WRAP_KEY]: baseSchema as ZodTypeAny, - }) as StagehandZodObject); - - const [transformedSchema, urlFieldPaths] = - transformUrlStringsToNumericIds(objectSchema); - - const extractionResponse: ExtractionResponse = - await runExtract({ - instruction, - domElements: combinedTree, - schema: transformedSchema as StagehandZodObject, - llmClient, - userProvidedInstructions: this.systemPrompt, - logger: v3Logger, - logInferenceToFile: this.logInferenceToFile, - }); - - const { - metadata: { completed }, - prompt_tokens, - completion_tokens, - reasoning_tokens = 0, - cached_input_tokens = 0, - inference_time_ms, - ...rest - } = extractionResponse; - let output = rest as InferStagehandSchema; - - v3Logger({ - category: "extraction", - message: completed - ? "Extraction completed successfully" - : "Extraction incomplete after processing all data", - level: 1, - auxiliary: { - prompt_tokens: { value: String(prompt_tokens), type: "string" }, - completion_tokens: { - value: String(completion_tokens), - type: "string", - }, - inference_time_ms: { - value: String(inference_time_ms), - type: "string", - }, - }, - }); + const result = { pageText: snap.combinedTree }; + // Validate via the same schema used in v2 + return pageTextSchema.parse(result); + } - // Update EXTRACT metrics from the LLM calls - this.onMetrics?.( - V3FunctionName.EXTRACT, - prompt_tokens, - completion_tokens, - reasoning_tokens, - cached_input_tokens, - inference_time_ms, + if (!instruction && schema) { + throw new StagehandInvalidArgumentError( + "extract() requires an instruction when a schema is provided.", ); + } + + const focusSelector = selector?.replace(/^xpath=/, "") ?? ""; + + // Build the hybrid snapshot (includes combinedTree; combinedUrlMap optional) + ensureTimeRemaining(); + const { combinedTree, combinedUrlMap } = await captureHybridSnapshot(page, { + experimental: this.experimental, + focusSelector: focusSelector, + }); + + v3Logger({ + category: "extraction", + message: "Starting extraction using a11y snapshot", + level: 1, + auxiliary: instruction + ? { instruction: { value: instruction, type: "string" } } + : undefined, + }); + + // Normalize schema: if instruction provided without schema, use defaultExtractSchema + const baseSchema: StagehandZodSchema = (schema ?? + defaultExtractSchema) as StagehandZodSchema; + // Ensure we pass an object schema into inference; wrap non-object schemas + const isObjectSchema = getZodType(baseSchema) === "object"; + const WRAP_KEY = "value" as const; + const factory = getZFactory(baseSchema); + const objectSchema: StagehandZodObject = isObjectSchema + ? (baseSchema as StagehandZodObject) + : (factory.object({ + [WRAP_KEY]: baseSchema as ZodTypeAny, + }) as StagehandZodObject); + + const [transformedSchema, urlFieldPaths] = + transformUrlStringsToNumericIds(objectSchema); + + ensureTimeRemaining(); + const extractionResponse: ExtractionResponse = + await runExtract({ + instruction, + domElements: combinedTree, + schema: transformedSchema as StagehandZodObject, + llmClient, + userProvidedInstructions: this.systemPrompt, + logger: v3Logger, + logInferenceToFile: this.logInferenceToFile, + }); - // Re-inject URLs for any url() fields we temporarily converted to number() - const idToUrl: Record = (combinedUrlMap ?? - {}) as Record; - for (const { segments } of urlFieldPaths) { - injectUrls( - output as Record, - segments, - idToUrl as unknown as Record, - ); - } - - // If we wrapped a non-object schema, unwrap the value - if (!isObjectSchema && output && typeof output === "object") { - output = (output as Record)[WRAP_KEY]; - } - - return output as InferStagehandSchema; - }; - if (!timeout) return doExtract(); - - return await Promise.race([ - doExtract(), - new Promise | { pageText: string }>( - (_, reject) => { - setTimeout( - () => reject(new Error(`extract() timed out after ${timeout}ms`)), - timeout, - ); + const { + metadata: { completed }, + prompt_tokens, + completion_tokens, + reasoning_tokens = 0, + cached_input_tokens = 0, + inference_time_ms, + ...rest + } = extractionResponse; + let output = rest as InferStagehandSchema; + + v3Logger({ + category: "extraction", + message: completed + ? "Extraction completed successfully" + : "Extraction incomplete after processing all data", + level: 1, + auxiliary: { + prompt_tokens: { value: String(prompt_tokens), type: "string" }, + completion_tokens: { value: String(completion_tokens), type: "string" }, + inference_time_ms: { + value: String(inference_time_ms), + type: "string", }, - ), - ]); + }, + }); + + // Update EXTRACT metrics from the LLM calls + this.onMetrics?.( + V3FunctionName.EXTRACT, + prompt_tokens, + completion_tokens, + reasoning_tokens, + cached_input_tokens, + inference_time_ms, + ); + + // Re-inject URLs for any url() fields we temporarily converted to number() + const idToUrl: Record = (combinedUrlMap ?? {}) as Record< + EncodedId, + string + >; + for (const { segments } of urlFieldPaths) { + injectUrls( + output as Record, + segments, + idToUrl as unknown as Record, + ); + } + + // If we wrapped a non-object schema, unwrap the value + if (!isObjectSchema && output && typeof output === "object") { + output = (output as Record)[WRAP_KEY]; + } + + return output as InferStagehandSchema; } } diff --git a/packages/core/lib/v3/handlers/handlerUtils/timeoutGuard.ts b/packages/core/lib/v3/handlers/handlerUtils/timeoutGuard.ts new file mode 100644 index 000000000..cacb8911f --- /dev/null +++ b/packages/core/lib/v3/handlers/handlerUtils/timeoutGuard.ts @@ -0,0 +1,21 @@ +import { TimeoutError } from "../../types/public/sdkErrors"; + +export type TimeoutGuard = () => void; + +export function createTimeoutGuard( + timeoutMs?: number, + errorFactory?: (timeoutMs: number) => Error, +): TimeoutGuard { + if (!timeoutMs || timeoutMs <= 0) { + return () => {}; + } + + const startTime = Date.now(); + return () => { + if (Date.now() - startTime >= timeoutMs) { + const err = + errorFactory?.(timeoutMs) ?? new TimeoutError("operation", timeoutMs); + throw err; + } + }; +} diff --git a/packages/core/lib/v3/handlers/observeHandler.ts b/packages/core/lib/v3/handlers/observeHandler.ts index e4e28c1f3..b66f3f3eb 100644 --- a/packages/core/lib/v3/handlers/observeHandler.ts +++ b/packages/core/lib/v3/handlers/observeHandler.ts @@ -13,6 +13,8 @@ import { ClientOptions, ModelConfiguration, } from "../types/public/model"; +import { ObserveTimeoutError } from "../types/public/sdkErrors"; +import { createTimeoutGuard } from "./handlerUtils/timeoutGuard"; export class ObserveHandler { private readonly llmClient: LLMClient; @@ -63,124 +65,119 @@ export class ObserveHandler { const llmClient = this.resolveLlmClient(model); + const effectiveTimeoutMs = + typeof timeout === "number" && timeout > 0 ? timeout : undefined; + const ensureTimeRemaining = createTimeoutGuard( + effectiveTimeoutMs, + (ms) => new ObserveTimeoutError(ms), + ); + const effectiveInstruction = instruction ?? "Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them."; - const doObserve = async (): Promise => { - v3Logger({ - category: "observation", - message: "starting observation", - level: 1, - auxiliary: { - instruction: { - value: effectiveInstruction, - type: "string", - }, + v3Logger({ + category: "observation", + message: "starting observation", + level: 1, + auxiliary: { + instruction: { + value: effectiveInstruction, + type: "string", }, - }); - - // Build the hybrid snapshot (a11y-centric text tree + lookup maps) - const focusSelector = selector?.replace(/^xpath=/i, "") ?? ""; - const snapshot = await captureHybridSnapshot(page, { - experimental: this.experimental, - focusSelector: focusSelector || undefined, - }); - - const combinedTree = snapshot.combinedTree; - const combinedXpathMap = snapshot.combinedXpathMap ?? {}; - - v3Logger({ - category: "observation", - message: "Got accessibility tree data", - level: 1, - }); - - // Call the LLM to propose actionable elements - const observationResponse = await runObserve({ - instruction: effectiveInstruction, - domElements: combinedTree, - llmClient, - userProvidedInstructions: this.systemPrompt, - logger: v3Logger, - logInferenceToFile: this.logInferenceToFile, - }); - - const { - prompt_tokens = 0, - completion_tokens = 0, - reasoning_tokens = 0, - cached_input_tokens = 0, - inference_time_ms = 0, - } = observationResponse; - - // Update OBSERVE metrics from the LLM observation call - this.onMetrics?.( - V3FunctionName.OBSERVE, - prompt_tokens, - completion_tokens, - reasoning_tokens, - cached_input_tokens, - inference_time_ms, - ); - - // Map elementIds -> selectors via combinedXpathMap - const elementsWithSelectors = ( - await Promise.all( - observationResponse.elements.map(async (element) => { - const { elementId, ...rest } = element; // rest may or may not have method/arguments - if (typeof elementId === "string" && elementId.includes("-")) { - const lookUpIndex = elementId as EncodedId; - const xpath = combinedXpathMap[lookUpIndex]; - const trimmedXpath = trimTrailingTextNode(xpath); - if (!trimmedXpath) return undefined; - - return { - ...rest, // if method/arguments exist, they’re preserved; otherwise they’re absent - selector: `xpath=${trimmedXpath}`, - } as { - description: string; - method?: string; - arguments?: string[]; - selector: string; - }; - } - // shadow-root fallback: + }, + }); + + // Build the hybrid snapshot (a11y-centric text tree + lookup maps) + const focusSelector = selector?.replace(/^xpath=/i, "") ?? ""; + ensureTimeRemaining(); + const snapshot = await captureHybridSnapshot(page, { + experimental: this.experimental, + focusSelector: focusSelector || undefined, + }); + + const combinedTree = snapshot.combinedTree; + const combinedXpathMap = snapshot.combinedXpathMap ?? {}; + + v3Logger({ + category: "observation", + message: "Got accessibility tree data", + level: 1, + }); + + // Call the LLM to propose actionable elements + ensureTimeRemaining(); + const observationResponse = await runObserve({ + instruction: effectiveInstruction, + domElements: combinedTree, + llmClient, + userProvidedInstructions: this.systemPrompt, + logger: v3Logger, + logInferenceToFile: this.logInferenceToFile, + }); + + const { + prompt_tokens = 0, + completion_tokens = 0, + reasoning_tokens = 0, + cached_input_tokens = 0, + inference_time_ms = 0, + } = observationResponse; + + // Update OBSERVE metrics from the LLM observation call + this.onMetrics?.( + V3FunctionName.OBSERVE, + prompt_tokens, + completion_tokens, + reasoning_tokens, + cached_input_tokens, + inference_time_ms, + ); + + // Map elementIds -> selectors via combinedXpathMap + const elementsWithSelectors = ( + await Promise.all( + observationResponse.elements.map(async (element) => { + const { elementId, ...rest } = element; // rest may or may not have method/arguments + if (typeof elementId === "string" && elementId.includes("-")) { + const lookUpIndex = elementId as EncodedId; + const xpath = combinedXpathMap[lookUpIndex]; + const trimmedXpath = trimTrailingTextNode(xpath); + if (!trimmedXpath) return undefined; + return { - description: "an element inside a shadow DOM", - method: "not-supported", - arguments: [], - selector: "not-supported", + ...rest, + selector: `xpath=${trimmedXpath}`, + } as { + description: string; + method?: string; + arguments?: string[]; + selector: string; }; - }), - ) - ).filter((e: T | undefined): e is T => e !== undefined); - - v3Logger({ - category: "observation", - message: "found elements", - level: 1, - auxiliary: { - elements: { - value: JSON.stringify(elementsWithSelectors), - type: "object", - }, + } + // shadow-root fallback: + return { + description: "an element inside a shadow DOM", + method: "not-supported", + arguments: [], + selector: "not-supported", + }; + }), + ) + ).filter((e: T | undefined): e is T => e !== undefined); + + v3Logger({ + category: "observation", + message: "found elements", + level: 1, + auxiliary: { + elements: { + value: JSON.stringify(elementsWithSelectors), + type: "object", }, - }); - - return elementsWithSelectors; - }; - - if (!timeout) return doObserve(); - - return await Promise.race([ - doObserve(), - new Promise((_, reject) => { - setTimeout( - () => reject(new Error(`observe() timed out after ${timeout}ms`)), - timeout, - ); - }), - ]); + }, + }); + + return elementsWithSelectors; } } diff --git a/packages/core/lib/v3/types/public/sdkErrors.ts b/packages/core/lib/v3/types/public/sdkErrors.ts index 8840aa30e..39b519cf1 100644 --- a/packages/core/lib/v3/types/public/sdkErrors.ts +++ b/packages/core/lib/v3/types/public/sdkErrors.ts @@ -308,6 +308,27 @@ export class TimeoutError extends StagehandError { } } +export class ActTimeoutError extends TimeoutError { + constructor(timeoutMs: number) { + super("act()", timeoutMs); + this.name = "ActTimeoutError"; + } +} + +export class ExtractTimeoutError extends TimeoutError { + constructor(timeoutMs: number) { + super("extract()", timeoutMs); + this.name = "ExtractTimeoutError"; + } +} + +export class ObserveTimeoutError extends TimeoutError { + constructor(timeoutMs: number) { + super("observe()", timeoutMs); + this.name = "ObserveTimeoutError"; + } +} + export class PageNotFoundError extends StagehandError { constructor(identifier: string) { super(`No Page found for ${identifier}`); diff --git a/packages/core/lib/v3/v3.ts b/packages/core/lib/v3/v3.ts index ebe9e39ad..e564cebbb 100644 --- a/packages/core/lib/v3/v3.ts +++ b/packages/core/lib/v3/v3.ts @@ -72,6 +72,8 @@ import { V3Context } from "./understudy/context"; import { Page } from "./understudy/page"; import { resolveModel } from "../modelUtils"; import { StagehandAPIClient } from "./api"; +import { createTimeoutGuard } from "./handlers/handlerUtils/timeoutGuard"; +import { ActTimeoutError } from "./types/public/sdkErrors"; const DEFAULT_MODEL_NAME = "openai/gpt-4.1-mini"; const DEFAULT_VIEWPORT = { width: 1288, height: 711 }; @@ -981,11 +983,20 @@ export class V3 { frameId: v3Page.mainFrameId(), }); } else { + const effectiveTimeoutMs = + typeof options?.timeout === "number" && options.timeout > 0 + ? options.timeout + : undefined; + const ensureTimeRemaining = createTimeoutGuard( + effectiveTimeoutMs, + (ms) => new ActTimeoutError(ms), + ); actResult = await this.actHandler.takeDeterministicAction( - { ...input, selector }, // ObserveResult - v3Page, // V3 Page + { ...input, selector }, + v3Page, this.domSettleTimeoutMs, this.resolveLlmClient(options?.model), + ensureTimeRemaining, ); } diff --git a/packages/core/package.json b/packages/core/package.json index 8ceb9161e..621e16f7c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -19,7 +19,7 @@ "e2e:bb": "playwright test --config=lib/v3/tests/v3.bb.playwright.config.ts", "lint": "cd ../.. && prettier --check packages/core && cd packages/core && eslint .", "format": "prettier --write .", - "test:vitest": "pnpm run build-js && pnpm run typecheck && vitest run --config vitest.config.ts" + "test:vitest": "turbo run build && vitest run --config vitest.config.ts" }, "files": [ "dist/index.js", diff --git a/packages/core/tests/public-api/public-error-types.test.ts b/packages/core/tests/public-api/public-error-types.test.ts index b13aa35b9..6c325bb1b 100644 --- a/packages/core/tests/public-api/public-error-types.test.ts +++ b/packages/core/tests/public-api/public-error-types.test.ts @@ -53,6 +53,9 @@ export const publicErrorTypes = { UnsupportedModelProviderError: Stagehand.UnsupportedModelProviderError, XPathResolutionError: Stagehand.XPathResolutionError, ZodSchemaValidationError: Stagehand.ZodSchemaValidationError, + ActTimeoutError: Stagehand.ActTimeoutError, + ObserveTimeoutError: Stagehand.ObserveTimeoutError, + ExtractTimeoutError: Stagehand.ExtractTimeoutError, } as const; const errorTypes = Object.keys(publicErrorTypes) as Array< diff --git a/packages/core/tests/public-api/timeout-error-types.test.ts b/packages/core/tests/public-api/timeout-error-types.test.ts new file mode 100644 index 000000000..f6e6a69db --- /dev/null +++ b/packages/core/tests/public-api/timeout-error-types.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, it } from "vitest"; +import * as Stagehand from "../../dist"; + +// ============================================================================ +// Public Timeout Error Types Runtime Tests +// ============================================================================ +// These tests verify the runtime behavior of exported timeout error types, +// complementing the type-level tests in public-error-types.test.ts + +describe("Public timeout error types runtime behavior", () => { + describe("ActTimeoutError", () => { + it("is exported and extends Error", () => { + const error = new Stagehand.ActTimeoutError(1000); + expect(error).toBeInstanceOf(Error); + expect(error).toBeInstanceOf(Stagehand.ActTimeoutError); + expect(error.name).toBe("ActTimeoutError"); + }); + + it("contains timeout value in milliseconds in message", () => { + const error = new Stagehand.ActTimeoutError(500); + expect(error.message).toContain("500ms"); + }); + + it("contains operation name in message", () => { + const error = new Stagehand.ActTimeoutError(100); + expect(error.message).toContain("act()"); + }); + + it("extends TimeoutError", () => { + const error = new Stagehand.ActTimeoutError(1000); + expect(error).toBeInstanceOf(Stagehand.TimeoutError); + }); + }); + + describe("ExtractTimeoutError", () => { + it("is exported and extends Error", () => { + const error = new Stagehand.ExtractTimeoutError(1000); + expect(error).toBeInstanceOf(Error); + expect(error).toBeInstanceOf(Stagehand.ExtractTimeoutError); + expect(error.name).toBe("ExtractTimeoutError"); + }); + + it("contains timeout value in milliseconds in message", () => { + const error = new Stagehand.ExtractTimeoutError(1000); + expect(error.message).toContain("1000ms"); + }); + + it("contains operation name in message", () => { + const error = new Stagehand.ExtractTimeoutError(100); + expect(error.message).toContain("extract()"); + }); + + it("extends TimeoutError", () => { + const error = new Stagehand.ExtractTimeoutError(1000); + expect(error).toBeInstanceOf(Stagehand.TimeoutError); + }); + }); + + describe("ObserveTimeoutError", () => { + it("is exported and extends Error", () => { + const error = new Stagehand.ObserveTimeoutError(1000); + expect(error).toBeInstanceOf(Error); + expect(error).toBeInstanceOf(Stagehand.ObserveTimeoutError); + expect(error.name).toBe("ObserveTimeoutError"); + }); + + it("contains timeout value in milliseconds in message", () => { + const error = new Stagehand.ObserveTimeoutError(1500); + expect(error.message).toContain("1500ms"); + }); + + it("contains operation name in message", () => { + const error = new Stagehand.ObserveTimeoutError(100); + expect(error.message).toContain("observe()"); + }); + + it("extends TimeoutError", () => { + const error = new Stagehand.ObserveTimeoutError(1000); + expect(error).toBeInstanceOf(Stagehand.TimeoutError); + }); + }); + + describe("TimeoutError (base class)", () => { + it("is exported and extends Error", () => { + const error = new Stagehand.TimeoutError("custom operation", 2000); + expect(error).toBeInstanceOf(Error); + expect(error).toBeInstanceOf(Stagehand.TimeoutError); + }); + + it("contains operation name and timeout in message", () => { + const error = new Stagehand.TimeoutError("custom operation", 2000); + expect(error.message).toContain("custom operation"); + expect(error.message).toContain("2000ms"); + }); + + it("extends StagehandError", () => { + const error = new Stagehand.TimeoutError("operation", 1000); + expect(error).toBeInstanceOf(Stagehand.StagehandError); + }); + }); +}); diff --git a/packages/core/tests/timeout-handlers.test.ts b/packages/core/tests/timeout-handlers.test.ts new file mode 100644 index 000000000..5a4d60d52 --- /dev/null +++ b/packages/core/tests/timeout-handlers.test.ts @@ -0,0 +1,1114 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { ActHandler } from "../lib/v3/handlers/actHandler"; +import { ExtractHandler } from "../lib/v3/handlers/extractHandler"; +import { ObserveHandler } from "../lib/v3/handlers/observeHandler"; +import type { Page } from "../lib/v3/understudy/page"; +import type { ClientOptions } from "../lib/v3/types/public/model"; +import type { LLMClient } from "../lib/v3/llm/LLMClient"; +import { createTimeoutGuard } from "../lib/v3/handlers/handlerUtils/timeoutGuard"; +import { waitForDomNetworkQuiet } from "../lib/v3/handlers/handlerUtils/actHandlerUtils"; +import { captureHybridSnapshot } from "../lib/v3/understudy/a11y/snapshot"; +import { + ActTimeoutError, + ExtractTimeoutError, + ObserveTimeoutError, +} from "../lib/v3/types/public/sdkErrors"; +import { + act as actInference, + extract as extractInference, + observe as observeInference, +} from "../lib/inference"; +import { V3FunctionName } from "../lib/v3/types/public/methods"; + +vi.mock("../lib/v3/handlers/handlerUtils/timeoutGuard", () => ({ + createTimeoutGuard: vi.fn(), +})); + +vi.mock("../lib/v3/handlers/handlerUtils/actHandlerUtils", () => ({ + waitForDomNetworkQuiet: vi.fn(), + performUnderstudyMethod: vi.fn(), +})); + +vi.mock("../lib/v3/understudy/a11y/snapshot", () => ({ + captureHybridSnapshot: vi.fn(), + diffCombinedTrees: vi.fn(), +})); + +vi.mock("../lib/inference", () => ({ + act: vi.fn(), + extract: vi.fn(), + observe: vi.fn(), +})); + +describe("ActHandler timeout guard", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws ActTimeoutError when timeout expires before snapshot", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + // Make createTimeoutGuard return a guard that throws on call #2 + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 2) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ActTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.act({ + instruction: "do something", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ActTimeoutError); + + // Verify pre-timeout helper ran + expect(waitForDomNetworkQuietMock).toHaveBeenCalledTimes(1); + // Verify snapshot was NOT called (timeout fired before it) + expect(captureHybridSnapshotMock).not.toHaveBeenCalled(); + }); + + it("throws ActTimeoutError when timeout expires before LLM call", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const actInferenceMock = vi.mocked(actInference); + + // Throw on call #3 (after snapshot but before LLM) + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 3) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ActTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.act({ + instruction: "do something", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ActTimeoutError); + + // Snapshot should have been called + expect(captureHybridSnapshotMock).toHaveBeenCalledTimes(1); + // LLM inference should NOT have been called + expect(actInferenceMock).not.toHaveBeenCalled(); + }); + + it("throws ActTimeoutError with correct message format", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const timeoutMs = 100; + + vi.mocked(createTimeoutGuard).mockImplementation((ms, errorFactory) => { + return vi.fn(() => { + throw errorFactory ? errorFactory(ms!) : new ActTimeoutError(ms!); + }); + }); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + try { + await handler.act({ + instruction: "do something", + page: fakePage, + timeout: timeoutMs, + }); + throw new Error("Expected ActTimeoutError to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ActTimeoutError); + expect((error as ActTimeoutError).message).toContain("act()"); + expect((error as ActTimeoutError).message).toContain(`${timeoutMs}ms`); + expect((error as ActTimeoutError).name).toBe("ActTimeoutError"); + } + }); +}); + +describe("ActHandler two-step timeout", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws ActTimeoutError during step 2; step 2 action does not run", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + performUnderstudyMethodMock.mockResolvedValue(undefined); + + const actInferenceMock = vi.mocked(actInference); + // First call returns a two-step action + actInferenceMock.mockResolvedValueOnce({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: true, + prompt_tokens: 100, + completion_tokens: 50, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + const diffCombinedTreesMock = vi.mocked( + (await import("../lib/v3/understudy/a11y/snapshot")).diffCombinedTrees, + ); + diffCombinedTreesMock.mockReturnValue("diff tree"); + + // Timeout fires after step 1 completes, during step 2 snapshot + // ensureTimeRemaining calls: 1=before wait, 2=after wait/before snap1, 3=before LLM1, + // 4=before action1, 5=inside takeDeterministicAction, 6=performUnderstudy, + // 7=before snap2 (this one should throw) + let callCount = 0; + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + return vi.fn(() => { + callCount += 1; + if (callCount >= 7) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ActTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.act({ + instruction: "click then type", + page: fakePage, + timeout: 50, + }), + ).rejects.toThrow(ActTimeoutError); + + // Step 1 action should have been executed + expect(performUnderstudyMethodMock).toHaveBeenCalledTimes(1); + // Step 2 LLM call should NOT have happened + expect(actInferenceMock).toHaveBeenCalledTimes(1); + }); +}); + +describe("ActHandler self-heal timeout", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws ActTimeoutError during self-heal snapshot; no retry action executes", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + // First call fails, triggering self-heal + performUnderstudyMethodMock.mockRejectedValueOnce( + new Error("Element not found"), + ); + + const actInferenceMock = vi.mocked(actInference); + actInferenceMock.mockResolvedValue({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: false, + prompt_tokens: 100, + completion_tokens: 50, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + // Timeout during self-heal snapshot (call 7 or later) + let callCount = 0; + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + return vi.fn(() => { + callCount += 1; + // Timeout during self-heal snapshot call + if (callCount >= 7) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ActTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildActHandler({ selfHeal: true }); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.act({ + instruction: "click button", + page: fakePage, + timeout: 50, + }), + ).rejects.toThrow(ActTimeoutError); + + // First action attempt should have been tried + expect(performUnderstudyMethodMock).toHaveBeenCalledTimes(1); + // First LLM call should have happened + expect(actInferenceMock).toHaveBeenCalledTimes(1); + // Self-heal snapshot should have been started (call happened) + expect(captureHybridSnapshotMock).toHaveBeenCalled(); + }); + + it("throws ActTimeoutError during self-heal LLM inference; no retry action executes", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + // First call fails, triggering self-heal + performUnderstudyMethodMock.mockRejectedValueOnce( + new Error("Element not found"), + ); + + const actInferenceMock = vi.mocked(actInference); + actInferenceMock.mockResolvedValueOnce({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: false, + prompt_tokens: 100, + completion_tokens: 50, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + // Timeout during self-heal LLM inference (call 8) + let callCount = 0; + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + return vi.fn(() => { + callCount += 1; + // Timeout during self-heal LLM call + if (callCount >= 8) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ActTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildActHandler({ selfHeal: true }); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.act({ + instruction: "click button", + page: fakePage, + timeout: 50, + }), + ).rejects.toThrow(ActTimeoutError); + + // Self-heal snapshot was captured + expect(captureHybridSnapshotMock).toHaveBeenCalledTimes(2); + // Only one LLM inference (the retry inference was aborted by timeout) + expect(actInferenceMock).toHaveBeenCalledTimes(1); + }); +}); + +describe("ExtractHandler timeout guard", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws ExtractTimeoutError when timeout expires before snapshot", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const extractInferenceMock = vi.mocked(extractInference); + + // Throw immediately on first call + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + return vi.fn(() => { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ExtractTimeoutError(timeoutMs!); + }); + }, + ); + + const handler = buildExtractHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.extract({ + instruction: "extract title", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ExtractTimeoutError); + + // Snapshot should NOT have been called + expect(captureHybridSnapshotMock).not.toHaveBeenCalled(); + // LLM inference should NOT have been called + expect(extractInferenceMock).not.toHaveBeenCalled(); + }); + + it("throws ExtractTimeoutError when timeout expires before LLM call", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const extractInferenceMock = vi.mocked(extractInference); + + // Throw on call #2 (after snapshot but before LLM) + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 2) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ExtractTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildExtractHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.extract({ + instruction: "extract title", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ExtractTimeoutError); + + // Snapshot should have been called + expect(captureHybridSnapshotMock).toHaveBeenCalledTimes(1); + // LLM inference should NOT have been called + expect(extractInferenceMock).not.toHaveBeenCalled(); + }); + + it("throws ExtractTimeoutError with correct message format", async () => { + const timeoutMs = 200; + + vi.mocked(createTimeoutGuard).mockImplementation((ms, errorFactory) => { + return vi.fn(() => { + throw errorFactory ? errorFactory(ms!) : new ExtractTimeoutError(ms!); + }); + }); + + const handler = buildExtractHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + try { + await handler.extract({ + instruction: "extract title", + page: fakePage, + timeout: timeoutMs, + }); + throw new Error("Expected ExtractTimeoutError to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExtractTimeoutError); + expect((error as ExtractTimeoutError).message).toContain("extract()"); + expect((error as ExtractTimeoutError).message).toContain( + `${timeoutMs}ms`, + ); + expect((error as ExtractTimeoutError).name).toBe("ExtractTimeoutError"); + } + }); + + it("stops LLM and post-processing when timeout expires", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: { "1-0": "https://example.com" }, + }); + + const extractInferenceMock = vi.mocked(extractInference); + + // Allow snapshot but timeout before LLM + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 2) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ExtractTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildExtractHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.extract({ + instruction: "extract links", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ExtractTimeoutError); + + // Post-processing (URL injection) never runs because LLM was never called + expect(extractInferenceMock).not.toHaveBeenCalled(); + }); +}); + +describe("ObserveHandler timeout guard", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws ObserveTimeoutError when timeout expires before snapshot", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const observeInferenceMock = vi.mocked(observeInference); + + // Throw immediately on first call + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + return vi.fn(() => { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ObserveTimeoutError(timeoutMs!); + }); + }, + ); + + const handler = buildObserveHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.observe({ + instruction: "find buttons", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ObserveTimeoutError); + + // Snapshot should NOT have been called + expect(captureHybridSnapshotMock).not.toHaveBeenCalled(); + // LLM inference should NOT have been called + expect(observeInferenceMock).not.toHaveBeenCalled(); + }); + + it("throws ObserveTimeoutError when timeout expires before LLM call", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const observeInferenceMock = vi.mocked(observeInference); + + // Throw on call #2 (after snapshot but before LLM) + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 2) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ObserveTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildObserveHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.observe({ + instruction: "find buttons", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ObserveTimeoutError); + + // Snapshot should have been called + expect(captureHybridSnapshotMock).toHaveBeenCalledTimes(1); + // LLM inference should NOT have been called + expect(observeInferenceMock).not.toHaveBeenCalled(); + }); + + it("throws ObserveTimeoutError with correct message format", async () => { + const timeoutMs = 150; + + vi.mocked(createTimeoutGuard).mockImplementation((ms, errorFactory) => { + return vi.fn(() => { + throw errorFactory ? errorFactory(ms!) : new ObserveTimeoutError(ms!); + }); + }); + + const handler = buildObserveHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + try { + await handler.observe({ + instruction: "find buttons", + page: fakePage, + timeout: timeoutMs, + }); + throw new Error("Expected ObserveTimeoutError to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ObserveTimeoutError); + expect((error as ObserveTimeoutError).message).toContain("observe()"); + expect((error as ObserveTimeoutError).message).toContain( + `${timeoutMs}ms`, + ); + expect((error as ObserveTimeoutError).name).toBe("ObserveTimeoutError"); + } + }); + + it("aborts result processing when timeout expires", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const observeInferenceMock = vi.mocked(observeInference); + + // Timeout before LLM call + vi.mocked(createTimeoutGuard).mockImplementation( + (timeoutMs, errorFactory) => { + let calls = 0; + return vi.fn(() => { + calls += 1; + if (calls >= 2) { + throw errorFactory + ? errorFactory(timeoutMs!) + : new ObserveTimeoutError(timeoutMs!); + } + }); + }, + ); + + const handler = buildObserveHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + await expect( + handler.observe({ + instruction: "find all interactive elements", + page: fakePage, + timeout: 5, + }), + ).rejects.toThrow(ObserveTimeoutError); + + // Result mapping/processing never happens + expect(observeInferenceMock).not.toHaveBeenCalled(); + }); +}); + +describe("No-timeout success paths", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("act() completes successfully without timeout and records metrics", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + performUnderstudyMethodMock.mockResolvedValue(undefined); + + const actInferenceMock = vi.mocked(actInference); + actInferenceMock.mockResolvedValue({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: false, + prompt_tokens: 100, + completion_tokens: 50, + reasoning_tokens: 10, + cached_input_tokens: 5, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + // No timeout - guard never throws + vi.mocked(createTimeoutGuard).mockImplementation(() => { + return vi.fn(() => { + // No-op - never throws + }); + }); + + const metricsCallback = vi.fn(); + const handler = buildActHandler({ onMetrics: metricsCallback }); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + const result = await handler.act({ + instruction: "click button", + page: fakePage, + // No timeout specified + }); + + expect(result.success).toBe(true); + expect(metricsCallback).toHaveBeenCalledWith( + V3FunctionName.ACT, + 100, + 50, + 10, + 5, + 500, + ); + }); + + it("extract() completes successfully without timeout and records metrics", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: {}, + combinedUrlMap: {}, + }); + + const extractInferenceMock = vi.mocked(extractInference); + extractInferenceMock.mockResolvedValue({ + title: "Test Title", + metadata: { completed: true, progress: "100%" }, + prompt_tokens: 200, + completion_tokens: 100, + reasoning_tokens: 20, + cached_input_tokens: 10, + inference_time_ms: 800, + } as ReturnType extends Promise + ? T + : never); + + // No timeout - guard never throws + vi.mocked(createTimeoutGuard).mockImplementation(() => { + return vi.fn(() => { + // No-op - never throws + }); + }); + + const metricsCallback = vi.fn(); + const handler = buildExtractHandler({ onMetrics: metricsCallback }); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + const result = await handler.extract({ + instruction: "extract title", + page: fakePage, + // No timeout specified + }); + + expect(result).toHaveProperty("title", "Test Title"); + expect(metricsCallback).toHaveBeenCalledWith( + V3FunctionName.EXTRACT, + 200, + 100, + 20, + 10, + 800, + ); + }); + + it("observe() completes successfully without timeout and records metrics", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const observeInferenceMock = vi.mocked(observeInference); + observeInferenceMock.mockResolvedValue({ + elements: [ + { + elementId: "1-0", + description: "Submit button", + }, + ], + prompt_tokens: 150, + completion_tokens: 75, + reasoning_tokens: 15, + cached_input_tokens: 8, + inference_time_ms: 600, + } as ReturnType extends Promise + ? T + : never); + + // No timeout - guard never throws + vi.mocked(createTimeoutGuard).mockImplementation(() => { + return vi.fn(() => { + // No-op - never throws + }); + }); + + const metricsCallback = vi.fn(); + const handler = buildObserveHandler({ onMetrics: metricsCallback }); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + const result = await handler.observe({ + instruction: "find buttons", + page: fakePage, + // No timeout specified + }); + + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty("description", "Submit button"); + expect(metricsCallback).toHaveBeenCalledWith( + V3FunctionName.OBSERVE, + 150, + 75, + 15, + 8, + 600, + ); + }); + + it("act() with zero timeout behaves as no timeout", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + performUnderstudyMethodMock.mockResolvedValue(undefined); + + const actInferenceMock = vi.mocked(actInference); + actInferenceMock.mockResolvedValue({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: false, + prompt_tokens: 100, + completion_tokens: 50, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + // When timeout is 0 or negative, createTimeoutGuard returns a no-op + vi.mocked(createTimeoutGuard).mockImplementation((timeoutMs) => { + if (!timeoutMs || timeoutMs <= 0) { + return vi.fn(() => { + // No-op + }); + } + return vi.fn(() => { + throw new ActTimeoutError(timeoutMs); + }); + }); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + const result = await handler.act({ + instruction: "click button", + page: fakePage, + timeout: 0, // Zero timeout should be treated as "no timeout" + }); + + expect(result.success).toBe(true); + }); + + it("act() with negative timeout behaves as no timeout", async () => { + const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); + waitForDomNetworkQuietMock.mockResolvedValue(undefined); + + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": "/html/body/button" }, + combinedUrlMap: {}, + }); + + const { performUnderstudyMethod } = await import( + "../lib/v3/handlers/handlerUtils/actHandlerUtils" + ); + const performUnderstudyMethodMock = vi.mocked(performUnderstudyMethod); + performUnderstudyMethodMock.mockResolvedValue(undefined); + + const actInferenceMock = vi.mocked(actInference); + actInferenceMock.mockResolvedValue({ + element: { + elementId: "1-0", + description: "click button", + method: "click", + arguments: [], + }, + twoStep: false, + prompt_tokens: 100, + completion_tokens: 50, + inference_time_ms: 500, + } as ReturnType extends Promise ? T : never); + + vi.mocked(createTimeoutGuard).mockImplementation((timeoutMs) => { + if (!timeoutMs || timeoutMs <= 0) { + return vi.fn(() => { + // No-op + }); + } + return vi.fn(() => { + throw new ActTimeoutError(timeoutMs); + }); + }); + + const handler = buildActHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + + const result = await handler.act({ + instruction: "click button", + page: fakePage, + timeout: -100, // Negative timeout should be treated as "no timeout" + }); + + expect(result.success).toBe(true); + }); +}); + +interface BuildActHandlerOptions { + selfHeal?: boolean; + onMetrics?: ( + functionName: V3FunctionName, + promptTokens: number, + completionTokens: number, + reasoningTokens: number, + cachedInputTokens: number, + inferenceTimeMs: number, + ) => void; +} + +function buildActHandler(options: BuildActHandlerOptions = {}): ActHandler { + const defaultClientOptions = {} as ClientOptions; + const fakeClient = { + type: "openai", + modelName: "gpt-4o", + clientOptions: defaultClientOptions, + } as LLMClient; + const resolveLlmClient = vi.fn().mockReturnValue(fakeClient); + + return new ActHandler( + fakeClient, + "gpt-4o", + defaultClientOptions, + resolveLlmClient, + undefined, + false, + options.selfHeal ?? false, + options.onMetrics, + undefined, + ); +} + +interface BuildExtractHandlerOptions { + onMetrics?: ( + functionName: V3FunctionName, + promptTokens: number, + completionTokens: number, + reasoningTokens: number, + cachedInputTokens: number, + inferenceTimeMs: number, + ) => void; +} + +function buildExtractHandler( + options: BuildExtractHandlerOptions = {}, +): ExtractHandler { + const defaultClientOptions = {} as ClientOptions; + const fakeClient = { + type: "openai", + modelName: "gpt-4o", + clientOptions: defaultClientOptions, + } as LLMClient; + const resolveLlmClient = vi.fn().mockReturnValue(fakeClient); + + return new ExtractHandler( + fakeClient, + "gpt-4o", + defaultClientOptions, + resolveLlmClient, + undefined, + false, + false, + options.onMetrics, + ); +} + +interface BuildObserveHandlerOptions { + onMetrics?: ( + functionName: V3FunctionName, + promptTokens: number, + completionTokens: number, + reasoningTokens: number, + cachedInputTokens: number, + inferenceTimeMs: number, + ) => void; +} + +function buildObserveHandler( + options: BuildObserveHandlerOptions = {}, +): ObserveHandler { + const defaultClientOptions = {} as ClientOptions; + const fakeClient = { + type: "openai", + modelName: "gpt-4o", + clientOptions: defaultClientOptions, + } as LLMClient; + const resolveLlmClient = vi.fn().mockReturnValue(fakeClient); + + return new ObserveHandler( + fakeClient, + "gpt-4o", + defaultClientOptions, + resolveLlmClient, + undefined, + false, + false, + options.onMetrics, + ); +}