From dac53a29b9be7d2f12d8fb3566405c132915df97 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Wed, 26 Feb 2025 17:41:01 -0800 Subject: [PATCH] test removing verifyActCompletion --- evals/tasks/simple_google_search.ts | 6 +- examples/example.ts | 1 + lib/handlers/actHandler.ts | 186 +++------------------------- lib/inference.ts | 55 -------- lib/prompt.ts | 71 ----------- types/inference.ts | 12 -- 6 files changed, 21 insertions(+), 310 deletions(-) delete mode 100644 types/inference.ts diff --git a/evals/tasks/simple_google_search.ts b/evals/tasks/simple_google_search.ts index 3f6aa22e0..97e18d1ed 100644 --- a/evals/tasks/simple_google_search.ts +++ b/evals/tasks/simple_google_search.ts @@ -15,7 +15,11 @@ export const simple_google_search: EvalFunction = async ({ await stagehand.page.goto("https://www.google.com"); await stagehand.page.act({ - action: 'Search for "OpenAI"', + action: 'Type "OpenAI" into the search bar', + }); + + await stagehand.page.act({ + action: "Click the search button", }); const expectedUrl = "https://www.google.com/search?q=OpenAI"; diff --git a/examples/example.ts b/examples/example.ts index 41d8d861f..2bf1aacaa 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -14,6 +14,7 @@ async function example() { }); await stagehand.init(); await stagehand.page.goto("https://docs.stagehand.dev"); + await stagehand.page.act("click on the quickstart"); } (async () => { diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index 48a652c00..cf59f8748 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -5,7 +5,7 @@ import { PlaywrightCommandMethodNotSupportedException, } from "../../types/playwright"; import { ActionCache } from "../cache/ActionCache"; -import { act, fillInVariables, verifyActCompletion } from "../inference"; +import { act, fillInVariables } from "../inference"; import { LLMClient } from "../llm/LLMClient"; import { LLMProvider } from "../llm/LLMProvider"; import { StagehandContext } from "../StagehandContext"; @@ -164,90 +164,6 @@ export class StagehandActHandler { return id; } - private async _verifyActionCompletion({ - completed, - requestId, - action, - steps, - llmClient, - domSettleTimeoutMs, - }: { - completed: boolean; - requestId: string; - action: string; - steps: string; - llmClient: LLMClient; - domSettleTimeoutMs?: number; - }): Promise { - if (!completed) { - return false; - } - - await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); - - // o1 is overkill for this task + this task uses a lot of tokens. So we switch it 4o - let verifyLLmClient = llmClient; - if ( - llmClient.modelName.startsWith("o1") || - llmClient.modelName.startsWith("o3") - ) { - verifyLLmClient = this.llmProvider.getClient( - "gpt-4o", - llmClient.clientOptions, - ); - } - - const { outputString: domElements } = - await this.stagehandPage.page.evaluate(() => { - return window.processAllOfDom(); - }); - - let actionCompleted = false; - if (completed) { - // Run action completion verifier - this.logger({ - category: "action", - message: "action marked as completed, verifying if this is true...", - level: 1, - auxiliary: { - action: { - value: action, - type: "string", - }, - }, - }); - - // Always use text-based DOM verification (no vision). - actionCompleted = await verifyActCompletion({ - goal: action, - steps, - llmProvider: this.llmProvider, - llmClient: verifyLLmClient, - domElements, - logger: this.logger, - requestId, - }); - - this.logger({ - category: "action", - message: "action completion verification result", - level: 1, - auxiliary: { - action: { - value: action, - type: "string", - }, - result: { - value: actionCompleted.toString(), - type: "boolean", - }, - }, - }); - } - - return actionCompleted; - } - private async _performPlaywrightMethod( method: string, args: unknown[], @@ -992,35 +908,11 @@ export class StagehandActHandler { ); if (cachedStep.completed) { - // Verify the action was completed successfully - const actionCompleted = await this._verifyActionCompletion({ - completed: true, - llmClient, - steps, - requestId, + return { + success: true, + message: "action completed successfully using cached step", action, - domSettleTimeoutMs, - }); - - this.logger({ - category: "action", - message: "action completion verification result from cache", - level: 1, - auxiliary: { - actionCompleted: { - value: actionCompleted.toString(), - type: "boolean", - }, - }, - }); - - if (actionCompleted) { - return { - success: true, - message: "action completed successfully using cached step", - action, - }; - } + }; } return this.act({ @@ -1393,65 +1285,17 @@ export class StagehandActHandler { } } - const actionCompleted = await this._verifyActionCompletion({ - completed: response.completed, - requestId, - action, - steps, - llmClient, - domSettleTimeoutMs, - }).catch((error) => { - this.logger({ - category: "action", - message: - "error verifying action completion. Assuming action completed.", - level: 1, - auxiliary: { - error: { - value: error.message, - type: "string", - }, - trace: { - value: error.stack, - type: "string", - }, - }, - }); - - return true; + this.logger({ + category: "action", + message: "action completed successfully", + level: 1, }); - - if (!actionCompleted) { - this.logger({ - category: "action", - message: "continuing to next action step", - level: 1, - }); - - return this.act({ - action, - steps, - llmClient, - chunksSeen, - requestId, - variables, - previousSelectors: [...previousSelectors, foundXpath], - skipActionCacheForThisStep: false, - domSettleTimeoutMs, - }); - } else { - this.logger({ - category: "action", - message: "action completed successfully", - level: 1, - }); - await this._recordAction(action, response.step); - return { - success: true, - message: `Action completed successfully: ${steps}${response.step}`, - action: action, - }; - } + await this._recordAction(action, response.step); + return { + success: true, + message: `Action completed successfully: ${steps}${response.step}`, + action: action, + }; } catch (error) { this.logger({ category: "action", diff --git a/lib/inference.ts b/lib/inference.ts index ff0043a6f..425b35396 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -1,6 +1,5 @@ import { z } from "zod"; import { ActCommandParams, ActCommandResult } from "../types/act"; -import { VerifyActCompletionParams } from "../types/inference"; import { LogLine } from "../types/log"; import { ChatMessage, LLMClient } from "./llm/LLMClient"; import { @@ -15,62 +14,8 @@ import { buildObserveUserMessage, buildRefineSystemPrompt, buildRefineUserPrompt, - buildVerifyActCompletionSystemPrompt, - buildVerifyActCompletionUserPrompt, } from "./prompt"; -export async function verifyActCompletion({ - goal, - steps, - llmClient, - domElements, - logger, - requestId, -}: VerifyActCompletionParams): Promise { - const verificationSchema = z.object({ - completed: z.boolean().describe("true if the goal is accomplished"), - }); - - type VerificationResponse = z.infer; - - const response = await llmClient.createChatCompletion({ - options: { - messages: [ - buildVerifyActCompletionSystemPrompt(), - buildVerifyActCompletionUserPrompt(goal, steps, domElements), - ], - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - response_model: { - name: "Verification", - schema: verificationSchema, - }, - requestId, - }, - logger, - }); - - if (!response || typeof response !== "object") { - logger({ - category: "VerifyAct", - message: "Unexpected response format: " + JSON.stringify(response), - }); - return false; - } - - if (response.completed === undefined) { - logger({ - category: "VerifyAct", - message: "Missing 'completed' field in response", - }); - return false; - } - - return response.completed; -} - export function fillInVariables( text: string, variables: Record, diff --git a/lib/prompt.ts b/lib/prompt.ts index b9ee3cde1..f7eeabb8e 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -25,77 +25,6 @@ Note 2: Sometimes what your are looking for is hidden behind and element you nee Again, if the user's goal will be accomplished after running the playwright action, set completed to true. Also, if the user provides custom instructions, it is imperative that you follow them no matter what. `; -const verifyActCompletionSystemPrompt = ` -You are a browser automation assistant. The job has given you a goal and a list of steps that have been taken so far. Your job is to determine if the user's goal has been completed based on the provided information. - -# Input -You will receive: -1. The user's goal: A clear description of what the user wants to achieve. -2. Steps taken so far: A list of actions that have been performed up to this point. - -# Your Task -Analyze the provided information to determine if the user's goal has been fully completed. - -# Output -Return a boolean value: -- true: If the goal has been definitively completed based on the steps taken and the current page. -- false: If the goal has not been completed or if there's any uncertainty about its completion. - -# Important Considerations -- False positives are okay. False negatives are not okay. -- Look for evidence of errors on the page or something having gone wrong in completing the goal. If one does not exist, return true. -`; - -// ## Examples for completion check -// ### Example 1 -// 1. User's goal: "input data scientist into role" -// 2. Steps you've taken so far: "The role input field was filled with 'data scientist'." -// 3. Active DOM elements: ["data scientist", ""] - -// Output: Will need to have completed set to true. Nothing else matters. -// Reasoning: The goal the user set has already been accomplished. We should not take any extra actions outside of the scope of the goal (for example, clicking on the search button is an invalid action - ie: not acceptable). - -// ### Example 2 -// 1. User's goal: "Sign up for the newsletter" -// 2. Steps you've taken so far: ["The email input field was filled with 'test@test.com'."] -// 3. Active DOM elements: ["", ""] - -// Output: Will need to have click on the subscribe button as action. And completed set to false. -// Reasoning: There might be an error when trying to submit the form and you need to make sure the goal is accomplished properly. So you set completed to false. - -export function buildVerifyActCompletionSystemPrompt(): ChatMessage { - return { - role: "system", - content: verifyActCompletionSystemPrompt, - }; -} - -export function buildVerifyActCompletionUserPrompt( - goal: string, - steps = "None", - domElements: string | undefined, -): ChatMessage { - let actUserPrompt = ` -# My Goal -${goal} - -# Steps You've Taken So Far -${steps} -`; - - if (domElements) { - actUserPrompt += ` -# Active DOM Elements on the current page -${domElements} -`; - } - - return { - role: "user", - content: actUserPrompt, - }; -} - export function buildUserInstructionsString( userProvidedInstructions?: string, ): string { diff --git a/types/inference.ts b/types/inference.ts deleted file mode 100644 index b36e4d6c1..000000000 --- a/types/inference.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { LLMClient } from "../lib/llm/LLMClient"; -import { LLMProvider } from "../lib/llm/LLMProvider"; - -export interface VerifyActCompletionParams { - goal: string; - steps: string; - llmProvider: LLMProvider; - llmClient: LLMClient; - domElements?: string; - logger: (message: { category?: string; message: string }) => void; - requestId: string; -}