In [1]:
import os
import nest_asyncio
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langgraph.graph import END, START, StateGraph
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import chain as chain_decorator
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, PromptTemplate

### Set up

In [2]:
# This is required for running async playwright in a Jupyter notebook:
# the notebook kernel already runs an asyncio event loop, and nest_asyncio
# patches asyncio so that loop can be re-entered instead of raising
# "This event loop is already running".
nest_asyncio.apply()

In [3]:
# Change directory to the parent of the kernel's starting working directory
# (presumably the project root that contains `source/` and `conf/` -- confirm).
#
# The starting directory is captured only once per kernel session, so running
# this cell again does not climb one more level up the tree each time.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
path = _NOTEBOOK_START_DIR
os.chdir(os.path.dirname(path))

In [4]:
from source.vision_rover import *

In [5]:
# Populate the process environment (python-dotenv default lookup), then read
# the credentials and the "llm" section of the YAML configuration.
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
OPENROUTER = os.getenv("OPENROUTER_API_KEY")

_yaml_settings = load_yaml("conf/config.yaml")
llm_config = _yaml_settings["llm"]

### System prompt

In [6]:
# ReAct-specific prompt template.
#
# The text is later handed to ChatPromptTemplate.from_template, so the
# curly-brace fields below are template variables filled in at render time:
#   {input}              - the task the agent has to accomplish
#   {page_url}           - the website currently shown in the browser
#   {bbox_descriptions}  - inserted verbatim (presumably the numbered bounding
#                          boxes the actions refer to -- confirm in source.vision_rover)
#   {scratchpad_content} - inserted verbatim (presumably the history of
#                          previous steps -- confirm in source.vision_rover)
# The square-bracket tokens (e.g. [bbox_number]) are literal text shown to the
# model, not template variables.
# The model is told to answer with "Thought:", "Plan:" and "Action:" sections.
react_prompt_template = """You are an intelligent web navigation agent that helps users accomplish tasks online.

Follow the ReAct (Reasoning, Action) framework to step through the process:
1. Reason about the current state and options
2. Create a plan (or update the existing plan)
3. Choose an action to execute

Current task: {input}

Current website: {page_url}

{bbox_descriptions}

{scratchpad_content}

Based on what you can see in the browser:
1. Analyze the current state and available options
2. Update or create a plan to achieve the goal
3. Select ONE action to take

Think step by step about the task, and note any potential challenges (like popups, cookies, etc).

Available actions:
- Click [bbox_number]
- Type [bbox_number]; [text to type]
- Scroll [WINDOW or bbox_number]; [UP or DOWN]
- Wait - pauses execution for a few seconds
- GoBack - navigates back one page
- Google - navigates to Google.com
- ClosePopUp - attempts to close modals/popups
- ANSWER [your final answer] - finishes the task

Always structure your response as:

Thought: [analysis of the current state, options, and reasoning about what to do next]

Plan: [outline the steps to complete the task]

Action: [ONE of the available actions]
"""

### Set up the LLM

In [7]:
# Chat model client. The model id and endpoint come from the "llm" section of
# the config file; the API key is the OpenRouter key read from the environment.
_llm_settings = {
    "model": llm_config["gemini"],
    "base_url": llm_config["base_url"],
    "api_key": OPENROUTER,
    "max_tokens": 8000,
    "temperature": 0.1,
}
llm = ChatOpenAI(**_llm_settings)

In [8]:
# Sub-chain that turns the annotated state into a parsed model decision:
# describe boxes -> build prompt inputs -> render template -> call the LLM ->
# plain text -> parsed ReAct output.
_prediction_chain = (
    RunnableLambda(format_descriptions)
    | RunnableLambda(create_react_prompt)
    | ChatPromptTemplate.from_template(react_prompt_template)
    | llm
    | StrOutputParser()
    | RunnableLambda(parse_react_output)
)

# ReAct agent: `annotate` runs first, then its output is passed through with an
# extra "prediction" entry computed by the sub-chain above.
agent = annotate | RunnablePassthrough.assign(prediction=_prediction_chain)

### Build the graph

In [9]:
# Assemble the state graph that drives the agent loop.
graph_builder = StateGraph(AgentState)

# Every run enters at the agent node.
graph_builder.add_node("agent", agent)
graph_builder.add_edge(START, "agent")

# After the scratchpad is updated, control goes back to the agent.
graph_builder.add_node("update_scratchpad", update_scratchpad_react)
graph_builder.add_edge("update_scratchpad", "agent")

# Graph node name -> callable executed by that node.
tools = dict(
    Click=click,
    Type=type_text,
    Scroll=scroll,
    Wait=wait,
    GoBack=go_back,
    Google=to_google,
    ClosePopUp=close_popups,
)

for action_name, action_fn in tools.items():
    # Wrap the tool's return value under the "observation" key, then route
    # every tool node into the scratchpad update.
    tool_node = RunnableLambda(action_fn) | (lambda outcome: {"observation": outcome})
    graph_builder.add_node(action_name, tool_node)
    graph_builder.add_edge(action_name, "update_scratchpad")

# `select_tool` decides where to go after each agent step.
graph_builder.add_conditional_edges("agent", select_tool)
graph = graph_builder.compile()

### Demo

In [10]:
async def run_example(query):
    """Run the navigation agent on ``query`` inside a fresh browser session.

    Prints colourised progress banners, delegates the navigation itself to
    ``call_agent`` with the compiled ``graph``, and closes the browser whether
    or not the run succeeds.

    Args:
        query: Task description forwarded to ``call_agent``.

    Returns:
        The value returned by ``call_agent``. Its ``"steps"`` and
        ``"visited_urls"`` entries are read here to print the summary.
    """
    # ANSI escape sequences for terminal styling
    ansi = {
        "yellow": "\033[93m",
        "green": "\033[92m",
        "blue": "\033[94m",
        "magenta": "\033[95m",
        "cyan": "\033[96m",
        "red": "\033[91m",
        "bold": "\033[1m",
        "underline": "\033[4m",
        "reset": "\033[0m",
    }

    def styled(text, *style_names):
        # Prefix the text with the requested codes and always end with a reset.
        prefix = "".join(ansi[name] for name in style_names)
        return prefix + text + ansi["reset"]

    print("\n" + styled("Starting Web Navigation Agent", "bold", "underline"))
    print(styled("Initializing browser...", "blue"))

    browser, page = await setup_browser()
    try:
        print("\n" + styled(f"Query: {query}", "green") + "\n")

        result = await call_agent(graph, query, page)

        print("\n" + styled("Navigation Complete!", "bold", "underline"))
        print(styled(f"Task completed in {result['steps']} steps", "yellow"))
        print(styled(f"Visited {len(result['visited_urls'])} unique URLs", "cyan"))

        return result
    finally:
        # Runs on success and on failure so the browser is never left open.
        print("\n" + styled("Closing browser...", "blue"))
        await browser.close()

In [None]:
# Example task for the demo. The text (including its spelling and leading
# whitespace) is passed to run_example exactly as written.
query = """
    Find a store with the following item in stock (add to cart doesnt mean necesarrily 
    that its in stock): KWG00-M Rokusho Kotobukiya. Avoid eBay, Amazon, and Best Buy.
    """
# Top-level `await` relies on the notebook's async cell support.
result = await run_example(query)

[93m[1mStep 1:[0m
[92mThought: The current state is the Google homepage. The task is to find a store, excluding eBay, Amazon, and Best Buy, that has the item "KWG00-M Rokusho" in st...[0m
[94mPlan: 1. Search for "KWG00-M Rokusho" on Google.
2. Scan the search results, avoiding eBay, Amazon, and Best Buy links.
3. If a promising store is found, cl...[0m
[95mAction: Type ['4', 'KWG00-M Rokusho'][0m
[96mURL: https://www.google.com/[0m
---
[93m[1mStep 2:[0m
[92mThought: The current state is the Google search results page for "KWG00-M Rokusho". I need to scan the results, avoiding eBay, Amazon, and Best Buy, and find a...[0m
[94mPlan: 1. Scan the search results, avoiding eBay, Amazon, and Best Buy links.
2. Click on the "Rise of Gunpla" link.
3. Check if the item is in stock on the ...[0m
[95mAction: Click ['44'][0m
[96mURL: https://www.google.com/search?q=KWG00-M+Rokusho&sca_esv=986fab27ce48bd4c&source=hp&ei=8gXOZ7GxAo2AhuMP04-P2QQ&iflsig=ACkRmUkAAAAAZ84UAhbrcrMmuT0sTn