In [25]:
from IPython.display import Markdown

In [1]:
from typing import TypedDict, Annotated, List
import json

In [82]:
from pydantic import BaseModel, Field

from transformers import AutoTokenizer

from trustcall import create_extractor

from langchain_ollama import ChatOllama

from dotenv import load_dotenv

In [83]:
load_dotenv()

True

In [91]:
tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-120b")
with open("netflix.html") as f:
    content = f.read()

tokens = tokenizer.encode(content)
len(tokens)

1696

In [85]:
model = ChatOllama(
    model='gpt-oss:120b-cloud',
    base_url="http://localhost:11434",
)

# response = model.invoke('What is the color of the sky')
# response.content

In [86]:
class Action(BaseModel):
    description: str = Field(
        description=(
            "An informative, user-oriented description of an interactive element that "
            "the user can visually identify and engage with. Example: 'Find my Gmail link', "
            "'Locate the main search bar', 'Access my profile options'. Must be strictly based "
            "on visibly rendered UI elements with no assumptions about backend behavior."
        )
    )
    process: str = Field(
        description=(
            "A concise explanation of the visible steps the user would take to interact "
            "with this element. The steps must reflect only observable UI behavior."
        )
    )


class Info(BaseModel):
    description: str = Field(
        description=(
            "A description of a visible, read-only UI element that conveys information "
            "but offers no user interaction. Examples include region text, footer notices, "
            "contextual labels, or static informational messages."
        )
    )


class Offering(BaseModel):
    actions: List[Action]
    info_: List[Info]

In [87]:
bmodel = create_extractor(
    llm=model,
    tools=[Actions],
    tool_choice='Actions'
)

In [97]:
result = bmodel.invoke(
    f"""
    You will be given raw HTML content.

    Your job is to identify two categories of visible elements:

    1. ACTIONS (interactive UI elements)
       - Elements the user can interact with.
       - For each action:
         • description: an informative description such as
           "Find my Gmail link", "Locate the main search bar",
           "Access my profile options".
         • process: the visible UI steps the user takes to interact
           (e.g., "Click the text input, type the query, press Search").

    2. INFO (non-interactive, read-only UI elements)
       - Elements that are visible but cannot be interacted with.
       - Examples: region labels, footer text, announcements,
         contextual labels, static messages.

    RULES:
    - Base everything strictly on what is visible in the rendered UI.
    - Do not infer backend logic or invisible behavior.
    - Do not invent elements not clearly present in the HTML.
    - Do not omit fields. If no items exist, return an empty list:
         actions: []
         info_: []

    REQUIRED OUTPUT (IMPORTANT):
    - You MUST output ONLY an Offering tool call.
    - Absolutely no commentary, reasoning, markdown, or text outside the tool call.
    - The output must include BOTH lists: actions and info_.
    - The structure must match the Offering schema exactly.

    HTML CONTENT:
    {content}
    """
)

In [98]:
offerings = result['responses'][0] if result['responses'] else None

In [99]:
if offerings and offerings.actions:
    for action in offerings.actions:
        print(action)

operation='Open Netflix homepage by clicking the Netflix logo at the top left' process='Click the Netflix logo link'
operation='Enter email address or phone number' process='Click the email/phone input field, type the email or phone number'
operation='Enter password' process='Click the password input field, type the password'
operation='Submit the sign‑in form' process='Click the red "Sign In" button'
operation='Toggle the "Remember me" option' process='Click the checkbox next to the "Remember me" label'
operation='Open help page for password recovery' process='Click the "Need Help?" link'
operation='Navigate to the sign‑up page' process='Click the "Sign up now." link'
operation='Open the FAQ page' process='Click the "FAQ" link in the footer'
operation='Open the Help Centre page' process='Click the "Help Centre" link in the footer'
operation='Open the Terms of Use page' process='Click the "Terms of Use" link in the footer'
operation='Open the Privacy Statement page' process='Click the 