In [1]:
# Project-local bootstrap helper; its implementation is not part of this notebook.
# NOTE(review): presumably it loads the API keys used further down (e.g. the
# SERPER_API_KEY read by search_google) into the environment -- confirm in set_up.py.
from set_up import set_up_env
set_up_env()

In [2]:
import os
from datetime import date
from typing import List, Union

import trafilatura
import httpx

from pydantic import BaseModel
from pydantic_ai import Agent, RunContext
from pydantic_ai.settings import ModelSettings

In [3]:
# Structured result holding an answer as free text.
# It is one of the two result types qa_agent is allowed to produce.
class Answer(BaseModel):
    text: str  # the answer text

# Field-less marker result meaning "no answer available".
# ask_website returns it when the page cannot be fetched, and it is the
# alternative result type of qa_agent.
class NoAnswer(BaseModel):
    pass

# One web search hit as returned to the research agent by search_google.
class SearchResult(BaseModel):
    title: str  # taken from the hit's "title" entry
    url: str  # taken from the hit's "link" entry
    snippet: str  # taken from the hit's "snippet" entry; search_google falls back to "n/a"

In [4]:
# Helper agent that answers a question from a supplied piece of text. It must
# produce either an Answer or a NoAnswer.
qa_agent = Agent(
    "openai:gpt-4o-mini",
    system_prompt=(
        "Answer the following questions about the given text. Only answer the questions if the provided text is relevant."
    ),
    result_type=Union[Answer, NoAnswer],
    model_settings=ModelSettings(temperature=0, max_tokens=1024),
)

In [5]:
# Current date rendered like "February 09, 2025"; it is embedded in the prompt
# so the model knows what "today" is when reasoning about recent events.
today = date.today().strftime("%B %d, %Y")

# System prompt for the research agent. The tool names mentioned here must
# match the functions registered on the agent (search_google, ask_website).
# A stray, unmatched double quote after "making things up." was removed.
system_prompt = f"""
You are a helpful research agent. Provide concise and precise answers to the questions provided.
You can use the 'search_google' tool to find information on the internet, and you can use the
'ask_website' tool to extract information from a specific website.
If you don't know the answer to a specific question, say "I don't know" instead of making things up.
For your information, today is {today}.
"""

In [6]:
# Top-level agent that answers the user's question in plain text. Its tools
# are attached below via the @research_agent.tool decorator.
research_agent = Agent(
    "openai:gpt-4o-mini",
    system_prompt=system_prompt,
    result_type=str,
    model_settings=ModelSettings(temperature=0, max_tokens=1024),
)

In [7]:
@research_agent.tool
async def ask_website(ctx: RunContext, url: str, question: str) -> NoAnswer | Answer:
    """
    Ask a question about a website to get the answer.

    Args:
        url: The URL of the website to ask the question.
        question: The question to ask.

    Returns:
        The answer from the website, or no answer if the website could not be
        read or does not contain the answer.
    """
    try:
        # Use the async client so the download does not block the event loop
        # the agent is running on.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
    except Exception:
        # Deliberate best effort: the URL comes from the model and may be
        # malformed or unreachable; report "no answer" instead of failing the run.
        return NoAnswer()

    if resp.status_code != 200:
        return NoAnswer()

    # trafilatura.extract returns None when it finds no main text in the page;
    # concatenating None below would raise TypeError.
    page_text = trafilatura.extract(resp.text)
    if not page_text:
        return NoAnswer()

    # Share the usage counter so the nested agent's tokens are accounted for
    # in the outer run.
    qa_result = await qa_agent.run(
        "Question: " + question + "\n\nContext:\n" + page_text,
        usage=ctx.usage,
    )
    # Return the structured Answer/NoAnswer, not the run-result wrapper.
    return qa_result.data

In [8]:
@research_agent.tool
def search_google(ctx: RunContext, query: str) -> List[SearchResult]:
    """
    Search the web for the given query and return the top results.

    Args:
        query: The query to search for.

    Returns:
        The top search results
    """
    api_key = os.getenv("SERPER_API_KEY")
    assert api_key, "Please set API key for serper"

    # Let httpx build the query string so that spaces, '&', '#', '+' etc. in
    # the query (or key) are percent-encoded instead of corrupting the URL.
    response = httpx.get(
        "https://google.serper.dev/search",
        params={"apiKey": api_key, "q": query},
    )
    # Surface auth/quota/server errors as an HTTP error rather than as a
    # confusing KeyError on the missing "organic" entry.
    response.raise_for_status()

    # A response without organic hits yields an empty list instead of KeyError.
    organic_hits = response.json().get("organic", [])

    return [
        SearchResult(
            title=item.get("title", "n/a"),
            url=item["link"],
            snippet=item.get("snippet", "n/a"),
        )
        for item in organic_hits
        # SearchResult.url must be a string; a hit without a link is useless
        # for ask_website and would fail validation for the whole result list.
        if item.get("link")
    ]

In [10]:
# Run the research agent synchronously on a sample question and show its
# final text answer.
result = research_agent.run_sync("Who won the 2025 Super Bowl?")
print(f"Answer: {result.data}")

Answer: The Philadelphia Eagles won the 2025 Super Bowl, defeating the Kansas City Chiefs with a score of 40-22. This victory marked the Eagles' second Super Bowl title.
