In [60]:
from dotenv import load_dotenv
load_dotenv()


True

In [61]:

# Prepare for the data
# Jina website - Fetch Web Page – 
# The agent should be able to fetch the content of a web page given its URL.


# Agent
# Save Summary – The agent should be able to save a summary of the page it processed.
# Search – The agent should be able to perform a search for relevant or related information.

# Orchestration agent to orchestrate which agent to use

In [62]:
# Which framework you chose (if any)
# Which LLM provider you used

# If you have time, 
# # I suggest trying to implement some of the things yourself before using a framework. 
# You can refer to lectures and ToyAIKit code for details.

In [63]:
# Prep data agent
import requests
from requests.exceptions import RequestException
from typing import Optional


# Make the function with docstring, and type -> Easily parsed by the agent 
def fetch_url(url: str) -> Optional[str]:
    """
    Fetch the textual content of a webpage.

    Args:
        url (str): The target URL to fetch content from.

    Returns:
        Optional[str]: The decoded HTML/text content of the fetched page if successful,
        or None if an error occurred.

    Raises:
        ValueError: If the provided URL is empty or invalid.
    """
    if not url or not isinstance(url, str):
        raise ValueError("The 'url' parameter must be a non-empty string.")

    jina_reader_base_url = "https://r.jina.ai/"
    jina_reader_url = jina_reader_base_url + url.lstrip("/")

    try:
        response = requests.get(jina_reader_url, timeout=10)
        response.raise_for_status()  # Raises HTTPError for bad status codes
        return response.content.decode("utf-8")
    except RequestException as e:
        # Catch all network-related errors (e.g., ConnectionError, Timeout, HTTPError)
        print(f"Error fetching URL '{jina_reader_url}': {e}")
        return None
    except UnicodeDecodeError:
        print(f"Error decoding response from '{jina_reader_url}'.")
        return None

In [64]:
result = fetch_url('https://en.wikipedia.org/wiki/LeBron_James')

In [66]:
result

'Title: LeBron James\n\nURL Source: https://en.wikipedia.org/wiki/LeBron_James\n\nPublished Time: 2003-06-06T01:41:38Z\n\nMarkdown Content:\n"LeBron" redirects here. For his son LeBron James Jr., see [Bronny James](https://en.wikipedia.org/wiki/Bronny_James "Bronny James"). For other people with the name, see [Lebrón](https://en.wikipedia.org/wiki/Lebr%C3%B3n "Lebrón").\n\nLeBron James[![Image 1](https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/LeBron_James_%2851959977144%29_%28cropped2%29.jpg/250px-LeBron_James_%2851959977144%29_%28cropped2%29.jpg)](https://en.wikipedia.org/wiki/File:LeBron_James_(51959977144)_(cropped2).jpg)\n\nJames with the [Los Angeles Lakers](https://en.wikipedia.org/wiki/Los_Angeles_Lakers "Los Angeles Lakers") in 2022\nNo. 23 – Los Angeles Lakers\nPosition[Small forward](https://en.wikipedia.org/wiki/Small_forward "Small forward") / [power forward](https://en.wikipedia.org/wiki/Power_forward "Power forward")\nLeague[NBA](https://en.wikipedia.org/wiki/N

In [65]:
len(result) / 2

279694.0

In [9]:
from pydantic_ai import Agent
from pydantic import BaseModel, Field

In [67]:
from pydantic_ai.messages import FunctionToolCallEvent

async def print_function_calls(ctx, event):
    # Detect nested streams
    if hasattr(event, "__aiter__"):
        async for sub in event:
            await print_function_calls(ctx, sub)
        return

    if isinstance(event, FunctionToolCallEvent):
        print("TOOL CALL:", event.part.tool_name, event.part.args)

In [68]:

class WikipediaRetrieveOutput(BaseModel):
    """
    A single, verifiable citation to a transcript snippet or video segment.
    Must correspond to a real snippet returned by the `search()` tool.
    """
    title: str = Field(..., description="The title of the Wikipedia page, e.g., 'LeBron James'.")
    url: str = Field(..., description="Fetch URL.")
    content: str = Field(..., description="The full clean text content of the Wikipedia article body, excluding references or unrelated sections.")


In [69]:
instructions = """
You are a fetching agent specialized in retrieving Wikipedia content.

**Role:**
Your role is to access and extract the main content of a given Wikipedia page accurately.

**Objective:**
1. Given a Wikipedia URL or topic name, fetch the full textual content of that page.
2. Include the title and the main article body only — exclude unrelated sections such as “References,” “See also,” or “External links.”
3. Return the content in plain text format suitable for downstream processing by a summarization agent.

**Data Source:**
Use only the official Wikipedia page corresponding to the given URL or topic. Do not include information from any other websites or your own prior knowledge.

"""

web_agent = Agent(
    name='web',
    instructions=instructions,
    tools=[fetch_url],
    model='gpt-4o-mini',
    output_type=WikipediaRetrieveOutput
)

In [71]:
question = "What is this page about 'https://en.wikipedia.org/wiki/LeBron_James'" # exceed the token .... TODO solve this
# question = 'What is this page about? https://en.wikipedia.org/wiki/Capybara'

results = await web_agent.run(
    user_prompt=question,
    event_stream_handler=print_function_calls
)

TOOL CALL: fetch_url {"url":"https://en.wikipedia.org/wiki/LeBron_James"}


ModelHTTPError: status_code: 400, model_name: gpt-4o-mini, body: {'message': "This model's maximum context length is 128000 tokens. However, your messages resulted in 165416 tokens (165244 in the messages, 172 in the functions). Please reduce the length of the messages or functions.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}

In [14]:
print(results)

AgentRunResult(output=WikipediaRetrieveOutput(title='Capybara', url='https://en.wikipedia.org/wiki/Capybara', content='The capybara or greater capybara (_Hydrochoerus hydrochaeris_) is the largest living rodent, native to South America. It is a member of the genus Hydrochoerus. Its close relatives include guinea pigs and rock cavies, and it is more distantly related to the agouti, the chinchilla, and the nutria. The capybara inhabits savannas and dense forests and lives near bodies of water. It is a highly social species and can be found in groups as large as one hundred individuals, but usually lives in groups of 10–20 individuals. The capybara is hunted for its meat and hide and also for grease from its thick fatty skin.\n\nEtymology\nIts common name is derived from Tupi _ka\'apiûara_, a complex agglutination of _kaá_ (leaf) + _píi_ (slender) + _ú_ (eat) + _ara_ (a suffix for agent nouns), meaning "one who eats slender leaves", or "grass-eater". The genus name, _hydrochoerus_, comes 

In [15]:
# def print_messages(messages):
#     contents = []
    
#     for m in messages:
#         print(m.kind)

#         for p in m.parts:
#             print(p.part_kind)
#             kind = p.part_kind
#             if kind == 'user-prompt' or kind == 'text':
#                 print(p.content)
#             if kind == 'tool-call': 
#                 print(p.tool_name, p.args)
#             if kind == 'tool-return':
#                 print(type(p.content), p.content)
#             print()

#         print()

# messages = orchestrator_results.new_messages()
# # message_history.extend(messages)
# print_messages(messages)

In [16]:
for m in results.new_messages():
    print(m)
    for part in m.parts:
        print(part)
    print()

ModelRequest(parts=[UserPromptPart(content='What is this page about? https://en.wikipedia.org/wiki/Capybara', timestamp=datetime.datetime(2025, 10, 27, 6, 0, 15, 278916, tzinfo=datetime.timezone.utc))], instructions='You are a fetching agent specialized in retrieving Wikipedia content.\n\n**Role:**\nYour role is to access and extract the main content of a given Wikipedia page accurately.\n\n**Objective:**\n1. Given a Wikipedia URL or topic name, fetch the full textual content of that page.\n2. Include the title and the main article body only — exclude unrelated sections such as “References,” “See also,” or “External links.”\n3. Return the content in plain text format suitable for downstream processing by a summarization agent.\n\n**Data Source:**\nUse only the official Wikipedia page corresponding to the given URL or topic. Do not include information from any other websites or your own prior knowledge.')
UserPromptPart(content='What is this page about? https://en.wikipedia.org/wiki/Cap

In [17]:
wikioutput = results.output

In [18]:
print(results.output.title)
print(results.output.content)

Capybara
The capybara or greater capybara (_Hydrochoerus hydrochaeris_) is the largest living rodent, native to South America. It is a member of the genus Hydrochoerus. Its close relatives include guinea pigs and rock cavies, and it is more distantly related to the agouti, the chinchilla, and the nutria. The capybara inhabits savannas and dense forests and lives near bodies of water. It is a highly social species and can be found in groups as large as one hundred individuals, but usually lives in groups of 10–20 individuals. The capybara is hunted for its meat and hide and also for grease from its thick fatty skin.

Etymology
Its common name is derived from Tupi _ka'apiûara_, a complex agglutination of _kaá_ (leaf) + _píi_ (slender) + _ú_ (eat) + _ara_ (a suffix for agent nouns), meaning "one who eats slender leaves", or "grass-eater". The genus name, _hydrochoerus_, comes from Greek ὕδωρ (_hýdor_ "water") and χοῖρος (_choíros_ "pig, hog") and the species name, _hydrochaeris_, comes fr

In [19]:
# wiki summary agent

# input is the wiki content

In [20]:
# tool calling monitoring 

from pydantic_ai.messages import FunctionToolCallEvent

class NamedCallback:

    def __init__(self, agent):
        self.agent_name = agent.name

    async def print_function_calls(self, ctx, event):
        # Detect nested streams
        if hasattr(event, "__aiter__"):
            async for sub in event:
                await self.print_function_calls(ctx, sub)
            return

        if isinstance(event, FunctionToolCallEvent):
            tool_name = event.part.tool_name
            args = event.part.args
            print(f"TOOL CALL ({self.agent_name}): {tool_name}({args})")

    async def __call__(self, ctx, event):
        return await self.print_function_calls(ctx, event)

In [21]:
# need to save the summary and the content

# index tool
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["title", "summary", "details"],
    keyword_fields=["category"]
)



In [22]:
from typing import Any, Dict, List

class SearchTools:

    def __init__(self, index):
        self.index = index
    
    def search(self, query: str, num_results: int = 5) -> List[Dict[str, Any]]:
        """
        Search for documents in the index related to the given query.

        Args:
            query (str): The search query string.
            num_results (int, optional): The maximum number of search results to return. Default is 5.

        Returns:
            List[Dict[str, Any]]: 
                A list of search results, where each result is represented as a dictionary containing 
                the document’s metadata and content fields (e.g., title, summary, details).
                
        Example:
            >>> tools.search("LeBron James")
            [
                {"title": "LeBron James", "summary": "NBA player...", "details": "..."},
                {"title": "Michael Jordan", "summary": "Former NBA player...", "details": "..."}
            ]
        """
        boost = {"title": 2.0, "summary": 1.0, "details": 0.5}
        
        results = self.index.search(
            query=query,
            boost_dict=boost,
            num_results=num_results,
        )
        return results

    def add_entry(self, title: str, category: str, summary: str, details: str, url: str) -> None:
        """
        Add a new document entry to the index for future search and retrieval.

        Args:
            title (str): The title of the document or topic.
            category (str): The category or type of content (e.g., 'Wikipedia', 'News', 'UserNote').
            summary (str): A short summary of the content.
            details (str): Detailed information or extended description.

        Returns:
            None

        Example:
            >>> tools.add_entry(
            ...     title="LeBron James",
            ...     category="Wikipedia",
            ...     summary="American professional basketball player.",
            ...     details="Born in 1984, LeBron has played for the Cavaliers, Heat, and Lakers..."
            ... )
        """
        doc = {
            "title": title,
            "category": category,
            "summary": summary,
            "details": details,
            "section": "user_added"
        }
        self.index.append(doc)


tools = SearchTools(index)

In [23]:
wiki_instruction = wikioutput.model_dump_json()

In [24]:
search_instruction = """
You are a SEARCH agent responsible for retrieving existing summaries from the local knowledge base.


**ROLE:**
Your role is to search the database for existing summaries that match the user's query.
- If relevant content exists, return it directly (including the title, summary, and details).
- If no relevant information is found, do NOT generate or invent anything. 
  Instead, return an explicit signal indicating that the Summary Agent should be called to create a new summary.

**OBJECTIVE:**
1. Evaluate the query against the local database using the `search()` function.
2. Determine whether a relevant entry already exists.
3. Return:
   - `"status": "found"` along with the matching summaries, if relevant content exists.
   - `"status": "not_found"` if there is no relevant summary in the database.

**DATA SOURCE:**
- You may ONLY use the `search()` function provided.
- Do NOT fabricate or modify any titles, summaries, or details.
- If the search returns nothing relevant, clearly indicate that no summary was found.
"""

from typing import Literal

class SearchOutput(BaseModel):
    """
    Structured output for the Search Agent.

    This model defines the expected response after the agent searches the local database.
    It clearly indicates whether relevant information was found and provides the matching entries if any.
    """
    user_query: str = Field(..., description="User input query.")
    status: Literal["Found", "Not Found"] = Field(..., description="Search if is in the local database and return the status, eg: 'Found' or 'Not Found'")
    results: str = Field(...,
        description=(
            "A list of search results containing the fields 'title', 'summary', and 'details'. If no relevant data is found, return an empty list."
        )
    )


In [25]:
search_agent = Agent(
    name='Searcher',
    instructions=search_instruction,
    tools=[tools.search],
    model='gpt-4o-mini',
    output_type=SearchOutput
)

searcher_callback = NamedCallback(search_agent)

In [26]:
results = await search_agent.run(
        user_prompt="Capybara",
        event_stream_handler=searcher_callback,
    )

TOOL CALL (Searcher): search({"query":"Capybara"})


In [27]:
results.new_messages()


[ModelRequest(parts=[UserPromptPart(content='Capybara', timestamp=datetime.datetime(2025, 10, 27, 6, 0, 51, 825099, tzinfo=datetime.timezone.utc))], instructions='You are a SEARCH agent responsible for retrieving existing summaries from the local knowledge base.\n\n\n**ROLE:**\nYour role is to search the database for existing summaries that match the user\'s query.\n- If relevant content exists, return it directly (including the title, summary, and details).\n- If no relevant information is found, do NOT generate or invent anything. \n  Instead, return an explicit signal indicating that the Summary Agent should be called to create a new summary.\n\n**OBJECTIVE:**\n1. Evaluate the query against the local database using the `search()` function.\n2. Determine whether a relevant entry already exists.\n3. Return:\n   - `"status": "found"` along with the matching summaries, if relevant content exists.\n   - `"status": "not_found"` if there is no relevant summary in the database.\n\n**DATA SO

In [28]:
async def do_serach(agent: Agent, search_stage_instruction: str, previous_search_stages_instructions: List[SearchOutput]) -> SearchOutput:

    # search_stage_instruction = search_stage_instruction.model_dump_json()
    previous_search_stages_instructions = '\n'.join([r.model_dump_json() for r in previous_search_stages_instructions])

    user_prompt = f"""
    User serach instrustructions:
    {search_stage_instruction}

    Previous User search instrustructions:
    {previous_search_stages_instructions}
    """

    callback = NamedCallback(agent)

    results = await agent.run(
        user_prompt=user_prompt,
        event_stream_handler=callback,
    )

    return results.output




In [29]:
# wikioutput
# TODO: Await, Async, .....

In [30]:
search_result_1 = await do_serach(
    agent=search_agent, 
    search_stage_instruction='Lebron James',
    previous_search_stages_instructions=[])

TOOL CALL (Searcher): search({"query":"Lebron James"})


In [31]:
search_result_1

SearchOutput(user_query='Lebron James', status='Not Found', results='')

In [32]:
search_result_2 = await do_serach(
    agent=search_agent, 
    search_stage_instruction='Capybara',
    previous_search_stages_instructions=[search_result_1])

TOOL CALL (Searcher): search({"query":"Capybara"})


In [33]:
search_result_2

SearchOutput(user_query='Capybara', status='Not Found', results='')

In [34]:

class WikipediaSummaryOutput(BaseModel):
    """
    A single, verifiable citation to a transcript snippet or video segment.
    Must correspond to a real snippet returned by the `search()` tool.
    """
    title: str = Field(..., description="The title of the Wikipedia page, e.g., 'LeBron James'.")
    category: str = Field(..., description="What is the category of this Wikpedia page, e.g, ['Person', 'Idea', 'Company', 'Method']")
    summary: str = Field(..., description="A concise summary of the main topic and key facts from the page.")
    details: str = Field(..., description="A more detailed overview that includes important background, achievements, or related information not covered in the short summary.")
    


summary_instruction = """
You are the SUMMARY agent.

You will receive from the Fetch agent of the relevant wikipedia content.
Then, you should use the provided wiki contnet to summary into title, category, summary, details.
After summarizing the content, you need to save into local database by calling `add_entry()` tool.

"""

summary_agent = Agent(
    name='Summarizer',
    instructions=summary_instruction,
    tools=[tools.add_entry],
    model='gpt-4o-mini',
    output_type=WikipediaSummaryOutput
)


In [35]:
async def do_summary(
        agent: Agent, 
        wiki_output_instruction: str, 
        previous_wiki_instructions: List[WikipediaRetrieveOutput]) -> WikipediaSummaryOutput:

    wiki_output_json = wiki_output_instruction.model_dump_json()
    previous_wiki_output_json = '\n'.join([r.model_dump_json() for r in previous_wiki_instructions])

    user_prompt = f"""
    Wikipedia instrustructions:
    {wiki_output_json}

    Previous Wikipedia stages:
    {previous_wiki_output_json}
    """

    callback = NamedCallback(agent)

    results = await agent.run(
        user_prompt=user_prompt,
        event_stream_handler=callback,
    )

    return results.output


In [36]:
wikioutput

WikipediaRetrieveOutput(title='Capybara', url='https://en.wikipedia.org/wiki/Capybara', content='The capybara or greater capybara (_Hydrochoerus hydrochaeris_) is the largest living rodent, native to South America. It is a member of the genus Hydrochoerus. Its close relatives include guinea pigs and rock cavies, and it is more distantly related to the agouti, the chinchilla, and the nutria. The capybara inhabits savannas and dense forests and lives near bodies of water. It is a highly social species and can be found in groups as large as one hundred individuals, but usually lives in groups of 10–20 individuals. The capybara is hunted for its meat and hide and also for grease from its thick fatty skin.\n\nEtymology\nIts common name is derived from Tupi _ka\'apiûara_, a complex agglutination of _kaá_ (leaf) + _píi_ (slender) + _ú_ (eat) + _ara_ (a suffix for agent nouns), meaning "one who eats slender leaves", or "grass-eater". The genus name, _hydrochoerus_, comes from Greek ὕδωρ (_hýdo

In [37]:
result = await do_summary(
    agent=summary_agent, 
    wiki_output_instruction=wikioutput, 
    previous_wiki_instructions=[]
    )

TOOL CALL (Summarizer): add_entry({"title":"Capybara","category":"Wikipedia","summary":"The capybara is the largest living rodent, found in South America, known for its social behavior and semi-aquatic lifestyle.","details":"Capybaras are native to South America and inhabit savannas and forests near water bodies. They can weigh up to 66 kg and typically live in groups of 10-20. Their diet consists of grasses and aquatic plants, and they are preyed upon by South American big cats and various predators. With a gestation period of 130-150 days, they usually produce four offspring. Despite some hunting pressures, capybara populations are generally stable, and they adapt well to urban environments. They can also be found in zoos, living longer in captivity.","url":"https://en.wikipedia.org/wiki/Capybara"})


In [38]:
result

WikipediaSummaryOutput(title='Capybara', category='Animal', summary='The capybara is the largest living rodent, native to South America and known for its social behavior.', details='The capybara (_Hydrochoerus hydrochaeris_) is a semi-aquatic mammal that inhabits savannas and forests near water bodies across South America, except Chile. It is highly social, typically found in groups of 10-20, but can gather in larger groups. Adults weigh 35-66 kg and grow to about 106-134 cm in length. Capybaras are herbivorous, primarily feeding on grasses and aquatic plants, and are preyed upon by large South American predators. They have a gestation period of 130-150 days, usually producing four young at a time. While their populations are stable, hunting for meat and pelts poses some threats. Capybaras adapt well to urbanization and can often be seen in zoos and parks, where their lifespan extends beyond that in the wild, averaging 12 years in captivity.')

In [39]:
index.search('Capybara')

[{'title': 'Capybara',
  'category': 'Wikipedia',
  'summary': 'The capybara is the largest living rodent, found in South America, known for its social behavior and semi-aquatic lifestyle.',
  'details': 'Capybaras are native to South America and inhabit savannas and forests near water bodies. They can weigh up to 66 kg and typically live in groups of 10-20. Their diet consists of grasses and aquatic plants, and they are preyed upon by South American big cats and various predators. With a gestation period of 130-150 days, they usually produce four offspring. Despite some hunting pressures, capybara populations are generally stable, and they adapt well to urban environments. They can also be found in zoos, living longer in captivity.',
  'section': 'user_added'}]

In [40]:
index.search('Neochoerus')

[]

In [41]:
# summary agent
# search agent
# fetch web agent

In [42]:
# orchestration

from pydantic_ai import RunContext

In [43]:
orchestrator_instructions = """
You are the orchestrator agent that manages the workflow of searching, fetching, summarizing, and storing knowledge.

**Primary Rule:**  
You must ALWAYS begin by using the `search_task` tool before doing anything else.
- If the user's query can be found from the database, retreive the summary from the documents.
- If the user's query can be answered from the search results, answer it directly.  
- Do not proceed to fetching or summarizing if the answer can be provided from the search results.

**Workflow Steps:**
1. **Search the database**
   - Use the `search_task` tool to check if relevant knowledge exists for the user's query.  
   - If relevant content is found:
       - Answer the user's question using this content.
       - Return the summary and details from the search.
       - Mark the task as complete.  
   - If the search result does not contain relevant information, proceed to step 2.

2. **Fetch Wikipedia content and summarize**
   - Use the `web_fetch` tool to retrieve raw Wikipedia content for the topic or URL.  
   - **After performing any web fetch, you must always call the `summary_tool`.**  
   - The `summary_tool` should summarize the fetched Wikipedia content into a concise, structured summary and detailed context.  
   - Store the summarized knowledge in the local database using the `add_entry` tool for future queries.
   - Always finish the Task after adding the entry.

3. **Finish and Stop**  
   - Once summarization and storage are complete, return a short summary of the findings to the user.  
   - **Do not run any more tools after `add_entry`** — this marks the workflow as **complete**.  
   - Clearly indicate in your final output whether the content was retrieved or newly summarized.


   
**Rules & Constraints**
- Only perform **one search at the beginning**.  
- Only perform one summary **One summary at the end**.  
- Do not fabricate information; only use retrieved or summarized content.  
- Follow the exact tool order:  
  1️⃣ `search_task` → 2️⃣ `web_search` → 3️⃣ `summary_tool` → 4️⃣ `add_entry`   → ✅ **STOP**  
- Ensure all outputs are structured for downstream processing and database storage.
"""

orchestrator = Agent(
    name='orchestrator',
    instructions=orchestrator_instructions,
    model='gpt-4o-mini',
)

orchestrator_callback = NamedCallback(orchestrator)

In [44]:
@orchestrator.tool
async def search_task(ctx: RunContext, query: str) -> str:
    """Search the local database for existing summaries that match the user's query.

    This tool invokes the Search Agent to check whether relevant information already exists.
    If found, it returns the matching summaries and details. 
    If not found, the orchestrator may call the Summary Agent to generate a new summary.

    Args:
        ctx (RunContext): The orchestrator context for managing tool execution.
        query (str): The raw user query to search for.

    Returns:
        SearchOutput: Structured search results containing status and any matching entries.
    """
    print("\n=== Searher (Initial) ===")
    callback = NamedCallback(search_agent)
    results = await search_agent.run(user_prompt=query, event_stream_handler=callback)
    
    return results.output

In [45]:
@orchestrator.tool
async def web_fetch(ctx: RunContext, query: str) -> WikipediaRetrieveOutput:
    """Retrieve raw Wikipedia content for the given topic or URL.

    Args:
        ctx (RunContext): The orchestrator context for managing tool execution.
        query (str): The Wikipedia topic or page URL to fetch.

    Returns:
        WikipediaFetchOutput: The fetched Wikipedia article, including title and full content text.
    """
    print(f"\n=== Web Fetch stage ===")
    
    callback = NamedCallback(web_agent)
    results = await web_agent.run(
        user_prompt=query,
        event_stream_handler=callback
    )

    return results

In [46]:
@orchestrator.tool
async def summary_tool(ctx: RunContext, query: str) -> WikipediaSummaryOutput:
    """Summarize a Wikipedia article into concise and structured key information.

    This tool uses the Summary Agent to process fetched Wikipedia content, 
    generating a summary and additional details suitable for storage or presentation.

    Args:
        ctx (RunContext): The orchestrator context for managing tool execution.
        query (str): The raw Wikipedia content or topic to summarize.

    Returns:
        WikipediaSummaryOutput: Structured summary containing the title, short summary, and detailed context.

    """
    print(f"\n=== Summary stage ===")
    prior_wiki_output = []
    print(ctx.messages)
    for m in ctx.messages:
        for p in m.parts:
            if p.part_kind == 'tool-return' and p.tool_name == 'web_fetch':
                prior_wiki_output.append(p.content.output)
                
    
    wikioutput = prior_wiki_output[-1]
    print(wikioutput)

    results = await do_summary(
        agent=summary_agent, 
        wiki_output_instruction=wikioutput, 
        previous_wiki_instructions=[]
        )

    return results

In [47]:
# RunContext.messages

In [57]:
# question = "What is this page about? https://en.wikipedia.org/wiki/Capybara"
question = "What is this page about?  https://en.wikipedia.org/wiki/Hydrochoerus"
question = "What is this page about?  https://en.wikipedia.org/wiki/Neochoerus"
# question = "What is this page about? https://en.wikipedia.org/wiki/Caviodon"
question = "What are threats to capybara populations?"

In [58]:
orchestrator_results = await orchestrator.run(
    user_prompt=question,
    # message_history=message_history,
    event_stream_handler=orchestrator_callback,
)

TOOL CALL (orchestrator): search_task({"query":"threats to capybara populations"})

=== Searher (Initial) ===
TOOL CALL (Searcher): search({"query":"threats to capybara populations"})
TOOL CALL (orchestrator): web_fetch({"query":"Capybara"})

=== Web Fetch stage ===
TOOL CALL (web): fetch_url({"url":"https://en.wikipedia.org/wiki/Capybara"})
TOOL CALL (orchestrator): summary_tool({"query":"Capybara"})

=== Summary stage ===
[ModelRequest(parts=[UserPromptPart(content='What are threats to capybara populations?', timestamp=datetime.datetime(2025, 10, 27, 6, 5, 57, 343618, tzinfo=datetime.timezone.utc))], instructions="You are the orchestrator agent that manages the workflow of searching, fetching, summarizing, and storing knowledge.\n\n**Primary Rule:**  \nYou must ALWAYS begin by using the `search_task` tool before doing anything else.\n- If the user's query can be found from the database, retreive the summary from the documents.\n- If the user's query can be answered from the search re

In [59]:
print(orchestrator_results.output)

The capybara (Hydrochoerus hydrochaeris) is the largest living rodent, native to South America. While they are generally not considered threatened and have stable populations in many areas, several factors pose risks to their survival:

1. **Hunting Pressure**: Capybaras are frequently hunted for their meat, hide, and fat, which can reduce their numbers in certain regions.

2. **Habitat Loss**: Urbanization and agricultural expansion have encroached upon their natural habitats, leading to loss of the wetlands and forest areas they rely on.

3. **Competition with Livestock**: In some agricultural settings, capybaras are viewed as competition for resources, which can result in their reduction or elimination by farmers.

4. **Predation**: They face natural threats from predators such as jaguars, caimans, and harpy eagles, especially as their habitats become more fragmented.

5. **Environmental Changes**: Climate change and related environmental shifts can impact their habitat availability

In [51]:
# def print_messages(messages):
#     contents = []
    
#     for m in messages:
#         print(m.kind)

#         for p in m.parts:
#             print(p.part_kind)
#             kind = p.part_kind
#             if kind == 'user-prompt' or kind == 'text':
#                 print(p.content)
#             if kind == 'tool-call': 
#                 print(p.tool_name, p.args)
#             if kind == 'tool-return':
#                 print(type(p.content), p.content)
#             print()

#         print()

# messages = orchestrator_results.new_messages()
# # message_history.extend(messages)
# print_messages(messages)

In [516]:
orchestrator_results.new_messages()



[ModelRequest(parts=[UserPromptPart(content='What is this page about?  https://en.wikipedia.org/wiki/Neochoerus', timestamp=datetime.datetime(2025, 10, 27, 2, 25, 47, 812657, tzinfo=datetime.timezone.utc))], instructions="You are the orchestrator agent that manages the workflow of searching, fetching, summarizing, and storing knowledge.\n\n**Primary Rule:**  \nYou must ALWAYS begin by using the `search_task` tool before doing anything else.\n- If the user's query can be found from the database, retreive the summary from the documents.\n- If the user's query can be answered from the search results, answer it directly.  \n- Do not proceed to fetching or summarizing if the answer can be provided from the search results.\n\n**Workflow Steps:**\n1. **Search the database**\n   - Use the `search_task` tool to check if relevant knowledge exists for the user's query.  \n   - If relevant content is found:\n       - Answer the user's question using this content.\n       - Return the summary and d

In [515]:
index.search('Neochoerus')

[{'title': 'Neochoerus',
  'category': 'Animal',
  'summary': 'Neochoerus is an extinct genus of rodent closely related to the capybara.',
  'details': 'Neochoerus, known as "new hog," is an extinct genus of rodent closely related to the existing capybara. Fossil remains have been discovered across North America, specifically in Mexico and the United States, as well as in South America, particularly in Boyacá, Colombia.',
  'section': 'user_added'}]

In [None]:
# TODO: Why calling multiple times ...