In [1]:
!pip install -U -q tqdm json_repair ddgs requests beautifulsoup4 SQLAlchemy wikipedia-api

In [2]:
import requests
import zipfile
import io
import sys
import os
import shutil

# 1. The URL for the source code ZIP
url = "https://github.com/anpc849/kagentic/archive/refs/heads/main.zip"

# Everything below works relative to the notebook's working directory, so the
# folder we extract into is the same folder we later put on sys.path.
work_dir = os.path.abspath(".")
extracted_dir = os.path.join(work_dir, "kagentic-main")
package_dir = os.path.join(work_dir, "kagentic")

# 2. Download the file using requests
print("Downloading...")
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
if response.status_code != 200:
    # Stop here: continuing would only fail later with a confusing
    # FileNotFoundError when the (never extracted) folder is moved.
    raise RuntimeError(f"Failed to download. Status code: {response.status_code}")

# 3. Unzip the content directly from memory
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    z.extractall(work_dir)
print("Unzipped successfully!")

# 4. Add the folder to the Python Path
# GitHub's ZIPs always create a folder named 'repo-branch'
# Remove the copy left by a previous run first; otherwise shutil.move would
# nest 'kagentic-main' *inside* the existing 'kagentic' directory.
if os.path.isdir(package_dir):
    shutil.rmtree(package_dir)
shutil.move(extracted_dir, package_dir)
module_path = work_dir
if module_path not in sys.path:
    sys.path.append(module_path)

# 5. Verify the import
try:
    import kagentic
    print("Import successful! You can now use kagents.")
except ImportError:
    print("Import failed. Try checking !ls kagentic to see the folder structure.")

Downloading...
Unzipped successfully!


Import successful! You can now use kagents.


In [9]:
import textwrap
import urllib.parse
from typing import Dict, List, Optional

import kaggle_benchmarks as kbench
import wikipediaapi
from pydantic import BaseModel, Field

from kagentic import CodeAgent, Tool, ToolInput
from kagentic.tools.python_runner import PythonCodeRunnerTool

In [10]:
class WikiPage:
    """Lazily loaded view of one Wikipedia article, addressed by its URL.

    Nothing is fetched at construction time. The first call to
    ``get_table_of_contents`` or ``get_section`` triggers ``fetch_and_parse``;
    its outcome (page object or error message) is then kept on the instance
    and never re-fetched.

    Attributes:
        url: The URL exactly as passed in by the caller.
        page_name: Article title decoded from the URL.
        lang: Language edition taken from the host name ("en" when unknown).
        page_obj: The wikipediaapi page object once fetched, otherwise None.
        fetched: True after the first fetch attempt, successful or not.
        error: Human-readable failure description, or None.
    """

    def __init__(self, url: str):
        self.url = url

        parsed = urllib.parse.urlparse(url.strip())
        path = parsed.path

        # Derive the title from the URL path only, so "?oldid=..." queries and
        # "#Section" fragments never leak into it, and keep everything after
        # "/wiki/" so titles that contain a slash (e.g. "AC/DC") survive.
        if "/wiki/" in path:
            title = urllib.parse.unquote(path.split("/wiki/", 1)[1].rstrip("/"))
        else:
            # ".../w/index.php?title=Foo" style links carry the title in the query.
            query_titles = urllib.parse.parse_qs(parsed.query).get("title")
            if query_titles:
                title = query_titles[0]  # parse_qs already percent-decodes
            else:
                title = urllib.parse.unquote(path.rstrip("/").rsplit("/", 1)[-1])
        self.page_name = title

        # Extract language if present (e.g., en.wikipedia.org -> en).
        # "www.wikipedia.org" / "wikipedia.org" carry no language code.
        host = parsed.hostname or ""
        first_label = host.split(".")[0]
        if "." in host and first_label not in ("www", "wikipedia"):
            self.lang = first_label
        else:
            self.lang = "en"

        self.page_obj = None
        self.fetched = False
        self.error = None

    def fetch_and_parse(self):
        """Fetch the page once; record either ``page_obj`` or ``error``.

        Never raises: any failure is stored in ``self.error`` so the public
        getters can report it as text. Subsequent calls are no-ops.
        """
        if self.fetched:
            return
        # Every path below is terminal for this instance, so flag it up front.
        self.fetched = True

        try:
            import wikipediaapi
        except ImportError:
            self.error = "Missing dependencies: pip install wikipedia-api"
            return

        try:
            wiki_wiki = wikipediaapi.Wikipedia(
                user_agent='KagentFramesBenchmark',
                language=self.lang
            )
            self.page_obj = wiki_wiki.page(self.page_name)

            if not self.page_obj.exists():
                self.error = f"Page '{self.page_name}' does not exist on Wikipedia."
        except Exception as e:
            # Some exceptions stringify to ""; fall back to the type name so
            # the error stays truthy and callers do not touch a None page_obj.
            self.error = str(e) or type(e).__name__

    @staticmethod
    def _count_paragraphs(text: str) -> int:
        """Count the non-blank lines of ``text``."""
        return sum(1 for line in text.split('\n') if line.strip())

    def _iter_sections(self, sections: list):
        """Yield every section and nested subsection in document order."""
        for s in sections:
            yield s
            yield from self._iter_sections(s.sections)

    def _extract_toc(self, sections: list, depth: int = 0) -> List[str]:
        """Render ``sections`` (recursively) as indented table-of-contents lines."""
        toc = []
        indent = "    " * depth
        # "\u2514\u2500" is the box-drawing pair used to mark nested entries;
        # written as escapes so the glyphs cannot be mangled by re-encoding.
        prefix = "\u2514\u2500 " if depth > 0 else "- "
        for s in sections:
            # Only include if there is actual text or subsections
            para_count = self._count_paragraphs(s.text)
            if para_count > 0 or s.sections:
                toc.append(f"{indent}{prefix}{s.title} ({para_count} paragraphs)")
                toc.extend(self._extract_toc(s.sections, depth + 1))
        return toc

    def get_table_of_contents(self) -> str:
        """Return the article outline as text, or an error / "no text" message."""
        self.fetch_and_parse()
        if self.error:
            return f"Error fetching {self.url}: {self.error}"

        toc = [f"Table of Contents for: {self.url}"]

        # Add summary/lead section
        summary_paras = self._count_paragraphs(self.page_obj.summary)
        if summary_paras > 0:
            toc.append(f"- Summary/Lead ({summary_paras} paragraphs)")

        # Add other sections recursively
        toc.extend(self._extract_toc(self.page_obj.sections))

        if len(toc) == 1:
            return f"No readable text found at {self.url}"
        return "\n".join(toc)

    def _find_section_text(self, target_name: str, sections: list) -> Optional[str]:
        """Return the text of the section named ``target_name``, or None.

        An exact (case-insensitive) title match anywhere in the tree wins;
        only if there is none does the first title *containing* the target
        match. A section that exists but has no text yields "" (not None).
        """
        target_lower = target_name.lower().strip()
        if not target_lower:
            # An empty needle is a substring of every title; treat as no match.
            return None

        flat = list(self._iter_sections(sections))
        for s in flat:
            if s.title.lower().strip() == target_lower:
                return s.text
        for s in flat:
            if target_lower in s.title.lower():
                return s.text
        return None

    def _get_all_section_titles(self, sections: list) -> List[str]:
        """Return the titles of all sections and subsections in document order."""
        return [s.title for s in self._iter_sections(sections)]

    def get_section(self, section_name: str) -> str:
        """Return the text of one section, prefixed with a header line.

        "summary", "lead", "summary/lead", "intro" and "introduction" select
        the page summary. If the section cannot be found, the returned string
        lists the available section titles instead.
        """
        self.fetch_and_parse()
        if self.error:
            return f"Error fetching {self.url}: {self.error}"

        target_lower = section_name.lower().strip()

        # Special case for Summary/Lead
        if target_lower in ["summary", "lead", "summary/lead", "intro", "introduction"]:
            content = self.page_obj.summary
            name = "Summary/Lead"
        else:
            content = self._find_section_text(section_name, self.page_obj.sections)
            if content is None:
                all_titles = self._get_all_section_titles(self.page_obj.sections)
                # Join as one list so a page without sections has no dangling ", ".
                available = ", ".join(["Summary/Lead"] + all_titles)
                return f"Section '{section_name}' not found. Available sections: {available}"
            name = section_name

        return f"--- SECTION: {name} ---\n\n{content}"

In [11]:
# Module-level memo of WikiPage objects, keyed by the whitespace-stripped URL.
_WIKI_CACHE: Dict[str, WikiPage] = {}

def get_cached_wiki(url: str) -> WikiPage:
    """Return the shared WikiPage for ``url``, building it on first request.

    Leading/trailing whitespace is stripped before the lookup, and the
    stripped URL is what the new WikiPage is constructed with.
    """
    key = url.strip()
    try:
        return _WIKI_CACHE[key]
    except KeyError:
        page = WikiPage(key)
        _WIKI_CACHE[key] = page
        return page

In [12]:
# ---------------------------------------------------------------------------
# Tools
# ---------------------------------------------------------------------------
class ListWikiSectionsTool(Tool):
    """Agent tool that returns the table of contents of a Wikipedia article."""

    name = "list_wiki_sections"
    description = (
        "Fetches a Wikipedia URL and returns its Table of Contents "
        "(a list of section headings). "
        "Use this FIRST to skim an article and understand its structure "
        "before reading specific sections."
    )
    inputs = {
        "url": ToolInput(
            type="string",
            description="The Wikipedia URL to map out.",
            required=True,
        ),
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Look up (or create) the cached page for ``url`` and return its outline text."""
        return get_cached_wiki(url).get_table_of_contents()


class ReadWikiSectionTool(Tool):
    """Agent tool that returns the text of one named section of a Wikipedia article."""

    name = "read_wiki_section"
    description = (
        "Reads the full text of a specific section from a Wikipedia article. "
        "You MUST Use `list_wiki_sections` first "
        "to find the exact section name."
    )
    inputs = {
        "url": ToolInput(
            type="string",
            description="The Wikipedia URL.",
            required=True,
        ),
        "section_name": ToolInput(
            type="string",
            description=(
                "The exact name of the section heading to read "
                "(e.g., 'Early life', 'Family')."
            ),
            required=True,
        ),
    }
    output_type = "string"

    def forward(self, url: str, section_name: str) -> str:
        """Look up (or create) the cached page for ``url`` and return the requested section."""
        return get_cached_wiki(url).get_section(section_name)

In [20]:
@kbench.task(
    name="frames_benchmark_google",
    description="Reimplementation of the FRAMES benchmark using kagentic."
)
def frames_benchmark_task(llm):
    """Run one FRAMES-style question through a kagentic CodeAgent and check the answer.

    Args:
        llm: The model object handed to ``CodeAgent(model=...)``.

    The agent gets the two Wikipedia tools defined in this notebook, is asked
    a single hard-coded question, and its structured ``answer`` field is then
    asserted to contain the standalone number 87.
    """
    import re  # local import: only needed for the answer check below

    FRAMES_AGENT_INSTRUCTIONS = """
You are answering a complex question based on several Wikipedia pages.
DO NOT GUESS. You must find the answer by searching the provided Wikipedia links.

Workflow:
1. Identify the most promising URL to start with.
2. ALWAYS use `list_wiki_sections` first to see the structure of a URL.
3. Identify the EXACT section name that likely has the answer (e.g., 'Early life', 'Family').
4. Use `read_wiki_section` to read that exact section string.
5. If you hit a dead end, move to the next URL or next section.
6. You can use `python_interpreter` if you need to do complex string manipulation.

Example Tool Usage:
If the ToC tool returns:
- History (4 paragraphs)
    └─ Modern Era (5 paragraphs)

You should call `read_wiki_section` with section_name="Modern Era".

Please respond in JSON format with two required fields: answer(you dont explain any thing in the answer. Just make it short and concise) and explanation.
"""
    class FRAMESResponse(BaseModel):
        """Structured answer to a FRAMES question returned by the agent."""
        answer: str = Field(description="short and concise answer")
        explanation: str = Field(description="brief explanation support your answer")

    # NOTE(review): the instructions mention a `python_interpreter` tool, but
    # only the two wiki tools are registered here (PythonCodeRunnerTool is
    # imported at the top of the notebook and unused) — confirm whether
    # CodeAgent provides that tool on its own.
    agent = CodeAgent(
        tools=[ListWikiSectionsTool(), ReadWikiSectionTool()],
        model=llm,
        max_steps=25,
        verbosity_level=2,
        additional_instructions=FRAMES_AGENT_INSTRUCTIONS,
        response_format=FRAMESResponse # Structured output
    )

    question = "How many years earlier would Punxsutawney Phil have to be canonically alive to have made a Groundhog Day prediction in the same state as the US capitol?"
    wiki_links = ['https://en.wikipedia.org/wiki/Punxsutawney_Phil', 'https://en.wikipedia.org/wiki/United_States_Capitol']

    links_block = "\n".join(wiki_links)
    formatted_prompt = (
        "Here are the relevant Wikipedia articles:\n"
        f"{links_block}\n\n"
        "Based on all the information, answer the query.\n\n"
        f"Query: {question}"
    )

    print("\n========== FRAMES TASK ==========\n")
    print(formatted_prompt)
    print("\n=================================\n")

    response = agent.run(formatted_prompt)

    # Require 87 as a standalone number: a plain substring test would also
    # accept answers such as "1887" or "187".
    answer_text = str(response.answer)
    kbench.assertions.assert_true(
        re.search(r"(?<!\d)87(?!\d)", answer_text) is not None,
        expectation="Answer should be 87."
    )

In [22]:
# Run the task defined above, passing kbench.llm as its `llm` argument
# (presumably the model configured for this notebook session — confirm).
frames_benchmark_task.run(kbench.llm)



Here are the relevant Wikipedia articles:
https://en.wikipedia.org/wiki/Punxsutawney_Phil
https://en.wikipedia.org/wiki/United_States_Capitol

Based on all the information, answer the query.

Query: How many years earlier would Punxsutawney Phil have to be canonically alive to have made a Groundhog Day prediction in the same state as the US capitol?


[agent] 
[agent] ü§ñ kagentic starting ‚Äî model: google/gemini-2.5-pro
[agent] üìã Task: Here are the relevant Wikipedia articles:
https://en.wikipedia.org/wiki/Punxsutawney_Phil
https://en.wikipedia.org/wiki/...
[agent] üìê response_format: FRAMESResponse


[agent] --- Step 1 ---
[agent]   üí≠ Thought: I need to determine the location of the US Capitol and the history of that location, as well as the start date of Punxsutawney Phil's predictions. The query asks about the same 'state', which is a key detail. I'll start with the US Capitol page to find its location.
[agent]   üéØ Action:  list_wiki_sections({"url": "https://en.wiki

BokehModel(combine_events=True, render_bundle={'docs_json': {'ffe2dad6-6972-45dd-bf24-d0738e390637': {'version…