In [1]:
!pip install -U -q tqdm json_repair ddgs requests beautifulsoup4 SQLAlchemy wikipedia-api

In [2]:
import requests
import zipfile
import io
import sys
import os
import shutil

# 1. The URL for the source code ZIP
url = "https://github.com/anpc849/kagentic/archive/refs/heads/main.zip"

# GitHub names the archive's top-level folder '<repo>-<branch>'. It is renamed
# to 'kagentic' below so that it can be imported under that name.
extracted_dir = os.path.abspath("kagentic-main")
package_dir = os.path.abspath("kagentic")

# 2. Download the file using requests
print("Downloading...")
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=60)
if response.status_code == 200:
    # 3. Unzip the content directly from memory
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        z.extractall(".")
    print("Unzipped successfully!")

    # Replace any checkout left by a previous run of this cell: shutil.move()
    # onto an existing directory would nest 'kagentic-main' inside it instead
    # of renaming it.
    if os.path.isdir(package_dir):
        shutil.rmtree(package_dir)
    shutil.move(extracted_dir, package_dir)
else:
    # Nothing was extracted, so there is nothing to rename; an earlier
    # checkout (if any) is left untouched.
    print(f"Failed to download. Status code: {response.status_code}")

# 4. Add the folder to the Python Path
# The archive was extracted into the current working directory, so that is the
# directory that has to be importable (on Kaggle this is /kaggle/working).
module_path = os.path.dirname(package_dir)
if module_path not in sys.path:
    sys.path.append(module_path)

# 5. Verify the import
try:
    import kagentic
    print("Import successful! You can now use kagentic.")
except ImportError:
    print("Import failed. Try checking !ls kagentic to see the folder structure.")

Downloading...
Unzipped successfully!


Import successful! You can now use kagentic.


In [3]:
import kaggle_benchmarks as kbench

In [4]:
from kagentic import CodeAgent
from kagentic.tools import WebBrowseTool, WebSearchTool

In [5]:
# ---------------------------------------------------------------------------
# Grading helpers
# ---------------------------------------------------------------------------

def _check_date(answer: str) -> bool:
    """Q1: Answer must contain 14/01/2026."""
    return "14/01/2026" in answer


def _check_cookbook_url(answer: str) -> bool:
    """Q2: Answer must contain the exact cookbook URL."""
    return "https://github.com/Kaggle/kaggle-benchmarks/blob/ci/cookbook.md" in answer


def _check_lemonasso(answer: str) -> bool:
    """Q3: Answer must mention artistic/drawing tasks."""
    lower = answer.lower()
    return "artistic" in lower or "drawing" in lower

In [6]:
@kbench.task(name="real_time_multi_agent_qa", description="This task demonstrates multi-agent collaboration within the kagentic framework.")
def test_manager_worker(llm) -> float:
    """
    Manager delegates three factual questions to search_agent.
    Each answer is graded against the ground truth.

    Args:
        llm: Model object handed to both CodeAgent instances as ``model``.

    Returns:
        float: score = correct_answers / 3

    If the agent run raises, the error is printed and the answer falls back
    to "NULL", so all three questions are graded as failed instead of the
    task aborting.
    """

    # ---------------------------------------------------------------------------
    # Build the multi-agent system
    # ---------------------------------------------------------------------------
    def build_multi_agent(llm) -> CodeAgent:
        """Creates a manager with a single search-capable worker agent."""

        search_agent = CodeAgent(
            name="search_agent",
            description=(
                "A web research specialist. Give it a specific question and it will "
                "search the web, browse relevant pages, and return a precise answer."
            ),
            tools=[WebSearchTool(), WebBrowseTool()],
            model=llm,
            max_steps=7,
            verbosity_level=2,
        )

        manager = CodeAgent(
            name="manager",
            description="Delegates research questions to search_agent and synthesises the findings.",
            tools=[],
            model=llm,
            managed_agents=[search_agent],
            max_steps=10,
            verbosity_level=2,
            additional_instructions=(
                "You have access to search_agent, a web research specialist.\n"
                "For each question you receive, delegate it to search_agent with a "
                "precise, self-contained research query. After all questions are "
                "answered, return a final combined response that lists each answer "
                "clearly.\n\n"
                "Investigate each question carefully using search_agent before "
                "providing your final answer."
            ),
        )

        return manager

    multi_agent = build_multi_agent(llm)

    TASK = (
        "Answer the following three questions using your research tools. "
        "For each question provide a clear, concise answer.\n\n"
        "Q1: What is the exact date that Kaggle Community Benchmarks launched in 2026? "
        "Answer format: DD/MM/YYYY.\n\n"
        "Q2: What link can I use to access the Kaggle Community Benchmarks cookbook?\n\n"
        "Q3: What does the Lemonasso benchmark in Kaggle Community Benchmarks evaluate LLMs on?"
    )

    print(f"\n{'='*60}")
    print("MANAGER TASK")
    print(f"{'='*60}")
    print(TASK)
    print(f"{'='*60}\n")
    try:
        answer = multi_agent.run(TASK)
    except Exception as exc:
        # Best-effort: a failed run scores 0 rather than aborting the task.
        # Only Exception is caught so Ctrl-C / kernel interrupt still works,
        # and the cause is reported instead of being silently discarded.
        print(f"multi_agent.run() failed: {type(exc).__name__}: {exc}")
        answer = "NULL"

    # The graders do substring checks, which raise TypeError on None or other
    # non-string results; normalise to text first.
    if not isinstance(answer, str):
        answer = str(answer)

    print(f"\n{'='*60}")
    print("FINAL ANSWER:")
    print(answer)
    print(f"{'='*60}")

    # ---------------------------------------------------------------------------
    # Grading
    # ---------------------------------------------------------------------------
    q1_pass = _check_date(answer)
    q2_pass = _check_cookbook_url(answer)
    q3_pass = _check_lemonasso(answer)

    print(f"\nüìä Grading:")
    print(f"  Q1 (launch date 14/01/2026):  {'‚úÖ' if q1_pass else '‚ùå'}")
    print(f"  Q2 (cookbook URL):             {'‚úÖ' if q2_pass else '‚ùå'}")
    print(f"  Q3 (Lemonasso/drawing tasks):  {'‚úÖ' if q3_pass else '‚ùå'}")

    # Count correct answers (each bool contributes 0 or 1)
    correct_answers = sum((q1_pass, q2_pass, q3_pass))
    score = correct_answers / 3.0

    # Keep assertions (optional but recommended for benchmarks)
    kbench.assertions.assert_true(
        q1_pass,
        expectation="Q1: Answer must contain the exact launch date 14/01/2026 in DD/MM/YYYY format.",
    )
    kbench.assertions.assert_true(
        q2_pass,
        expectation=(
            "Q2: Answer must contain the cookbook URL: "
            "https://github.com/Kaggle/kaggle-benchmarks/blob/ci/cookbook.md"
        ),
    )
    kbench.assertions.assert_true(
        q3_pass,
        expectation="Q3: Answer must mention that Lemonasso evaluates artistic/drawing tasks.",
    )

    print(f"\n‚≠ê Final Score: {score:.2f}")

    return score

In [7]:
# Run the benchmark task defined above, passing kbench.llm as its `llm` argument.
test_manager_worker.run(llm=kbench.llm)


MANAGER TASK
Answer the following three questions using your research tools. For each question provide a clear, concise answer.

Q1: What is the exact date that Kaggle Community Benchmarks launched in 2026? Answer format: DD/MM/YYYY.

Q2: What link can I use to access the Kaggle Community Benchmarks cookbook?

Q3: What does the Lemonasso benchmark in Kaggle Community Benchmarks evaluate LLMs on?

[manager] 
[manager] ü§ñ kagentic starting ‚Äî model: google/gemini-2.5-flash
[manager] üìã Task: Answer the following three questions using your research tools. For each question provide a clear, concise answer.

Q1: ...


[manager] --- Step 1 ---
[manager]   üí≠ Thought: The user wants to know the exact launch date of Kaggle Community Benchmarks in 2026. I will use the `search_agent` tool to find this information. I need to be specific with the query to get a precise date.
[manager]   üéØ Action:  search_agent({"task": "What is the exact launch date of Kaggle Community Benchmarks in 202

Impersonate 'chrome_133' does not exist, using 'random'



[search_agent] --- Step 1 ---
[search_agent]   üí≠ Thought: I need to find the exact launch date of Kaggle Community Benchmarks in 2026. I will start by searching the web for this information.
[search_agent]   üéØ Action:  web_search({"query": "Kaggle Community Benchmarks launch date 2026"})
[search_agent]   üîß Calling tool: web_search({"query": "Kaggle Community Benchmarks launch date 2026"})
[search_agent]   üì§ Tool result: Search results for 'Kaggle Community Benchmarks launch date 2026':

[1] Community Benchmarks: Evaluating modern AI on Kaggle - Google Blog
    URL: https://blog.google/innovation-and-ai/technology/dev...
[search_agent]   ‚ö†Ô∏è  llm.respond() attempt 1/3 failed: Response parsing failed.
Input Value:
---
{"thought":"The search results show a few dates related to Kaggle Community Benchmarks in 2026. Result [2] from DEVOPSdigest explicitly mentions \"Jan 15, 2026 ¬∑ January 27, 2026\" in proximity to \"Kaggle Launches Community Benchmarks\". It also mentions \

BokehModel(combine_events=True, render_bundle={'docs_json': {'f498d474-df07-4307-b789-9cef2cfef25c': {'version‚Ä¶