In [4]:
import io
import os
import pathlib
import re
from typing import Optional, Union

import pandas as pd
import requests
from bs4 import BeautifulSoup
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
    SpeechToTextTool,
    VisitWebpageTool,
    WikipediaSearchTool,
)
from smolagents.tools import PipelineTool, Tool


# OpenAI credential, read from the environment so it never lives in the notebook.
# `os.getenv` yields None when the variable is unset.
api_key = os.getenv("OPENAI_API_KEY")

# Instruction preamble that the cells below prepend to every question.
# Layout: (1) role, (2) required reasoning + code-block format, (3) the single
# FINAL ANSWER template, (4) formatting rules for the answer itself.
# The template is stated exactly once and placed right after "Then output ..."
# so the model is never shown two competing FINAL ANSWER lines.
extra_prompt = """
You are a general AI assistant. I will ask you a question.

You must always show your reasoning and include a code block like this:

Thoughts: Explain what you're doing
Code:
```py
# your code here
```<end_code>

Then output your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER]
If you do not know the answer, [YOUR FINAL ANSWER] should be an empty string.

YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. Do NOT explain your answer unless asked explicitly.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""

class WikipediaTableTool(Tool):
    """Find a Wikipedia page for a query and return the table rows that mention it.

    The query is used twice: first to locate a page through
    ``WikipediaSearchTool``, then as a case-insensitive *literal* substring
    filter applied to every cell of every ``<table>`` on that page.
    All failures are reported as human-readable strings rather than raised,
    so the calling agent can read them and react.
    """

    name = "WikipediaTableTool"
    description = "Searches Wikipedia and extracts relevant table information based on a query."
    inputs = {
        "query": {
            "type": "string",
            "description": "Search query to find relevant Wikipedia page and extract table data."
        }
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        """Return matching table rows (or an explanatory message) for *query*.

        Args:
            query: Text used both to search Wikipedia and to filter table rows.

        Returns:
            One section per table that has matching rows, joined by blank
            lines; or a message describing why nothing could be returned.
        """
        # Step 1: Use WikipediaSearchTool to locate the page.
        # The tool is invoked by calling it. Its result may be a bare URL or a
        # longer text that embeds one, so the URL is extracted instead of
        # requiring the whole result to start with "http".
        # NOTE(review): confirm the result format against the installed smolagents version.
        wiki_tool = WikipediaSearchTool()
        search_result = str(wiki_tool(query))

        urls = re.findall(r"https?://\S+", search_result)
        if not urls:
            return f"Failed to get Wikipedia URL: {search_result}"
        # Take the last URL: links quoted inside page text come before any trailing page link.
        page_url = urls[-1]

        # Step 2: Fetch and parse tables from that URL
        try:
            # A timeout keeps a stalled connection from hanging the agent step forever.
            response = requests.get(page_url, timeout=15)
            response.raise_for_status()
        except requests.RequestException as e:
            return f"Failed to retrieve Wikipedia page: {e}"

        soup = BeautifulSoup(response.text, 'html.parser')
        tables = soup.find_all('table')

        if not tables:
            return f"No tables found on the Wikipedia page: {page_url}"

        matched_results = []

        for idx, table in enumerate(tables):
            try:
                # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
                df = pd.read_html(io.StringIO(str(table)))[0]
            except Exception:
                continue  # skip tables that can't be parsed

            # Flatten headers to plain strings. Multi-row headers arrive as
            # tuples (MultiIndex), which a plain astype(str) does not handle.
            df.columns = [
                " ".join(str(level) for level in col) if isinstance(col, tuple) else str(col)
                for col in df.columns
            ]

            if query:
                # regex=False: the query is user text, not a pattern, so
                # characters such as "(" or "+" must be matched literally.
                mask = (
                    df.astype(str)
                    .apply(lambda col: col.str.contains(query, case=False, na=False, regex=False))
                    .any(axis=1)
                )
                matched = df[mask]
                if not matched.empty:
                    matched_results.append(f"Table {idx + 1} (matches found):\n{matched.to_string(index=False)}")
            else:
                matched_results.append(f"Table {idx + 1} (sample):\n{df.head().to_string(index=False)}")

        if not matched_results:
            return f"No relevant table data found on {page_url} for query '{query}'."

        return "\n\n".join(matched_results)

class ExcelToTextTool(Tool):
    """Render an Excel worksheet as Markdown text.

    Errors (missing file, unreadable workbook, unknown sheet) are returned as
    strings starting with ``Error`` instead of being raised, so the calling
    agent can read the message.
    """

    # ------------------------------------------------------------------
    # Required smol‑agents metadata
    # ------------------------------------------------------------------
    name = "excel_to_text"
    description = (
        "Read an Excel file and return a Markdown table of the requested sheet. "
        "Accepts either the sheet name or the zero-based index."
    )

    inputs = {
        "excel_path": {
            "type": "string",
            "description": "Path to the Excel file (.xlsx / .xls).",
        },
        "sheet_name": {
            "type": "string",
            "description": (
                "Worksheet name or zero-based index *as a string* (optional; default first sheet)."
            ),
            "nullable": True,
        },
    }

    output_type = "string"

    def forward(
        self,
        excel_path: str,
        sheet_name: Optional[str] = None,
    ) -> str:
        """Load *excel_path* and return the sheet as a Markdown table.

        Args:
            excel_path: Location of the workbook; ``~`` is expanded and the
                path is resolved before use.
            sheet_name: Sheet name, or a digit string interpreted as a
                zero-based index. ``None`` or ``""`` selects the first sheet.

        Returns:
            The sheet rendered as a Markdown table (plain fixed-width text if
            Markdown rendering is unavailable), or an ``Error ...`` message.
        """

        path = pathlib.Path(excel_path).expanduser().resolve()
        if not path.exists():
            return f"Error: Excel file not found at {path}"

        try:
            # Interpret sheet identifier -----------------------------------
            sheet: Union[str, int]
            if sheet_name is None or sheet_name == "":
                sheet = 0  # first sheet
            else:
                # If the user passed a numeric string (e.g. "1"), cast to int
                sheet = int(sheet_name) if sheet_name.isdigit() else sheet_name

            # Load worksheet ----------------------------------------------
            df = pd.read_excel(path, sheet_name=sheet)

            # Render to Markdown ------------------------------------------
            # to_markdown needs the optional `tabulate` package and raises
            # ImportError without it; fall through to plain text in that case.
            if hasattr(df, "to_markdown"):
                try:
                    return df.to_markdown(index=False)
                except ImportError:
                    pass

            # Dependency-free fallback: still a readable table for the agent.
            return df.to_string(index=False)

        except Exception as exc:  # pylint: disable=broad-except
            return f"Error reading Excel file: {exc}"

class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` driven by ``gpt-4o-mini``.

    Construct once, then call the instance with a question string to get the
    agent's final answer back as text.
    """

    def __init__(self):
        """Build the model client and the tool-equipped code agent."""
        print("BasicAgent initialized.")
        model = OpenAIServerModel(
            model_id="gpt-4o-mini",
            api_key=api_key,
        )

        self.agent = CodeAgent(
            model=model,
            tools=[
                DuckDuckGoSearchTool(),
                WikipediaSearchTool(),
                VisitWebpageTool(),
                SpeechToTextTool(),
            ],
            max_steps=5,
            add_base_tools=True,
            # SECURITY NOTE(review): authorising "subprocess" lets model-written
            # code launch arbitrary shell commands on this machine. Kept as-is to
            # preserve behaviour; drop it unless a question truly needs it.
            additional_authorized_imports=["pandas", "numpy", "csv", "subprocess"],
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer as a string.

        Args:
            question: Full prompt text handed to the agent.

        Returns:
            The agent's final answer. Non-string results (e.g. a number) are
            converted with ``str`` and ``None`` becomes ``""`` so the declared
            return type always holds.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        final_answer = self.agent.run(question)
        print(f"Agent returning final answer: {final_answer}")
        if final_answer is None:
            return ""
        return str(final_answer)

In [5]:
# Build the agent once; the question cells below reuse this instance.
agent = BasicAgent()

# First trial question; the shared instruction preamble goes in front of it,
# separated by a blank line.
question = (
    "Who nominated the only Featured Article on English Wikipedia "
    "about a dinosaur that was promoted in November 2016?"
)
full_question = "\n\n".join((extra_prompt, question))
full_answer = agent(full_question)

BasicAgent initialized.
Agent received question (first 50 chars): 
You are a general AI assistant. I will ask you a ...


Agent returning final answer: Thoughts: I have found that the dinosaur article promoted in November 2016 is "Giganotosaurus," which was nominated by user FunkMonk. I will report this information in the required template format.

Code:
```py
nominator = "FunkMonk"
dinosaur_article = "Giganotosaurus"
result = f"Nominated by {nominator} for the article {dinosaur_article}"
print(result)
```<end_code>

FINAL ANSWER: FunkMonk


In [6]:
# Display the value returned by the most recent `agent(...)` call as the cell's
# output, so escape sequences such as \n are visible in the repr.
full_answer

'Thoughts: I have found that the dinosaur article promoted in November 2016 is "Giganotosaurus," which was nominated by user FunkMonk. I will report this information in the required template format.\n\nCode:\n```py\nnominator = "FunkMonk"\ndinosaur_article = "Giganotosaurus"\nresult = f"Nominated by {nominator} for the article {dinosaur_article}"\nprint(result)\n```<end_code>\n\nFINAL ANSWER: FunkMonk'

In [10]:
# Second trial question, sent through the same agent with the same preamble.
question = (
    "How many at bats did the Yankee with the most walks "
    "in the 1977 regular season have that same season?"
)
full_question = "\n\n".join((extra_prompt, question))
full_answer = agent(full_question)

Agent received question (first 50 chars): 
You are a general AI assistant. I will ask you a ...


Agent returning final answer: 519
