In [1]:
import os
from dotenv import load_dotenv
from datasets import load_dataset
from smolagents import CodeAgent, LiteLLMModel, HfApiModel
import yaml
from tools.final_answer import FinalAnswerTool
from tools.math_operations import MathOperationsTool
from tools.visit_webpage import VisitWebpageTool
from tools.web_search import DuckDuckGoSearchTool
from tools.wikipedia_search import WikipediaSearchTool
from tools.arxiv_search import ArxivSearchTool
from tools.rag_search import RAGSearchTool
from tools.code_execution import CodeExecutionTool
from tools.document_processing import DocumentProcessingTool
from tools.image_processing import ImageProcessingTool
from tools.web_scraping import WebScrapingTool

In [2]:
# Build one instance of every tool used in this notebook. The instances are
# exercised individually below and later collected into the agent's tool list.
visit_webpage = VisitWebpageTool()
web_search = DuckDuckGoSearchTool(max_results=5)
math_tools = MathOperationsTool()
final_answer = FinalAnswerTool()
wikipedia_search = WikipediaSearchTool(load_max_docs=2)
arxiv_search = ArxivSearchTool(load_max_docs=3)
# presumably "rag_db" is the on-disk location of the RAG store — confirm in tools/rag_search
rag_search = RAGSearchTool(persist_dir="rag_db")
code_execution = CodeExecutionTool()
document_processing = DocumentProcessingTool(temp_dir="doc_processing")
# NOTE(review): the variable is named *generation* but holds an ImageProcessingTool — confirm the intended name.
image_generation_tool = ImageProcessingTool()
web_scraping = WebScrapingTool()

  self.embeddings = HuggingFaceEmbeddings(


In [None]:
# Smoke-test every tool once by calling its `forward` method directly.
# A "... success" line only shows that the call returned without raising;
# the returned values are not validated here.

# arxiv_search
papers = arxiv_search.forward(query="AI regulation submission date:2022-06")
print("arxiv_search success")

# rag_search: add the arXiv results to the RAG store
rag_result = rag_search.forward(
    action="add",
    content=papers,
    # Metadata describes what is actually stored (results of the AI-regulation query above).
    metadata={"topic": "ai_regulation", "source": "arxiv", "year": "2022"}
)
print("rag_search success")

# document_processing: question answering over a PDF
doc_analysis = document_processing.forward(
    action="analyze_pdf",
    content="financial_report.pdf",
    query="What are the total revenue figures for 2021 and 2022?"
)
print("document_processing success")

# math_tools
# NOTE(review): the name `growth` suggests 2022 minus 2021 (1.5 - 1.2); if "subtract"
# computes a - b this yields a negative value — confirm the intended operand order.
growth = math_tools.forward(
    operation="subtract",
    a=1.2,
    b=1.5
)
print("math_tools success")

# internet_search
search_results = web_search.forward(query="latest deep learning frameworks comparison 2023")
print("internet_search success")

# web_scraping
scrapping_content = web_scraping.forward(
    url="https://www.datacamp.com/tutorial/how-transformers-work",
    action="element",
    selector="table.comparison",
    attributes=["class"]
)
print("web_scraping success")

# wikipedia_search
wikipedia_info = wikipedia_search.forward(query="deep learning frameworks")
print("wikipedia_search success")

# code_execution
code = '''
def divide_numbers(a, b):
    if b == 0:
        raise ValueError("Cannot divide by zero")
    return a / b
    
# Test the function
print(divide_numbers(10, 2))
print(divide_numbers(5, 5))
'''
result = code_execution.forward(code=code)
print("code_execution success")

# document_processing: OCR on an image
text = document_processing.forward(
    action="ocr",
    content="presentation.jpg"
)
print("document_processing success")

# final_answer: stored under its own name so the OCR result in `text` is not overwritten
answer = final_answer.forward("Based on the financial report, the total revenues for 2021 and 2022 is 1.2 and 1.5 respectively.")
print("final_answer success")

arxiv_search sucess
rag_search sucess
document_processing sucess
math_tools sucess
internet_search sucess
web_scraping sucess
wikipedia_search sucess
code_execution sucess
document_processing sucess


In [3]:
# Fetch a page through the visit_webpage tool and print the text it returns.
# Alternative target kept for reference:
#url = "https://en.wikipedia.org/wiki/List_of_secretaries_of_Homeland_Security"
url = "https://data.worldbank.org/indicator/NY.GDS.TOTL.ZS"
web_content = visit_webpage.forward(url=url)
print(web_content)


Gross domestic savings (% of GDP) | Data

[![The World Bank](/assets/images/logo-wb-header-en.svg)](http://www.worldbank.org)[Data](/)

[HOME](/)

[ECONOMIES](https://data360.worldbank.org/en/economies)

[DATA & RESOURCES](https://data360.worldbank.org/en/search)

[ABOUT](https://data360.worldbank.org/en/about)

EnglishEspañolFrançaisالعربية中文

Gross domestic savings (% of GDP)

Close

Browse World Development Indicators by[Country](/country)or[Indicator](/indicator)

[DataBank](http://databank.worldbank.org/data/home.aspx)[Microdata](http://microdata.worldbank.org/index.php/home)[Data Catalog](https://datacatalog.worldbank.org)

Menu

* [Microdata](http://microdata.worldbank.org/index.php/home)
* [Data Catalog](http://datacatalog.worldbank.org)
* [DataBank](http://databank.worldbank.org/data/home.aspx)

Browse World Development Indicators by[Country](/country)[Indicator](/indicator)

* [About Us](/about)
  + [Get Started](/about/get-started)
  + [FAQ](/about/faq)
  + [Help Desk](http:

In [13]:
# Run one query through the web_search tool (a DuckDuckGoSearchTool) and print the returned text.
query = "List of United States secretaries of homeland security before April 2019"
search_results = web_search.forward(query=query)
print(search_results)  # Use print to output important information for the next step.

## Search Results

[Secretaries of Homeland Security | Homeland Security](https://www.dhs.gov/secretaries-homeland-security)
List of current and former Secretaries of the Department of Homeland Security, their dates of service, and links to their biographies. ... April 10, 2019. John F. Kelly January 20, 2017 - July 28, 2017. Jeh Charles Johnson December 23, 2013 - January 20, 2017.

[United States Secretary of Homeland Security - Wikipedia](https://en.wikipedia.org/wiki/United_States_Secretary_of_Homeland_Security)
The United States secretary of homeland security is the head of the United States Department of Homeland Security, the federal department tasked with ensuring public safety in the United States.The secretary is a member of the Cabinet of the United States.The position was created by the Homeland Security Act following the terrorist attacks of September 11, 2001.

[Secretaries of Homeland Security - U-S-History.com](https://www.u-s-history.com/pages/h1236.html)
Secretaries o

In [3]:
# Check that PyMuPDF (imported as `fitz`) is available; its module docstring reports the installed version.
# NOTE(review): this import sits mid-notebook — consider moving it to the first import cell.
import fitz
print(fitz.__doc__)

PyMuPDF 1.25.5: Python bindings for the MuPDF 1.25.6 library (rebased implementation).
Python 3.12 running on win32 (64-bit).



In [4]:
# Open a local PDF with PyMuPDF.
# NOTE(review): `doc` is neither used nor closed in the visible cells — confirm whether
# a `doc.close()` (or a `with` block) is needed, or whether this cell can be removed.
doc = fitz.open("curso_illescas_01.pdf")

In [4]:
# Tool instances handed to the agent; the original ordering is kept.
customs_tools = [
    visit_webpage, web_search,                    # web page retrieval and search
    math_tools,                                   # arithmetic
    wikipedia_search, arxiv_search,               # reference / paper search
    rag_search,                                   # RAG store
    web_scraping,                                 # element-level scraping
    code_execution,                               # code runner
    document_processing, image_generation_tool,   # documents and images
    final_answer,                                 # answer reporting
]

In [8]:
# Initialize the Hugging Face Inference API model
load_dotenv()
# Get the Hugging Face API key from the environment
api_key = os.getenv("HUGGINGFACE_API_KEY")
# Check that the key was loaded
if not api_key:
    raise ValueError("HUGGINGFACE_API_KEY not found. Please check your .env file.")
HFModel = HfApiModel(
    max_tokens=2096,
    temperature=0,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
    # Pass the validated key explicitly; previously it was checked above but never
    # handed to the model, so the check had no effect on authentication.
    token=api_key,
)


In [5]:
# 3. Initialize the local Ollama model (accessed through LiteLLM)
local_model = LiteLLMModel(
    api_base="http://localhost:11434",
    model_id="ollama/qwen2.5-coder:7b",  # alternative tried: "ollama/gemma3:1b"
    max_tokens=2096,
    temperature=0,
    request_timeout=900,
)

In [None]:
# Module names intended for the agent's `additional_authorized_imports` argument
# (that argument is currently commented out where the agent is created).
additional_imports = [
    # Document processing
    "pymupdf",                              # PDF reading
    # HTTP & URLs
    "requests",                             # HTTP client for REST calls
    "urllib.parse",                         # URL parsing and construction

    # Data formats
    "json",                                 # JSON serialization/deserialization
    "csv",                                  # CSV reading/writing
    "xml.etree.ElementTree",                # XML parsing
    "bs4",                                  # BeautifulSoup for HTML parsing

    # Text processing
    "re",                                   # Regular expressions

    # File & OS
    # NOTE(security): os, sys and subprocess give agent-generated code access to the
    # host system — confirm this is intended before enabling the list.
    "os",                                   # OS interactions (env vars, paths)
    "sys",                                  # Interpreter info
    "pathlib",                              # Object-oriented filesystem paths
    "subprocess",                           # Spawning external processes

    # Computation
    "math",                                 # Advanced math functions
    "random",                               # Random sampling and shuffling

    # Date & time
    "datetime",                             # Date/time parsing and arithmetic

    # Data analysis
    "numpy",                                # Numerical arrays
    "pandas",                               # Tabular data manipulation

    # Imaging
    "PIL.Image",                            # Image loading/inspection

    # Logging
    "logging",                              # Structured debug/info logging    
]

In [6]:
# Load the prompt templates.
# The encoding is fixed to UTF-8 so the file is read the same way on every platform
# instead of depending on the locale's default encoding.
with open("prompt.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

In [6]:
# Trace agent and send traces to Phoenix
# NOTE(review): these imports sit mid-notebook — consider moving them to the first import cell.
from phoenix.otel import register
from openinference.instrumentation.smolagents import SmolagentsInstrumentor

# `register()` runs first and, per this cell's output, installs its tracer provider
# as the global OpenTelemetry default; the smolagents instrumentor is enabled afterwards.
register()
SmolagentsInstrumentor().instrument()

OpenTelemetry Tracing Details
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [7]:
# 4. Create the agent on top of the local model
agent = CodeAgent(
    tools=customs_tools,
    model=local_model,  # use `model=HFModel` instead to run against the hosted model
    prompt_templates=prompt_templates,
    max_steps=5,
    verbosity_level=2,
    # additional_authorized_imports=additional_imports,
)

In [22]:
# Dump the prompt templates held by the agent (the printed dictionary is long).
print(agent.prompt_templates)


{'system_prompt': 'You are an expert assistant with full access to following tools: {{ tools }}. \nThese tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool. \n\nHere are the rules you should always follow to solve your task:\n1. Understand the ta

In [7]:
# Load the GAIA validation split.
# `load_dotenv()` is called here as well so that HF_TOKEN from the .env file is
# available even when the Hugging Face model cell (the only other place that loads
# the .env file) has not been run.
load_dotenv()
dataset_validation = load_dataset(
    "gaia-benchmark/GAIA",
    '2023_all',
    split="validation",
    token=os.getenv("HF_TOKEN"),
    # NOTE(security): this allows the dataset repository's loading script to run locally.
    trust_remote_code=True,
)

In [None]:
#dataset_test = load_dataset(
#    "gaia-benchmark/GAIA",
#    '2023_all',    
#    split="test",
#    token=os.getenv("HF_TOKEN"),
#    trust_remote_code=True,
#)

In [8]:
# Pick the first example of the validation split and show it.
# The banner names the split that is actually used (it previously said "Test set").
print("---- Validation set ----")
indices = list(range(1))  # number of examples to evaluate; raise the bound to run more
split_dataset_validation = dataset_validation.select(indices)
for example in split_dataset_validation:
    print(example)

---- Test set ----
{'task_id': 'c61d22de-5f6c-4958-a7f6-5e9707bd3466', 'Question': 'A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?', 'Level': '2', 'Final answer': 'egalitarian', 'file_name': '', 'file_path': '', 'Annotator Metadata': {'Steps': '1. Go to arxiv.org and navigate to the Advanced Search page.\n2. Enter "AI regulation" in the search box and select "All fields" from the dropdown.\n3. Enter 2022-06-01 and 2022-07-01 into the date inputs, select "Submission date (original)", and submit the search.\n4. Go through the search results to find the article that has a figure with three axes and labels on each end of the axes, titled "Fairness in Agreement With European Values: An Interdisciplinary Perspective on AI Regulation".\n5. Note t

In [9]:
# 5. Run every selected question through the agent and collect the answers
predictions = []
for example in split_dataset_validation:
    q = example["Question"]
    print("Agent started running...")
    pred = agent.run(q)
    # Store the answer as stripped text for the scoring step.
    predictions += [str(pred).strip()]
    print("Agent finished running...", f"QUESTION: {q}\nANSWER: {pred}\n", sep="\n")

Agent started running...


Agent finished running...
QUESTION: A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?
ANSWER: FINAL ANSWER: None



In [24]:
# 6. Compute exact-match accuracy
def _normalize_answer(answer):
    """Return `answer` as stripped text without a leading "FINAL ANSWER:" marker.

    The agent's output can carry that marker (e.g. "FINAL ANSWER: None"), which
    would otherwise make every comparison against the bare gold answer fail.
    The marker is matched case-insensitively.
    """
    cleaned = str(answer).strip()
    marker = "FINAL ANSWER:"
    if cleaned.upper().startswith(marker):
        cleaned = cleaned[len(marker):].strip()
    return cleaned


gold = [ex["Final answer"].strip() for ex in split_dataset_validation]
# `zip` would silently truncate; a length mismatch means `predictions` is stale
# relative to the current selection, so fail loudly instead.
if len(predictions) != len(gold):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(gold)} gold answers; re-run the prediction cell."
    )
correct = sum(1 for p, g in zip(predictions, gold) if _normalize_answer(p) == g)
# Avoid a ZeroDivisionError when no examples were selected.
acc = correct / len(gold) if gold else 0.0
print(f"Exact‑match accuracy on GAIA dev: {acc:.2%}")

Exact‑match accuracy on GAIA dev: 0.00%


In [8]:
# Ask the agent a single World Bank question and show its answer.
q = (
    "According to the World Bank, which countries had gross savings of over 35% of GDP "
    "for every year in the period 2001-2010? "
    "Give your answer as a comma-separated list of countries in alphabetical order. "
    "Use the countries most common names in english when answering."
)
pred = agent.run(q)
print(f"QUESTION: {q}\nANSWER: {pred}\n")



[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



QUESTION: According to the World Bank, which countries had gross savings of over 35% of GDP for every year in the period 2001-2010? Give your answer as a comma-separated list of countries in alphabetical order. Use the countries most common names in english when answering.
ANSWER: Error in generating final LLM output:
litellm.APIConnectionError: OllamaException - litellm.Timeout: Connection timed out after 900.0 seconds.

