# Run this notebook on Google Colab

In [9]:
# Optional installs, left commented out.
# NOTE(review): the cell that builds the LLM imports `llama_index.llms.google_genai`; if that
# package is not already present in the runtime, the second line below has to be uncommented
# and run first — confirm on a fresh kernel.
#!pip install llama-index llama-index-llms-huggingface
#!pip install llama-index-llms-google-genai llama-index

In [2]:
# Transcript fetching for YouTube videos (imported by the youtube tool cell)
!pip install youtube-transcript-api
# Whisper speech-to-text (imported by the audio tool cell)
!pip install -U openai-whisper
# LlamaIndex tool specs for Wikipedia and Tavily web search
!pip install llama-index-tools-wikipedia llama-index-tools-tavily-research



# Import packages

In [3]:
import pandas as pd
import json
import random
import os
import numpy as np


# Import data

In [4]:
# Parse the question set; the "utf-8-sig" codec drops a leading BOM if the file has one
with open("questions.json", mode="r", encoding="utf-8-sig") as f:
    questions_data = json.loads(f.read())

# Quick look at the container type and the first record
print(type(questions_data))
print(questions_data[0])

<class 'list'>
{'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be', 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.', 'Level': '1', 'file_name': ''}


In [5]:
# Select random question
item  = random.choice(questions_data)
# The random pick above is immediately replaced by the fixed question below (the one with an
# .mp3 attachment), so every later cell runs on this item. Remove the next assignment to
# actually use the random choice.
item = {'task_id': '99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3', 'question': 'Hi, I\'m making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I\'m not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can\'t quite make out what she\'s saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I\'ve attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for "a pinch of salt" or "two cups of ripe strawberries" the ingredients on the list would be "salt" and "ripe strawberries".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.', 'Level': '1', 'file_name': '99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3'}

print(item)

{'task_id': '99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3', 'question': 'Hi, I\'m making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I\'m not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can\'t quite make out what she\'s saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I\'ve attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for "a pinch of salt" or "two cups of ripe strawberries" the ingredients on the list would be "salt" and "ripe strawberries".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.', 'Level': '1', 'file_name': '99c9cc74-f

In [6]:
# Pull out the fields the later cells rely on
question_text = item.get('question')
task_id = item.get("task_id")

# Normalise an empty file name to None
file_name = item.get("file_name")
if file_name == "":
    file_name = None

print(file_name)

99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3


# Set up LLM with Google GenAI (Gemini)
I wanted to use LlamaIndex's HuggingFace API integration, but my PC configuration doesn't allow it, so this notebook uses Google's Gemini API instead.

In [7]:
from getpass import getpass
import os

# Read the key from an interactive prompt instead of hard-coding it in the notebook
google_api_key = getpass("Enter your Google API key: ")

Enter your Google API key: ··········


In [8]:
#from llama_index.llms.gemini import Gemini
from llama_index.llms.google_genai import GoogleGenAI

# Single LLM instance, passed as `llm=` to every agent defined further down
llm = GoogleGenAI(
        model="models/gemini-2.0-flash-lite",
        api_key=google_api_key
)

In [9]:
#llm.complete('what is the capital of italy?')

# Set up LlamaIndex (tools, agents, prompt)

In [10]:
# calculator tool

def calculate(input: dict) -> dict:
    """Evaluate a simple mathematical expression.

    Args:
        input: Mapping whose ``'input'`` key holds the expression text, e.g.
            ``{"input": "sqrt(16) + 2^3"}``. A bare string (or a plain number)
            is tolerated as well and treated as the expression itself.

    Returns:
        ``{"result": value}`` on success, otherwise ``{"error": message}``.
        Every failure, including a malformed payload, is reported through the
        ``"error"`` key instead of being raised.
    """
    # Unwrap the payload without assuming the caller got the shape right
    expression = input.get('input') if isinstance(input, dict) else input

    # A numeric payload such as {"input": 5} is still a valid expression
    if isinstance(expression, (int, float)) and not isinstance(expression, bool):
        expression = str(expression)

    if not isinstance(expression, str) or not expression.strip():
        return {"error": "Failed to calculate: expected an expression string under the 'input' key"}

    # Remove any potentially unsafe operations
    if any(unsafe in expression for unsafe in ["import", "exec", "eval", "compile", "open", "__"]):
        return {"error": "Unsafe expression"}

    try:
        # Only these names are visible to the evaluated expression
        allowed_symbols = {
            'sqrt': np.sqrt, 'pi': np.pi, 'e': np.e,
            'sin': np.sin, 'cos': np.cos, 'tan': np.tan,
            'log': np.log, 'log10': np.log10, 'exp': np.exp,
            'floor': np.floor, 'ceil': np.ceil, 'abs': abs
        }

        # Replace common math operations with Python syntax
        expression = expression.replace('^', '**')
        # NOTE(security): eval() with emptied builtins plus a substring blocklist is not a real
        # sandbox. The expression comes from the LLM, so treat it as untrusted input and
        # consider an AST-based evaluator before using this outside a throwaway runtime.
        result = eval(expression, {"__builtins__": {}}, allowed_symbols)
        return {"result": result}
    except Exception as e:
        return {"error": f"Failed to calculate: {str(e)}"}

In [11]:
# youtube tool

from youtube_transcript_api import YouTubeTranscriptApi
from llama_index.core import Document

def _extract_video_id(video_ref: str) -> str:
    """Return the video ID contained in a YouTube URL, or the input unchanged if it is not one."""
    from urllib.parse import parse_qs, urlparse

    video_ref = video_ref.strip()
    parsed = urlparse(video_ref)
    host = (parsed.hostname or "").lower()

    # Short links: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be"):
        return parsed.path.lstrip("/").split("/")[0]

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # Standard watch links: https://www.youtube.com/watch?v=<id>
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            return query_ids[0]
        # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        parts = [part for part in parsed.path.split("/") if part]
        if len(parts) >= 2 and parts[0] in ("embed", "shorts", "live", "v"):
            return parts[1]

    # Anything else is assumed to already be a bare video ID
    return video_ref


def get_youtube_transcript(input: dict) -> dict:
    """Fetch the transcript of a YouTube video.

    Args:
        input: Mapping whose ``'file_name'`` key holds either a bare video ID or a
            YouTube URL. A bare string is tolerated and treated as that value.

    Returns:
        ``{'result': transcript_text}`` with one caption snippet per line, or
        ``{'error': message}`` if the payload is malformed or the fetch fails.
    """
    try:
        video_ref = input if isinstance(input, str) else input.get('file_name')
        if not video_ref:
            return {'error': "Failed to fetch transcript: missing 'file_name' (video ID or URL)"}

        # The transcript API expects a video ID, not a full link
        video_id = _extract_video_id(str(video_ref))

        # Older releases expose a static get_transcript(); newer ones use an instance
        # with fetch(), whose result converts to the same list-of-dicts shape.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        text = "\n".join([t["text"] for t in transcript])
        document = Document(text=text)
        return {'result': document.text}
    except Exception as e:
        return {'error': f"Failed to fetch transcript: {str(e)}"}

In [12]:
# audio tool
import whisper

def get_audio_transcript(file_path: str) -> dict:
    """Transcribe an audio file with Whisper's "base" model.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        ``{"result": transcript_text}`` on success, otherwise ``{"error": message}``.
    """
    try:
        # Loading the model can fail as well, so it is guarded like the transcription
        # itself and reported through the same error dict.
        model = whisper.load_model("base")
        r = model.transcribe(file_path)
        return {"result": Document(text=r["text"]).text}
    except Exception as e:
        return {"error": str(e)}

In [13]:
# Call the audio tool directly on the attachment as a quick check.
# NOTE(review): the path is hard-coded, and the cell that downloads the attachment appears
# further down — on a fresh kernel the file may not exist yet; confirm the execution order.
get_audio_transcript('/content/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3')

100%|███████████████████████████████████████| 139M/139M [00:06<00:00, 22.6MiB/s]


{'result': ' In a saucepan, combine ripe strawberries, granulated sugar, freshly squeezed lemon juice and cornstarch. Cook the mixture over medium heat, stirring constantly until it thickens to a smooth consistency. Remove from heat and stir in a dash of pure vanilla extract. Allow the strawberry pie feeling to cool before using it as a delicious and fruity filling for your pie crust.'}

In [14]:
# python file execution tool

import subprocess
import tempfile

def run_python_file(file_path: str) -> dict:
    """Run a Python script in a subprocess and return the last line it printed.

    The script runs with the same interpreter as the current kernel and is stopped
    after 10 seconds. It is NOT sandboxed: it runs with the notebook's permissions.

    Args:
        file_path: Path of the ``.py`` file to execute.

    Returns:
        ``{'result': last_stdout_line}`` on success, otherwise ``{'error': message}``
        (non-zero exit code, timeout, no output, or failure to launch).
    """
    import sys  # local import so this cell does not depend on the notebook's import cell

    try:
        # Run the script and capture output
        completed = subprocess.run(
            [sys.executable or "python", file_path],
            capture_output=True,
            text=True,
            timeout=10,  # prevent infinite loops
        )
    except subprocess.TimeoutExpired:
        return {'error': "Execution failed: script timed out after 10 seconds"}
    except Exception as e:
        return {'error': f"Execution failed: {str(e)}"}

    if completed.returncode != 0:
        return {'error': f"Error running script:\n{completed.stderr.strip()}"}

    # The answer is assumed to be the last printed line; a silent script has none
    printed_lines = completed.stdout.strip().splitlines()
    if not printed_lines:
        return {'error': "Execution failed: script produced no output"}
    return {'result': printed_lines[-1]}

In [15]:
# Excel tool

def get_info_from_excel(file_path: str) -> dict:
    """Read an Excel file and return its contents rendered as a Markdown table.

    Returns ``{'result': markdown_text}`` on success, or ``{'error': message}`` if
    reading or converting the file raises.
    """
    try:
        # Markdown keeps the tabular layout readable once it is plain text
        markdown_table = pd.read_excel(file_path).to_markdown()
        return {'result': Document(text=markdown_table).text}
    except Exception as exc:
        return {'error': f"Failed to fetch data from Excel: {str(exc)}"}

In [16]:
from llama_index.core.tools import FunctionTool

# Wrap each helper function as a LlamaIndex FunctionTool. The `name` and `description`
# strings are what the system-prompt builder lists for the agent, so they are prompt text.

# Arithmetic via calculate()
calculator_tool = FunctionTool.from_defaults(
    fn=calculate,
    name="calculator",
    description="A calculator that performs basic arithmetic operations."
)

# YouTube transcripts via get_youtube_transcript()
youtube_tool = FunctionTool.from_defaults(
    fn=get_youtube_transcript,
    name="youtube_video_parser",
    description="A transcript extractor for youtube videos based on video path."
)

# Audio transcription via get_audio_transcript()
audio_tool = FunctionTool.from_defaults(
    fn=get_audio_transcript,
    name="audio_parser",
    description="A simple transcript extractor for audio based on file."
)

# Script execution via run_python_file()
run_python_tool = FunctionTool.from_defaults(
    fn=run_python_file,
    name="python_code_executor",
    description="Executes a .py file and returns the final printed numeric result."
)

# Excel-to-Markdown via get_info_from_excel()
excel_tool = FunctionTool.from_defaults(
    fn=get_info_from_excel,
    name="excel_parser",
    description="A simple tool to extract information from an Excel file and trasform it into markdown text."
)

In [17]:
# Custom tools that are later passed to the main agent as `tools=created_tools`
created_tools = [calculator_tool, youtube_tool, audio_tool, run_python_tool, excel_tool]
print(len(created_tools))

5


In [18]:
from getpass import getpass
import os

# Prompt for the Tavily key, store it in the process environment, then read it back
# into a variable for the tool spec below
os.environ["TAVILY_API_KEY"] = getpass("Enter your Tavily API key: ")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

Enter your Tavily API key: ··········


In [19]:
from llama_index.tools.tavily_research.base import TavilyToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec

# Tool spec whose tools are given to the Wikipedia agent
wikipedia_tool_spec = WikipediaToolSpec()

# to search for information on the web
tavily_tool_spec = TavilyToolSpec(api_key=TAVILY_API_KEY)

To process images, I will create another agent whose LLM is a vision model.

# Run agent on question to test it

In [20]:
#'''
def create_system_prompt_for_main_agent(tools):
    """Build the main agent's system prompt, listing every tool it can call.

    Each tool contributes one "- name: description" bullet taken from its metadata.
    Returns the complete prompt as a single string.
    """
    bullet_lines = []
    for tool in tools:
        meta = tool._metadata
        bullet_lines.append(f"- {meta.name}: {meta.description}")
    tool_descriptions = "\n".join(bullet_lines)

    return f"""You're a helpful general AI assistant with the ability to use tools.

You have access to the following tools:
{tool_descriptions}

When a user's request requires using one of these tools:
1. First think through what information you need and which tool would be appropriate
2. Then provide a clear explanation to the user about your approach
3. Finally use the appropriate tool by including the necessary parameters

Important: If a question requires calculation, execution of python code or parsing of youtube video, audios or Excel file, ALWAYS use the appropriate
tool rather than trying to answer from your knowledge. It doesn't matter if  the answer is straightforward, use tools to ensure accuracy and reliability.

Begin."""
#''';

In [21]:
# Render the main agent's prompt from the custom tool list
system_prompt = create_system_prompt_for_main_agent(created_tools)

In [22]:
# Show the rendered prompt to check the tool list it contains
print(system_prompt)

You're a helpful general AI assistant with the ability to use tools.

You have access to the following tools:
- calculator: A calculator that performs basic arithmetic operations.
- youtube_video_parser: A transcript extractor for youtube videos based on video path.
- audio_parser: A simple transcript extractor for audio based on file.
- python_code_executor: Executes a .py file and returns the final printed numeric result.
- excel_parser: A simple tool to extract information from an Excel file and trasform it into markdown text.

When a user's request requires using one of these tools:
1. First think through what information you need and which tool would be appropriate
2. Then provide a clear explanation to the user about your approach
3. Finally use the appropriate tool by including the necessary parameters

Important: If a question requires calculation, execution of python code or parsing of youtube video, audios or Excel file, ALWAYS use the appropriate
tool rather than trying to a

In [23]:
from llama_index.core.agent.workflow import ReActAgent

# Agent that owns the custom tools (calculator, transcript/audio/Excel parsers, script runner).
# `can_handoff_to` lists the other agents by their `name` strings, not by variable name.
multi_agent = ReActAgent(
    name='multi_functional_agent',
    description="A general AI assistant that can use perform calculation, parse files, and execute code.",
    system_prompt=system_prompt,  # must be string
    tools=created_tools,
    llm=llm,
    verbose=False,
    can_handoff_to=['wikipedia_agent', 'search_agent']
)

In [24]:
def create_system_prompt_for_others(tools):
    """Build the system prompt for a secondary agent, listing the tools it was given.

    Args:
        tools: Iterable of tool objects; each contributes one "- name: description"
            bullet taken from its metadata.

    Returns:
        The complete prompt as a single string.
    """
    # Describe the tools passed in, so each agent advertises its own tool set
    # rather than whatever happens to be in a module-level list.
    tool_descriptions = "\n".join([
        f"- {tool._metadata.name}: {tool._metadata.description}"
        for tool in tools
    ])

    system_prompt = f"""You're a helpful general AI assistant with the ability to use tools.

You have access to the following tools:
{tool_descriptions}

When a user's request requires using one of these tools:
1. First think through what information you need and which tool would be appropriate
2. Then provide a clear explanation to the user about your approach
3. Finally use the appropriate tool by including the necessary parameters

Important: ALWAYS use the appropriate tools rather than trying to answer from your knowledge.
It doesn't matter if  the answer is straightforward, use tools to ensure accuracy and reliability.

Begin."""

    return system_prompt

In [25]:
from llama_index.core.agent.workflow import FunctionAgent

# Wikipedia lookup agent. It is registered under the name 'wikipedia_agent' (the Python
# variable is `wiki_agent`); handoff lists refer to agents by that name string.
wiki_agent = ReActAgent(
    name='wikipedia_agent',
    description="A general AI assistant that can search Wikipedia for information.",
    system_prompt=create_system_prompt_for_others(wikipedia_tool_spec.to_tool_list()),
    tools=wikipedia_tool_spec.to_tool_list(),
    llm=llm,
    verbose=False,
    can_handoff_to=['multi_functional_agent', 'search_agent']
)

# Web search agent backed by the Tavily tools.
search_agent = ReActAgent(
    name='search_agent',
    description="A general AI assistant that can search the web for information.",
    system_prompt=create_system_prompt_for_others(tavily_tool_spec.to_tool_list()),
    tools=tavily_tool_spec.to_tool_list(),
    llm=llm,
    verbose=False,
    # Handoff targets must match the agents' `name` values: the Wikipedia agent is
    # registered as 'wikipedia_agent' ('wiki_agent' is only the Python variable name).
    can_handoff_to=['multi_functional_agent', 'wikipedia_agent']
)

In [26]:
from llama_index.core.agent.workflow import AgentWorkflow
import requests

# Create the workflow. The attachment is saved under `file_name` relative to the working
# directory, so the absolute form of that same path is what goes into the shared state
# (on Colab this resolves to /content/<file_name>). Questions without an attachment get
# None instead of a bogus ".../None" path.
workflow = AgentWorkflow(
    agents=[multi_agent, wiki_agent, search_agent],
    root_agent="multi_functional_agent",
    initial_state={
        'file_path': os.path.abspath(file_name) if file_name is not None else None,
    }
)

# Download the question's attachment, if it has one
if file_name is not None:
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    # Bounded wait so a stalled request cannot hang the notebook
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as the attachment
    response.raise_for_status()

    # Save the file
    with open(file_name, "wb") as f:
        f.write(response.content)

    print("Downloaded:", file_name)

Downloaded: 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3


In [27]:
# Prompt template for the workflow. Because this is an f-string, the doubled braces render
# as a literal "{question}" placeholder, which is filled in afterwards with str.format().
extract_prompt = f"""
Answer to the following input question using the agents available to you:

{{question}}

If one agent does not have the right tools to answer the question, it can hand off to another agent that has the right tools.
If the question requires a file, you can use the file name provided in the initial state.

Finish your answer with just YOUR FINAL ANSWER (do not ouput <think> or <tool> tags):
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""

In [28]:
# define prompt
# NOTE(review): this overwrites the template in place, so re-running this cell formats the
# already-filled text again; re-run the template cell first when the question changes.
extract_prompt = extract_prompt.format(question=question_text)

In [29]:
#import nest_asyncio
from llama_index.core.memory import Memory
#nest_asyncio.apply()

# to keep conversation history
memory = Memory.from_defaults(token_limit=40000)

# run agent with thinking process printed
async def main():
  handler = workflow.run(extract_prompt, memory=memory)

  # così vedo come ragiona il LLM
  async for event in handler.stream_events():
      if hasattr(event, "delta"):
          print(event.delta, end="", flush=True)

  result = await handler
  return result

result = await main()

Thought: The current language of the user is: English. I need to use a tool to extract the ingredients from the audio file.
Action: audio_parser
Action Input: {"file_path": "/content/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"}




Thought: I have the transcript from the audio file. Now I need to extract the ingredients and format them as requested.
Action: python_code_executor
Action Input: {'input': "transcript = ' In a saucepan, combine ripe strawberries, granulated sugar, freshly squeezed lemon juice and cornstarch. Cook the mixture over medium heat, stirring constantly until it thickens to a smooth consistency. Remove from heat and stir in a dash of pure vanilla extract. Allow the strawberry pie feeling to cool before using it as a delicious and fruity filling for your pie crust.'\n\ningredients = []\nif 'ripe strawberries' in transcript:\n    ingredients.append('ripe strawberries')\nif 'granulated sugar' in transcript:\n    ingredients.append('granulated sugar')\nif 'freshly squeezed lemon juice' in transcript:\n    ingredients.append('lemon juice')\nif 'cornstarch' in transcript:\n    ingredients.append('cornstarch')\nif 'pure vanilla extract' in transcript:\n    ingredients.append('vanilla extract')\n\nin

In [30]:
# Print the text of the first block of the workflow's final response
print(result.response.blocks[0].text)

cornstarch, granulated sugar, lemon juice, ripe strawberries, vanilla extract
