<a href="https://colab.research.google.com/github/dannesbitt/GAIA-Agent/blob/main/GAIA_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install -q langgraph langchain_openai langchain_huggingface

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.2/148.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.3/47.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.8/194.8 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.6/223.6 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [24]:
import os
import re
import requests
from typing import TypedDict, List
from openai import OpenAI
from langgraph.graph import Graph, END
from google.colab import userdata
from urllib.parse import urlparse, parse_qs

# Set up OpenAI client
# The key is read through google.colab.userdata under the name 'OPENAI_API_KEY'
# (presumably the Colab secrets store — confirm), so this cell depends on the
# google.colab package being importable.
OPEN_API_KEY = userdata.get('OPENAI_API_KEY')
client = OpenAI(api_key=OPEN_API_KEY)  # module-level client used by llm_node

# Set up YouTube API key
# Read from the 'GOOGLE_API_KEY' entry; input_node passes it to
# fetch_youtube_transcript, which skips the lookup when the value is falsy.
YOUTUBE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Define the state structure
class State(TypedDict):
    """State dictionary handed from node to node in the graph."""
    # Conversation history. input_node appends the user message as a dict,
    # llm_node appends the assistant message object returned by the OpenAI
    # client (not a dict), and tool_node appends "tool" role dicts.
    messages: List[dict]
    # Initialised to an empty list by the run cell; no node in this file
    # reads or writes it.
    tool_calls: List[dict]
    # Content of the latest assistant reply that had no tool calls; written
    # by llm_node and printed by output_node.
    final_response: str
    # Routing flag: llm_node sets it when the reply contains tool calls, the
    # conditional edge after "llm" reads it, and input_node resets it.
    needs_tool_call: bool

# Define available tools (optional, included for flexibility)
# Schema of the single demo function exposed to the model.
_GET_WEATHER_FUNCTION = {
    "name": "get_weather",
    "description": "Get the current weather",
    # The function declares no arguments.
    "parameters": {"type": "object", "properties": {}},
}

# Tool list passed to the chat completion call in llm_node.
tools = [{"type": "function", "function": _GET_WEATHER_FUNCTION}]

# Dummy tool execution function (replace with actual tools if needed)
def execute_tool(tool_call):
    """Run the tool named in ``tool_call`` and return its result as text.

    Args:
        tool_call: Either a dict shaped like ``{"function": {"name": ...}}``
            or an object carrying the same data as attributes
            (``tool_call.function.name``). ``tool_node`` passes the entries
            of the assistant message's ``tool_calls`` attribute, which are
            accessed by attribute elsewhere in this file (``tool_call.id``),
            so both shapes must be supported.

    Returns:
        The tool's output string, or ``"Tool not found."`` when the tool name
        is unknown or cannot be determined from ``tool_call``.
    """
    # Look up "function" by key for dicts and by attribute for objects;
    # subscripting an attribute-style object would raise TypeError.
    if isinstance(tool_call, dict):
        function = tool_call.get("function")
    else:
        function = getattr(tool_call, "function", None)

    if isinstance(function, dict):
        name = function.get("name")
    else:
        name = getattr(function, "name", None)

    if name == "get_weather":
        return "It's sunny today."
    return "Tool not found."

# Helper function to fetch task files
def fetch_task_files(task_id):
    """Fetch the files associated with ``task_id`` from the scoring API.

    The response body is decoded as JSON and iterated as a sequence of
    objects that each carry a ``"filename"`` and a ``"content"`` key.

    Args:
        task_id: Identifier interpolated into the ``/files/{task_id}`` URL.

    Returns:
        A dict mapping filename to content. An empty dict is returned when
        the request fails, times out, or the payload does not have the
        expected shape; the error is printed rather than raised.
    """
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        # requests has no default timeout; without one a stalled server
        # would block the whole workflow indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        files_data = response.json()
        return {entry["filename"]: entry["content"] for entry in files_data}
    except Exception as e:
        # Deliberately best-effort: the caller simply omits file contents
        # from the prompt when nothing could be fetched.
        print(f"Error fetching files for task_id {task_id}: {e}")
        return {}

# Helper function to extract YouTube video ID from a URL
def extract_youtube_id(url):
    """Extract the video ID from a YouTube URL.

    Supported forms:
        * ``https://[www.|m.|music.]youtube.com/watch?v=<id>``
        * ``https://[www.]youtube.com/{embed,shorts,live,v}/<id>``
        * ``https://youtu.be/<id>``

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID, or ``None`` when the URL is malformed, is not a
        recognised YouTube URL, or carries no ID.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unclosed IPv6
        # bracket); treat those as "not a YouTube link".
        return None

    # hostname is lower-cased and excludes any port, unlike netloc.
    host = parsed.hostname or ''
    if host.startswith('www.'):
        host = host[len('www.'):]

    if host == 'youtu.be':
        # Short links keep the ID as the first path segment.
        video_id = parsed.path.lstrip('/').split('/')[0]
        return video_id or None

    if host in ('youtube.com', 'm.youtube.com', 'music.youtube.com'):
        if parsed.path == '/watch':
            return parse_qs(parsed.query).get('v', [None])[0]
        # Paths such as /embed/<id> or /shorts/<id> split into ['', kind, id, ...].
        segments = parsed.path.split('/')
        if len(segments) >= 3 and segments[1] in ('embed', 'shorts', 'live', 'v'):
            return segments[2] or None

    return None

# Helper function to parse SBV caption format
def parse_sbv(sbv_text):
    """Extract the plain transcript from SBV caption text.

    The text is treated as cues separated by blank lines, where the first
    line of each cue is its timestamp line and the remaining lines are the
    caption text. Cues with no text lines are skipped.

    Args:
        sbv_text: Raw SBV caption text. ``\\n``, ``\\r\\n`` and ``\\r`` line
            endings are all accepted.

    Returns:
        The caption text of all cues joined by single spaces; an empty
        string when the input is empty or contains no caption text.
    """
    if not sbv_text:
        return ''

    # Normalise line endings first: with CRLF input a plain '\n\n' split
    # never matches and the timestamps end up inside the transcript.
    normalized = sbv_text.replace('\r\n', '\n').replace('\r', '\n')

    # Split on runs of blank (or whitespace-only) lines so that extra empty
    # lines between cues do not shift the timestamp into the text lines.
    blocks = re.split(r'\n\s*\n', normalized.strip())

    transcript = []
    for block in blocks:
        lines = block.split('\n')
        if len(lines) > 1:
            # lines[0] is the timestamp line; everything after it is text.
            transcript.append(' '.join(lines[1:]))
    return ' '.join(transcript)

# Helper function to fetch YouTube transcript using YouTube Data API v3
def fetch_youtube_transcript(video_id, api_key):
    """Fetch the transcript for a YouTube video using the YouTube Data API v3.

    Lists the caption tracks of the video, downloads the first one in SBV
    format and reduces it to plain text with ``parse_sbv``.

    Args:
        video_id: ID of the video whose captions are requested.
        api_key: API key sent as the ``key`` query parameter. When falsy,
            no request is made.

    Returns:
        The transcript text, or ``None`` when no API key is set, the video
        has no caption tracks, or any request fails.

    NOTE(review): the caption download endpoint may require OAuth
    credentials rather than a plain API key — confirm against the API
    reference. With the status check below such a failure now yields
    ``None`` instead of the error body being parsed as captions.
    """
    if not api_key:
        print("YouTube API key not set, cannot fetch transcript.")
        return None
    try:
        # List caption tracks. Passing params lets requests URL-encode the
        # values; the timeout prevents an indefinite hang.
        list_response = requests.get(
            'https://www.googleapis.com/youtube/v3/captions',
            params={'part': 'snippet', 'videoId': video_id, 'key': api_key},
            timeout=30,
        )
        list_response.raise_for_status()
        listing = list_response.json()
        if 'items' not in listing or not listing['items']:
            return None
        # Select the first caption track
        caption_id = listing['items'][0]['id']
        # Download caption in SBV format
        download_response = requests.get(
            f'https://www.googleapis.com/youtube/v3/captions/{caption_id}',
            params={'tfmt': 'sbv', 'key': api_key},
            timeout=30,
        )
        # Without this check an HTTP error body would be handed to
        # parse_sbv and returned as if it were the transcript.
        download_response.raise_for_status()
        # Parse SBV to extract text
        return parse_sbv(download_response.text)
    except Exception as e:
        # Request errors usually embed the full URL, which contains the API
        # key; mask it before printing.
        detail = str(e).replace(str(api_key), '***')
        print(f"Error fetching transcript for video {video_id}: {detail}")
        return None

# Define the nodes
def input_node(state: State) -> State:
    """Seed the conversation with a question fetched from the scoring API.

    When ``state['messages']`` is empty, a random question is requested and
    turned into the first user message. File contents (when the payload has
    a ``file_id``) and a YouTube transcript (when the question links to a
    video) are appended to that message. If the question cannot be fetched,
    a fixed fallback question is used.

    When messages already exist (the graph re-enters this node after a tool
    call) nothing is fetched. In both cases ``needs_tool_call`` is reset to
    ``False`` before returning.

    Args:
        state: The shared graph state; mutated in place.

    Returns:
        The same ``state`` object.
    """
    if not state['messages']:
        try:
            # Bound the wait so a stalled endpoint cannot hang the workflow.
            response = requests.get(
                'https://agents-course-unit4-scoring.hf.space/random-question',
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
            question = data['question']
            print("Question:", question)
            # NOTE(review): assumes the attachment identifier is exposed as
            # 'file_id' — confirm against the API's response schema.
            file_id = data.get('file_id', None)
        except Exception as e:
            print(f"Error fetching question: {e}")
            question = "What is the meaning of life?"
            file_id = None

        # Construct the user message
        parts = [f"Question: {question}\n\n"]
        if file_id:
            file_contents = fetch_task_files(file_id)
            if file_contents:
                parts.append("File contents:\n")
                for filename, content in file_contents.items():
                    parts.append(f"{filename}:\n{content}\n\n")

        # Check for YouTube links in the question. The URL regex is greedy up
        # to the next whitespace, so trailing sentence punctuation or closing
        # brackets are stripped to keep them out of the video ID.
        youtube_urls = [
            url.rstrip('.,;:!?)]}>\'"')
            for url in re.findall(r'https?://\S+', question)
            if 'youtube.com' in url or 'youtu.be' in url
        ]
        # Use the first link that actually yields a video ID.
        video_id = next(
            (vid for vid in map(extract_youtube_id, youtube_urls) if vid),
            None,
        )
        if video_id:
            transcript = fetch_youtube_transcript(video_id, YOUTUBE_API_KEY)
            if transcript:
                parts.append(f"\n\nTranscript of the YouTube video: {transcript}")
            else:
                parts.append("\n\nTranscript of the YouTube video: Not available")

        state['messages'].append({"role": "user", "content": "".join(parts)})
    state['needs_tool_call'] = False
    return state

def llm_node(state: State) -> State:
    """Send the conversation to the OpenAI chat model and record its reply.

    The assistant message object is appended to ``state['messages']``. If it
    contains tool calls, ``needs_tool_call`` is set so the graph routes to
    the tool node; otherwise its content becomes ``final_response``.
    """
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=state['messages'],
        tools=tools,
        tool_choice="auto",
    )
    reply = completion.choices[0].message
    state['messages'].append(reply)

    # The routing flag mirrors whether the model asked for any tool.
    wants_tools = bool(reply.tool_calls)
    state['needs_tool_call'] = wants_tools
    if not wants_tools:
        state['final_response'] = reply.content
    return state

def tool_node(state: State) -> State:
    """Run every tool call of the latest message and append the results.

    The last entry of ``state['messages']`` is expected to be the assistant
    message object added by ``llm_node``; one ``"tool"`` role message is
    appended per tool call, linked to it through ``tool_call_id``.
    """
    history = state['messages']
    pending_calls = history[-1].tool_calls
    for call in pending_calls:
        history.append({
            "role": "tool",
            "content": execute_tool(call),
            "tool_call_id": call.id,
        })
    return state

def output_node(state: State) -> State:
    """Print the final LLM answer and pass the state through unchanged."""
    final_answer = state['final_response']
    print("Response:", final_answer)
    return state

# Create the graph
graph = Graph()

# Register each node under the name the edges below refer to.
for node_name, node_fn in (
    ("input", input_node),
    ("llm", llm_node),
    ("tool", tool_node),
    ("output", output_node),
):
    graph.add_node(node_name, node_fn)

# Wire the nodes: input always feeds the LLM; after the LLM the flag in the
# state decides between running tools and printing the answer; tool results
# loop back through the input node; output ends the run.
graph.add_edge("input", "llm")
graph.add_conditional_edges(
    "llm",
    lambda state: "output" if not state['needs_tool_call'] else "tool",
    {"tool": "tool", "output": "output"},
)
graph.add_edge("tool", "input")
graph.add_edge("output", END)

# Execution starts at the input node.
graph.set_entry_point("input")

# Compile into a runnable application.
app = graph.compile()

# Run the workflow from an empty conversation.
initial_state = dict(
    messages=[],
    tool_calls=[],
    final_response="",
    needs_tool_call=False,
)
result = app.invoke(initial_state)
print("Final State:", result)

Question: Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(

Could you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.
Response: I'm unable to listen to or process audio files at the moment. However, you can use a transcription service or software to convert the audio recording into text. Once you have the text, I'd be happy to help you extract the page numbers you need. Let me know how else I can assist you!
Final State: {'messages': [{'role': 'user', 'content': "Question: Hi, I was out sick from my classes on Friday, so I'm trying to fi