# OCR Test

In [27]:
import google.generativeai as genai
import io
import os
from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
import base64
from langchain_google_genai import ChatGoogleGenerativeAI
from pathlib import Path
from PIL import Image
from typing import Optional

In [29]:
def extract_text_from_image(
    image_path: str,
    prompt: Optional[str] = "Extract all readable text from this image.",
    api_key: Optional[str] = None
) -> str:
    """
    Extract text from an image using Gemini 2.5 Pro Vision multimodal capabilities via LangChain.

    Args:
        image_path (str): Path to the image file.
        prompt (str): Prompt to guide Gemini in extracting text.
        api_key (str): Your Google API key with Gemini access.

    Returns:
        str: Extracted text response from Gemini.
    """


    with Image.open(image_path) as img:
        img_bytes = io.BytesIO()
        img.save(img_bytes, format="PNG")
        img_bytes.seek(0)
        image_base64 = base64.b64encode(img_bytes.read()).decode("utf-8")

        message = [
        HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": (
                        "Extract all the text from this image. "
                        "Return only the extracted text, no explanations."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{image_base64}"
                    },
                },
            ]
        )
    ]
    if api_key is None:
        raise ValueError("You must provide a Google Gemini API key.")

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-pro-exp-03-25",
        google_api_key=api_key,
        convert_system_message_to_human=True,
    )

    response = llm.invoke(message)

    return response.content if hasattr(response, "content") else str(response)


In [None]:
IMAGE_PATH = ".\\src\\gaia_test\\data\\sample_ocr_image.png"
GOOGLE_API_KEY = "your-google-api-key"

# List available models to debug the error
models = genai.list_models()
print("Available models:")
for model in models:
    print(model.name)

In [30]:
text = extract_text_from_image(IMAGE_PATH, api_key=GOOGLE_API_KEY)
print("Extracted Text:\n", text)



Extracted Text:
 Lorem ips's deplace
-Lorem,-ipsum dolor sit as a consectindiarm, consectresum, indisim
incildis me dolore-illant, quis appristenched labor, filsit in labor
magna, amil jouri paratum.
-A em-sim, occasentdis tempar cublling dota, consentables
noisucle disfirycliplor vise, ant Emadisalitais sild befure posscat
in at depececting hlis molcicat, tempers desforentius lean compdntr,
for the magnants thic procedients.
-A at perfermtimg the llaventor ate.
-A perum dutis may minim placere, et culdnt, blals, temporam places
vollurat paceal supper the diffectates, quis,
caperour decinsum on the offpeate maginam.


# YT-DLP Test

In [14]:
from urllib.parse import urlparse
import yt_dlp
import contextlib
import json
from pprint import pprint
import io

In [None]:
import time


def extract_youtube_metadata(video_url):
    """
    Extract metadata from a YouTube video using yt-dlp.

    Args:
        video_url (str): The URL of the YouTube video.

    Returns:
        dict: Metadata dictionary for the video.
    """
    ydl_opts = {
        'quiet': True,
        'skip_download': True,
        'extract_flat': False,
    }
    ydl_opts = {
                "quiet": True,
                "no_warnings": True,
                "extract_flat": "in_playlist",
                "forcejson": True,
                "skip_download": True,
                "youtube_include_dash_manifest": False,
            }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        video_info = ydl.extract_info(video_url, download=False)
        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            ydl.extract_info(video_url, download=False)
            
        # extracted_info_json = info # buffer.getvalue()
        
        # if not extracted_info_json:
        #     return f"Error: yt-dlp returned no metadata for {video_url}."

        # first_video_info = None
        
        # for line in extracted_info_json.strip().split("\n"):
        #     try:
        #         first_video_info = json.loads(line)
        #         break
        #     except json.JSONDecodeError:
        #         continue
        # if not first_video_info:
        #     return (
        #         f"Error: Could not parse metadata JSON for {video_url}."
        #     )

        title = video_info.get("title", "N/A")
        if isinstance(title, str):
            title = title.strip()
        description = video_info.get("description", "N/A")
        if isinstance(description, str):
            description = description.strip()
        uploader = video_info.get("uploader", "N/A")
        if isinstance(uploader, str):
            uploader = uploader.strip()
        duration_s = video_info.get("duration")
        duration = (
            time.strftime("%H:%M:%S", time.gmtime(duration_s))
            if duration_s
            else "N/A"
        )
        views = video_info.get("view_count", "N/A")
        date = video_info.get("upload_date", "N/A")
        if isinstance(date, str):
            date = date.strip()
        if date and len(date) == 8:
            date = f"{date[:4]}-{date[4:6]}-{date[6:]}"
        summary = (
            f"Metadata for YouTube video: {video_url}\nTitle: {title}\nUploader: {uploader}\n"
            f"Date: {date}\nDuration: {duration}\nViews: {views}\nDesc (truncated):\n---\n"
            f"{description[:1000]}{'...' if len(description) > 1000 else ''}\n---\nNote: Metadata only."
        )
    return summary

In [23]:
video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
metadata = extract_youtube_metadata(video_url)
pprint(metadata)

('Metadata for YouTube video: https://www.youtube.com/watch?v=dQw4w9WgXcQ\n'
 'Title: Rick Astley - Never Gonna Give You Up (Official Music Video)\n'
 'Uploader: Rick Astley\n'
 'Date: 2009-10-25\n'
 'Duration: 00:03:32\n'
 'Views: 1650302083\n'
 'Desc (truncated):\n'
 '---\n'
 'The official video for “Never Gonna Give You Up” by Rick Astley. \n'
 '\n'
 'Never: The Autobiography 📚 OUT NOW! \n'
 'Follow this link to get your copy and listen to Rick’s ‘Never’ playlist ❤️ '
 '#RickAstleyNever\n'
 'https://linktr.ee/rickastleynever\n'
 '\n'
 '“Never Gonna Give You Up” was a global smash on its release in July 1987, '
 'topping the charts in 25 countries including Rick’s native UK and the US '
 'Billboard Hot 100.  It also won the Brit Award for Best single in 1988. '
 'Stock Aitken and Waterman wrote and produced the track which was the '
 'lead-off single and lead track from Rick’s debut LP “Whenever You Need '
 'Somebody”.  The album was itself a UK number one and would go on to sell '
 

In [24]:
metadata

'Metadata for YouTube video: https://www.youtube.com/watch?v=dQw4w9WgXcQ\nTitle: Rick Astley - Never Gonna Give You Up (Official Music Video)\nUploader: Rick Astley\nDate: 2009-10-25\nDuration: 00:03:32\nViews: 1650302083\nDesc (truncated):\n---\nThe official video for “Never Gonna Give You Up” by Rick Astley. \n\nNever: The Autobiography 📚 OUT NOW! \nFollow this link to get your copy and listen to Rick’s ‘Never’ playlist ❤️ #RickAstleyNever\nhttps://linktr.ee/rickastleynever\n\n“Never Gonna Give You Up” was a global smash on its release in July 1987, topping the charts in 25 countries including Rick’s native UK and the US Billboard Hot 100.  It also won the Brit Award for Best single in 1988. Stock Aitken and Waterman wrote and produced the track which was the lead-off single and lead track from Rick’s debut LP “Whenever You Need Somebody”.  The album was itself a UK number one and would go on to sell over 15 million copies worldwide.\n\nThe legendary video was directed by Simon West 

# AGENT-CORE Test

In [1]:
import os
import sys


In [3]:
# Get the path to the langgraph.prebuilt directory
langgraph_path = "C:\\Users\\ahabi\\OneDrive\\Documents\\Codes\\Python Projects\\Huggingface - AI Agents Course\\hf-agents-course_final-assignment\\.venv\\Lib\\site-packages\\langgraph\\prebuilt"

# List the contents of the directory
if os.path.isdir(langgraph_path):
    print(f"Contents of {langgraph_path}:")
    for item in os.listdir(langgraph_path):
        print(f"- {item}")
else:
    print(f"Directory not found: {langgraph_path}")

# Additionally, try importing the __init__.py file and see its contents
try:
    from langgraph.prebuilt import *
    print(f"\nSuccessfully imported from langgraph.prebuilt. Available names:")
    print(dir())
except ImportError as e:
    print(f"\nError importing from langgraph.prebuilt: {e}")

Contents of C:\Users\ahabi\OneDrive\Documents\Codes\Python Projects\Huggingface - AI Agents Course\hf-agents-course_final-assignment\.venv\Lib\site-packages\langgraph\prebuilt:
- chat_agent_executor.py
- interrupt.py
- py.typed
- tool_node.py
- tool_validator.py
- __init__.py
- __pycache__

Successfully imported from langgraph.prebuilt. Available names:
['In', 'InjectedState', 'InjectedStore', 'Out', 'ToolNode', 'ValidationNode', '_', '_1', '_2', '__', '__DW_SCOPE__', '___', '__builtin__', '__builtins__', '__doc__', '__loader__', '__name__', '__package__', '__spec__', '__vsc_ipynb_file__', '_dh', '_i', '_i1', '_i2', '_i3', '_ih', '_ii', '_iii', '_oh', 'create_react_agent', 'exit', 'get_ipython', 'item', 'langgraph_path', 'open', 'os', 'quit', 'sys', 'tools_condition']


In [10]:
import langgraph.prebuilt
print(dir(langgraph.prebuilt))

['InjectedState', 'InjectedStore', 'ToolNode', 'ValidationNode', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'chat_agent_executor', 'create_react_agent', 'tool_node', 'tool_validator', 'tools_condition']


# Test HF API ENDPOINT

In [11]:
from huggingface_hub import list_inference_endpoints
endpoints = list_inference_endpoints(namespace="*")
print(f"Available inference endpoints: {endpoints}")
for endpoint in endpoints:
    print(endpoint)

  from .autonotebook import tqdm as notebook_tqdm


Available inference endpoints: []


In [12]:
from src.gaia_agent.tools import get_all_tools

In [13]:
get_all_tools()

--- Initializing All Tools ---
⚠️ Tavily Search failed (1 validation error for TavilySearchTool
api_wrapper
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing), falling back to DuckDuckGo.
✅ Initialized DuckDuckGo Search (Fallback)
✅ Initialized Wikipedia Search
✅ Initialized ArXiv Search
❌ Failed to initialize ArXiv Document Search: "ArxivDocumentSearchTool" object has no field "load_max_docs"
✅ Initialized File Tools (Save, Download)
✅ Initialized Text Analysis Tool
✅ Initialized CSV Analysis Tool
✅ Initialized Excel Analysis Tool
✅ Initialized Image Analysis Tool
✅ Initialized MP3 Analysis Tool
✅ Initialized Image OCR Tool
✅ Initialized Python REPL Tool
✅ Initialized YouTube Metadata Tool
✅ Initialized Math Tools (Add, Subtract, Multiply, Divide, Modulus)
--- Total tools initialized: 18 ---
Available tool names: ['duckduckgo_search', 'wikipedia_search', 'arxiv_search', 'save_content_to_file', 

[DuckDuckGoSearchTool(api_wrapper=DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(region='wt-wt', safesearch='moderate', time='y', max_results=5, backend='auto', source='text'))),
 WikipediaSearchTool(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\ahabi\\OneDrive\\Documents\\Codes\\Python Projects\\Huggingface - AI Agents Course\\hf-agents-course_final-assignment\\.venv\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=4000)),
 ArXivSearchTool(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=10, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=4000)),
 SaveContentTool(),
 DownloadFileTool(),
 AnalyzeTextTool(),
 AnalyzeCsvTool(),
 AnalyzeExcelTo