# Test notebook
- Instructions: https://huggingface.co/learn/agents-course/unit4/hands-on
- Leaderboard and code: https://huggingface.co/spaces/agents-course/Students_leaderboard
    - One good example: https://huggingface.co/spaces/cellerson/AgentCourseFinalProject/tree/main
- GitHub models: https://github.com/marketplace?type=models

In [10]:
pip install -r requirements.txt -qU

Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
import requests
import inspect
import typing

from dotenv import load_dotenv
import pandas as pd
import gradio as gr


# Load environment variables via python-dotenv (presumably from a local .env
# file — confirm where the notebook is run from).
load_dotenv()

# Base URL of the course scoring service and the endpoints derived from it.
api_url = "https://agents-course-unit4-scoring.hf.space"
questions_url = f"{api_url}/questions"  # used by get_question()
random_question_url = f"{api_url}/random-question"  # not referenced in the cells shown here
files_url = f"{api_url}/files"  # attachments are fetched as f"{files_url}/{task_id}"
submit_url = f"{api_url}/submit"  # not referenced in the cells shown here

def get_question() -> typing.Tuple[str, str, bytes | None, str | None]:
    """
    Fetch the first question from the scoring API's question list.

    No randomness is involved: the whole list is requested from
    ``questions_url`` and element 0 is used, so repeated calls yield the same
    question for as long as the API keeps its ordering.

    Returns
        - question text
        - task id
        - binary file content (``None`` when the question has no attachment)
        - file name (``None`` or empty when the question has no attachment)

    Raises
        requests.HTTPError: if either HTTP request returns an error status.
        IndexError: if the API returns an empty question list.
    """

    questions_response = requests.get(questions_url, timeout=15)
    questions_response.raise_for_status()
    questions = questions_response.json()

    # Fail with a descriptive message rather than a bare
    # "list index out of range" when the service has nothing to offer.
    if not questions:
        raise IndexError(f"No questions returned by {questions_url}")

    first_question = questions[0]
    question = first_question["question"]
    task_id = first_question["task_id"]
    file_name = first_question.get("file_name")
    file_content = None

    # Attachments are addressed by task id, not by file name.
    if file_name:
        file_response = requests.get(f"{files_url}/{task_id}", timeout=15)
        file_response.raise_for_status()
        # The raw bytes are handed back to the caller; nothing is written to disk.
        file_content = file_response.content

    return question, task_id, file_content, file_name

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Baseline check: send the fetched question to a plain chat model (no tools,
# no system prompt) and print whatever it answers.
question, task_id, file_content, file_name = get_question()

import os
from azure.ai.inference import ChatCompletionsClient
# NOTE(review): SystemMessage is imported but not used in this cell; a later
# cell imports a different SystemMessage from langchain_core.messages, which
# rebinds the name.
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

# GitHub Models inference endpoint and the model to query.
endpoint = "https://models.github.ai/inference"
model_name = "microsoft/Phi-4"
# Raises KeyError if GITHUB_TOKEN is not set in the environment.
token = os.environ["GITHUB_TOKEN"]

client = ChatCompletionsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(token),
)

# Single-turn request: the question text is the only message sent.
response = client.complete(
    messages=[
        UserMessage(question),
    ],
    temperature=1.0,
    top_p=1.0,
    max_tokens=1000,
    model=model_name
)

print("Question:", question)
# Only the first returned choice is shown.
print("Response:", response.choices[0].message.content)

Question:  How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
Response:  Between 2000 and 2009, Mercedes Sosa released eight studio albums. Here is a list of those albums:

1. **Mujeres Argentinas** (2000)
2. **Cantora** (2001)
3. **30 Años de Vida de Mercedes Sosa** (2004)
4. **Y cómo es él** (2005)
5. **Canta por los ausentes** (2006) – Although primarily distributed as a promotion album, it has significant content.
6. **Alta fidelidad** (2007)
7. **Rodolfo** (2008) – A collaboration with Joan Manuel Serrat.
8. **Cantora 2** (2009)

Please note that the released albums can vary in definition based on official releases versus compilations or promotional distributions, but these are the mainstream studio albums released within that time frame.


# Agent
## Tools:
- Text from picture
- Text from CSV
- Text from XLSX
## Ideas:
- Add another agent that will validate the final answer
## Issues:
- Getting stuck in an infinite loop with web search

## Initial setup

In [15]:
import os
from typing import Optional
from typing import TypedDict, List, Dict, Annotated, Any

from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from langchain_community.tools import Tool, DuckDuckGoSearchRun
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_experimental.utilities import PythonREPL
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph.message import add_messages

# Load environment variables via python-dotenv before they are read below.
load_dotenv()

# Chat model used by the agent. All three settings are required: a missing
# variable raises KeyError here, when the cell runs.
model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

# NOTE(review): presumably switches on LangChain/LangSmith run tracing —
# confirm that the matching API key is provided through the environment.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

## Tools

In [21]:
# def extract_text(query: str) -> str:
#     """Retrieves detailed information about gala guests based on their name or relation."""
#     results = bm25_retriever.invoke(query)
#     if results:
#         return "\n\n".join([doc.page_content for doc in results[:3]])
#     else:
#         return "No matching guest information found."

# guest_info_tool = Tool(
#     name="guest_info_retriever",
#     func=extract_text,
#     description="Retrieves detailed information about gala guests based on their name or relation."
# )

### Search tool

# Web search goes through the Serper wrapper; the DuckDuckGo variant is kept
# here for reference only.
# search_tool = DuckDuckGoSearchRun()
search_wrapper = GoogleSerperAPIWrapper()
search_tool = Tool(
    name="search_tool",
    description="useful for when you need to ask with search",
    func=search_wrapper.run,
)

### Python tool

# NOTE(review): this tool runs whatever Python the model sends it — confirm
# how (and whether) that execution is sandboxed before using untrusted input.
python_repl = PythonREPL()
python_tool = Tool(
    name="python_repl",
    func=python_repl.run,
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
)

# Every tool the agent may call.
tools = [search_tool, python_tool]

# Expose the tools to the chat model, with parallel tool calls switched off.
model_with_tools = model.bind_tools(tools, parallel_tool_calls=False)

## Nodes

In [22]:
class TaskState(TypedDict):
    """State dictionary used as the schema of the task graph."""

    # Input data
    question: str  # question text
    task_id: str  # identifier of the task the question belongs to
    file_name: Optional[str]  # name of the attached file, if any
    file_content: Optional[bytes]  # raw bytes of the attached file, if any

    # Output data
    answer: str  # text of the model's reply
    final_answer_reached: bool  # flag indicating whether `answer` is final
    # Conversation history. `add_messages` is attached as the reducer for this
    # field (per LangGraph docs it merges returned messages into the existing
    # list instead of replacing it).
    messages: Annotated[list[AnyMessage], add_messages]


def assistant(state: TaskState):
    """
    Run one model turn over the conversation and record its outcome.

    The tool-enabled model is invoked on ``state["messages"]``. A reply that
    carries tool calls is an intermediate step, not an answer, so in that case
    the previous ``answer`` is kept and ``final_answer_reached`` is ``False``.
    Only a reply without tool calls is stored as the answer and marked final.

    Returns
        Partial state update with the keys ``answer``,
        ``final_answer_reached`` and ``messages`` (the new reply, wrapped in a
        list).
    """

    response = model_with_tools.invoke(state["messages"])

    # `tool_calls` is empty or absent when the model answered directly.
    wants_tools = bool(getattr(response, "tool_calls", None))

    return {
        "answer": state.get("answer", "") if wants_tools else response.content,
        "final_answer_reached": not wants_tools,
        "messages": [response],
    }

In [23]:
# Create the graph; TaskState is the schema of the state passed between nodes.
task_graph = StateGraph(TaskState)

# Add nodes: "assistant" calls the model, "tools" executes requested tool calls.
task_graph.add_node("assistant", assistant)
task_graph.add_node("tools", ToolNode(tools))

# Add edges
task_graph.add_edge(START, "assistant")
# Per the LangGraph docs, tools_condition routes to the "tools" node when the
# last message contains tool calls and to END otherwise.
task_graph.add_conditional_edges("assistant", tools_condition)
# After a tool has run, control always goes back to the assistant, forming
# the assistant -> tools -> assistant loop.
task_graph.add_edge("tools", "assistant")
# Left disabled: the conditional edge above is the only exit from "assistant".
# task_graph.add_edge("assistant", END)

compiled_graph = task_graph.compile()

In [9]:
# Fetch a question (plus its optional attachment) and show the question text.
question, task_id, file_content, file_name = get_question()

print(question)

How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.


In [24]:
# Hard-coded copy of the question printed by the previous cell, so this cell
# can be re-run without calling the API.
question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."

# Instruction text sent as the single human message; the question is
# interpolated at the end.
prompt = f"""
You are an AI assistant anwering complex questions.
Think step by step and answer following question.
Be very concise and output only the answer.
Answer has to be formatted as specified in the question.

Question:
{question}
"""

# Run the graph from a fully populated initial state. The task id is a
# placeholder and no file is attached.
# The second argument caps the run at 5 graph steps; the recorded output of
# this cell shows GraphRecursionError being raised when that cap is hit.
task_answer = compiled_graph.invoke(
    {
        "question": question,
        "task_id": "1",
        "file_name": None,
        "file_content": None,
        "answer": "",
        "final_answer_reached": False,
        "messages": [HumanMessage(content=prompt)],
    },
    {"recursion_limit": 5},
)

# Only reached when the graph finishes within the step cap.
print(task_answer["answer"])

GraphRecursionError: Recursion limit of 5 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT