In [None]:
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain.tools import Tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import base64 
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
import os
from langchain_core.tools import tool
from langchain_community.tools.riza.command import ExecPython
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
from langchain_community.vectorstores import FAISS
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.tools.retriever import create_retriever_tool

from agents_course_hf.agentic_rag import tools

import os
import gradio as gr
import requests
import inspect
import pandas as pd
import re
import json
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langgraph.graph import StateGraph, END, START, MessagesState
from langgraph.graph.message import add_messages
from typing import TypedDict, Annotated, Optional
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage, AIMessage
from langgraph.prebuilt import ToolNode, tools_condition


system_prompt = """
        You are a general AI assistant. I will ask you a question.

        First, explore your reasoning process step by step. Consider all relevant facts and possibilities.

        Then, provide your answer using EXACTLY this format:

        FINAL ANSWER: [ Your consice answer here]

        Your FINAL ANSWER should be:
        - For numbers: Just the number without commas or units (unless specified)
        - For text: As few words as possible with no articles or abbreviations 
        - For lists: Comma-separated values following the above rules

        Important: The evaluation system will ONLY read what comes after "FINAL ANSWER:". Make sure your answer is correctly formatted.

        """

load_dotenv(r"C:\Projects\RAG_PoC\agents_course_hf\.env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

vision_llm = ChatOpenAI(model="gpt-4o")
search = DuckDuckGoSearchAPIWrapper()

@tool
def search_function(query: str) -> str:
    """Search the web for information."""
    try:
        results = search.run(query)
        if not results or results.strip() == "":
            return "No search results found. Please try a different query."
        return results
    except Exception as e:
        return f"Error during search: {str(e)}"


@tool
def image_describer(image_url: str) -> str:
    """Describes the content of an image."""

    description = ""

    try:
        import requests 
        response = requests.get(image_url)
        image_bytes = response.content
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        message = [
            HumanMessage(
                content=[
                    {
                    "type": "text",
                    "text": (
                        "Describe the type of image you see, if it is a photo, a drawing, a painting, etc. "
                        "Then describe the content of the image in the most detailled way possible. "
                        "You will start by describing the front of the image, then the back of the image if possible. "
                        "If the image contains text, you will extract it and describe it in the most detailler way possible. "
                        "If the image is a document, you will extract the text. Return only the text in this case, no explanations."
                        
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}",
                        }
                    }
                ]
            )
        ]

        # call the vision model
        response = vision_llm(message)
        description += response.content + "\n\n"

        return description.strip()

    except Exception as e:
        print(f"Error reading image file: {e}")
        return "Error reading image file."




@tool
def addition(a: int, b: int) -> int:
    """Adds two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    return a + b


@tool
def subtract(a: int, b: int) -> int:
    """Substract two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    return a - b

@tool
def multiply(a: int, b: int) -> int: 
    """Multiply two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    return a * b

@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    if b == 0:
        return "Error: Division by zero is not allowed."
    return a / b

@tool 
def modulus(a: int, b: int) -> int:
    """Modulus two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    return a % b

@tool
def exponent(a: int, b: int) -> int:
    """Exponent two numbers.
    
    Args: 
        a: first int
        b: second int
    """
    return a ** b

@tool
def python_code_executor(code: str) -> str:
    """ Executes a Python code snippet and returns the results
    
    Args:
        code: str, the Python code to execute
    """
    try:
        exec_python = ExecPython()
        result = exec_python.run(code)
        return result
    except Exception as e:
        return f"Error executing code: {str(e)}"
    

@tool 
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for a given query and return the 2 first.
    
    Args:
        query: str, the search query
    """

    try:
        search_documents = WikipediaLoader(query=query, max_result=2).load()
        results = "\n".join([doc.page_content for doc in search_documents])
        return results
    except Exception as e:
        return f"Error during Wikipedia search: {str(e)}"
    

@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 result.
    
    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}


sys_msg = SystemMessage(system_prompt)

# # retriever

# embeddings = OpenAIEmbeddings()

# index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

# vector_store = FAISS(
#     embedding_function=embeddings,
#     index=index,
#     docstore=InMemoryDocstore(),
#     index_to_docstore_id={}
# )

# create_retriever_tool = create_retriever_tool(
#     retriever=vector_store.as_retriever(),
#     name="Question search", 
#     description="A tool to retrieve similar questions from a vector store."

# )

tools = [
    search_function,
    image_describer,
    addition,
    subtract,
    multiply,
    divide,
    modulus,
    exponent,
    python_code_executor,
    wikipedia_search,
    arvix_search
    # create_retriever_tool
]


def build_graph():
    """Build the graph"""
    chat = ChatOpenAI(model="gpt-4o")
    chat_with_tools = chat.bind_tools(tools)

    def assistant(state: MessagesState):
        return {
            "messages": [chat_with_tools.invoke(state["messages"])]
        }
    
    # def retriever(state: MessagesState):
    #     """Retriever node"""
    #     similar_question = vector_store.similarity_search(state["messages"][0].content)
    #     example_msg = HumanMessage(
    #         content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
    #     )
    #     return {"messages": [sys_msg] + state["messages"] + [example_msg]}

    builder = StateGraph(MessagesState)
    # builder.add_node("retriever", retriever)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    # builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()

if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    # Build the graph
    graph = build_graph()
    # Run the graph
    messages = [HumanMessage(content=question)]
    messages = graph.invoke({"messages": messages})
    for m in messages["messages"]:
        m.pretty_print()


When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?
Tool Calls:
  wikipedia_search (call_SF1HKPTRCZ1ocn1y6UF96VLf)
 Call ID: call_SF1HKPTRCZ1ocn1y6UF96VLf
  Args:
    query: Principle of double effect
Name: wikipedia_search

Error during Wikipedia search: WikipediaLoader.__init__() got an unexpected keyword argument 'max_results'
Tool Calls:
  search_function (call_E9JxYFEeu3uCNKQwjCQ1S32g)
 Call ID: call_E9JxYFEeu3uCNKQwjCQ1S32g
  Args:
    query: Principle of double effect Wikipedia edit history
Name: search_function

Medical ethics is an applied branch of ethics which analyzes the practice of clinical medicine and related scientific research. [1] Medical ethics is based on a set of values that professionals can refer to in the case of any confusion or conflict. These values include the respect for autonomy, non-maleficence, beneficence, and justice. [2] Such tenets may allow doctors, care providers, and ... Of note is the pr

In [6]:
import json 
with open('metadata.jsonl', 'r') as f: 
    json_list = list(f)

json_QA = []
for json_str in json_list: 
    json_data = json.loads(json_str)
    json_QA.append(json_data)

In [7]:
import random
random_samples = random.sample(json_QA, 1)
for sample in random_samples:
    print("=" * 50)
    print(f"Task ID: {sample['task_id']}")
    print(f"Question: {sample['Question']}")
    print(f"Level: {sample['Level']}")
    print(f"Final Answer: {sample['Final answer']}")
    print(f"Annotator Metadata: ")
    print(f"  ├── Steps: ")
    for step in sample['Annotator Metadata']['Steps'].split('\n'):
        print(f"  │      ├── {step}")
    print(f"  ├── Number of steps: {sample['Annotator Metadata']['Number of steps']}")
    print(f"  ├── How long did this take?: {sample['Annotator Metadata']['How long did this take?']}")
    print(f"  ├── Tools:")
    for tool in sample['Annotator Metadata']['Tools'].split('\n'):
        print(f"  │      ├── {tool}")
    print(f"  └── Number of tools: {sample['Annotator Metadata']['Number of tools']}")
print("=" * 50)

Task ID: edd4d4f2-1a58-45c4-b038-67337af4e029
Question: The attached spreadsheet lists the locomotives owned by a local railroad museum. What is the typical American name for the type of locomotive this museum uses for the Murder Mystery Express?
Level: 2
Final Answer: Berkshire
Annotator Metadata: 
  ├── Steps: 
  │      ├── 1. Open the provided spreadsheet.
  │      ├── 2. Locate the locomotive used for the Murder Mystery Express, which is listed as a steam locomotive with a 2-8-4 wheel configuration.
  │      ├── 3. Search the web for “2-8-4 steam locomotive”.
  │      ├── 4. Note the most common name for a locomotive with this wheel configuration, a Berkshire.
  ├── Number of steps: 4
  ├── How long did this take?: 5 minutes
  ├── Tools:
  │      ├── 1. Microsoft Excel
  │      ├── 2. Search engine
  └── Number of tools: 2


In [None]:
load_dotenv()
embeddings = OpenAIEmbeddings()

In [10]:
docs = []
count = 0 

vector_store = FAISS(
    embedding_function=embeddings,
    index=faiss.IndexFlatL2(len(embeddings.embed_query("hello world"))),
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

for sample in json_QA:
    content =f"Question: {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
    doc = {
        "id": count,
        "content": content, 
        "metadata" : { 
            "source": sample['task_id']
        },
        "embedding": embeddings.embed_query(content)
    }
    docs.append(doc)
    count += 1



try:
    response = (
        vector_store.add_documents(docs)
    )
except Exception as e:
    print(f"Error instergin data into the vector store: {e}")



Error instergin data into the vector store: 'dict' object has no attribute 'id'
