In [None]:
# connect 5 tools: weather + kg frus rag + convert time + arxiv + serpapi
# allow daily conversation beyond tool use (+ memory)

# todo:
# 1- add conversation memory to agent (make sure it chats beyond tool-use)
# 2- add rag
# 3- add serpapi

In [2]:
import os
import ast
from dotenv import load_dotenv
import arxiv

from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Read a local .env file (if one exists) into the process environment
# before looking up any credentials.
load_dotenv()

# Each value is None when the corresponding environment variable is unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

In [3]:
# Chat model used by the agent loop. The "Observation:" stop sequence is meant
# to end generation before the model writes an observation of its own, so the
# real tool result can be appended instead.
chat_model = ChatOpenAI(
    model_kwargs={"stop": ["Observation:"]},
)

In [None]:
from langchain_core.documents import Document
from langchain_chroma import Chroma

from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_openai import ChatOpenAI

# NOTE: `chat_model` is deliberately NOT re-created in this cell. A plain
# `ChatOpenAI()` here would replace the instance configured earlier with the
# "Observation:" stop sequence, and the agent loop relies on that stop sequence.

# Source document that backs the retrieval tool.
file_path = (
    "../../eth_courses/thesis/gokberk_thesis_report_04_06_23.pdf"
)
loader = PyPDFLoader(file_path)
# Use load() rather than load_and_split(): the latter already runs a default
# text splitter, so the explicit splitter below would split the text a second
# time and `start_index` would be relative to an intermediate chunk.
pages = loader.load()

# Overlapping ~1000-character chunks; add_start_index records each chunk's
# offset within its source document in the chunk metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(pages)

# Embed every chunk and index it for similarity search.
vectorstore = Chroma.from_documents(
    all_splits,
    embedding=HuggingFaceEmbeddings(),
)

# Answering prompt: filled with the retrieved chunks ({context}) and the
# standalone question ({input}).
prompt = """System: You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

Context: {context}

H: {input}
AI:
"""

# Rewriting prompt: turns a follow-up question ({input}) plus the running
# transcript ({history}) into a question that can be understood on its own.
contextualize_q_system_prompt = """Given a chat history and the latest user question which might reference context in the chat history, 
formulate a standalone question which can be understood without the chat history. 
Do NOT answer the question, just reformulate it if needed and otherwise return it as is.
If chat history is empty (''), do NOT change anything in the question.

Chat history:
{history}
Question:
{input}
Reformulated Question:
"""

In [4]:
def arxiv_search(args):
    """Look up a paper on arXiv and return its title.

    Args:
        args: Dict produced by the agent. ``args['query']`` is a free-text
            search string and ``args['id']`` is an arXiv identifier such as
            ``"1605.08386"``. Either key may be missing or empty; at least
            one of them must carry a value for a search to be issued.

    Returns:
        str: The title of the first result, or a short explanatory message
        when no search could be made or nothing matched. The message is meant
        to be fed back to the model as the tool observation.
    """
    # Tolerate a missing/empty key instead of raising KeyError: the model
    # frequently supplies only one of the two arguments.
    query = args.get('query') or ''
    paper_id = args.get('id')
    id_list = [paper_id] if paper_id else []

    if not query and not id_list:
        return "No arXiv results: provide a 'query' and/or an 'id'."

    # Construct the default API client.
    client = arxiv.Client()
    search = arxiv.Search(query=query, id_list=id_list)

    # An empty result set would make a bare next() raise StopIteration;
    # use a default so the caller gets a readable observation instead.
    first_result = next(client.results(search), None)
    if first_result is None:
        return f"No arXiv results found for query={query!r}, id={paper_id!r}."

    return first_result.title

def convert_time(args):
    """Convert the ``H:MM:SS`` string stored under ``args['time']`` to seconds.

    Only the first three colon-separated fields are read. Fewer than three
    fields raises ``IndexError``; a non-integer field raises ``ValueError``.
    """
    fields = args['time'].split(':')
    seconds_per_unit = (3600, 60, 1)  # hours, minutes, seconds

    return sum(int(fields[pos]) * factor
               for pos, factor in enumerate(seconds_per_unit))


def kgfrus_search(args):
    """Answer ``args['question']`` from the indexed document via retrieval.

    Steps: rewrite the question into a standalone one using the global
    ``history`` transcript, fetch similar chunks from ``vectorstore``, ask
    ``chat_model`` to answer from those chunks, then append the exchange to
    ``history``.

    Returns:
        str: The model's answer text.
    """
    global history

    question = args['question']

    # 1) Make the question self-contained with respect to the chat history.
    standalone_prompt = contextualize_q_system_prompt.format(input=question,
                                                             history=history)
    rephrased_q = chat_model.invoke(standalone_prompt).content
    print(rephrased_q)

    # 2) Retrieve supporting chunks and merge them into one context string.
    retrieved_docs = vectorstore.similarity_search(rephrased_q)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # 3) Answer the standalone question from the retrieved context.
    prompt_after_formatting = prompt.format(input=rephrased_q, context=context)
    print(prompt_after_formatting)
    answer = chat_model.invoke(prompt_after_formatting).content

    # 4) Record the original (not rephrased) question and the answer.
    history += f"Human: {question}\nAI: {answer}\n"

    return answer



In [5]:
# Agent prompt template in Thought / Action / Action Input / Observation form.
# ask() fills the placeholders with str.format:
#   {tools}            - one "name : description" line per entry of `tools`
#   {tool_names}       - list of the keys of `tools`
#   {input}            - the user's question
#   {agent_scratchpad} - accumulated model replies and tool observations
system_prompt = """Answer the following questions as best you can. You have access to the following tools:

{tools}

ALWAYS use the following format:

Question: the input question you must answer
Thought: you should always think about what to do. only one action at a time.
Action: the action to take, should be one of {tool_names}
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer.

Question: {input}
Thought:{agent_scratchpad}"""

# Tool registry shown to the model: key = name of a function defined in this
# notebook (ask() looks it up by that name), value = description plus argument
# schema the model should follow when writing "Action Input".
# NOTE(review): kgfrus_search is defined in this notebook but not listed here,
# so the model is never told about it.
tools = {
    'convert_time': 'A function to convert a time string with format H:MM:SS to seconds, args: {"time": {"type": "string"}}',
    'arxiv_search': 'A function to get information about a scientific article or articles, args: {"query": {"type": "string"}, "id": {"type": "string"}}'
}

In [6]:
history = ''           # running "Human:/AI:" transcript, appended to by kgfrus_search
agent_scratchpad = ''  # Thought/Action/Observation trace of the latest ask() call


def _parse_action(reply):
    """Return ``(tool_name, raw_input)`` parsed from a model reply, or ``None`` if it has no ``Action:``."""
    _, found_action, after_action = reply.partition("Action:")
    if not found_action:
        return None
    tool_name = after_action.split("\n", 1)[0].strip()
    _, found_input, after_input = after_action.partition("Action Input:")
    # Take the first non-empty line after the marker; empty when it is absent.
    raw_input = after_input.strip().split("\n", 1)[0].strip() if found_input else ""
    return tool_name, raw_input


def ask(question, max_iterations=10):
    """Run the Thought/Action/Observation loop until the model gives an answer.

    Args:
        question: The user's question, inserted into ``system_prompt``.
        max_iterations: Upper bound on model calls for this question, so a
            model that never produces a final answer cannot loop forever.

    Returns:
        str: The content of the model reply that contains ``Final Answer:``,
        or, when the model replies without requesting any tool, that reply
        unchanged (plain conversation).

    Raises:
        RuntimeError: If no final answer is produced within ``max_iterations``
            model calls.
    """
    global agent_scratchpad

    # Start every question with a clean trace; otherwise thoughts and
    # observations from the previous question leak into this prompt.
    agent_scratchpad = ''

    tools_str = ''.join(f"{func} : {desc}\n" for func, desc in tools.items())
    tool_names = list(tools.keys())

    prompt_after_formatting = system_prompt.format(input=question,
                                                   agent_scratchpad=agent_scratchpad,
                                                   tools=tools_str,
                                                   tool_names=tool_names)
    print(prompt_after_formatting)

    for _ in range(max_iterations):
        output = chat_model.invoke(prompt_after_formatting)
        reply = output.content
        print(reply)

        # Match the exact marker requested in the prompt (no trailing space),
        # so "Final Answer:\n..." also terminates the loop.
        if "Final Answer:" in reply:
            return reply

        action = _parse_action(reply)
        if action is None:
            # No tool requested and no final-answer marker: treat the reply
            # as a direct conversational answer instead of crashing.
            return reply

        agent_scratchpad += reply
        if not agent_scratchpad.endswith("\n"):
            # Keep "Observation:" on its own line even if the reply was cut
            # off without a trailing newline.
            agent_scratchpad += "\n"

        tool_name, raw_input = action
        # Only names registered in `tools` may be executed; never dispatch an
        # arbitrary global chosen by the model.
        tool_fn = globals().get(tool_name) if tool_name in tools else None
        if not callable(tool_fn):
            result = f"Error: unknown tool {tool_name!r}. Choose one of {tool_names}."
        else:
            try:
                tool_args = ast.literal_eval(raw_input)
            except (ValueError, TypeError, SyntaxError) as err:
                # Feed the parse problem back so the model can correct itself.
                result = (f"Error: could not parse Action Input {raw_input!r} "
                          f"as a Python literal ({err}).")
            else:
                result = tool_fn(tool_args)

        agent_scratchpad += f"Observation: {result}\n"

        prompt_after_formatting = system_prompt.format(input=question,
                                                       agent_scratchpad=agent_scratchpad,
                                                       tools=tools_str,
                                                       tool_names=tool_names)

    raise RuntimeError(
        f"ask() did not reach a final answer within {max_iterations} model calls"
    )


In [7]:
# Smoke test: one question that needs both tools (time conversion, then arXiv lookup).
print(
    ask(
        "First answer how many seconds in 4:00:01. "
        "Then answer What's the paper 1605.08386 about?"
    )
)

Answer the following questions as best you can. You have access to the following tools:

convert_time : A function to convert a time string with format H:MM:SS to seconds, args: {"time": {"type": "string"}}
arxiv_search : A function to get information about a scientific article or articles, args: {"query": {"type": "string"}, "id": {"type": "string"}}


ALWAYS use the following format:

Question: the input question you must answer
Thought: you should always think about what to do. only one action at a time.
Action: the action to take, should be one of ['convert_time', 'arxiv_search']
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer.

Question: First answer how many seconds in 4:00:01. Then 