## Modello

In [None]:
%%capture
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

# Single checkpoint identifier shared by the tokenizer and the model so the two
# can never drift apart when the model is swapped.
MODEL_ID = "TheBloke/wizardLM-13B-1.0-fp16"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_8bit=True,          # 8-bit weights to fit the 13B model in GPU memory
    device_map="auto",          # let the library decide the device placement
    torch_dtype=torch.float16,
)

# Deterministic (greedy) text generation.
# The previous configuration asked for `temperature=0` together with `top_p`:
# a temperature of 0 is not a valid sampling temperature in transformers and
# `top_p` has no effect unless sampling is enabled, so greedy decoding is now
# requested explicitly with `do_sample=False`.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,            # upper bound on prompt + generated tokens
    do_sample=False,
    repetition_penalty=1.15,
)

# LangChain wrapper used both by the agent and by the retrieval QA chains.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Instructor-XL embedding model, explicitly placed on the device "cuda:1".
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_kwargs=dict(device="cuda:1"),
    model_name="hkunlp/instructor-xl",
)

## Agent

In [None]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType, AgentExecutor
from langchain.tools import BaseTool

In [None]:
from langchain.vectorstores import Chroma

# Number of documents each retriever returns per query.
retrieved_docs = 3

# Vector store persisted under ./VectorDBs/db_graham_1000-20 and its similarity retriever.
db1 = Chroma(
    embedding_function=instructor_embeddings,
    persist_directory="./VectorDBs/db_graham_1000-20",
)
retriever1 = db1.as_retriever(
    search_type="similarity",
    search_kwargs={"k": retrieved_docs},
)

# Vector store persisted under ./VectorDBs/Athena/ and its similarity retriever.
db2 = Chroma(
    embedding_function=instructor_embeddings,
    persist_directory="./VectorDBs/Athena/",
)
retriever2 = db2.as_retriever(
    search_type="similarity",
    search_kwargs={"k": retrieved_docs},
)

In [None]:
from langchain.chains import RetrievalQA

# These two QA chains are exposed as TOOLS to the main LOCAL_LLM agent;
# they do not have to run on that same LOCAL_LLM (a dedicated "qa_llm" could be used instead).

qa_PaulGraham = RetrievalQA.from_chain_type(
    retriever=retriever1,
    chain_type="stuff",
    llm=local_llm,
)
qa_Athena = RetrievalQA.from_chain_type(
    retriever=retriever2,
    chain_type="stuff",
    llm=local_llm,
)

In [None]:
# The TOOLS are defined through dedicated callables; the QA chains are invoked via run().
# Tool names and descriptions are what the LOCAL_LLM reads to understand what each tool is for.

def my_math_function(input_string: str) -> str:
    """Evaluate a plain arithmetic expression and return the result as text.

    The expression is parsed with ``ast`` and only a whitelist of node types is
    evaluated (numeric literals, ``+ - * / // % **``, unary ``+``/``-``, the
    constants ``pi``/``e`` and a handful of ``math`` functions), so text coming
    from the LLM is never passed to ``eval``.

    Args:
        input_string: The expression to evaluate, e.g. ``"2 * (3 + 4)"`` or
            ``"sqrt(36)"``. Surrounding quotes, whitespace and a trailing
            ``=``/``?`` are ignored; ``^`` is treated as exponentiation.

    Returns:
        The result formatted as a string (whole-valued floats are printed
        without the trailing ``.0``), or a message starting with ``"Error:"``
        when the input is not a supported arithmetic expression.
    """
    import ast
    import math
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    functions = {
        "sqrt": math.sqrt,
        "abs": abs,
        "round": round,
        "log": math.log,
        "log10": math.log10,
        "exp": math.exp,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
    }
    constants = {"pi": math.pi, "e": math.e}

    def evaluate(node):
        """Recursively evaluate a whitelisted AST node; raise ValueError otherwise."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            # bool is a subclass of int: reject it explicitly along with strings etc.
            if isinstance(node.value, bool) or not isinstance(node.value, (int, float)):
                raise ValueError("unsupported literal")
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left, right = evaluate(node.left), evaluate(node.right)
            # Keep exponentiation bounded so a hostile input cannot stall the kernel.
            if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.Name) and node.id in constants:
            return constants[node.id]
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in functions
            and not node.keywords
        ):
            return functions[node.func.id](*[evaluate(arg) for arg in node.args])
        raise ValueError("unsupported expression")

    # The agent usually wraps the action input in quotes; normalise before parsing.
    expression = input_string.strip().strip("\"'").strip().rstrip("=?").strip()
    expression = expression.replace("^", "**")

    try:
        result = evaluate(ast.parse(expression, mode="eval"))
        if isinstance(result, complex):
            raise ValueError("complex result")
        if isinstance(result, float) and result.is_integer() and abs(result) < 1e15:
            result = int(result)
        return str(result)
    except (SyntaxError, ValueError, TypeError, ArithmeticError, RecursionError, MemoryError):
        # Return the problem as an observation so the agent can rephrase its input.
        return (
            "Error: could not evaluate '" + expression + "'. "
            "Provide a plain arithmetic expression such as '2 * (3 + 4)' or 'sqrt(36)'."
        )
# Every tool keeps return_direct=False: with True the agent would simply hand back
# the tool output as its final answer, which is discouraged because it cuts the
# "Thought > Observation" reasoning flow short.
tools = [
    Tool(
        func=qa_PaulGraham.run,
        name="Graham Essay Tool",
        description=(
            "Useful tool for when you need to answer questions about the Paul Graham essay,\n"
            "        the input must be a specific question about Paul Graham."
        ),
        return_direct=False,
    ),
    Tool(
        func=qa_Athena.run,
        name="Athena Tool",
        description=(
            "Useful tool for when you need to answer questions about Athena,\n"
            "        the input must be a specific question about Athena."
        ),
        return_direct=False,
    ),
    Tool.from_function(
        name="Math Tool",
        func=my_math_function,
        description="Tool used to do math operations.",
        return_direct=False,
    ),
]

In [None]:
# Custom agent prompt template. This layout is specific to ReAct-style agents,
# which format the LLM's reasoning as Thought / Action / Action Input / Observation steps.
PREFIX = "Answer the given question as best you can. You have access to the following tools:"

# {tool_names} is a template placeholder left for the agent to fill in.
FORMAT_INSTRUCTIONS = (
    "Use the following format:\n"
    "Question: the input question you must answer\n"
    "Thought: you must always think about what to do in order to answer the question\n"
    "Action: the action to take, choose one between: {tool_names}\n"
    "Action Input: the input to the action\n"
    "Observation: the result of the action\n"
    "Thought: think about the information you gathered and what to do next to answer the question\n"
    "... (this Thought/Action/Action Input/Observation cycle can repeat N times until you have enough information to give the final answer)\n"
    "Final Answer: the answer to the original input question\n"
)

# {input} and {agent_scratchpad} are template placeholders as well.
SUFFIX = (
    "Begin!\n"
    "Question: {input}\n"
    "Thought:{agent_scratchpad}\n"
)

In [None]:
# ZERO_SHOT_REACT is the most generic agent type; the alternative is STRUCTURED_INPUT_REACT,
# which allows TOOLS that take multiple inputs.
# NOTE(review): max_execution_time is presumably expressed in seconds; confirm that 3 leaves
# enough room for more than one reasoning step with this model.
# NOTE(review): verify that `stop` is actually honoured when passed at this level.

agent = initialize_agent(
    llm=local_llm,
    tools=tools,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    agent_kwargs=dict(
        prefix=PREFIX,
        format_instructions=FORMAT_INSTRUCTIONS,
        suffix=SUFFIX,
    ),
    verbose=True,
    handle_parsing_errors=False,
    stop=["\nObservation:"],
    max_execution_time=3,  # guards against endless reasoning loops
    # early_stopping_method="generate",
)

In [None]:
# Print the prompt template that is handed to the LOCAL_LLM to make it act as an agent.
agent_prompt = agent.agent.llm_chain.prompt
print(agent_prompt.template)

Answer the given question as best you can. You have access to the following tools:

Graham Essay Tool: Useful tool for when you need to answer questions about the Paul Graham essay, 
        the input should be a question about Paul Graham.
Athena Tool: Useful tool for when you need to answer questions about the Athena paper, 
        the input should be a question about Athena.
Math Tool: Tool used to do math operations.

Use the following format:
Question: the input question you must answer
Thought: you must explain what action you are going to do and why
Action: the action to take, choose one between: Graham Essay Tool, Athena Tool, Math Tool
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question


Begin!
Question: {input}
Thought:{agent_scratchpad}



# Test Domande

In [None]:
# QUICK TEST: one question that spans both documents (answering it requires both TOOLS).
import langchain

query = (
    "\n"
    "What did paul graham work on before college? and what is the two stage approach of Athena?"
    "\n"
)

response = agent.run(query)



[1m> Entering new  chain...[0m
[32;1m[1;3mI will use the Graham Essay Tool to find out more about Paul Graham's background before he went to college. Then I will use the Athena Tool to learn about the two-stage approach of Athena.
Action: Graham Essay Tool
Action Input: "What did Paul Graham work on before college?"[0m
Observation: [36;1m[1;3m Before college, Paul Graham worked on writing and programming. He wrote short stories and tried to program on the IBM 1401, a computer used by his school district.[0m
Thought:[32;1m[1;3mAction: Athena Tool
Action Input: "What is the two-stage approach of Athena?"[0m
Observation: [33;1m[1;3m The two-stage approach of Athena involves a first stage where the user formulates a natural language query, and a second stage where the query is translated into a formal query language and then executed on the database. During the first stage, the user can use any variant of a data value in the query, and the query is independent of the physica

Test su dataset contenente le domande

In [None]:
# Wrap every dataset question in the {"input": ...} mapping accepted by Agent.apply().
questions = [{"input": sample["question"]} for sample in dataset]

In [None]:
%%time
# Run the agent on the whole list of question dicts; the %%time magic above reports the cell's cost.
# NOTE(review): each returned item is presumably a dict carrying "input" and "output" keys — confirm.
llm_responses = agent.apply(questions)



[1m> Entering new  chain...[0m
[32;1m[1;3mAction: Graham Essay Tool
Action Input: "What were the two main things the author worked on before college?"[0m




Observation: [36;1m[1;3m Before college, the author worked on writing and programming. He didn't write essays, but instead wrote short stories that he imagined were deep due to having strong emotions. Additionally, he worked on programming on an IBM 1401 computer in 9th grade, which was located in his junior high school's basement.[0m
Thought:[32;1m[1;3mAction: Math Tool
Action Input: "What is the square root of 36?"[0m
Observation: [33;1m[1;3m4[0m
Thought:[32;1m[1;3mFinal Answer: The author worked on writing and programming before college.[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
[32;1m[1;3mAction: Graham Essay Tool
Action Input: "What made the author want to work on AI?"[0m
Observation: [36;1m[1;3m The author was inspired by a novel by Heinlein called The Moon is a Harsh Mistress, which featured an intelligent computer called Mike, and a PBS documentary that showed Terry Winograd using SHRDLU. He believed that teaching SHRDLU more words would

In [None]:
def stampa(responses):
    """Print each question/answer pair followed by a separator line.

    Args:
        responses: Iterable of mappings exposing string values under the
            "input" (question) and "output" (answer) keys.
    """
    separator = "<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>"
    for entry in responses:
        print("DOMANDA: " + entry["input"])
        print("RISPOSTA: " + entry["output"])
        print(separator)