## **Setup Modello**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Path (or hub id) of the causal LM to load; must be filled in before running.
model_path = ""

# Tokenizer and model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_8bit=True,  # the original was missing the comma after this argument (SyntaxError)
    device_map="auto",
    torch_dtype=torch.float16,
)
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Generation settings forwarded verbatim to the text-generation pipeline.
# NOTE(review): temperature=0.0 and top_p presumably only matter when sampling
# is enabled (do_sample is not set here) — confirm the intended decoding mode.
generation_settings = {
    "max_new_tokens": 512,
    "temperature": 0.0,
    "top_p": 0.95,
    "repetition_penalty": 1.15,
}

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Wrap the HF pipeline so LangChain chains can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Embedding model
from langchain.embeddings import HuggingFaceInstructEmbeddings

# INSTRUCTOR-XL embeddings, placed on the device named "cuda:1".
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": "cuda:1"},
)

load INSTRUCTOR_Transformer
max_seq_length  512


## Caricamento del testo

In [None]:
from langchain.evaluation.loading import load_dataset
from langchain.document_loaders import TextLoader

# LangChain represents loaded files as Document objects: a dictionary-like
# structure with page_content and metadata.
loader = TextLoader("Source_documents/paul_graham_essay.txt")
documents = loader.load()

# Evaluation dataset; its items expose a "question" field used further down.
dataset = load_dataset("question-answering-paul-graham")

In [None]:
# Check the length of the loaded text in tokens.
# `documents` is the list returned by loader.load(), so page_content has to be
# read from each Document (the list itself has no such attribute).
sum(len(tokenizer.tokenize(doc.page_content)) for doc in documents)

## Splitting per creare i contesti

In [None]:
# PARAMETERS
# chunk_size / chunk_overlap are passed to the text splitter,
# retrieved_docs is the number of contexts requested from the retriever.
chunk_size, chunk_overlap, retrieved_docs = 8000, 20, 5

In [None]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter

import os

# The parameters refer to characters or tokens depending on the chosen splitter.
text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

# Contexts
docs = text_splitter.split_documents(documents)

# Path where the vector database is saved.
persist_directory = "db_graham_FULL"

# The original cell always built a new db and then immediately replaced `db`
# with one reloaded from the same directory, re-embedding (and duplicating)
# every document on each run. Build only when no store exists yet; delete the
# directory to force a rebuild (e.g. after changing the chunking parameters).
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # Load the EXISTING db
    db = Chroma(
        persist_directory=persist_directory,
        embedding_function=instructor_embeddings,
    )
else:
    # Create a NEW db
    db = Chroma.from_documents(
        docs,
        embedding=instructor_embeddings,
        persist_directory=persist_directory,
    )
    # NOTE(review): older LangChain/Chroma releases presumably only write to
    # disk on an explicit persist() — confirm for the installed version.
    if hasattr(db, "persist"):
        db.persist()

In [None]:
# Check how many documents the db contains.
# Caution: re-initializing the db at the same location will duplicate the documents inside it.
db._collection.count()

In [None]:
# Retriever in charge of fetching the contexts: similarity search returning
# the top `retrieved_docs` matches.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": retrieved_docs},
)
#retriever.get_relevant_documents("Hacker News")

## SVM retriever

In [None]:
from langchain.retrievers import SVMRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter

# SVMRetriever.from_texts expects raw strings; `docs_texts` was never defined
# in the original cell, so derive it from the split Document chunks.
docs_texts = [doc.page_content for doc in docs]

# NOTE: this rebinds `retriever`, replacing the Chroma-based retriever above.
retriever = SVMRetriever.from_texts(docs_texts, instructor_embeddings, k=retrieved_docs)

## QA chain

In [None]:
# NOT REQUIRED, but some models perform better with the prompt format they are used to.
from langchain.prompts import PromptTemplate

# Instruction / Input / Response layout; {question} and {context} are filled
# in by the chain at call time.
template = """
### Instruction:
Answer the following question only using the context provided, if the answer is not in the context say i don't know, answer only using the context provided.
Question: {question}
### Input:
{context}
### Response:
"""

PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Extra keyword arguments handed to the chain constructor.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    retriever=retriever,
    chain_type="stuff",  # all retrieved contexts are passed to the LLM together
    chain_type_kwargs=chain_type_kwargs,
    # Which column (when running on a dataset) is passed as the "question"
    # input variable; the "context" is filled into the prompt automatically.
    input_key="question",
    verbose=True,
)

In [None]:
# Quick check: run the chain on the first question of the dataset.
first_question = dataset[0]["question"]
res = qa_chain(first_question)
print(res)

In [None]:
# Run the chain over a whole dataset
llm_responses = qa_chain.apply(dataset)

## Auto Evaluation con ChatGPT o altri LLM

In [None]:
from langchain.llms import OpenAI
from langchain.evaluation.qa import QAEvalChain

import os

# Read the key from the environment instead of hard-coding it in the notebook,
# so a real key never ends up in version control.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running the evaluation."
    )

# Grader LLM; temperature 0 is requested for the grading calls.
eval_llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

# Chain that automates the grading process
eval_chain = QAEvalChain.from_llm(eval_llm)

# Each prediction is read from the "result" key of the chain output and
# matched with the "question" key of the dataset example.
graded_outputs = eval_chain.evaluate(
    dataset, llm_responses, question_key="question", prediction_key="result"
)

In [None]:
from collections import Counter

# Tally of the results: attach each grade to its response, then count them.
for idx, response in enumerate(llm_responses):
    response['grade'] = graded_outputs[idx]['text']

Counter(item['grade'] for item in llm_responses)

Counter({' CORRECT': 19, ' INCORRECT': 3})

In [None]:
# Per-example report: question, reference answer, prediction and grade.
for i, _ in enumerate(dataset):
    response = llm_responses[i]
    print(f"Example {i}:")
    print("Question: " + response['question'])
    print("Real Answer: " + response['answer'])
    print("Predicted Answer: " + response['result'])
    print("Predicted Grade: " + response['grade'])
    print()

Example 0:
Question: What were the two main things the author worked on before college?
Real Answer: The two main things the author worked on before college were writing and programming.
Predicted Answer:  Writing and programming.
Predicted Grade:  CORRECT

Example 1:
Question: What made the author want to work on AI?
Real Answer: The novel 'The Moon is a Harsh Mistress' and a PBS documentary showing Terry Winograd using SHRDLU made the author want to work on AI.
Predicted Answer:  The author was inspired to work on AI by two sources: a science fiction novel called The Moon is a Harsh Mistress by Robert Heinlein and a PBS documentary featuring Terry Winograd using SHRDLU. When the author saw Winograd using SHRDLU, they believed that teaching the computer more words was all that was needed to bring about significant advancements in AI. However, during the author's first year of graduate school, they came to realize that AI as practiced at the time was a hoax. They eventually decided to 

## Valutazione "Manuale"

In [None]:
## Print without grades, for manual review
for i, _ in enumerate(dataset):
    response = llm_responses[i]
    print(f"Example {i}:")
    print("QUESTION: " + response['question'])
    print("REAL ANSWER: " + response['answer'])
    print("PREDICTED: " + response['result'])
    print()

Example 0:
QUESTION: What were the two main things the author worked on before college?
REAL ANSWER: The two main things the author worked on before college were writing and programming.
PREDICTED:  The two main things the author worked on before college were writing and programming.

Example 1:
QUESTION: What made the author want to work on AI?
REAL ANSWER: The novel 'The Moon is a Harsh Mistress' and a PBS documentary showing Terry Winograd using SHRDLU made the author want to work on AI.
PREDICTED:  The author wanted to work on AI after reading a novel by Robert Heinlein called The Moon is a Harsh Mistress, which featured an intelligent computer called Mike, and watching a PBS documentary that showed Terry Winograd using SHRDLU.

Example 2:
QUESTION: What did the author realize while looking at a painting at the Carnegie Institute?
REAL ANSWER: The author realized that paintings were something that could be made to last and that making them was a way to be independent and make a liv