# 🔹 DOCUMENTS LOADING

In [1]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

loader = DirectoryLoader('HP content/',
                         glob="./*.pdf",
                         loader_cls=PyPDFLoader,
                         show_progress=True,
                         use_multithreading=True)

documents = loader.load()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:26<00:00,  9.00s/it]


In [2]:
# Pulizia degli spazi, degli "a capo", ...
for i in range(len(documents)):
    documents[i].page_content = documents[i].page_content.replace('\t', ' ')\
                                                         .replace('\n', ' ')\
                                                         .replace('       ', ' ')\
                                                         .replace('      ', ' ')\
                                                         .replace('     ', ' ')\
                                                         .replace('    ', ' ')\
                                                         .replace('   ', ' ')\
                                                         .replace('  ', ' ')

# Check:
documents[186].page_content

'186could feel it vibrating and let go; it hung in midair, unsupported, at exactly the right height for him to mount it. His eyes moved from thegolden registration number at the top of the handle, right down to theperfectly smooth, streamlined birch twigs that made up the tail. "Who sent it to you?" said Ron in a hushed voice."Look and see if there\'s a card," said Harry.Ron ripped apart the Firebolt\'s wrappings."Nothing! Blimey, who\'d spend that much on you?""Well," said Harry, feeling stunned, "I\'m betting it wasn\'t the Dursleys." I bet it was Dumbledore," said Ron, now walking around and around the Firebolt, taking in every glorious inch. "He sent you the InvisibilityCloak anonymously...." "That was my dad\'s, though," said Harry. "Dumbledore was just Passing it on to me. He wouldn\'t spend hundreds of Galleons on me. He can\'t gogiving students stuff like this --" "That\'s why he wouldn\'t say it was from him!" said Ron. "In case some git like Malfoy said it was favoritism. Hey

## 🔹 Indexing process -> turning it into a vector database

In [5]:
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
texts = text_splitter.split_documents(documents)

db = FAISS.from_documents(texts,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

In [6]:
# Salva quanto indicizzato in modo da evitare di ri-indicizzare tutto ogni volta
db.save_local("harrypotter_faiss_index")

In [7]:
retriever = db.as_retriever(k = 3)

## 🔹 Load the existing vector database & use it as a retriever

In [1]:
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

new_db = FAISS.load_local("harrypotter_faiss_index", 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
                          allow_dangerous_deserialization=True)

In [80]:
# top k=3 relevant documents are retrieved
retriever = new_db.as_retriever(search_kwargs={'k': 3, })

# 🔹 LLAMA2 - chat

## 🔹 Upload the model: 

In [8]:
from huggingface_hub import login

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel

#################################################################
# Tokenizer
#################################################################

model_name="meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 🔹 Pipeline 

In [4]:
from langchain.llms import HuggingFacePipeline

response_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation", # deve matchare la scheda del modello HF o dà errore
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

## OPT1: zero-shot prompt template  + context 
GitHub con Elia - non funziona perché non capisce cosa vuole

In [5]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    Given this question: {question} \n you must answer by choosing only one option above these: {options}. \n
    Here is context to help: {context} \n
    The correct answer is:
 """
)

In [6]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"options": itemgetter("options"), "context": itemgetter("question") | retriever, "question": itemgetter("question"),}
llm_chain = augmentation | prompt_template | response_generation_llm

In [7]:
llm_chain.invoke({'question': "What is the name of the family living in the Burrow?", 'options': "A. Malfoy; B. Potter; C. Weasley"})



"\n    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. \n    Answer with the correct option only and then stop.\n    Given this question: What is the name of the family living in the Burrow? \n you must answer by choosing only one option above these: A. Malfoy; B. Potter; C. Weasley. \n\n    Here is context to help: [Document(page_content='a straggling line, their little shoulders hunched. “They’ll be back,” said Ron as they watched the gnomes disappear into the hedge on the other side of the field. “They love it here. … Dad’s too soft with them; he thinks they’re funny. …” Just then, the front door slammed. “He’s back!” said George. “Dad’s home!”', metadata={'source': 'HP content/Harry Potter - Book 2 - The Chamber of Secrets.pdf', 'page': 42}), Document(page_content='moved into Dudley’s second bedroom. “Why?” said Harry. “Don’t ask questions!” snapped his uncle. “Take this stuff upstairs, now.” The Dursleys’ house had fo

## OPT2: zero-shot prompt template + context with `###`
Non funziona uguale (se non peggio) ma può aiutare con la formattazione

In [None]:
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a QUESTION correctly, given a certain number of options. 
    You must answer providing only one of the possible OPTIONS. You are given CONTEXT to help.
    ### QUESTION: {question}
    ### OPTIONS: {options}. 
    Here is context to help: 
    ### CONTEXT: {context} 
    ### ANSWER:
 """
)

In [None]:
augmentation = {"options": itemgetter("options"), "context": itemgetter("question") | retriever, "question": itemgetter("question"),}
llm_chain = augmentation | prompt_template | response_generation_llm

In [None]:
llm_chain.invoke({'question': "Who is Taylor Swift?", 'options':"A. a snowboard; B. a cat; C. a singer"})

## OPT3: zero-shot no context
Basarsi sulla conoscenza pregressa pare fornire la soluzione corretta...

In [None]:
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template("""
You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
Answer with the correct option only and then stop.\n 

Given this question: {question} \n you must answer by choosing only one option above these: {options}. The correct answer is: 
""")

response_generation_llm.invoke(prompt_template.format(question="What is the name of Ron's family?", 
                                                      options="A. Malfoy; B. Potter; C. Weasley"))

## OPT4: one-shot prompt template + context
Anche qui nada

In [None]:
prompt_template = PromptTemplate.from_template("""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. Answer with the correct option only and then stop.\n 
    
    Here is an example showing how I want the answer to be:
    - Given this question: What is the sun? \n just answer by choosing one options above these: A) a planet; B) a star; C) a galaxy. 
    The correct answer is: B) a star. \n 

    Given this question: {question} \n you must answer by choosing only one option above these: {options}. \n
    
    Here is context to help: {context} \n
    
    The correct answer is:
"""
)

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"options": itemgetter("options"), "context": itemgetter("question") | retriever, "question": itemgetter("question"),}
llm_chain = augmentation | prompt_template | response_generation_llm

In [None]:
llm_chain.invoke({'question': "What is the name of the young Dursley?", 'options': "A. Malfoy; B. Potter; C. Dudley"})

## OPT5: one-shot prompt template no context
Qui paradossalmente funziona! 
Sembra che sia il `context` a sminchiare la risposta...

In [None]:
prompt_template = PromptTemplate.from_template("""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.\n 
    
    Here is an example showing how I want the answer to be:
    - Given this question: What is the sun? \n just answer by choosing one options above these: A) a planet; B) a star; C) a galaxy. 
    The correct answer is: B) a star. \n 

    Given this question: {question} \n you must answer by choosing only one option above these: {options}. \n
    
    The correct answer is:
"""
)

In [None]:
response_generation_llm.invoke(prompt_template.format(question="What is the name of Ron's family?", 
                                                      options="A. Malfoy; B. Potter; C. Weasley"))

In [None]:
response_generation_llm.invoke(prompt_template.format(question="What is the name of the young Dursley?", options="A. Malfoy; B. Potter; C. Dudley"))

## OPT6: one-shot prompt template with options splitted into different objects + context
Non va

In [None]:
prompt_template = PromptTemplate.from_template("""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    
    Here is an example showing how I want the answer to be:
    - Given this question: What is the sun? \n just answer by choosing one options above these: A. a planet; B. a star; C. a galaxy. 
    The correct answer is: B. a star. 
    Here is context to help: The Sun is the star at the center of the Solar System. It is a massive, hot ball of plasma, inflated and heated by energy produced by nuclear fusion.

    Given this question: {question} \n you must answer by choosing only one option above these: {option_1}; {option_2}; {option_3}.
    Here is context to help: {context} \n
    
    The correct answer is:
"""
)

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"option_1": itemgetter("option_1"), "option_2": itemgetter("option_2"), "option_3": itemgetter("option_3"), 
                "context": itemgetter("question") | retriever, "question": itemgetter("question"),}
llm_chain = augmentation | prompt_template | response_generation_llm

In [None]:
llm_chain.invoke({'question': "Who is Hermione?", 'option_1': "A. a Gryffindor", "option_2": "B. a Slytherin", "option_3": "C. a Muggle"})

## OPT7: one-shot prompt template with options splitted into different objects no context

In [None]:
from langchain import PromptTemplate

prompt_template = PromptTemplate.from_template("""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    
    Here is an example showing how I want the answer to be:
    - Given this question: What is the sun? \n just answer by choosing one options above these: A. a planet; B. a star; C. a galaxy. 
    The correct answer is: B. a star. 
    
    Given this question: {question} \n you must answer by choosing only one option above these: {option_1}; {option_2}; {option_3}.
    The correct answer is:
"""
)

In [None]:
response_generation_llm.invoke(prompt_template.format(question="What is the name of the young Dursley?", option_1="A. Malfoy",
                                                      option_2= "B. Potter", option_3 = "C. Dudley"))

## Other things

### Response Schemas

In [None]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

response_schemas = [
    ResponseSchema(name="question", description="Question from input text data."),
    ResponseSchema(name="choices", description="Available options for a multiple-choice question in comma separated."),
    ResponseSchema(name="answer", description="Correct answer for the asked question.")
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser

In [None]:
# This helps us fetch the instructions the langchain creates to fetch the response in desired format
format_instructions = output_parser.get_format_instructions()
 
print(format_instructions)

In [None]:
prompt_template = PromptTemplate.from_template(
    """
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.

    Given this question: {question} \n you must answer by choosing only one option above these: {option_1}; {option_2}; {option_3}.
    The correct answer is:
    """)  
    ],
    input_variables=["user_prompt"],
    partial_variables={"format_instructions": format_instructions}
)
final_query = prompt.format_prompt(user_prompt = answer)
print(final_query)

## 🔹 DIVIDE ET IMPERA: retrieve, format properly, append.

In [92]:
documents = retriever.invoke("What is the name of the potion allowing to change appearance?")
documents

[Document(page_content='P a g e | 184 Harry Potter and the Chamber of Secrets – J. K. Rowling man who seemed to have been turned inside out and a witch sprouting several extra pairs of arms out of her head. “Here it is,” said Hermione excitedly as she found the page headed The Polyjuice Potion. It was decorated with drawings of people halfway through transforming into other people. Harry sincerely hoped the artist had imagined the looks of intense pain on their faces. “This is the most complicated potion I’ve ever seen,” said Hermione as they scanned the recipe. “Lacewing flies, leeches, fluxweed, and knotgrass,” she murmured, running her finger down the list of ingredients. “Well, they’re easy enough, they’re in the student store-cupboard, we can help ourselves. … Oooh, look, powdered horn of a bicorn — don’t know where we’re going to get that — shredded skin of a boomslang — that’ll be tricky, too — and of course a bit of whoever we want to change into.” “Excuse me?” said Ron sharply

In [82]:
def format_output(documents):
    formatted_output = ""
    for idx, document in enumerate(documents, start=1):
        formatted_output += f"[{idx}] {document.page_content}\n"
    return formatted_output

In [93]:
formatted_context = format_output(documents)
formatted_context

'[1] P a g e | 184 Harry Potter and the Chamber of Secrets – J. K. Rowling man who seemed to have been turned inside out and a witch sprouting several extra pairs of arms out of her head. “Here it is,” said Hermione excitedly as she found the page headed The Polyjuice Potion. It was decorated with drawings of people halfway through transforming into other people. Harry sincerely hoped the artist had imagined the looks of intense pain on their faces. “This is the most complicated potion I’ve ever seen,” said Hermione as they scanned the recipe. “Lacewing flies, leeches, fluxweed, and knotgrass,” she murmured, running her finger down the list of ingredients. “Well, they’re easy enough, they’re in the student store-cupboard, we can help ourselves. … Oooh, look, powdered horn of a bicorn — don’t know where we’re going to get that — shredded skin of a boomslang — that’ll be tricky, too — and of course a bit of whoever we want to change into.” “Excuse me?” said Ron sharply. “What d’you mean,

In [94]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    Given this question: {question} \n you must answer by choosing only one option above these: {options}. \n
    Here is context to help: {context} \n
    The correct answer is:
 """
)

In [95]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"options": itemgetter("options"), "context": itemgetter("context"), "question": itemgetter("question"),}
llm_chain = augmentation | prompt_template | response_generation_llm

In [96]:
given_answer = llm_chain.invoke({'question': "What is the name of the potion allowing to change appearance?", 
                                 'options': "A. Felix Felicis; B. Polyjuice Potion; C. Amortentia", 'context': formatted_context})



In [97]:
given_answer

"\n    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. \n    Answer with the correct option only and then stop.\n    Given this question: What is the name of the potion allowing to change appearance? \n you must answer by choosing only one option above these: A. Felix Felicis; B. Polyjuice Potion; C. Amortentia. \n\n    Here is context to help: [1] P a g e | 184 Harry Potter and the Chamber of Secrets – J. K. Rowling man who seemed to have been turned inside out and a witch sprouting several extra pairs of arms out of her head. “Here it is,” said Hermione excitedly as she found the page headed The Polyjuice Potion. It was decorated with drawings of people halfway through transforming into other people. Harry sincerely hoped the artist had imagined the looks of intense pain on their faces. “This is the most complicated potion I’ve ever seen,” said Hermione as they scanned the recipe. “Lacewing flies, leeches, fluxweed, and 

# 🔹 PRIMO TENTATIVO DI CHECK

In [98]:
response_check_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation", # deve matchare la scheda del modello HF o dà errore
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_check_llm = HuggingFacePipeline(pipeline=response_check_pipeline)

In [99]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to check whether or not a question was answered correctly, given a certain number of candidate options and the context. 
    Answer whether and why you think the answer is correct.
    Given this question: {question} \n these are the possible options: {options} and the answer to check is {candidate_answer}. 
    Here is context to help: {context} \n
    Is the answer correct? Why or why not?
 """
)

In [101]:
import re

# Use regular expression to extract the correct answer
correct_answer_match = re.search(r"The correct answer is:\n\s*\([A-Z]\)", given_answer)

if correct_answer_match:
    correct_answer = correct_answer_match.group(1)
    print(f"The correct answer is: {correct_answer}")
else:
    print("Correct answer not found.")

Correct answer not found.


In [103]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"options": itemgetter("options"), "context": itemgetter("context"), 
                "question": itemgetter("question"), "candidate_answer": itemgetter("candidate_answer")}
llm_chain = augmentation | prompt_template | response_check_llm

In [104]:
llm_chain.invoke({'question': "What is the name of the potion allowing to change appearance?", 
                  'options': "A. Felix Felicis; B. Polyjuice Potion; C. Amortentia", 
                  'context': formatted_context,'candidate_answer': 'B'})



"\n    You're a helpful assistant and you are asked to check whether or not a question was answered correctly, given a certain number of candidate options and the context. \n    Answer whether and why you think the answer is correct.\n    Given this question: What is the name of the potion allowing to change appearance? \n these are the possible options: A. Felix Felicis; B. Polyjuice Potion; C. Amortentia and the answer to check is B. \n    Here is context to help: [1] P a g e | 184 Harry Potter and the Chamber of Secrets – J. K. Rowling man who seemed to have been turned inside out and a witch sprouting several extra pairs of arms out of her head. “Here it is,” said Hermione excitedly as she found the page headed The Polyjuice Potion. It was decorated with drawings of people halfway through transforming into other people. Harry sincerely hoped the artist had imagined the looks of intense pain on their faces. “This is the most complicated potion I’ve ever seen,” said Hermione as they 

# LLAMA3 - work in progress 

In [None]:
Il problema è che non hai ancora capito come formattare

##  Transformers pipeline (and zero-shot):

In [None]:
import transformers
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. Answer with the correct option only and then stop."},
    {"role": "user", "content": "Given this question: Who is Taylor Swift? \n you must answer by choosing only one option above these: A. a snowboard; B. a cat; C. a singer. The correct answer is:"},
]

prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False, 
        add_generation_prompt=True
)

terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
print(outputs[0]["generated_text"][len(prompt):])


##  Transformers AutoModelForCausalLM  (and zero-shot)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. Answer with the correct option only and then stop."},
    {"role": "user", "content": "Given this question: Who is Taylor Swift? \n you must answer by choosing only one option above these: A. a snowboard; B. a cat; C. a singer. The correct answer is:"},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))