# Dataset, documents, FAISS; retriever

## 🔹 Load the dataset containing the tuples `(query, correct_answer, distractor_1, distractor_2)` and the one containing the documents

In [5]:
from datasets import load_dataset

dataset = load_dataset('saracandu/msmarco_modified', split="train", trust_remote_code=True)
dataset

Dataset({
    features: ['Unnamed: 0', 'answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'correct_answer', 'distractor_1', 'distractor_2'],
    num_rows: 82326
})

In [None]:
from langchain.document_loaders import HuggingFaceDatasetLoader

loader = HuggingFaceDatasetLoader('saracandu/msmarco_filtered', 'passage_text')
documents = loader.load()
documents[0] # just to check

## 🔹 Turn `documents` into a vector database using FAISS

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# create an instance of the RecursiveCharacterTextSplitter class with specific parameters
# (it splits text into chunks of 50 characters each with a 20-character overlap)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

# 'documents' holds the text you want to split, split the text into documents using the text splitter
docs = text_splitter.split_documents(documents)

In [None]:
# choose an embedding method
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1",  
)

In [None]:
# embed the documents 'docs' into vectors using the embedding method specified by 'embedding'
# the result is stored in a FAISS index:
db = FAISS.from_documents(docs, embeddings)

# to avoid computing it each time (since the docs won't change), save the result in the storage
db.save_local(folder_path="faiss_db", index_name="MSMARCO_FaissIndex_MPNet")

## 🔹 Upload the already existing vector database (if it exists)

In [16]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-MiniLM-L6-dot-v1",  
)

db = FAISS.load_local(
    folder_path="faiss_db", # where to find it
    embeddings=embeddings, # in which "embedding language" it is expressed
    index_name="MSMARCO_FaissIndex_MiniLM", # since the folder contains multiple vector databases, specify its name
    allow_dangerous_deserialization=True
)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.6.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





## 🔹 Use it as a `retriever`

**Note:** `'k'=10` specifies the number of documents to retrieve each time `retrieved` is invoked. 
The default type of search performed is `similarity`.

In [30]:
retriever = db.as_retriever(
    search_kwargs={'k': 3,}
) 

Why `'k'=10`? Because MSMARCO assigns to each `(query, answer)` pair 10 text passages, and only 1 or 2 of these are truly relevant. 
In this first step of analysis I chose not to create `len(dataset)` different vector databases, one for each `(query, answer)` pair, but instead to merge all the passages together and store them into an unique vector database. 


**SE `'k'=10` SBAGLIA ALCUNE RISPOSTE! SE LO ABBASSI A `3` O A `4` NO :)**

# Model part (`Llama-2-7b-chat-hf`)

## ▪️ Upload the model: 

In [None]:
# do not run this unless necessary!

from huggingface_hub import login
login()

In [2]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel

#################################################################
# Tokenizer
#################################################################

model_name="meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 🔹 Pipeline for **thesis** generation

In [3]:
from langchain.llms import HuggingFacePipeline

response_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation", # deve matchare la scheda del modello HF o dà errore
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

## ▪️ Select a subset of the true dataset as a test

In [6]:
# select a subset of the queries, just for test:
first_queries = dataset['query'][:5]
first_queries

['what is rba',
 'was ronald reagan a democrat',
 'how long do you need for sydney and surrounding areas',
 'price to install tile in shower',
 'why conversion observed in body']

In [7]:
# same for correct answers and distractors:
correct_answers = dataset['correct_answer'][:5]
distractors_1 = dataset['distractor_1'][:5]
distractors_2 = dataset['distractor_2'][:5]

## ▪️ Merge the true answer and the distractors into a vector, shuffling the order of the elements

In [8]:
# shuffles the order of the vector containing the correct answer and the two distractors
# returns another vector, shuffled
import random

def shuffleAnswers(correct_answer, distractor_1, distractor_2):
    merge_options = [correct_answer, distractor_1, distractor_2]
    random.shuffle(merge_options)
    return merge_options

## ▪️ Function to format them properly the retrieved documents

In [9]:
# auxiliary function to format properly the output of the retrieval step

def format_page_content(documents):
    """
    Formats the list of retrieved documents such that 'page_content', 'Documents', 'metadata' 
    words are removed and just the true content is kept.
    """
    formatted_output = ""
    for i, doc in enumerate(documents, start=1):
        content = doc.page_content.strip(" ")
        formatted_output += f"[{i}]: {content}\n"
    return formatted_output

## 🔹 PromptTemplate definition and a LLMChain for the **thesis** 

In [10]:
# prompt template definition
# requires question, options (a string containing the possible options) and the context as input variables!

from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    Given this question: {question} \n
    You must answer by choosing only one option above these: {option_a}, {option_b}, {option_c}. \n
    Here is context to help: {context} \n
    The correct answer is:
 """
)

In [11]:
# LLM chain definition
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "context": itemgetter("context"), }

thesis_chain = augmentation | prompt_template | response_generation_llm

In [None]:
prompt_template

In [None]:
response_generation_llm

In [None]:
thesis_chain

## 🔹 Function that generates the output given the prompt, the question and the set of options

In [31]:
def thesisGeneration(query, prompt_template, merged):
    documents_retrieved = retriever.invoke(query)
    formatted_context = format_page_content(documents_retrieved)
    
    given_answer = thesis_chain.invoke({'question': query, 
                                     'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                     'context': formatted_context})
    return given_answer

In [32]:
# extract the true answer (i.e. remove the unnecessary)

def extract_answer(text):
    # trova l'indice in cui inizia il testo "the correct answer is:"
    start_index = text.find("The correct answer is:") + len("The correct answer is:")
    # estrai il testo dopo "The correct answer is:"
    correct_answer_text = text[start_index:].strip()
    
    return correct_answer_text

## 🔹 Test: how well the thesis alone is able to perform?

In [33]:
answers = []
for i in range(5):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    answers.append(extract_answer(thesisGeneration(first_queries[i], prompt_template, merged_options)))
    print(f"Given answer: {extract_answer(thesisGeneration(first_queries[i], prompt_template, merged_options))}")
    print('****************')

True answer: ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']




Given answer: [[2]] Results-based accountability
****************
True answer: ['Yes']




Given answer: 'No'.
****************
True answer: ['20-25 minutes']




Given answer: ["Long enough"]
****************
True answer: ['$11 to $22 per square foot']




Given answer: ["$11 to $22 per square foot"]
****************
True answer: ['Due to symptoms in the body']




Given answer: Due to symtions in the Body
****************


In [20]:
answers

['[[7]] - gets from talk to action quickly',
 '["B"] Yes',
 "'Two days'.",
 '["For a medium-sized shower," "$11 to $22 per square foot"]',
 'Due to symtoms in the Body']

## 🔸 Pipeline for **antithesis** generation

In [83]:
response_check_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_check_llm = HuggingFacePipeline(pipeline=response_check_pipeline)

## 🔸 PromptTemplate definition and a LLMChain for the **antithesis** 

In [99]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to check whether or not a question was answered correctly, given a certain number of candidate options and the context. 
    Given this question: {question} \n 
    These are the possible options: {option_a}, {option_b}, {option_c} \n.
    The answer that you have to check is {candidate_answer}. 
    Here is context to help: {context} \n
    Answer by saying 'Yes' if it is correct and 'No' otherwise, then explain why you think so.
    Why or why not the answer is correct:
 """
)

In [85]:
# LLM chain definition

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "context": itemgetter("context"), }

antithesis_chain = augmentation | prompt_template | response_check_llm

## 🔸 Function to generate the antithesis given the question, the thesis, the context and the options

In [100]:
def antithesisGeneration(query, prompt_template, merged, candidate_answer):
    documents_retrieved = retriever.invoke(query)
    formatted_context = format_page_content(documents_retrieved)
    
    second_answer = antithesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                            'candidate_answer': candidate_answer,
                                            'context': formatted_context})
    return second_answer

In [101]:
def extract_answer_ant(text):
    # Trova l'indice in cui inizia il testo "Why or why not the answer is correct:"
    start_index = text.find("Why or why not the answer is correct:")
    
    # Se l'indice è stato trovato, estrai la risposta corretta
    if start_index != -1:
        start_index += len("Why or why not the answer is correct:")
        # Estrai il testo dopo "Why or why not the answer is correct:"
        correct_answer_text = text[start_index:].strip()
        return correct_answer_text
    else:
        return "The correct answer could not be found."

In [102]:
ant_answers = []
for i in range(5):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    ant_answers.append(extract_answer_ant(antithesisGeneration(first_queries[i], prompt_template, merged_options, answers[i])))
    print(f"Given answer: {extract_answer_ant(antithesisGeneration(first_queries[i], prompt_template, merged_options, answers[i]))}")
    print('****************')

True answer: ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']




Given answer: Yes - This option matches exactly one part from among several parts provided for reference purposes only (the last sentence).
****************
True answer: ['Yes']




Given answer: No - This option does not correspond directly to any part of the text provided for analysis because there isn’t anything mentioned about temperature ranges within those options listed above; instead they all relate specifically towards political affiliations/history related questions regarding either Presidents like Jimmy Carter & Ronald Regan during their respective times serving office at federal level here USA).
****************
True answer: ['20-25 minutes']




Given answer: Yes - This option matches what has been said earlier about getting there early "long enoughto land". It also makes sense because even though they don't specify exactly when but just say few days/hours prior which could mean anything between like an hour ago till tomorrow morning! So technically speaking anytime within those time frames would qualify as being “long enough”.
****************
True answer: ['$11 to $22 per square foot']




Given answer: Yes - This option matches what we know from reading through the text provided for each choice; specifically when discussing costs associated directly related to installing tiles within an area such as bathroom floors & walls which includes but isn’t limited too material prices ($8 – $6), additional charges like grout sealing($7– $10). It also aligns well considering there aren’t any other factors mentioned throughout passage affect these estimates i e no discount offered nor does anything suggest they would be higher than expected range stated earlier(i.e., between eleven dollars twenty two cents per sq ft.). So based on information presented thus far,installatingtilesinashowerwould likely fall somewhere inside those ranges specified above!
****************
True answer: ['Due to symptoms in the body']




Given answer: Yes - This option matches what we know about conversions from human physiological processes such as metabolism which can cause weight gain due to various factors like hormonal imbalances, digestive issues etc. It also aligns well wiyh how our bodies work & function at cellular level where cells undergo constant turnover leading too different types o f transformations within tissues& organs resulting into observable physical manifestations over time (e g fat accumulation). So based on provided information choice A seems most plausible explanation fo rwhy observation occurred inside person’s bodily system(ies) mentioned earlier during conversation between doctor patient duo discussed previously through text-based dialogues found here [[Ref]].
****************


In [89]:
ant_answers[1]

'No - This option does not correspond directly to any information provided within either reference text passage; therefore I cannot confirm its accuracy based on those sources alone'

## 🔺 Pipeline for **synthesis** generation

In [73]:
final_response_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

final_response_llm = HuggingFacePipeline(pipeline=final_response_pipeline)

## 🔺 PromptTemplate definition and a LLMChain for the **synthesis** 

In [108]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: {question} \n 
    These are the possible options: {option_a}, {option_b}, {option_c} \n.
    The answer that you have to check is {candidate_answer} and this is its critique: {critique}.
    Here is context to help: {context} \n
    The answer is:
 """
)

In [109]:
# LLM chain definition

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template | response_check_llm

## 🔺 Function to generate the synthesis given literally everything

In [110]:
def synthesisGeneration(query, prompt_template, merged, candidate_answer, critique):
    documents_retrieved = retriever.invoke(query)
    formatted_context = format_page_content(documents_retrieved)
    
    final_answer = synthesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                            'candidate_answer': candidate_answer,
                                            'critique': critique,
                                            'context': formatted_context})
    return final_answer

In [105]:
def extract_answer_syn(text):
    # Trova l'indice in cui inizia il testo "The answer is:"
    start_index = text.find("The answer is:")
    
    # Se l'indice è stato trovato, estrai la risposta corretta
    if start_index != -1:
        # Estrai il testo dopo "The answer is:"
        correct_answer_text = text[start_index:].strip()
        return correct_answer_text
    else:
        return "The correct answer could not be found."

In [111]:
syn_answers = []
for i in range(5):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    syn_answers.append(synthesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], ant_answers[i]))
    print(f"Given answer: {synthesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], ant_answers[i])}")
    print('****************')

True answer: ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']




Given answer: 
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: what is rba 
 
    These are the possible options: ['Webbed feet'], ["'Other Allowance' which is basically to compensate all and any kind of allowances which is required to be paid at different regions/localities of the various project sites."], ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.'] 
.
    The answer that you have to check is [[2]] Results-based accountability and this is its critique: Yes - This option matches exactly one 



Given answer: 
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: was ronald reagan a democrat 
 
    These are the possible options: ['50, 55, 60, 65 and 70 °C'], ['A contamination which is associated with the food itself and not through other causes of contamination.'], ['Yes'] 
.
    The answer that you have to check is 'No'. and this is its critique: No - This option does not correspond directly to any part of the text provided for analysis because there isn’t anything mentioned about temperature ranges within those options listed above; instead they all relate specifically towards political affiliations/history related questions regarding 



Given answer: 
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: how long do you need for sydney and surrounding areas 
 
    These are the possible options: ['Yes'], ['20-25 minutes'], ['Oatmeal, beans, apples, pears, barley and prunes.'] 
.
    The answer that you have to check is ["Long enough"] and this is its critique: Yes - This option matches what has been said earlier about getting there early "long enoughto land". It also makes sense because even though they don't specify exactly when but just say few days/hours prior which could mean anything between like an hour ago till tomorrow morning! So technically speaking anytime within those



Given answer: 
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: price to install tile in shower 
 
    These are the possible options: ['They are fatty acids that have one double bond in the fatty acid chain with all of the remainder carbon atoms being single-bonded.'], ['Honolulu'], ['$11 to $22 per square foot'] 
.
    The answer that you have to check is ["$11 to $22 per square foot"] and this is its critique: Yes - This option matches what we know from reading through the text provided for each choice; specifically when discussing costs associated directly related to installing tiles within an area such as bathroom floors & walls which in



Given answer: 
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: why conversion observed in body 
 
    These are the possible options: ['Nigeria/Cameroon'], ['A chief engineer is responsible for all operations and maintenance that has to do with any and all engineering equipment throughout the entire ship.', 'The chief engineer is responsible for the technical supervision of the development, production or operation of an engineering project for a multinational corporation, a major company or a government institution.'], ['Due to symptoms in the body'] 
.
    The answer that you have to check is Due to symtions in the Body and this is its crit

##  Transformers pipeline (and zero-shot):

In [None]:
import transformers
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. Answer with the correct option only and then stop."},
    {"role": "user", "content": "Given this question: Who is Taylor Swift? \n you must answer by choosing only one option above these: A. a snowboard; B. a cat; C. a singer. The correct answer is:"},
]

prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False, 
        add_generation_prompt=True
)

terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
print(outputs[0]["generated_text"][len(prompt):])


##  Transformers AutoModelForCausalLM  (and zero-shot)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. Answer with the correct option only and then stop."},
    {"role": "user", "content": "Given this question: Who is Taylor Swift? \n you must answer by choosing only one option above these: A. a snowboard; B. a cat; C. a singer. The correct answer is:"},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))