# Dataset, documents, FAISS; retriever

## 🔹 Load the dataset containing the tuples `(query, correct_answer, distractor_1, distractor_2)` and the one containing the documents

In [1]:
from datasets import load_dataset

dataset = load_dataset('saracandu/msmarco_modified', split="train", trust_remote_code=True)
dataset

Dataset({
    features: ['Unnamed: 0', 'answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'correct_answer', 'distractor_1', 'distractor_2'],
    num_rows: 82326
})

In [11]:
dataset[1]['passages']

'{\'is_selected\': [0, 1, 0, 0, 0, 0, 0], \'passage_text\': [\'In his younger years, Ronald Reagan was a member of the Democratic Party and campaigned for Democratic candidates; however, his views grew more conservative over time, and in the early 1960s he officially became a Republican. In November 1984, Ronald Reagan was reelected in a landslide, defeating Walter Mondale and his running mate Geraldine Ferraro (1935-), the first female vice-presidential candidate from a major U.S. political party.\', "From Wikipedia, the free encyclopedia. A Reagan Democrat is a traditionally Democratic voter in the United States, especially a white working-class Northerner, who defected from their party to support Republican President Ronald Reagan in either or both the 1980 and 1984 elections. During the 1980 election a dramatic number of voters in the U.S., disillusioned with the economic \'malaise\' of the 1970s and the presidency of Jimmy Carter (even more than, four years earlier, Liberal Republ

In [23]:
selected_passages = []

for row in dataset:
    passages_data = ast.literal_eval(row['passages'])
    try:
        selected_index = passages_data['is_selected'].index(1)
        selected_passage = {
            'is_selected': 1,
            'passage_text': passages_data['passage_text'][selected_index],
        }
        selected_passages.append(selected_passage)
    except ValueError:
        # Aggiungi un passaggio vuoto se non c'è nessun passaggio selezionato
        selected_passages.append({'is_selected': 0, 'passage_text': ''})

# Assicurati che la lunghezza dei passaggi selezionati corrisponda alla lunghezza del dataset originale
assert len(selected_passages) == len(dataset), "Errore: lunghezza dei passaggi selezionati non corrisponde alla lunghezza del dataset originale"

In [24]:
from datasets import Dataset

# Creazione di un nuovo dizionario con i dati desiderati
new_dataset = {
    'answers': dataset['answers'],
    'query': dataset['query'],
    'query_id': dataset['query_id'],
    'query_type': dataset['query_type'],
    'wellFormedAnswers': dataset['wellFormedAnswers'],
    'correct_answer': dataset['correct_answer'],
    'distractor_1': dataset['distractor_1'],
    'distractor_2': dataset['distractor_2'],
    'selected_passages': selected_passages
}

# Creazione del nuovo dataset
new_dataset = Dataset.from_dict(new_dataset)

In [29]:
new_dataset[20]

{'answers': "['The Camerata are a group of four powerful and influential individuals in the city of Cloudbank. Established by Grant Kendrell and Royce Bracket, the group circumvents the official administration and democratic nature of Cloudbank, in order to establish some form of stability in the ever-changing city.']",
 'query': 'what is a camerata',
 'query_id': 19719,
 'query_type': 'description',
 'wellFormedAnswers': '[]',
 'correct_answer': "['The Camerata are a group of four powerful and influential individuals in the city of Cloudbank. Established by Grant Kendrell and Royce Bracket, the group circumvents the official administration and democratic nature of Cloudbank, in order to establish some form of stability in the ever-changing city.']",
 'distractor_1': "['Stores food (sugar and salts in solution) and water.']",
 'distractor_2': "['Africa']",
 'selected_passages': {'is_selected': 1,
  'passage_text': 'The Camerata are a group of four powerful and influential individuals i

# Model part (`Llama-2-7b-chat-hf`)

## ▪️ Upload the model: 

In [None]:
# do not run this unless necessary!

from huggingface_hub import login
login()

In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel

#################################################################
# Tokenizer
#################################################################

model_name="meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 🔹 Pipeline for **thesis** generation

In [2]:
from langchain.llms import HuggingFacePipeline

response_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation", # deve matchare la scheda del modello HF o dà errore
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

## ▪️ Select a subset of the true dataset as a test

In [3]:
# select a subset of the queries, just for test:
first_queries = new_dataset['query'][:10]
# first_queries

NameError: name 'new_dataset' is not defined

In [None]:
# same for correct answers and distractors:
correct_answers = new_dataset['correct_answer'][:10]
distractors_1 = new_dataset['distractor_1'][:10]
distractors_2 = new_dataset['distractor_2'][:10]

In [None]:
# same for relevant sources
sources = new_dataset['selected_passages'][:10]

## ▪️ Merge the true answer and the distractors into a vector, shuffling the order of the elements

In [None]:
# shuffles the order of the vector containing the correct answer and the two distractors
# returns another vector, shuffled
import random

def shuffleAnswers(correct_answer, distractor_1, distractor_2):
    merge_options = [correct_answer, distractor_1, distractor_2]
    random.shuffle(merge_options)
    return merge_options

## 🔹 PromptTemplate definition and a LLMChain for the **thesis** 

In [None]:
# prompt template definition
# requires question, options (a string containing the possible options) and the context as input variables!

from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a question correctly, given a certain number of options. 
    Answer with the correct option only and then stop.
    Given this question: {question} \n
    You must answer by choosing only one option above these: {option_a}, {option_b}, {option_c}. \n
    Here is context to help: {context} \n
    The correct answer is:
 """
)

In [None]:
# LLM chain definition
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "context": itemgetter("context"), }

thesis_chain = augmentation | prompt_template | response_generation_llm

In [None]:
prompt_template

In [None]:
response_generation_llm

In [None]:
thesis_chain

## 🔹 Function that generates the output given the prompt, the question and the set of options

In [None]:
def thesisGeneration(query, prompt_template, merged, sources):
    given_answer = thesis_chain.invoke({'question': query, 
                                     'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                     'context': sources})
    return given_answer

In [None]:
# extract the true answer (i.e. remove the unnecessary)

def extract_answer(text):
    # trova l'indice in cui inizia il testo "the correct answer is:"
    start_index = text.find("The correct answer is:") + len("The correct answer is:")
    # estrai il testo dopo "The correct answer is:"
    correct_answer_text = text[start_index:].strip()
    
    return correct_answer_text

## 🔹 Test: how well the thesis alone is able to perform?

In [None]:
answers = []
for i in range(10):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    answers.append(extract_answer(thesisGeneration(first_queries[i], prompt_template, merged_options, sources[i])))
    print(f"Given answer: {extract_answer(thesisGeneration(first_queries[i], prompt_template, merged_options, sources[i]))}")
    print('****************')

In [None]:
answers

## 🔸 Pipeline for **antithesis** generation

In [48]:
response_check_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

response_check_llm = HuggingFacePipeline(pipeline=response_check_pipeline)

## 🔸 PromptTemplate definition and a LLMChain for the **antithesis** 

In [49]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to check whether or not a question was answered correctly, given a certain number of candidate options and the context. 
    Given this question: {question} \n 
    These are the possible options: {option_a}, {option_b}, {option_c} \n.
    The answer that you have to check is {candidate_answer}. 
    Here is context to help: {context} \n
    Answer by saying 'Yes' if it is correct and 'No' otherwise, then explain why you think so.
    Why or why not the answer is correct:
 """
)

In [50]:
# LLM chain definition

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "context": itemgetter("context"), }

antithesis_chain = augmentation | prompt_template | response_check_llm

## 🔸 Function to generate the antithesis given the question, the thesis, the context and the options

In [51]:
def antithesisGeneration(query, prompt_template, merged, candidate_answer, sources):
    
    second_answer = antithesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                            'candidate_answer': candidate_answer,
                                            'context': sources})
    return second_answer

In [52]:
def extract_answer_ant(text):
    # Trova l'indice in cui inizia il testo "Why or why not the answer is correct:"
    start_index = text.find("Why or why not the answer is correct:")
    
    # Se l'indice è stato trovato, estrai la risposta corretta
    if start_index != -1:
        start_index += len("Why or why not the answer is correct:")
        # Estrai il testo dopo "Why or why not the answer is correct:"
        correct_answer_text = text[start_index:].strip()
        return correct_answer_text
    else:
        return "The correct answer could not be found."

In [81]:
ant_answers = []
for i in range(50):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    ant_answers.append(extract_answer_ant(antithesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], sources[i])))
    print(f"Given answer: {extract_answer_ant(antithesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], sources[i]))}")
    print('****************')

True answer: ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']




Given answer: Yes - This option matches exactly the passage text provided in the prompt. Therefore, based solely upon the information presented herein, we may confidentiality say "yes" because there exists no other possibility more likely than others among these three choices offered above; however please note while checking against actual knowledge about topic might reveal additional relevant details leading us towards another conclusion!
****************
True answer: ['Yes']




Given answer: I would say yes because according to what we know about how people voted during those times as described on wikipedia, many Democrats did indeed switch parties for reasons related specifically to presidential candidates like jimmy carter being seen negatively compared to republican counterparts such as gerry ford. This shift away form traditional allegiance towards another political ideology became known colloquilly among some groups within these communities themselves - hence terming them “Reagans”!
****************
True answer: ['20-25 minutes']




Given answer: Yes - This option matches exactly what the passage says about taking thetraintoConcordeWeststationonthenorthernline( redlinenotspecial) which takes around twenty five minutefromthecitydirectlyandbybiketoexploretheareabythisthewillbehelpfulforthetraveler
****************
True answer: ['$11 to $22 per square foot']




Given answer: Yes - This option has been selected as the passage text mentions "between" which implies there could be multiple values within those ranges rather than just two fixed amounts like Honululu ($11-$22).
****************
True answer: ['Due to symptoms in the body']




Given answer: Yes - This option matches exactly what the passage says about converssion order which states "a psychological factor contributes directly to both the genesis and duration" (emphasis added)
****************
True answer: ['Inside the rib cage.']




Given answer: Yes - Because according to passage text "The lungs in the front and back are inside the rib cage." So option A which states they are found inside the rib cages matches with what has been stated int he passge
****************
True answer: ['The most expensive patents are international patents, which can run up to $100,000 or higher.Domestically the costs can be $10,000 or above.']




Given answer: Yes - This option has been selected as being true according to passage information provided
****************
True answer: []




Given answer: Yes - This option refers specifically to geological features related to volcanism (light coloration) rather than any medical conditions associated with sexual dysfunction as described earlier in passage text; therefore, it cannot possibly relate back towards reversing an individual who has been diagnosed/experiencing Metabolic Acidosis!
****************
True answer: ['Sophocles, Aeschylus and Euripides']




Given answer: Yes - This option matches one text passage exactly; therefore, according to our rules for checking answers (i) an exact match wins out over any other possibility even though there may be additional relevant information provided elsewhere about another choice as long both passages share at least some words/phrases with each other.)
****************
True answer: ['A tree or shrub which produces distinctive cones as part of its sexual reproduction.']




Given answer: Yes - This option matches exactly with how I would define "conifer". It refers specifically to trees/shrubs producing cone like structures for reproductive purposes; both passages mention these characteristics prominently enough such that there could only logically one choice here!
****************
True answer: ['Somatic cells are produced by mitosis and gametes produced by most organisms combine to form a zygote with n pairs of chromosomes.']




Given answer: Yes - This option matches what we know about animal reproduction from high school biology class; during interphase before division occurs there will be one round of DNA replication which results in duplicated genome material called S phase followed immediately afterward by another process known as M Phases whereby those duplicate sets get separated into identical copies now referred to as sisterchromatin until they separate at anaphase resulting ultimately in four distinct nuclei containing half set of original parent genomic information for every new individual created through binary fission type mechanism involving only nuclear divisions without any cytoplasmic components involved such separation occurring between mother & offspring thus making sure no mixing takes place while still maintaining proper segregation rules ensuring accurate transmission pass on unique identities inherited traits acquired over generations via sexual vs nonsexual reproductive strategies emplo



Given answer: Yes - This option matches all parts of the passage text provided as well as being one of only three available answers for an adult reading level audience member with basic knowledge on topic matter (i.e., they know what/who “the Cat In The Hat” refers too). Therefore selecting choice #3 makes sense based upon information presented within same paragraph alone!
****************
True answer: ['45 minutes to an hour']




Given answer: Yes - This option matches exactly what we see described inthe passage provided which states "Bakedchickenthiglsandquartersinashallowbingedingdishesprinkledwithgarlixpowedernddrizzlesoy Saucesondriedfor45minutestojustbeforecookingeverythinglooksperspectively"
****************
True answer: ['$6-$16 a square foot']




Given answer: Yes - This option matches exactly whats written after "Expect" at the beginning of passage text which means its directly related with the topic being discussed there(heatng costs).
****************
True answer: ['Granite.', 'Granite.']




Given answer: I would say yes because according to passage text Mount Pinotabo contains mostly rock like granites which means option A(granits).
****************
True answer: ['$4.64 - $6.36']




Given answer: Yes I am checking because...the passage text mentions "per square foot" which matches one option provided ($4.64-$6.36).
****************
True answer: ['Fish']




Given answer: Yes - This option matches exactly the text provided from passage about "three striped Damselfish".
****************
True answer: ["In a New York State hermit's letter to the editor of an Adirondack Mountain newspaper."]




Given answer: Yes - This option matches exactly one sentence found within the passage provided which reads,"It is written as follows;...the very same thing we now call 'it-ism.' It has been said many times before but never quite like these words..." Therefore based on just those two sentences alone,option A would be considered true because they both contain some form of wording similar enough for us to consider them matching answers.However there may exist other instances where another part could also match depending upon how closely examining each individual component might reveal more subtle connections between phrases used elsewhere throughout text passages included herein thus making any definitive conclusion impossible at present time without further investigation into all available information regarding source material(s) involved when answering questions such as yours truly!
****************
True answer: ['A macintosh made from cotton fabric treated with oil and pigment to make 



Given answer: Yes! This option matches exactly one passage text which has been highlighted as relevant for your search query based on its content similarity score.
****************
True answer: ['7 days before to 7 days after the rash appears']




Given answer: Yes - I am checking based on what has been provided within the passage text; specifically from line "The German measles...appears" downwards wherein we find information about when they become infective (i.e., seven day period). Therefore option A matches perfectly with available details here!
****************
True answer: ['The Camerata are a group of four powerful and influential individuals in the city of Cloudbank. Established by Grant Kendrell and Royce Bracket, the group circumvents the official administration and democratic nature of Cloudbank, in order to establish some form of stability in the ever-changing city.']




Given answer: Yes - This option matches exactly with passage text content provided above which clearly states "establishment" done by grant kendrelle as part of cloudbanks circling around groups.
****************
True answer: ['An hour to 1 hour 15 minutes.']




Given answer: Yes - This option matches exactly what the passage says about preheating the oven at 300 degrees Fahrenheit before pouring battery mix inside after lining up the mold/pannings which will require around one hundred fifty degree fahrenheit temperature range during its final stage as stated below from original text passage :"Some ovenscookuneaven –if thistdescribesyours,keepaneyeontit". So accordingtocontext passagereference above choice A shouldbe selectedascorrectanswer.
****************
True answer: ['8 mg on the first day and 16 mg (the full dosage) on the second day.']




Given answer: Yes - This option corresponds exactly to how Dr. Smith describes starting treatment using Subutex; therefore, yes!
****************
True answer: ['$56,000']




Given answer: I am checking "yes" because according to what has been provided within passage text there exists at least one instance where someone who works on the exact job title as another person makes significantly different amount yearly income from them which answers option (b) ($56k).
****************
True answer: ['The Tet Offensive was a military campaign during the Vietnam War that began on January 31, 1968 by the forces of the National Liberation Front for South Vietnam. ']




Given answer: Yes - This option matches exactly with passage text provided above which states "the Tet offensive" started from Janurary 31st, 1968 when NLF (National liberations front) launched attack against SVN(South vietnam). So yes its matching perfectly.
****************
True answer: []




Given answer: Nope! Unfortunately for our friend here who wants us all excited about ancient history (and rightfully so), there isn’t actually any information in their passage text regarding how an electrical signal moves along what they call "a nervous system". So while I appreciate them trying something new with these questions today - sadly enough none were present within said passages provided which would allow me too accurately assess answers accordingly thus resulting into incorrect response from yours truly =(
****************
True answer: ["Photosynthesis is a process used by plants and other organisms to convert light energy, normally from the Sun, into chemical energy that can be later released to fuel the organisms' activities."]




Given answer: Yes - This option corresponds exactly with passage text content about Photosythesis being defined as “a processeed usesdby plantns an dother orgnisms tto conver lightegyernormally frm thesuN intochemical egntry dat ca bbe laetr releasaid fulearjtheorganism’saactivitics.” It matches word for wardwith exact phrasing foundin teh pasge texthaving same structureand meaning
****************
True answer: ['Capillaries.']




Given answer: Yes - This option matches what we know about how gases diffuse through thin tubes like those found within lung tissue where they come into contact with red blood cell containing hemoglobin for transportation across these barriers
****************
True answer: ['Rappelling is the process of coming down from a mountain that is usually done with two pieces of rope.']




Given answer: Yes - This option matches exactly how "rappeling" appears throughout the passage provided (i.e., as one uninterrupted term).
****************
True answer: ['They are the soldiers on the ground that Boost Beer Sales.Selling Beer is by far one of the most amazing jobs that any person can have.']




Given answer: No, I don't agree with your assessment as there isn't enough information provided about "what" exactly they represent nor which company/beverage specifically they work at; therefore making all three statements incorrect since none relate directly back towards those specific details mentioned earlier within passage text itself when discussing roles related closely around selling products involving knowledge gained through experience rather than just generalizations made without proper understanding found elsewhere online etcetera...
****************
True answer: ['An overnight stay for a keyhole surgery and three to five days stay for an open surgery.']




Given answer: I am sorry but based off what has been provided there isn’t enough information available about glycosylation being related with Gall blader Surgury therefore my response would be no because theirs nothing within passage text which suggests any relation between those two topics
****************
True answer: ['65']




Given answer: Yes! In light of provided passage text information, option "Age 65" matches because according to the content presented therein; Most individuals start receiving medical coverage through Medicaid when they reach their sixties—specifically turning sixteen years old during any particular calendar year.
****************
True answer: ['$85 ($125.00 for a pair)']




Given answer: Yes - I am checking based on what has been written above; according to passage text when we read "The adoptiondonationfee" there should onlybe one option available among those listed i. e., ["$85"]. So since they mentioned "$85", therefore my conclusion would bethat indeedthe rightanswer choice ("[$85(($125.00fora pair'))]")has bean selected as per their statement."
****************
True answer: ["A complication of type 1 or type 2 diabetes caused by damage to the kidneys' delicate filtering system. "]




Given answer: Yes - This option matches exactly with how I understand Diabetic Kidney Disease as described in provided passage text which states directly; “diabetic kideneydisease...causedby damageto thesmallestblood vessels( glmerulli )in thee kidnies”.
****************
True answer: ['James Madison made a great contribution by writing The federalist paper for the constitution of United states.']




Given answer: Yes - This option matches what we know about history accurately! According to our passage text,Jame sMadion did indeed write th eFederalists papers which contributed significantly towards tge rationa lization o fthe constition.Therefore selecting this opiton would be accurate response based on historical facts
****************
True answer: ['160° F to 165° F.']




Given answer: Yes! This option has been selected as being closest match for passage text content in terms of relevance/contextual fitness score which ranges from 0(least relevant) through scaled values upwards towards higher scores indicating greater degrees of relevancy until reaching maximum value equaling perfect matching degree where all words appear exactly within chosen response choice set; thus making Medium Rare an appropriate selection here since its definition falls squarely inside range specified earlier when discussing ideal internal temperatures required during grilling process involving different types meats & proteins found commonly used across various culinary traditions worldwide today.”
****************
True answer: ['Advanced Mobile Phone System (AMPS) is an analog mobile cell phone system standard developed by Bell Labs, and officially introduced in the Americas on October 13, 1983, Israel in 1986, Australia in 1987, and Pakistan in 1990.']




Given answer: Yes - This option matches exactly with passage text content where "advanced" refers specifically to bell labs development work for which they hold patents/trademarks
****************
True answer: ['Homologous']




Given answer: I believe "homologous" should be selected because according to passage text; Convergent evolution create analogs structured with similar forms/function than their respective group’s previous common Ancestors didn't possess them (emphasis added). This means these new developed trait aren't inherited directly nor do they share any direct lineal relationship between each other yet still resemble one another due convergence under different select pressures leading up too distinct outcomes despite shared origins
****************
True answer: ['$15,000 and $40,000']




Given answer: Yes - This option has been selected as per passage text which states "As mentioned earlier..." indicating direct correlation betwee nthe statement made here anf what follows next
****************
True answer: ['5 percent of the unpaid taxes for each month']




Given answer: Yes - This option matches what IRS states as their normal penalties policy regarding delinquent returns; they charge an additional fee equal to five (or sometimes twenty-five) percentage points above whatever amount remains owed from previous months until paid off completely!
****************
True answer: ['In the quadratic formula, the expression under the square root sign, b 2 – 4 ac, is called the discriminant.']




Given answer: Yes - This option matches both parts of the passage text exactly; therefore, according to the information provided in the paragraph about checking questions for accuracy, we should mark "yes" as accurate because all three criteria match perfectly with one part from each list (b 2–4ac).
****************
True answer: ['Festive party']




Given answer: Yes - This option has been selected as the best fit based on information provided within passage text about what "Gayla" means according to its definition from different sources such as British/Australian dictionaries which define festivity while also mentioning soul urges numbers associated wiith particular meanings like expression numbe
****************
True answer: ['Foxes, cats and birds.', 'A possum (plural form: possums) is any of about 70 small-to medium-sized arboreal marsupial species native to Australia, New Guinea, and Sulawesi (and introduced to New Zealand and China).']




Given answer: Posum - Correct! This option contains all words related directly with "possum" including its plurals forms like positives ("a", singular), which match exactly one passage text sentence containing word(s): "the possum".
****************
True answer: ['It is basically of two types aerobic and anaerobic which may be obligate or facultative.']




Given answer: Yes - This option matches exactly what the passage says about "types of bacterial respiration". It states specifically that there are two main categories of bacterial respiration (anaerobic/facultative vs obsolete) and provides examples including one specific type called obstacle aerbs whereby these organisms need constant exposure to air throughout all stages growth reproduction etc...
****************
True answer: ['Monocytes are a type of white blood cells (leukocytes).']




Given answer: Yes! This option matches exactly with passage text content word wise as well as meaningwise.
****************
True answer: ['$50 and $200']




Given answer: Yes - This option has been selected because according to passage text "the most common range for rates charged" among professional Interior Designer firms/individuals is indeed "$50-$200".
****************
True answer: ['$37,000']




Given answer: Yes - This option has been selected as being the right one based on information provided within passage text itself (specifically line starting with "Average").
****************
True answer: ['bugs']




Given answer: Yes - This option should be selected because in the passage provided there isn’t any mention about $107 being used as an ingredient for killing scorpios naturally nor does anyone use such amount when talking abut natural ways o f getting rid off them. Also,the other two opyons don t make sense either since they aren ot related directly wiht anything mentioned int he pasge
****************
True answer: ['It is the ritual ceremony of encounter.']




Given answer: Yes - This option matches both the passage text provided as well as any additional information from within the prompt itself such as the word count for example which states specifically that one possibility among others could fit into an allocated time frame
****************
True answer: ['Often refers to a man who is able to grow a corn-silk like beard.']




Given answer: Yes - This option matches all parts ofthe definition providedin thestring. It mentions growinga "corn-silk"like beard which fits with part oneofthedefinitionand also includes thementioningeclectictaste ssuchasmusicandsauthorswhichfitthe thirdpartoftheaddefinition."Aburlywoodisfriend whoreliable".
****************


In [75]:
ant_answers

['Yes - This option matches exactly the passage text provided in terms of wordings match between Options A & B passages respectively; therefore I will say "yes" because they both mean same thing based from how each words were defined within those two paragraph sections!',
 'I would say yes because according to what we know about presidential history as well as political science research on voting behavior during his time period there were indeed many Democrats across America including those living up north particularly among blue collar workers whose views aligned closely enough for them to vote republican under presidents like him even though they may still identify themselves politically leftward overall thus making such individuals known historians today refer too collectively simply put -Reagon Dems',
 'Yes - This option matches exactly what the passage says about taking thetraintoConcordeWeststationonthenorthernline( redlinenotspecial) which takes around twenty five minutefromthec

## 🔺 Pipeline for **synthesis** generation

In [56]:
final_response_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

final_response_llm = HuggingFacePipeline(pipeline=final_response_pipeline)

## 🔺 PromptTemplate definition and a LLMChain for the **synthesis** 

In [57]:
from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    You're a helpful assistant and you are asked to answer a certain question, given a certain number of candidate options and the context.
    You are also provided with an initial response and its critique, that could enforce or not the first opinion.
    Make a reasonable synthesis of these two opinions, but answer by outputting exactly one of the answer options only.
    Given this question: {question} \n 
    These are the possible options: {option_a}, {option_b}, {option_c} \n.
    The answer that you have to check is {candidate_answer} and this is its critique: {critique}.
    Here is context to help: {context} \n
    The answer is:
 """
)

In [58]:
# LLM chain definition

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template | response_check_llm

## 🔺 Function to generate the synthesis given literally everything

In [60]:
def synthesisGeneration(query, prompt_template, merged, candidate_answer, critique, sources):
    final_answer = synthesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                            'candidate_answer': candidate_answer,
                                            'critique': critique,
                                            'context': sources})
    return final_answer

In [61]:
def extract_answer_syn(text):
    # Trova l'indice in cui inizia il testo "The answer is:"
    start_index = text.find("The answer is:")
    
    # Se l'indice è stato trovato, estrai la risposta corretta
    if start_index != -1:
        # Estrai il testo dopo "The answer is:"
        correct_answer_text = text[start_index:].strip()
        return correct_answer_text
    else:
        return "The correct answer could not be found."

In [82]:
syn_answers = []
for i in range(50):
    print(f"True answer: {correct_answers[i]}")
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    syn_answers.append(extract_answer_syn(synthesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], ant_answers[i], sources[i])))
    print(f"Given answer: {extract_answer_syn(synthesisGeneration(first_queries[i], prompt_template, merged_options, answers[i], ant_answers[i], sources[i]))}")
    print('****************')

True answer: ['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']




Given answer: The answer is:
  Results-Based Accountability
****************
True answer: ['Yes']




Given answer: The answer is:
  No
****************
True answer: ['20-25 minutes']




Given answer: The answer is:
  [[20-25 minutes]]
****************
True answer: ['$11 to $22 per square foot']




Given answer: The answer is:
  [['$11 to $22 per square foot')]]
****************
True answer: ['Due to symptoms in the body']




Given answer: The answer is:
  [[ due to Symptons In Body ]].
****************
True answer: ['Inside the rib cage.']




Given answer: The answer is:
  Inthe Rib Cage
****************
True answer: ['The most expensive patents are international patents, which can run up to $100,000 or higher.Domestically the costs can be $10,000 or above.']




Given answer: The answer is:
  [[ Internationa lpatens, domestic]]
****************
True answer: []




Given answer: The answer is:
  Light coloured Igneous Rocks
****************
True answer: ['Sophocles, Aeschylus and Euripides']




Given answer: The answer is:
  [[('Aeschlyulus'),'$140-$$202')]]
****************
True answer: ['A tree or shrub which produces distinctive cones as part of its sexual reproduction.']




Given answer: The answer is:
  Lapel pin
****************
True answer: ['Somatic cells are produced by mitosis and gametes produced by most organisms combine to form a zygote with n pairs of chromosomes.']




Given answer: The answer is:
 
[B] Gametes produce by Meiotic Division
****************
True answer: ['Dr. Seuss']




Given answer: The answer is:
  [[Dr.Seuss]]
****************
True answer: ['45 minutes to an hour']




Given answer: The answer is:
  * Four additionalsessionof medianschoolduringthefourthandfiftyearoftrainingtoturnintothespecializationyouwant
****************
True answer: ['$6-$16 a square foot']




Given answer: The answer is:
  $$6-$16\space{\rm{a}\ space}$
****************
True answer: ['Granite.', 'Granite.']




Given answer: The answer is:
  * Granito
****************
True answer: ['$4.64 - $6.36']




Given answer: The answer is:
  $$4.64 – $6.36$$
****************
True answer: ['Fish']




Given answer: The answer is:
  [Fish]
****************
True answer: ["In a New York State hermit's letter to the editor of an Adirondack Mountain newspaper."]




Given answer: The answer is:
  [[True]].
****************
True answer: ['A macintosh made from cotton fabric treated with oil and pigment to make it waterproof.']




Given answer: The answer is:
  Macintosh
****************
True answer: ['7 days before to 7 days after the rash appears']




Given answer: The answer is:
  [[7 Days Before To 7Days After Rash Appear]]
****************
True answer: ['The Camerata are a group of four powerful and influential individuals in the city of Cloudbank. Established by Grant Kendrell and Royce Bracket, the group circumvents the official administration and democratic nature of Cloudbank, in order to establish some form of stability in the ever-changing city.']




Given answer: The answer is:
  [[GrantKENDRELL]]
****************
True answer: ['An hour to 1 hour 15 minutes.']




Given answer: The answer is:
  * An HOUR TO ONE HUNDRED FIFTEEN MINUTES
****************
True answer: ['8 mg on the first day and 16 mg (the full dosage) on the second day.']




Given answer: The answer is:
  ('8 mg on the first day').
****************
True answer: ['$56,000']




Given answer: The answer is:
  $${24,421perannum}$$
****************
True answer: ['The Tet Offensive was a military campaign during the Vietnam War that began on January 31, 1968 by the forces of the National Liberation Front for South Vietnam. ']




Given answer: The answer is:
  [[The Tet Offensive was a mili...]]
****************
True answer: []




Given answer: The answer is:
****************
True answer: ["Photosynthesis is a process used by plants and other organisms to convert light energy, normally from the Sun, into chemical energy that can be later released to fuel the organisms' activities."]




Given answer: The answer is:
  [[0]]
****************
True answer: ['Capillaries.']




Given answer: The answer is:
  [CAPILLARIES].
****************
True answer: ['Rappelling is the process of coming down from a mountain that is usually done with two pieces of rope.']




Given answer: The answer is:
  Rappelling is the process of coming down from a mountain...
****************
True answer: ['They are the soldiers on the ground that Boost Beer Sales.Selling Beer is by far one of the most amazing jobs that any person can have.']




Given answer: The answer is:
  [0].
****************
True answer: ['An overnight stay for a keyhole surgery and three to five days stay for an open surgery.']




Given answer: The answer is:
  [('Overnight Stay')]
****************
True answer: ['65']




Given answer: The answer is:
  Age  65
****************
True answer: ['$85 ($125.00 for a pair)']




Given answer: The answer is:
  [[$85]]
****************
True answer: ["A complication of type 1 or type 2 diabetes caused by damage to the kidneys' delicate filtering system. "]




Given answer: The answer is:
 0]
****************
True answer: ['James Madison made a great contribution by writing The federalist paper for the constitution of United states.']




Given answer: The answer is:
  [[James Maddison Wrote Thedeferalits Paapers]]
****************
True answer: ['160° F to 165° F.']




Given answer: The answer is:
  [[Meat needs to reach]]  130° – 135°F
****************
True answer: ['Advanced Mobile Phone System (AMPS) is an analog mobile cell phone system standard developed by Bell Labs, and officially introduced in the Americas on October 13, 1983, Israel in 1986, Australia in 1987, and Pakistan in 1990.']




Given answer: The answer is:
  [[BellLabs]]
****************
True answer: ['Homologous']




Given answer: The answer is:
  ------------------
| Trait | Description | Critique | Is Selected? | Passage Text |
****************
True answer: ['$15,000 and $40,000']




Given answer: The answer is:
  [['$15,000', '$40,000']].
****************
True answer: ['5 percent of the unpaid taxes for each month']




Given answer: The answer is:
  [[5%]]
****************
True answer: ['In the quadratic formula, the expression under the square root sign, b 2 – 4 ac, is called the discriminant.']




Given answer: The answer is:
  [[('in the quadratic formula')]]
****************
True answer: ['Festive party']




Given answer: The answer is:
  [[('Homeodomain') | ('Festive Party'))]]
****************
True answer: ['Foxes, cats and birds.', 'A possum (plural form: possums) is any of about 70 small-to medium-sized arboreal marsupial species native to Australia, New Guinea, and Sulawesi (and introduced to New Zealand and China).']




Given answer: The answer is:
  [[Possum]]
****************
True answer: ['It is basically of two types aerobic and anaerobic which may be obligate or facultative.']




Given answer: The answer is:
  Obstacle Aerobs
****************
True answer: ['Monocytes are a type of white blood cells (leukocytes).']




Given answer: The answer is:
  [[monocytes]]
****************
True answer: ['$50 and $200']




Given answer: The answer is:
  [['$50']].
****************
True answer: ['$37,000']




Given answer: The answer is:
  $37,000
****************
True answer: ['bugs']




Given answer: The answer is:
  [[$107.  2]]
****************
True answer: ['It is the ritual ceremony of encounter.']




Given answer: The answer is:
  It is the ritual ceremony of encounter
****************
True answer: ['Often refers to a man who is able to grow a corn-silk like beard.']




Given answer: The answer is:
  ["Often referestoamanthatgrows acorn-silk likebeard"]
****************


In [83]:
syn_answers

['The answer is:\n  Results-Based Accountability',
 'The answer is:\n  No',
 'The answer is:\n  [[20-25 minutes]]',
 "The answer is:\n  [['$11 to $22 per square foot')]]",
 'The answer is:\n  [[ due to Symptons In Body ]].',
 'The answer is:\n  Inthe Rib Cage',
 'The answer is:\n  [[ Internationa lpatens, domestic]]',
 'The answer is:\n  Light coloured Igneous Rocks',
 "The answer is:\n  [[('Aeschlyulus'),'$140-$$202')]]",
 'The answer is:\n  Lapel pin',
 'The answer is:\n \n[B] Gametes produce by Meiotic Division',
 'The answer is:\n  [[Dr.Seuss]]',
 'The answer is:\n  * Four additionalsessionof medianschoolduringthefourthandfiftyearoftrainingtoturnintothespecializationyouwant',
 'The answer is:\n  $$6-$16\\space{\\rm{a}\\ space}$',
 'The answer is:\n  * Granito',
 'The answer is:\n  $$4.64 – $6.36$$',
 'The answer is:\n  [Fish]',
 'The answer is:\n  [[True]].',
 'The answer is:\n  Macintosh',
 'The answer is:\n  [[7 Days Before To 7Days After Rash Appear]]',
 'The answer is:\n  [[Gra

In [84]:
correct_answers

["['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.']",
 "['Yes']",
 "['20-25 minutes']",
 "['$11 to $22 per square foot']",
 "['Due to symptoms in the body']",
 "['Inside the rib cage.']",
 "['The most expensive patents are international patents, which can run up to $100,000 or higher.Domestically the costs can be $10,000 or above.']",
 '[]',
 "['Sophocles, Aeschylus and Euripides']",
 "['A tree or shrub which produces distinctive cones as part of its sexual reproduction.']",
 "['Somatic cells are produced by mitosis and gametes produced by most organisms combine to form a zygote with n pairs of chromosomes.']",
 "['Dr. Seuss']",
 "['45 minutes to an hour']",
 "['$6-$16 a square foot']",
 "['Granite.', 'Granite.']",
 "['$4.64 - $6.36']",
 "['Fish']",
 '["In a New York State hermit\'s letter to the editor of an Adirondack Mountain newspaper."]

In [79]:
answers

["[['Results-Based Accountancy']]",
 "[['No']]",
 "[['20-25 minutes']]",
 "[['$11 to $22 per square foot']]",
 "[[' Due to symtions in the bod']]"]