# Dataset, documents, FAISS; retriever

## 🔹 Load the dataset containing the tuples `(query, correct_answer, distractor_1, distractor_2)` and the one containing the documents

In [1]:
from datasets import load_dataset
import ast

dataset = load_dataset('saracandu/msmarco_modified', split="train", trust_remote_code=True)
dataset

Dataset({
    features: ['Unnamed: 0', 'answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'correct_answer', 'distractor_1', 'distractor_2'],
    num_rows: 82326
})

In [2]:
selected_passages = []

for row in dataset:
    passages_data = ast.literal_eval(row['passages'])
    try:
        selected_index = passages_data['is_selected'].index(1)
        selected_passage = {
            'is_selected': 1,
            'passage_text': passages_data['passage_text'][selected_index],
        }
        selected_passages.append(selected_passage)
    except ValueError:
        # Aggiungi un passaggio vuoto se non c'è nessun passaggio selezionato
        selected_passages.append({'is_selected': 0, 'passage_text': ''})

# Assicurati che la lunghezza dei passaggi selezionati corrisponda alla lunghezza del dataset originale
assert len(selected_passages) == len(dataset), "Errore: lunghezza dei passaggi selezionati non corrisponde alla lunghezza del dataset originale"

In [3]:
from datasets import Dataset

# Creazione di un nuovo dizionario con i dati desiderati
new_dataset = {
    'answers': dataset['answers'],
    'query': dataset['query'],
    'query_id': dataset['query_id'],
    'query_type': dataset['query_type'],
    'wellFormedAnswers': dataset['wellFormedAnswers'],
    'correct_answer': dataset['correct_answer'],
    'distractor_1': dataset['distractor_1'],
    'distractor_2': dataset['distractor_2'],
    'selected_passages': selected_passages
}

# Creazione del nuovo dataset
new_dataset = Dataset.from_dict(new_dataset)

In [4]:
def filter_correct_answer(example):
    return example['correct_answer'] != '[]'

# Applica la funzione di filtro
new_dataset = new_dataset.filter(filter_correct_answer)

# Visualizza le prime righe del dataset filtrato per verificare il risultato
new_dataset

Filter:   0%|          | 0/82326 [00:00<?, ? examples/s]

Dataset({
    features: ['answers', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'correct_answer', 'distractor_1', 'distractor_2', 'selected_passages'],
    num_rows: 80143
})

# Model loading and dataset selection (for testing purposes)

## ▪️ Upload the model: 

In [None]:
# do not run this unless necessary!

from huggingface_hub import login
login()

In [5]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel

#################################################################
# Tokenizer
#################################################################

model_name="nvidia/Llama3-ChatQA-1.5-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
from outlines import models, generate, samplers

new_model = models.Transformers(model, tokenizer)
sampler = samplers.greedy()

In [9]:
import outlines

system_message = """You are a multiple-choice question answering assistant.
Choose which of the following options: a star, a planet, a galaxy is the object below.

Object: the sun
"""

generator = outlines.generate.choice(new_model, ["a planet", "a galaxy", "a star"], sampler = sampler)
answer = generator(system_message)
answer

Compiling FSM index for all state transitions: 100%|██| 17/17 [00:00<00:00, 19.63it/s]


'a star'

In [10]:
import outlines
from langchain import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
Choose which of the following options: {option_a}, {option_b} is most likely the answer to a question. 
You have also a suggestion on which answer is the most appropriate, that you have to use as an additional context.

Question: {question}
Context: {critique}
"""
)

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template 

In [11]:
def synthesisGeneration(query, prompt_template, merged, candidate_answer, critique, sources):
    augmented_prompt = synthesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1],
                                            'candidate_answer': candidate_answer,
                                            'critique': critique,
                                            'context': sources})

    normal_string = clean_text(augmented_prompt.text)
    options = [merged[0], merged[1]]
    generator = outlines.generate.choice(new_model, options, sampler = sampler)
    final_answer = generator(normal_string, max_tokens = 30)
    
    return final_answer

In [14]:
synthesisGeneration('what is the sun', prompt_template, ['a star', 'a planet'], 'a planet', 
                    'the correct answer is: a star since bot an asteroid and a planet are inadequate and not supported by the context', 
                    'The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies.')

'a star'

In [23]:
source = """ Arthur\'s Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. 
Edited by T.S. Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. 
In May 1846 it was merged into "Godey\'s Lady\'s Book. First for Women is a woman\'s magazine published by Bauer Media Group in the USA.  
The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011 the circulation of the magazine was 1,310,696 copies.
    """

synthesisGeneration('Which magazine was started first Arthur\'s Magazine or First for Women?', 
                    prompt_template, ['First for Women', 'Arthur\'s Magazine'], 'First for Women', 
                    'the correct answer is Arthur\'s Magazine and the context agrees', 
                    source)

"Arthur's Magazine"

In [24]:
source = """ "The Oberoi family is an Indian family that is famous for its involvement in hotels, namely through The Oberoi Group. 
The Oberoi Group is a hotel company with its head office in Delhi. 
Founded in 1934, the company owns and/or operates 30+ luxury hotels and two river cruise ships in six countries, primarily under its Oberoi Hotels & Resorts and Trident Hotels brands.".
    """

synthesisGeneration('The Oberoi family is part of a hotel company that has a head office in what city?', 
                    prompt_template, ['Delhi', 'Sammardenchia'], 'Sammardenchia', 
                    'Delhi is correct', 
                    source)

'Delhi'

In [35]:
source = """ Allison Beth Allie Goertz (born March 2, 1991) is an American musician. 
Goertz is known for her satirical songs based on various pop culture topics. Her videos are posted on YouTube under the name of Cossbysweater. 
Subjects of her songs have included the film The Room, the character Milhouse from the television show The Simpsons, and the game Dungeons & Dragons. 
Her style has been compared to that of Bo Burnham. In December 2015, Goertz released a concept album based on the Adult Swim series Rick and Morty, Sad Dance Songs, 
with the album\'s cover emulating the animation and logo of the series.  The album was made possible through Kickstarter. 
She is co-host of Everything's Coming Up Podcast, a Simpsons-focused podcast along with Julia Prescott. 
Milhouse Mussolini van Houten is a fictional character featured in the animated television series The Simpsons, voiced by Pamela Hayden, and created by Matt Groening 
who named the character after President Richard Nixon\'s middle name. Later in the series, it is revealed that Milhouse\'s middle name is Mussolini. "
    """

synthesisGeneration('Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?', 
                    prompt_template, ['President Richard Nixon', 'Gabibbo'], 'President Richard Nixon', 
                    'the correct answer is President Richard Nixon', 
                    source)

'President Richard Nixon'

## ▪️ Select a subset of the true dataset as a test

In [71]:
N_examples = 20

In [77]:
# select a subset of the queries, just for test:
first_queries = new_dataset['query'][:N_examples]

# same for correct answers and distractors:
correct_answers = new_dataset['correct_answer'][:N_examples]
distractors_1 = new_dataset['distractor_1'][:N_examples]
distractors_2 = new_dataset['distractor_2'][:N_examples]
# and for the sources:
sources = new_dataset['selected_passages'][:N_examples]

## ▪️ Merge the true answer and the distractors into a vector, shuffling the order of the elements

In [13]:
# shuffles the order of the vector containing the correct answer and the two distractors
# returns another vector, shuffled
import re
import random

# Definisci una funzione di pulizia per rimuovere caratteri non validi
def clean_text(text):
    return re.sub(r"[^\w\s.,!?\-:;()]+", '', text)

# Funzione per mescolare le risposte
def shuffleAnswers(correct_answer, distractor_1, distractor_2):
    # Applica la pulizia a ciascun elemento
    correct_answer_cleaned = clean_text(correct_answer)
    distractor_1_cleaned = clean_text(distractor_1)
    distractor_2_cleaned = clean_text(distractor_2)
    
    # Unisci le opzioni pulite
    merge_options = [correct_answer_cleaned, distractor_1_cleaned, distractor_2_cleaned]
    
    # Mescola le opzioni
    random.shuffle(merge_options)
    
    return merge_options

# Thesis

In [106]:
merged_options = []
for i in range(N_examples):
    merged_options.append(shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i]))

In [107]:
merged_options

[["'Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.'",
  '"\'Other Allowance\' which is basically to compensate all and any kind of allowances which is required to be paid at different regionslocalities of the various project sites."',
  "'Webbed feet'"],
 ["'Yes'",
  "'50, 55, 60, 65 and 70 C'",
  "'A contamination which is associated with the food itself and not through other causes of contamination.'"],
 ["'Yes'",
  "'Oatmeal, beans, apples, pears, barley and prunes.'",
  "'20-25 minutes'"],
 ["'11 to 22 per square foot'",
  "'They are fatty acids that have one double bond in the fatty acid chain with all of the remainder carbon atoms being single-bonded.'",
  "'Honolulu'"],
 ["'A chief engineer is responsible for all operations and maintenance that has to do with any and all engineering equipment throughout the entire ship.', 'The chief en

## 🔹 PromptTemplate definition and a LLMChain for the **thesis** 

In [108]:
# prompt template definition
# requires question, options (a string containing the possible options) and the context as input variables!

from langchain import PromptTemplate
prompt_template = PromptTemplate.from_template(
"""
    System: This is a chat between a user and an AI assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions based on the context. 
    {context}
    User: {question}
    Possible options: {option_a}, {option_b}, {option_c}.
    Assistant:
"""
)

## 🔹 Function that generates the output given the prompt, the question and the set of options

In [109]:
# LLM chain definition
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "context": itemgetter("context"), }

thesis_chain = augmentation | prompt_template 

In [110]:
import outlines

def thesisGeneration(query, merged, sources):
    augmented_prompt = thesis_chain.invoke({'question': query, 'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 'context': sources})
    normal_string = clean_text(augmented_prompt.text)
    options = [merged[0], merged[1], merged[2]]
    generator = outlines.generate.choice(new_model, options)
    answer = generator(normal_string)
    # if not answer:
    #    answer = random.choice(options)
    return answer

## 🔹 Test: how well the thesis alone is able to perform?

In [111]:
answers = []
for i in range(N_examples):
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    answers.append(thesisGeneration(first_queries[i], merged_options, sources[i]))

In [112]:
answers[:5]

["'Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.'",
 "'A contamination which is associated with the food itself and not through other causes of contamination.'",
 "'20-25 minutes'",
 "'11 to 22 per square foot'",
 "'A chief engineer is responsible for all operations and maintenance that has to do with any and all engineering equipment throughout the entire ship.', 'The chief engineer is responsible for the technical supervision of the development, production or operation of an engineering project for a multinational corporation, a major company or a government institution.'"]

# Antithesis

In [113]:
merged_options = []
for i in range(N_examples):
    merged_options.append(shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i]))

## 🔸 PromptTemplate definition and a LLMChain for the **antithesis** 

In [114]:
import transformers

pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,
    temperature=0.0,
    repetition_penalty=1.5,
    return_full_text=True,
    max_new_tokens=400,
    top_p=0.0
)

In [115]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from IPython.display import display, Markdown

def query_model(
        system_message,
        user_message,
        temperature = 0.0,
        max_length=1024
        ):

    user_message = "Question: " + user_message + " Answer:"
    messages = [
        {"role": "System", "content": system_message},
        {"role": "User", "content": user_message},
        ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
        )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    sequences = pipeline(
        prompt,
        do_sample=False,
        top_p=0.0,
        temperature=temperature,
        #num_return_sequences=1,
        eos_token_id=terminators,
        max_new_tokens=max_length,
        return_full_text=False,
        pad_token_id=pipeline.model.config.eos_token_id
    )

    answer = sequences[0]['generated_text']

    return user_message + " " + answer  

In [116]:
system_message = """
    This is a chat between a user and an AI assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions based on the context.
    {context}
    The assistant is asked to check whether or not a question was answered correctly, given a certain number of candidate options and the context.
"""


user_message = """
    Question: {question}? The candidate answer for the question has to be one of these: {option_a}, {option_b}, {option_c}.
    It has been previously suggested that {candidate_answer} could be the correct answer. 
    Which of the candidate answers is the most proper answer for the question? Why? You can also say "I don't know".
"""

## 🔸 Function to generate the antithesis given the question, the thesis, the context and the options

In [117]:
def antithesisGeneration(query, prompt_template, merged, candidate_answer, sources):
    second_answer = query_model(system_message.format(context = sources),
    user_message.format(question=query, option_a = merged[0], option_b = merged[1], 
                        option_c = merged[2], candidate_answer = candidate_answer, context = sources,), max_length=400)
    return second_answer

In [118]:
def extract_answer_ant(text):
    # Trova l'indice in cui inizia il testo "Why or why not the answer is correct:"
    start_index = text.find("Answer:")

    
    # Se l'indice è stato trovato, estrai la risposta corretta
    if start_index != -1:
        start_index += len("Answer:")
        # Estrai il testo dopo "Why or why not the answer is correct:"
        correct_answer_text = text[start_index:].strip()
        return correct_answer_text
    else:
        return "The correct answer could not be found."

In [119]:
ant_answers = []
for i in range(N_examples):
    ant_answers.append(extract_answer_ant(antithesisGeneration(first_queries[i], prompt_template, merged_options[i], answers[i], sources[i])))



In [120]:
ant_answers[:5]

['The passage text clearly states Results Based accountability(RBAs), it means its just another name called RBa',
 'The passage text states clearly about how he changed his political affiliation before becoming president',
 'The reference text states,"A Week In Sydny Will Help See Many Of Its Sights And Understand City Culture."',
 'The average costs range from eleven dollars ($11) up until twenty-two dollar($21).',
 "'Due To Symptoms In Body'"]

In [121]:
first_queries[:5]

['what is rba',
 'was ronald reagan a democrat',
 'how long do you need for sydney and surrounding areas',
 'price to install tile in shower',
 'why conversion observed in body']

# Synthesis

## 🔺 PromptTemplate definition and a LLMChain for the **synthesis** 

In [153]:
# old

from langchain import PromptTemplate

prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
Choose which of the following options: {option_a}, {option_b}, {option_c} is most likely the answer to a question. 
You have also an attempt of answer, an opinion on which answer is the most appropriate and the context.

Question: {question}
Attempt of answer: {candidate_answer}
Opinion: {critique}
Context: {context}
"""
)

# LLM chain definition

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template 

In [223]:
prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
Choose which of the following options: {option_a}, {option_b}, {option_c} is most likely the answer to a question. 
You have also an advice on which answer is the most appropriate and the context.
Choose the option that is most similar to the advice.

Question: {question}
Advice: {critique}
Context: {context}
"""
)

augmentation = {"question": itemgetter("question"),
                "option_a": itemgetter("option_a"), 
                "option_b": itemgetter("option_b"),
                "option_c": itemgetter("option_c"),
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template 

## 🔺 Function to generate the synthesis given literally everything

In [191]:
sampler = samplers.greedy()

In [224]:
def synthesisGeneration(query, prompt_template, merged, candidate_answer, critique, sources):
    augmented_prompt = synthesis_chain.invoke({'question': query, 
                                            'option_a': merged[0], 'option_b': merged[1], 'option_c': merged[2], 
                                            'candidate_answer': candidate_answer,
                                            'critique': critique,
                                            'context': sources})

    normal_string = clean_text(augmented_prompt.text)
    options = [merged[0], merged[1], merged[2]]
    generator = outlines.generate.choice(new_model, options, sampler = sampler)
    final_answer = generator(normal_string)
    
    # if not final_answer:
    #     final_answer = candidate_answer
    return final_answer

In [228]:
def_answers = [clean_text(correct_answer) + "is correct" for correct_answer in correct_answers]
def_answers

['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.is correct',
 'Yesis correct',
 '20-25 minutesis correct',
 '11 to 22 per square footis correct',
 'Due to symptoms in the bodyis correct',
 'Inside the rib cage.is correct',
 'The most expensive patents are international patents, which can run up to 100,000 or higher.Domestically the costs can be 10,000 or above.is correct',
 'Sophocles, Aeschylus and Euripidesis correct',
 'A tree or shrub which produces distinctive cones as part of its sexual reproduction.is correct',
 'Somatic cells are produced by mitosis and gametes produced by most organisms combine to form a zygote with n pairs of chromosomes.is correct',
 'Dr. Seussis correct',
 '45 minutes to an houris correct',
 '6-16 a square footis correct',
 'Granite., Granite.is correct',
 '4.64 - 6.36is correct',
 'Fishis correct',
 'In a New Yor

In [229]:
syn_answers = []
for i in range(20):
    merged_options = shuffleAnswers(correct_answers[i], distractors_1[i], distractors_2[i])
    syn_answers.append(synthesisGeneration(first_queries[i], prompt_template, merged_options, correct_answers[i], def_answers[i], sources[i]))

Compiling FSM index for all state transitions: 100%|█| 373/373 [00:18<00:00, 20.68it/s
Compiling FSM index for all state transitions: 100%|██| 65/65 [00:03<00:00, 20.66it/s]
Compiling FSM index for all state transitions: 100%|█| 164/164 [00:07<00:00, 20.83it/s
Compiling FSM index for all state transitions: 100%|█| 408/408 [00:19<00:00, 20.95it/s
Compiling FSM index for all state transitions: 100%|██| 42/42 [00:02<00:00, 20.67it/s]
Compiling FSM index for all state transitions: 100%|█| 184/184 [00:08<00:00, 20.84it/s
Compiling FSM index for all state transitions: 100%|█| 146/146 [00:06<00:00, 20.93it/s
Compiling FSM index for all state transitions: 100%|█| 205/205 [00:09<00:00, 20.80it/s
Compiling FSM index for all state transitions: 100%|█| 467/467 [00:22<00:00, 20.99it/s
Compiling FSM index for all state transitions: 100%|██| 39/39 [00:01<00:00, 20.79it/s]
Compiling FSM index for all state transitions: 100%|█| 277/277 [00:13<00:00, 20.91it/s
Compiling FSM index for all state transitio

In [230]:
syn_answers

['Results-Based Accountability is a disciplined way of thinking and taking action that communities can use to improve the lives of children, youth, families, adults and the community as a whole.',
 'Yes',
 'Yes',
 'They are fatty acids that have one double bond in the fatty acid chain with all of the remainder carbon atoms being single-bonded.',
 'A chief engineer is responsible for all operations and maintenance that has to do with any and all engineering equipment throughout the entire ship., The chief engineer is responsible for the technical supervision of the development, production or operation of an engineering project for a multinational corporation, a major company or a government institution.',
 'Inside the rib cage.',
 'The most expensive patents are international patents, which can run up to 100,000 or higher.Domestically the costs can be 10,000 or above.',
 'The front segment is called the Cephalothorax and the second part of the body is called the Abdomen.',
 'Is the bran

In [218]:
merged_options = [clean_text(correct_answers[4]), clean_text(distractors_1[4]), clean_text(distractors_2[4])]

synthesisGeneration(clean_text(first_queries[4]), prompt_template, merged_options, 
                    clean_text(correct_answers[4]), 'answer by saying Due to symptoms', sources[4])

"'NigeriaCameroon'"

In [67]:
dataset_new = {'query': first_queries, 'correct answer': correct_answers, 'thesis': answers, 'antithesis': ant_answers, 'synthesis': syn_answers}

In [68]:
import pandas as pd

df = pd.DataFrame(dataset_new)

In [69]:
df

Unnamed: 0,query,correct answer,thesis,antithesis,synthesis
0,what is rba,['Results-Based Accountability is a discipline...,'Results-Based Accountability is a disciplined...,"The text clearly states ""RBA"" stands for Resul...",'Results-Based Accountability is a disciplined...
1,was ronald reagan a democrat,['Yes'],'Yes',"The text explicitly states that ""[f]romm tradi...",'Yes'
2,how long do you need for sydney and surroundin...,['20-25 minutes'],"'Oatmeal, beans, apples, pears, barley and pru...",The text states that taking public transportat...,"'Oatmeal, beans, apples, pears, barley and pru..."
3,price to install tile in shower,['$11 to $22 per square foot'],'11 to 22 per square foot',"The reference text states ""Average costs range...",'11 to 22 per square foot'
4,why conversion observed in body,['Due to symptoms in the body'],'Due to symptoms in the body',Yes\n\nExplanation:\nSomatization Disorder (SD...,'NigeriaCameroon'
5,where are the lungs located in the back,['Inside the rib cage.'],'Inside the rib cage.',"The reference text states that ""This [the loca...",'yes'
6,cost to get a patent,['The most expensive patents are international...,'The most expensive patents are international ...,"""The most expensiv epatents arerinternational ...",'Both Kit and Pup'
7,best tragedies of ancient greece,"['Sophocles, Aeschylus and Euripides']","'Sophocles, Aeschylus and Euripides'",Yes,'The front segment is called the Cephalothorax...
8,what is a conifer,['A tree or shrub which produces distinctive c...,'A tree or shrub which produces distinctive co...,"The text explicitly states that ""a Confer"" ref...",'Is the brand name of a drug called procaine w...
9,in animals somatic cells are produced by and g...,['Somatic cells are produced by mitosis and ga...,"'dream-like feeling, blurred vision, double vi...","The reference text states that ""somatic"" refer...","'dream-like feeling, blurred vision, double vi..."


In [76]:
df['antithesis'][199]

'Candidate response matches reference text exactly; therefore it must have been correct'

In [72]:
df.to_csv('test-4.csv')