## ▪️ Upload the model: 

In [None]:
# do not run this unless necessary!
# Opens the interactive Hugging Face Hub login prompt.
# NOTE(review): presumably only needed when the checkpoints below are gated — confirm.

from huggingface_hub import login
login()

In [None]:
import pandas as pd
import torch
import datasets
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the instruction-tuned Gemma 2B checkpoint in bfloat16 on the GPU.
# NOTE(review): `max_length` and `do_sample` are generation settings passed to
# `from_pretrained` here — confirm they are actually honoured at generation time.
model = AutoModelForCausalLM.from_pretrained(
    "gg-hf/gemma-2b-it",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
    max_length = 1000,
    do_sample = False
)

# NOTE(review): the tokenizer is pulled from "google/gemma-2b-it" while the weights
# come from "gg-hf/gemma-2b-it" — presumably the same model; verify.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", use_fast=False)

In [None]:
from guidance import models, select

# Wrap the already-loaded model/tokenizer pair so that `guidance` can run
# constrained generation (e.g. `select`) on top of it.
new_model = models.Transformers(model, tokenizer, temperature=0.0)

# new_model + f'Do you want a joke or a poem? A ' + select(['joke', 'poem'])

## ▪️ Test with `guidance`: 

In [None]:
# Minimal prompt for a constrained-generation smoke test.
system_message = """You are a multiple-choice question answering assistant.
Choose which of the following options: a star, a planet, a galaxy is the object below.

Object: the sun
"""

# `select` restricts the continuation to exactly one of the listed strings.
new_model + system_message + select(["a planet", "a galaxy", "a star"])

In [None]:
from langchain import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter

# Prompt for the synthesis step: the model sees the question, a first attempt,
# and a critique (labelled "Context"), then must pick one of the options.
prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
You have a suggestion on which answer is the most appropriate, that is treated as context. Use the suggestion to choose the most proper option.
You also have an attempt of answer that you are suggested to neglect. 

Question: {question}
Attempt: {candidate_answer}
Context: {critique}

The most proper option between {options} is:
"""
)

# Maps the input dict onto the template variables.
# NOTE: "context" is extracted here but the template above has no {context} placeholder.
augmentation = {"question": itemgetter("question"),
                "options": itemgetter("options"), 
                "candidate_answer": itemgetter("candidate_answer"),
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

# Piping the dict into the template yields a runnable that returns the filled prompt.
synthesis_chain = augmentation | prompt_template 

In [None]:
# (stale note) intended helper: shuffle the vector containing the correct answer and the two distractors
# and return the shuffled copy — no such function is defined in this cell
import re
import random

# Cleaning helper that removes characters which are not allowed in a prompt
def clean_text(text):
    """Drop every run of characters other than word characters, whitespace and ``.,!?-:;()``."""
    disallowed = re.compile(r"[^\w\s.,!?\-:;()]+")
    return disallowed.sub('', text)

In [None]:
def synthesisGeneration(query, prompt_template, merged, candidate_answer, critique, sources):
    """Run the synthesis prompt and let the model pick one of the options.

    Args:
        query: the question text.
        prompt_template: unused here; the module-level `synthesis_chain` is what
            actually formats the prompt. Kept so existing calls keep working.
        merged: list of option strings the model may choose from.
        candidate_answer: the first attempt shown to the model.
        critique: the suggestion the model is told to rely on.
        sources: supporting passage, forwarded to the chain under "context".

    Returns:
        The guidance model state obtained after appending the cleaned prompt and
        the constrained `select` over `merged`.
    """
    augmented_prompt = synthesis_chain.invoke({'question': query,
                                               'options': merged,
                                               'candidate_answer': candidate_answer,
                                               'critique': critique,
                                               'context': sources})

    normal_string = clean_text(augmented_prompt.text)
    # Return the result: it used to be computed and then discarded, so every
    # call evaluated to None and nothing could be displayed or inspected.
    return new_model + normal_string + select(merged)

In [None]:
# Smoke test: the attempt ('planet') is wrong and the critique points to a star.
synthesisGeneration('what is the sun', prompt_template, ['star', 'planet'], 'planet', 
                    'the correct answer is: a star since bot an asteroid and a planet are inadequate and not supported by the context', 
                    'The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies.')

In [None]:
# Supporting passage for the magazine question (passed through as `sources`).
source = """ Arthur\'s Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. 
Edited by T.S. Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. 
In May 1846 it was merged into "Godey\'s Lady\'s Book. First for Women is a woman\'s magazine published by Bauer Media Group in the USA.  
The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011 the circulation of the magazine was 1,310,696 copies.
    """

# The attempt ('First for Women') disagrees with the critique (Arthur's Magazine).
synthesisGeneration('Which magazine was started first Arthur\'s Magazine or First for Women?', 
                    prompt_template, ['First for Women', 'Arthur\'s Magazine'], 'First for Women', 
                    'the correct answer is Arthur\'s Magazine and the context agrees', 
                    source)

In [None]:
# Supporting passage for the Oberoi question (passed through as `sources`).
source = """ "The Oberoi family is an Indian family that is famous for its involvement in hotels, namely through The Oberoi Group. 
The Oberoi Group is a hotel company with its head office in Delhi. 
Founded in 1934, the company owns and/or operates 30+ luxury hotels and two river cruise ships in six countries, primarily under its Oberoi Hotels & Resorts and Trident Hotels brands.".
    """

# The attempt ('Sammardenchia') disagrees with the critique ('Delhi').
synthesisGeneration('The Oberoi family is part of a hotel company that has a head office in what city?', 
                    prompt_template, ['Delhi', 'Sammardenchia'], 'Sammardenchia', 
                    'Delhi is correct', 
                    source)

In [None]:
# Supporting passage for the Milhouse question (passed through as `sources`).
source = """ Allison Beth Allie Goertz (born March 2, 1991) is an American musician. 
Goertz is known for her satirical songs based on various pop culture topics. Her videos are posted on YouTube under the name of Cossbysweater. 
Subjects of her songs have included the film The Room, the character Milhouse from the television show The Simpsons, and the game Dungeons & Dragons. 
Her style has been compared to that of Bo Burnham. In December 2015, Goertz released a concept album based on the Adult Swim series Rick and Morty, Sad Dance Songs, 
with the album\'s cover emulating the animation and logo of the series.  The album was made possible through Kickstarter. 
She is co-host of Everything's Coming Up Podcast, a Simpsons-focused podcast along with Julia Prescott. 
Milhouse Mussolini van Houten is a fictional character featured in the animated television series The Simpsons, voiced by Pamela Hayden, and created by Matt Groening 
who named the character after President Richard Nixon\'s middle name. Later in the series, it is revealed that Milhouse\'s middle name is Mussolini. "
    """

# The attempt ('Obama') disagrees with the critique ('Nixon').
synthesisGeneration('Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?', 
                    prompt_template, ['Nixon', 'Obama'], 'Obama', 
                    'the correct answer is Nixon', 
                    source)

In [None]:
# Supporting passage for the Ray/Kazan question (passed through as `sources`).
source = """ 
Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director best known for the movie Rebel Without a Cause. 
Elia Kazan (born Elias Kazantzoglou September 7, 1909 – September 28, 2003) was a Greek-American director, producer, writer and actor, described by The New York Times as one of the most honored and influential directors in Broadway and Hollywood history.
    """

# Here the critique ('writer') contradicts the attempt ('director').
synthesisGeneration('What profession does Nicholas Ray and Elia Kazan have in common?', 
                    prompt_template, ['director', 'writer'], 'director', 
                    'the correct answer is writer', 
                    source)

## 🔹 PromptTemplate definition and an LLMChain for the **thesis**

In [None]:
def create_message(question, options, context):
    """Build the two-message chat (system + user) that prompts the thesis step.

    Args:
        question: the question text (str).
        options: the candidate answers; rendered with `str(options)`.
        context: supporting passage appended to the system message (str).

    Returns:
        A list with a "system" message (instructions followed by `context`) and
        a "user" message (the question plus the stringified options).
    """
    # The old `options_str` / `content` locals were removed: they never reached
    # the returned messages, and the join rejected non-string options for nothing.
    user_content = "Answer to the following question: " + question + " providing one of these options as answer: " + str(options) + "Assistant:"

    messages = [
        {"role": "system", "content": """
        You are an helpful AI assistant. You have to provide helpful answers to the user’s questions based on the context: 
        """ + context},
        {"role": "user", "content": user_content}
    ]

    return messages

In [None]:
# Preview the thesis chat for the first example.
# NOTE(review): first_queries / possibilities / sources are not defined above this cell.
create_message(first_queries[0], possibilities[0], sources[0])

## 🔹 Function that generates the output given the prompt, the question and the set of options

In [None]:
import ast

def thesisGeneration(query, merged, sources):
    """Generate the thesis: a first answer constrained to the given options.

    Args:
        query: the question text.
        merged: string holding a Python literal list of options (parsed with
            `ast.literal_eval`).
        sources: supporting passage placed in the system message.

    Returns:
        The full guidance transcript (prompt plus selected option) as a string.
    """
    merged = ast.literal_eval(merged)
    augmented_prompt = create_message(query, merged, sources)
    # Offer every option to `select`. Only the first two used to be offered, so
    # a third option could never be chosen and a single-option list raised.
    choices = [clean_text(option) for option in merged]
    ans = new_model + str(augmented_prompt) + select(choices)
    return str(ans)

In [None]:
def extract_answer(text):
    """Return the stripped text that follows the first "}]" in `text`.

    "}]" closes the stringified message list, so what follows is the selected
    answer. A fixed fallback sentence is returned when the marker is absent.
    """
    _, marker, tail = text.partition("}]")
    if not marker:
        return "The correct answer could not be found."
    return tail.strip()

## 🔹 Test: how well does the thesis alone perform?

In [None]:
# Run the thesis step on the first N_examples questions and keep only the selected option.
# NOTE(review): N_examples is not defined anywhere in the visible notebook.
answers = []
for i in range(N_examples):
    answers.append(extract_answer(thesisGeneration(first_queries[i], possibilities[i], sources[i])))

In [None]:
# Peek at the first ten thesis answers.
answers[:10]

In [None]:
# one-shot
# A single worked example: the user turn ("prompt") and the expected reply ("target").
examples =[
  {
    "prompt": """
        Question: What is the sun, a star or a planet?
        Options: ['a star', 'a planet']
        Candidate answer: a planet
        Suggestion: 'a star' is the correct option since the context clearly specifies that the Sun is the star at the center of the Solar System
        Context: The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies.
    """,
    "target": "The correct option is 'a star', since the suggestion is grounded in the context ('The Sun is the star at the center of the Solar System'), even if the candidate answer does not agree by saying 'a planet'."
  }
]

# template -> to extract the answer later 
# Each example is rendered as a "User: ... / AI: ..." exchange.
example_template = """
User: {prompt}
AI: {target}
"""

example_prompt = PromptTemplate(
    input_variables=['prompt', 'target'],
    template=example_template
)

# Instructions placed before the rendered example(s).
prefix = """
You are an helpful AI assistant. You are asked to determine the most correct answer for a given question, provided a set of possible options.
You also have at disposal a first tentative answer and a suggestion on which is the correct answer.
Your goal is to decree which is the most correct answer to the question between the available options according to the context.

Here's an example of how to do it:
"""

# The actual query; it ends on "AI: " so the model continues with its answer.
suffix = """
User: {prompt}
AI: """

# FewShotPromptTemplate was used without ever being imported in this notebook,
# so this cell raised NameError on a fresh kernel.
from langchain_core.prompts import FewShotPromptTemplate

# Assemble prefix + rendered examples + suffix into a single prompt template
# whose only input variable is the new user "prompt".
few_shot_prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["prompt"],
    example_separator="\n\n"
)

In [None]:
def create_message(question, candidate, suggestion, options, context):
    """Build the single-user-message chat for the pre-synthesis step.

    The message lists the question, the stringified options, the candidate
    answer, the suggestion and the context, then ends on an "Assistant:" cue.
    """
    pieces = [
        "Question: ", question,
        "\n Options: ", str(options),
        "\n Candidate answer: ", candidate,
        "\n Suggestion: ", suggestion,
        "\n Context: ", context,
        "\n Assistant: \n",
    ]
    return [{"role": "user", "content": "".join(pieces)}]

In [None]:
def preSynthGeneration(query, candidate_answer, critique, merged, sources):
    """Generate the free-text pre-synthesis reasoning with the few-shot prompt.

    Builds the chat with `create_message`, formats it into
    `few_shot_prompt_template`, generates on CUDA, and post-processes the decoded
    output with `extract_from_second_ai`.
    NOTE(review): `extract_from_second_ai` is not defined in the visible notebook.
    """
    chat = create_message(query, candidate_answer, critique, merged, sources)
    few_shot_text = few_shot_prompt_template.format(prompt=chat)
    encoded = tokenizer(few_shot_text, return_tensors="pt").to("cuda")
    generated = model.generate(**encoded)
    decoded = tokenizer.decode(generated[0])
    return extract_from_second_ai(decoded)

In [None]:
# Run the pre-synthesis step for each example, feeding it the thesis answer and the antithesis.
# NOTE(review): ant_answers and N_examples are not defined above this cell.
pre_answers = []
for i in range(N_examples):
    pre_answers.append(preSynthGeneration(first_queries[i], answers[i], ant_answers[i], possibilities[i], sources[i]))

In [None]:
# Single-example run of the pre-synthesis step, for inspection.
preSynthGeneration(first_queries[0], answers[0], ant_answers[0], possibilities[0], sources[0])

In [None]:
# Show the stored pre-synthesis output for the first example.
pre_answers[0]

## 🔺 PromptTemplate definition and an LLMChain for the **synthesis**

In [None]:
from langchain import PromptTemplate
from operator import itemgetter

prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
Choose the most proper answer between {options} that best matches with the suggestion. 

Question: {question}
Suggestion: {critique}

Assistant:
"""
)

augmentation = {"question": itemgetter("question"),
                "options": itemgetter("options"), 
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }

synthesis_chain = augmentation | prompt_template 

## 🔺 Function to generate the synthesis given literally everything

In [None]:
def synthesisGeneration(query, prompt_template, merged, pre_answer, sources):
    """Format the synthesis prompt and let the model select the final option.

    Args:
        query: the question text.
        prompt_template: unused here; the module-level `synthesis_chain` formats
            the prompt. Kept so existing calls keep working.
        merged: string holding a Python literal list of options (parsed with
            `ast.literal_eval`).
        pre_answer: the pre-synthesis text, injected as the critique.
        sources: supporting passage, forwarded to the chain under "context".

    Returns:
        The full guidance transcript (cleaned prompt plus selected option) as a string.
    """
    merged = ast.literal_eval(merged)
    augmented_prompt = synthesis_chain.invoke({'question': query,
                                               'options': merged,
                                               'critique': pre_answer,
                                               'context': sources})

    normal_string = clean_text(augmented_prompt.text)
    # Offer every option to `select`. Only the first two used to be offered, so
    # a third option could never be chosen and a single-option list raised.
    choices = [clean_text(option) for option in merged]
    ans = new_model + normal_string + select(choices)
    return str(ans)

In [None]:
def extract_answer(text):
    """Return the stripped text that follows the first "assistant:" line marker.

    The marker is matched case-insensitively. The synthesis prompt ends with
    "Assistant:" (capital A), which a lower-case-only search finds only if the
    transcript happened to be lower-cased beforehand.

    Args:
        text: the full transcript (prompt followed by the selected option).

    Returns:
        The text after the marker with surrounding whitespace removed, or a
        fixed fallback sentence when the marker is absent.
    """
    marker = re.search(r"assistant:\n", text, flags=re.IGNORECASE)
    if marker is None:
        return "The correct answer could not be found."
    return text[marker.end():].strip()

In [None]:
# Run the final synthesis step for each example, using the pre-synthesis text as the
# suggestion, and keep only the option extracted from the transcript.
syn_answers = []
for i in range(N_examples):
    syn_answers.append(extract_answer(
        synthesisGeneration(
            first_queries[i], prompt_template, possibilities[i], 
            pre_answers[i], sources[i])))

In [None]:
# Peek at the first ten synthesis answers.
syn_answers[:10]

In [None]:
# Hand-written "perfect" suggestions: one sentence per example naming the gold answer.
def_answers = [
    "the correct option is " + clean_text(gold) + " since the other options is not mentioned in the context"
    for gold in correct_answers
]

In [None]:
# Same synthesis step, but fed the hand-written suggestions in `def_answers`
# instead of the model-generated pre-synthesis text.
goat_answers = []
for i in range(N_examples):
    goat_answers.append(extract_answer(
        synthesisGeneration(
            first_queries[i], prompt_template, possibilities[i], 
            def_answers[i], sources[i])))

In [None]:
# Collect every stage of the pipeline column-wise.
# NOTE: `df` is a plain dict here; a later cell rebinds the same name to a DataFrame.
df = {
    'query': first_queries,
    'correct': correct_answers,
    'thesis': answers,
    'antithesis': ant_answers,
    'pre-synthesis': pre_answers,
    'synthesis': syn_answers,
    'goat': goat_answers,
    'context': sources
} 

In [None]:
import pandas as pd

# Rebind `df` from the dict of columns to a DataFrame (re-running this cell alone
# feeds the DataFrame back into the constructor).
df = pd.DataFrame(df)
df.head()

In [None]:
# df.to_csv('baseline-true.csv')

In [None]:
# Helper that trims enclosing square brackets and spaces, keeping only the content
def remove_brackets(s):
    """Strip leading/trailing '[', ']' and space characters from `s`."""
    edge_chars = "[] "
    return s.strip(edge_chars)

# Normalisation helper applied to answers before they are compared
def clean_text(text):
    """Remove special characters, then quotes/apostrophes/hyphens, and lower-case the result."""
    removal_passes = (
        r'[^\w\s.,!?\'"\-:;()]+',  # special characters
        r"['\"-]",                 # apostrophes, quotes and hyphens
    )
    for pattern in removal_passes:
        text = re.sub(pattern, '', text)
    return text.lower()

# Normalise the gold answers and every predicted-answer column with clean_text.
# NOTE: this overwrites the columns of `df` in place, so the raw strings are lost.
df['correct'] = df['correct'].apply(clean_text)
df['thesis'] = df['thesis'].apply(clean_text)
df['synthesis'] = df['synthesis'].apply(clean_text)
df['goat'] = df['goat'].apply(clean_text)


df.head()

In [None]:
# Inspect the antithesis text of the first row.
df['antithesis'][0]

In [None]:
# Count how many rows match the gold answer and how many do not (thesis column)
row_hits = [
    str(row['correct']).strip() == str(row['thesis']).strip()
    for _, row in df.iterrows()
]
matches = sum(row_hits)
non_matches = len(row_hits) - matches

print(f"Number of matches: {matches}")
print(f"Number of non-matches: {non_matches}")

In [None]:
# Count how many rows match the gold answer and how many do not (synthesis column)
row_hits = [
    str(row['correct']).strip() == str(row['synthesis']).strip()
    for _, row in df.iterrows()
]
matches = sum(row_hits)
non_matches = len(row_hits) - matches

print(f"Number of matches: {matches}")
print(f"Number of non-matches: {non_matches}")

In [None]:
# Count how many rows match the gold answer and how many do not (goat column),
# keeping a readable record of every mismatch in `which_ones_syn`
pairs = [
    (str(row['goat']).strip(), str(row['correct']).strip())
    for _, row in df.iterrows()
]
which_ones_syn = [
    "Synthesis: {}, Correct: {}".format(predicted, expected)
    for predicted, expected in pairs
    if predicted != expected
]
non_matches = len(which_ones_syn)
matches = len(pairs) - non_matches

print(f"Number of matches: {matches}")
print(f"Number of non-matches: {non_matches}")

In [None]:
# Persist the normalised results table to a CSV file in the working directory.
df.to_csv('gg-hf-gemma-2b-it-baseline-2.csv')

In [1]:
import pandas as pd
import torch
import ast
import re
import datasets
from datasets import load_dataset
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
from guidance import models, select
from langchain_core.prompts import PromptTemplate
from operator import itemgetter

import warnings
warnings.filterwarnings("ignore")

  backends.update(_get_backends("networkx.backends"))


In [2]:
# Cleaning helper that removes characters which are not allowed in a prompt
def clean_text(text):
    """Drop every run of characters other than word characters, whitespace and ``.,!?-:;()``."""
    disallowed = re.compile(r"[^\w\s.,!?\-:;()]+")
    return disallowed.sub('', text)

# Normalisation helper applied to answers before they are compared
def clean_text_final(text):
    """Remove special characters, then quotes/apostrophes/hyphens, and lower-case the result."""
    removal_passes = (
        r'[^\w\s.,!?\'"\-:;()]+',  # special characters
        r"['\"-]",                 # apostrophes, quotes and hyphens
    )
    for pattern in removal_passes:
        text = re.sub(pattern, '', text)
    return text.lower()

In [3]:
# prompts and similar things:

# --------------------------------------------------

# prompt augmentation for the (format of the) synthesis:
prompt_template = PromptTemplate.from_template(
"""You are a multiple-choice question answering assistant.
Choose the most proper option between {options} that best matches with the suggestion. 

Question: {question}
Context: {critique}
Sources: {context}

Assistant:
"""
)
augmentation = {"question": itemgetter("question"),
                "options": itemgetter("options"), 
                "critique": itemgetter("critique"),
                "context": itemgetter("context"), }
synthesis_chain = augmentation | prompt_template 

# --------------------------------------------------

# for generating the 'thought' of the synthesis
# NOTE(review): neither `system_message` nor `user_message` is referenced by the
# functions visible below — confirm whether they are still needed.
system_message = """
    You are an helpful AI assistant.
    You are asked to determine the most correct answer for a given question.
    You have at disposal a first tentative answer (a candidate answer) and another opinion on which should be the correct option according to context (a suggestion).
    
    They could agree on the correct option; in this case, directly output the option on which they agree.
    If instead they disagree, use the context to determine the correct answer for the question, given the set of possible options.
    
    The goal of the assistant is to decree which is the most correct answer to the question between the available options. 
    Answer by explicitly reporting the correct answer to you.
"""
# Template with {question}, {options}, {candidate_answer} and {critique} placeholders.
user_message = """
    Question: {question}
    Options: {options}
    Candidate answer: {candidate_answer}
    Suggestion: {critique}
    Which of the candidate answers {options} is the most proper answer for the question?
"""

# --------------------------------------------------

In [47]:
def create_message_thesis(question, options, context):
    """Build the two-message chat (system + user) that prompts the thesis step.

    Args:
        question: the question text (str).
        options: the candidate answers; rendered with `str(options)`.
        context: supporting passage appended to the system message (str).

    Returns:
        A list with a "system" message (instructions followed by `context`) and
        a "user" message (the question plus the stringified options).
    """
    # The old `options_str` / `content` locals were removed: they never reached
    # the returned messages, and the join rejected non-string options for nothing.
    user_content = "Answer to the following question: " + question + " providing one of these options as answer: " + str(options) + "Assistant:"

    messages = [
        {"role": "system", "content": """
        You are an helpful AI assistant. You have to provide helpful answers to the user’s questions based on the context: 
        """ + context},
        {"role": "user", "content": user_content}
    ]

    return messages

def extract_thesis(text):
    """Return the stripped text that follows the first "}]" in `text`.

    "}]" closes the stringified message list, so what follows is the selected
    answer. A fixed fallback sentence is returned when the marker is absent.
    """
    _, marker, tail = text.partition("}]")
    if not marker:
        return "The correct answer could not be found."
    return tail.strip()

def thesisGeneration(query, merged, sources):
    """Generate the thesis answer, constrained to one of the listed options.

    `merged` is a string holding a Python literal list of options. The returned
    value is the selected option, extracted from the guidance transcript.
    """
    options = ast.literal_eval(merged)
    chat = create_message_thesis(query, options, sources)
    transcript = new_model + str(chat) + select(options)
    return extract_thesis(str(transcript))

In [48]:
def create_message_antithesis(question, candidate, options, context):
    """Build the chat-formatted prompt for the antithesis (critique) step.

    The chat holds one worked example (user turn plus assistant reply) followed
    by the real question, options, candidate answer and context.

    Args:
        question: the question text (str).
        candidate: the thesis answer to be checked (str).
        options: the available options; rendered with `str(options)`.
        context: supporting passage (str).

    Returns:
        The prompt string produced by the tokenizer's chat template, with the
        generation prompt appended.
    """
    # The old `options_str` / `content` locals were removed: they never reached the prompt.
    user_content = "Question: " + question + "\n Options: " + str(options) + "\n Candidate answer: " + candidate + "\n Context: " + context + "\n\n Assistant:"

    messages = [
        {"role": "user", "content": """
        You are an helpful AI assistant. You are asked to determine the most correct answer for a given question, provided a set of possible options.
        You also have at disposal a first tentative answer that you are required to check with respect to the question and the relevant context.
        Your goal is to decree which is the most correct answer to the question between the available options.

        Here's an example of how to do it:
        Question: What is the sun, a star or a planet?
        Options: ['a star', 'a planet']
        Candidate answer: a planet
        Context: The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies.
        """},
        {"role": "assistant", "content": """
        The correct answer should be 'a star' due to the fact that the context explicitly say so. On the opposite, the context never mentions the fact that the Sun could be a planet.
        """
        },
        {"role": "user", "content": "Now do the same for the following question: \n" + user_content}
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return prompt

def extract_antithesis(text):
    """Return the content of the last model turn in a decoded chat transcript.

    A model turn starts at "<start_of_turn>model" and ends at "<end_of_turn>"
    or, when generation stopped before the closing tag, at the end of `text`.
    Without the end-of-text fallback a truncated reply was skipped and the
    previous model turn (the few-shot example reply) was returned instead.

    Args:
        text: decoded transcript including the chat-template markers.

    Returns:
        The last model turn with "▁" characters removed and surrounding
        whitespace stripped, or None when there is no model turn.
    """
    pattern = re.compile(r'<start_of_turn>model(.*?)(?:<end_of_turn>|\Z)', re.DOTALL)
    matches = pattern.findall(text)

    if not matches:
        return None

    # Take the last turn and drop the "▁" (U+2581) word-boundary symbols.
    return matches[-1].replace('▁', '').strip()

def antithesisGeneration(query, merged, candidate, sources):
    """Generate the antithesis: a free-text check of the candidate answer.

    `merged` is a string holding a Python literal list of options. Up to 500 new
    tokens are generated and the last model turn is returned.
    """
    options = ast.literal_eval(merged)
    chat_prompt = create_message_antithesis(query, candidate, options, sources)
    # The chat template already contains the special tokens, so none are added here.
    token_ids = tokenizer.encode(chat_prompt, add_special_tokens=False, return_tensors="pt")
    generated = model.generate(input_ids=token_ids.to(model.device), max_new_tokens=500)
    decoded = tokenizer.decode(generated[0])
    return extract_antithesis(decoded)

In [67]:
def create_message_presynthesis(question, candidate, suggestion, options, context):
    """Build the chat-formatted prompt for the pre-synthesis step.

    The chat holds one worked example (user turn plus assistant reply) followed
    by the real question, options, candidate answer, suggestion and context.
    Returns the string produced by the tokenizer's chat template, with the
    generation prompt appended.
    """
    user_content = "".join([
        "Question: ", question,
        "\n Options: ", str(options),
        "\n Candidate answer: ", candidate,
        "\n Suggestion: ", suggestion,
        "\n Context: ", context,
    ])
    instructions_with_example = """
            You are an helpful AI assistant. You are asked to determine the most correct answer for a given question, provided a set of possible options.
            You also have at disposal a first tentative answer that you are required to check with respect to the question and the relevant context.
            Your goal is to decree which is the most correct answer to the question between the available options.
    
            Here's an example of how to do it:
            Question: What is the sun, a star or a planet?
            Options: ['a star', 'a planet']
            Candidate answer: a planet
            Context: The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies.
            """
    example_reply = """
            The correct answer should be 'a star' due to the fact that the context explicitly say so. On the opposite, the context never mentions the fact that the Sun could be a planet.
            """
    turns = [
        {"role": "user", "content": instructions_with_example},
        {"role": "assistant", "content": example_reply},
        {"role": "user", "content": "Now do the same for the following question: " + user_content},
    ]
    return tokenizer.apply_chat_template(turns, tokenize=False, add_generation_prompt=True)

def preSynthGeneration(query, candidate_answer, critique, merged, sources):
    """Generate the pre-synthesis reasoning from the thesis and the antithesis.

    Args:
        query: the question text.
        candidate_answer: the thesis answer.
        critique: the antithesis text, used as the suggestion.
        merged: the available options (rendered with `str` in the prompt).
        sources: supporting passage.

    Returns:
        The last model turn of the decoded generation (see `extract_antithesis`).
    """
    # create_message_presynthesis takes (question, candidate, suggestion, options,
    # context). The options used to be passed as the candidate, the candidate as
    # the suggestion and the critique as the options, mislabelling every field.
    prompt = create_message_presynthesis(query, candidate_answer, critique, merged, sources)
    # The chat template already contains the special tokens, so none are added here.
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=500)
    return extract_antithesis(tokenizer.decode(outputs[0]))

In [94]:
def synthesisGeneration(query, merged, pre_answer, sources):
    """Format the synthesis prompt and let the model select the final option.

    `merged` is a string holding a Python literal list of options. The returned
    value is the selected option, extracted from the guidance transcript.
    """
    options = ast.literal_eval(merged)
    chain_inputs = {
        'question': query,
        'options': options,
        'critique': pre_answer,
        'context': sources,
    }
    filled_prompt = synthesis_chain.invoke(chain_inputs)
    cleaned_prompt = clean_text(filled_prompt.text)
    transcript = new_model + cleaned_prompt + select(options)
    return extract_synthesis(str(transcript))

def extract_synthesis(text):
    """Return the stripped text that follows the first "\\nAssistant:\\n" marker.

    The synthesis prompt ends with that marker, so what follows is the selected
    option. A fixed fallback sentence is returned when the marker is absent.
    """
    _, marker, tail = text.partition("\nAssistant:\n")
    if not marker:
        return "The correct answer could not be found."
    return tail.strip()

In [103]:
# Load the Gemma 2 2B instruction-tuned tokenizer (slow implementation) and model.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", use_fast = False)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16
)

# Wrap the same model/tokenizer for guidance's constrained generation (`select`).
# NOTE: the generation helpers defined above read `tokenizer`, `model` and
# `new_model` as globals, so this cell must run before any of them is called.
new_model = models.Transformers(model, tokenizer, temperature=0.0)

tokenizer_config.json:   0%|          | 0.00/46.9k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [104]:
# Load the training split of the dataset from the Hugging Face Hub.
dataset = load_dataset('saracandu/hotpotQA_nli', split="train", trust_remote_code=True)

# the full question column (the subset actually processed is set by N_rows below):
first_queries = dataset['question']

# same for correct answers and distractors:
# NOTE(review): each `options` entry is later parsed with ast.literal_eval, so it
# is presumably a stringified Python list — confirm against the dataset.
correct_answers = dataset['answer']
possibilities = dataset['options']

# and for the sources:
sources = dataset['passages']

#nli
first_nli = dataset['first nli']
second_nli = dataset['second nli']

bart1 = dataset['BART1']
bart2 = dataset['BART2']

rob1 = dataset['ROBERTA1']
rob2 = dataset['ROBERTA2']

# Number of examples pushed through the pipeline.
N_rows = 5

In [105]:
# THESIS: first answer, constrained to the listed options
answers = []
for i in range(N_rows):
    answers.append(thesisGeneration(first_queries[i], possibilities[i], sources[i]))


# ANTITHESIS: free-text check of the thesis answer against the context
ant_answers = []
for i in range(N_rows):
    ant_answers.append(antithesisGeneration(first_queries[i], possibilities[i], answers[i], sources[i]))

# SYNTHESIS: free-text reasoning that weighs the thesis against the antithesis
pre_answers = []
for i in range(N_rows):
    # Arguments follow preSynthGeneration's signature
    # (query, candidate_answer, critique, merged, sources). The options used to
    # be passed as the candidate answer and the antithesis as the options.
    pre_answers.append(preSynthGeneration(first_queries[i], answers[i], ant_answers[i], possibilities[i], sources[i]))


# format synthesis: constrain the final answer to one of the listed options
syn_answers = []
for i in range(N_rows):
    syn_answers.append(
        synthesisGeneration(
            first_queries[i], possibilities[i],
            pre_answers[i], sources[i]))

In [106]:
# Display the final selected option for each processed example.
syn_answers

["Arthur's Magazine", 'Henri Leconte', 'The Wolfhounds', 'yes', 'yes']