In [55]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

torch.random.manual_seed(0)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [56]:
def create_message(question, options):
    options_str = '", "'.join(options)
    content = f"""
    You are asked to create two opposite statements from a question that is given to you.
    The idea is that they have to be used as multiple-choices answers for the given question.
    Insert also "than" when asked for a comparison.

    Here's an example:
    Question: "Which magazine was started first, Arthur's Magazine or First for Women?"
    Options: ["Arthur's Magazine", "First for Women"]
    Assistant: ["Arthur's Magazine was started before First for Women", "First for Women was started before Arthur's Magazine"]

    Now do the same for this question: "{question}", where options: ["{options_str}"].
    Assistant:
    """
    
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": content}
    ]
    
    return messages

In [57]:
question = "Which tennis player won more Grand Slam titles, Henri Leconte or Jonathan Stark?"
options = ["Henri Leconte", "Jonathan Stark"]
messages = create_message(question, options)
messages

[{'role': 'system', 'content': 'You are a helpful AI assistant.'},
 {'role': 'user',
  'content': '\n    You are asked to create two opposite statements from a question that is given to you.\n    The idea is that they have to be used as multiple-choices answers for the given question.\n    Insert also "than" when asked for a comparison.\n\n    Here\'s an example:\n    Question: "Which magazine was started first, Arthur\'s Magazine or First for Women?"\n    Options: ["Arthur\'s Magazine", "First for Women"]\n    Assistant: ["Arthur\'s Magazine was started before First for Women", "First for Women was started before Arthur\'s Magazine"]\n\n    Now do the same for this question: "Which tennis player won more Grand Slam titles, Henri Leconte or Jonathan Stark?", where options: ["Henri Leconte", "Jonathan Stark"].\n    Assistant:\n    '}]

In [58]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}

In [59]:
output = pipe(messages, **generation_args)
print(output[0]['generated_text'])

 ["Henri Leconte won more Grand Slam titles than Jonathan Stark", "Jonathan Stark won more Grand Slam titles than Henri Leconte"]


# Dataset

In [133]:
from datasets import load_dataset

dataset = load_dataset('saracandu/filtered_hotpotQA', split='train')
dataset

questions = dataset['question']
options = dataset['options']

Downloading data: 100%|█████████████████████████████| 304k/304k [00:00<00:00, 597kB/s]


Generating train split: 0 examples [00:00, ? examples/s]

In [134]:
import ast

prompts = []
for i in range(352):
  options[i] = ast.literal_eval(options[i])
  prompts.append(create_message(questions[i], options[i]))

In [135]:
outs = []
for i in range(352):
    output = pipe(prompts[i], **generation_args)
    outs.append(output[0]['generated_text'])



In [136]:
outs[0]

' ["Arthur\'s Magazine was started before First for Women", "First for Women was started before Arthur\'s Magazine"]'

# cure them and put them in a dataset to use them

In [137]:
def remove_assistant_label2(input_string):
    # Verifica se la stringa inizia con 'Assistant:'
    if input_string.startswith(' '):
        # Rimuovi 'Assistant:' e gli spazi bianchi che seguono
        return input_string[len(' '):].strip()
    return input_string

In [138]:
for i in range(352):
    outs[i] = remove_assistant_label2(outs[i])
    # outs[i] = rimuovi_virgolette_interne(outs[i])

In [139]:
outs[129]

'["Philip Terzian is an American journalist and has been Literary Editor of The Weekly Standard", "Derek Sherinian is an American journalist and has been Literary Editor of The Weekly Standard"]'

In [140]:
for i in range(352):
    outs[i] = ast.literal_eval(outs[i])

In [141]:
nli1 = []
nli2 = []

for i in range(352):
    nli1.append(outs[i][0])
    nli2.append(outs[i][1])

In [142]:
answer = dataset['answer']
questionType = dataset['type']
level = dataset['level']
passages = dataset['selected_passages']

In [143]:
import pandas as pd

# Crea un dizionario con i vettori
dati = {
    'question': questions,
    'answer': answer,
    'options': options,
    'first nli': nli1,
    'second nli': nli2,
    'type': questionType,
    'level': level,
    'passages': passages
}

# Crea il DataFrame utilizzando il dizionario
df = pd.DataFrame(dati)

In [145]:
df.tail()

Unnamed: 0,question,answer,options,first nli,second nli,type,level,passages
347,Are Melissa and Darmera vegatables,no,"[yes, no]",Melissa and Darmera are not vegetables,Melissa and Darmera are vegetables,comparison,easy,Melissa is a genus of perennial herbs in the L...
348,"Which takes place farther east, The Mosuo Sist...",The Mosuo Sisters,"[The Silent Historian, The Mosuo Sisters]",The Silent Historian takes place farther east ...,The Mosuo Sisters take place farther east than...,comparison,medium,"The Silent Historian (original title: ""Het zwi..."
349,Were Yellowcard and For Against both American ...,yes,"[yes, no]","Yes, Yellowcard and For Against were both Amer...","No, Yellowcard and For Against were not both A...",comparison,medium,Yellowcard was an American pop punk band that ...
350,Were the buildings at 270 Park Avenue and 100 ...,no,"[yes, no]","Yes, the buildings at 270 Park Avenue and 100 ...","No, the buildings at 270 Park Avenue and 100 E...",comparison,medium,100 East 53rd Street (formerly known as 610 Le...
351,"Which American film, Dragonslayer or Swiss Fam...",Swiss Family Robinson,"[Dragonslayer, Swiss Family Robinson]",Dragonslayer was shot outside London,Swiss Family Robinson was shot outside London,comparison,medium,Dragonslayer is a 1981 American fantasy film d...


In [146]:
df.to_csv('nli_augmented.csv')

# Add NLI scores - part 1

In [8]:
import pandas as pd

df = pd.read_csv('nli_augmented-2.csv')
df.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,question,answer,options,first nli,second nli,type,level,passages,scores 1,scores 2
0,0,0,0,"Which magazine was started first, Arthur's Mag...",Arthur's Magazine,"[""Arthur's Magazine"", 'First for Women']",Arthur's Magazine was started before First for...,First for Women was started before Arthur's Ma...,comparison,medium,Arthur's Magazine (1844–1846) was an American ...,0.421857,0.2129
1,1,1,1,Which tennis player won more Grand Slam titles...,Jonathan Stark,"['Henri Leconte', 'Jonathan Stark']",Henri Leconte won more Grand Slam titles than ...,Jonathan Stark won more Grand Slam titles than...,comparison,medium,Henri Leconte (born 4 July 1963) is a former F...,0.081073,0.169455
2,2,2,2,"Which band was founded first, Hole (the rock b...",The Wolfhounds,"['The Wolfhounds', 'Courtney Love']",Hole (the rock band that Courtney Love was a f...,The Wolfhounds were founded before Hole (the r...,comparison,medium,The Wolfhounds are an indie pop/noise pop band...,0.959961,0.72731
3,3,3,3,Were Pavel Urysohn and Leonid Levin known for ...,no,"['yes', 'no']","Yes, Pavel Urysohn and Leonid Levin were known...","No, Pavel Urysohn and Leonid Levin were not kn...",comparison,medium,Leonid Anatolievich Levin ( ; Russian: Леони́д...,0.732361,0.006006
4,4,4,4,Are both The New Pornographers and Kings of Le...,yes,"['yes', 'no']","Yes, both The New Pornographers and Kings of L...","No, neither The New Pornographers nor Kings of...",comparison,hard,Kings of Leon is an American rock band that fo...,0.494671,0.001089


In [9]:
import torch
import ast
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = torch.device('cuda')

nli_model = AutoModelForSequenceClassification.from_pretrained('FacebookAI/roberta-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('FacebookAI/roberta-large-mnli')

Some weights of the model checkpoint at FacebookAI/roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [10]:
scores_1 = []

for i in range(len(df)): 
    
    options = ast.literal_eval(df['options'][i])
    hypothesis1 = df['first nli'][i]
    # hypothesis2 = df['second nli'][i]
    premise = df['passages'][i]

    
    # run through model pre-trained on MNLI
    x1 = tokenizer.encode(premise, hypothesis1, return_tensors='pt',
                         truncation_strategy='only_first')
    logits1 = nli_model(x1)[0]
    entail_contradiction_logits1 = logits1[:,[0,2]]
    probs1 = entail_contradiction_logits1.softmax(dim=1)
    prob_label_is_true1 = probs1[:,1]
    scores_1.append(prob_label_is_true1.item())



In [11]:
df['scores 1 - roberta'] = scores_1
df.to_csv('nli_augmented-3.csv')

# Add NLI scores - part 2

In [18]:
import pandas as pd

df = pd.read_csv('nli_augmented-3.csv')
df.head()

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,question,answer,options,first nli,second nli,type,level,passages,scores 1,scores 2,scores 1 - roberta
0,0,0,0,0,0,"Which magazine was started first, Arthur's Mag...",Arthur's Magazine,"[""Arthur's Magazine"", 'First for Women']",Arthur's Magazine was started before First for...,First for Women was started before Arthur's Ma...,comparison,medium,Arthur's Magazine (1844–1846) was an American ...,0.421857,0.050776,0.844637
1,1,1,1,1,1,Which tennis player won more Grand Slam titles...,Jonathan Stark,"['Henri Leconte', 'Jonathan Stark']",Henri Leconte won more Grand Slam titles than ...,Jonathan Stark won more Grand Slam titles than...,comparison,medium,Henri Leconte (born 4 July 1963) is a former F...,0.081073,0.25081,0.038453
2,2,2,2,2,2,"Which band was founded first, Hole (the rock b...",The Wolfhounds,"['The Wolfhounds', 'Courtney Love']",Hole (the rock band that Courtney Love was a f...,The Wolfhounds were founded before Hole (the r...,comparison,medium,The Wolfhounds are an indie pop/noise pop band...,0.959961,0.129603,0.110932
3,3,3,3,3,3,Were Pavel Urysohn and Leonid Levin known for ...,no,"['yes', 'no']","Yes, Pavel Urysohn and Leonid Levin were known...","No, Pavel Urysohn and Leonid Levin were not kn...",comparison,medium,Leonid Anatolievich Levin ( ; Russian: Леони́д...,0.732361,0.015183,0.567181
4,4,4,4,4,4,Are both The New Pornographers and Kings of Le...,yes,"['yes', 'no']","Yes, both The New Pornographers and Kings of L...","No, neither The New Pornographers nor Kings of...",comparison,hard,Kings of Leon is an American rock band that fo...,0.494671,0.000362,0.685024


In [21]:
import torch
import ast
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = torch.device('cuda')

nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

In [22]:
scores_2 = []

for i in range(len(df)): 
    
    options = ast.literal_eval(df['options'][i])
    hypothesis2 = df['second nli'][i]
    premise = df['passages'][i]

    
    # run through model pre-trained on MNLI
    x2 = tokenizer.encode(premise, hypothesis2, return_tensors='pt',
                         truncation_strategy='only_first')
    logits2 = nli_model(x2)[0]
    entail_contradiction_logits2 = logits2[:,[0,2]]
    probs2 = entail_contradiction_logits2.softmax(dim=1)
    prob_label_is_true2 = probs2[:,1]
    scores_2.append(prob_label_is_true2.item())



In [23]:
df['scores 2'] = scores_2
df.to_csv('nli_augmented-def.csv')

In [24]:
df.tail()

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,question,answer,options,first nli,second nli,type,level,passages,scores 1,scores 2,scores 1 - roberta,scores 2 - roberta
347,347,347,347,347,347,Are Melissa and Darmera vegatables,no,"['yes', 'no']",Melissa and Darmera are not vegetables,Melissa and Darmera are vegetables,comparison,easy,Melissa is a genus of perennial herbs in the L...,0.009427,0.009904,0.099834,0.00452
348,348,348,348,348,348,"Which takes place farther east, The Mosuo Sist...",The Mosuo Sisters,"['The Silent Historian', 'The Mosuo Sisters']",The Silent Historian takes place farther east ...,The Mosuo Sisters take place farther east than...,comparison,medium,"The Silent Historian (original title: ""Het zwi...",0.042687,0.36973,0.011292,0.034224
349,349,349,349,349,349,Were Yellowcard and For Against both American ...,yes,"['yes', 'no']","Yes, Yellowcard and For Against were both Amer...","No, Yellowcard and For Against were not both A...",comparison,medium,Yellowcard was an American pop punk band that ...,0.289446,0.003603,0.858297,0.003057
350,350,350,350,350,350,Were the buildings at 270 Park Avenue and 100 ...,no,"['yes', 'no']","Yes, the buildings at 270 Park Avenue and 100 ...","No, the buildings at 270 Park Avenue and 100 E...",comparison,medium,100 East 53rd Street (formerly known as 610 Le...,0.784575,0.002681,0.973729,0.007865
351,351,351,351,351,351,"Which American film, Dragonslayer or Swiss Fam...",Swiss Family Robinson,"['Dragonslayer', 'Swiss Family Robinson']",Dragonslayer was shot outside London,Swiss Family Robinson was shot outside London,comparison,medium,Dragonslayer is a 1981 American fantasy film d...,0.833212,0.985439,0.993581,0.990956
