In [1]:
# Load model directly
from transformers import BartTokenizer, BartForConditionalGeneration
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")

In [2]:
from datasets import load_dataset

sciq_dataset = load_dataset("allenai/sciq")
sciq_dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 11679
    })
    validation: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
})

### Sequential Model Distractors Analysis 

In [3]:
model_qa = BartForConditionalGeneration.from_pretrained('tmp-QA/results_CL_large_QA/model_CL_QA_large_test').to(device)

In [4]:
text = "In the first law, an object will not change its motion unless a force acts on it. In the second law, the force on an object is equal to its mass times its acceleration. In the third law, when two objects interact, they apply forces to each other of equal magnitude and opposite direction."

In [5]:
def qa_inference(context):
    text = "Support: {}".format(context)
    max_length = 600
    tokenized_inputs = tokenizer.encode_plus(text, return_tensors="pt", max_length=max_length, truncation=True, padding='max_length').to(device) 

    output = model_qa.generate(input_ids=tokenized_inputs["input_ids"], max_length=1024)
    
    answer = tokenizer.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return answer

question_answer = qa_inference(text)
question_answer

['Question: In the first law, an object will not change its motion unless a force acts on it. in the second law, the force on an object is equal to its mass times what? Answer: its acceleration']

In [6]:
model_da = BartForConditionalGeneration.from_pretrained('tmp-DA/results_CL_large/model_CL_large_test').to(device)

In [7]:
def da_inference(context, result):
    text = "Support: {} {}".format(context, result[0])
    max_length = 600
    tokenized_inputs = tokenizer.encode_plus(text, return_tensors="pt", max_length=max_length, truncation=True, padding='max_length').to(device) 

    output = model_da.generate(input_ids=tokenized_inputs["input_ids"], max_length=1024)
    
    answer = tokenizer.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return answer

distractors = da_inference(text, question_answer)
distractors

['Distractor1: its velocity Distractor2: its weight Distractor3: its density']

In [8]:
import re
def pipeline(context):
    question_answer = qa_inference(context)
    distractors = da_inference(context, question_answer)
    
    question_pattern = r"Question: (.+?\?)"
    answer_pattern = r"Answer: (.+)"
    dis1_pattern = r"Distractor1: (.+)"
    dis2_pattern = r"Distractor2: (.+)"
    dis3_pattern = r"Distractor3: (.+)"

    question = ""
    answer = ""
    
    question_match = re.search(question_pattern, question_answer[0])
    answer_match = re.search(answer_pattern, question_answer[0])

    if question_match:
        question = question_match.group(1)
    if answer_match:
        answer = answer_match.group(1)

    distractor_pattern = r"Distractor\d+: (.+?)(?= Distractor|$)"
    distractors = re.findall(distractor_pattern, distractors[0])

    return question, answer, distractors

pipeline(text)

('In the first law, an object will not change its motion unless a force acts on it. in the second law, the force on an object is equal to its mass times what?',
 'its acceleration',
 ['its velocity', 'its weight', 'its density'])

In [9]:
filtered_sciq = sciq_dataset.filter(lambda example: example["support"] != '')
filtered_dataset = sciq_dataset.filter(lambda example: example['support'] is not None and example['support'] != "")
# And remove any datapoints which contain questions that have a 'fill-in-the-blank' type answer
filtered_sciq = filtered_dataset.filter(lambda example: '_______' not in example['question'] and '______' not in example['question'] and '_____' not in example['question'] and '____' not in example['question'] and '___' not in example['question'])
filtered_sciq 

DatasetDict({
    train: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 10423
    })
    validation: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 881
    })
    test: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 880
    })
})

In [10]:
test_data = filtered_sciq['validation']
test_data

Dataset({
    features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
    num_rows: 881
})

In [11]:
# from huggingface_hub import notebook_login

# notebook_login()

In [12]:
# model_da.push_to_hub("final-bart-distractor-generation")

In [13]:
# model_qa.push_to_hub("final-bart-question-generation")

In [14]:
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api

glove_vectors = api.load("glove-wiki-gigaword-50")

In [15]:
glove_vectors.most_similar('velocity')

[('velocities', 0.8433403372764587),
 ('angle', 0.8210359215736389),
 ('angular', 0.8034953474998474),
 ('gravity', 0.7958716154098511),
 ('amplitude', 0.7937971353530884),
 ('wavelength', 0.7915680408477783),
 ('gradient', 0.7870423793792725),
 ('probability', 0.7667955756187439),
 ('frequency', 0.7655791640281677),
 ('measurement', 0.7501229047775269)]

In [139]:
glove_vectors.similarity('angry', 'james')

0.22834066

In [17]:
glove_vectors.similarity('velocity', 'acceleration')

0.67458355

In [18]:
glove_vectors.similarity('weight', 'acceleration')

0.5167572

In [19]:
glove_vectors.similarity('density', 'acceleration')

0.5337082

In [30]:
pipeline(test_data[469]['support'])

('The smallest particle of an element that still has the properties of that element is called?',
 'the atom',
 ['the electron', 'the neutron', 'the nucleus'])

In [21]:
def infer_all(examples):
    question, answer, distractors = pipeline(examples['support'])
    
    return {
        "pred_answer": answer if answer != None else "",
        "pred_question": question,
        "pred_distractors1": distractors[0] if len(distractors) > 0 else "",
        "pred_distractors2": distractors[1] if len(distractors) > 1 else "",
        "pred_distractors3": distractors[2] if len(distractors) > 2 else "",
    }

test_data_pred = test_data.map(infer_all)

Map:   0%|          | 0/881 [00:00<?, ? examples/s]

In [31]:
test_data_pred[170]

{'question': 'The transfer of energy by electromagnetic waves is called what?',
 'distractor3': 'magnetic radiation',
 'distractor1': 'particulate radiation',
 'distractor2': 'mechanical radiation',
 'correct_answer': 'electromagnetic radiation',
 'support': 'Electromagnetic waves are waves that consist of vibrating electric and magnetic fields. Like other waves, electromagnetic waves transfer energy from one place to another. The transfer of energy by electromagnetic waves is called electromagnetic radiation . Electromagnetic waves can transfer energy through matter or across empty space. For an excellent video introduction to electromagnetic waves, go to this URL: http://www. youtube. com/watch?v=cfXzwh3KadE.',
 'pred_answer': 'magnetic fields',
 'pred_question': 'Electromagnetic waves are waves that consist of vibrating electric and what else?',
 'pred_distractors1': 'gravitational fields',
 'pred_distractors2': 'electromagnetic currents',
 'pred_distractors3': 'magnetic currents'}

In [115]:
import numpy as np
def phrase_vector(phrase):
    ## empty assume zero
    if phrase == "":
        wv = np.zeros(glove_vectors.vector_size)
    else:
        words = phrase.split()
        word_vectors = [glove_vectors[word] for word in words if word in glove_vectors]
        if len(word_vectors) == 0:
            wv = np.zeros(glove_vectors.vector_size)
        else:
            wv = np.mean(word_vectors, axis=0)
    return wv 
vec = phrase_vector("iron-def")
vec

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [116]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_similarity([phrase_vector("happy")], [phrase_vector("sad")])[0][0]

0.68906325

In [132]:
def calc_vector_answer_dis(examples):
    answer_vec = phrase_vector(examples['correct_answer'])
    dis1_vec = phrase_vector(examples['distractor1'])
    dis2_vec = phrase_vector(examples['distractor2'])
    dis3_vec = phrase_vector(examples['distractor3'])

    #distance between answers and distractors
    sim_answer_dis1 = cosine_similarity([answer_vec], [dis1_vec])
    sim_answer_dis2 = cosine_similarity([answer_vec], [dis2_vec])
    sim_answer_dis3 = cosine_similarity([answer_vec], [dis3_vec])

    sim_answer_dis = np.average([sim_answer_dis1, sim_answer_dis2, sim_answer_dis3])
    return sim_answer_dis

def calc_vector_inter_dis(examples):
    dis1_vec = phrase_vector(examples['distractor1'])
    dis2_vec = phrase_vector(examples['distractor2'])
    dis3_vec = phrase_vector(examples['distractor3'])

    #distance between each distractors
    sim_dis1_dis2 = cosine_similarity([dis1_vec], [dis2_vec])
    sim_dis2_dis3 = cosine_similarity([dis2_vec], [dis3_vec])
    sim_dis3_dis1 = cosine_similarity([dis3_vec], [dis1_vec])

    sim_dis = np.average([sim_dis1_dis2, sim_dis2_dis3, sim_dis3_dis1])
    return sim_dis

def calc_vector_answer_dis_pred(examples):
    answer_vec = phrase_vector(examples['pred_answer'])
    dis1_vec = phrase_vector(examples['pred_distractors1'])
    dis2_vec = phrase_vector(examples['pred_distractors2'])
    dis3_vec = phrase_vector(examples['pred_distractors3'])

     #distance between answers and distractors
    sim_answer_dis1 = cosine_similarity([answer_vec], [dis1_vec])
    sim_answer_dis2 = cosine_similarity([answer_vec], [dis2_vec])
    sim_answer_dis3 = cosine_similarity([answer_vec], [dis3_vec])

    sim_answer_dis = np.average([sim_answer_dis1, sim_answer_dis2, sim_answer_dis3])

    return sim_answer_dis

def calc_vector_inter_dis_pred(examples):
    dis1_vec = phrase_vector(examples['pred_distractors1'])
    dis2_vec = phrase_vector(examples['pred_distractors2'])
    dis3_vec = phrase_vector(examples['pred_distractors3'])

    #distance between each distractors
    sim_dis1_dis2 = cosine_similarity([dis1_vec], [dis2_vec])
    sim_dis2_dis3 = cosine_similarity([dis2_vec], [dis3_vec])
    sim_dis3_dis1 = cosine_similarity([dis3_vec], [dis1_vec])

    sim_dis = np.average([sim_dis1_dis2, sim_dis2_dis3, sim_dis3_dis1])

    return sim_dis

In [133]:
answer_dis = []
inter_dis = []
answer_pred_dis = []
inter_pred_dis = []
for data in test_data_pred:
    answer_dis.append(calc_vector_answer_dis(data))
    inter_dis.append(calc_vector_inter_dis(data))
    answer_pred_dis.append(calc_vector_answer_dis_pred(data))
    inter_pred_dis.append(calc_vector_inter_dis_pred(data))

Ground Truth Answer and Distractors Similarity

In [134]:
np.average(answer_dis)

0.5800197711281374

Ground Truth Inter Distractors Similarity

In [135]:
np.average(inter_dis)

0.5230395514299552

Predicted Answer and Distractors Similarity

In [136]:
np.average(answer_pred_dis)

0.6643107100007571

Predicted Inter Distractors Similarity

In [137]:
np.average(inter_pred_dis)

0.6502474400676812

### Single Fine Tuned Model Distractors Analysis 

In [141]:
model_all = BartForConditionalGeneration.from_pretrained("b-b-brouwer/CL_base").to(device)

config.json:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/292 [00:00<?, ?B/s]

In [146]:
def inference_sft(context):
    text = "Support: {}".format(context)
    max_length = 600
    tokenized_inputs = tokenizer.encode_plus(text, return_tensors="pt", max_length=max_length, truncation=True, padding='max_length').to(device) 

    output = model_all.generate(input_ids=tokenized_inputs["input_ids"], max_length=1024)
    
    res = tokenizer.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return res
    
inference_sft(text)

['Question:Question: Which law states that an object will not change its motion unless a force acts on it? Answer: first law Distractor1:  third law and second law of inertia Distractor2: fourth law and third law of relativity Distractor3: law and law']

In [152]:
def infer_all_sft(examples):
    result = inference_sft(examples['support'])

    question_pattern = r"Question: (.+?\?)"
    answer_pattern = r"Answer: (.+?)(?= Distractor\d+: |$)"
    dis1_pattern = r"Distractor1: (.+)"
    dis2_pattern = r"Distractor2: (.+)"
    dis3_pattern = r"Distractor3: (.+)"

    question = ""
    answer = ""
    
    question_match = re.search(question_pattern, result[0])
    answer_match = re.search(answer_pattern, result[0])

    if question_match:
        question = question_match.group(1)
    if answer_match:
        answer = answer_match.group(1)

    distractor_pattern = r"Distractor\d+: (.+?)(?= Distractor|$)"
    distractors = re.findall(distractor_pattern, result[0])
    
    return {
        "pred_answer": answer if answer != None else "",
        "pred_question": question,
        "pred_distractors1": distractors[0] if len(distractors) > 0 else "",
        "pred_distractors2": distractors[1] if len(distractors) > 1 else "",
        "pred_distractors3": distractors[2] if len(distractors) > 2 else "",
    }

test_data_pred = test_data.map(infer_all_sft)

Map:   0%|          | 0/881 [00:00<?, ? examples/s]

In [153]:
answer_dis = []
inter_dis = []
answer_pred_dis = []
inter_pred_dis = []
for data in test_data_pred:
    answer_dis.append(calc_vector_answer_dis(data))
    inter_dis.append(calc_vector_inter_dis(data))
    answer_pred_dis.append(calc_vector_answer_dis_pred(data))
    inter_pred_dis.append(calc_vector_inter_dis_pred(data))

In [154]:
np.average(answer_dis)

0.5800197711281374

In [155]:
np.average(inter_dis)

0.5230395514299552

In [156]:
np.average(answer_pred_dis)

0.5562485193243836

In [157]:
np.average(inter_pred_dis)

0.5263792485850741