In [1]:
import json


In [2]:
# Read the question/answer fixture file into `data`.
# The encoding is given explicitly: JSON text is UTF-8, and without it open()
# falls back to the platform's locale encoding, which can mis-decode
# non-ASCII answers on systems whose default is not UTF-8.
with open('questions_with_answers.json', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
## Split the question records into parallel lists, one list per answer type
questions = data["questions"]
correct_answers, wrong_answers, paraphrased_answers = (
    [record[field] for record in questions]
    for field in ('correct_answer', 'wrong_answer', 'paraphrased_answer')
)



In [4]:
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch


def mean_pooling(model_output, attention_mask):
    """Average token embeddings over dimension 1, ignoring masked-out positions.

    Args:
        model_output: Indexable whose first element holds the token embeddings
            (presumably shaped (batch, tokens, hidden) -- confirm against caller).
        attention_mask: Mask tensor with one entry per token; it is expanded
            along a new trailing dimension to the shape of the embeddings.

    Returns:
        The mask-weighted sum of the embeddings along dimension 1 divided by
        the mask total along that dimension.
    """
    hidden_states = model_output[0]
    # Broadcast the mask over the embedding dimension so it can weight every feature.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    # Clamp the denominator so a fully masked row divides by 1e-9 instead of zero.
    mask_total = mask.sum(dim=1).clamp(min=1e-9)
    return masked_total / mask_total


# Load every checkpoint under comparison from the HuggingFace Hub.
# The order of this list fixes the order of the per-model score lists built from it.
models = [
    SentenceTransformer(checkpoint)
    for checkpoint in (
        'sentence-transformers/paraphrase-MiniLM-L6-v2',
        'sentence-transformers/all-MiniLM-L6-v2',
        'sentence-transformers/all-distilroberta-v1',
    )
]


In [5]:
from torch.nn import CosineSimilarity

# Shared cosine-similarity module, built with the library's default arguments.
cosine_sim = CosineSimilarity()


def score_answers(example_answers, your_answers, model):
    """Score submitted answers against reference answers by embedding similarity.

    Args:
        example_answers: Reference answers, passed as-is to ``model.encode``.
        your_answers: Answers to be scored, passed as-is to ``model.encode``;
            presumably aligned one-to-one with ``example_answers`` -- confirm
            against caller.
        model: Object exposing ``encode(texts, convert_to_tensor=True)``.

    Returns:
        The result of ``cosine_sim`` applied to the two embedding tensors
        (submitted answers first, references second).
    """
    def embed(texts):
        return model.encode(texts, convert_to_tensor=True)

    # References are encoded before the submitted answers.
    reference_vecs = embed(example_answers)
    candidate_vecs = embed(your_answers)
    return cosine_sim(candidate_vecs, reference_vecs)

# For each candidate answer set, score it against the correct answers with every
# model: the paraphrased answers first, then the wrong answers.
all_parapharased_scores, all_wrong_scores = (
    [score_answers(correct_answers, candidate_answers, model) for model in models]
    for candidate_answers in (paraphrased_answers, wrong_answers)
)





In [6]:
# Show the paraphrased-answer vs. correct-answer similarity scores (one entry per model).
all_parapharased_scores

[tensor([0.7701, 0.8141, 0.9436, 0.8376, 0.9051], device='cuda:0'),
 tensor([0.7798, 0.8654, 0.9675, 0.8664, 0.9494], device='cuda:0'),
 tensor([0.6332, 0.8804, 0.9663, 0.8820, 0.9761], device='cuda:0')]

In [7]:
# Show the wrong-answer vs. correct-answer similarity scores (one entry per model).
all_wrong_scores

[tensor([0.5365, 0.9195, 0.9389, 0.5280, 0.6323], device='cuda:0'),
 tensor([0.5566, 0.9316, 0.9663, 0.8775, 0.7440], device='cuda:0'),
 tensor([0.4252, 0.9062, 0.9660, 0.8351, 0.7568], device='cuda:0')]

In [8]:
# For each model, print the paraphrased scores, the wrong scores, and an
# element-wise flag that is True where the paraphrased answer scored higher
# than the wrong answer.
for paraphrased, wrong in zip(all_parapharased_scores, all_wrong_scores):
    print(paraphrased, wrong, paraphrased > wrong, '------------------', sep='\n')

tensor([0.7701, 0.8141, 0.9436, 0.8376, 0.9051], device='cuda:0')
tensor([0.5365, 0.9195, 0.9389, 0.5280, 0.6323], device='cuda:0')
tensor([ True, False,  True,  True,  True], device='cuda:0')
------------------
tensor([0.7798, 0.8654, 0.9675, 0.8664, 0.9494], device='cuda:0')
tensor([0.5566, 0.9316, 0.9663, 0.8775, 0.7440], device='cuda:0')
tensor([ True, False,  True, False,  True], device='cuda:0')
------------------
tensor([0.6332, 0.8804, 0.9663, 0.8820, 0.9761], device='cuda:0')
tensor([0.4252, 0.9062, 0.9660, 0.8351, 0.7568], device='cuda:0')
tensor([ True, False,  True,  True,  True], device='cuda:0')
------------------
