In [33]:
import torch
import json
import pickle
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
import numpy as np

In [3]:
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

with open("dataset/dev-v2.0.json", 'r') as handle:
    jdata = json.load(handle)
    data = jdata['data']

In [111]:
def calc_prob(context, question):
    gt_question = question
    gt_q_tokens = tokenizer.tokenize(gt_question)
    gt_indexed_q_tokens = tokenizer.convert_tokens_to_ids(gt_q_tokens)
    
    mask_tokens = ["[MASK]"]*len(gt_indexed_q_tokens)
    indexed_mask_tokens = tokenizer.convert_tokens_to_ids(mask_tokens)
    
    context_tokens = tokenizer.tokenize(context)
    indexed_context_tokens = tokenizer.convert_tokens_to_ids(context_tokens)
    
    tokens_tensor = torch.tensor([indexed_context_tokens + gt_indexed_q_tokens])#indexed_mask_tokens])
    segments_tensors = torch.tensor([0]*len(indexed_context_tokens) + [1]*len(gt_indexed_q_tokens))
    predictions = model(tokens_tensor, segments_tensors)
    
    total = 0
    context_len = len(context_tokens)
    q_len = len(gt_indexed_q_tokens)
    for i in range(q_len):
        preds = predictions[0, context_len+i].data.numpy()
        total += -np.log(np.exp(preds[gt_indexed_q_tokens[i]]) / sum(np.exp(preds)))
    entropy = total / q_len
    perplexity = np.exp(entropy)
    return perplexity


In [155]:
def calc_prob_nn(context, question):
    gt_question = question
    gt_q_tokens = tokenizer.tokenize(gt_question)
    gt_indexed_q_tokens = tokenizer.convert_tokens_to_ids(gt_q_tokens)
    
    mask_tokens = ["[MASK]"]*len(gt_indexed_q_tokens)
    indexed_mask_tokens = tokenizer.convert_tokens_to_ids(mask_tokens)
    
    context_tokens = tokenizer.tokenize(context)
    indexed_context_tokens = tokenizer.convert_tokens_to_ids(context_tokens)
    
    tokens_tensor = torch.tensor([indexed_context_tokens + gt_indexed_q_tokens])#indexed_mask_tokens])
    segments_tensors = torch.tensor([0]*len(indexed_context_tokens) + [1]*len(gt_indexed_q_tokens))
    predictions = model(tokens_tensor, segments_tensors)
    
    total = 0
    context_len = len(context_tokens)
    q_len = len(gt_indexed_q_tokens)
    for i in range(q_len):
        preds = predictions[0, context_len+i]
        m = torch.nn.LogSoftmax(0)
        total += -m(preds)[gt_indexed_q_tokens[i]]
    entropy = total / q_len
    perplexity = torch.exp(entropy)
    return perplexity.item()


In [156]:
answerable_probs = []
unanswerable_probs = []
counter = 0
for i in range(1):#len(data)):
    section = data[i]['paragraphs']
    for sec in section:
        context = sec['context']
        qas = sec['qas']
        for j in range(1):#len(qas)):
            question = qas[j]['question']
            label = qas[j]['is_impossible']
            try:
                prob = calc_prob_nn(context, question)
            except:
                continue
            if label:
                unanswerable_probs.append(prob)
            else:
                answerable_probs.append(prob)
            counter += 1
            if counter % 100 == 0:
                print("Processed ", counter)


In [157]:
len(unanswerable_probs), len(answerable_probs)

(0, 39)

In [158]:
answerable_probs

[1.0386896133422852,
 1.00559663772583,
 1.0815110206604004,
 1.0120166540145874,
 1.373599886894226,
 1.668235421180725,
 1.1020958423614502,
 1.0394967794418335,
 1.0531052350997925,
 1.0175679922103882,
 1.2521883249282837,
 1.3428235054016113,
 1.0025913715362549,
 1.017141580581665,
 2.231492519378662,
 1.0351502895355225,
 2.9918084144592285,
 1.0116231441497803,
 1.0478287935256958,
 1.0523213148117065,
 1.3863749504089355,
 1.4054036140441895,
 1.0181043148040771,
 1.2002761363983154,
 1.0166414976119995,
 1.3167959451675415,
 1.7294028997421265,
 1.0487662553787231,
 1.147337794303894,
 1.0318613052368164,
 2.6944332122802734,
 1.014600157737732,
 1.3016120195388794,
 1.0237298011779785,
 1.095757007598877,
 1.1719547510147095,
 1.0836412906646729,
 1.17014479637146,
 1.1324546337127686]

In [154]:
gt_question = question
gt_q_tokens = tokenizer.tokenize(gt_question)
gt_indexed_q_tokens = tokenizer.convert_tokens_to_ids(gt_q_tokens)

mask_tokens = ["[MASK]"]*len(gt_indexed_q_tokens)
indexed_mask_tokens = tokenizer.convert_tokens_to_ids(mask_tokens)

context_tokens = tokenizer.tokenize(context)
indexed_context_tokens = tokenizer.convert_tokens_to_ids(context_tokens)

tokens_tensor = torch.tensor([indexed_context_tokens + gt_indexed_q_tokens])#indexed_mask_tokens])
segments_tensors = torch.tensor([0]*len(indexed_context_tokens) + [1]*len(gt_indexed_q_tokens))
predictions = model(tokens_tensor, segments_tensors)

total = 0
context_len = len(context_tokens)
q_len = len(gt_indexed_q_tokens)
for i in range(q_len):
    preds = predictions[0, context_len+i]
    m = torch.nn.LogSoftmax(0)
    total += -m(preds)[gt_indexed_q_tokens[i]]
entropy = total / q_len
perplexity = torch.exp(entropy)
perplexity.item()



tensor(2.9956e-13, grad_fn=<ExpBackward>)
tensor(5.6307e-14, grad_fn=<ExpBackward>)
tensor(2.2631e-16, grad_fn=<ExpBackward>)
tensor(2.4225e-10, grad_fn=<ExpBackward>)
tensor(3.0055e-10, grad_fn=<ExpBackward>)
tensor(8.1845e-13, grad_fn=<ExpBackward>)
tensor(1.4191e-14, grad_fn=<ExpBackward>)


1.1324546337127686

In [138]:
gt_question = question
gt_q_tokens = tokenizer.tokenize(gt_question)
gt_indexed_q_tokens = tokenizer.convert_tokens_to_ids(gt_q_tokens)

mask_tokens = ["[MASK]"]*len(gt_indexed_q_tokens)
indexed_mask_tokens = tokenizer.convert_tokens_to_ids(mask_tokens)

context_tokens = tokenizer.tokenize(context)
indexed_context_tokens = tokenizer.convert_tokens_to_ids(context_tokens)

tokens_tensor = torch.tensor([indexed_context_tokens + gt_indexed_q_tokens])#indexed_mask_tokens])
segments_tensors = torch.tensor([0]*len(indexed_context_tokens) + [1]*len(gt_indexed_q_tokens))
predictions = model(tokens_tensor, segments_tensors)

total = 0
context_len = len(context_tokens)
q_len = len(gt_indexed_q_tokens)
for i in range(q_len):
    preds = predictions[0, context_len+i]
    m = torch.nn.LogSoftmax(0)
    total += -m(preds)[i]
entropy = total / q_len
perplexity = torch.exp(entropy)
print(perplexity.item())



2292208631808.0


In [None]:
with open("./results.pkl", "wb") as handle:
    pickle.dump((answerable_probs, unanswerable_probs), handle)