In [184]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.append("examples/")

import logging
import argparse
import json
from tqdm import tqdm, trange
import csv
from collections import Counter

import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForPreTraining 
from pytorch_pretrained_bert.optimization import BertAdam

from torch.utils.data import Dataset
import random

In [185]:
from train_cond import InputExample, random_word, InputFeatures, BERTDataset

In [186]:
# args
gradient_accumulation_steps = 1
train_batch_size = 1
eval_file = "dataset/dev-v2.0.json"
max_seq_length=256
on_memory = True
bert_model = "model_cond/pytorch_model6.bin"

In [187]:
device = torch.device("cuda" if torch.cuda.is_available()  else "cpu")
n_gpu = torch.cuda.device_count()

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if n_gpu > 0:
    torch.cuda.manual_seed_all(42)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

# Load eval_data
eval_dataset_answerable = BERTDataset(eval_file, "qparts/part_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory)
eval_dataset_unanswerable = BERTDataset(eval_file, "qparts/part_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory, keep_answerable=False)

# Prepare model
model_state_dict = torch.load(bert_model, map_location='cpu') #TODO daniter: remove this map_location
## TODO daniter: check if bert model is being loaded correctly
model = BertForPreTraining.from_pretrained("bert-base-uncased", state_dict=model_state_dict)
model.to(device)


# Prepare optimizer
print("Checking the vocab size:", len(tokenizer.vocab))
# 768 is bert hidden size, 256 is GRU hidden size, 1 is the layers in the GRU

# eval loader
eval_sampler_ans = SequentialSampler(eval_dataset_answerable)
eval_dataloader_ans = DataLoader(eval_dataset_answerable, sampler=eval_sampler_ans,
                                 batch_size=train_batch_size)
eval_sampler_unans = SequentialSampler(eval_dataset_unanswerable)
eval_dataloader_unans = DataLoader(eval_dataset_unanswerable, sampler=eval_sampler_unans,
                                   batch_size=train_batch_size)


03/27/2019 11:52:44 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/daniter/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1409.17it/s]
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1514.50it/s]
03/27/2019 11:52:48 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
03/27/2019 11:52:48 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d

Checking the vocab size: 30522


In [31]:
target = 219 #50
with torch.no_grad():
    model.eval()

    eval_loss_ans = 0
    for batch_i, eval_batch in enumerate(eval_dataloader_unans):
        eval_batch = tuple(t.to(device) for t in eval_batch)
        input_ids, input_mask, segment_ids, lm_label_ids, is_next = eval_batch
        if batch_i != target:
            continue
        if batch_i == target:
            print(tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]))
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        if batch_i == target:
            break

['[CLS]', 'at', 'the', 'same', 'time', 'the', 'mongols', 'imported', 'central', 'asian', 'muslims', 'to', 'serve', 'as', 'administrators', 'in', 'china', ',', 'the', 'mongols', 'also', 'sent', 'han', 'chinese', 'and', 'k', '##hita', '##ns', 'from', 'china', 'to', 'serve', 'as', 'administrators', 'over', 'the', 'muslim', 'population', 'in', 'bu', '##khar', '##a', 'in', 'central', 'asia', ',', 'using', 'foreigners', 'to', 'curt', '##ail', 'the', 'power', 'of', 'the', 'local', 'peoples', 'of', 'both', 'lands', '.', 'han', 'chinese', 'were', 'moved', 'to', 'central', 'asian', 'areas', 'like', 'be', '##sh', 'bali', '##q', ',', 'alma', '##li', '##q', ',', 'and', 'sam', '##ar', '##qa', '##nd', 'by', 'the', 'mongols', 'where', 'they', 'worked', 'as', 'artisans', 'and', 'farmers', '.', 'alan', '##s', 'were', 'recruited', 'into', 'the', 'mongol', 'forces', 'with', 'one', 'unit', 'called', '"', 'right', 'alan', 'guard', '"', 'which', 'was', 'combined', 'with', '"', 'recently', 'surrendered', '"',

In [32]:
labels = [lab for lab in lm_label_ids.data.numpy().ravel() if lab != -1]
tokens = tokenizer.convert_ids_to_tokens(labels)
print(tokens)



['administrators']


In [33]:
start_i = tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]).index('[PAD]')
print(start_i)
print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1))[start_i:])


254
['what', 'asian']


In [65]:
target = 4 #50
with torch.no_grad():
    model.eval()

    eval_loss_ans = 0
    for batch_i, eval_batch in enumerate(eval_dataloader_ans):
        eval_batch = tuple(t.to(device) for t in eval_batch)
        input_ids, input_mask, segment_ids, lm_label_ids, is_next = eval_batch
        if batch_i != target:
            continue
        if batch_i == target:
            print(tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]))
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        if batch_i == target:
            break

['[CLS]', 'oxygen', 'is', 'the', 'most', 'abundant', 'chemical', 'element', 'by', 'mass', 'in', 'the', 'earth', "'", 's', 'bio', '##sphere', ',', 'air', ',', 'sea', 'and', 'land', '.', 'oxygen', 'is', 'the', 'third', 'most', 'abundant', 'chemical', 'element', 'in', 'the', 'universe', ',', 'after', 'hydrogen', 'and', 'helium', '.', 'about', '0', '.', '9', '%', 'of', 'the', 'sun', "'", 's', 'mass', 'is', 'oxygen', '.', 'oxygen', 'constitutes', '49', '.', '2', '%', 'of', 'the', 'earth', "'", 's', 'crust', 'by', 'mass', 'and', 'is', 'the', 'major', 'component', 'of', 'the', 'world', "'", 's', 'oceans', '(', '88', '.', '8', '%', 'by', 'mass', ')', '.', 'oxygen', 'gas', 'is', 'the', 'second', 'most', 'common', 'component', 'of', 'the', 'earth', "'", 's', 'atmosphere', ',', 'taking', 'up', '20', '.', '8', '%', 'of', 'its', 'volume', 'and', '23', '.', '1', '%', 'of', 'its', 'mass', '(', 'some', '101', '##5', 'tonnes', ')', '.', '[', 'd', ']', 'earth', 'is', 'unusual', 'among', 'the', 'planets'

In [66]:
labels = [lab for lab in lm_label_ids.data.numpy().ravel() if lab != -1]
tokens = tokenizer.convert_ids_to_tokens(labels)
print(tokens)



['oxygen']


In [67]:
start_i = tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]).index('[PAD]')
print(start_i)
print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1))[start_i:])


203
['earth', "'", 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', "'", 's', "'", 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', 's', "'", "'", "'", "'", 's', 's', 's', 's', 's', 's', 's']


In [70]:
for j in range(3):
    c = Counter()
    for i, o in enumerate(output[0][start_i+j]):
        c[i] = o
    for x, val in c.most_common(25):
        print(tokenizer.convert_ids_to_tokens([x]), val)
    print("#"*20)

['earth'] tensor(9.5742)
['oxygen'] tensor(7.6202)
['world'] tensor(6.4623)
['what'] tensor(6.1127)
['most'] tensor(5.4165)
['oceans'] tensor(5.3127)
['atmosphere'] tensor(4.8200)
['which'] tensor(4.7238)
['carbon'] tensor(4.6831)
['water'] tensor(4.6738)
['ocean'] tensor(4.6734)
['mars'] tensor(4.6709)
['planet'] tensor(4.5660)
['air'] tensor(4.5641)
["'"] tensor(4.4854)
['hydrogen'] tensor(4.3737)
['its'] tensor(4.0654)
['neptune'] tensor(3.9373)
['how'] tensor(3.7449)
['life'] tensor(3.7068)
['composed'] tensor(3.5799)
['humans'] tensor(3.5747)
['solar'] tensor(3.5316)
['atmospheric'] tensor(3.5303)
['sea'] tensor(3.4772)
####################
["'"] tensor(11.3023)
['s'] tensor(7.1991)
['gas'] tensor(7.0532)
['.'] tensor(6.5830)
['element'] tensor(6.2769)
['surface'] tensor(6.2494)
['percent'] tensor(6.2119)
['%'] tensor(6.0049)
['much'] tensor(5.8014)
['planet'] tensor(5.6757)
['and'] tensor(5.6275)
['ocean'] tensor(5.5587)
['level'] tensor(5.5387)
['##s'] tensor(5.4988)
['energy'] 

# TODO
1- Print context and question, manually enter the set of conditionals, and evaluate   

2- evaluate some answerable and unanswerable questions by full question conditional and getting actual probabilities of each thing correctly normalized

In [188]:
from itertools import permutations 

def build_input(context, tokens_b, target_tokens):
    tokenized_context = tokenizer.tokenize(context)
    buff_size = sum([len(t) for t in tokens_b]) + len(tokens_b) - 1 + len(target_tokens)
    if len(tokenized_context) + buff_size > max_seq_length - 3:
        end = max_seq_length - 3 - buff_size
        tokenized_context = tokenized_context[:end]
    
    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in tokenized_context:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)

    for i, conditional in enumerate(tokens_b):
        for token in conditional:
            tokens.append(token)
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    return torch.tensor([input_ids]), torch.tensor([input_mask]), torch.tensor([segment_ids])

In [113]:
target = 4 #50
order = [2, 3, 1, 0]
cid, qid, targetid, _ = eval_dataloader_ans.dataset.examples[target]
context = eval_dataloader_ans.dataset.contexts[cid]
question = eval_dataloader_ans.dataset.questions[qid]
raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]

raw_targ = [raw_targ[i] for i in order]
raw_targ_copy = list(raw_targ)
print(context)
print("~"*20)
print(question)
print(raw_targ)
print("~"*20)

with torch.no_grad():
    model.eval()
    
    best_perm = None
    best_odds = 0
    num_perms = len(list(permutations(range(len(raw_targ_copy)))))
    if num_perms > 24:
        print("Too many options")
        raw_targ_copy = None
    for perm_idx, raw_targ in enumerate(permutations(raw_targ_copy)):
        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        # print("Total Odds:", total_odds)
        print("Finished ", perm_idx, "of", num_perms)
print(best_odds)
print(best_perm)
            
            
            

Oxygen is the most abundant chemical element by mass in the Earth's biosphere, air, sea and land. Oxygen is the third most abundant chemical element in the universe, after hydrogen and helium. About 0.9% of the Sun's mass is oxygen. Oxygen constitutes 49.2% of the Earth's crust by mass and is the major component of the world's oceans (88.8% by mass). Oxygen gas is the second most common component of the Earth's atmosphere, taking up 20.8% of its volume and 23.1% of its mass (some 1015 tonnes).[d] Earth is unusual among the planets of the Solar System in having such a high concentration of oxygen gas in its atmosphere: Mars (with 0.1% O
2 by volume) and Venus have far lower concentrations. The O
2 surrounding these other planets is produced solely by ultraviolet radiation impacting oxygen-containing molecules such as carbon dioxide.
~~~~~~~~~~~~~~~~~~~~
Where  by mass is oxygen a major part?
[('[NP]', 'oxygen'), ('[NP]', 'major part'), ('[NP]', 'mass'), ('[WHADVP]', 'Where')]
~~~~~~~~~~

In [129]:
target = 11 #50
cid, qid, targetid, _ = eval_dataloader_unans.dataset.examples[target]
context = eval_dataloader_ans.dataset.contexts[cid]
question = eval_dataloader_ans.dataset.questions[qid]
raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]
raw_targ_copy = list(raw_targ)

print(context)
print("~"*20)
print(question)
print(raw_targ)
print("~"*20)

with torch.no_grad():
    model.eval()
    
    best_perm = None
    best_odds = 0
    num_perms = len(list(permutations(range(len(raw_targ_copy)))))
    if num_perms > 24:
        print("Too many options")
        raw_targ_copy = None
    for perm_idx, raw_targ in enumerate(permutations(raw_targ_copy)):
        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        # print("Total Odds:", total_odds)
        print("Finished ", perm_idx, "of", num_perms)
print(best_odds)
print(best_perm)
            
            
            

The official record high temperature for Fresno is 115 °F (46.1 °C), set on July 8, 1905, while the official record low is 17 °F (−8 °C), set on January 6, 1913. The average windows for 100 °F (37.8 °C)+, 90 °F (32.2 °C)+, and freezing temperatures are June 1 thru September 13, April 26 thru October 9, and December 10 thru January 28, respectively, and no freeze occurred between in the 1983/1984 season. Annual rainfall has ranged from 23.57 inches (598.7 mm) in the “rain year” from July 1982 to June 1983 down to 4.43 inches (112.5 mm) from July 1933 to June 1934. The most rainfall in one month was 9.54 inches (242.3 mm) in November 1885 and the most rainfall in 24 hours 3.55 inches (90.2 mm) on November 18, 1885. Measurable precipitation falls on an average of 48 days annually. Snow is a rarity; the heaviest snowfall at the airport was 2.2 inches (0.06 m) on January 21, 1962.
~~~~~~~~~~~~~~~~~~~~
What is the record high in January?
[('[WHNP]', 'What'), ('[NP]', 'record high'), ('[NP]',

In [119]:
len(list(permutations(range(5))))

120

In [142]:
context = 0
ans_questions = set()
unans_questions = set()
ans_examples = []
unans_examples = []
for x in eval_dataloader_unans.dataset.examples:
    if x[0] == context:
        if x[1] not in unans_questions:
            unans_examples.append(x)
        unans_questions.add(x[1])        
for x in eval_dataloader_ans.dataset.examples:
    if x[0] == context:
        if x[1] not in ans_questions:
            ans_examples.append(x)
        ans_questions.add(x[1])
print (eval_dataloader_unans.dataset.contexts[context])

The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.


In [151]:
for example in ans_examples:
    cid, qid, targetid, _ = example
    context = eval_dataloader_ans.dataset.contexts[cid]
    question = eval_dataloader_ans.dataset.questions[qid]
    raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]
    raw_targ_copy = list(raw_targ)

    #print(context)
    print("~"*20)
    print(question)
    print(raw_targ)
    print("~"*20)
    
    first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
    middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
    middle_targets = []
    for i in range(20):
        for tag, words in middle_targets_int:
            if str(i) in tag:
                middle_targets.append((tag,words))

    second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
    random.shuffle(first_targets)
    random.shuffle(second_targets)
    raw_targ = (first_targets + middle_targets + second_targets)

    with torch.no_grad():
        model.eval()

        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        print("Total Odds:", total_odds)


~~~~~~~~~~~~~~~~~~~~
From which countries did the Norse originate?
[('[WHPP]', 'From which countries'), ('[NP]', 'Norse'), ('[VB]', 'originate')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(1.8462)
~~~~~~~~~~~~~~~~~~~~
What century did the Normans first gain their separate identity?
[('[WHNP]', 'What century'), ('[NP]', 'Normans'), ('[VBP]', 'gain'), ('[NP]', 'their separate identity')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(3.4586)
~~~~~~~~~~~~~~~~~~~~
In what country is Normandy located?
[('[WHNP]', 'what country'), ('[NP]', 'Normandy'), ('[VBN]', 'located')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(3.2766)
~~~~~~~~~~~~~~~~~~~~
When were the Normans in Normandy?
[('[WHADVP]', 'When'), ('[NP]', 'Normans'), ('[NP]', 'Normandy')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(3.9580)
~~~~~~~~~~~~~~~~~~~~
Who was the Norse leader?
[('[WHNP]', 'Who'), ('[NP]', 'Norse leader')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(2.0593)


In [153]:
for example in unans_examples:
    cid, qid, targetid, _ = example
    context = eval_dataloader_ans.dataset.contexts[cid]
    question = eval_dataloader_ans.dataset.questions[qid]
    raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]

    raw_targ_copy = list(raw_targ)
    #print(context)
    print("~"*20)
    print(question)
    print(raw_targ)
    print("~"*20)
    
    first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
    middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
    middle_targets = []
    for i in range(20):
        for tag, words in middle_targets_int:
            if str(i) in tag:
                middle_targets.append((tag,words))

    second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
    random.shuffle(first_targets)
    random.shuffle(second_targets)
    raw_targ = (first_targets + middle_targets + second_targets)

    with torch.no_grad():
        model.eval()

        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        print("Total Odds:", total_odds)



            

~~~~~~~~~~~~~~~~~~~~
What is France a region of?
[('[WHNP]', 'What'), ('[NP]', 'France'), ('[NP]', 'region')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(4.0956)
~~~~~~~~~~~~~~~~~~~~
When did the Frankish identity emerge?
[('[WHADVP]', 'When'), ('[NP]', 'Frankish identity'), ('[VB]', 'emerge')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(2.2725)
~~~~~~~~~~~~~~~~~~~~
Who gave their name to Normandy in the 1000's and 1100's
[('[WHNP]', 'Who'), ('[VBD]', 'gave'), ('[NP]', 'their name'), ('[NP]', 'Normandy'), ('[NP]', "1000 's"), ('[NP]', "1100 's")]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(2.9115)
~~~~~~~~~~~~~~~~~~~~
Who did King Charles III swear fealty to?
[('[WHNP]', 'Who'), ('[NP]', 'King Charles III'), ('[VBP]', 'swear'), ('[NP]', 'fealty')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(3.1540)


In [160]:
def get_examples(contexts):
    ans_examples = []
    unans_examples = []
    for context in contexts:
        ans_questions = set()
        unans_questions = set()
        for x in eval_dataloader_unans.dataset.examples:
            if x[0] == context:
                if x[1] not in unans_questions:
                    unans_examples.append(x)
                unans_questions.add(x[1])        
        for x in eval_dataloader_ans.dataset.examples:
            if x[0] == context:
                if x[1] not in ans_questions:
                    ans_examples.append(x)
                ans_questions.add(x[1])
        #print (eval_dataloader_unans.dataset.contexts[context])
    return((ans_examples, unans_examples))

In [213]:
def get_avg_odds(examples, dataloader):
    if len(examples) == 0:
        return 0
    total_total_odds = 0
    for example in examples:
        cid, qid, targetid, _ = example
        context = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        raw_targ = dataloader.dataset.raw_targets[targetid]

        raw_targ_copy = list(raw_targ)

        first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
        middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
        middle_targets = []
        for i in range(20):
            for tag, words in middle_targets_int:
                if str(i) in tag:
                    middle_targets.append((tag,words))

        second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
        random.shuffle(first_targets)
        random.shuffle(second_targets)
        raw_targ = (first_targets + middle_targets + second_targets)
        raw_targ = [t for t in raw_targ if t]

        with torch.no_grad():
            model.eval()

            targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
            targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
            total_odds = 0
            for token_idx in range(len(raw_targ)):
                odds = 0
                input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx], targs_2_tokens[token_idx])
                output, _ = model(input_ids, segment_ids, input_mask, None, None)
                
                #print(input_mask)
                start_id = np.where(input_mask.data.numpy() == 0)[1][0]
                for t_i, t in enumerate(targs_2_ids[token_idx]):
                    odds += output[0][start_i+t_i][t]
                if len(targs_2_ids[token_idx]) == 0:
                    print(token_idx, targs_2_ids, targs_2_ids[token_idx])
                odds = odds/len(targs_2_ids[token_idx])
                # print(odds)
                total_odds += odds
            total_odds /= len(raw_targ)
            total_total_odds += total_odds
            print("Total Odds:", total_odds)
    return (total_total_odds / len(examples)).item()





In [190]:
contexts = range(20)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    avg_ans_odds = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    avg_unans_odds = get_avg_odds([e for e in unans_e if e[0] == context], eval_dataloader_unans)
    print(avg_ans_odds, avg_unans_odds)

tensor(3.5535) tensor(3.3792)
tensor(-1.0365) tensor(-1.3864)
tensor(4.2462) tensor(4.7270)
tensor(4.1131) tensor(1.8831)
tensor(0.4667) tensor(3.4406)
tensor(2.7906) tensor(2.3163)
tensor(3.4063) tensor(2.8470)
tensor(5.2118) tensor(4.2501)
tensor(1.7427) tensor(1.4449)
tensor(2.4113) tensor(1.0845)
tensor(3.4868) tensor(1.6543)
tensor(-2.3763) tensor(-3.0515)
tensor(3.3259) tensor(1.9908)
tensor(2.5147) tensor(1.4459)
tensor(2.6071) tensor(2.7918)
tensor(4.4486) tensor(2.1497)
tensor(3.0473) tensor(3.1637)
tensor(5.8250) tensor(2.5912)
tensor(0.6822) tensor(0.7596)
tensor(4.7678) tensor(2.6951)


In [201]:
def print_context_and_questions(c_i, ans_e, unans_e):
    context = eval_dataloader_ans.dataset.contexts[c_i]
    ans_q = [eval_dataloader_ans.dataset.questions[x[1]] for x in ans_e if x[0] == c_i]
    unans_q = [eval_dataloader_unans.dataset.questions[x[1]] for x in unans_e if x[0] == c_i]
    print(context)
    print("~"*20)
    print("Answerable")
    for q in ans_q:
        print(q)
    print("~"*20)
    print("Unanswerable")
    for q in unans_q:
        print(q)
    print("#"*20)

In [215]:
contexts = random.sample(range(1203), 20)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    #print_context_and_questions(context, ans_e, unans_e)
    print("ans")
    avg_ans_odds = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    print("unans")
    avg_unans_odds = get_avg_odds([e for e in unans_e if e[0] == context], eval_dataloader_unans)
    print(avg_ans_odds, avg_unans_odds)

ans
Total Odds: tensor(-0.8865)
Total Odds: tensor(-2.5891)
Total Odds: tensor(-1.3466)
Total Odds: tensor(-3.0396)
Total Odds: tensor(-0.5524)
Total Odds: tensor(-1.2663)
Total Odds: tensor(-0.6792)
Total Odds: tensor(-1.5053)
Total Odds: tensor(-1.3839)
Total Odds: tensor(-1.0146)
unans
Total Odds: tensor(-1.8431)
Total Odds: tensor(-2.9306)
Total Odds: tensor(-2.7798)
Total Odds: tensor(-1.8064)
Total Odds: tensor(-1.6742)
-1.4263426065444946 -2.206820011138916
ans
Total Odds: tensor(0.3675)
Total Odds: tensor(-0.5556)
Total Odds: tensor(0.1865)
Total Odds: tensor(-0.9537)
unans
Total Odds: tensor(-1.3722)
Total Odds: tensor(-0.7243)
Total Odds: tensor(-0.9052)
Total Odds: tensor(-1.7553)
Total Odds: tensor(-1.7770)
-0.23881275951862335 -1.306786298751831
ans
Total Odds: tensor(2.5054)
Total Odds: tensor(4.0030)
Total Odds: tensor(2.6075)
Total Odds: tensor(2.4823)
Total Odds: tensor(2.0693)
unans
Total Odds: tensor(2.3941)
Total Odds: tensor(0.9004)
Total Odds: tensor(3.2778)
Total

ZeroDivisionError: division by zero