In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.append("examples/")

import logging
import argparse
import json
from tqdm import tqdm, trange
import csv
from collections import Counter

import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForPreTraining 
from pytorch_pretrained_bert.optimization import BertAdam

from torch.utils.data import Dataset
import random

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
from train_cond_with_copy import InputExample, random_word, InputFeatures, BERTDataset

In [3]:
# args
gradient_accumulation_steps = 1
train_batch_size = 1
eval_file = "dataset/dev-v2.0.json"
max_seq_length=256
on_memory = True
bert_model = "model_copy/pytorch_model0.bin"

In [4]:
device = torch.device("cuda" if torch.cuda.is_available()  else "cpu")
n_gpu = torch.cuda.device_count()

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if n_gpu > 0:
    torch.cuda.manual_seed_all(42)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

# Load eval_data
eval_dataset_answerable = BERTDataset(eval_file, "qparts/copy_parts2/parsed_qs_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory)
eval_dataset_unanswerable = BERTDataset(eval_file, "qparts/copy_parts2/parsed_qs_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory, keep_answerable=False)

# Prepare model
model_state_dict = torch.load(bert_model, map_location='cpu') #TODO daniter: remove this map_location
## TODO daniter: check if bert model is being loaded correctly
model = BertForPreTraining.from_pretrained("bert-base-uncased", state_dict=model_state_dict)
model.to(device)


# Prepare optimizer
print("Checking the vocab size:", len(tokenizer.vocab))
# 768 is bert hidden size, 256 is GRU hidden size, 1 is the layers in the GRU

# eval loader
eval_sampler_ans = SequentialSampler(eval_dataset_answerable)
eval_dataloader_ans = DataLoader(eval_dataset_answerable, sampler=eval_sampler_ans,
                                 batch_size=train_batch_size)
eval_sampler_unans = SequentialSampler(eval_dataset_unanswerable)
eval_dataloader_unans = DataLoader(eval_dataset_unanswerable, sampler=eval_sampler_unans,
                                   batch_size=train_batch_size)


04/26/2019 10:20:58 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/daniter/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1607.72it/s]
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1406.76it/s]
04/26/2019 10:21:04 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
04/26/2019 10:21:04 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d

Checking the vocab size: 30522


In [5]:
target = 0 #50
with torch.no_grad():
    model.eval()

    eval_loss_ans = 0
    for batch_i, eval_batch in enumerate(eval_dataloader_unans):
        eval_batch = tuple(t.to(device) for t in eval_batch)
        input_ids, input_mask, segment_ids, lm_label_ids, is_next = eval_batch
        if batch_i != target:
            continue
        if batch_i == target:
            print(tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]))
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        if batch_i == target:
            break

['[CLS]', 'the', 'harvard', 'business', 'school', 'and', 'many', 'of', 'the', 'university', "'", 's', 'athletics', 'facilities', ',', 'including', 'harvard', 'stadium', ',', 'are', 'located', 'on', 'a', '35', '##8', '-', 'acre', '(', '145', 'ha', ')', 'campus', 'opposite', 'the', 'cambridge', 'campus', 'in', 'all', '##ston', '.', 'the', 'john', 'w', '.', 'weeks', 'bridge', 'is', 'a', 'pedestrian', 'bridge', 'over', 'the', 'charles', 'river', 'connecting', 'both', 'campuses', '.', 'the', 'harvard', 'medical', 'school', ',', 'harvard', 'school', 'of', 'dental', 'medicine', ',', 'and', 'the', 'harvard', 'school', 'of', 'public', 'health', 'are', 'located', 'on', 'a', '21', '-', 'acre', '(', '8', '.', '5', 'ha', ')', 'campus', 'in', 'the', 'long', '##wood', 'medical', 'and', 'academic', 'area', 'approximately', '3', '.', '3', 'miles', '(', '5', '.', '3', 'km', ')', 'southwest', 'of', 'downtown', 'boston', 'and', '3', '.', '3', 'miles', '(', '5', '.', '3', 'km', ')', 'south', 'of', 'the', '

In [6]:
labels = [lab for lab in lm_label_ids.data.numpy().ravel() if lab != -1]
tokens = tokenizer.convert_ids_to_tokens(labels)
print(tokens)



['they', '[SEP]']


In [7]:
start_i = tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]).index('[PAD]')
print(start_i)
print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1))[start_i:])


143
['you', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', 'you', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', 

In [8]:
target = 4 #50
with torch.no_grad():
    model.eval()

    eval_loss_ans = 0
    for batch_i, eval_batch in enumerate(eval_dataloader_ans):
        eval_batch = tuple(t.to(device) for t in eval_batch)
        input_ids, input_mask, segment_ids, lm_label_ids, is_next = eval_batch
        if batch_i != target:
            continue
        if batch_i == target:
            print(tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]))
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        if batch_i == target:
            break

['[CLS]', 'research', 'by', 'harvard', 'economist', 'robert', 'barr', '##o', ',', 'found', 'that', 'there', 'is', '"', 'little', 'overall', 'relation', 'between', 'income', 'inequality', 'and', 'rates', 'of', 'growth', 'and', 'investment', '"', '.', 'according', 'to', 'work', 'by', 'barr', '##o', 'in', '1999', 'and', '2000', ',', 'high', 'levels', 'of', 'inequality', 'reduce', 'growth', 'in', 'relatively', 'poor', 'countries', 'but', 'encourage', 'growth', 'in', 'richer', 'countries', '.', 'a', 'study', 'of', 'swedish', 'counties', 'between', '1960', 'and', '2000', 'found', 'a', 'positive', 'impact', 'of', 'inequality', 'on', 'growth', 'with', 'lead', 'times', 'of', 'five', 'years', 'or', 'less', ',', 'but', 'no', 'correlation', 'after', 'ten', 'years', '.', 'studies', 'of', 'larger', 'data', 'sets', 'have', 'found', 'no', 'correlation', '##s', 'for', 'any', 'fixed', 'lead', 'time', ',', 'and', 'a', 'negative', 'impact', 'on', 'the', 'duration', 'of', 'growth', '.', '[SEP]', 'swedish',

In [9]:
labels = [lab for lab in lm_label_ids.data.numpy().ravel() if lab != -1]
tokens = tokenizer.convert_ids_to_tokens(labels)
print(tokens)



['when', '[SEP]']


In [10]:
start_i = tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]).index('[PAD]')
print(start_i)
print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1))[start_i:])


128
['when', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', 'when', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', 'when', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]',

In [11]:
for j in range(3):
    c = Counter()
    for i, o in enumerate(output[0][start_i+j]):
        c[i] = o
    for x, val in c.most_common(25):
        print(tokenizer.convert_ids_to_tokens([x]), val)
    print("#"*20)

['when'] tensor(15.2796)
['how'] tensor(13.4058)
['where'] tensor(10.9517)
['why'] tensor(9.4068)
['years'] tensor(7.2966)
['between'] tensor(6.9106)
['what'] tensor(6.8394)
['before'] tensor(6.3345)
['and'] tensor(6.1056)
['times'] tensor(6.0610)
['after'] tensor(5.7624)
['[SEP]'] tensor(5.2550)
['during'] tensor(5.2102)
['which'] tensor(5.1672)
['once'] tensor(5.1606)
['time'] tensor(5.1575)
['while'] tensor(4.9841)
['if'] tensor(4.9710)
['that'] tensor(4.9629)
['whenever'] tensor(4.9058)
['since'] tensor(4.7756)
['twice'] tensor(4.6689)
['so'] tensor(4.6205)
['in'] tensor(4.3549)
['whereby'] tensor(4.2706)
####################
['[SEP]'] tensor(10.7239)
['long'] tensor(9.7922)
['years'] tensor(8.2666)
['many'] tensor(7.7319)
['often'] tensor(6.8625)
['times'] tensor(6.8383)
['between'] tensor(6.3456)
['when'] tensor(6.1236)
['much'] tensor(5.7831)
['how'] tensor(5.7790)
['and'] tensor(5.3981)
['in'] tensor(5.2458)
['time'] tensor(4.9973)
['ago'] tensor(4.8761)
['of'] tensor(4.5707)
[

In [22]:
from itertools import permutations 

def build_input(context, tokens_b, target_tokens):
    tokenized_context = tokenizer.tokenize(context)
    buff_size = sum([len(t) for t in tokens_b]) + len(tokens_b) - 1 + len(target_tokens)
    if len(tokenized_context) + buff_size > max_seq_length - 3:
        end = max_seq_length - 3 - buff_size
        tokenized_context = tokenized_context[:end]
    
    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in tokenized_context:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)

    for i, conditional in enumerate(tokens_b):
        for token in conditional:
            tokens.append(token)
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    return torch.tensor([input_ids]), torch.tensor([input_mask]), torch.tensor([segment_ids])

In [17]:
target = 4 #50
order = [2, 3, 1, 0]
cid, qid, targetid, _ = eval_dataloader_ans.dataset.examples[target]
context = eval_dataloader_ans.dataset.contexts[cid]
question = eval_dataloader_ans.dataset.questions[qid]
raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]

raw_targ = [raw_targ[i] for i in order]
raw_targ_copy = list(raw_targ)
print(context)
print("~"*20)
print(question)
print(raw_targ)
print("~"*20)

with torch.no_grad():
    model.eval()
    
    best_perm = None
    best_odds = 0
    num_perms = len(list(permutations(range(len(raw_targ_copy)))))
    if num_perms > 24:
        print("Too many options")
        raw_targ_copy = None
    for perm_idx, raw_targ in enumerate(permutations(raw_targ_copy)):
        print (raw_targs)
        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        # print("Total Odds:", total_odds)
        print("Finished ", perm_idx, "of", num_perms)
print(best_odds)
print(best_perm)
            
            
            

Research by Harvard economist Robert Barro, found that there is "little overall relation between income inequality and rates of growth and investment". According to work by Barro in 1999 and 2000, high levels of inequality reduce growth in relatively poor countries but encourage growth in richer countries. A study of Swedish counties between 1960 and 2000 found a positive impact of inequality on growth with lead times of five years or less, but no correlation after ten years. Studies of larger data sets have found no correlations for any fixed lead time, and a negative impact on the duration of growth.
~~~~~~~~~~~~~~~~~~~~
When was a study conducted of Swedish counties?
[(['conducted'], ('conducted', 'VBN')), (['when'], ('When', 'WHADVP')), (['study'], ('study', 'NN')), (['swedish', 'counties'], ('Swedish counties', 'NP'))]
~~~~~~~~~~~~~~~~~~~~


NameError: name 'raw_targs' is not defined

In [129]:
target = 11 #50
cid, qid, targetid, _ = eval_dataloader_unans.dataset.examples[target]
context = eval_dataloader_ans.dataset.contexts[cid]
question = eval_dataloader_ans.dataset.questions[qid]
raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]
raw_targ_copy = list(raw_targ)

print(context)
print("~"*20)
print(question)
print(raw_targ)
print("~"*20)

with torch.no_grad():
    model.eval()
    
    best_perm = None
    best_odds = 0
    num_perms = len(list(permutations(range(len(raw_targ_copy)))))
    if num_perms > 24:
        print("Too many options")
        raw_targ_copy = None
    for perm_idx, raw_targ in enumerate(permutations(raw_targ_copy)):
        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        # print("Total Odds:", total_odds)
        print("Finished ", perm_idx, "of", num_perms)
print(best_odds)
print(best_perm)
            
            
            

The official record high temperature for Fresno is 115 °F (46.1 °C), set on July 8, 1905, while the official record low is 17 °F (−8 °C), set on January 6, 1913. The average windows for 100 °F (37.8 °C)+, 90 °F (32.2 °C)+, and freezing temperatures are June 1 thru September 13, April 26 thru October 9, and December 10 thru January 28, respectively, and no freeze occurred between in the 1983/1984 season. Annual rainfall has ranged from 23.57 inches (598.7 mm) in the “rain year” from July 1982 to June 1983 down to 4.43 inches (112.5 mm) from July 1933 to June 1934. The most rainfall in one month was 9.54 inches (242.3 mm) in November 1885 and the most rainfall in 24 hours 3.55 inches (90.2 mm) on November 18, 1885. Measurable precipitation falls on an average of 48 days annually. Snow is a rarity; the heaviest snowfall at the airport was 2.2 inches (0.06 m) on January 21, 1962.
~~~~~~~~~~~~~~~~~~~~
What is the record high in January?
[('[WHNP]', 'What'), ('[NP]', 'record high'), ('[NP]',

In [119]:
len(list(permutations(range(5))))

120

In [18]:
context = 0
ans_questions = set()
unans_questions = set()
ans_examples = []
unans_examples = []
for x in eval_dataloader_unans.dataset.examples:
    if x[0] == context:
        if x[1] not in unans_questions:
            unans_examples.append(x)
        unans_questions.add(x[1])        
for x in eval_dataloader_ans.dataset.examples:
    if x[0] == context:
        if x[1] not in ans_questions:
            ans_examples.append(x)
        ans_questions.add(x[1])
print (eval_dataloader_unans.dataset.contexts[context])

The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.


In [21]:
for example in ans_examples:
    cid, qid, targetid, _ = example
    context = eval_dataloader_ans.dataset.contexts[cid]
    question = eval_dataloader_ans.dataset.questions[qid]
    raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]
    raw_targ_copy = list(raw_targ)

    #print(context)
    print("~"*20)
    print(question)
    print(raw_targ)
    print("~"*20)
    
#     first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
#     middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
#     middle_targets = []
#     for i in range(20):
#         for tag, words in middle_targets_int:
#             if str(i) in tag:
#                 middle_targets.append((tag,words))

#     second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
#     random.shuffle(first_targets)
#     random.shuffle(second_targets)
#     raw_targ = (first_targets + middle_targets + second_targets)
    raw_targ = [(tag,word) for (word,(_,tag)) in raw_targ if word]
    

    with torch.no_grad():
        model.eval()

        targs_2_tokens = [t for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        print("Total Odds:", total_odds)


~~~~~~~~~~~~~~~~~~~~
From which countries did the Norse originate?
[(['norse'], ('Norse', 'NNP')), (['originate'], ('originate', 'VB')), (['from', 'which', 'countries'], ('From which countries', 'WHPP'))]
~~~~~~~~~~~~~~~~~~~~


TypeError: build_input() missing 1 required positional argument: 'target_tokens'

In [153]:
for example in unans_examples:
    cid, qid, targetid, _ = example
    context = eval_dataloader_ans.dataset.contexts[cid]
    question = eval_dataloader_ans.dataset.questions[qid]
    raw_targ = eval_dataloader_ans.dataset.raw_targets[targetid]

    raw_targ_copy = list(raw_targ)
    #print(context)
    print("~"*20)
    print(question)
    print(raw_targ)
    print("~"*20)
    
    first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
    middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
    middle_targets = []
    for i in range(20):
        for tag, words in middle_targets_int:
            if str(i) in tag:
                middle_targets.append((tag,words))

    second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
    random.shuffle(first_targets)
    random.shuffle(second_targets)
    raw_targ = (first_targets + middle_targets + second_targets)

    with torch.no_grad():
        model.eval()

        targs_2_tokens = [tokenizer.tokenize(t) for _, t in raw_targ]
        targs_2_ids = [tokenizer.convert_tokens_to_ids(t) for t in targs_2_tokens]
        total_odds = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            #print("Given ", [t for _, t in raw_targ[:token_idx]], "predict", raw_targ[token_idx][1])
            # print("P(%s | %s )" % (raw_targ[token_idx][1], ",".join([t for _, t in raw_targ[:token_idx]])))
            input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx])
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            #print(tokenizer.convert_ids_to_tokens(np.argmax(output[0].data.numpy(), axis=1)))
            start_id = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx]):
                odds += output[0][start_i+t_i][t]
            odds = odds/len(targs_2_ids[token_idx])
            # print(odds)
            total_odds += odds
        total_odds /= len(raw_targ)
        if total_odds > best_odds:
            best_odds = total_odds
            best_perm = list(raw_targ)
        print("Total Odds:", total_odds)



            

~~~~~~~~~~~~~~~~~~~~
What is France a region of?
[('[WHNP]', 'What'), ('[NP]', 'France'), ('[NP]', 'region')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(4.0956)
~~~~~~~~~~~~~~~~~~~~
When did the Frankish identity emerge?
[('[WHADVP]', 'When'), ('[NP]', 'Frankish identity'), ('[VB]', 'emerge')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(2.2725)
~~~~~~~~~~~~~~~~~~~~
Who gave their name to Normandy in the 1000's and 1100's
[('[WHNP]', 'Who'), ('[VBD]', 'gave'), ('[NP]', 'their name'), ('[NP]', 'Normandy'), ('[NP]', "1000 's"), ('[NP]', "1100 's")]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(2.9115)
~~~~~~~~~~~~~~~~~~~~
Who did King Charles III swear fealty to?
[('[WHNP]', 'Who'), ('[NP]', 'King Charles III'), ('[VBP]', 'swear'), ('[NP]', 'fealty')]
~~~~~~~~~~~~~~~~~~~~
Total Odds: tensor(3.1540)


In [23]:
def get_examples(contexts):
    ans_examples = []
    unans_examples = []
    for context in contexts:
        ans_questions = set()
        unans_questions = set()
        for x in eval_dataloader_unans.dataset.examples:
            if x[0] == context:
                if x[1] not in unans_questions:
                    unans_examples.append(x)
                unans_questions.add(x[1])        
        for x in eval_dataloader_ans.dataset.examples:
            if x[0] == context:
                if x[1] not in ans_questions:
                    ans_examples.append(x)
                ans_questions.add(x[1])
        #print (eval_dataloader_unans.dataset.contexts[context])
    return((ans_examples, unans_examples))

In [24]:
from torch.nn import LogSoftmax
softmax_model = LogSoftmax(dim=0)

def perplexity(logit_idx, dist):
    log_prob = 0
    for i, lg_idx in enumerate(logit_idx):
        prob = softmax_model(dist[i])[lg_idx]
        log_prob += prob
    return (log_prob / len(logit_idx)).item()

In [25]:
def build_input(context, tokens_b, target_tokens, multihint=False):
    tokenized_context = tokenizer.tokenize(context)
    buff_size = sum([len(t[0]) for t in tokens_b]) + len(tokens_b) - 1 + len(target_tokens[2]) + len(target_tokens[1]) + 1
    if len(tokenized_context) + buff_size > max_seq_length - 3:
        end = max_seq_length - 3 - buff_size
        tokenized_context = tokenized_context[:end]
    
    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in tokenized_context:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)

    for i, conditional in enumerate(tokens_b):
        for token in conditional[0]:
            tokens.append(token)
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)
    
    for token in target_tokens[1]:
        tokens.append(token)
        segment_ids.append(1)
    tokens.append("[SEP]")
    segment_ids.append(1)
    
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    return torch.tensor([input_ids]), torch.tensor([input_mask]), torch.tensor([segment_ids])

In [29]:
def get_avg_odds(examples, dataloader, multihint=False):
    if len(examples) == 0:
        return 0
    total_total_odds = 0
    total_total_perlex = 0
    max_perplex = 0
    results = {}
    for example in examples:
        cid, qid, targetid, _ = example
        context = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        raw_targ = dataloader.dataset.raw_targets[targetid]
        results[(context, question)] = {}

        raw_targ_copy = list(raw_targ)

#         first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
#         middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
#         middle_targets = []
#         for i in range(20):
#             for tag, words in middle_targets_int:
#                 if str(i) in tag:
#                     middle_targets.append((tag,words))

#         second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
#         random.shuffle(first_targets)
#         random.shuffle(second_targets)
#         raw_targ = (first_targets + middle_targets + second_targets)
#         raw_targ = [t for t in raw_targ if t[1]]
        raw_targ = [(tag,word) for (word,(_,tag)) in raw_targ if word]


        with torch.no_grad():
            model.eval()

            targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
            for tidx, (tag, words) in enumerate(raw_targ):
                clean_tag = tag
                if multihint:
                    span = tokenizer.tokenize(clean_tag) + ["[SEP]"] + words
                else:
                    span = words
                targs_2_tokens.append((span, tokenizer.tokenize(clean_tag), words))
                    
            targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

            total_odds = 0
            min_odds = 100
            total_perplex = 0
            for token_idx in range(len(raw_targ)):
                odds = 0
                odds_list = []
                input_ids, input_mask, segment_ids = build_input(context, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
                output, _ = model(input_ids, segment_ids, input_mask, None, None)
                
                #print(input_mask)
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                    odds += output[0][start_i+t_i][t]
                    odds_list.append(output[0][start_i+t_i][t])
                if len(targs_2_ids[token_idx]) == 0:
                    print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
                odds = odds/len(targs_2_ids[token_idx])
                if odds < min_odds:
                    min_odds = odds
                # print(odds)
                total_odds += odds
                perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
                results[(context, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                              str(targs_2_tokens[token_idx][1:]))] = -perplex
                total_perplex += perplex / len(raw_targ)
                #print(perplex)
            #print("Perplexity", -total_perplex)
            total_odds /= len(raw_targ)
            total_total_odds += total_odds
            total_total_perlex += -total_perplex
            if -total_perplex > max_perplex:
                max_perplex = -total_perplex
            #print("Total Odds:", total_odds)
            #print("Min odds:", min_odds)
    return (total_total_perlex / len(examples)), max_perplex, results





In [30]:
contexts = range(1)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    avg_ans_odds = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    avg_unans_odds = get_avg_odds([e for e in unans_e if e[0] == context], eval_dataloader_unans)
    print(avg_ans_odds, avg_unans_odds)

(3.2651897017161047, 5.310597658157349, {('The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.', 'From which countries did the Norse originate?'): {('[]', "(['n', '##np'], ['norse'])"): 10.089546203613281, ("[['norse']]", "(['v', '##b'], ['originate'])"): 2.869373321533203, ("[['norse'], ['originate']]", "([

In [31]:
def print_context_and_questions(c_i, ans_e, unans_e):
    context = eval_dataloader_ans.dataset.contexts[c_i]
    ans_q = [eval_dataloader_ans.dataset.questions[x[1]] for x in ans_e if x[0] == c_i]
    unans_q = [eval_dataloader_unans.dataset.questions[x[1]] for x in unans_e if x[0] == c_i]
    print(context)
    print("~"*20)
    print("Answerable")
    for q in ans_q:
        print(q)
    print("~"*20)
    print("Unanswerable")
    for q in unans_q:
        print(q)
    print("#"*20)

In [32]:
contexts = random.sample(range(1203), 20)
ans_e, unans_e = get_examples(contexts)
ans_res, unans_res = {}, {}
for context in contexts:
    #print_context_and_questions(context, ans_e, unans_e)
    #print("ans")
    avg_ans_odds, max_ent_ans, r = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    ans_res.update(r)
    #print("unans")
    avg_unans_odds, max_ent_unans, r = get_avg_odds([e for e in unans_e if e[0] == context], eval_dataloader_unans)
    unans_res.update(r)
    print(avg_ans_odds, avg_unans_odds, max_ent_ans, max_ent_unans)

2.7226944851875308 4.579863770802816 4.585283637046814 8.034750938415527
12.515760445594788 13.028699105978014 13.430518507957458 13.949241876602171
3.3953035295009615 3.6342637062072756 4.763099431991577 6.1258080800374355
2.365267864863078 3.4835811614990235 3.4965551694234214 8.483185291290283
4.761589556194488 5.072316064039866 5.473004261652628 6.15186627705892
1.7553026676177979 3.898937910397848 4.658357381820679 5.163528084754944
3.1842331329981484 3.3742789729436238 4.519792874654134 4.683317184448242
13.654083570412226 12.792067101796468 16.38955007280622 15.097410837809244
1.8402368982632957 3.5777226765950525 2.769144455591838 5.379620671272278
4.120625464121501 4.788714746634165 5.51239538192749 7.3216835260391235
4.156959588328997 4.822536284923554 6.430231730143229 6.602421283721924
3.895792452494303 5.09821488459905 5.055176496505737 7.187891006469727
2.9102950632572178 3.4650682701004873 4.391720771789552 6.214620971679688
4.157574738065401 4.977793335914612 5.88205474

In [110]:
ans_res

{('Some of the income was dispensed in the form of aid to other underdeveloped nations whose economies had been caught between higher oil prices and lower prices for their own export commodities, amid shrinking Western demand. Much went for arms purchases that exacerbated political tensions, particularly in the Middle East. Saudi Arabia spent over 100 billion dollars in the ensuing decades for helping spread its fundamentalist interpretation of Islam, known as Wahhabism, throughout the world, via religious charities such al-Haramain Foundation, which often also distributed funds to violent Sunni extremist groups such as Al-Qaeda and the Taliban.',
  'Which group benefited from the funds distributed by the religious charity, al-Haramain Foundation?'): {('[]',
   "(['v', '##b', '##d'], ['benefited'])"): 5.804782867431641,
  ("[['benefited']]", "(['v', '##bn'], ['distributed'])"): 0.7734231948852539,
  ("[['benefited'], ['distributed']]",
   "(['np'], ['funds'])"): 4.643692970275879,
  ("

In [None]:
unans_res

# Things that work on aggregate:
- maximum perplexity
- average logit scores

In [None]:
# double check that perplexity is correct

In [271]:
contexts = random.sample(range(1203), 1)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    #avg_ans_odds, max_ent_ans, r = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    examples = [e for e in unans_e if e[0] == context]
    dataloader = eval_dataloader_unans
    multihint = False
    if len(examples) == 0:
        assert(False)
    total_total_odds = 0
    total_total_perlex = 0
    max_perplex = 0
    results = {}
    for example in examples:
        cid, qid, targetid, _ = example
        context_text = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        raw_targ = dataloader.dataset.raw_targets[targetid]
        results[(context_text, question)] = {}

        raw_targ_copy = list(raw_targ)

        first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
        middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
        middle_targets = []
        for i in range(20):
            for tag, words in middle_targets_int:
                if str(i) in tag:
                    middle_targets.append((tag,words))

        second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
        random.shuffle(first_targets)
        random.shuffle(second_targets)
        raw_targ = (first_targets + middle_targets + second_targets)
        raw_targ = [t for t in raw_targ if t[1]]

        with torch.no_grad():
            model.eval()

            targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
            for tidx, (tag, words) in enumerate(raw_targ):
                clean_tag = tag[1:-1]
                if "-" in tag:
                    clean_tag = tag[:tag.index("-")]
                if multihint:
                    span = clean_tag + " [SEP] " + words
                else:
                    span = words
                targs_2_tokens.append((tokenizer.tokenize(span), tokenizer.tokenize(clean_tag), 
                                      tokenizer.tokenize(words)))
                    
            targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

            total_odds = 0
            min_odds = 100
            total_perplex = 0
            for token_idx in range(len(raw_targ)):
                odds = 0
                odds_list = []
                input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
                output, _ = model(input_ids, segment_ids, input_mask, None, None)
                
                #print(input_mask)
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                    odds += output[0][start_i+t_i][t]
                    odds_list.append(output[0][start_i+t_i][t])
                if len(targs_2_ids[token_idx]) == 0:
                    print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
                odds = odds/len(targs_2_ids[token_idx])
                if odds < min_odds:
                    min_odds = odds
                # print(odds)
                total_odds += odds
                perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
                print(context_text)
                print(question)
                print(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                              str(targs_2_tokens[token_idx][1:]))
                print(-perplex)
                print(targs_2_ids[token_idx][2])
                assert(False)
                results[(context_text, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                              str(targs_2_tokens[token_idx][1:]))] = -perplex
                total_perplex += perplex / len(raw_targ)
                #print(perplex)
            #print("Perplexity", -total_perplex)
            total_odds /= len(raw_targ)
            total_total_odds += total_odds
            total_total_perlex += -total_perplex
            if -total_perplex > max_perplex:
                max_perplex = -total_perplex
            #print("Total Odds:", total_odds)
            #print("Min odds:", min_odds)
    print((total_total_perlex / len(examples)), max_perplex)#, results)


Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

AssertionError: 

In [273]:
print(raw_targ)
for step in range(len(targs_2_tokens)):
    with torch.no_grad():
        input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], targs_2_tokens[step], multihint)
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        start_i = np.where(input_mask.data.numpy() == 0)[1][0]

    perplex = perplexity(targs_2_ids[step][2], output[0][start_i:])
    print(-perplex)


[('[VB]', 'deliver'), ('[VBN]', 'obstructed'), ('[NP]', "member states '"), ('[VB]', 'state'), ('[NP]', 'public services'), ('[WHNP]', 'Which artictle')]
5.569985866546631
0.6189584732055664
3.8744914531707764
5.505232810974121
0.03523874282836914
9.294071197509766


In [177]:
" ".join(tokenizer.convert_ids_to_tokens(input_ids.data.numpy()[0]))

'[CLS] the scotland act 1998 , which was passed by the parliament of the united kingdom and given royal assent by queen elizabeth ii on 19 november 1998 , govern ##s the functions and role of the scottish parliament and del ##imi ##ts its legislative competence . the scotland act 2012 extends the dev ##olved compete ##ncies . for the purposes of parliamentary sovereignty , the parliament of the united kingdom at westminster continues to constitute the supreme legislature of scotland . however , under the terms of the scotland act , westminster agreed to dev ##ol ##ve some of its responsibilities over scottish domestic policy to the scottish parliament . such " dev ##olved matters " include education , health , agriculture and justice . the scotland act enabled the scottish parliament to pass primary legislation on these issues . a degree of domestic authority , and all foreign policy , remain with the uk parliament in westminster . the scottish parliament has the power to pass laws and

In [183]:
for j in range(3):
    c = Counter()
    for i, o in enumerate(output[0][start_i+j]):
        c[i] = o
    for x, val in c.most_common(25):
        print(tokenizer.convert_ids_to_tokens([x]), val)
    print("#"*20)

['who'] tensor(13.5665)
['which'] tensor(11.6664)
['what'] tensor(10.8242)
['whom'] tensor(8.8862)
['whose'] tensor(8.0151)
['that'] tensor(7.9654)
['how'] tensor(7.4757)
['she'] tensor(5.3564)
['where'] tensor(4.8989)
['when'] tensor(4.3432)
['the'] tensor(4.2472)
['to'] tensor(3.8279)
['with'] tensor(3.7598)
['i'] tensor(3.7550)
['and'] tensor(3.6890)
['of'] tensor(3.6807)
['her'] tensor(3.6745)
['a'] tensor(3.1893)
['in'] tensor(3.1563)
['.'] tensor(3.1552)
['why'] tensor(2.8226)
['about'] tensor(2.8221)
['for'] tensor(2.6915)
['it'] tensor(2.5785)
['w'] tensor(2.5754)
####################
['queen'] tensor(10.2119)
['monarch'] tensor(7.7946)
['elizabeth'] tensor(7.1501)
['year'] tensor(6.6846)
['scottish'] tensor(6.2331)
['british'] tensor(6.1781)
['government'] tensor(5.9256)
['country'] tensor(5.6825)
['nation'] tensor(5.5809)
['president'] tensor(5.4016)
['event'] tensor(5.3116)
['other'] tensor(5.2889)
['two'] tensor(5.1743)
['uk'] tensor(5.1509)
['european'] tensor(5.0603)
['mo

In [179]:
raw_targ

[('[NP]', 'royal assent'),
 ('[NP]', 'Scotland Act'),
 ('[NP]', '1988'),
 ('[VBD]', 'granted'),
 ('[WHNP]', 'Who')]

In [274]:
examples = [e for e in unans_e if e[0] == context]
dataloader = eval_dataloader_unans
multihint = False
if len(examples) == 0:
    assert(False)
total_total_odds = 0
total_total_perlex = 0
max_perplex = 0
results = {}
for example in examples:
    cid, qid, targetid, _ = example
    context_text = dataloader.dataset.contexts[cid]
    question = dataloader.dataset.questions[qid]
    raw_targ = dataloader.dataset.raw_targets[targetid]
    results[(context_text, question)] = {}

    raw_targ_copy = list(raw_targ)

    first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
    middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
    middle_targets = []
    for i in range(20):
        for tag, words in middle_targets_int:
            if str(i) in tag:
                middle_targets.append((tag,words))

    second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
    random.shuffle(first_targets)
    random.shuffle(second_targets)
    raw_targ = (first_targets + middle_targets + second_targets)
    raw_targ = [t for t in raw_targ if t[1]]

    with torch.no_grad():
        model.eval()

        targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
        for tidx, (tag, words) in enumerate(raw_targ):
            clean_tag = tag[1:-1]
            if "-" in tag:
                clean_tag = tag[:tag.index("-")]
            if multihint:
                span = clean_tag + " [SEP] " + words
            else:
                span = words
            targs_2_tokens.append((tokenizer.tokenize(span), tokenizer.tokenize(clean_tag), 
                                  tokenizer.tokenize(words)))

        targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

        total_odds = 0
        min_odds = 100
        total_perplex = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            odds_list = []
            input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
            output, _ = model(input_ids, segment_ids, input_mask, None, None)

            #print(input_mask)
            start_i = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                odds += output[0][start_i+t_i][t]
                odds_list.append(output[0][start_i+t_i][t])
            if len(targs_2_ids[token_idx]) == 0:
                print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
            odds = odds/len(targs_2_ids[token_idx])
            if odds < min_odds:
                min_odds = odds
            # print(odds)
            total_odds += odds
            perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
            print(context_text)
            print(question)
            if question == "The Scotland Act 2002 extends the devolved what?":
                assert(False)
            print(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                          str(targs_2_tokens[token_idx][1:]))
            print(-perplex)
            print(targs_2_ids[token_idx][2])
            results[(context_text, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                          str(targs_2_tokens[token_idx][1:]))] = -perplex
            total_perplex += perplex / len(raw_targ)
            #print(perplex)
        #print("Perplexity", -total_perplex)
        total_odds /= len(raw_targ)
        total_total_odds += total_odds
        total_total_perlex += -total_perplex
        if -total_perplex > max_perplex:
            max_perplex = -total_perplex

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

In [230]:
def perplexity(logit_idx, dist):
    log_prob = 0
    for i, lg_idx in enumerate(logit_idx):
        prob = softmax_model(dist[i])[lg_idx]
#         print(logit_idx)
#         print(list(tokenizer.vocab.items())[lg_idx], prob)
        log_prob += prob
    return (log_prob / len(logit_idx)).item()

# print(raw_targ)
# for step in range(len(targs_2_tokens)):
#     with torch.no_grad():
#         input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], targs_2_tokens[step], multihint)
#         output, _ = model(input_ids, segment_ids, input_mask, None, None)
#         start_i = np.where(input_mask.data.numpy() == 0)[1][0]

#     perplex = perplexity(targs_2_ids[step][2], output[0][start_i:])
#     print(-perplex)


In [275]:
examples = [e for e in ans_e if e[0] == context]
dataloader = eval_dataloader_ans
multihint = False
if len(examples) == 0:
    assert(False)
total_total_odds = 0
total_total_perlex = 0
max_perplex = 0
results = {}
for example in examples:
    cid, qid, targetid, _ = example
    context_text = dataloader.dataset.contexts[cid]
    question = dataloader.dataset.questions[qid]
    raw_targ = dataloader.dataset.raw_targets[targetid]
    results[(context_text, question)] = {}

    raw_targ_copy = list(raw_targ)

    first_targets = [(tag, words) for tag, words in raw_targ if "-"  not in tag  and "W" not in tag]
    middle_targets_int = [(tag, words) for tag, words in raw_targ if "-"   in tag]
    middle_targets = []
    for i in range(20):
        for tag, words in middle_targets_int:
            if str(i) in tag:
                middle_targets.append((tag,words))

    second_targets = [(tag, words) for tag, words in raw_targ if "W" in tag]
    random.shuffle(first_targets)
    random.shuffle(second_targets)
    raw_targ = (first_targets + middle_targets + second_targets)
    raw_targ = [t for t in raw_targ if t[1]]

    with torch.no_grad():
        model.eval()

        targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
        for tidx, (tag, words) in enumerate(raw_targ):
            clean_tag = tag[1:-1]
            if "-" in tag:
                clean_tag = tag[:tag.index("-")]
            if multihint:
                span = clean_tag + " [SEP] " + words
            else:
                span = words
            targs_2_tokens.append((tokenizer.tokenize(span), tokenizer.tokenize(clean_tag), 
                                  tokenizer.tokenize(words)))

        targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

        total_odds = 0
        min_odds = 100
        total_perplex = 0
        for token_idx in range(len(raw_targ)):
            odds = 0
            odds_list = []
            input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
            output, _ = model(input_ids, segment_ids, input_mask, None, None)

            #print(input_mask)
            start_i = np.where(input_mask.data.numpy() == 0)[1][0]
            for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                odds += output[0][start_i+t_i][t]
                odds_list.append(output[0][start_i+t_i][t])
            if len(targs_2_ids[token_idx]) == 0:
                print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
            odds = odds/len(targs_2_ids[token_idx])
            if odds < min_odds:
                min_odds = odds
            # print(odds)
            total_odds += odds
            perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
            print(context_text)
            print(question)
            print(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                          str(targs_2_tokens[token_idx][1:]))
            print(-perplex)
            print(targs_2_ids[token_idx][2])
            results[(context_text, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                          str(targs_2_tokens[token_idx][1:]))] = -perplex
            total_perplex += perplex / len(raw_targ)
            #print(perplex)
        #print("Perplexity", -total_perplex)
        total_odds /= len(raw_targ)
        total_total_odds += total_odds
        total_total_perlex += -total_perplex
        if -total_perplex > max_perplex:
            max_perplex = -total_perplex

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

In [276]:
for e in ans_e:
    if e[0] == context:
        print(eval_dataloader_ans.dataset.questions[e[1]])
print("~"*20)
for e in unans_e:
    if e[0] == context:
        print(eval_dataloader_unans.dataset.questions[e[1]])

In which article does the Treaty of Lisbon prohibit anti-competitive agreements?
Which article allows the European Council to govern mergers between firms?
Which articles state that the member states' rights to deliver public services may not be obstructed?
What does Article 102 of the Treaty of Lisbon prohibit?
~~~~~~~~~~~~~~~~~~~~
Which artictle does not state that the member states' rights to deliver public services may not be obstructed?
What does the Treaty of Lisbon allow today?
What does Article 102 not allow the European Council to do?
What does Article 102 not prohibit?
What does Article 107 not lay down?


# Generation

In [260]:
targs_2_tokens_tmp = [('scotland act', 'NP', 'scotland act'),
                     ('pass', 'vb', 'pass'),
                     ('when', 'WHADJP', 'when')]
targs_2_tokens_tmp = [list(map(tokenizer.tokenize, t)) for t in targs_2_tokens_tmp]
print(targs_2_tokens_tmp)
targs_2_ids_tmp = targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens_tmp]
step = 2
with torch.no_grad():
    input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens_tmp[:step], targs_2_tokens_tmp[step], multihint)
    output, _ = model(input_ids, segment_ids, input_mask, None, None)
    start_i = np.where(input_mask.data.numpy() == 0)[1][0]

perplex = perplexity(targs_2_ids_tmp[step][2], output[0][start_i:])
print(-perplex)
    
for j in range(3):
    c = Counter()
    for i, o in enumerate(output[0][start_i+j]):
        c[i] = o
    for x, val in c.most_common(25):
        print(tokenizer.convert_ids_to_tokens([x]), val)
    print("#"*20)


[[['scotland', 'act'], ['np'], ['scotland', 'act']], [['pass'], ['v', '##b'], ['pass']], [['when'], ['w', '##had', '##j', '##p'], ['when']]]
0.2175922393798828
['when'] tensor(14.5137)
['how'] tensor(12.6893)
['where'] tensor(11.7335)
['why'] tensor(10.1992)
['what'] tensor(8.6261)
['who'] tensor(7.2562)
['which'] tensor(6.8164)
['whose'] tensor(5.8255)
['as'] tensor(5.5572)
['whereby'] tensor(5.5490)
['since'] tensor(5.5428)
['that'] tensor(5.0190)
['with'] tensor(4.9674)
['until'] tensor(4.9466)
['to'] tensor(4.8709)
['whenever'] tensor(4.8655)
['about'] tensor(4.8636)
['in'] tensor(4.8414)
['after'] tensor(4.8138)
['whom'] tensor(4.7061)
['and'] tensor(4.6670)
['on'] tensor(4.6074)
['whether'] tensor(4.5267)
['if'] tensor(4.3912)
['.'] tensor(4.2983)
####################
['long'] tensor(8.0352)
['many'] tensor(7.6208)
['important'] tensor(7.4536)
['often'] tensor(7.3681)
['of'] tensor(6.7335)
['else'] tensor(6.6596)
['in'] tensor(6.3847)
['large'] tensor(6.0406)
['type'] tensor(5.67

In [263]:
targs_2_tokens_tmp = [('english national bank', 'NP', 'english national bank'),
                     ('voted', 'vb', 'voted'),
                     ('what reason', 'WHNP', 'what reason')]
targs_2_tokens_tmp = [list(map(tokenizer.tokenize, t)) for t in targs_2_tokens_tmp]
print(targs_2_tokens_tmp)
targs_2_ids_tmp = targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens_tmp]
step = 2
with torch.no_grad():
    input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens_tmp[:step], targs_2_tokens_tmp[step], multihint)
    output, _ = model(input_ids, segment_ids, input_mask, None, None)
    start_i = np.where(input_mask.data.numpy() == 0)[1][0]

perplex = perplexity(targs_2_ids_tmp[step][2], output[0][start_i:])
print(-perplex)
    
for j in range(3):
    c = Counter()
    for i, o in enumerate(output[0][start_i+j]):
        c[i] = o
    for x, val in c.most_common(25):
        print(tokenizer.convert_ids_to_tokens([x]), val)
    print("#"*20)


[[['english', 'national', 'bank'], ['np'], ['english', 'national', 'bank']], [['voted'], ['v', '##b'], ['voted']], [['what', 'reason'], ['w', '##hn', '##p'], ['what', 'reason']]]
4.222418785095215
['what'] tensor(12.2574)
['which'] tensor(10.9302)
['who'] tensor(10.5983)
['how'] tensor(10.5133)
['that'] tensor(7.5072)
['whose'] tensor(7.0082)
['whom'] tensor(6.5752)
['about'] tensor(4.8137)
['where'] tensor(4.6658)
['when'] tensor(4.2426)
['to'] tensor(3.8988)
['the'] tensor(3.6469)
['in'] tensor(3.5854)
['why'] tensor(3.5764)
['approximately'] tensor(3.5761)
['of'] tensor(3.5390)
['with'] tensor(3.4668)
['a'] tensor(3.1168)
['and'] tensor(2.9788)
['it'] tensor(2.8857)
['by'] tensor(2.7613)
['as'] tensor(2.6795)
['.'] tensor(2.6702)
['w'] tensor(2.6133)
['whether'] tensor(2.4935)
####################
['year'] tensor(7.7012)
['act'] tensor(6.7683)
['type'] tensor(6.5378)
['many'] tensor(6.3372)
['body'] tensor(5.9508)
['much'] tensor(5.9032)
['scottish'] tensor(5.8434)
['part'] tensor(5

In [233]:
print(targs_2_tokens)
step = 0
with torch.no_grad():
    input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], targs_2_tokens[step], multihint)
    output, _ = model(input_ids, segment_ids, input_mask, None, None)
    start_i = np.where(input_mask.data.numpy() == 0)[1][0]

perplex = perplexity(targs_2_ids[step][2], output[0][start_i:])
print(-perplex)


[(['extends'], ['v', '##b', '##z'], ['extends']), (['dev', '##olved', 'what'], ['np'], ['dev', '##olved', 'what']), (['scotland', 'act', '2002'], ['np'], ['scotland', 'act', '2002'])]
2.0234193801879883


In [234]:
targs_2_ids

[[[8908], [1058, 2497, 2480], [8908]],
 [[16475, 16116, 2054], [27937], [16475, 16116, 2054]],
 [[3885, 2552, 2526], [27937], [3885, 2552, 2526]]]

In [247]:
context_text

'The Scotland Act 1998, which was passed by the Parliament of the United Kingdom and given royal assent by Queen Elizabeth II on 19 November 1998, governs the functions and role of the Scottish Parliament and delimits its legislative competence. The Scotland Act 2012 extends the devolved competencies. For the purposes of parliamentary sovereignty, the Parliament of the United Kingdom at Westminster continues to constitute the supreme legislature of Scotland. However, under the terms of the Scotland Act, Westminster agreed to devolve some of its responsibilities over Scottish domestic policy to the Scottish Parliament. Such "devolved matters" include education, health, agriculture and justice. The Scotland Act enabled the Scottish Parliament to pass primary legislation on these issues. A degree of domestic authority, and all foreign policy, remain with the UK Parliament in Westminster. The Scottish Parliament has the power to pass laws and has limited tax-varying capability. Another of 

In [283]:
# quick test
from difflib import SequenceMatcher
s1 = tokenizer.tokenize(context_text)
s2 = tokenizer.tokenize(question)
match = SequenceMatcher(None, s1, s2).get_matching_blocks()


In [282]:
print(context_text)
print(question)

Today, the Treaty of Lisbon prohibits anti-competitive agreements in Article 101(1), including price fixing. According to Article 101(2) any such agreements are automatically void. Article 101(3) establishes exemptions, if the collusion is for distributional or technological innovation, gives consumers a "fair share" of the benefit and does not include unreasonable restraints that risk eliminating competition anywhere (or compliant with the general principle of European Union law of proportionality). Article 102 prohibits the abuse of dominant position, such as price discrimination and exclusive dealing. Article 102 allows the European Council to regulations to govern mergers between firms (the current regulation is the Regulation 139/2004/EC). The general test is whether a concentration (i.e. merger or acquisition) with a community dimension (i.e. affects a number of EU member states) might significantly impede effective competition. Articles 106 and 107 provide that member state's ri

In [287]:
for m in match:
    if m.size ==0:
        continue
    print(s2[m.b:m.b+m.size])

['the', 'treaty', 'of', 'lisbon']


In [286]:
tokenizer.vocab['prohibits']

25822