In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.append("examples/")

import logging
import argparse
import json
from tqdm import tqdm, trange
import csv
from collections import Counter

import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForPreTraining 
from pytorch_pretrained_bert.optimization import BertAdam

from torch.utils.data import Dataset
import random

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [7]:
from train_sent_cond import InputExample, random_word, InputFeatures, BERTDataset, x_in_y_int, mask_question 

In [3]:
# Evaluation settings shared by the cells below.
gradient_accumulation_steps = 1  # not referenced by any cell reviewed here
train_batch_size = 1  # batch size handed to both evaluation DataLoaders
eval_file = "dataset/dev-v2.0.json"  # data file passed to BERTDataset
max_seq_length=256  # fixed model input length; build_input truncates/pads to this
on_memory = True  # forwarded unchanged to BERTDataset(on_memory=...)
bert_model = "model_sent2/pytorch_model1.bin"  # checkpoint path read with torch.load

In [4]:
# Use the GPU when one is visible, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available()  else "cpu")
n_gpu = torch.cuda.device_count()

# Seed every RNG in use so the random context sampling in later cells is repeatable.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if n_gpu > 0:
    torch.cuda.manual_seed_all(42)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

# Load the evaluation data twice from the same file: once with the default
# filter and once with keep_answerable=False.
# NOTE(review): presumably the first keeps answerable and the second keeps
# unanswerable questions — confirm in train_sent_cond.BERTDataset.
eval_dataset_answerable = BERTDataset(eval_file, "qparts/copy_parts2/parsed_qs_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory)
eval_dataset_unanswerable = BERTDataset(eval_file, "qparts/copy_parts2/parsed_qs_labels%s.pkl", tokenizer, seq_len=max_seq_length,
                                    on_memory=on_memory, keep_answerable=False)

# Prepare model: load the fine-tuned weights onto the CPU first, then move the
# whole model to `device`.
model_state_dict = torch.load(bert_model, map_location='cpu') #TODO daniter: remove this map_location
## TODO daniter: check if bert model is being loaded correctly
model = BertForPreTraining.from_pretrained("bert-base-uncased", state_dict=model_state_dict)
model.to(device)


# Sanity check: report the tokenizer vocabulary size (no optimizer is built in
# this evaluation notebook).
print("Checking the vocab size:", len(tokenizer.vocab))
# NOTE(review): the previous comment here described BERT/GRU hidden sizes
# (768 / 256 / 1 layer); nothing in this cell uses a GRU.

# Evaluation loaders: sequential order, one loader per dataset.
eval_sampler_ans = SequentialSampler(eval_dataset_answerable)
eval_dataloader_ans = DataLoader(eval_dataset_answerable, sampler=eval_sampler_ans,
                                 batch_size=train_batch_size)
eval_sampler_unans = SequentialSampler(eval_dataset_unanswerable)
eval_dataloader_unans = DataLoader(eval_dataset_unanswerable, sampler=eval_sampler_unans,
                                   batch_size=train_batch_size)


05/08/2019 15:39:39 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/daniter/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1587.81it/s]
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 1529.19it/s]
05/08/2019 15:39:44 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
05/08/2019 15:39:44 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d

Checking the vocab size: 30522


In [5]:
def get_examples(contexts):
    """Collect one example per distinct question for each requested context id.

    Reads the module-level ``eval_dataloader_ans`` and ``eval_dataloader_unans``.
    An example is indexed positionally: item 0 is compared against the context
    id and item 1 is used as the question key for de-duplication.

    Args:
        contexts: Iterable of context ids to look up.

    Returns:
        ``(ans_examples, unans_examples)``: two lists, each grouped in the
        order of ``contexts`` and, within a context, in dataset order, keeping
        only the first example seen for every question key.
    """
    def _first_per_question(all_examples, context_id):
        # Keep the first example of each question that belongs to context_id.
        seen_questions = set()
        kept = []
        for example in all_examples:
            if example[0] != context_id:
                continue
            if example[1] not in seen_questions:
                kept.append(example)
            seen_questions.add(example[1])
        return kept

    ans_examples, unans_examples = [], []
    for context in contexts:
        unans_examples.extend(
            _first_per_question(eval_dataloader_unans.dataset.examples, context))
        ans_examples.extend(
            _first_per_question(eval_dataloader_ans.dataset.examples, context))
    return (ans_examples, unans_examples)

In [6]:
from torch.nn import LogSoftmax
softmax_model = LogSoftmax(dim=0)

def perplexity(logit_idx, dist):
    """Return the mean log-probability assigned to a sequence of target ids.

    Despite the name, the value is an average log-probability (<= 0); the
    callers in this notebook negate it before printing or storing it.

    Args:
        logit_idx: Sequence of vocabulary ids, one per target position.
        dist: Indexable of per-position score vectors; ``dist[i]`` is
            log-softmaxed over its first dimension and read at ``logit_idx[i]``.

    Returns:
        A Python float: the sum of the selected log-probabilities divided by
        ``len(logit_idx)``.
    """
    running_total = 0
    for position in range(len(logit_idx)):
        position_log_probs = softmax_model(dist[position])
        running_total = running_total + position_log_probs[logit_idx[position]]
    mean_log_prob = running_total / len(logit_idx)
    return mean_log_prob.item()

In [21]:
def build_input(context, tokens_b, target_tokens, multihint=False):
    """Assemble the BERT input tensors used to score one masked question span.

    The sequence is laid out as
    ``[CLS] context [SEP] masked-question [SEP] tag [SEP]`` and then
    zero-padded up to the module-level ``max_seq_length``. The context is
    truncated so that at least ``len(target_span) + 1`` padded positions
    remain after the real tokens; callers read the model's predictions for the
    target span starting at the first padded position.

    Args:
        context: Raw paragraph text; word-piece tokenized here.
        tokens_b: Raw question text; word-piece tokenized here and then passed
            through ``mask_question`` together with the target span.
        target_tokens: 3-tuple ``(target_span, target_tag, _)`` of token
            lists; the third item is ignored.
        multihint: Accepted for call-site compatibility; not used here.

    Returns:
        ``(input_ids, input_mask, segment_ids)``: three integer tensors of
        shape ``(1, L)``. ``L`` is ``max_seq_length`` whenever the question
        part fits the budget; segment ids are 0 for ``[CLS]``/context/first
        ``[SEP]`` and padding, 1 for the question part.
    """
    tokenized_context = tokenizer.tokenize(context)
    target_span, target_tag, _ = target_tokens

    # mask_question is imported from train_sent_cond.
    # NOTE(review): presumably it replaces the target span inside the question
    # with [MASK] — confirm against that module.
    question_tokens = mask_question(tokenizer.tokenize(tokens_b), target_span)
    question_tokens += ["[SEP]"] + target_tag + ["[SEP]"]

    # Room to keep free: the question part plus the target span and its
    # closing [SEP] (the latter are predicted in the padded tail, not fed in).
    reserved = len(question_tokens) + len(target_span) + 1
    # Clamp at 0: a negative bound would slice the context from its tail
    # instead of dropping it, letting the sequence overflow max_seq_length.
    context_budget = max(0, max_seq_length - 3 - reserved)
    tokenized_context = tokenized_context[:context_budget]

    tokens = ["[CLS]"] + tokenized_context + ["[SEP]"] + question_tokens
    segment_ids = [0] * (len(tokenized_context) + 2) + [1] * len(question_tokens)

    input_ids = list(tokenizer.convert_tokens_to_ids(tokens))
    input_mask = [1] * len(input_ids)

    # Zero-pad ids, mask and segments up to the fixed model input length.
    padding = [0] * max(0, max_seq_length - len(input_ids))
    input_ids += padding
    input_mask += padding
    segment_ids += padding

    return torch.tensor([input_ids]), torch.tensor([input_mask]), torch.tensor([segment_ids])

In [85]:
def _print_top_predictions(target_words, position_logits, top_k=10):
    """Print a table of the ``top_k`` best-scoring vocabulary tokens per target position.

    Columns are built per position (not per word), so a word that occurs twice
    in the target span gets its own predictions in each column.
    """
    columns = []
    for logits in position_logits:
        k = min(top_k, logits.shape[-1])
        scores, token_ids = torch.topk(logits, k)
        tokens = tokenizer.convert_ids_to_tokens(token_ids.tolist())
        columns.append(list(zip(tokens, scores.tolist())))

    header = []
    for word in target_words:
        header.extend([word, 'score'])
    print(("{:<20s}{:^10s}" * len(target_words)).format(*header))
    print("-"*80)

    row_format = "{:<20s}{:^10.2f}" * len(target_words)
    depth = min((len(column) for column in columns), default=0)
    for rank in range(depth):
        row = []
        for column in columns:
            row.extend(column[rank])
        print(row_format.format(*row))


def get_avg_odds(examples, dataloader, multihint=False):
    """Score every target span of each example and print a per-span report.

    For each example the question is rebuilt once per target span with that
    span masked (via ``build_input``), the model is run, and the span is scored
    with ``perplexity`` on the outputs starting at the first padded position.
    The per-question score is the negated average of those span scores.

    Relies on the module-level ``model``, ``tokenizer``, ``build_input`` and
    ``perplexity``.

    Args:
        examples: Sequence of 4-tuples ``(context_id, question_id, target_id, _)``.
        dataloader: Loader whose ``dataset`` exposes ``contexts``,
            ``questions`` and ``raw_targets`` indexable by those ids.
        multihint: When true, the span handed to ``build_input`` is prefixed
            with the tokenized tag and a ``[SEP]``.

    Returns:
        ``(average, maximum, results)``: the mean and the maximum per-question
        score, and a dict keyed by ``(context, question)`` mapping each span
        key to its negated score. For an empty ``examples`` the result is
        ``(0, 0, {})`` so callers can always unpack three values.
    """
    if len(examples) == 0:
        # Keep the 3-tuple shape of the normal return path.
        return 0, 0, {}

    summed_scores = 0
    max_perplex = 0
    results = {}

    model.eval()
    with torch.no_grad():
        for cid, qid, targetid, _ in examples:
            context = dataloader.dataset.contexts[cid]
            question = dataloader.dataset.questions[qid]
            per_question = results[(context, question)] = {}

            # raw_targets entries unpack as (words, (_, tag)); drop empty spans.
            raw_targ = [(tag, words)
                        for (words, (_, tag)) in dataloader.dataset.raw_targets[targetid]
                        if words]

            targs_2_tokens = []
            for tag, words in raw_targ:
                tag_tokens = tokenizer.tokenize(tag)
                span = (tag_tokens + ["[SEP]"] + words) if multihint else words
                targs_2_tokens.append((span, tag_tokens, words))

            total_perplex = 0
            for token_idx, target in enumerate(targs_2_tokens):
                words = target[2]
                input_ids, input_mask, segment_ids = build_input(context, question, target, multihint)
                output, _ = model(input_ids, segment_ids, input_mask, None, None)

                # First padded position: the target span is scored from here on.
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                perplex = perplexity(tokenizer.convert_tokens_to_ids(words), output[0][start_i:])

                # Show the question part of the input (everything after the
                # first [SEP]) without separators and padding.
                in_data = tokenizer.convert_ids_to_tokens(input_ids.numpy()[0])
                in_data = in_data[in_data.index('[SEP]'):]
                print(" ".join(tok for tok in in_data if tok not in ('[SEP]', '[PAD]')))
                print("Target", words)
                _print_top_predictions(
                    words, [output[0][start_i + j] for j in range(len(words))])
                print("perplexity:", -perplex)
                print("~"*20)

                span_key = (str([tt[0] for tt in targs_2_tokens[:token_idx]]),
                            str(target[1:]))
                per_question[span_key] = -perplex
                total_perplex += perplex / len(raw_targ)

            print("Total Perplex for Q", -total_perplex)
            print("#"*20)
            summed_scores += -total_perplex
            max_perplex = max(max_perplex, -total_perplex)

    return (summed_scores / len(examples)), max_perplex, results





In [87]:
# Pick one random context id, gather its de-duplicated examples, and print the
# per-span report for the answerable and then the unanswerable questions.
# NOTE(review): 1203 is presumably the number of contexts in the dev set —
# confirm and derive it from the dataset instead of hard-coding it.
contexts = random.sample(range(1203), 1)
ans_e, unans_e = get_examples(contexts)
ans_res, unans_res = {}, {}
for context in contexts:
    #print_context_and_questions(context, ans_e, unans_e)
    print("Context", eval_dataloader_ans.dataset.contexts[context])
    print("ans")
    # Each call is unpacked into (average score, maximum score, per-span results).
    avg_ans_odds, max_ent_ans, r = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    ans_res.update(r)
    print("unans")
    avg_unans_odds, max_ent_unans, r = get_avg_odds([e for e in unans_e if e[0] == context], eval_dataloader_unans)
    unans_res.update(r)
    # Summary line: averages first, then maxima (answerable, unanswerable).
    print(avg_ans_odds, avg_unans_odds, max_ent_ans, max_ent_unans)

Context Plotting the relationship between level of income and inequality, Kuznets saw middle-income developing economies level of inequality bulging out to form what is now known as the Kuznets curve. Kuznets demonstrated this relationship using cross-sectional data. However, more recent testing of this theory with superior panel data has shown it to be very weak. Kuznets' curve predicts that income inequality will eventually decrease given time. As an example, income inequality did fall in the United States during its High school movement from 1910 to 1940 and thereafter.[citation needed] However, recent data shows that the level of income inequality began to rise after the 1970s. This does not necessarily disprove Kuznets' theory.[citation needed] It may be possible that another Kuznets' cycle is occurring, specifically the move from the manufacturing sector to the service sector.[citation needed] This implies that it may be possible for multiple Kuznets' cycles to be in effect at an

inequality             8.00   '                      7.92   
average                6.45   [SEP]                  7.60   
increase               6.31   equality               6.88   
wealth                 6.28   and                    6.66   
incomes                6.08   in                     6.21   
money                  5.99   income                 6.19   
economic               5.68   ##s                    5.62   
their                  5.68   ##qual                 5.38   
unemployment           5.60   or                     5.33   
perplexity: 0.04287433624267578
~~~~~~~~~~~~~~~~~~~~
what does ku ##z ##nets ' curve predict about income inequality [MASK] ? pp
Target ['given', 'time']
given                 score   time                  score   
--------------------------------------------------------------------------------
in                    11.37   given                  9.07   
from                  11.18   time                   8.37   
during                 9.50   giv

kansas                 5.13   '                      6.19   ##net                  7.65   with                   7.85   with                   6.95   ##s                    6.15   
different              5.10   and                    6.10   s                      7.41   ##s                    6.19   ##s                    6.27   with                   6.07   
similar                4.93   ##s                    5.55   -                      7.24   ##nets                 5.50   superior               5.65   superior               5.51   
ka                     4.66   ##zu                   5.28   based                  6.39   system                 5.41   system                 5.40   system                 5.38   
kahn                   4.47   [SEP]                  4.59   and                    6.38   and                    5.28   and                    5.02   in                     4.86   
this                   4.33   theory                 4.46   [SEP]                  6.25   based

during what time period did [MASK] decrease in the united states ? np
Target ['income', 'inequality']
income                score   inequality            score   
--------------------------------------------------------------------------------
income                12.59   inequality            14.34   
high                   7.62   equality               7.15   
poverty                6.86   poverty                7.13   
economic               6.42   income                 7.04   
unemployment           5.96   [SEP]                  6.89   
incomes                5.59   enrollment             6.48   
education              5.56   education              6.16   
inequality             5.56   rate                   6.09   
average                5.22   rates                  5.89   
employment             5.15   unemployment           5.78   
perplexity: 0.027944564819335938
~~~~~~~~~~~~~~~~~~~~
during what time period did income inequality [MASK] in the united states ? n ##n
Target ['d

can                   11.39   increase               6.94   be                     7.42   [SEP]                  7.03   to                     7.38   to                     7.17   increase               6.78   increase               6.67   increase               6.59   to                     6.69   increase               6.82   increase               6.94   increase               6.89   
might                 10.97   '                      6.25   in                     6.34   decrease               6.50   '                      5.88   '                      6.17   '                      6.13   '                      6.10   '                      6.05   '                      6.06   '                      6.26   '                      6.25   '                      6.19   
suggests               9.84   by                     5.99   [SEP]                  5.87   that                   5.90   by                     5.82   by                     5.88   by                     5.85   by      

high                   7.02   
their                  6.68   
middle                 6.33   
two                    5.83   
lower                  5.76   
different              5.75   
higher                 5.69   
rates                  5.50   
perplexity: 0.1948537826538086
~~~~~~~~~~~~~~~~~~~~
[MASK] never plotted the relationships between levels of income and inequality ? w ##hn ##p
Target ['who']
who                   score   
--------------------------------------------------------------------------------
who                   15.45   
what                  11.62   
which                 11.61   
whom                   9.38   
whose                  8.74   
how                    6.70   
that                   6.59   
where                  6.32   
he                     4.44   
when                   4.41   
perplexity: 0.04620361328125
~~~~~~~~~~~~~~~~~~~~
Total Perplex for Q 2.168630758921305
####################
in what sector are jobs beginning to [MASK] ? v ##b
Target ['d

average                5.51   education              5.86   
wealth                 5.15   rates                  5.79   
rate                   5.06   and                    5.68   
perplexity: 0.01641082763671875
~~~~~~~~~~~~~~~~~~~~
when did income inequality begin to [MASK] in the us ? v ##b
Target ['decrease']
decrease              score   
--------------------------------------------------------------------------------
fall                  12.35   
rise                  10.53   
decrease              10.03   
decline               10.02   
drop                   9.27   
increase               9.21   
grow                   8.09   
occur                  7.75   
happen                 7.56   
collapse               7.41   
perplexity: 2.7690649032592773
~~~~~~~~~~~~~~~~~~~~
when did income inequality [MASK] to decrease in the us ? v ##b
Target ['begin']
begin                 score   
--------------------------------------------------------------------------------
begin           

# Ideas:
Some kind of weighting scheme based on how hard words are
- what should weigh less than 'lisbon treaty'
- when was fresno county courthouse demolished has high probability:
    - this is because we know that it's been demolished and the entity is the same, the article just doesn't say when
    - maybe include answer somewhere but not directly (ie. using a sentence or candidate answer to condition the model or else it might never learn that when questions need dates)
    - there is something that looks like a date (between the 1880s and wwii) and a number (1401) but no relationship showing that a demolition took place at a certain date 
    
Some paraphrases are surprising (GDP instead of gross domestic product). Is there another way to describe the probability, such as, given data we said GDP, how far is the meaning from the most likely meaning?

Can we make some small edits to significantly increase the probability of the question?

TODO:
- read about bert speaking model again
- implement language model for christ sakes
- get rid of w- words in training data

In [145]:
# Scratch version of the get_avg_odds loop, inlined at top level for the
# unanswerable examples of one random context so that its intermediate
# variables stay in the kernel namespace.
contexts = random.sample(range(1203), 1)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    #avg_ans_odds, max_ent_ans, r = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    examples = [e for e in unans_e if e[0] == context]
    dataloader = eval_dataloader_unans
    multihint = False
    total_total_odds = 0
    total_total_perlex = 0
    max_perplex = 0
    results = {}
    for example in examples:
        cid, qid, targetid, _ = example
        context_text = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        raw_targ = dataloader.dataset.raw_targets[targetid]
        results[(context_text, question)] = {}

        raw_targ_copy = list(raw_targ)
        # Re-pair every non-empty entry as (tag, words).
        raw_targ = [(tag,word) for (word,(_,tag)) in raw_targ if word]


        with torch.no_grad():
            model.eval()

            # One (span, tag tokens, words) triple per target.
            targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
            for tidx, (tag, words) in enumerate(raw_targ):
                clean_tag = tag
                if multihint:
                    span = tokenizer.tokenize(clean_tag) + ["[SEP]"] + words
                else:
                    span = words
                targs_2_tokens.append((span, tokenizer.tokenize(clean_tag), words))

            # Same triples with every token list converted to vocabulary ids.
            targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

            total_odds = 0
            min_odds = 100
            total_perplex = 0
            for token_idx in range(len(raw_targ)):
                odds = 0
                odds_list = []
                # NOTE(review): the second argument here is a list of the
                # earlier target triples, whereas the build_input shown in this
                # notebook tokenizes that argument as question text — confirm
                # which build_input version this cell was run against.
                input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
                output, _ = model(input_ids, segment_ids, input_mask, None, None)

                #print(input_mask)
                # Column index of the first zero in the attention mask.
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                # Accumulate the raw scores of the target ids, one position each.
                for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                    odds += output[0][start_i+t_i][t]
                    odds_list.append(output[0][start_i+t_i][t])
                # NOTE(review): targs_2_ids[token_idx] always holds three lists,
                # so this check never fires and the division below is by 3, not
                # by the number of target tokens — confirm the intent.
                if len(targs_2_ids[token_idx]) == 0:
                    print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
                odds = odds/len(targs_2_ids[token_idx])
                if odds < min_odds:
                    min_odds = odds
                # print(odds)
                total_odds += odds
                perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
                # NOTE(review): appears to be a deliberate debugging stop — it
                # aborts the cell after the first scored span, leaving the loop
                # variables in the kernel; the rest of this loop body and the
                # per-question summary below are never reached when it fires.
                assert False
                results[(context_text, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                              str(targs_2_tokens[token_idx][1:]))] = -perplex
                total_perplex += perplex / len(raw_targ)
                #print(perplex)
            print("Perplexity", -total_perplex)
            total_odds /= len(raw_targ)
            total_total_odds += total_odds
            total_total_perlex += -total_perplex
            if -total_perplex > max_perplex:
                max_perplex = -total_perplex
            #print("Total Odds:", total_odds)
            #print("Min odds:", min_odds)
    #return (total_total_perlex / len(examples)), max_perplex, results

AssertionError: 

In [146]:
# Inspection cell: prints the context and question currently held in the
# kernel and the negated score of each of that question's target spans.
# context_text, question, targs_2_tokens, targs_2_ids, raw_targ and multihint
# are not defined here; they must already exist in the kernel namespace.
print(context_text)
#print(raw_targ)
print("~"*20)
print(question)
for step in range(len(targs_2_tokens)):
    with torch.no_grad():
        input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], targs_2_tokens[step], multihint)
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        # Column index of the first zero in the attention mask.
        start_i = np.where(input_mask.data.numpy() == 0)[1][0]

    perplex = perplexity(targs_2_ids[step][2], output[0][start_i:])
    print(raw_targ[step], -perplex)


As in the House of Commons, a number of qualifications apply to being an MSP. Such qualifications were introduced under the House of Commons Disqualification Act 1975 and the British Nationality Act 1981. Specifically, members must be over the age of 18 and must be a citizen of the United Kingdom, the Republic of Ireland, one of the countries in the Commonwealth of Nations, a citizen of a British overseas territory, or a European Union citizen resident in the UK. Members of the police and the armed forces are disqualified from sitting in the Scottish Parliament as elected MSPs, and similarly, civil servants and members of foreign legislatures are disqualified. An individual may not sit in the Scottish Parliament if he or she is judged to be insane under the terms of the Mental Health (Care and Treatment) (Scotland) Act 2003.
~~~~~~~~~~~~~~~~~~~~
The House of Lords introduced qualifications for which position?
('NNP', ['house']) 9.108257293701172
('VBD', ['introduced']) 4.05820751190185

In [128]:
# Scratch version of the get_avg_odds loop, inlined at top level for the
# answerable examples of one random context so that its intermediate
# variables stay in the kernel namespace.
contexts = random.sample(range(1203), 1)
ans_e, unans_e = get_examples(contexts)
for context in contexts:
    #avg_ans_odds, max_ent_ans, r = get_avg_odds([e for e in ans_e if e[0] == context], eval_dataloader_ans)
    examples = [e for e in ans_e if e[0] == context]
    dataloader = eval_dataloader_ans
    multihint = False
    total_total_odds = 0
    total_total_perlex = 0
    max_perplex = 0
    results = {}
    for example in examples:
        cid, qid, targetid, _ = example
        context_text = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        raw_targ = dataloader.dataset.raw_targets[targetid]
        results[(context_text, question)] = {}

        raw_targ_copy = list(raw_targ)
        # Re-pair every non-empty entry as (tag, words).
        raw_targ = [(tag,word) for (word,(_,tag)) in raw_targ if word]


        with torch.no_grad():
            model.eval()

            # One (span, tag tokens, words) triple per target.
            targs_2_tokens = []#[tokenizer.tokenize(t) for _, t in raw_targ]            
            for tidx, (tag, words) in enumerate(raw_targ):
                clean_tag = tag
                if multihint:
                    span = tokenizer.tokenize(clean_tag) + ["[SEP]"] + words
                else:
                    span = words
                targs_2_tokens.append((span, tokenizer.tokenize(clean_tag), words))

            # Same triples with every token list converted to vocabulary ids.
            targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

            total_odds = 0
            min_odds = 100
            total_perplex = 0
            for token_idx in range(len(raw_targ)):
                odds = 0
                odds_list = []
                # NOTE(review): the second argument here is a list of the
                # earlier target triples, whereas the build_input shown in this
                # notebook tokenizes that argument as question text — confirm
                # which build_input version this cell was run against.
                input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint)
                output, _ = model(input_ids, segment_ids, input_mask, None, None)

                #print(input_mask)
                # Column index of the first zero in the attention mask.
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                # Accumulate the raw scores of the target ids, one position each.
                for t_i, t in enumerate(targs_2_ids[token_idx][2]):
                    odds += output[0][start_i+t_i][t]
                    odds_list.append(output[0][start_i+t_i][t])
                # NOTE(review): targs_2_ids[token_idx] always holds three lists,
                # so this check never fires and the division below is by 3, not
                # by the number of target tokens — confirm the intent.
                if len(targs_2_ids[token_idx]) == 0:
                    print(token_idx, targs_2_ids, targs_2_ids[token_idx], raw_targ)
                odds = odds/len(targs_2_ids[token_idx])
                if odds < min_odds:
                    min_odds = odds
                # print(odds)
                total_odds += odds
                perplex = perplexity(targs_2_ids[token_idx][2], output[0][start_i:])
                # NOTE(review): appears to be a deliberate debugging stop — it
                # aborts the cell after the first scored span, leaving the loop
                # variables in the kernel; the rest of this loop body and the
                # per-question summary below are never reached when it fires.
                assert False
                results[(context_text, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]), 
                                              str(targs_2_tokens[token_idx][1:]))] = -perplex
                total_perplex += perplex / len(raw_targ)
                #print(perplex)
            print("Perplexity", -total_perplex)
            total_odds /= len(raw_targ)
            total_total_odds += total_odds
            total_total_perlex += -total_perplex
            if -total_perplex > max_perplex:
                max_perplex = -total_perplex
            #print("Total Odds:", total_odds)
            #print("Min odds:", min_odds)
    #return (total_total_perlex / len(examples)), max_perplex, results

AssertionError: 

In [130]:
# Inspection cell: for every target span of the question held in the kernel,
# list the 25 highest-scoring vocabulary tokens at each span position and then
# the span's negated score.
# context_text, question, targs_2_tokens, targs_2_ids, raw_targ and multihint
# are not defined here; they must already exist in the kernel namespace.
print(context_text)
print("~"*20)
#print(raw_targ)
print(question)
for step in range(len(targs_2_tokens)):
    with torch.no_grad():
        input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], targs_2_tokens[step], multihint)
        output, _ = model(input_ids, segment_ids, input_mask, None, None)
        # Column index of the first zero in the attention mask.
        start_i = np.where(input_mask.data.numpy() == 0)[1][0]
        print(raw_targ[step])
        for j in range(len(raw_targ[step][1])):
            # Map every vocabulary id to its score at position start_i + j,
            # then show the best 25 as tokens.
            c = Counter()
            for i, o in enumerate(output[0][start_i+j]):
                c[i] = o
            for x, val in c.most_common(25):
                print(tokenizer.convert_ids_to_tokens([x]), val)
            print("~"*20)
        print("#"*20)

    perplex = perplexity(targs_2_ids[step][2], output[0][start_i:])
    print(raw_targ[step], -perplex)


Victoria contains many topographically, geologically and climatically diverse areas, ranging from the wet, temperate climate of Gippsland in the southeast to the snow-covered Victorian alpine areas which rise to almost 2,000 m (6,600 ft), with Mount Bogong the highest peak at 1,986 m (6,516 ft). There are extensive semi-arid plains to the west and northwest. There is an extensive series of river systems in Victoria. Most notable is the Murray River system. Other rivers include: Ovens River, Goulburn River, Patterson River, King River, Campaspe River, Loddon River, Wimmera River, Elgin River, Barwon River, Thomson River, Snowy River, Latrobe River, Yarra River, Maribyrnong River, Mitta River, Hopkins River, Merri River and Kiewa River. The state symbols include the pink heath (state flower), Leadbeater's possum (state animal) and the helmeted honeyeater (state bird).
~~~~~~~~~~~~~~~~~~~~
How high is Victoria's Mount Bogong?
('NP', ['victoria', "'", 's'])
['state'] tensor(7.3829)
['highe

In [108]:
# Inspection cell: for each target span, build the input for a prefix of the
# span, dump the raw ids/mask/tokens, and score one token of that prefix.
# context_text, question, targs_2_tokens, targs_2_ids, raw_targ and multihint
# are not defined here; they must already exist in the kernel namespace.
print(context_text)
print("~"*20)
#print(raw_targ)
print(question)
for step in range(len(targs_2_tokens)):
    with torch.no_grad():
        for i in range(len(targs_2_tokens[step][0])):
            # Target triple truncated to the first i+1 tokens (tag left whole).
            thetarg = [targs_2_tokens[step][0][:i+1], targs_2_tokens[step][1], targs_2_tokens[step][2][:i+1]]
            print(thetarg)
            input_ids, input_mask, segment_ids = build_input(context_text, targs_2_tokens[:step], thetarg, multihint)
            output, _ = model(input_ids, segment_ids, input_mask, None, None)
            # Column index of the first zero in the attention mask.
            start_i = np.where(input_mask.data.numpy() == 0)[1][0]
            print(input_ids)
            print(input_mask)
            print(tokenizer.convert_ids_to_tokens(input_ids[0].numpy()))
        
            # Score only the i-th target id against the first padded position.
            the_ids = targs_2_ids[step][2][i:i+1]
            print(the_ids)
            perplex = perplexity(the_ids, output[0][start_i:])
            print(raw_targ[step][1][i], -perplex)
            # Stops after the first prefix, so only i == 0 is examined per span.
            break


Larger drugs (>500 Da) can provoke a neutralizing immune response, particularly if the drugs are administered repeatedly, or in larger doses. This limits the effectiveness of drugs based on larger peptides and proteins (which are typically larger than 6000 Da). In some cases, the drug itself is not immunogenic, but may be co-administered with an immunogenic compound, as is sometimes the case for Taxol. Computational methods have been developed to predict the immunogenicity of peptides and proteins, which are particularly useful in designing therapeutic antibodies, assessing likely virulence of mutations in viral coat particles, and validation of proposed peptide-based drug treatments. Early techniques relied mainly on the observation that hydrophilic amino acids are overrepresented in epitope regions than hydrophobic amino acids; however, more recent developments rely on machine learning techniques using databases of existing known epitopes, usually on well-studied virus proteins, as a

tensor([[  101,  3469,  5850,  1006,  1028,  3156,  4830,  1007,  2064, 27895,
          1037,  8699,  6026, 11311,  3433,  1010,  3391,  2065,  1996,  5850,
          2024,  8564,  8385,  1010,  2030,  1999,  3469, 21656,  1012,  2023,
          6537,  1996, 12353,  1997,  5850,  2241,  2006,  3469, 25117,  2015,
          1998,  8171,  1006,  2029,  2024,  4050,  3469,  2084, 25961,  4830,
          1007,  1012,  1999,  2070,  3572,  1010,  1996,  4319,  2993,  2003,
          2025, 10047, 23041, 24278,  1010,  2021,  2089,  2022,  2522,  1011,
          8564,  2007,  2019, 10047, 23041, 24278,  7328,  1010,  2004,  2003,
          2823,  1996,  2553,  2005,  4171,  4747,  1012, 15078,  4725,  2031,
          2042,  2764,  2000, 16014,  1996, 10047, 23041, 24278,  3012,  1997,
         25117,  2015,  1998,  8171,  1010,  2029,  2024,  3391,  6179,  1999,
         12697, 17261, 22931,  1010, 20077,  3497,  6819,  6820, 22717,  1997,
         14494,  1999, 13434,  5435,  9309,  1010,  

tensor([[  101,  3469,  5850,  1006,  1028,  3156,  4830,  1007,  2064, 27895,
          1037,  8699,  6026, 11311,  3433,  1010,  3391,  2065,  1996,  5850,
          2024,  8564,  8385,  1010,  2030,  1999,  3469, 21656,  1012,  2023,
          6537,  1996, 12353,  1997,  5850,  2241,  2006,  3469, 25117,  2015,
          1998,  8171,  1006,  2029,  2024,  4050,  3469,  2084, 25961,  4830,
          1007,  1012,  1999,  2070,  3572,  1010,  1996,  4319,  2993,  2003,
          2025, 10047, 23041, 24278,  1010,  2021,  2089,  2022,  2522,  1011,
          8564,  2007,  2019, 10047, 23041, 24278,  7328,  1010,  2004,  2003,
          2823,  1996,  2553,  2005,  4171,  4747,  1012, 15078,  4725,  2031,
          2042,  2764,  2000, 16014,  1996, 10047, 23041, 24278,  3012,  1997,
         25117,  2015,  1998,  8171,  1010,  2029,  2024,  3391,  6179,  1999,
         12697, 17261, 22931,  1010, 20077,  3497,  6819,  6820, 22717,  1997,
         14494,  1999, 13434,  5435,  9309,  1010,  

# TODO
Add a check for the end token in the perplexity counter.

In [194]:
def get_avg_odds(examples, dataloader, multihint=False):
    """Score each example's target spans with the model and average the result.

    For every example the question's target spans are fed to the model one at
    a time (each conditioned on the spans before it).  Each span gets a score
    from ``perplexity`` which is divided by the absolute score of the model's
    own greedy (argmax) prediction for the same positions, and the per-span
    ratios are averaged over the spans of that example.

    NOTE(review): ``perplexity`` and ``build_input`` are defined elsewhere in
    the notebook; the printed values suggest ``perplexity`` returns a negative
    log-likelihood-style number — confirm before relying on the sign.

    Args:
        examples: sequence of 4-tuples ``(cid, qid, targetid, _)`` indexing
            into ``dataloader.dataset.contexts`` / ``.questions`` /
            ``.raw_targets``.  The fourth field is ignored.
        dataloader: object whose ``.dataset`` exposes ``contexts``,
            ``questions`` and ``raw_targets``.
        multihint: when True, each span is prefixed with its tokenized tag and
            a ``"[SEP]"`` token; the flag is also forwarded to ``build_input``.

    Returns:
        A 3-tuple ``(average, maximum, results)``:
        ``average`` is the mean over examples of the negated per-example
        score, ``maximum`` is the largest negated per-example score seen
        (never below 0, its initial value), and ``results`` maps
        ``(context, question)`` to a dict of
        ``(str(previous spans), str((tag tokens, words))) -> -perplex``.
        For empty ``examples`` this is ``(0, 0, {})`` so callers can always
        unpack three values.
    """
    if len(examples) == 0:
        # Same arity as the normal return; a bare 0 broke tuple-unpacking callers.
        return 0, 0, {}

    total_total_perlex = 0
    max_perplex = 0
    results = {}

    # Inference only: eval mode does not change between examples, so set it once.
    model.eval()

    for example in examples:
        cid, qid, targetid, _ = example
        context = dataloader.dataset.contexts[cid]
        question = dataloader.dataset.questions[qid]
        print(question)
        results[(context, question)] = {}

        # Reorder each raw entry to (tag, words) and drop entries with no words.
        raw_targ = [
            (tag, word)
            for (word, (_, tag)) in dataloader.dataset.raw_targets[targetid]
            if word
        ]

        with torch.no_grad():
            # Per span: (model input span, tokenized tag, bare words).
            targs_2_tokens = []
            for tag, words in raw_targ:
                tag_tokens = tokenizer.tokenize(tag)
                if multihint:
                    span = tag_tokens + ["[SEP]"] + words
                else:
                    span = words
                targs_2_tokens.append((span, tokenizer.tokenize(tag), words))

            targs_2_ids = [list(map(tokenizer.convert_tokens_to_ids, t)) for t in targs_2_tokens]

            total_perplex = 0
            for token_idx in range(len(raw_targ)):
                input_ids, input_mask, segment_ids = build_input(
                    context, targs_2_tokens[:token_idx], targs_2_tokens[token_idx], multihint
                )
                output, _ = model(input_ids, segment_ids, input_mask, None, None)

                # First column where the attention mask is 0; scoring starts there.
                start_i = np.where(input_mask.data.numpy() == 0)[1][0]
                target_ids = targs_2_ids[token_idx][2]
                tail_logits = output[0][start_i:]

                perplex = perplexity(target_ids, tail_logits)
                results[(context, question)][(str([tt[0] for tt in targs_2_tokens[:token_idx]]),
                                              str(targs_2_tokens[token_idx][1:]))] = -perplex

                # The model's own most likely token at each target position
                # serves as the normalizing reference.
                normalizers = [
                    int(torch.argmax(tail_logits[ij])) for ij in range(len(target_ids))
                ]
                norm_perplex = perplexity(normalizers, tail_logits)

                print("Target", tokenizer.convert_ids_to_tokens(target_ids))
                print("Normalized", tokenizer.convert_ids_to_tokens(normalizers))
                print("perplex", perplex)
                print("Norm", np.abs(norm_perplex))
                print("normalized perplex", perplex / np.abs(norm_perplex))
                print("~"*20)
                total_perplex += perplex / np.abs(norm_perplex) / len(raw_targ)

            # An example with no usable spans contributes 0 instead of
            # triggering a division by zero.
            total_total_perlex += -total_perplex
            if -total_perplex > max_perplex:
                max_perplex = -total_perplex
    return (total_total_perlex / len(examples)), max_perplex, results

In [195]:
# Draw three context indices at random and fetch the answerable / unanswerable
# examples that belong to them.
# NOTE(review): 1203 is presumably the number of contexts in the eval set — confirm.
contexts = random.sample(range(1203), 3)
ans_e, unans_e = get_examples(contexts)

# Per-span results accumulated across all sampled contexts.
ans_res = {}
unans_res = {}

for context in contexts:
    # NOTE(review): `dataloader` is not defined in this cell; it must already
    # exist in the kernel from an earlier cell — verify on a fresh run.
    print(dataloader.dataset.contexts[context])
    print("#"*20)

    # Answerable questions attached to this context.
    print("Start Answerable")
    avg_ans_odds, max_ent_ans, r = get_avg_odds(
        [e for e in ans_e if e[0] == context],
        eval_dataloader_ans,
    )
    ans_res.update(r)

    # Unanswerable questions attached to the same context.
    print("Start Unanswerbale")
    avg_unans_odds, max_ent_unans, r = get_avg_odds(
        [e for e in unans_e if e[0] == context],
        eval_dataloader_unans,
    )
    unans_res.update(r)

    print(avg_ans_odds, avg_unans_odds, max_ent_ans, max_ent_unans)

Dynamic equilibrium was first described by Galileo who noticed that certain assumptions of Aristotelian physics were contradicted by observations and logic. Galileo realized that simple velocity addition demands that the concept of an "absolute rest frame" did not exist. Galileo concluded that motion in a constant velocity was completely equivalent to rest. This was contrary to Aristotle's notion of a "natural state" of rest that objects with mass naturally approached. Simple experiments showed that Galileo's understanding of the equivalence of constant velocity and rest were correct. For example, if a mariner dropped a cannonball from the crow's nest of a ship moving at a constant velocity, Aristotelian physics would have the cannonball fall straight down while the ship moved beneath it. Thus, in an Aristotelian universe, the falling cannonball would land behind the foot of the mast of a moving ship. However, when this experiment is actually conducted, the cannonball always falls at t

Target ['non', '-', 'religious', 'reason']
Normalized ['catholic', '.', '##ots', 'day']
perplex -7.427223205566406
Norm 1.5164649486541748
normalized perplex -4.897721646753447
~~~~~~~~~~~~~~~~~~~~
Target ['massacre']
Normalized ['persecution']
perplex -4.476167678833008
Norm 0.34348583221435547
normalized perplex -13.031593326503245
~~~~~~~~~~~~~~~~~~~~
Target ['what']
Normalized ['what']
perplex -0.04803466796875
Norm 0.04803466796875
normalized perplex -1.0
~~~~~~~~~~~~~~~~~~~~
How many Huguenots were killed during this purge?
Target ['how', 'many', 'hug', '##uen', '##ots', 'were']
Normalized ['how', 'many', 'protestants', '##ots', '##ots', '[SEP]']
perplex -2.8615729808807373
Norm 1.2262383699417114
normalized perplex -2.33361885504917
~~~~~~~~~~~~~~~~~~~~
Target ['killed']
Normalized ['attacked']
perplex -2.116086006164551
Norm 0.47498321533203125
normalized perplex -4.455075332894293
~~~~~~~~~~~~~~~~~~~~
Target ['purge']
Normalized ['persecution']
perplex -9.608264923095703
Norm 

Target ['which', 'principle']
Normalized ['what', '[SEP]']
perplex -2.380352020263672
Norm 0.34424877166748047
normalized perplex -6.914627490851066
~~~~~~~~~~~~~~~~~~~~
The fact that not all fossils may be found globally at the same time causes the principle to become what?
Target ['fact', 'that', 'not', 'all', 'fossils', 'may', 'be', 'found', 'globally', 'at', 'the', 'same', 'time']
Normalized ['principles', 'of', 'succession', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]', '[SEP]']
perplex -7.518015384674072
Norm 0.9021682143211365
normalized perplex -8.333274510598036
~~~~~~~~~~~~~~~~~~~~
Target ['principle']
Normalized ['existence']
perplex -6.338626861572266
Norm 1.9399747848510742
normalized perplex -3.2673759015166075
~~~~~~~~~~~~~~~~~~~~
Target ['causes']
Normalized ['describes']
perplex -3.7770395278930664
Norm 2.2571983337402344
normalized perplex -1.673330815212156
~~~~~~~~~~~~~~~~~~~~
The principle of faunal succession was developed 100 ye

Target ['sedimentary', 'rock']
Normalized ['fossil', 'fossils']
perplex -8.405275344848633
Norm 0.8196568489074707
normalized perplex -10.254627111396816
~~~~~~~~~~~~~~~~~~~~
Target ['not']
Normalized ['worldwide']
perplex -4.417304992675781
Norm 0.6023101806640625
normalized perplex -7.333937121576774
~~~~~~~~~~~~~~~~~~~~
Target ['where']
Normalized ['where']
perplex -1.1716604232788086
Norm 1.1716604232788086
normalized perplex -1.0
~~~~~~~~~~~~~~~~~~~~
Who wrote the principles of faunal succession?
Target ['fauna', '##l', 'succession']
Normalized ['principles', 'of', 'succession']
perplex -4.8109893798828125
Norm 0.9003005027770996
normalized perplex -5.343759517008666
~~~~~~~~~~~~~~~~~~~~
Target ['principles']
Normalized ['fossils']
perplex -5.020883560180664
Norm 0.18271255493164062
normalized perplex -27.47968557529699
~~~~~~~~~~~~~~~~~~~~
Target ['wrote']
Normalized ['developed']
perplex -3.044300079345703
Norm 1.0518131256103516
normalized perplex -2.894335510007199
~~~~~~~~~~~