In [5]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.append("examples/")

import logging
import argparse
import json
from tqdm import tqdm, trange
import csv

import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertModel 
from pytorch_pretrained_bert.optimization import BertAdam

from torch.utils.data import Dataset
import random

In [6]:
from run_autoreg_eval import BERTDataset, RNNModel

In [14]:
# Evaluation configuration: every value below is read by later cells.

# Input data. `eval_file` and `on_memory` are forwarded to BERTDataset, and
# `max_seq_length` is passed to it as `seq_len`.
eval_file = "dataset/dev-v2.0.json"
on_memory = True
max_seq_length = 128

# Checkpoint path. Despite the name, this file is loaded with torch.load and
# applied to the RNNModel via model.load_state_dict, not to BertModel.
bert_model = "autoreg_model/pytorch_model.bin"

# Batching. `train_batch_size` is reused as the batch size of the evaluation
# DataLoaders and as the argument to model.init_hidden.
train_batch_size = 1
gradient_accumulation_steps = 1

In [15]:
# Record how many GPUs are visible and pick the device tensors are moved to.
n_gpu = torch.cuda.device_count()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed every random number generator in use with the same fixed value so
# repeated runs of the notebook start from the same state.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(42)
if n_gpu > 0:
    # Only touch the CUDA generators when at least one GPU is present.
    torch.cuda.manual_seed_all(42)

# Lower-casing WordPiece tokenizer matching the "bert-base-uncased" vocabulary.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

# Load eval_data: two views of the same file, built with answerable=True
# first and answerable=False second; everything else is shared.
eval_dataset_answerable, eval_dataset_unanswerable = (
    BERTDataset(eval_file, tokenizer, seq_len=max_seq_length,
                on_memory=on_memory, answerable=is_answerable)
    for is_answerable in (True, False)
)

# Prepare model
# The checkpoint is deserialized onto the CPU regardless of `device`.
# TODO daniter: remove this map_location
model_state_dict = torch.load(bert_model, map_location='cpu')

# Both encoders start from the stock "bert-base-uncased" weights; the
# `state_dict=model_state_dict` argument is intentionally not passed here.
# TODO daniter: check if bert model is being loaded correctly
context_model = BertModel.from_pretrained("bert-base-uncased").to(device)
question_model = BertModel.from_pretrained("bert-base-uncased").to(device)

print("Checking the vocab size:", len(tokenizer.vocab))

# Assemble the RNN wrapper around the two encoders and restore the checkpoint.
# The first 768 is the BERT hidden size; the second 768 and the 1 are
# presumably the GRU hidden size and number of GRU layers (an earlier comment
# said 256 for the GRU, but 768 is what is passed) - TODO confirm against
# RNNModel's signature.
model = RNNModel(
    "GRU",
    len(tokenizer.vocab),
    768,
    768,
    1,
    context_model,
    question_model,
    ngpu=n_gpu,
)
model.load_state_dict(model_state_dict)
model.to(device)

# eval loader
# Sequential samplers keep the examples in dataset order, so a batch index
# printed by a later cell always refers to the same example.
eval_sampler_ans = SequentialSampler(eval_dataset_answerable)
eval_sampler_unans = SequentialSampler(eval_dataset_unanswerable)

eval_dataloader_ans = DataLoader(
    eval_dataset_answerable,
    sampler=eval_sampler_ans,
    batch_size=train_batch_size,
)
eval_dataloader_unans = DataLoader(
    eval_dataset_unanswerable,
    sampler=eval_sampler_unans,
    batch_size=train_batch_size,
)


# Loss used by the evaluation cells: cross-entropy between the model output
# (flattened to one row per position) and the flattened question token ids.
# NOTE(review): no ignore_index is given, so padded positions (token id 0,
# which the tokenizer maps to [PAD]) are included in the averaged loss -
# confirm this is intended.
criterion = nn.CrossEntropyLoss()
# The return value is not captured; presumably this prepares the RNN hidden
# state for batches of `train_batch_size` - verify in RNNModel.
model.init_hidden(train_batch_size)
# `pass` is a no-op. It makes the cell end on a statement instead of an
# expression, which likely keeps the notebook from echoing the call above.
pass
# Disabled draft of the evaluation: a full pass over the answerable and the
# unanswerable loader, summing the per-batch loss of each. Note that the
# first loop still contains a debugging `assert False`.
# with torch.no_grad():
#     model.eval()

#     eval_loss_ans = 0
#     for batch_i, eval_batch in enumerate(eval_dataloader_ans):
#         assert False
#         if batch_i % 1000 == 0:
#             print("#### DANITER completed answerable", batch_i)
#         eids = eval_batch[-1]
#         eval_batch = tuple(t.to(device) for t in eval_batch[:-1])
#         question_ids, question_mask, context_ids, context_mask, targets = eval_batch
#         output, _ = model(context_ids, context_mask, question_ids, question_mask)
#         loss = criterion(output.view(-1, len(tokenizer.vocab)), question_ids.view(-1))
#         eval_loss_ans += loss.item()
#     print("##### DANITER EVAL LOSS IS (ANSWERABLE) : ", eval_loss_ans)

#     eval_loss_unans = 0
#     for batch_i, eval_batch in enumerate(eval_dataloader_unans):
#         if batch_i % 1000 == 0:
#             print("#### DANITER completed unanswerable", batch_i)
#         eids = eval_batch[-1]
#         eval_batch = tuple(t.to(device) for t in eval_batch[:-1])
#         question_ids, question_mask, context_ids, context_mask, targets = eval_batch
#         output, _ = model(context_ids, context_mask, question_ids, question_mask)
#         loss = criterion(output.view(-1, len(tokenizer.vocab)), question_ids.view(-1))
#         eval_loss_unans += loss.item()
#     print("##### DANITER EVAL LOSS IS (UNANSWERABLE) : ", eval_loss_unans)

02/10/2019 13:19:15 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/daniter/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 3569.53it/s]
Loading Squad: 100%|██████████| 35/35 [00:00<00:00, 821.61it/s]
02/10/2019 13:19:17 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/10/2019 13:19:17 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/daniter/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1

Checking the vocab size: 30522


In [33]:
# Walk the answerable examples in order, accumulating the loss, and stop at
# the first batch whose own loss exceeds 0.01. The loop variables
# (question_ids, output, ...) are left holding that batch for inspection.
model.eval()
eval_loss_ans = 0
with torch.no_grad():
    for batch_i, eval_batch in enumerate(eval_dataloader_ans):
        # The trailing element is split off as `eids` and is not moved to
        # the device; every other field is.
        *tensor_fields, eids = eval_batch
        eval_batch = tuple(field.to(device) for field in tensor_fields)
        (question_ids, question_mask,
         context_ids, context_mask, targets) = eval_batch

        output, _ = model(context_ids, context_mask, question_ids, question_mask)

        # One row of vocabulary scores per position, scored against the
        # question token ids themselves.
        logits = output.view(-1, len(tokenizer.vocab))
        loss = criterion(logits, question_ids.view(-1))

        batch_loss = loss.item()
        eval_loss_ans += batch_loss
        if batch_loss > 0.01:
            # Prints the running total, not just this batch's loss.
            print(batch_i, eval_loss_ans)
            break

12 0.10354693979024887


In [39]:
# Show the question token ids of the batch the loop stopped on, followed by
# the same ids decoded to tokens.
print(question_ids)
# Tensor.numpy() only works on CPU tensors, and `device` may be "cuda", so
# detach and copy to host memory first (a no-op when already on the CPU).
question_id_row = question_ids.detach().cpu().numpy()[0]
print(tokenizer.convert_ids_to_tokens(question_id_row))

tensor([[  101,  2054,  2314,  2761, 10351,  1996, 11068,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,  

In [43]:
# Pull the model output into NumPy for inspection. `o` is indexed as
# o[batch, position, vocabulary id] by the cells below.
# Tensor.numpy() only works on CPU tensors, and `device` may be "cuda", so
# detach and copy to host memory first (a no-op when already on the CPU).
o = output.detach().cpu().numpy()
print(o)
# Highest-scoring vocabulary entry at every position of the first example.
print(tokenizer.convert_ids_to_tokens(np.argmax(o[0], axis=1)))

[[[ 5.6281233  -0.69632524 -1.6773183  ... -1.089677   -0.66363454
   -2.3050935 ]
  [ 7.029129   -2.3007493  -2.2169943  ... -2.3561647  -1.9902968
   -1.9757373 ]
  [ 0.5491697  -2.3692703  -1.9706893  ... -2.0390208  -1.2088827
   -3.4375327 ]
  ...
  [33.869183   -1.5653121  -2.9970436  ... -3.353264   -2.9366097
   -2.3617918 ]
  [33.857277   -1.5599437  -3.0104399  ... -3.3641667  -2.9220674
   -2.3839917 ]
  [33.84768    -1.5606     -3.0117958  ... -3.3346536  -2.9785056
   -2.3785434 ]]]
['[CLS]', 'what', 'river', 'originally', 'supervised', 'the', 'duchy', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD

In [44]:
# For the first 10 positions print the highest-scoring token id and its
# softmax probability.
for i in range(10):
    row = o[0, i, :]
    # Shift by the row maximum before exponentiating: the result is
    # mathematically the same softmax, but exp() can no longer overflow to
    # inf (and the ratio to nan) when a score is large.
    weights = np.exp(row - row.max())
    print(np.argmax(row), weights.max() / weights.sum())

101 1.0
2054 0.99999976
2314 0.9999653
2761 0.99997437
13588 0.06754731
1996 0.9999825
11068 0.9999676
0 0.99984914
0 1.0
0 1.0


In [45]:
# Running loss total left by the answerable loop: the sum over every batch
# up to and including the one that triggered the break.
eval_loss_ans

0.10354693979024887

In [51]:
# Softmax probability of the highest-scoring token at position 4.
row = o[0, 4, :]
# Shift by the row maximum so exp() cannot overflow; the softmax value is
# mathematically unchanged.
weights = np.exp(row - row.max())
weights.max() / weights.sum()

0.06754731

In [54]:
# Softmax probability of token id 13588 at position 4 (13588 is the id the
# argmax loop printed for this position).
row = o[0, 4, :]
# Shift by the row maximum so exp() cannot overflow; the softmax value is
# mathematically unchanged.
weights = np.exp(row - row.max())
weights[13588] / weights.sum()

0.06754731

In [55]:
# Softmax probability of token id 10351 at position 4 (10351 is the id that
# appears at position 4 of the printed question_ids tensor).
row = o[0, 4, :]
# Shift by the row maximum so exp() cannot overflow; the softmax value is
# mathematically unchanged.
weights = np.exp(row - row.max())
weights[10351] / weights.sum()

1.7623411e-06

In [56]:
from collections import Counter

In [57]:
# Maps vocabulary id -> softmax probability at position 4; filled by the next
# cell and ranked with most_common() afterwards.
c = Counter()

In [58]:
# Store the softmax probability of every vocabulary id at position 4 in `c`.
row = o[0, 4, :]
# Compute the whole distribution once instead of re-summing the exponentials
# of the full vocabulary for every id, and shift by the row maximum so exp()
# cannot overflow (the softmax value is mathematically unchanged).
probs = np.exp(row - row.max())
probs /= probs.sum()
for i, prob in enumerate(probs):
    c[i] = prob

In [59]:
# List the 25 most probable tokens at position 4, best first.
for token_id, prob in c.most_common(25):
    print(tokenizer.convert_ids_to_tokens([token_id]), prob)

['supervised'] 0.06754731
['brat'] 0.021551749
['boundary'] 0.0153177455
['##uve'] 0.013979059
['bounds'] 0.0138610555
['resided'] 0.012585191
['regulated'] 0.011854893
['soyuz'] 0.011402544
['##gut'] 0.010964767
['54th'] 0.01089891
['yue'] 0.010824164
['poems'] 0.010754741
['##ht'] 0.009082668
['poem'] 0.009074019
['##ount'] 0.008436166
['lowered'] 0.0067586447
['deserved'] 0.0064462107
['triggered'] 0.0064248405
['corresponding'] 0.0061934637
['packet'] 0.0059934203
['##erted'] 0.00508144
['took'] 0.004897214
['bordered'] 0.0045819986
['assessed'] 0.004541193
['vacated'] 0.004289019


In [60]:
# Run the model on the first unanswerable batch only, leaving its tensors
# (question_ids, output, ...) and its loss available to the cells below.
# The accumulator keeps the name `eval_loss_ans` because the next cell reads
# it, even though the data comes from the unanswerable loader.
with torch.no_grad():
    model.eval()

    eval_loss_ans = 0
    for batch_i, eval_batch in enumerate(eval_dataloader_unans):
        # The trailing element is kept aside as `eids` and not moved to the
        # device; every other field is.
        eids = eval_batch[-1]
        eval_batch = tuple(t.to(device) for t in eval_batch[:-1])
        question_ids, question_mask, context_ids, context_mask, targets = eval_batch
        output, _ = model(context_ids, context_mask, question_ids, question_mask)
        loss = criterion(output.view(-1, len(tokenizer.vocab)), question_ids.view(-1))
        eval_loss_ans += loss.item()
        # Stop after the first batch. The loss-threshold check that used to
        # follow this unconditional break could never run and was removed.
        break

In [61]:
# Loss of the single unanswerable batch processed by the previous cell. The
# variable is still called `eval_loss_ans`, but it was filled from
# eval_dataloader_unans.
eval_loss_ans

3.0606985092163086e-05

In [62]:
# Show the question token ids of the unanswerable batch, followed by the same
# ids decoded to tokens.
print(question_ids)
# Tensor.numpy() only works on CPU tensors, and `device` may be "cuda", so
# detach and copy to host memory first (a no-op when already on the CPU).
question_id_row = question_ids.detach().cpu().numpy()[0]
print(tokenizer.convert_ids_to_tokens(question_id_row))

tensor([[  101,  2040,  2435,  2037,  2171,  2000, 13298,  1999,  1996,  6694,
          1005,  1055,  1998, 22096,  1005,  1055,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,  

In [63]:
# Pull the model output for the unanswerable batch into NumPy, replacing the
# earlier `o`. It is indexed as o[batch, position, vocabulary id] below.
# Tensor.numpy() only works on CPU tensors, and `device` may be "cuda", so
# detach and copy to host memory first (a no-op when already on the CPU).
o = output.detach().cpu().numpy()
print(o)
# Highest-scoring vocabulary entry at every position of the first example.
print(tokenizer.convert_ids_to_tokens(np.argmax(o[0], axis=1)))

[[[ 4.9845333  -0.5840121  -1.7072299  ... -0.8986035  -0.7701622
   -2.4625373 ]
  [ 5.5687795  -3.1512156  -1.5189477  ... -2.2867513  -0.81157726
   -1.9473257 ]
  [ 1.8056442  -2.4558816  -1.3853037  ... -1.5242887  -1.9538682
   -2.677875  ]
  ...
  [33.50697    -1.898835   -3.0913079  ... -3.4296584  -3.0715473
   -2.3606715 ]
  [33.482018   -1.8573587  -3.0754821  ... -3.4108062  -3.087913
   -2.3764603 ]
  [33.43003    -1.8635046  -3.1077945  ... -3.40165    -3.1059961
   -2.4096801 ]]]
['[CLS]', 'who', 'gave', 'their', 'name', 'to', 'normandy', 'in', 'the', '1000', "'", 's', 'and', '1100', "'", 's', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]

In [66]:
# For the first 20 positions print the highest-scoring token id and its
# softmax probability.
for i in range(20):
    row = o[0, i, :]
    # Shift by the row maximum before exponentiating: the result is
    # mathematically the same softmax, but exp() can no longer overflow to
    # inf (and the ratio to nan) when a score is large.
    weights = np.exp(row - row.max())
    print(np.argmax(row), weights.max() / weights.sum())

101 1.0
2040 0.99998236
2435 0.99976426
2037 0.99991757
2171 0.999995
2000 0.99999946
13298 0.9999513
1999 0.99999833
1996 0.99999017
6694 0.998527
1005 0.99999917
1055 0.999995
1998 0.99999845
22096 0.99808013
1005 0.9999972
1055 0.99999726
0 0.99986583
0 0.99999994
0 1.0
0 1.0
