In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from pytorch_pretrained_bert import BertConfig, BertTokenizer
from model import FusionBert
from util import InputExample, InputFeatures, convert_examples_to_features

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
# --- Checkpoint locations -------------------------------------------------
# Fine-tuned weights and the matching BERT config live in the same output dir.
MODEL_PATH = './wiki_base_output2/pytorch_model.bin'
CONFIG_PATH = './wiki_base_output2/bert_config.json'
# Name handed to BertTokenizer.from_pretrained to fetch the vocabulary.
TOKEN_PATH = 'bert-base-uncased'

# --- Task settings --------------------------------------------------------
# Labels are kept as strings because that is how they appear in the TSV file.
label_list = ["0", "1"]
num_labels = len(label_list)
# Passed to convert_examples_to_features; presumably the maximum number of
# word-piece tokens per encoded sequence -- confirm against util.py.
token_length = 128

In [3]:
import os

# Restrict this process to the physical GPU with index 1.
# NOTE(review): CUDA reads this variable when it initialises, so this cell
# presumably has to run before the first CUDA call in the kernel -- confirm.
os.environ.update(CUDA_VISIBLE_DEVICES='1')

In [4]:
# Use the GPU when one is visible, otherwise fall back to CPU so the notebook
# still runs on a machine where CUDA_VISIBLE_DEVICES hides every device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# BertConfig is given the path of the JSON config saved with the checkpoint.
config = BertConfig(CONFIG_PATH)
tokenizer = BertTokenizer.from_pretrained(TOKEN_PATH)

# NOTE(review): the first FusionBert argument is left as None here; its
# meaning is defined in model.py and is not visible from this notebook.
model = FusionBert(None, config)

# Deserialize onto CPU first.  Without map_location, torch.load tries to
# restore each tensor onto the device it was saved from, which fails when that
# CUDA index is not visible in this process.  The weights are moved to
# `device` right after.
# torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
model.to(device)

# Inference mode (disables dropout); as the last expression it also displays
# the module tree.
model.eval()

04/07/2020 10:39:54 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


FusionBert(
  (bert_module): FusionBertModule(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): BertLayerNorm()
        (dropout): Dropout(p=0.1)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): BertLayerNorm()
                (dropout): Dropout(p=0.1)
   

In [5]:
import csv
from collections import defaultdict

# Group the rows of the test split by their first column (the question text).
# Each value is the list of remaining columns for that question, i.e.
# [candidate_sentence, label] as strings.  Every row is read -- no header
# row is skipped.
d = defaultdict(list)

# newline='' is what the csv module documentation asks for when passing a
# file object to csv.reader, so that line endings inside fields are handled
# by the reader rather than by universal-newline translation.
with open('./wikiqa_data/test.tsv', 'r', encoding='utf-8', newline='') as f:
    for row in csv.reader(f, delimiter='\t'):
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise
            # IndexError, so skip it.
            continue
        d[row[0]].append(row[1:])

d

defaultdict(list,
            {'HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US': [['African immigration to the United States refers to immigrants to the United States who are or were nationals of Africa .',
               '0'],
              ['The term African in the scope of this article refers to geographical or national origins rather than racial affiliation.',
               '0'],
              ['From the Immigration and Nationality Act of 1965 to 2007, an estimated total of 0.8 to 0.9 million Africans immigrated to the United States, accounting for roughly 3.3% of total immigration to the United States during this period.',
               '0'],
              ['African immigrants in the United States come from almost all regions in Africa and do not constitute a homogeneous group.',
               '0'],
              ['They include people from different national, linguistic, ethnic, racial, cultural and social backgrounds.',
               '0'],
              ['As such, African im

In [6]:
def sort_and_couple(labels: np.array, scores: np.array) -> np.array:
    """Pair each label with its score and rank the pairs by score, best first.

    Returns an array whose rows are ``(label, score)`` in descending score
    order; pairs with equal scores keep their input order.
    """
    ranked = [(lbl, scr) for lbl, scr in zip(labels, scores)]
    # list.sort is stable, also with reverse=True, so ties stay in input order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return np.array(ranked)

def meanAveragePrecision(y_true, y_pred, _threshold=0.):
    """Average precision of one ranked list.

    Despite the name, this scores a single query: candidates are ranked by
    ``y_pred`` (descending) and the precision at each relevant position is
    averaged.  The caller averages the result over queries.

    Args:
        y_true: Ground-truth labels, one per candidate.
        y_pred: Predicted scores, aligned with ``y_true``.
        _threshold: A candidate counts as relevant when its label is
            strictly greater than this value.

    Returns:
        The average precision as a float, or ``0.`` when no candidate is
        relevant.
    """
    ranked = sort_and_couple(y_true, y_pred)
    hits = 0
    precision_sum = 0.
    for rank, (label, _score) in enumerate(ranked, start=1):
        if not label > _threshold:
            continue
        hits += 1.
        # Precision at this rank = relevant items seen so far / items seen.
        precision_sum += hits / rank
    return precision_sum / hits if hits else 0.
                    
def meanReciprocalRank(y_true, y_pred, _threshold=0.):
    """Reciprocal rank of the first relevant candidate in one ranked list.

    Candidates are ranked by ``y_pred`` (descending); a candidate is relevant
    when its label is strictly greater than ``_threshold``.

    Returns:
        ``1 / rank`` of the first relevant candidate (rank starts at 1), or
        ``0.`` when none is relevant.
    """
    ranked = sort_and_couple(y_true, y_pred)
    first_hit = next(
        (rank for rank, (label, _score) in enumerate(ranked, start=1)
         if label > _threshold),
        None,
    )
    if first_hit is None:
        return 0.
    return 1. / first_hit

In [7]:
def forward(dataset):
    """Score every text pair in ``dataset`` with the loaded model.

    All pairs are encoded and pushed through the model as a single batch,
    with gradients disabled.

    Args:
        dataset: Sequence of two-element items; element 0 and element 1 are
            passed to ``InputExample`` as its second and third arguments (in
            this notebook: the question and one candidate sentence).

    Returns:
        The softmax over dimension 1 of the model's logits, on ``device``.
        Presumably one row per item of ``dataset`` -- the caller reads
        column 1 as the positive-class score.
    """
    # '0' is a placeholder label: InputExample takes one as its fourth
    # argument, but the model call below is not given any labels.
    examples = [InputExample(idx, pair[0], pair[1], '0')
                for idx, pair in enumerate(dataset)]
    features = convert_examples_to_features(examples, label_list,
                                            token_length, tokenizer)

    def field_tensor(field):
        """Stack one per-example feature attribute into a long tensor on `device`."""
        return torch.tensor([getattr(feat, field) for feat in features],
                            dtype=torch.long).to(device)

    with torch.no_grad():
        logits = model(
            # First text encoded on its own.
            field_tensor('input_ids_x'),
            field_tensor('input_mask_x'),
            field_tensor('segment_ids_x'),
            # Second text encoded on its own.
            field_tensor('input_ids_y'),
            field_tensor('input_mask_y'),
            field_tensor('segment_ids_y'),
            # Combined encoding.  Argument order here is ids, segment ids,
            # then mask -- different from the two groups above.
            field_tensor('input_ids'),
            field_tensor('segment_ids'),
            field_tensor('input_mask'),
        )
    return F.softmax(logits, dim=1)

In [8]:
# Split the grouped rows into model inputs and gold labels, both keyed by the
# question text (the first TSV column).
#   datasets[question] -> list of [question, candidate_sentence] pairs
#   labels[question]   -> list of label strings (last column), same order
datasets = {
    question: [[question, row[0]] for row in rows]
    for question, rows in d.items()
}
labels = {
    question: [row[-1] for row in rows]
    for question, rows in d.items()
}

In [9]:
# Accumulate the per-question ranking metrics; the averages are taken in the
# following cells.  A question with no positive label adds 0 to both sums but
# still counts towards total_batches.
total_batches = 0
total_avp = 0.0
total_mrr = 0.0
for question, pairs in datasets.items():
    # Column 1 of the softmax output is used as the relevance score.
    positive_prob = forward(pairs)[:, 1].cpu().numpy()
    gold = np.array([int(flag) for flag in labels[question]])
    total_avp += meanAveragePrecision(gold, positive_prob)
    total_mrr += meanReciprocalRank(gold, positive_prob)
    total_batches += 1

In [10]:
# Mean reciprocal rank: the per-question reciprocal ranks summed in total_mrr,
# averaged over the number of questions evaluated.
total_mrr / total_batches

0.6637245419840357

In [11]:
# Mean average precision: the per-question average precisions summed in
# total_avp, averaged over the number of questions evaluated.
total_avp / total_batches

0.6463001171596666