In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from pytorch_pretrained_bert import BertConfig, BertTokenizer
from model import FusionBert
from util import InputExample, InputFeatures, convert_examples_to_features

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
CONFIG_PATH = './nlpcc_output/bert_config.json'
MODEL_PATH = './nlpcc_output/pytorch_model.bin'
TOKEN_PATH = 'bert-base-chinese'
num_labels = 2
label_list = ["0", "1"]
token_length = 128

In [3]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [4]:
device = torch.device('cuda')
config = BertConfig(CONFIG_PATH)
tokenizer = BertTokenizer.from_pretrained(TOKEN_PATH)
model = FusionBert(None, config)
model.load_state_dict(torch.load(MODEL_PATH))
model.to(device)
model.eval()

04/18/2020 03:56:49 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt from cache at /root/.pytorch_pretrained_bert/8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00


FusionBert(
  (bert_module): FusionBertModule(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(21128, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): BertLayerNorm()
        (dropout): Dropout(p=0.1)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): BertLayerNorm()
                (dropout): Dropout(p=0.1)
   

In [5]:
import csv
from collections import defaultdict
datasets = []
d = defaultdict(list)
with open('/root/workspace/qa_data/task_data/nlpcc2016/nlpcc-dbqa/test.tsv', 'r', encoding='utf-8') as f:
    reader = csv.reader(f, delimiter='\t')
    for lidx, line in enumerate(reader):
        if lidx == 0:
            continue
        d[line[0]].append(line[1:])
        
d

defaultdict(list,
            {'324': [['北京奥运博物馆的场景效果负责人是谁？',
               '北京奥运博物馆是北京市文物局局属全额拨款事业单位，坐落在国家体育场（鸟巢）南侧负一层，总面积约2.3万平方米。',
               '0'],
              ['北京奥运博物馆的场景效果负责人是谁？', '主要承担奥运文物征集、保管、研究和爱国主义教育基地建设相关工作。', '0'],
              ['北京奥运博物馆的场景效果负责人是谁？',
               '北京奥运博物馆以传播奥林匹克文化，博物馆以传播奥林匹克文化，振奋中华民族精神为宗旨，主要通过展览、讲座、互动活动等方式，使观众更加深刻的理解奥林匹克精神和文化，同时肩负对广大青少年开展爱国主义教育的历史重任，是一所集文化、休闲为一体的综合性博物馆。[1]',
               '0'],
              ['北京奥运博物馆的场景效果负责人是谁？', '于海勃，美国加利福尼亚大学教授 场景效果负责人 总设计师', '1'],
              ['北京奥运博物馆的场景效果负责人是谁？', '美国加利福尼亚大学教授 展陈设计及效果总监 总设计师', '0'],
              ['北京奥运博物馆的场景效果负责人是谁？', '洪麦恩，清华大学美术学院教授 内容及主展线负责人 总设计师', '0']],
             '1493': [['秀雅减肥茶的特惠装可以多少天？',
               '秀雅减肥茶是香港秀雅美容保健集团旗下的一款减肥产品，秀雅减肥茶主要依靠茶叶中的茶纤素分解体内多余的游离脂肪来实现减肥。',
               '0'],
              ['秀雅减肥茶的特惠装可以多少天？', '主要材料都是茶叶和中药，是一款健康有效的减肥产品。', '0'],
              ['秀雅减肥茶的特惠装可以多少天？', '产品名称：秀雅减肥茶', '0'],
              ['秀雅减肥茶的特惠装可以多少天？', '规格：12袋/盒', '0'],
              ['秀雅减

In [6]:
def sort_and_couple(labels: np.array, scores: np.array) -> np.array:
    """Zip the `labels` with `scores` into a single list."""
    couple = list(zip(labels, scores))
    return np.array(sorted(couple, key=lambda x: x[1], reverse=True))

def meanAveragePrecision(y_true, y_pred, _threshold=0.):
    result = 0.
    pos = 0
    coupled_pair = sort_and_couple(y_true, y_pred)
    for idx, (label, score) in enumerate(coupled_pair):
        if label > _threshold:
            pos += 1.
            result += pos / (idx + 1.)
    if pos == 0:
        return 0.
    return result / pos
                    
def meanReciprocalRank(y_true, y_pred, _threshold=0.):
    coupled_pair = sort_and_couple(y_true, y_pred)
    for idx, (label, pred) in enumerate(coupled_pair):
        if label > _threshold:
            return 1. / (idx + 1)
    return 0.

In [7]:
def forward(dataset):
    examples = []
    for i, data in enumerate(dataset):
        examples.append(InputExample(i, data[0], data[1], '0'))
    eval_features = convert_examples_to_features(examples, label_list,
                                                 token_length, tokenizer)
    all_input_ids = torch.tensor(
        [f.input_ids for f in eval_features], dtype=torch.long).to(device)
    all_input_mask = torch.tensor(
        [f.input_mask for f in eval_features], dtype=torch.long).to(device)
    all_segment_ids = torch.tensor(
        [f.segment_ids for f in eval_features], dtype=torch.long).to(device)
    all_label_ids = torch.tensor(
        [f.label_id for f in eval_features], dtype=torch.long).to(device)
    x_input_ids = torch.tensor(
        [f.input_ids_x for f in eval_features], dtype=torch.long).to(device)
    x_input_mask = torch.tensor(
        [f.input_mask_x for f in eval_features], dtype=torch.long).to(device)
    x_segment_ids = torch.tensor(
        [f.segment_ids_x for f in eval_features], dtype=torch.long).to(device)
    y_input_ids = torch.tensor(
        [f.input_ids_y for f in eval_features], dtype=torch.long).to(device)
    y_input_mask = torch.tensor(
        [f.input_mask_y for f in eval_features], dtype=torch.long).to(device)
    y_segment_ids = torch.tensor(
        [f.segment_ids_y for f in eval_features], dtype=torch.long).to(device)
#     input_ids = all_input_ids.to(device)
#     input_mask = all_input_mask.to(device)
#     segment_ids = all_segment_ids.to(device)
#     label_ids = all_label_ids.to(device)
    with torch.no_grad():
        logits = model(x_input_ids, x_input_mask, x_segment_ids,
                            y_input_ids, y_input_mask, y_segment_ids,
                            all_input_ids, all_segment_ids, all_input_mask)
    return F.softmax(logits, dim=1)

In [8]:
labels = {}
datasets = {}
for k, v in d.items():
    # k is the id, v is the dataset
    dataset = []
    label = []
    for i, pair in enumerate(v):
        dataset.append([k, pair[0]])
        label.append(pair[-1])
    datasets[k] = dataset
    labels[k] = label
# labels

In [9]:
total_batches = 0
total_avp = 0.0
total_mrr = 0.0
from tqdm import tqdm
for k, v in tqdm(datasets.items()):
    # print(forward(v))
    # break
    score = forward(v)[:, 1].cpu().numpy()
    label = np.array(list(map(int, labels[k])))
    # print(score, label)
    total_avp += meanAveragePrecision(label, score)
    total_mrr += meanReciprocalRank(label, score)
    total_batches += 1

100%|██████████| 3998/3998 [23:56<00:00,  2.98it/s]


In [10]:
total_mrr / total_batches

0.28108985350363663

In [11]:
total_avp / total_batches

0.2788345295904325