In [184]:
import torch
from tqdm import tqdm_notebook as tqdm

from transformers import BertTokenizer, BertModel, BertForMaskedLM

In [2]:
import pandas as pd
import numpy as np

In [3]:
from torch.nn.utils.rnn import pad_sequence 
import torchvision
import torch.nn as nn

In [4]:
from transformers.optimization import AdamW
from transformers.optimization import AdamW, get_linear_schedule_with_warmup

In [5]:
validation_data = pd.read_json("FGC_release_1.7.13/FGC_release_all_dev.json")
training_data = pd.read_json("FGC_release_1.7.13/FGC_release_all_train.json")
test_data = pd.read_json("FGC_release_1.7.13/FGC_release_all_test.json")

In [6]:
device = torch.device("cuda")

## Data Preprocessing

In [7]:
def datapreprocessing(data, is_dev=False):
    
    # Save all the questions, potential supporting evidence and indices in three lists
    textQ_to_be_tokenized = []
    textA_to_be_tokenized = []
    sp_index = []
    
    for dictionary in data['QUESTIONS']:
        for element in dictionary:
            textQ_to_be_tokenized.append(element['QTEXT_CN'])
            sp_index.append(element['SHINT_'])
    for dictionary in data['SENTS']:
        current_text_sentence = []
        for element in dictionary:
            current_text_sentence.append(element['text'])
        textA_to_be_tokenized.append(current_text_sentence)
    
    QandA_label = pd.DataFrame({'Question': textQ_to_be_tokenized,
                                'Sentence_List': textA_to_be_tokenized,
                                'SE_Index': sp_index,
                                'Label': sp_index})
    
    QandA_label['Length'] = QandA_label['Sentence_List'].apply(lambda x: len(x))
    QandA_label['SE_Index'] = QandA_label['SE_Index'].apply(lambda x: [0])
    QandA_label['SE_Index'] = QandA_label['SE_Index'] * QandA_label['Length']
    QandA_label['SE_Index'] = list(zip(QandA_label['SE_Index'], QandA_label['Label']))

    # Extract label index
    for row in QandA_label['SE_Index']:
        for index in row[1]:
            row[0][index] = 1
        
    indexed = [i[0] for i in list(QandA_label['SE_Index'])]
    QandA_label['Label'] = indexed

    Q_and_Sentence_all_Comb = pd.DataFrame({'Question':np.repeat(QandA_label['Question'].values, QandA_label['Sentence_List'].str.len()),
                        'Sentence':np.concatenate(QandA_label['Sentence_List'].values)})
    Q_and_Sentence_all_Comb['Label'] = QandA_label['Label'].sum()
    
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
            
    # Put all question and sentence combination into a list 
    All_instances = []
    for i in range(len(QandA_label)):
        for sentence in QandA_label['Sentence_List'][i]:
            question_token = tokenizer.tokenize(QandA_label['Question'][i])
            sentence_token = tokenizer.tokenize(sentence)
            instance = ['[CLS]'] + question_token + ['[SEP]'] + sentence_token + ['[SEP]']
            if len(instance) > 512:
                instance = instance[:512]
            All_instances.append(instance)
            
    # Convert ids to segment_ids
    segment_ids = []
    for token in All_instances:
        length_of_zeros = token.index('[SEP]') - token.index('[CLS]') + 1
        length_of_ones = len(token) - length_of_zeros
        zeros_and_ones = [0] * length_of_zeros + [1] * length_of_ones
        segment_ids.append(zeros_and_ones)
        
    ids = []
    for token in All_instances:
        ids.append(tokenizer.convert_tokens_to_ids(token))
        
    mask_ids = []
    for token in All_instances:
        mask_ids.append([1] * len(token))
        
    labels = list(Q_and_Sentence_all_Comb['Label'])
    labels = [[i] for i in labels]
    
    return All_instances, ids, segment_ids, mask_ids, labels

In [8]:
dev_instances, dev_ids, dev_seg_ids, dev_mask_ids, dev_labels = datapreprocessing(validation_data, True)
train_instances, train_ids, train_seg_ids, train_mask_ids, train_labels = datapreprocessing(training_data)

I0703 01:59:50.011849 140491409098560 tokenization_utils.py:375] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt from cache at /root/.cache/torch/transformers/8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00
I0703 01:59:54.834610 140491409098560 tokenization_utils.py:375] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt from cache at /root/.cache/torch/transformers/8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00


## Loading Data

In [9]:
from torch.utils.data import Dataset

In [10]:
class SentenceDataset(Dataset):
    
    def __init__(self, ids, segment_ids, mask_ids, labels):
        self.instances = []
        for ids_i, segment_ids_i, mask_ids, label in zip(ids, segment_ids, mask_ids, labels):
            self.instances.append({"ids": ids_i, "segment_ids": segment_ids_i, 
                                   "mask_ids": mask_ids, "labels": label})  
    def __len__(self):
        return len(self.instances)

    def __getitem__(self, idx):
        sample = self.instances[idx]

        return sample

In [11]:
train_dataset = SentenceDataset(train_ids, train_seg_ids, train_mask_ids, train_labels)

In [12]:
dev_dataset = SentenceDataset(dev_ids, dev_seg_ids, dev_mask_ids, dev_labels)

In [13]:
from torch.utils.data import DataLoader

In [14]:
def collate(batch):
    padded_ids = pad_sequence([torch.tensor(instance['ids']) for instance in batch], batch_first=True)
    padded_ids = padded_ids.to(device)
    
    padded_segment_ids = pad_sequence([torch.tensor(instance['segment_ids']) for instance in batch], batch_first=True)
    padded_segment_ids = padded_segment_ids.to(device)
    
    padded_mask_ids = pad_sequence([torch.tensor(instance['mask_ids']) for instance in batch], batch_first=True)
    padded_mask_ids = padded_mask_ids.to(device)
    
    labels = torch.stack([torch.tensor(instance['labels']) for instance in batch])
    labels = labels.to(device)
    return {'ids': padded_ids, 'mask_ids': padded_mask_ids, 'segment_ids': padded_segment_ids, 'labels': labels}

In [15]:
dataloader_train = DataLoader(train_dataset, batch_size=8, shuffle = True, collate_fn = collate)
#dataloader_dev = DataLoader(dev_dataset, collate_fn = collate)

In [57]:
def eval_preprocessing(data, dataset):
    
    len_array = np.cumsum(np.array(data['SENTS'].apply(lambda x: len(x))))

    dictionary_lists = []
    batches = []
    for i in range(len(dataset.instances)):

        instance = dataset.instances[i]
        dictionary_lists.append(instance)

        if i in len_array - 1:

            padded_ids = pad_sequence([torch.tensor(instance['ids']) for instance in dictionary_lists], batch_first=True)
            padded_ids = padded_ids.to(device)

            padded_segment_ids = pad_sequence([torch.tensor(instance['segment_ids']) for instance in dictionary_lists], batch_first=True)
            padded_segment_ids = padded_segment_ids.to(device)

            padded_mask_ids = pad_sequence([torch.tensor(instance['mask_ids']) for instance in dictionary_lists], batch_first=True)
            padded_mask_ids = padded_mask_ids.to(device)

            labels = torch.stack([torch.tensor(instance['labels']) for instance in dictionary_lists])
            labels = labels.to(device)

            current_dev_batch = {'ids': padded_ids, 'mask_ids': padded_mask_ids, 'segment_ids': padded_segment_ids, 'labels': labels}

            batches.append(current_dev_batch)
            dictionary_lists = []

    return batches

## Creating Neural Network

In [17]:
class FGC_Network(nn.Module):

    def __init__(self):
        
        super(FGC_Network, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-chinese')
        self.linear = nn.Linear(768, 1)

    def forward(self, batch):
        # batch['ids'] = (batch_size, sent_len)
        # batch['segment_ids'] = (batch_size, sent_len)
        # batch['mask_ids'] = = (batch_size, sent_len)
        # output = (batch_size, 1)
        hidden_state, pooler_output = self.bert(batch['ids'], batch['mask_ids'], batch['segment_ids'])
        linear_output = self.linear(pooler_output)
        
        return linear_output

    def loss(self, batch):
        
        loss_fn = nn.BCEWithLogitsLoss()
        output = self.forward(batch)
        target = batch['labels'].float().to(device)
        
        return loss_fn(output, target)
    
    def _predict(self, batch):
        
        output = self.forward(batch)
        scores = torch.sigmoid(output)
        scores = scores.cpu().numpy()[:,0].tolist()
        
        return scores
    
    def predict_fgc(self, batch, threshold=0.5):
        
        scores = self._predict(batch)

        max_i = 0
        max_score = 0
        sp = []
        
        for i, score in enumerate(scores):

            if score > max_score:
                max_i = i
                max_score = score
            if score >= threshold:
                sp.append(i)

        if not sp:
            sp.append(max_i)

        return {'sp': sp, 'sp_scores': scores}

In [18]:
network = FGC_Network()
network.to(device)

I0703 02:00:14.653911 140491409098560 configuration_utils.py:152] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json from cache at /root/.cache/torch/transformers/8a3b1cfe5da58286e12a0f5d7d182b8d6eca88c08e26c332ee3817548cf7e60a.f12a4f986e43d8b328f5b067a641064d67b91597567a06c7b122d1ca7dfd9741
I0703 02:00:14.657922 140491409098560 configuration_utils.py:169] Model config {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pad_token_id": 0,
  

FGC_Network(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
        

## Training & Evaluating Data 

In [28]:
def optim(nn, num_epochs, lr):
    param_optimizer = list(nn.bert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    num_epochs = num_epochs
    num_train_optimization_steps = len(dataloader_train) * num_epochs
    optimizer = AdamW(optimizer_grouped_parameters, lr=lr)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                     num_warmup_steps=int(
                                                         num_train_optimization_steps * 0.1),
                                                     num_training_steps=num_train_optimization_steps)
    return optimizer, scheduler

In [20]:
def _update_sp(metrics, sp_gold, sp_pred):
    tp, fp, fn = 0, 0, 0
        
    for p in sp_pred:
        if p in sp_gold:
            tp += 1
        else:
            fp += 1
    for g in sp_gold:
        if g not in sp_pred:
            fn += 1
            
    precision = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    em = 1.0 if fp + fn == 0 else 0.0
    
    metrics['sp_em'] += em
    metrics['sp_f1'] += f1
    metrics['sp_prec'] += precision
    metrics['sp_recall'] += recall
    
    return precision, recall, f1

In [21]:
def eval_sp_fgc(sp_golds, sp_preds):
    
    metrics = {'sp_em': 0, 'sp_prec': 0, 'sp_recall': 0, 'sp_f1': 0}
    
    assert len(sp_golds) == len(sp_preds)
    
    for sp_gold, sp_pred in zip(sp_golds, sp_preds):
        _update_sp(metrics, sp_gold, sp_pred)
    
    N = len(sp_golds)
    for k in metrics.keys():
        metrics[k] /= N
        metrics[k] = round(metrics[k], 3)
    print(metrics)
    return metrics

In [22]:
def eval_fgc_atype(atype_golds, atype_preds):
    
    pos = 0
    neg = 0
    
    for gold, atype in zip(atype_golds, atype_preds):
        if atype == gold:
            pos += 1
        else:
            neg += 1
    return pos/len(atypes_preds)

In [64]:
def eval(network, dev_batches, current_epoch, sp_golds, avg_loss):
    
    network.eval()
    
    with torch.no_grad():
        sp_preds = []
        
        #atype_preds = []
        for batch in tqdm(dev_batches):
            
            out_dct = network.predict_fgc(batch)
            sp_preds.append(out_dct['sp'])
            
            #if 'atype' in out_dct:
                #for type_i in out_dct['atype']:
                    #assert type_i == out_dct['atype'][0]
                #atype_preds.append(type_i)
                
  
    metrics = eval_sp_fgc(sp_golds, sp_preds)
    print('epoch %d eval_recall: %.3f eval_f1: %.3f loss: %.3f' % (
            current_epoch, metrics['sp_recall'], metrics['sp_f1'], avg_loss))
        
    #torch.save(network.state_dict(), "FGC_release_1.7.13/models_with_scheduler/model_epoch{0}_eval_em:{1:.3f}_precision:{2:.3f}_recall:{3:.3f}_f1:{4:.3f}_loss:{5:.3f}.m".format(current_epoch, metrics['sp_em'], metrics['sp_prec'], metrics['sp_recall'], metrics['sp_f1'], avg_loss))
    
    return sp_preds, sp_golds

In [24]:
def train(network, num_epochs, lr):
    
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer, scheduler = optim(network, num_epochs, lr)
    
    sp_golds = validation_data['QUESTIONS'].apply(lambda x: x[0]['SHINT_']).tolist()
    #atype_golds = validation_data['QUESTIONS'].apply(lambda x: x[0]['ATYPE_']).tolist()
    
    for current_epoch in range(num_epochs):
        network.train()
        running_loss = 0.0
        dr = True
        for batch in tqdm(dataloader_train):
            optimizer.zero_grad()
            current_output = network(batch)
            current_target = batch['labels'].to(dtype=torch.float, device=device)
            current_loss = loss_fn(current_output, current_target)

            current_loss.backward()
            torch.nn.utils.clip_grad_norm_(network.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            running_loss += current_loss.item()
            
        learning_rate_scalar = scheduler.get_lr()[0]
        print('lr = %f' % learning_rate_scalar)
        avg_loss = running_loss/len(dataloader_train)
        print('epoch %d train_loss: %.3f' % (current_epoch, avg_loss))
        eval(network, dev_batches, current_epoch, sp_golds, avg_loss)

In [None]:
train(network, 20, 0.00002)

HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000010
epoch 0 train_loss: 0.213


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.142, 'sp_prec': 0.592, 'sp_recall': 0.611, 'sp_f1': 0.543}
epoch 0 eval_recall: 0.611 eval_f1: 0.543 loss: 0.213


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000020
epoch 1 train_loss: 0.163


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.15, 'sp_prec': 0.59, 'sp_recall': 0.63, 'sp_f1': 0.554}
epoch 1 eval_recall: 0.630 eval_f1: 0.554 loss: 0.163


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000019
epoch 2 train_loss: 0.135


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.166, 'sp_prec': 0.619, 'sp_recall': 0.532, 'sp_f1': 0.522}
epoch 2 eval_recall: 0.532 eval_f1: 0.522 loss: 0.135


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000018
epoch 3 train_loss: 0.106


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.186, 'sp_prec': 0.661, 'sp_recall': 0.532, 'sp_f1': 0.548}
epoch 3 eval_recall: 0.532 eval_f1: 0.548 loss: 0.106


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000017
epoch 4 train_loss: 0.083


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.174, 'sp_prec': 0.627, 'sp_recall': 0.545, 'sp_f1': 0.538}
epoch 4 eval_recall: 0.545 eval_f1: 0.538 loss: 0.083


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000016
epoch 5 train_loss: 0.062


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.182, 'sp_prec': 0.59, 'sp_recall': 0.614, 'sp_f1': 0.547}
epoch 5 eval_recall: 0.614 eval_f1: 0.547 loss: 0.062


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000014
epoch 6 train_loss: 0.050


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.154, 'sp_prec': 0.565, 'sp_recall': 0.617, 'sp_f1': 0.538}
epoch 6 eval_recall: 0.617 eval_f1: 0.538 loss: 0.050


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))


lr = 0.000013
epoch 7 train_loss: 0.039


HBox(children=(IntProgress(value=0, max=247), HTML(value='')))


{'sp_em': 0.146, 'sp_prec': 0.535, 'sp_recall': 0.627, 'sp_f1': 0.517}
epoch 7 eval_recall: 0.627 eval_f1: 0.517 loss: 0.039


HBox(children=(IntProgress(value=0, max=3928), HTML(value='')))

In [60]:
trained_network = FGC_Network()
trained_network.load_state_dict(torch.load('FGC_release_1.7.13/models_with_scheduler/model_epoch19_eval_em:0.154_precision:0.599_recall:0.609_f1:0.547_loss:0.001.m'))

I0703 05:30:48.277780 140491409098560 configuration_utils.py:152] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json from cache at /root/.cache/torch/transformers/8a3b1cfe5da58286e12a0f5d7d182b8d6eca88c08e26c332ee3817548cf7e60a.f12a4f986e43d8b328f5b067a641064d67b91597567a06c7b122d1ca7dfd9741
I0703 05:30:48.282982 140491409098560 configuration_utils.py:169] Model config {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pad_token_id": 0,
  

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [67]:
sp_golds = training_data['QUESTIONS'].apply(lambda x: x[0]['SHINT_']).tolist()
batches = eval_preprocessing(training_data, train_dataset)

trained_network.to("cuda")
train_pred, train_obs = eval(trained_network, batches, 0, sp_golds, 0.001)


HBox(children=(IntProgress(value=0, max=882), HTML(value='')))


{'sp_em': 0.985, 'sp_prec': 0.986, 'sp_recall': 0.988, 'sp_f1': 0.987}
epoch 0 eval_recall: 0.988 eval_f1: 0.987 loss: 0.001


In [73]:
training_data_with_performance = training_data.copy()
training_data_with_performance['train_pred'] = train_pred
training_data_with_performance['train_obs'] = train_obs

In [90]:
training_data_with_performance[training_data_with_performance['train_pred'] != training_data_with_performance['train_obs']]

Unnamed: 0,DID,QUESTIONS,DTEXT,DTEXT_CN,SENTS,train_pred,train_obs
20,D006,"[{'QID': 'D006Q02', 'QTYPE': '申論', 'ATYPE_': '...",阿拉伯之春（阿拉伯語：الثورات العربية‎）是西方主流媒體所稱的阿拉伯世界的一次...,阿拉伯之春（阿拉伯语：الثورات العربية‎）是西方主流媒体所称的阿拉伯世界的一次...,[{'text': '阿拉伯之春（阿拉伯语：الثورات العربية‎）是西方主流媒体...,[9],[]
70,D032,"[{'QID': 'D032Q10', 'QTYPE': '進階題', 'ATYPE_': ...",北美自由貿易協定（英語：North American Free Trade Agreemen...,北美自由贸易协定（英语：North American Free Trade Agreemen...,[{'text': '北美自由贸易协定（英语：North American Free Tra...,[11],[]
156,D048,"[{'QID': 'D048Q09', 'QTYPE': '申論', 'ATYPE_': '...",聊天機器人並非最近幾年出現的新應用，麻省理工學院（MIT）人工智慧實驗室早在1966年即研發...,聊天机器人并非最近几年出现的新应用，麻省理工学院（MIT）人工智慧实验室早在1966年即研发...,"[{'text': '聊天机器人并非最近几年出现的新应用，', 'start': 0, 'e...",[1],[]
284,D086,"[{'QID': 'D086Q03', 'QTYPE': '申論', 'ATYPE_': '...",在自然界中，能源可以採取幾種不同的形式存在：熱，電，輻射，化學能等。許多這些形式可以很容易轉...,在自然界中，能源可以采取几种不同的形式存在：热，电，辐射，化学能等。许多这些形式可以很容易转...,"[{'text': '在自然界中，', 'start': 0, 'end': 6, 'IE'...",[3],[]
324,D094,"[{'QID': 'D094Q01', 'QTYPE': '申論', 'ATYPE_': '...",財團法人伊甸社會福利基金會（英語：Eden Social Welfare Foundatio...,财团法人伊甸社会福利基金会（英语：Eden Social Welfare Foundatio...,[{'text': '财团法人伊甸社会福利基金会（英语：Eden Social Welfar...,[4],[]
370,D107,"[{'QID': 'D107Q07', 'QTYPE': '進階題', 'ATYPE_': ...",根據政府公權力介入的程度，健康照護體系一般可以區分為以下三種不同的體系。\n社會保險制（台灣...,根据政府公权力介入的程度，健康照护体系一般可以区分为以下三种不同的体系。\n社会保险制（台湾...,"[{'text': '根据政府公权力介入的程度，', 'start': 0, 'end': ...","[12, 25]",[25]
371,D107,"[{'QID': 'D107Q08', 'QTYPE': '進階題', 'ATYPE_': ...",根據政府公權力介入的程度，健康照護體系一般可以區分為以下三種不同的體系。\n社會保險制（台灣...,根据政府公权力介入的程度，健康照护体系一般可以区分为以下三种不同的体系。\n社会保险制（台湾...,"[{'text': '根据政府公权力介入的程度，', 'start': 0, 'end': ...","[12, 25]",[12]
395,D116,"[{'QID': 'D116Q09', 'QTYPE': '進階題', 'ATYPE_': ...",在流行病學家及醫學研究者繼續探討癌症的相關生活因素的同時，美國醫學會所出版的著名醫學雜誌也於...,在流行病学家及医学研究者继续探讨癌症的相关生活因素的同时，美国医学会所出版的著名医学杂志也于...,"[{'text': '在流行病学家及医学研究者继续探讨癌症的相关生活因素的同时，', 'st...",[10],[]
449,D182,"[{'QID': 'D182Q07', 'QTYPE': '基礎題', 'ATYPE_': ...",國慶焰火在9月24號晚間試放3分鐘，為確保施放安全、順利，屏東縣政府表示24號當天屏東河濱公...,国庆焰火在9月24号晚间试放3分钟，为确保施放安全、顺利，屏东县政府表示24号当天屏东河滨公...,"[{'text': '国庆焰火在9月24号晚间试放3分钟，', 'start': 0, 'e...",[2],[]
502,D245,"[{'QID': 'D245Q05', 'QTYPE': '申論', 'ATYPE_': '...",到了15世紀，聖伯多祿大殿結構日益變舊，亞維農教廷遷回羅馬後，萊昂·巴蒂斯塔·阿伯提和貝爾納...,到了15世纪，圣伯多禄大殿结构日益变旧，亚维农教廷迁回罗马后，莱昂·巴蒂斯塔·阿伯提和贝尔纳...,"[{'text': '到了15世纪，', 'start': 0, 'end': 7, 'IE...",[7],[]


In [145]:
training_data_with_performance[training_data_with_performance['train_pred'] != training_data_with_performance['train_obs']]['QUESTIONS'][156]

[{'QID': 'D048Q09',
  'QTYPE': '申論',
  'ATYPE_': 'Object',
  'AMODE_': ['Single-Span-Extraction'],
  'QTEXT': '聊天機器人仰賴哪些方法讓回答愈來愈準確?',
  'QTEXT_CN': '聊天机器人仰赖哪些方法让回答愈来愈准确?',
  'SENTS': [{'text': '聊天机器人仰赖哪些方法让回答愈来愈准确?',
    'start': 0,
    'end': 20,
    'IE': {'NER': [],
     'COREF': {},
     'RELATION': [],
     'TOKEN': [{'word': '聊天', 'char_b': 0, 'char_e': 2, 'pos': 'NN'},
      {'word': '机器人', 'char_b': 2, 'char_e': 5, 'pos': 'NN'},
      {'word': '仰赖', 'char_b': 5, 'char_e': 7, 'pos': 'VV'},
      {'word': '哪些', 'char_b': 7, 'char_e': 9, 'pos': 'DT'},
      {'word': '方法', 'char_b': 9, 'char_e': 11, 'pos': 'NN'},
      {'word': '让', 'char_b': 11, 'char_e': 12, 'pos': 'VV'},
      {'word': '回答', 'char_b': 12, 'char_e': 14, 'pos': 'VV'},
      {'word': '愈来愈', 'char_b': 14, 'char_e': 17, 'pos': 'AD'},
      {'word': '准确', 'char_b': 17, 'char_e': 19, 'pos': 'VA'},
      {'word': '?', 'char_b': 19, 'char_e': 20, 'pos': 'PU'}]}}],
  'SHINT_': [],
  'ANSWER': [{'ATEXT': '',
    'ATEXT_CN'

In [190]:
training_data_with_performance.loc[874]['DTEXT_CN']

'国内肠病毒轻症疫情持续上升，另新增1例肠病毒71型并发重症病例。疾病管制署再次呼吁，肠病毒传染力强，家长与教托育机构人员不可轻忽，应加强居家环境、教室及游乐设施等的通风、整洁与消毒，并教导学童落实「湿、搓、冲、捧、擦」正确洗手步骤，及生病在家休息等良好卫生观念，以降低病毒于校园或社区中传播的风险。\n疾管署表示，新增之肠病毒并发重症个案为南部7岁男童，9月1日至2日陆续出现手足口症、食欲下降、呕吐、发烧、腹部疼痛、喉咙痛及咳嗽等症状，9月4日个案因症状持续且出现疑似肌抽跃、疱疹性咽峡炎及对答反应慢等情形，经就医转诊后收治住院，5日由医院采检通报，经检验审查确认感染肠病毒71型并发重症(脑炎)，所幸个案经治疗后症状改善并已出院。\n疾管署监测资料显示，上周(9月8日至9月14日)国内肠病毒门急诊就诊共计20,585人次，较前一周上升6.5%；近期就诊人次持续上升，疫情处流行高峰期。今(2019)年累计37例肠病毒并发重症病例，以感染肠病毒71型为多(28例)，其他分别感染肠病毒D68型、克沙奇A6型、A10型(各2例)，克沙奇A9型、B5型、伊科病毒11型(各1例)。近四周社区肠病毒检出型别以克沙奇A群为多，肠病毒71型持续活动；今年累计315例肠病毒71型个案，高于2016至2018年同期。\n疾管署表示，肠病毒感染者在发病前几天，喉咙与粪便就有病毒存在且具传染力，发病后一周内传染力最高，痊愈后肠病毒会随著粪便排出达8到12周之久。提醒民众如感染肠病毒，应在家休息并避免与其他婴幼儿接触；痊愈后仍应注意个人手部卫生，以免将病毒传染给其他幼儿造成交叉感染。如发现家中婴幼儿出现肢体无力麻痺，或有嗜睡、意识不清、活力不佳、手脚无力、肌跃型抽搐(无故惊吓或全身肌肉突然收缩)、持续呕吐与呼吸急促或心跳加快等肠病毒重症前兆病征，请尽速送大医院接受治疗。'

In [187]:
training_data_with_performance.loc[731]['DTEXT_CN']#[2]

'如果你到埃及旅行，沿著尼罗河前进，你将会看到一座座巍峨的金字塔，矗立在一望无际沙漠中。在蔚蓝的天空下，这些已有好几千年历史的庞然大物，不但显得傲岸不群，更给人一种神秘的感觉。\n到底金字塔这种古老的建筑，是做什么用的呢？原来它是专为埃及国王所设计的陵墓。古埃及人相信：人死后只要把遗体保存好，就可以在另一个世界得到永生。所以尊贵的国王一旦去世，埃及人会先把他们的遗体做防腐的处理，再用泡过香料的布条层层包裹好，制成「木乃伊」，放进棺木中。然后安置在金字塔内部的墓室里，再启动机关，放下巨石，堵住通道，将整个金字塔封闭起来。这样一来，便可以防止盗墓者的侵入破坏，让遗体永享安宁了。\n在尼罗河沿岸，大大小小的金字塔遗址，大约有八十多座。其中以位于基沙的三座金字塔，以及在塔前担任守护神的人面狮身像，最为宏伟壮观。这些金字塔都是用大石块堆砌建造的。底部为四方形，四面则为平滑的三角斜面。单是规模最大的古夫王金字塔，面积就有五个足球场大。它总共使用了二百三十多万块的大石头，每块足足有两吨半重。塔高一百四十六公尺，相当于四十层高的摩天大楼。据说古夫王生前动用了十几万名工人，花费了二十年的时间，才完成这座世界最大的金字塔。\n我们很难想像，远在二、三千年前，没有电力，没有机器的情形下，这样庞大的建筑物究竟是怎么建成的。一般的推测是：聪明的埃及人，利用尼罗河每年雨季上涨的洪流，把采石工从岸山上一斧一整切下来的石材，顺著水流用木筏运送到工地去。再用大量的砖块在金字塔的外侧，建造一条盘旋而上的坡道，这样就可以让成千上万的工人，用绳子、滚木把石块顺著斜坡缓缓的往上拖，一层层的累积，叠成金字塔。\n几千年后的今天，每年都有许多游客从世界各地涌来，观看这号称「世界奇迹」的建筑物。光芒四射的金字塔，不仅透露出埃及国王的权势和追求永生的心愿，更展现了古埃及人的智慧和建筑技术。'

## Training Data Error Analysis

#20 (Pred:[9], Actual:[])

Question:「阿拉伯之春」运动中，走上街头的民众的诉求为何? <br>
Predicted SP: 只有突尼西亚成为阿拉伯之春中，<br>
Actual: None

Comment: While the predicted SP is incorrect, I found out that there is supporting evidence in the paragraph. (...要求推翻本国的专制政体的行动)This might be a case of incorrect input.

#70 (Pred:[11], Actual:[])

Question: 第二次签订的北美贸易协定从签署至生效过了几日? <br>
Predicted SP: 美国、墨西哥和加拿大就更新北美自由贸易协定达成一致，<br>
Actual: None

Comment: Same as #20. (美国、加拿大及墨西哥在1992年8月12日签署了关于三国间全面贸易的协议。...，北美自由贸易协议于1994年1月1日正式生效。)

#156 (Pred:[1], Actual:[])

Question: 聊天机器人仰赖哪些方法让回答愈来愈准确? <br>
Predicted SP: 麻省理工学院（MIT）人工智慧实验室早在1966年即研发出名为「Eliza」的机器人， <br>
Actual: None <br>

Comment: Same (聊天机器人的作答准确度要透过程式化的方法改善)

#284 (Pred:[3], Actual:[])

Question: 不可再生能源的意义是什么？ <br>
Predicted SP: 许多这些形式可以很容易转化为另一种的帮助下， <br>
Actual: None <br>

Comment: Same (是无法经过短时间内再生的能源，而且它们的消耗速度远远超过它们再生的速度)

#324 (Pred:[4], Actual:[])

Question: 伊甸基金會成立的宗旨為何? <br>
Predicted SP: 因著上帝的呼召及一颗爱身心障碍者的同理心，<br>
Actual: None <br>

Comment: No SP in the paragraph. But I think SP is pretty close to being a supporting evidence. This 
must be a borderline case.

#370 (Pred:[12,25], Actual:[25])

Question: 三大健康照护体系保险制度中，政府涉入程度低的是哪一种？<br>
Predicted SP: 公医制（政府介入最多）：以英国为代表。 AND 自由市场（政府一般不介入）：以2013年前的美国为代表。<br>
Actual: 公医制（政府介入最多）：以英国为代表。<br>

Comment: I think this is a very reasonable mismatch. As two supporting evidences are very similar
syntax-wise but drastically different in meaning.

#371 (Pred:[12,25], Actual:[12])

Question: 三大健康照護體系保險制度中，政府涉入程度高的是哪一種？ <br>
Predicted SP: 公医制（政府介入最多）：以英国为代表。 AND 自由市场（政府一般不介入）：以2013年前的美国为代表。<br>
Actual: 公医制（政府介入最多）：以英国为代表。<br>

Comment: Same as #370

#395 (Pred:[10], Actual:[])

Question: 熬夜是否能减低得到癌症的风险? <br>
Predicted SP: 皆强烈建议减少或避免动物性食品摄取， <br>
Actual: None <br>

Comment: Another potential case of incorrect input. In the paragraph I found this sentence (所以防癌守则：...，注重睡眠品质)

#449 (Pred:[2], Actual:[])

Question: 高屏地区国庆烟火试放管制时间是从晚上几点开始？ <br>
Predicted SP: 屏东县政府表示24号当天屏东河滨公园将管制不开放， <br>
Actual: None <br>

Comment: Another similar case. This time I strongly believe this is an incorrect input. 
当晚7时并会进行全面清场 <- This is sufficient to be a supporting evidence

#502 (Pred:[7], Actual:[])

Question: 为何圣伯多禄大殿只能重建不能整修就好? <br>
Predicted SP: 教宗犹利二世决定重建圣伯多禄大殿 <br>
Actual: None 

Comment: Another similar case. (无疑再改动有机会让建筑倒塌)

#630 (Pred:[62], Actual:[])

Question: 毛笔、铅笔、钢笔，这三种笔中哪个笔尖的硬度高？ <br>
Predicted SP: 更进一步看：我们无论使用那一种笔，<br>
Actual: None <br>

Comment: Again, I think this counts as a supporting evidence (钢笔的笔尖用金属制成，弹性大，硬度高)

#731 (Pred:[9], Actual:[])

Question: 为什么古埃及人要把死人做成木乃伊? <br>
Predicted SP: 是做什么用的呢？ <br>
Actual: None <br>

Comment: I think this counts (古埃及人相信：人死后只要把遗体保存好，就可以在另一个世界得到永生。)

#874 (Pred:[22], Actual:[])

Question: 要如何降低肠病毒的传播风险？
Predicted SP: 今(2019)年累计37例肠病毒并发重症病例，
Actual: None

Comment: No doubt, these are supporting evidences (应加强居家环境、教室及游乐设施等的通风、整洁与消毒，并教导学童落实「湿、搓、冲、捧、擦」正确洗手步骤，及生病在家休息等良好卫生观念，)