In [1]:
import os
import logging
import os
import random
import numpy as np

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [3]:
from transformers import BertTokenizerFast, BertModel, BertTokenizer

In [4]:
from TorchCRF import CRF

In [5]:
from tensorboardX import SummaryWriter

In [6]:
from model import BERT_CRF, BertCRFTagger
from utils import *
from dataloader import SeqLabeling_Dataset
from config import args

In [7]:
from seqeval.metrics import f1_score
from seqeval.metrics import precision_score
from seqeval.metrics import accuracy_score
from seqeval.metrics import recall_score
from seqeval.metrics import classification_report

In [8]:
paras = args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [9]:
print(f'Loading model: {paras.model_name}.')
tokenizer = BertTokenizerFast.from_pretrained(paras.model_name)
bert = BertModel.from_pretrained(paras.model_name, output_hidden_states=True)

vocab_dict = tokenizer.get_vocab()


Loading model: dmis-lab/biobert-base-cased-v1.1.


In [10]:
train_dataset = SeqLabeling_Dataset(paras.train_data, paras.label_file, vocab_dict)
label_to_index = train_dataset.label_to_index
index_to_label = train_dataset.index_to_label
train_dataloader = DataLoader(train_dataset, batch_size=paras.batch_size,
                                shuffle=paras.shuffle, drop_last=paras.drop_last)


In [11]:
bert_crf_tagger = BertCRFTagger(bert, paras.hidden_size, paras.num_tags,
                                paras.droupout_prob).to(device)


In [12]:
optimizer = torch.optim.Adam(bert_crf_tagger.parameters(), lr=paras.learning_rate)


In [13]:
# train
best_loss = 0
for epoch in range(paras.num_train_epochs):
    epoch_loss = 0
    bert_crf_tagger.train()
    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad()

        batch_data, batch_label = batch
        batch_data_list = [ data.split('&&&') for data in batch_data ]
        batch_label_list = [ label.split('&&&') for label in batch_label ]

        input_ids, mask = batch_data_processing(batch_data_list, paras.max_length,
                                                        vocab_dict.get('[PAD]'),
                                                        vocab_dict.get('[CLS]'),
                                                        vocab_dict.get('[SEP]'))

        input_ids = input_ids.to(device)
        mask = mask.to(device)

        # break
        # encoded_input = tokenizer(batch_data_list,
        #                           return_offsets_mapping=True,
        #                           max_length=paras.max_length,
        #                           truncation=True,
        #                           is_split_into_words=True,
        #                           padding=True,
        #                           return_tensors='pt').to(device)

        # input_ids = encoded_input['input_ids']
        # mask = encoded_input['attention_mask'].byte()

        batch_max_length = input_ids.shape[1]
        batch_label_pad = label_padding(batch_max_length, batch_label_list,
                                        label_to_index)

        batch_label_pad = torch.LongTensor(batch_label_pad)
        break

        loss = bert_crf_tagger(input_ids, mask, batch_label_pad)

        epoch_loss += loss.detach().cpu().item()

        logger.info(f'epoch: {epoch}, step: {step}, loss: {loss:.4f}')
        # acc, precision, recall, f1 = evaluation(bert_crf_tagger, test_dataloader,
        #                                         index_to_label, tokenizer, paras)
        # logger.info(f'ACC.: {acc:.4f}, Precision: {precision:.4f}, '
        #             f'Recall: {recall:.4f}, F1-score: {f1:.4f}')

        loss.backward()
        optimizer.step()

KeyError: 'PAD'

In [None]:
input_ids.shape, mask.shape, batch_label_pad.shape,

In [None]:
input_ids[0].shape, mask[0].sum(),batch_label_pad.shape

In [32]:
token_list = [tokenizer.decode(token) for token in input_ids[0]]

In [18]:
#print(token_list)
print(label_to_index)

{'I-Reg': 0, 'I-Var': 1, 'B-Interaction': 2, 'B-CPA': 3, 'B-NegReg': 4, 'B-Pathway': 5, '[PAD]': 6, 'I-Pathway': 7, 'I-Disease': 8, 'I-MPA': 9, 'I-PosReg': 10, 'B-MPA': 11, 'I-CPA': 12, 'B-Protein': 13, 'I-Interaction': 14, 'I-NegReg': 15, 'B-Gene': 16, 'I-Protein': 17, 'B-Reg': 18, 'I-Gene': 19, 'I-Enzyme': 20, 'O': 21, 'B-PosReg': 22, 'B-Disease': 23, 'B-Var': 24, 'B-Enzyme': 25}


In [14]:
def convert_label_to_index(batch_label_list: list, label_to_index: dict):
    batch_label_idx = []
    for label_list in batch_label_list:
        batch_label_idx.append([label_to_index[label] for label in label_list])
    return batch_label_idx

In [22]:
def label_padding(seq_max_length,batch_max_length: int, batch_label_list: list,
                  label_to_index:dict, return_tensor=True):
    
    if batch_max_length > seq_max_length:
        max_length = max_length
    else:
        max_length = batch_max_length
    
    pad_idx = label_to_index['[PAD]']
    batch_label_idx = convert_label_to_index(batch_label_list, label_to_index)

    batch_label_pad = []
    for label_list in batch_label_idx:
        if len(label_list) > (max_length-2):
            label_list = label_list[:max_length-2]
            label_list.insert(0, pad_idx)
            label_list.append(pad_idx)
            batch_label_pad.append(label_list)
        else:
            label_list.insert(0, pad_idx)
            label_list.append(pad_idx)
            label_list = label_list + ([pad_idx]*(max_length-len(label_list)))
            batch_label_pad.append(label_list)
    
    if return_tensor:
        return torch.LongTensor(batch_label_pad)
    
    return batch_label_pad

In [23]:
len(batch_label_list),len(batch_label_list[0])

(32, 45)

In [25]:
batch_label_pad = label_padding(paras.max_length, batch_max_length,
                                batch_label_list,label_to_index)

In [26]:
batch_label_pad.shape

torch.Size([32, 128])

In [28]:
label_list = [index_to_label[int(index)] for index in batch_label_pad[0]]

In [30]:
print(batch_label_list[0]), batch_label_pad[0], print(label_list)

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Gene', 'O', 'B-NegReg', 'B-Var', 'O', 'O', 'B-PosReg', 'I-PosReg', 'O', 'B-MPA', 'I-MPA', 'O', 'B-PosReg', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-MPA', 'I-MPA', 'O', 'O', 'O', 'O']
['[PAD]', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Gene', 'O', 'B-NegReg', 'B-Var', 'O', 'O', 'B-PosReg', 'I-PosReg', 'O', 'B-MPA', 'I-MPA', 'O', 'B-PosReg', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-MPA', 'I-MPA', 'O', 'O', 'O', 'O', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '

(None,
 tensor([ 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
         21, 21, 16, 21,  4, 24, 21, 21, 22, 10, 21, 11,  9, 21, 22, 21, 21, 21,
         21, 21, 21, 21, 11,  9, 21, 21, 21, 21,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  6]),
 None)

In [35]:
example_idx = 1
token_list = [tokenizer.decode(token) for token in input_ids[example_idx]]
label_list = [index_to_label[int(index)] for index in batch_label_pad[example_idx]]
for i in range(len(label_list)):
    print(token_list[i], label_list[i])

[CLS] [PAD]
Further O
clinical O
studies O
to O
confirm O
this O
relationship O
are O
[UNK] O
. O
[UNK] O
[UNK] O
[UNK] O
but O
not O
[UNK] O
[UNK] O
in O
[UNK] O
[UNK] O
of O
human O
lung O
cancer O
[UNK] O
- O
[UNK] O
cells O
. O
More O
than O
half O
of O
all O
human B-Disease
[UNK] I-Disease
are O
associated B-Reg
with O
mutations B-Var
of O
the O
[UNK] B-Gene
gene O
. O
[SEP] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]
[PAD] [PAD]

In [46]:
batch_data_tensor, mask = batch_data_processing(batch_data_list, 128, vocab_dict.get('[PAD]'), vocab_dict.get('[CLS]'),vocab_dict.get('[SEP]'))
batch_max_length = batch_data_tensor.shape[1]

In [41]:
mask[0].sum()

tensor(28)

In [44]:
len(batch_data_list), len(batch_label_list)

for i in range(len(batch_data_list)):
    print(len(batch_data_list[i]), len(batch_label_list[i]), mask[i].sum(), len(predict_result[i]))

28 28 tensor(28) 28
79 79 tensor(79) 79
23 23 tensor(23) 23
85 85 tensor(85) 85
60 60 tensor(60) 60
34 34 tensor(34) 34
40 40 tensor(40) 40
28 28 tensor(28) 28
132 132 tensor(128) 128
71 71 tensor(71) 71
119 119 tensor(119) 119
101 101 tensor(101) 101
43 43 tensor(43) 43
52 52 tensor(52) 52
30 30 tensor(30) 30
64 64 tensor(64) 64
27 27 tensor(27) 27
122 122 tensor(122) 122
200 200 tensor(128) 128
57 57 tensor(57) 57
66 66 tensor(66) 66
43 43 tensor(43) 43
30 30 tensor(30) 30
38 38 tensor(38) 38
299 299 tensor(128) 128
22 22 tensor(22) 22
76 76 tensor(76) 76
29 29 tensor(29) 29
254 254 tensor(128) 128
69 69 tensor(69) 69
240 240 tensor(128) 128
166 166 tensor(128) 128


In [54]:
def convert_index_to_label_single(index_list: list, index_to_label: dict):
    return [index_to_label[idx] for idx in index_list]
def convert_index_to_token_single(index_list: list, tokenizer):
    return [tokenizer.decode(index) for index in index_list]

In [55]:
data_exp = input_ids[0]
label_exp = batch_label_pad[0]
mask_exp = mask[0]
predict_label = bert_crf_tagger(data_exp.unsqueeze(0), mask_exp.unsqueeze(0))
print(len(data_exp), len(label_exp), len(mask_exp))

128 128 128


In [57]:
token_list = convert_index_to_token_single(data_exp, tokenizer)


In [59]:
token_list

['[CLS]',
 'This',
 'effect',
 'was',
 'not',
 'associated',
 'with',
 'enhanced',
 'β',
 '-',
 'cell',
 'proliferation',
 'or',
 'mass',
 '.',
 'Our',
 'data',
 'suggest',
 'that',
 'the',
 '[UNK]',
 '[UNK]',
 '[UNK]',
 'mutation',
 'may',
 '[UNK]',
 'beneficial',
 'effects',
 'on',
 'glucose',
 'metabolism',
 'by',
 'increasing',
 'the',
 'capacity',
 'of',
 'β',
 '-',
 'cells',
 'to',
 '[UNK]',
 'insulin',
 'under',
 '[UNK]',
 'conditions',
 '.',
 '[SEP]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD

In [48]:
predict_label = bert_crf_tagger(data_exp.unsqueeze_(0), mask_exp.unsqueeze_(0))

In [49]:
predict_label

[[13,
  13,
  20,
  13,
  0,
  19,
  13,
  0,
  19,
  23,
  23,
  12,
  20,
  0,
  7,
  17,
  12,
  20,
  20,
  4,
  0,
  17,
  17,
  3,
  13,
  13,
  17,
  12,
  20,
  23,
  12,
  13,
  0,
  20,
  19,
  17,
  0,
  23,
  10,
  19,
  0,
  3,
  19,
  0,
  0]]

In [None]:
token_list = [tokenizer.decode(token) for token in data_exp]
label_list = [index_to_label[index] for index in label_exp]
                

In [31]:
# test predict_result and evaluation
predict_result = bert_crf_tagger(batch_data_tensor, mask)


In [60]:
def label_truncation(batch_label_list: list, max_length: int):
    process_label_list = []
    for label_list in batch_label_list:
        if len(label_list) > max_length:
            process_label_list.append(label_list[:max_length-2])
        else:
            process_label_list.append(label_list[:-2])
    return process_label_list

In [61]:
predict_label_list = convert_index_to_label(predict_result, index_to_label)
ture_label_list = label_truncation(batch_label_list, batch_max_length)


In [62]:
len(predict_result[0]), len(predict_label_list[0]), 

(28, 26)

In [63]:
len(batch_label_list[0]), len(ture_label_list[0])

(28, 26)

In [64]:
for i in range(len(predict_label_list)):
    if len(predict_label_list[i]) != len(ture_label_list[i]):
        print(len(predict_label_list[i]), len(ture_label_list[i]))

In [34]:
len(batch_data_list[2]), len(batch_label_list[2])

(23, 23)

In [44]:
token_list = [tokenizer.decode(idx) for idx in input_ids[2]]

In [51]:
vocab_dict['RNA']

13254

In [50]:
for i in range(len(batch_data_list[2])):
    print(batch_data_list[2][i], token_list[i])

The [CLS]
main the
clinical main
and clinical
[UNK] and
features [UNK]
of features
[UNK] of
are [UNK]
progressive are
intention progressive
[UNK] intention
and [UNK]
[UNK] and
[UNK] [UNK]
associated [UNK]
with associated
brain with
[UNK] brain
, [UNK]
[UNK] ,
cell [UNK]
loss cell
and loss
presence and
of presence
[UNK] of
- [UNK]
positive -
[UNK] positive
[UNK] [UNK]
in [UNK]
both in
neurons both
and neurons
[UNK] and
. [UNK]
At .
the at
molecular the
level molecular
, level
[UNK] ,
is [UNK]
characterized is
by characterized
increased by
expression increased
of expression
[UNK] of
sense [UNK]
and sense
[UNK] and
RNA [UNK]
containing r
expanded ##na
[UNK] containing
or expanded
[UNK] [UNK]
repeats or
, [UNK]
respectively repeats
. ,
Here respectively
, .
we here
discuss ,
the we
[UNK] discuss
molecular the
mechanisms [UNK]
underlying molecular
[UNK] mechanisms
and underlying
notably [UNK]
recent and
reports notably
that recent
expanded reports
[UNK] that
and expanded
[UNK] [UNK]
repeats

In [26]:
len(predict_result[3])

35

In [41]:
token_list = [tokenizer.decode(idx) for idx in input_ids[2]]
token_list.count('[PAD]')

18

In [36]:
len(token_list), len(predict_result[2]), len(predict_label_list[2]), len(batch_label_list[2])

(128, 110, 108, 107)

In [52]:
a = [[1,2,3],[1,2,3,4], [1,2,3], [1,2,3,4,5]]

In [53]:
encoded_input = tokenizer(a,
                return_offsets_mapping=True,
                max_length=paras.max_length,
                truncation=True,
                is_split_into_words=True,
                padding=True,
                return_tensors='pt')


AssertionError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [None]:
token_list.count('[UNK]'), len(token_list)

In [16]:
def label_truncation(batch_label_list: list, max_length: int):
    process_label_list = []
    for label_list in batch_label_list:
        if len(label_list) > max_length:
            process_label_list.append(label_list[:max_length-2])
        else:
            process_label_list.append(label_list)
    return process_label_list


In [17]:
predict_label_list = convert_index_to_label(predict_result, index_to_label)
ture_label_list = label_truncation(batch_label_list, batch_max_length)

In [27]:
#predict_label_list
#ture_label_list
for i in range(len(predict_label_list)):
    print(len(predict_label_list[i]), len(ture_label_list[i]), len(batch_label_list[i]))

79 79 79
126 126 171
108 107 107
33 32 32
126 126 149
68 67 67
126 126 136
30 30 30
126 126 169
15 15 15
126 126 384
126 126 292
126 126 191
72 72 72
126 126 130
12 12 12
34 34 34
90 88 88
126 126 288
45 44 44
82 82 82
57 57 57
59 59 59
46 45 45
21 21 21
19 19 19
86 85 85
62 62 62
57 57 57
17 17 17
77 75 75
35 34 34


In [14]:
len(predict_result), len(batch_label_list)

(32, 32)

In [25]:
def convert_index_to_label(batch_index_list: list, index_to_label: dict,
                           del_special_token=True):
    batch_label_list = []
    for idx_list in batch_index_list:
        label_list = [index_to_label[idx] for idx in idx_list]
        if del_special_token:
            label_list = label_list[1:-1]
        batch_label_list.append(label_list)
    return batch_label_list

def label_truncation(batch_label_list: list, max_length: int):
    pro_label_list = []
    for label_list in batch_label_list:
        if len(label_list) > max_length:
            pro_label_list.append(label_list[:max_length-2])
        else:
            pro_label_list.append(label_list)
    return pro_label_list

In [22]:
predict_label_list = convert_index_to_label(predict_result, index_to_label)

In [36]:
predict_label_list = convert_index_to_label(predict_result, index_to_label)
ture_label_list = label_truncation(batch_label_list, batch_max_length)


In [37]:
len(predict_label_list), len(ture_label_list)

(32, 32)

In [60]:
for i in predict_label_list:
    print(len(i))

39
126
31
126
126
44
24
126
123
126
69
126
126
126
126
62
79
126
43
57
126
126
69
126
37
126
44
50
126
42
11
33


In [55]:
total_pred_label = []
total_ture_label = []

In [56]:
for predict_list, ture_list in zip(predict_label_list, ture_label_list):
    if len(predict_list) != len(ture_list):
        print(len(predict_list), len(ture_list))
        continue
        
    total_pred_label.append(predict_list)
    total_ture_label.append(ture_list)

In [57]:
len(total_pred_label), len(total_ture_label)

(32, 32)

In [64]:
a = ['O','O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
     'O', 'O'
, 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'
, 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

b = ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
     'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'
, 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'B-Var', 'I-Var', 'I-Var', 'I-Var', 'I-Var', 'I-Var', 'I-Var', 'I-Var',
 'I-Var', 'I-Var', 'B-Reg', 'B-Disease', 'I-Disease', 'I-Disease', 'O']

In [65]:
len(a), len(b)

(126, 128)

In [51]:
accuracy_score(total_ture_label, total_pred_label)

0.03138485680659082

In [52]:
precision_score(ture_label_list, predict_label_list)

0.0012244897959183673

In [53]:
recall_score(ture_label_list, predict_label_list)

0.03225806451612903

In [54]:
f1_score(ture_label_list, predict_label_list)

0.0023594180102241443

In [29]:
for i in range(len(predict_label_list)):
    print(len(predict_result[i]), len(predict_label_list[i]), len(batch_label_list[i]),
         len(ture_label_list[i]))

41 39 39 39
128 126 126 126
33 31 31 31
128 126 209 126
128 126 239 126
46 44 44 44
26 24 24 24
128 126 141 126
125 123 123 123
128 126 266 126
71 69 69 69
128 126 179 126
128 126 256 126
128 126 179 126
128 126 297 126
64 62 62 62
81 79 79 79
128 126 311 126
45 43 43 43
59 57 57 57
128 126 241 126
128 126 376 126
71 69 69 69
128 126 308 126
39 37 37 37
128 126 306 126
46 44 44 44
52 50 50 50
128 126 414 126
44 42 42 42
13 11 11 11
35 33 33 33


In [17]:
len(predict_label_list)

32

{'I-CPA': 0,
 'I-Reg': 1,
 'B-Disease': 2,
 'B-PosReg': 3,
 'I-PosReg': 4,
 'I-MPA': 5,
 'B-CPA': 6,
 'I-Var': 7,
 'I-Interaction': 8,
 'B-Interaction': 9,
 'I-Disease': 10,
 'B-NegReg': 11,
 'I-Pathway': 12,
 'B-Enzyme': 13,
 'I-Enzyme': 14,
 'B-MPA': 15,
 'I-Gene': 16,
 'B-Protein': 17,
 'B-Pathway': 18,
 'B-Reg': 19,
 'B-Gene': 20,
 'O': 21,
 'B-Var': 22,
 'I-NegReg': 23,
 'I-Protein': 24}

In [2]:
import time
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) )


2021-04-08 18:39:54


In [4]:
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

'2021-04-08 18:40:43'