In [None]:
! pip install pytorch_pretrained_bert

Collecting pytorch_pretrained_bert
[?25l  Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)
[K     |██▋                             | 10kB 23.8MB/s eta 0:00:01[K     |█████▎                          | 20kB 30.7MB/s eta 0:00:01[K     |████████                        | 30kB 35.6MB/s eta 0:00:01[K     |██████████▋                     | 40kB 36.9MB/s eta 0:00:01[K     |█████████████▎                  | 51kB 39.2MB/s eta 0:00:01[K     |███████████████▉                | 61kB 41.8MB/s eta 0:00:01[K     |██████████████████▌             | 71kB 41.7MB/s eta 0:00:01[K     |█████████████████████▏          | 81kB 41.9MB/s eta 0:00:01[K     |███████████████████████▉        | 92kB 42.3MB/s eta 0:00:01[K     |██████████████████████████▌     | 102kB 43.3MB/s eta 0:00:01[K     |█████████████████████████████▏  | 112kB 43.3MB/s eta 0:00:01[K     |████████████

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_pretrained_bert import BertModel
from torch.autograd import Variable

class bigru_attention(nn.Module):
    """Sentence classifier: BERT encoder -> bidirectional GRU -> attention pooling -> linear layer.

    Args:
        bert_config: value handed to ``BertModel.from_pretrained``.
        tagset_size: number of output classes of the final linear layer.
        embedding_dim: input feature size of the GRU.
        hidden_dim: hidden size of each GRU direction; pooled features have
            ``2 * hidden_dim`` channels.
        rnn_layers: number of stacked GRU layers.
        dropout_ratio: dropout between stacked GRU layers (only applied when
            ``rnn_layers > 1``).
        dropout1: dropout probability applied to the GRU output.
        use_cuda: when true, ``rand_init_hidden`` returns CUDA tensors.
    """

    def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers, dropout_ratio, dropout1, use_cuda):
        super(bigru_attention, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.rnn_layers = rnn_layers
        self.word_embeds = BertModel.from_pretrained(bert_config)
        # nn.GRU only applies dropout between stacked layers and warns when a
        # non-zero value is given for a single layer, so pass 0 in that case.
        gru_dropout = dropout_ratio if rnn_layers > 1 else 0.0
        # Bidirectional GRU: its output has 2 * hidden_dim features, which is
        # the size the attention weights below are built for.
        self.bigru = nn.GRU(embedding_dim, hidden_dim, num_layers=rnn_layers, bidirectional=True,
                            dropout=gru_dropout, batch_first=True)
        self.dropout1 = nn.Dropout(p=dropout1)
        # Tensors wrapped in nn.Parameter are registered with requires_grad=True.
        self.weight_W = nn.Parameter(torch.empty(hidden_dim * 2, hidden_dim * 2))
        self.weight_proj = nn.Parameter(torch.empty(hidden_dim * 2, 1))
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)
        nn.init.uniform_(self.weight_W, -0.1, 0.1)
        nn.init.uniform_(self.weight_proj, -0.1, 0.1)
        self.use_cuda = use_cuda

    def rand_init_hidden(self, batch_size):
        """Return two random tensors of shape (2 * rnn_layers, batch_size, hidden_dim).

        Kept for callers that want an explicit initial state; ``forward`` does
        not use it and lets the GRU start from its default state.
        """
        shape = (2 * self.rnn_layers, batch_size, self.hidden_dim)
        first = torch.randn(shape)
        second = torch.randn(shape)
        if self.use_cuda:
            first, second = first.cuda(), second.cuda()
        return first, second

    def forward(self, sentence, attention_mask=None):
        """Compute class scores for a batch of token-id sequences.

        Args:
            sentence: token ids; passed straight to the BERT encoder.
            attention_mask: optional mask with the same leading two dimensions
                as ``sentence`` where 0 marks padding. It is forwarded to BERT
                and, when given, also removes padded positions from the
                attention pooling.

        Returns:
            The output of the final linear layer for the pooled features.
        """
        embeds, _ = self.word_embeds(sentence, attention_mask=attention_mask, output_all_encoded_layers=False)
        # batch_first=True and bidirectional=True: (batch, seq_len, 2 * hidden_dim)
        gru_out, _ = self.bigru(embeds)
        x = self.dropout1(gru_out)
        # Attention pooling: project, score each position, softmax over the sequence axis.
        u = torch.tanh(torch.matmul(x, self.weight_W))
        att = torch.matmul(u, self.weight_proj)
        if attention_mask is not None:
            # Push padded positions to the most negative finite value so they
            # get ~0 weight; using a finite value avoids NaN for all-pad rows.
            padded = attention_mask.unsqueeze(-1) == 0
            att = att.masked_fill(padded, torch.finfo(att.dtype).min)
        att_score = F.softmax(att, dim=1)
        scored_x = x * att_score
        # Weighted sum over the sequence axis gives one feature vector per example.
        feat = torch.sum(scored_x, dim=1)
        return self.fc(feat)


备用

In [None]:
class InputFeatures(object):
    """Plain record for one encoded example: tokens, label, token ids and padding mask."""

    def __init__(self, text, label, input_id, input_mask):
        # Values are stored as given; nothing is copied or validated.
        self.text, self.label = text, label
        self.input_id, self.input_mask = input_id, input_mask

def load_vocab(vocab_file):
    """Read a one-token-per-line vocabulary file into a ``{token: line_index}`` dict.

    Each line is stripped of surrounding whitespace before being used as a key;
    indices start at 0 and follow file order. If a token appears more than
    once, the index of its last occurrence is kept.
    """
    with open(vocab_file, "r", encoding="utf-8") as reader:
        return {line.strip(): position for position, line in enumerate(reader)}
#修改
def load_file(file_path):
    """Parse a labelled text file into character lists and integer labels.

    Every non-blank line is stripped and split on single spaces: the first
    field is converted with ``int`` and used as the label, the last field is
    split into individual characters and used as the text. Lines that are
    empty or contain only whitespace are skipped.

    Args:
        file_path: path of a UTF-8 encoded file.

    Returns:
        ``(texts, labels)`` where ``texts[i]`` is a list of characters and
        ``labels[i]`` is the matching ``int``.

    Raises:
        ValueError: if the first field of a non-blank line is not an integer.
    """
    texts = []
    labels = []
    # The context manager closes the file even if parsing raises.
    with open(file_path, encoding='utf-8') as reader:
        for raw_line in reader:
            stripped = raw_line.strip()
            if not stripped:
                # Covers '\n' as well as whitespace-only lines, which would
                # otherwise reach int('') and crash.
                continue
            fields = stripped.split(' ')
            # NOTE(review): only the last space-separated field is kept as text;
            # confirm the data never contains spaces inside the sentence.
            texts.append(list(fields[-1]))
            # Original author's note: unsure whether the label should be wrapped in a list.
            labels.append(int(fields[0]))
    return texts, labels
 
def load_data(file_path, max_length, vocab):
    """Encode every example of a labelled file as an ``InputFeatures`` record.

    The character list of each example is cut to ``max_length - 2`` items,
    wrapped in ``[CLS]`` / ``[SEP]``, mapped to ids through ``vocab`` (tokens
    missing from ``vocab`` use the ``[UNK]`` entry) and right-padded with 0 up
    to ``max_length``. The mask holds 1 for real tokens and 0 for padding.

    Args:
        file_path: path forwarded to ``load_file``.
        max_length: length of every returned id / mask list.
        vocab: mapping from token to id.

    Returns:
        A list of ``InputFeatures``, one per example, in file order.
    """
    texts, labels = load_file(file_path)
    assert len(texts) == len(labels)
    body_limit = max_length - 2  # two slots are taken by [CLS] and [SEP]
    features = []
    for chars, raw_label in zip(texts, labels):
        label = int(raw_label)
        if len(chars) > body_limit:
            chars = chars[0:body_limit]
        tokens_f = ['[CLS]'] + chars + ['[SEP]']
        input_ids = []
        for tok in tokens_f:
            # Look up [UNK] lazily so a vocab without it only fails when needed.
            key = tok if tok in vocab else '[UNK]'
            input_ids.append(int(vocab[key]))
        input_mask = [1] * len(input_ids)
        missing = max_length - len(input_ids)
        if missing > 0:
            input_ids.extend([0] * missing)
            input_mask.extend([0] * missing)
        assert len(input_ids) == max_length
        assert len(input_mask) == max_length
        # A per-token label-length check would go here for sequence tagging.
        features.append(
            InputFeatures(text=tokens_f, label=label, input_id=input_ids, input_mask=input_mask)
        )
    return features

In [None]:
# --- Input files -------------------------------------------------------------
train_file = '/content/drive/My Drive/Colab Notebooks/kashgari/ka re/Copy of train.txt'
dev_file = '/content/drive/My Drive/Colab Notebooks/kashgari/ka re/Copy of test.txt'
vocab_file = '/content/drive/My Drive/chinese_roberta_wwm_ext_pytorch.zip_files/vocab.txt'

# --- Encoding and training settings -------------------------------------------
max_length = 100   # every encoded example is cut / padded to this many ids
tagset_size = 16   # number of output classes of the classifier
batch_size = 1     # examples per DataLoader batch
epochs = 50        # upper bound of the training loop

In [None]:
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report
# Pick the device once; `use_cuda` is the flag the later cells consult.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda", 0)
    print('device',device)
else:
    device = torch.device("cpu")


def _features_to_tensors(features):
    """Stack ids, masks and labels of encoded examples into three LongTensors."""
    return (
        torch.LongTensor([item.input_id for item in features]),
        torch.LongTensor([item.input_mask for item in features]),
        torch.LongTensor([item.label for item in features]),
    )


vocab = load_vocab(vocab_file)

# Training split: encode, tensorise, wrap in a shuffling loader.
train_data = load_data(train_file, max_length=max_length, vocab=vocab)
train_ids, train_masks, train_tags = _features_to_tensors(train_data)
train_dataset = TensorDataset(train_ids, train_masks, train_tags)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Dev split: same pipeline as above.
dev_data = load_data(dev_file, max_length=max_length, vocab=vocab)
dev_ids, dev_masks, dev_tags = _features_to_tensors(dev_data)
dev_dataset = TensorDataset(dev_ids, dev_masks, dev_tags)
dev_loader = DataLoader(dev_dataset, shuffle=True, batch_size=batch_size)

#def evaluate(model, dev_loader):
#    model.eval()
 #   pred = []
#    for i, train_batch in enumerate(dev_loader):
#        model.zero_grad()
 #       sentence, masks, tags = train_batch
 #       sentence, masks, tags = Variable(sentence), Variable(masks), Variable(tags)
 #       if use_cuda:
 #           sentence = sentence.cuda()
 #           asks = masks.cuda()
 #           tags = tags.cuda()
 ##       predict_tags = F.softmax(model(sentence)).tolist()
  #      pred_tag=predict_tags.index(max(predict_tags))
  #      pred.extend([pred_tag])
 #       print(classification_report(dev_tags, pred))
        
# Build the classifier: 768-dim encoder output, 200 hidden units per GRU direction, 1 GRU layer.
model = bigru_attention('/content/drive/My Drive/chinese_roberta_wwm_ext_pytorch.zip_files', tagset_size, 768, 200, 1,
                      dropout_ratio=0.4, dropout1=0.4, use_cuda = use_cuda)
if use_cuda:
    model.cuda()
losser = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00003, weight_decay=0.00005)
best_f = -100  # placeholder for a best score; never updated in this cell
for epoch in range(epochs):
    print('epoch: {}trrain'.format(epoch))
    # ---- training pass ----
    model.train()
    for sentence, masks, tags in tqdm(train_loader):
        if use_cuda:
            sentence = sentence.cuda()
            masks = masks.cuda()
            tags = tags.cuda()
        model.zero_grad()
        # Pass the padding mask so the encoder does not attend to pad tokens.
        loss = losser(model(sentence, attention_mask=masks), tags)
        loss.backward()
        optimizer.step()
    # Reports the loss of the last training batch of the epoch only.
    print('epoch: {}loss: {}'.format(epoch, loss.item()))
    # ---- evaluation pass on the dev split ----
    model.eval()
    pred = []
    true = []
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for sentence, masks, tags in dev_loader:
            if use_cuda:
                sentence = sentence.cuda()
                masks = masks.cuda()
                tags = tags.cuda()
            logits = model(sentence, attention_mask=masks)
            # argmax over the class axis keeps one prediction per row, so
            # `pred` and `true` stay aligned for any batch size.
            pred.extend(logits.argmax(dim=1).tolist())
            true.extend(tags.tolist())
    print(classification_report(true, pred))

device cuda:0


  0%|          | 1/1112 [00:00<03:19,  5.57it/s]

epoch: 0trrain


100%|██████████| 1112/1112 [02:58<00:00,  6.22it/s]


epoch: 0loss: 2.37180495262146


  0%|          | 1/1112 [00:00<02:27,  7.55it/s]

              precision    recall  f1-score   support

           0       0.46      0.90      0.61        98
           1       0.60      0.50      0.55        18
           2       0.29      0.18      0.22        22
           3       1.00      0.10      0.18        10
           4       0.67      0.25      0.36        57
           5       0.54      0.60      0.57        25
           6       0.00      0.00      0.00         6
           7       0.50      0.50      0.50         2
           8       0.50      0.11      0.18         9
           9       0.00      0.00      0.00         2
          10       0.00      0.00      0.00        12
          11       0.00      0.00      0.00         7
          12       0.00      0.00      0.00         3
          13       1.00      0.80      0.89         5
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.49       278
   macro avg       0.35   

100%|██████████| 1112/1112 [02:58<00:00,  6.23it/s]


epoch: 1loss: 0.5255570411682129


  0%|          | 1/1112 [00:00<02:33,  7.23it/s]

              precision    recall  f1-score   support

           0       0.51      0.94      0.66        98
           1       1.00      0.44      0.62        18
           2       0.88      0.32      0.47        22
           3       1.00      0.30      0.46        10
           4       0.43      0.21      0.28        57
           5       0.84      0.84      0.84        25
           6       1.00      0.33      0.50         6
           7       0.67      1.00      0.80         2
           8       0.18      0.22      0.20         9
           9       0.00      0.00      0.00         2
          10       0.50      0.17      0.25        12
          11       0.00      0.00      0.00         7
          12       0.00      0.00      0.00         3
          13       1.00      0.60      0.75         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.56       278
   macro avg       0.56   

100%|██████████| 1112/1112 [02:58<00:00,  6.24it/s]


epoch: 2loss: 0.7273683547973633


  0%|          | 1/1112 [00:00<02:26,  7.58it/s]

              precision    recall  f1-score   support

           0       0.78      0.63      0.70        98
           1       0.88      0.78      0.82        18
           2       0.50      0.77      0.61        22
           3       1.00      0.70      0.82        10
           4       0.71      0.70      0.71        57
           5       0.85      0.88      0.86        25
           6       0.80      0.67      0.73         6
           7       0.50      1.00      0.67         2
           8       0.53      0.89      0.67         9
           9       0.00      0.00      0.00         2
          10       0.34      0.83      0.49        12
          11       0.00      0.00      0.00         7
          12       0.00      0.00      0.00         3
          13       0.80      0.80      0.80         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.69       278
   macro avg       0.54   

100%|██████████| 1112/1112 [02:58<00:00,  6.24it/s]


epoch: 3loss: 0.07604455947875977


  0%|          | 1/1112 [00:00<02:28,  7.50it/s]

              precision    recall  f1-score   support

           0       0.76      0.76      0.76        98
           1       0.84      0.89      0.86        18
           2       0.51      0.82      0.63        22
           3       0.71      1.00      0.83        10
           4       0.80      0.65      0.72        57
           5       0.92      0.88      0.90        25
           6       0.57      0.67      0.62         6
           7       0.40      1.00      0.57         2
           8       0.50      0.22      0.31         9
           9       0.00      0.00      0.00         2
          10       0.47      0.67      0.55        12
          11       1.00      0.29      0.44         7
          12       0.00      0.00      0.00         3
          13       0.67      0.80      0.73         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.72       278
   macro avg       0.57   

100%|██████████| 1112/1112 [02:58<00:00,  6.25it/s]


epoch: 4loss: 0.027752399444580078


  0%|          | 1/1112 [00:00<02:25,  7.64it/s]

              precision    recall  f1-score   support

           0       0.91      0.77      0.83        98
           1       0.94      0.94      0.94        18
           2       0.73      0.73      0.73        22
           3       1.00      0.80      0.89        10
           4       0.73      0.86      0.79        57
           5       0.96      0.92      0.94        25
           6       1.00      0.50      0.67         6
           7       1.00      1.00      1.00         2
           8       0.40      0.89      0.55         9
           9       0.00      0.00      0.00         2
          10       0.86      0.50      0.63        12
          11       0.33      0.71      0.45         7
          12       1.00      0.67      0.80         3
          13       1.00      1.00      1.00         5
          14       0.50      1.00      0.67         1
          15       0.00      0.00      0.00         1

    accuracy                           0.79       278
   macro avg       0.71   

100%|██████████| 1112/1112 [02:58<00:00,  6.25it/s]


epoch: 5loss: 0.027107715606689453


  0%|          | 1/1112 [00:00<02:33,  7.25it/s]

              precision    recall  f1-score   support

           0       0.88      0.68      0.77        98
           1       0.75      0.83      0.79        18
           2       0.76      0.86      0.81        22
           3       1.00      0.90      0.95        10
           4       0.84      0.89      0.86        57
           5       0.92      0.92      0.92        25
           6       1.00      0.67      0.80         6
           7       0.67      1.00      0.80         2
           8       0.60      0.67      0.63         9
           9       0.33      0.50      0.40         2
          10       0.34      0.92      0.50        12
          11       0.75      0.43      0.55         7
          12       0.00      0.00      0.00         3
          13       0.80      0.80      0.80         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.78       278
   macro avg       0.67   

100%|██████████| 1112/1112 [02:58<00:00,  6.24it/s]


epoch: 6loss: 0.10666179656982422


  0%|          | 1/1112 [00:00<02:35,  7.15it/s]

              precision    recall  f1-score   support

           0       0.82      0.86      0.84        98
           1       1.00      0.83      0.91        18
           2       0.87      0.59      0.70        22
           3       1.00      0.60      0.75        10
           4       0.76      0.91      0.83        57
           5       0.90      0.72      0.80        25
           6       0.62      0.83      0.71         6
           7       0.67      1.00      0.80         2
           8       0.47      0.89      0.62         9
           9       0.33      0.50      0.40         2
          10       0.67      0.33      0.44        12
          11       0.50      0.43      0.46         7
          12       0.00      0.00      0.00         3
          13       0.80      0.80      0.80         5
          14       0.33      1.00      0.50         1
          15       0.00      0.00      0.00         1

    accuracy                           0.78       278
   macro avg       0.61   

100%|██████████| 1112/1112 [02:58<00:00,  6.24it/s]


epoch: 7loss: 0.029680252075195312


  0%|          | 1/1112 [00:00<02:27,  7.56it/s]

              precision    recall  f1-score   support

           0       0.88      0.83      0.85        98
           1       0.88      0.78      0.82        18
           2       0.73      0.73      0.73        22
           3       1.00      1.00      1.00        10
           4       0.76      0.91      0.83        57
           5       0.92      0.96      0.94        25
           6       1.00      1.00      1.00         6
           7       0.67      1.00      0.80         2
           8       0.54      0.78      0.64         9
           9       0.33      0.50      0.40         2
          10       0.86      0.50      0.63        12
          11       0.75      0.43      0.55         7
          12       0.00      0.00      0.00         3
          13       0.83      1.00      0.91         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.82       278
   macro avg       0.70   

100%|██████████| 1112/1112 [02:58<00:00,  6.24it/s]


epoch: 8loss: 2.325535297393799


  0%|          | 1/1112 [00:00<02:25,  7.61it/s]

              precision    recall  f1-score   support

           0       0.87      0.76      0.81        98
           1       0.85      0.94      0.89        18
           2       0.68      0.68      0.68        22
           3       0.91      1.00      0.95        10
           4       0.78      0.93      0.85        57
           5       0.96      0.88      0.92        25
           6       0.86      1.00      0.92         6
           7       0.29      1.00      0.44         2
           8       0.50      0.33      0.40         9
           9       0.00      0.00      0.00         2
          10       0.53      0.67      0.59        12
          11       0.62      0.71      0.67         7
          12       0.00      0.00      0.00         3
          13       1.00      0.80      0.89         5
          14       1.00      1.00      1.00         1
          15       1.00      1.00      1.00         1

    accuracy                           0.79       278
   macro avg       0.68   

100%|██████████| 1112/1112 [02:58<00:00,  6.23it/s]


epoch: 9loss: 0.4652981758117676


  0%|          | 1/1112 [00:00<02:28,  7.47it/s]

              precision    recall  f1-score   support

           0       0.91      0.84      0.87        98
           1       0.94      0.94      0.94        18
           2       0.74      0.77      0.76        22
           3       0.89      0.80      0.84        10
           4       0.84      0.95      0.89        57
           5       1.00      0.88      0.94        25
           6       0.86      1.00      0.92         6
           7       0.67      1.00      0.80         2
           8       0.67      0.67      0.67         9
           9       0.25      1.00      0.40         2
          10       0.55      0.50      0.52        12
          11       0.50      0.29      0.36         7
          12       0.67      0.67      0.67         3
          13       0.67      0.80      0.73         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.83       278
   macro avg       0.70   

  7%|▋         | 82/1112 [00:13<02:43,  6.30it/s]

KeyboardInterrupt: ignored

In [None]:
# Re-print the report for the most recent evaluation pass. `true` and `pred` are the
# lists left in the kernel by the training cell, so that cell must have run first.
print(classification_report(true, pred))

              precision    recall  f1-score   support

           0       0.91      0.84      0.87        98
           1       0.94      0.94      0.94        18
           2       0.74      0.77      0.76        22
           3       0.89      0.80      0.84        10
           4       0.84      0.95      0.89        57
           5       1.00      0.88      0.94        25
           6       0.86      1.00      0.92         6
           7       0.67      1.00      0.80         2
           8       0.67      0.67      0.67         9
           9       0.25      1.00      0.40         2
          10       0.55      0.50      0.52        12
          11       0.50      0.29      0.36         7
          12       0.67      0.67      0.67         3
          13       0.67      0.80      0.73         5
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.83       278
   macro avg       0.70   

In [None]:
# The file name is built from the configured `epochs` value, so it does not reflect
# how many epochs actually completed if training was interrupted.
model_name = '{}{}.pkl'.format('/content/drive/My Drive/chinese_roberta_wwm_ext_pytorch.zip_files/', epochs)
torch.save(model.state_dict(), model_name)

In [None]:
import torch
# Rebuild the architecture with the same hyperparameters used for training, then restore weights.
_cuda_ok = torch.cuda.is_available()
model = bigru_attention('/content/drive/My Drive/chinese_roberta_wwm_ext_pytorch.zip_files', tagset_size, 768, 200, 1,
                      dropout_ratio=0.4, dropout1=0.4, use_cuda=_cuda_ok)
# map_location='cpu' lets a checkpoint written from a GPU run load on a CPU-only runtime.
# NOTE(review): this path differs from the directory the save cell writes to — confirm
# which checkpoint file is intended.
model.load_state_dict(torch.load('/content/drive/My Drive/Colab Notebooks/kashgari/ka re/50.pkl', map_location='cpu'))
# Ending on a statement (not a bare expression) keeps the full model repr out of the output.
if _cuda_ok:
    model.cuda()

  "num_layers={}".format(dropout, num_layers))


bigru_attention(
  (word_embeds): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
          

In [None]:

import pandas as pd
import json
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# ---- Load the relation table and build the model input strings ----
df = pd.read_csv("/content/drive/My Drive/Colab Notebooks/bert-bilstm-crf-master/bert_lstm_crf_pytorch/data/012.txt")
df.columns = ['关系','类型1','人物1','类型2','人物2','文本']
relations = list(df['关系'].unique())
texts = []
for per1, per2, text in zip(df['人物1'].tolist(), df['人物2'].tolist(), df['文本'].tolist()):
    per1, per2 = str(per1), str(per2)
    # Replace each entity mention in the sentence by '#' repeated to the same length,
    # then prefix the two entity names separated by '$'.
    hidden = text.replace(per1, len(per1) * '#').replace(per2, len(per2) * '#')
    texts.append('$'.join([per1, per2, hidden]))
df['text'] = texts

# ---- Encode every string to a fixed-length id list ----
tokens = []
ids = []
vocab = load_vocab(vocab_file)
for token in texts:
    if len(token) > max_length - 2:
        token = token[0:(max_length - 2)]
    tokens_f = ['[CLS]'] + list(token) + ['[SEP]']
    tokens.append(tokens_f)
    input_ids = [int(vocab[tok]) if tok in vocab else int(vocab['[UNK]']) for tok in tokens_f]
    # Pad before storing so the stored list never depends on later mutation.
    input_ids.extend([0] * (max_length - len(input_ids)))
    ids.append(input_ids)
pred_ids = torch.LongTensor(ids)
pred_dataset = TensorDataset(pred_ids)
pred_loader = DataLoader(pred_dataset, shuffle=False, batch_size=batch_size)

# ---- Run the model ----
model.eval()
# Follow the device the model actually lives on instead of assuming CUDA.
model_device = next(model.parameters()).device
pred = []
# NOTE(review): no attention_mask is supplied, so padding tokens are visible to the
# encoder; keep this consistent with how the loaded checkpoint was trained.
with torch.no_grad():
    for (sentence,) in pred_loader:
        logits = model(sentence.to(model_device))
        # One prediction per row of the batch, for any batch size.
        pred.extend(logits.argmax(dim=1).tolist())
assert len(pred) == len(texts), "expected one prediction per input row"



In [None]:
# Show a sample instead of the whole list to keep the cell output short.
pred[:20]

[4,
 4,
 0,
 0,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 6,
 4,
 4,
 4,
 2,
 2,
 0,
 0,
 6,
 6,
 14,
 0,
 6,
 6,
 0,
 6,
 6,
 6,
 6,
 0,
 1,
 1,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 0,
 5,
 5,
 5,
 1,
 0,
 5,
 5,
 5,
 1,
 5,
 5,
 5,
 1,
 0,
 0,
 4,
 0,
 4,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 5,
 1,
 1,
 4,
 6,
 4,
 4,
 6,
 4,
 4,
 4,
 4,
 4,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 0,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 0,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 0,
 0,
 4,
 0,
 4,
 4,
 0,
 4,
 0,
 4,
 0,
 4,
 0,
 4,
 0,
 0,
 4,
 6,
 4,
 0,
 6,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 10,
 10,
 10,
 4,
 0,
 0,
 4,
 0,
 4,
 4,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [None]:
# Convert the DataFrame to a list of rows; each row is a plain list of that row's
# column values in column order.
raw=df.values.tolist()

In [None]:
raw[:5]

[[0,
  '医疗指标',
  '因子',
  '医疗指标',
  'NGF',
  '目的:研究黄皮提取物对星形胶质细胞分泌神经营养因子(NGF)的影响',
  '因子$NGF$目的:研究黄皮提取物对星形胶质细胞分泌神经营养##(###)的影响'],
 [1,
  '诊断技术',
  'RT-PCR法',
  '医疗指标',
  'NGF',
  '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
  'RT-PCR法$NGF$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内###和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中###的分泌水平'],
 [2,
  '诊断技术',
  'RT-PCR法',
  '医疗指标',
  'P75',
  '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
  'RT-PCR法$P75$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体###的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平'],
 [3,
  '诊断技术',
  'RT-PCR法',
  '医疗指标',
  'mRNA',
  '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
  'RT-PCR法$mRNA$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的####水平,同时ELISA方法验证培养上清中NGF的分泌水平'],
 [4,
  '诊断技术',
  'RT-PCR法',
  '诊断技术',
  'ELISA',
  '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平

In [None]:
# zip() silently truncates to the shorter input, so check alignment before pairing.
assert len(raw) == len(pred), "row/prediction count mismatch: {} vs {}".format(len(raw), len(pred))
predicted_sen = list(zip(raw, pred))

In [None]:
predicted_sen[:5]

[([0,
   '医疗指标',
   '因子',
   '医疗指标',
   'NGF',
   '目的:研究黄皮提取物对星形胶质细胞分泌神经营养因子(NGF)的影响',
   '因子$NGF$目的:研究黄皮提取物对星形胶质细胞分泌神经营养##(###)的影响'],
  4),
 ([1,
   '诊断技术',
   'RT-PCR法',
   '医疗指标',
   'NGF',
   '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
   'RT-PCR法$NGF$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内###和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中###的分泌水平'],
  4),
 ([2,
   '诊断技术',
   'RT-PCR法',
   '医疗指标',
   'P75',
   '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
   'RT-PCR法$P75$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体###的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平'],
  0),
 ([3,
   '诊断技术',
   'RT-PCR法',
   '医疗指标',
   'mRNA',
   '方法:RT-PCR法测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的mRNA水平,同时ELISA方法验证培养上清中NGF的分泌水平',
   'RT-PCR法$mRNA$方法:#######测定给予10μmol/L的黄皮提取物不同时间点后星形胶质细胞内NGF和低亲和力受体P75的####水平,同时ELISA方法验证培养上清中NGF的分泌水平'],
  0),
 ([4,
   '诊断技术',
   'RT-PCR法',
   '诊断技术',
   'ELISA',
   '方法:RT-PCR法测定给予10μmol/L的黄皮提取物

In [None]:
# Class id -> relation name; the id is the position in this list.
rel_dic = dict(enumerate([
    "unknown",
    "临床表现",
    "药物治疗",
    "预防",
    "病因",
    "同义",
    "病理",
    "护理",
    "诊断",
    "报告",
    "非药治疗",
    "观察",
    "检测项目为",
    "作用于",
    "导致",
    "分为",
]))

In [None]:
# Translate every predicted class id into its relation name.
pred_result = [rel_dic[label_id] for label_id in pred]

In [None]:
# Show a sample instead of the whole list to keep the cell output short.
pred_result[:20]

['病因',
 '病因',
 'unknown',
 'unknown',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病理',
 '病因',
 '病因',
 '病因',
 '药物治疗',
 '药物治疗',
 'unknown',
 'unknown',
 '病理',
 '病理',
 '导致',
 'unknown',
 '病理',
 '病理',
 'unknown',
 '病理',
 '病理',
 '病理',
 '病理',
 'unknown',
 '临床表现',
 '临床表现',
 '药物治疗',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 'unknown',
 '同义',
 '同义',
 '同义',
 '临床表现',
 'unknown',
 '同义',
 '同义',
 '同义',
 '临床表现',
 '同义',
 '同义',
 '同义',
 '临床表现',
 'unknown',
 'unknown',
 '病因',
 'unknown',
 '病因',
 '病因',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 '临床表现',
 '临床表现',
 '同义',
 '临床表现',
 '临床表现',
 '病因',
 '病理',
 '病因',
 '病因',
 '病理',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '药物治疗',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '病因',
 '

In [None]:
# zip() silently truncates to the shorter input, so check alignment before pairing.
assert len(raw) == len(pred_result), "row/label count mismatch: {} vs {}".format(len(raw), len(pred_result))
predicted_sen = list(zip(raw, pred_result))