In [0]:
# Mount Google Drive inside the Colab VM at /content/drive so that the
# project files stored on Drive can be accessed by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Make the project folder on Drive the working directory (so the local
# data_reader_general module and the relative 'data' path resolve),
# then list its contents.
%cd /content/drive/My\ Drive/NLP/Medical
!ls

/content/drive/My Drive/NLP/Medical
data			NN.ipynb		 word2vec300.npy
data_reader_general.py	pos2vec300.npy		 word2vec300_simple.npy
data_reader_new.py	pos2vec500.npy		 word2vec500.npy
data_reader.py		__pycache__		 word2vec500_simple.npy
data_reader_simple.py	tf_glove.py
distribution.txt	train_word_vector.ipynb


In [0]:
# Pin NumPy to version 1.16.1 for this notebook.
# NOTE(review): the reason for this exact pin is not visible in the notebook —
# presumably required by the data readers / saved .npy files; TODO record it.
!pip install numpy==1.16.1



In [0]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import torchvision
import numpy as np
import random
import time

In [0]:
# Load the corpus from the relative 'data' directory with the project-local reader.
# The returned object is used below through its train / validation / test splits,
# its per-class `weight` vector, and its `embedding_matrix` / `postag_matrix`.
# NOTE(review): the exact meaning of padding / shuffle / simple is defined in
# data_reader_general and is not visible here; noZero=True appears to correspond
# to the 14-class setting (see the n_class comment in Params) — TODO confirm.
import data_reader_general
SemData = data_reader_general.read_data_sets('data', padding=True, shuffle=True, noZero=True, simple=False)

###################################################################################################

In [0]:
# Sanity check of the loaded data, one value per line:
# number of sentences in the train / validation / test splits,
# number of class weights, and the shapes of the two embedding matrices.
print(
    *(len(part.sentences) for part in (SemData.train, SemData.validation, SemData.test)),
    sep='\n'
)
print(len(SemData.weight))
print(SemData.embedding_matrix.shape, SemData.postag_matrix.shape, sep='\n')

13218
4405
4405
14
(36515, 500)
(44, 500)


In [0]:
class Params():
    """Hyper-parameters shared by the models and the training loop.

    Every value can be overridden by keyword, e.g. ``Params(n_inputs=300)``;
    calling ``Params()`` with no arguments yields the original settings.

    Args:
        n_inputs: width of the word / POS-tag embedding vectors (500; 300 was
            the alternative noted by the author).
        n_class: number of target classes (14; 15 if noZero==False).
        batch_size: number of training examples per optimisation step.
        n_train: total number of training steps.
        n_display: a progress / validation report is printed every
            ``n_display`` steps.
        half_test_size: batch size of each of the two evaluation batches.
        n_hidden: hidden-state size of the RNN / LSTM models.
        max_length: sentence length setting stored by the CNN models.
    """

    def __init__(self, n_inputs=500, n_class=14, batch_size=128, n_train=10000,
                 n_display=500, half_test_size=2200, n_hidden=128, max_length=110):
        # ALL
        self.n_inputs = n_inputs # 300
        self.n_class = n_class # 15 if noZero==False
        self.batch_size = batch_size
        self.n_train = n_train
        self.n_display = n_display
        self.half_test_size = half_test_size
        # RNN & LSTM
        self.n_hidden = n_hidden
        # CNN
        self.max_length = max_length

# Module-level instance read by every model class and by train().
params = Params()

In [0]:
class CNN1(nn.Module):
    """Single-layer text CNN: convolution, max-over-time pooling, linear classifier.

    Word indices are looked up in ``SemData.embedding_matrix``. A bank of 128
    filters, each 5 tokens tall and as wide as one embedding vector, slides
    over the sentence; the strongest response of every filter is fed to the
    output layer. POS tags are accepted by ``forward`` but not used.
    """

    def __init__(self):
        super(CNN1, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_class = params.n_class
        self.batch_size = params.batch_size
        self.max_length = params.max_length

        # Both lookup tables are plain CUDA tensors rather than nn.Parameter,
        # so they are not returned by parameters() and stay fixed in training.
        # (Wrap them in nn.Parameter instead to fine-tune them.)
        self.embedding_matrix = torch.tensor(SemData.embedding_matrix).cuda()
        self.postag_matrix = torch.tensor(SemData.postag_matrix).cuda()
        # The kernel width must equal the embedding width so that the
        # convolution collapses the feature axis to size 1. It used to be
        # hard-coded to 300, which left a 201-wide axis for 500-d embeddings
        # and made the reshape in forward() produce the wrong number of rows.
        self.conv = nn.Conv2d(1, 128, (5, self.n_inputs), stride=1, padding=(2, 0))
        self.relu = nn.ReLU()
        self.fc = nn.Linear(128, self.n_class)

    def forward(self, sentences, postags):
        """Compute class scores for a batch of sentences.

        Args:
            sentences: batch of word-index sequences, expected as
                (batch, seq_len) integers accepted by ``torch.tensor``.
            postags: POS-tag indices; currently ignored by this model.

        Returns:
            Tensor of shape (batch, n_class) with unnormalised class scores.
        """
        sentences = nn.functional.embedding(torch.tensor(sentences).cuda(), self.embedding_matrix)
        # Add the single input channel expected by Conv2d: (batch, 1, seq_len, n_inputs).
        x = torch.unsqueeze(sentences, 1).float()
        y_conv = self.conv(x)
        y_relu = self.relu(y_conv)
        # Max over the token axis -> (batch, 128, 1).
        y_max, _ = torch.max(y_relu, 2)
        y_flatten = torch.reshape(y_max, [-1, self.conv.out_channels])
        output = self.fc(y_flatten)
        return output

In [0]:
class CNN2(nn.Module):
    """Two-layer text CNN over the sum of word and POS-tag embeddings.

    Pipeline: embedding lookup (word + POS tag, added element-wise) ->
    conv1 (32 filters, 5 tokens x full embedding width) -> ReLU ->
    max-pool along the token axis -> conv2 (128 filters) -> ReLU ->
    max over tokens -> dropout -> linear classifier.
    """

    def __init__(self):
        super(CNN2, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_class = params.n_class
        self.batch_size = params.batch_size
        self.max_length = params.max_length

        # Plain CUDA tensors (not nn.Parameter): excluded from parameters(),
        # therefore never updated by the optimizer.
        self.embedding_matrix = torch.tensor(SemData.embedding_matrix).cuda()
        self.postag_matrix = torch.tensor(SemData.postag_matrix).cuda()
        self.relu = nn.ReLU()
        # Kernel width follows the embedding width (500 with the default
        # Params) so the feature axis is reduced to size 1.
        self.conv1 = nn.Conv2d(1, 32, (5, self.n_inputs), 1, (2, 0))
        self.pool1 = nn.MaxPool2d((5, 1), stride=(2, 1), padding=(2, 0))
        self.conv2 = nn.Conv2d(32, 128, (5, 1), 1, (2, 0))
        self.fc1 = nn.Linear(128, self.n_class)

    def forward(self, sentences, postags):
        """Compute class scores for a batch of sentences.

        Args:
            sentences: batch of word-index sequences, expected as
                (batch, seq_len) integers accepted by ``torch.tensor``.
            postags: POS-tag index sequences aligned with ``sentences``.

        Returns:
            Tensor of shape (batch, n_class) with unnormalised class scores.
        """
        sentences = nn.functional.embedding(torch.tensor(sentences).cuda(), self.embedding_matrix)
        postags = nn.functional.embedding(torch.tensor(postags).cuda(), self.postag_matrix)
        # Both lookups already live on the GPU; add them directly instead of
        # copy-constructing a new tensor from the sum.
        sentences = sentences + postags
        # Add the single input channel expected by Conv2d.
        x = torch.unsqueeze(sentences, 1).float()
        y_conv1 = self.conv1(x)
        y_relu1 = self.relu(y_conv1)
        y_pool1 = self.pool1(y_relu1)
        y_conv2 = self.conv2(y_pool1)
        y_relu2 = self.relu(y_conv2)
        # Max over the token axis -> (batch, 128, 1).
        y_max, _ = torch.max(y_relu2, 2)
        y_flatten = torch.reshape(y_max, [-1, self.conv2.out_channels])
        # F.dropout defaults to training=True; tie it to the module mode so
        # that model.eval() really disables dropout.
        y_flatten = torch.nn.functional.dropout(y_flatten, 0.3, training=self.training)
        output = self.fc1(y_flatten)
        return output

In [0]:
class RNN(nn.Module):
    """Unidirectional vanilla RNN classifier, run one sentence at a time.

    Each sentence is embedded with ``SemData.embedding_matrix``, fed through
    ``nn.RNN`` as a batch of size 1, and the final hidden state is passed
    through dropout and a linear layer. POS tags are accepted by ``forward``
    but not used.
    """

    def __init__(self):
        super(RNN, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_hidden = params.n_hidden
        self.n_class = params.n_class
        self.batch_size = params.batch_size

        # Plain CUDA tensors (not nn.Parameter): never updated by the optimizer.
        self.embedding_matrix = torch.tensor(SemData.embedding_matrix).cuda()
        self.postag_matrix = torch.tensor(SemData.postag_matrix).cuda()
        self.rnn = nn.RNN(self.n_inputs, self.n_hidden, bidirectional=False)
        self.fc = nn.Linear(self.n_hidden, self.n_class)
        # Initial hidden state: drawn once at construction and then reused,
        # unchanged, for every sentence.
        self.h0 = torch.randn(1, 1, self.n_hidden).cuda()

    def forward(self, sentences, postags):
        """Compute class scores for a batch of sentences.

        Args:
            sentences: sequence of word-index sequences, one per sentence.
            postags: POS-tag indices; currently ignored by this model.

        Returns:
            Tensor of shape (len(sentences), n_class); an empty CUDA tensor
            when ``sentences`` is empty.
        """
        outputs = []
        for i in range(len(sentences)):
            sentence = nn.functional.embedding(torch.tensor(sentences[i]).cuda(), self.embedding_matrix)
            # (seq_len, n_inputs) -> (seq_len, 1, n_inputs): a batch of one.
            sentence = sentence.unsqueeze(1).float()
            _, h = self.rnn(sentence, self.h0)
            # F.dropout defaults to training=True; follow the module mode so
            # that model.eval() disables dropout.
            h = torch.nn.functional.dropout(h[0], 0.5, training=self.training)
            outputs.append(self.fc(h))
        if not outputs:
            return torch.tensor([]).cuda()
        # Concatenate once instead of re-copying a growing tensor per sentence.
        return torch.cat(outputs, 0)

In [0]:
class LSTM(nn.Module):
    """Unidirectional LSTM classifier with trainable word embeddings.

    Each sentence is embedded, fed through ``nn.LSTM`` as a batch of size 1,
    and the final hidden state is passed through dropout and a linear layer.
    POS tags are accepted by ``forward`` but not used.
    """

    def __init__(self):
        super(LSTM, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_hidden = params.n_hidden
        self.n_class = params.n_class
        self.batch_size = params.batch_size

        # Unlike the other models, the word embeddings are an nn.Parameter
        # here: they are returned by parameters() and updated by the optimizer,
        # and they move to the GPU together with the module.
        self.embedding_matrix = nn.Parameter(torch.tensor(SemData.embedding_matrix))
        # The POS-tag table stays a plain (fixed) CUDA tensor.
        self.postag_matrix = torch.tensor(SemData.postag_matrix).cuda()
        self.lstm = nn.LSTM(self.n_inputs, self.n_hidden, bidirectional=False)
        self.fc = nn.Linear(self.n_hidden, self.n_class)
        # Initial hidden / cell state: drawn once at construction and then
        # reused, unchanged, for every sentence.
        self.h0 = torch.randn(1, 1, self.n_hidden).cuda()
        self.c0 = torch.randn(1, 1, self.n_hidden).cuda()

    def forward(self, sentences, postags):
        """Compute class scores for a batch of sentences.

        Args:
            sentences: sequence of word-index sequences, one per sentence.
            postags: POS-tag indices; currently ignored by this model.

        Returns:
            Tensor of shape (len(sentences), n_class); an empty CUDA tensor
            when ``sentences`` is empty.
        """
        outputs = []
        for i in range(len(sentences)):
            sentence = nn.functional.embedding(torch.tensor(sentences[i]).cuda(), self.embedding_matrix)
            # (seq_len, n_inputs) -> (seq_len, 1, n_inputs): a batch of one.
            sentence = sentence.unsqueeze(1).float()
            _, (h, _) = self.lstm(sentence, (self.h0, self.c0))
            # F.dropout defaults to training=True; follow the module mode so
            # that model.eval() disables dropout.
            h = torch.nn.functional.dropout(h[0], 0.5, training=self.training)
            outputs.append(self.fc(h))
        if not outputs:
            return torch.tensor([]).cuda()
        # Concatenate once instead of re-copying a growing tensor per sentence.
        return torch.cat(outputs, 0)

In [0]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM classifier over the sum of word and POS-tag embeddings.

    For every sentence (processed as a batch of size 1) the forward and
    backward output halves are added per time step, the element-wise maximum
    over time is taken, and the result goes through dropout and a linear layer.
    """

    def __init__(self):
        super(BiLSTM, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_hidden = params.n_hidden
        self.n_class = params.n_class
        self.batch_size = params.batch_size

        # Plain CUDA tensors (not nn.Parameter): never updated by the optimizer.
        self.embedding_matrix = torch.tensor(SemData.embedding_matrix).cuda()
        self.postag_matrix = torch.tensor(SemData.postag_matrix).cuda()
        self.lstm = nn.LSTM(self.n_inputs, self.n_hidden, bidirectional=True)
        # The two directions are summed (not concatenated) in forward(), so
        # the classifier input is n_hidden wide.
        self.fc = nn.Linear(self.n_hidden, self.n_class)
        # Initial states for both directions: drawn once at construction and
        # then reused, unchanged, for every sentence.
        self.h0 = torch.randn(2, 1, self.n_hidden).cuda()
        self.c0 = torch.randn(2, 1, self.n_hidden).cuda()

    def forward(self, sentences, postags):
        """Compute class scores for a batch of sentences.

        Args:
            sentences: sequence of word-index sequences, one per sentence.
            postags: POS-tag index sequences; ``postags[i]`` must align with
                ``sentences[i]``.

        Returns:
            Tensor of shape (len(sentences), n_class); an empty CUDA tensor
            when ``sentences`` is empty.
        """
        outputs = []
        for i in range(len(sentences)):
            sentence = nn.functional.embedding(torch.tensor(sentences[i]).cuda(), self.embedding_matrix)
            postag = nn.functional.embedding(torch.tensor(postags[i]).cuda(), self.postag_matrix)
            # Add the lookups directly; re-wrapping tensors in torch.tensor()
            # only makes redundant copies.
            sentence = sentence + postag
            # (seq_len, n_inputs) -> (seq_len, 1, n_inputs): a batch of one.
            sentence = sentence.unsqueeze(1).float()
            h_fb, (_, _) = self.lstm(sentence, (self.h0, self.c0))
            # Sum forward and backward outputs, then max over the time axis.
            h, _ = torch.max(h_fb[:, :, 0:self.n_hidden]+h_fb[:, :, self.n_hidden:], 0)
            # F.dropout defaults to training=True; follow the module mode so
            # that model.eval() disables dropout.
            h = torch.nn.functional.dropout(h, 0.5, training=self.training)
            outputs.append(self.fc(h))
        if not outputs:
            return torch.tensor([]).cuda()
        # Concatenate once instead of re-copying a growing tensor per sentence.
        return torch.cat(outputs, 0)

In [0]:
# Build the network on the GPU, the class-weighted cross-entropy loss
# (weights taken from SemData.weight) and the Adam optimiser used by train().
model = CNN2().cuda()
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(SemData.weight, dtype=torch.float).cuda()
)
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=0.01,
)

In [0]:
def cond_mat(mat, preds, labels):
    """Accumulate (prediction, label) pairs into a confusion matrix.

    For every position ``i`` in ``labels`` the cell ``mat[preds[i], labels[i]]``
    is incremented by one, i.e. rows index the predicted class and columns the
    true class.

    Args:
        mat: square count matrix; it is updated in place.
        preds: predicted class indices (tensor or sequence of ints).
        labels: true class indices (tensor or sequence of ints).

    Returns:
        The same ``mat`` object, after the update.

    Raises:
        IndexError: if ``preds`` holds fewer entries than ``labels``.
    """
    n_samples = len(labels)
    if n_samples == 0:
        return mat
    if not torch.is_tensor(mat):
        # Non-tensor matrices (e.g. NumPy arrays) keep the element-wise path.
        for i in range(n_samples):
            mat[preds[i], labels[i]] += 1
        return mat
    rows = torch.as_tensor(preds, dtype=torch.long, device=mat.device)
    cols = torch.as_tensor(labels, dtype=torch.long, device=mat.device)
    if rows.numel() < n_samples:
        raise IndexError("preds has fewer entries than labels")
    # Only the first len(labels) predictions are counted.
    rows = rows[:n_samples]
    ones = torch.ones(n_samples, dtype=mat.dtype, device=mat.device)
    # One scatter-add instead of n_samples separate tensor updates;
    # accumulate=True makes repeated (pred, label) pairs add up.
    mat.index_put_((rows, cols), ones, accumulate=True)
    return mat

In [0]:
def _macro_f1(mat):
    """Return the macro-averaged F1 score of a confusion matrix as a float.

    ``mat[p, t]`` counts samples predicted as class ``p`` whose true class is
    ``t``, so row sums are TP+FP and column sums are TP+FN. Per class,
    F1 = 2*TP / ((TP+FP) + (TP+FN)); the small epsilon in the denominator
    makes a class that never occurs in predictions or labels score 0 instead
    of dividing by zero.
    """
    true_pos = torch.diag(mat)
    denom = torch.sum(mat, 1) + torch.sum(mat, 0) + 1e-5
    return torch.mean(2 * true_pos / denom).item()


def _evaluate(dataset, n_batches=2):
    """Evaluate the global ``model`` on ``n_batches`` batches drawn from ``dataset``.

    Each batch holds ``params.half_test_size`` samples. The model is switched
    to eval mode and gradients are disabled for the duration of the call; the
    previous training mode is restored afterwards.

    Returns:
        (confusion matrix, loss averaged over the batches).
    """
    mat = torch.zeros(params.n_class, params.n_class).cuda()
    loss = 0.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for _ in range(n_batches):
                sentences, postags, labels = dataset.next_batch(params.half_test_size)
                labels = torch.tensor(labels).cuda()
                output = model(sentences, postags)
                _, preds = torch.max(output, 1)
                # Labels are one-hot rows; argmax recovers the class index.
                _, golden = torch.max(labels, 1)
                mat = cond_mat(mat, preds, golden)
                loss += criterion(output, golden).item() / n_batches
    finally:
        model.train(was_training)
    return mat, loss


def train():
    """Train the global ``model`` for ``params.n_train`` steps, then test it.

    Relies on the module-level ``model``, ``optimizer``, ``criterion``,
    ``params`` and ``SemData``. Every ``params.n_display`` steps (and on the
    last step) it prints the accuracy / loss of the current training batch and
    the accuracy / loss / macro-F1 on two validation batches. A ``#`` progress
    mark is printed every 10 steps. After training, the same metrics are
    reported on two test batches.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    n_eval = 2 * params.half_test_size
    for step in range(params.n_train):
        train_sentences, train_postags, train_labels = SemData.train.next_batch(params.batch_size)
        train_labels = torch.tensor(train_labels).cuda()
        optimizer.zero_grad()
        train_output = model(train_sentences, train_postags)
        # Labels are one-hot rows; argmax recovers the class index.
        _, train_golden = torch.max(train_labels, 1)
        train_loss = criterion(train_output, train_golden)
        train_loss.backward()
        optimizer.step()
        if step%params.n_display == 0 or step == params.n_train-1 :
            print('#')
            print("<step: %d>" % (step))
            _, train_preds = torch.max(train_output, 1)
            # Same value as the trace of the batch confusion matrix.
            train_correct = (train_preds == train_golden).sum().item()
            train_accuracy = train_correct / params.batch_size
            print("train_accuracy: %2.4f %%, local_loss: %.8f" % (train_accuracy*100, train_loss.item()))

            validation_mat, validation_loss = _evaluate(SemData.validation)
            validation_accuracy = torch.trace(validation_mat).item() / n_eval
            # f1 score (macro)
            validation_f1 = _macro_f1(validation_mat)
            print("validation_accuracy:  %2.4f %%, total_loss: %.8f, f1_score: %2.2f" % (validation_accuracy*100, validation_loss, validation_f1*100))
        if step%10 == 0:
            print('#', end='')
    print("------------------------------------")
    print("training time: ", time.perf_counter()-start, " s")
    test_mat, test_loss = _evaluate(SemData.test)
    test_accuracy = torch.trace(test_mat).item() / n_eval
    # f1 score (macro)
    test_f1 = _macro_f1(test_mat)
    print("test_accuracy:  %2.4f %%, total_loss: %.8f, f1_score: %2.2f" % (test_accuracy*100, test_loss, test_f1*100))

In [0]:
# Run the full training loop and the final test evaluation for the
# currently selected model (the CNN2 instance created above).
train() # CNN2



#
<step: 0>
train_accuracy: 9.3750 %, local_loss: 2.72360158
validation_accuracy:  10.3864 %, total_loss: 2.64607215, f1_score: 4.60
###################################################
<step: 500>
train_accuracy: 81.2500 %, local_loss: 0.33697829
validation_accuracy:  79.0455 %, total_loss: 0.86834222, f1_score: 62.11
###################################################
<step: 1000>
train_accuracy: 81.2500 %, local_loss: 0.35704994
validation_accuracy:  79.7273 %, total_loss: 0.78856674, f1_score: 62.33
###################################################
<step: 1500>
train_accuracy: 85.1562 %, local_loss: 0.19550689
validation_accuracy:  83.5455 %, total_loss: 1.07066968, f1_score: 66.59
###################################################
<step: 2000>
train_accuracy: 86.7188 %, local_loss: 0.23347276
validation_accuracy:  83.3864 %, total_loss: 0.87905720, f1_score: 68.07
###################################################
<step: 2500>
train_accuracy: 92.1875 %, local_loss: 0.18983062
v