In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import torchvision
import numpy as np
import random
import time

In [2]:
import data_reader
SemData = data_reader.read_data_sets('data', padding=0, shuffle=True, noZero=True)

In [3]:
print(len(SemData.train.sentences))
print(len(SemData.test.sentences))
print(len(SemData.weight))

19584
2181
14


In [4]:
class Params():
    def __init__(self):
        self.n_inputs = 300
        self.n_hidden = 150
        self.n_class = 14 # 15 if noZero==False
        self.batch_size = 128
        self.n_train = 3000
        self.n_display = 100
        self.test_size = 2181

params = Params()

In [5]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_hidden = params.n_hidden
        self.n_class = params.n_class
        self.batch_size = params.batch_size
        self.rnn = nn.RNN(self.n_inputs, self.n_hidden, bidirectional=True)
        self.lstm = nn.LSTM(self.n_inputs, self.n_hidden, bidirectional=True)
        self.fc = nn.Linear(self.n_hidden, self.n_class)
        self.h0 = torch.randn(2, 1, self.n_hidden).cuda()
        self.c0 = torch.randn(2, 1, self.n_hidden).cuda()
    
    def forward(self, sentences):
        output = torch.tensor([]).cuda()
        for i in range(len(sentences)):
            sen_len = sentences[i].shape[0]
            sentence = sentences[i].reshape(1, sen_len, self.n_inputs)
            sentence = torch.tensor(sentence.transpose(1, 0, 2)).cuda()
            # _, h = self.rnn(sentence, self.h0)
            # _, (h, _) = self.lstm(sentence, (self.h0, self.c0))
            # h = torch.nn.functional.dropout(h, 0.5)
            # out = self.fc(h[0])
            
            h_fb, (_, _) = self.lstm(sentence, (self.h0, self.c0))
            # h_fb, _ = self.rnn(sentence, self.h0)
            h, _ = torch.max(h_fb[:, :, 0:self.n_hidden]+h_fb[:, :, self.n_hidden:], 0)
            
            h = torch.nn.functional.dropout(h, 0.5)
            out = self.fc(h)
            output = torch.cat((output, out), 0)
        return output

In [6]:
model = RNN()
model = model.cuda()
# criterion = nn.CrossEntropyLoss()
criterion = nn.CrossEntropyLoss(weight=torch.tensor(SemData.weight, dtype=torch.float).cuda())
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.01)

In [7]:
def cond_mat(mat, preds, labels):
    for i in range(len(labels)):
        mat[preds[i], labels[i]] += 1
    return mat

In [8]:
def train():
    start = time.clock()
    for step in range(params.n_train):
        train_sentences, train_labels = SemData.train.next_batch(params.batch_size)
        train_labels = torch.tensor(train_labels).cuda()
        optimizer.zero_grad()
        train_output = model(train_sentences)
        _, train_golden = torch.max(train_labels, 1)
        train_loss = criterion(train_output, train_golden)
        train_loss.backward()
        optimizer.step()
        if step%params.n_display == 0 or step == params.n_train-1 :
            print("<step: %d>" % (step))
            train_mat = torch.zeros(14, 14).cuda() # 15 if onZero==False
            _, train_preds = torch.max(train_output, 1)
            train_mat = cond_mat(train_mat, train_preds, train_golden)
            train_accuracy = torch.trace(train_mat) / params.batch_size
            print("train_accuracy: %2.4f %%, local_loss: %.8f" % (train_accuracy*100, train_loss.item()))
            
            test_sentences = SemData.test.sentences
            test_labels = torch.tensor(SemData.test.labels).cuda()
            
            test_output = model(test_sentences)
            
            test_mat = torch.zeros(14, 14).cuda() # 15 if onZero==False
            _, test_preds = torch.max(test_output, 1)
            _, test_golden = torch.max(test_labels, 1)
            test_mat = cond_mat(test_mat, test_preds, test_golden)
            test_loss = criterion(test_output, test_golden)
            test_accuracy = torch.trace(test_mat) / params.test_size
            # f1 score (macro)
            TP = (torch.tensor([test_mat[i, i] for i in range(14)]) + 1e-5).cuda()
            FP = torch.sum(test_mat, 1) - TP
            FN = torch.sum(test_mat, 0) - TP
            P = TP / (TP + FP)
            R = TP / (TP + FN)
            test_f1 = torch.mean(2 / (1/P+1/R))
            print("test_accuracy:  %2.4f %%, total_loss: %.8f, f1_score: %.4f" % (test_accuracy*100, test_loss.item(), test_f1))
    print("------------------------------------")
    print("training time: ", time.clock()-start, " s")

In [9]:
train()

<step: 0>
train_accuracy: 4.6875 %, local_loss: 2.63534284
test_accuracy:  8.6657 %, total_loss: 2.65260148, f1_score: 0.0294
<step: 100>
train_accuracy: 3.1250 %, local_loss: 2.57534981
test_accuracy:  3.8056 %, total_loss: 2.62192416, f1_score: 0.0304
<step: 200>
train_accuracy: 24.2188 %, local_loss: 2.40114307
test_accuracy:  17.0105 %, total_loss: 2.57186651, f1_score: 0.0608
<step: 300>
train_accuracy: 20.3125 %, local_loss: 2.37475896
test_accuracy:  16.1852 %, total_loss: 2.50760961, f1_score: 0.0559
<step: 400>
train_accuracy: 15.6250 %, local_loss: 2.23354602
test_accuracy:  7.1068 %, total_loss: 2.45364523, f1_score: 0.0437
<step: 500>
train_accuracy: 15.6250 %, local_loss: 2.27472329
test_accuracy:  21.6414 %, total_loss: 2.35722518, f1_score: 0.1321
<step: 600>
train_accuracy: 13.2812 %, local_loss: 2.32858682
test_accuracy:  11.0500 %, total_loss: 2.34652638, f1_score: 0.0870
<step: 700>
train_accuracy: 14.0625 %, local_loss: 2.17896605
test_accuracy:  17.4690 %, total_lo

In [9]:
train() # no weight

<step: 0>
train_accuracy: 4.6875 %, local_loss: 2.65138626
test_accuracy:  15.8184 %, total_loss: 2.57452822, f1_score: 0.0468
<step: 100>
train_accuracy: 30.4688 %, local_loss: 1.85798347
test_accuracy:  37.0931 %, total_loss: 1.81209588, f1_score: 0.0672
<step: 200>
train_accuracy: 46.8750 %, local_loss: 1.55853164
test_accuracy:  45.4837 %, total_loss: 1.61855733, f1_score: 0.1190
<step: 300>
train_accuracy: 56.2500 %, local_loss: 1.47000468
test_accuracy:  50.3897 %, total_loss: 1.51637399, f1_score: 0.1537
<step: 400>
train_accuracy: 53.1250 %, local_loss: 1.50509071
test_accuracy:  55.2957 %, total_loss: 1.42368948, f1_score: 0.1734
<step: 500>
train_accuracy: 62.5000 %, local_loss: 1.24840117
test_accuracy:  56.7171 %, total_loss: 1.41012204, f1_score: 0.1871
<step: 600>
train_accuracy: 57.8125 %, local_loss: 1.28281951
test_accuracy:  57.7717 %, total_loss: 1.36438143, f1_score: 0.1977
<step: 700>
train_accuracy: 64.0625 %, local_loss: 1.30785882
test_accuracy:  58.3677 %, tota

In [9]:
train() # bilstm

<step: 0>
train_accuracy: 10.9375 %, local_loss: 2.67729163
test_accuracy:  8.9867 %, total_loss: 2.67035627, f1_score: 0.0534
<step: 100>
train_accuracy: 5.4688 %, local_loss: 2.49088049
test_accuracy:  10.2247 %, total_loss: 2.58412075, f1_score: 0.0704
<step: 200>
train_accuracy: 28.9062 %, local_loss: 2.10347152
test_accuracy:  25.2178 %, total_loss: 2.39768577, f1_score: 0.1563
<step: 300>
train_accuracy: 26.5625 %, local_loss: 2.36723757
test_accuracy:  27.6020 %, total_loss: 2.26368809, f1_score: 0.1688
<step: 400>
train_accuracy: 38.2812 %, local_loss: 2.14833641
test_accuracy:  35.4883 %, total_loss: 2.11428261, f1_score: 0.2311
<step: 500>
train_accuracy: 38.2812 %, local_loss: 2.05144691
test_accuracy:  37.0931 %, total_loss: 2.09526038, f1_score: 0.2263
<step: 600>
train_accuracy: 42.1875 %, local_loss: 1.53515255
test_accuracy:  37.4140 %, total_loss: 1.93283963, f1_score: 0.2758
<step: 700>
train_accuracy: 38.2812 %, local_loss: 1.43829346
test_accuracy:  45.1169 %, total

In [10]:
train() # continue

<step: 0>
train_accuracy: 42.1875 %, local_loss: 1.10981822
test_accuracy:  38.3769 %, total_loss: 1.67319179, f1_score: 0.3183
<step: 100>
train_accuracy: 51.5625 %, local_loss: 0.81429213
test_accuracy:  53.0949 %, total_loss: 1.61122692, f1_score: 0.3908
<step: 200>
train_accuracy: 54.6875 %, local_loss: 0.86153102
test_accuracy:  51.4901 %, total_loss: 1.59392691, f1_score: 0.3795
<step: 300>
train_accuracy: 70.3125 %, local_loss: 0.87668407
test_accuracy:  54.7914 %, total_loss: 1.65502954, f1_score: 0.4227
<step: 400>
train_accuracy: 53.9062 %, local_loss: 0.96636564
test_accuracy:  50.8024 %, total_loss: 1.64357603, f1_score: 0.3798
<step: 500>
train_accuracy: 50.0000 %, local_loss: 1.29098260
test_accuracy:  51.5818 %, total_loss: 1.49516833, f1_score: 0.3917
<step: 600>
train_accuracy: 53.1250 %, local_loss: 0.92524356
test_accuracy:  52.6364 %, total_loss: 1.65406156, f1_score: 0.3814
<step: 700>
train_accuracy: 58.5938 %, local_loss: 0.70378011
test_accuracy:  53.7827 %, tot

In [11]:
print(torch.__version__)

1.1.0
