In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import time
import numpy as np
print(np.__version__)

1.16.2


In [2]:
import data_reader
SemData = data_reader.read_data_sets("data", padding=False)

TRAIN_FILE.txt existed
TEST_FILE.txt existed


In [3]:
print(SemData.train.sentences.shape)
print(SemData.test.sentences.shape)

(8000,)
(2717,)


In [4]:
class Params():
    def __init__(self):
        self.n_inputs = 300
        self.n_hidden = 150
        self.n_class = 19
        self.batch_size = 128
        self.n_train = 2000
        self.n_display = 100
        self.test_size = 2717

params = Params()

In [5]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_hidden = params.n_hidden
        self.n_class = params.n_class
        self.batch_size = params.batch_size
        self.rnn = nn.RNN(self.n_inputs, self.n_hidden)
        self.lstm = nn.LSTM(self.n_inputs, self.n_hidden)
        self.fc = nn.Linear(self.n_hidden, self.n_class)
        self.h0 = torch.randn(1, 1, self.n_hidden).cuda()
        self.c0 = torch.randn(1, 1, self.n_hidden).cuda()
    
    def forward(self, sentences):
        for i in range(len(sentences)):
            sen_len = sentences[i].shape[0]
            sentence = sentences[i].reshape(1, sen_len, self.n_inputs)
            sentence = torch.tensor(sentence.transpose(1, 0, 2)).cuda()
            # _, h = self.rnn(sentence, self.h0)
            _, (h, _) = self.lstm(sentence, (self.h0, self.c0))
            h = torch.nn.functional.dropout(h, 0.5)
            out = self.fc(h[0])
            if i==0:
                output = out
            else:
                output = torch.cat([output, out])
        return output

In [6]:
model = RNN()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.01)

In [7]:
def train():
    start = time.clock()
    test_sentences = SemData.test.sentences
    test_labels = torch.tensor(SemData.test.labels).cuda()
    for step in range(params.n_train):
        sentences, labels = SemData.train.next_batch(params.batch_size)
        labels = torch.tensor(labels).cuda()
        optimizer.zero_grad()
        logits = model(sentences)
        _, train_golden = torch.max(labels, 1)
        loss = criterion(logits, train_golden)
        loss.backward()
        optimizer.step()

        if step % params.n_display == 0:
            _, preds = torch.max(logits, 1)
            train_accuracy = torch.sum(preds == train_golden).item() / params.batch_size
            
            test_logits = model(test_sentences)
            _, test_preds = torch.max(test_logits, 1)
            _, test_golden = torch.max(test_labels, 1)
            test_loss = criterion(test_logits, test_golden)
            test_accuracy = torch.sum(test_preds == test_golden).item() / params.test_size
            print("<step: %d>" % (step))
            print("train_accuracy: %.4g %% local_loss: %.8g" % (train_accuracy*100, loss.item()))
            print("test_accuracy: %.4g %% total_loss: %.8g" % (test_accuracy*100, test_loss.item()))
    print("------------------------------------")
    print("training time: ", time.clock()-start, " s")
    test_logits = model(test_sentences)
    _, test_preds = torch.max(test_logits, 1)
    _, test_golden = torch.max(test_labels, 1)
    test_loss = criterion(test_logits, test_golden)
    test_accuracy = torch.sum(test_preds == test_golden).item() / params.test_size
    print("test_accuracy: %.4g %% total_loss: %.8g" % (test_accuracy*100, test_loss.item()))

In [8]:
model = train() # RNN n_hidden=150, dropout=0.5, L2=False

<step: 0>
train_accuracy: 9.375 % local_loss: 2.9261742
test_accuracy: 11.19 % total_loss: 2.8763506
<step: 100>
train_accuracy: 16.41 % local_loss: 2.6115258
test_accuracy: 16.08 % total_loss: 2.6712329
<step: 200>
train_accuracy: 21.88 % local_loss: 2.4935794
test_accuracy: 17.63 % total_loss: 2.6652174
<step: 300>
train_accuracy: 37.5 % local_loss: 2.205471
test_accuracy: 20.21 % total_loss: 2.5219367
<step: 400>
train_accuracy: 47.66 % local_loss: 1.9282322
test_accuracy: 20.57 % total_loss: 2.5854838
<step: 500>
train_accuracy: 42.19 % local_loss: 1.9235288
test_accuracy: 21.31 % total_loss: 2.6803162
<step: 600>
train_accuracy: 42.19 % local_loss: 1.781953
test_accuracy: 21.05 % total_loss: 2.7049806
<step: 700>
train_accuracy: 46.09 % local_loss: 1.7690461
test_accuracy: 20.76 % total_loss: 2.9145789
<step: 800>
train_accuracy: 53.91 % local_loss: 1.5275216
test_accuracy: 21.9 % total_loss: 2.860441
<step: 900>
train_accuracy: 62.5 % local_loss: 1.302924
test_accuracy: 20.1 % to

In [8]:
model = train() # RNN n_hidden=150, dropout=0.75, L2=False

<step: 0>
train_accuracy: 0.7812 % local_loss: 3.1063154
test_accuracy: 6.809 % total_loss: 2.9494512
<step: 100>
train_accuracy: 19.53 % local_loss: 2.6463046
test_accuracy: 16.64 % total_loss: 2.7068105
<step: 200>
train_accuracy: 21.88 % local_loss: 2.6135867
test_accuracy: 16.97 % total_loss: 2.6648424
<step: 300>
train_accuracy: 24.22 % local_loss: 2.3458853
test_accuracy: 19.84 % total_loss: 2.563504
<step: 400>
train_accuracy: 29.69 % local_loss: 2.2998233
test_accuracy: 21.02 % total_loss: 2.5521312
<step: 500>
train_accuracy: 32.03 % local_loss: 2.1606925
test_accuracy: 20.61 % total_loss: 2.6385729
<step: 600>
train_accuracy: 32.03 % local_loss: 2.1539917
test_accuracy: 19.95 % total_loss: 2.6155984
<step: 700>
train_accuracy: 33.59 % local_loss: 2.0787683
test_accuracy: 24 % total_loss: 2.538944
<step: 800>
train_accuracy: 35.16 % local_loss: 2.1280279
test_accuracy: 21.35 % total_loss: 2.741565
<step: 900>
train_accuracy: 48.44 % local_loss: 1.6619174
test_accuracy: 25.03 %

In [8]:
model = train() # LSTM n_hidden=150, dropout=0.5, L2=False

<step: 0>
train_accuracy: 5.469 % local_loss: 2.9477136
test_accuracy: 8.502 % total_loss: 2.9118285
<step: 100>
train_accuracy: 17.97 % local_loss: 2.5412292
test_accuracy: 20.17 % total_loss: 2.5936091
<step: 200>
train_accuracy: 25.78 % local_loss: 2.3631718
test_accuracy: 27.31 % total_loss: 2.3311367
<step: 300>
train_accuracy: 42.97 % local_loss: 1.6521778
test_accuracy: 36.29 % total_loss: 1.976873
<step: 400>
train_accuracy: 62.5 % local_loss: 1.2153895
test_accuracy: 45.27 % total_loss: 1.7492627
<step: 500>
train_accuracy: 63.28 % local_loss: 1.1227171
test_accuracy: 42.99 % total_loss: 1.7962646
<step: 600>
train_accuracy: 69.53 % local_loss: 0.95380795
test_accuracy: 48.77 % total_loss: 1.7715095
<step: 700>
train_accuracy: 78.91 % local_loss: 0.73090172
test_accuracy: 50.02 % total_loss: 1.8223318
<step: 800>
train_accuracy: 83.59 % local_loss: 0.59417158
test_accuracy: 49.36 % total_loss: 1.9133638
<step: 900>
train_accuracy: 89.84 % local_loss: 0.34373221
test_accuracy: 

In [8]:
model = train() # LSTM n_hidden=150, dropout=0.5, L2=0.01

<step: 0>
train_accuracy: 6.25 % local_loss: 2.9220266
test_accuracy: 7.214 % total_loss: 2.9125187
<step: 100>
train_accuracy: 17.19 % local_loss: 2.639487
test_accuracy: 16.75 % total_loss: 2.6832063
<step: 200>
train_accuracy: 17.97 % local_loss: 2.715559
test_accuracy: 16.75 % total_loss: 2.6763699
<step: 300>
train_accuracy: 22.66 % local_loss: 2.5020874
test_accuracy: 22.12 % total_loss: 2.5406444
<step: 400>
train_accuracy: 26.56 % local_loss: 2.4539399
test_accuracy: 22.41 % total_loss: 2.5121427
<step: 500>
train_accuracy: 21.88 % local_loss: 2.4431083
test_accuracy: 22.41 % total_loss: 2.4546094
<step: 600>
train_accuracy: 21.09 % local_loss: 2.4868965
test_accuracy: 22.05 % total_loss: 2.4568908
<step: 700>
train_accuracy: 25.78 % local_loss: 2.3279834
test_accuracy: 27.94 % total_loss: 2.233644
<step: 800>
train_accuracy: 31.25 % local_loss: 2.3453293
test_accuracy: 29.37 % total_loss: 2.1344507
<step: 900>
train_accuracy: 26.56 % local_loss: 2.1394622
test_accuracy: 28.74 

KeyboardInterrupt: 