In [1]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from time import time
import math
import bcolz
import cPickle as pickle

In [2]:
def one_hot_encode(value, value_range=11):
    one_hot = torch.zeros(value_range)
    one_hot[value] = 1
    return one_hot

In [3]:
def save_array(fname, arr): 
    c=bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()
    
def load_array(fname):
    return bcolz.open(fname)

In [4]:
# load the convolutional feature vectors
train_features = pickle.load(open("data/train_features.pkl", "rb"))
test_features = pickle.load(open("data/test_features.pkl", "rb"))
train_labels = load_array("data/train_labels.bc")
test_labels = load_array("data/test_labels.bc")

In [5]:
print(train_features.size())
print(train_labels.shape)
print(test_features.size())
print(test_labels.shape)

torch.Size([500, 32, 7, 35])
(500, 5)
torch.Size([200, 32, 7, 35])
(200, 5)


In [6]:
train_data = []
for i in range(train_features.size()[0]):
    train_data.append((train_features[i].view(-1), torch.LongTensor(train_labels[i])))
print(train_data[0][0].size(), train_data[0][1].shape)

test_data = []
for i in range(test_features.size()[0]):
    test_data.append((test_features[i].view(-1), torch.LongTensor(test_labels[i])))
print(test_data[0][0].size(), test_data[0][1].shape)

torch.Size([7840]) torch.Size([5])
torch.Size([7840]) torch.Size([5])


In [16]:
vocab_size = 10
embedding_dim = 7840 + 11
hidden_dim = 1024
dropout_prob = 0
target_size = 5

In [8]:
def time_since(since):
    s = time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [9]:
# train an LSTM to recognize digit sequences by using the convolutional features
class MNIST_Seq(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_prob):
        super(MNIST_Seq, self).__init__()

        self.hidden_dim=hidden_dim

        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.dropout = nn.Dropout(dropout_prob)
        self.decoder = nn.Linear(hidden_dim, vocab_size)


    def init_hidden(self):
        self.hidden = (Variable(torch.zeros(1, 1, self.hidden_dim)).cuda(),
                        Variable(torch.zeros(1, 1, self.hidden_dim)).cuda())

    def forward(self, context):
        lstm_out, self.hidden = self.lstm(context, self.hidden)
        lstm_dropped = self.dropout(lstm_out)
        decoded = self.decoder(lstm_dropped)
        #logprobs = F.log_softmax(decoded)
        return decoded

In [10]:
def evaluate(data):
    model.eval()
    correct_digits = 0
    total_digits = 0
    for this_input, this_target in data:
        model.init_hidden()
        # input_char is initially an SOS char
        input_char = one_hot_encode(10, 11)
        predictions = torch.LongTensor(target_size)
        for i in range(target_size):
            inpt = Variable(torch.cat([this_input, input_char])).cuda()
            output = model(inpt.view(1,1,-1))
            # get pred
            top_n, top_i = torch.topk(output, 1, dim=2)
            pred = top_i.cpu().data.view(1)[0]
            predictions[i] = pred
            input_char = one_hot_encode(pred, 11)             
#         print(predictions)
#         print(this_target)
        correct_digits += (predictions == this_target).sum()
        total_digits += 5
#         print("pred: ", predictions.numpy())
#         print("targ: ", this_target.numpy())
#         print()
    return float(correct_digits) / total_digits

def train(inp, target):
    model.init_hidden()
    model.zero_grad()

    input_im = torch.stack([inp] * target_size)
    
    ls = [10] + list(target[:-1])
    ls = [one_hot_encode(value, 11) for value in ls]
    input_char = torch.stack(ls)
    
    inpt = Variable(torch.cat([input_im, input_char], 1)).cuda()
    
    output = model(inpt.view(target_size, 1, -1))
    loss = criterion(output.view(target_size, vocab_size), Variable(target).cuda())

    loss.backward()
    optimizer.step()

    return loss.data[0] / target_size

def train_epoch():
    model.train(True)
    for this_input, this_target in train_data:
        loss = train(this_input, this_target)
        #print loss
    return loss

In [17]:
model = MNIST_Seq(vocab_size, embedding_dim, hidden_dim, dropout_prob).cuda()
criterion = nn.CrossEntropyLoss()

In [18]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [19]:
n_epochs=5
print_every = 1
plot_every = 10

all_losses = []
loss_avg = 0

In [21]:
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0002

In [20]:
start = time()
for epoch in range(1, n_epochs + 1):

    loss = train_epoch()
    loss_avg += loss

    if epoch % print_every == 0:
        
        train_acc = evaluate(train_data)
        test_acc = evaluate(test_data)
        print('[%s (%d %d%%) %.4f (train_acc: %.3f, test_acc: %.3f)]' % (time_since(start), epoch, epoch / n_epochs * 100, loss, train_acc, test_acc))

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[0m 24s (1 20%) 0.4512 (train_acc: 0.343, test_acc: 0.313)]
[0m 49s (2 40%) 0.3603 (train_acc: 0.398, test_acc: 0.357)]
[1m 13s (3 60%) 0.3186 (train_acc: 0.463, test_acc: 0.390)]
[1m 38s (4 80%) 0.2749 (train_acc: 0.516, test_acc: 0.427)]
[2m 3s (5 100%) 0.2475 (train_acc: 0.570, test_acc: 0.465)]


In [None]:
evaluate(test_data)