In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import torchvision
import numpy as np

In [2]:
print(torch.cuda.current_device())

0


In [3]:
train_data = datasets.MNIST('~/data/mnist/', train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.MNIST('~/data/mnist/', train=False, transform=transforms.ToTensor(), download=True)
data_loader = torch.utils.data.DataLoader(train_data, batch_size=64, 
            shuffle=True, num_workers=4, drop_last=True)

In [4]:
class Params():
    def __init__(self):
        self.n_inputs = 28
        self.n_steps = 28
        self.hidden_size = 150
        self.n_class = 10
        self.batch_size = 64

params = Params()

In [25]:
class MNISTRnn(nn.Module):
    def __init__(self):
        super(MNISTRnn, self).__init__()
        self.n_inputs = params.n_inputs
        self.n_steps = params.n_steps
        self.hidden_size = params.hidden_size
        self.batch_size = params.batch_size
        self.n_class = params.n_class
        self.rnn = nn.RNN(self.n_inputs, self.hidden_size)
        self.fc = nn.Linear(self.hidden_size, self.n_class)
  
    def initHidden(self):
    # num layers, batch size, hidden size
        return torch.zeros(1, self.batch_size, self.hidden_size)
  
    def forward(self, images):
        images = images.squeeze(1)
        x = images.permute(1,0,2)# w, n, h
        hidden = self.initHidden()
        hidden = hidden.cuda()
        out, h_out = self.rnn(x, hidden) #out: seq_len, batch, hidden_size
        out = out[-1].view([out.shape[1], out.shape[2]])
        out = self.fc(out)
        return out.view(-1, self.n_class)

In [26]:
model = MNISTRnn()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [27]:
def train():
    epochs = 15
    for epoch in range(epochs):
        print('Epoch {}\{}'.format(epoch+1, epochs))
        tot_loss = 0.0
        correct = 0.0
        for data in data_loader:
            inputs, labels = data
            inputs, labels = Variable(inputs), Variable(labels)
            inputs = inputs.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            logits = model(inputs)
            _, preds = torch.max(logits.data,1)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            tot_loss += loss.item()
            correct += torch.sum(preds == labels.data)

        print('Epoch loss: ', tot_loss/len(train_data))
        print('Epoch acc: ', correct.item()/len(train_data))
    return model

In [28]:
model = train()

Epoch 1\15
Epoch loss:  0.01262030405079325
Epoch acc:  0.72915
Epoch 2\15
Epoch loss:  0.005291099001094699
Epoch acc:  0.89965
Epoch 3\15
Epoch loss:  0.0038669323236991964
Epoch acc:  0.92865
Epoch 4\15
Epoch loss:  0.00298925953283906
Epoch acc:  0.9449833333333333
Epoch 5\15
Epoch loss:  0.0026391105947395164
Epoch acc:  0.95185
Epoch 6\15
Epoch loss:  0.002411529238211612
Epoch acc:  0.9569
Epoch 7\15
Epoch loss:  0.00216883993173639
Epoch acc:  0.9604666666666667
Epoch 8\15
Epoch loss:  0.002058921172345678
Epoch acc:  0.9622833333333334
Epoch 9\15
Epoch loss:  0.001871984553275009
Epoch acc:  0.9654833333333334
Epoch 10\15
Epoch loss:  0.0017689654262115558
Epoch acc:  0.9676
Epoch 11\15
Epoch loss:  0.001740149086341262
Epoch acc:  0.9677666666666667
Epoch 12\15
Epoch loss:  0.0016780011168370645
Epoch acc:  0.9688666666666667
Epoch 13\15
Epoch loss:  0.00154737648088485
Epoch acc:  0.97125
Epoch 14\15
Epoch loss:  0.0015859671774009865
Epoch acc:  0.9705166666666667
Epoch 15\