# Recurrent Neural Network (RNN/LSTM)

In [1]:
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable

cuda = torch.cuda.is_available() # True if cuda is available, False otherwise

Let's load the MNIST data set.

In [2]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                torchvision.transforms.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5))])
train_data = torchvision.datasets.MNIST(root='../data/', train=True, transform=transform, download=True)
test_data = torchvision.datasets.MNIST(root='../data/', train=False, transform=transform, download=True)

Create a *loader* to feed the data batch by batch during training.

In [3]:
batch = 100
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch)

Now, we define the **recurrent neural network**.

In [4]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(input_size=28, hidden_size=1024, num_layers=1, batch_first=True)
        self.fc = nn.Sequential(
                    nn.Linear(1024, 10),
                    nn.Softmax())
    
    def forward(self, x):
        h0 = Variable(torch.randn(1, x.size(0), 1024))
        c0 = Variable(torch.randn(1, x.size(0), 1024))
        if cuda:
            h0, c0 = h0.cuda(), c0.cuda()
        x, _  = self.rnn(x, (h0, c0))
        x = x[:, -1, :] # last output
        return self.fc(x)

rnn = RNN().cuda() if cuda else RNN()

We define the loss function and the optimization scheme (here **Adam**) for the neural network.

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=0.001)

Finally, we train the classifier.

In [6]:
epochs = 5
train_size = int(train_data.train_labels.size()[0])
test_size = int(test_data.test_labels.size()[0])
accuracy = 0.

for i in range(epochs):
    for j, (images, labels) in enumerate(train_loader):
        images = Variable(images).view(images.size(0), 28, 28)
        if cuda:
            images = images.cuda()
        labels = Variable(labels).cuda() if cuda else Variable(labels)

        rnn.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # test network  
        if (j + 1) % 300 == 0:
            for images, labels in test_loader:
                images = Variable(images).view(images.size(0), 28, 28)
                if cuda:
                    images = images.cuda()
                labels = Variable(labels).cuda() if cuda else Variable(labels)
                outputs = rnn(images)
                _, predicted = torch.max(outputs, 1)
                accuracy += torch.sum(torch.eq(predicted, labels).float()).data[0] / test_size
            print('[TEST] Epoch %i/%i [step %i/%i] accuracy: %.3f' % 
                  (i + 1, epochs, j + 1, float(train_size) / batch, accuracy))
            accuracy = 0.

[TEST] Epoch 1/5 [step 300/600] accuracy: 0.803
[TEST] Epoch 1/5 [step 600/600] accuracy: 0.901
[TEST] Epoch 2/5 [step 300/600] accuracy: 0.942
[TEST] Epoch 2/5 [step 600/600] accuracy: 0.956
[TEST] Epoch 3/5 [step 300/600] accuracy: 0.956
[TEST] Epoch 3/5 [step 600/600] accuracy: 0.966
[TEST] Epoch 4/5 [step 300/600] accuracy: 0.969
[TEST] Epoch 4/5 [step 600/600] accuracy: 0.976
[TEST] Epoch 5/5 [step 300/600] accuracy: 0.973
[TEST] Epoch 5/5 [step 600/600] accuracy: 0.976
