## Description

This lab introduces recurrent neural networks (RNN, GRU, LSTM): an overview of their types and of the advantages and disadvantages of using them.

## Tasks

In [1]:
import os
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

Task 1. Build a simple recurrent neural network.

In [2]:
class SingleRNN(nn.Module):
    """Hand-written recurrent layer unrolled over exactly two time steps.

    Computes
        Y0 = tanh(X0 @ Wx + b)
        Y1 = tanh(Y0 @ Wy + X1 @ Wx + b)

    Args:
        n_inputs: number of features in each input row.
        n_neurons: number of recurrent units (width of each output).
    """

    def __init__(self, n_inputs, n_neurons):
        super(SingleRNN, self).__init__()
        # Wrapped in nn.Parameter so the weights are registered with the module:
        # they then show up in model.parameters(), follow model.to(device) and
        # are saved in state_dict(). Plain tensors get none of that.
        self.Wx = nn.Parameter(torch.randn(n_inputs, n_neurons))   # input -> hidden
        self.Wy = nn.Parameter(torch.randn(n_neurons, n_neurons))  # hidden -> hidden
        self.b = nn.Parameter(torch.zeros(1, n_neurons))           # bias, broadcast over the batch

    def forward(self, X0, X1):
        """Run both time steps.

        Args:
            X0: input at the first time step, shape (batch, n_inputs).
            X1: input at the second time step, shape (batch, n_inputs).

        Returns:
            Tuple (Y0, Y1), each of shape (batch, n_neurons). Both are also
            kept on the module as self.Y0 / self.Y1.
        """
        # First step has no previous output, so only the input term contributes.
        self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)
        # Second step mixes the previous output with the new input.
        self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy) + torch.mm(X1, self.Wx) + self.b)
        return self.Y0, self.Y1

In [3]:
# Toy data: two time steps (X0, X1) of a mini-batch with 4 rows of 3 features each.
n_inputs, n_neurons = 3, 5
X0_batch = torch.tensor(
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],
    dtype=torch.float,
)
X1_batch = torch.tensor(
    [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]],
    dtype=torch.float,
)
model = SingleRNN(n_inputs, n_neurons)

In [4]:
# Run both time steps through the network and show the output of each step.
Y0_val, Y1_val = model(X0_batch, X1_batch)
print(Y0_val, Y1_val, sep='\n')

tensor([[ 0.9630,  0.9985,  0.9998, -0.5624,  0.5305],
        [ 0.9972,  0.9874,  1.0000,  0.5996, -0.9009],
        [ 0.9998,  0.8959,  1.0000,  0.9655, -0.9983],
        [-0.9239, -1.0000,  0.9077,  1.0000, -0.9996]])
tensor([[ 0.9991, -0.9247,  1.0000,  0.9993, -1.0000],
        [ 0.7420,  0.2828,  0.8164, -0.9693,  0.7207],
        [ 0.9014, -1.0000,  1.0000,  0.7337, -0.9963],
        [-0.8954, -1.0000,  0.9877,  0.8233, -0.3909]])


Task 2. Get to know PyTorch's built-in RNN cell

In [5]:
# Step PyTorch's built-in RNN cell through the sequence by hand.
rnn = nn.RNNCell(3, 5)
X_batch = torch.tensor(
    [
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],  # time step 0
        [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]],  # time step 1
    ],
    dtype=torch.float,
)
hx = torch.randn(4, 5)  # random initial hidden state, one row per batch element
output = []
for x_t in X_batch:
    # Each call consumes one time step and returns the next hidden state.
    hx = rnn(x_t, hx)
    output.append(hx)

print(output)

[tensor([[-0.1955,  0.1880,  0.2866,  0.0624,  0.1098],
        [-0.9238,  0.7130,  0.1175,  0.6967,  0.9608],
        [-0.9938,  0.9632,  0.9381,  0.9177,  0.9098],
        [-0.9385, -0.7901,  0.8246,  0.9323, -0.2826]],
       grad_fn=<TanhBackward0>), tensor([[-0.9939,  0.9319,  0.7696,  0.9865,  0.9319],
        [ 0.4640, -0.4806, -0.2478,  0.6196, -0.4036],
        [-0.9370,  0.3671,  0.5176,  0.9901,  0.2503],
        [ 0.1549,  0.4100, -0.3900,  0.9540,  0.4495]],
       grad_fn=<TanhBackward0>)]


Task 3. Build a computation graph using PyTorch's `nn.RNNCell`.

In [6]:
class CleanBasicRNN(nn.Module):
    """Recurrent layer built on nn.RNNCell and unrolled over the input sequence.

    Args:
        batch_size: number of sequences per batch; fixes the shape of the
            initial hidden state.
        n_inputs: number of features per time step.
        n_neurons: size of the hidden state.
    """

    def __init__(self, batch_size, n_inputs, n_neurons):
        super(CleanBasicRNN, self).__init__()
        # Stored on self so the cell is a registered sub-module: its weights are
        # part of model.parameters() and forward() no longer depends on a global
        # variable named `rnn` existing in the notebook.
        self.rnn = nn.RNNCell(n_inputs, n_neurons)
        # Random initial hidden state, shape (batch_size, n_neurons).
        self.hx = torch.randn(batch_size, n_neurons)

    def forward(self, X):
        """Feed every time step of X through the cell.

        Args:
            X: tensor of shape (time_steps, batch_size, n_inputs).

        Returns:
            Tuple (output, hx): `output` is a list with the hidden state after
            each time step, `hx` is the final hidden state (same tensor as
            output[-1]).

        Note:
            self.hx is overwritten with the final state, so a second call
            continues from where the previous call stopped.
        """
        output = []
        # Iterating the tensor walks its first (time) dimension, so any
        # sequence length works instead of exactly two steps.
        for x_t in X:
            self.hx = self.rnn(x_t, self.hx)
            output.append(self.hx)
        return output, self.hx

In [7]:
# Same toy sequence as before: 2 time steps, 4 sequences per batch, 3 features.
batch_size, n_inputs, n_neurons = 4, 3, 5
X_batch = torch.tensor(
    [
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],  # time step 0
        [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]],  # time step 1
    ],
    dtype=torch.float,
)
model = CleanBasicRNN(batch_size, n_inputs, n_neurons)
output_val, states_val = model(X_batch)
# Per-step outputs first, then the final hidden state.
print(output_val, states_val, sep='\n')

[tensor([[-0.6340,  0.6578, -0.1951, -0.8484,  0.4107],
        [-0.8311,  0.6916,  0.0403,  0.9253,  0.9479],
        [-0.9922,  0.8694,  0.8533,  0.9404,  0.9332],
        [-0.9920, -0.7749,  0.6399,  0.9857, -0.3737]],
       grad_fn=<TanhBackward0>), tensor([[-0.9967,  0.9310,  0.6433,  0.9750,  0.9634],
        [ 0.5299, -0.5247, -0.1338,  0.6324, -0.4847],
        [-0.9319,  0.3652,  0.5078,  0.9902,  0.2685],
        [ 0.2165,  0.3886, -0.3302,  0.9486,  0.4959]],
       grad_fn=<TanhBackward0>)]
tensor([[-0.9967,  0.9310,  0.6433,  0.9750,  0.9634],
        [ 0.5299, -0.5247, -0.1338,  0.6324, -0.4847],
        [-0.9319,  0.3652,  0.5078,  0.9902,  0.2685],
        [ 0.2165,  0.3886, -0.3302,  0.9486,  0.4959]],
       grad_fn=<TanhBackward0>)


Task 4. Use an RNN for image classification, with the MNIST dataset as an example.

**Set hyperparameters**

In [8]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Each image is fed to the network as `sequence_length` steps of `input_size` values.
sequence_length, input_size = 28, 28

# Network architecture.
hidden_size, num_layers, num_classes = 128, 2, 10

# Optimisation settings.
batch_size, num_epochs, learning_rate = 64, 1, 0.01

**Data preparation**

In [9]:
# Both splits live under the same root and are converted with ToTensor.
# Only the training split is asked to download the data.
mnist_root = '../../data/'
train_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=False,
    transform=transforms.ToTensor(),
)

In [10]:
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

**Model building**

In [11]:
class RNN(nn.Module):
    """Sequence classifier: stacked GRU followed by a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised class scores.
        """
        # Zero initial state, created directly on the input's device and dtype so
        # the module does not rely on a global `device` variable and works
        # wherever the input tensor lives.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Only the top layer's output at the last time step feeds the classifier.
        out = self.fc(out[:, -1, :])
        return out

**Model training**

In [12]:
# Loss function, model on the target device, and the optimiser over its weights.
criterion = nn.CrossEntropyLoss()
model = RNN(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_step = len(train_loader)  # number of mini-batches per epoch

In [13]:
# Train the model, reporting the current batch loss and the full test-set
# accuracy every 100 optimisation steps.
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The periodic evaluation below switches to eval mode, so training
        # mode has to be restored at the start of every step.
        model.train()
        # Each image is presented as sequence_length steps of input_size values.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                # Separate names for the test batches so the training batch
                # variables above are not overwritten by the evaluation pass.
                for test_images, test_labels in test_loader:
                    test_images = test_images.reshape(-1, sequence_length, input_size).to(device)
                    test_labels = test_labels.to(device)
                    test_outputs = model(test_images)
                    # Index of the highest score per row is the predicted class.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    correct += (predicted == test_labels).sum().item()
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Test accuracy: {}'.format(epoch + 1, num_epochs,
                                                                                        i + 1, total_step, loss.item(),
                                                                                        100 * correct / total))

Epoch [1/1], Step [100/938], Loss: 0.3155, Test accuracy: 88.23
Epoch [1/1], Step [200/938], Loss: 0.2629, Test accuracy: 93.48
Epoch [1/1], Step [300/938], Loss: 0.1016, Test accuracy: 94.19
Epoch [1/1], Step [400/938], Loss: 0.2195, Test accuracy: 95.09
Epoch [1/1], Step [500/938], Loss: 0.3313, Test accuracy: 94.19
Epoch [1/1], Step [600/938], Loss: 0.2687, Test accuracy: 95.29
Epoch [1/1], Step [700/938], Loss: 0.0232, Test accuracy: 97.04
Epoch [1/1], Step [800/938], Loss: 0.1087, Test accuracy: 96.5
Epoch [1/1], Step [900/938], Loss: 0.1834, Test accuracy: 94.66


**Model testing**

In [14]:
# Final evaluation on the whole test set.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Gradients are already disabled here, so the raw outputs can be used
        # directly; the highest score per row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
# The sample shown below comes from the last test batch processed above.
print(predicted[:10], 'prediction numbers')
print(labels[:10], 'real numbers')

Test Accuracy of the model on the 10000 test images: 94.85 %
tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], device='cuda:0') prediction numbers
tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], device='cuda:0') real numbers


In [15]:
# Save only the learned weights (the state dict), not the module object itself.
checkpoint_path = 'mnist_rnn_model.ckpt'
torch.save(model.state_dict(), checkpoint_path)