### RNN - 1 Hidden layer

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as ds

#### Loading MNIST dataset

In [2]:
# Training split; download=True fetches the files into ./data when they are missing.
train_dataset = ds.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Held-out test split, read from the same root directory (no download requested here).
test_dataset = ds.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [3]:
# `train_data` / `train_labels` are deprecated aliases in torchvision;
# `data` / `targets` hold the same tensors and emit no deprecation warning.
print(train_dataset.data.size())
print(train_dataset.targets.size())



torch.Size([60000, 28, 28])




torch.Size([60000])


In [4]:
# `test_data` / `test_labels` are deprecated aliases in torchvision;
# `data` / `targets` hold the same tensors and emit no deprecation warning.
print(test_dataset.data.size())
print(test_dataset.targets.size())



torch.Size([10000, 28, 28])




torch.Size([10000])


#### Make dataset iterable

In [5]:
batch_size = 100   # samples per mini-batch
n_iters = 3000     # total number of optimizer steps we want to run

# Number of mini-batches needed to see the whole training set once.
batches_per_epoch = len(train_dataset) / batch_size
# Convert the iteration budget into whole epochs (truncating any fraction).
num_epochs = int(n_iters / batches_per_epoch)

In [6]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
# Test batches are served in dataset order.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

#### Create model class

In [7]:
class RNNModel(nn.Module):
    """ReLU RNN followed by a linear read-out of the last time step.

    Args:
        input_dim: number of features fed to the RNN at each time step.
        hidden_dim: size of the RNN hidden state.
        layer_dim: number of stacked RNN layers.
        output_dim: number of outputs produced by the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()

        # Kept on the instance because forward() needs them to size the initial state.
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # batch_first=True: inputs/outputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')

        # Maps the last hidden output to the output scores.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and return the linear read-out of the last step.

        Args:
            x: batch-first input tensor of shape (batch, seq, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Zero initial hidden state, created on the input's device and dtype so the
        # model also works after .to(device) / .double(). A fresh zeros tensor has no
        # autograd history, so no requires_grad_()/detach() round trip is needed.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Only the output at the final time step feeds the classifier.
        return self.fc(out[:, -1, :])

#### Instantiate model class

In [8]:
# Model dimensions passed to RNNModel(input_dim, hidden_dim, layer_dim, output_dim).
input_dim = 28     # features per time step
hidden_dim = 100   # size of the RNN hidden state
layer_dim = 1      # number of stacked RNN layers
output_dim = 10    # number of output scores

In [9]:
# Build the network from the dimensions configured above.
model = RNNModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    layer_dim=layer_dim,
    output_dim=output_dim,
)

#### Instantiate loss class

In [10]:
# Cross-entropy over the raw output scores, averaged over the batch (the default reduction).
criterion = nn.CrossEntropyLoss(reduction='mean')

#### Instantiate optimizer class

In [11]:
learning_rate = 0.01  # SGD step size
# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

#### Train model

In [12]:
# Number of time steps per sequence: the training loop reshapes every batch to
# (-1, seq_dim, input_dim) before feeding it to the model.
seq_dim = 28

In [13]:
# Global step counter across all epochs.
# NOTE(review): `iter` shadows the Python builtin of the same name; the name is kept
# so any later cell that reads it keeps working, but consider renaming it.
iter = 0
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Evaluation below switches to eval mode, so re-enable train mode every step.
        model.train()

        # Reshape each batch into (batch, seq_dim, input_dim) sequences.
        # Inputs do not need gradients; only the model parameters are optimized.
        images = images.view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iter += 1
        if iter % 500 == 0:
            # Periodic evaluation on the full test set.
            model.eval()
            correct = 0
            total = 0
            # no_grad(): skip building autograd graphs during evaluation.
            with torch.no_grad():
                # Separate names so the training batch/outputs are not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, seq_dim, input_dim)
                    test_outputs = model(test_images)
                    # Index of the highest score along dim 1 is the predicted class.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps the running count a plain Python int.
                    correct += (predicted == test_labels).sum().item()
            # Float percentage; independent of tensor integer-division semantics.
            accuracy = 100.0 * correct / total
            # `loss` is the loss of the most recent training batch.
            print('Iteration {}: Loss {}, Accuracy {}'.format(iter, loss.item(), accuracy))

Iteration 500: Loss 2.3050808906555176, Accuracy 11
Iteration 1000: Loss 2.294857978820801, Accuracy 15
Iteration 1500: Loss 2.2815682888031006, Accuracy 18
Iteration 2000: Loss 2.188897132873535, Accuracy 19
Iteration 2500: Loss 1.683497428894043, Accuracy 36
Iteration 3000: Loss 1.2253551483154297, Accuracy 57
