In [0]:
import sys
# Put the parent directory on the module search path so modules located there
# can be imported (presumably where the local `CustomRNN` module lives — confirm).
sys.path.append('../')

In [0]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as ds

from CustomRNN import DeepRNN

#### Loading MNIST dataset

In [0]:
# Both splits share the same on-disk root; only the training split requests a download.
mnist_root = './data'
train_dataset = ds.MNIST(
    root=mnist_root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = ds.MNIST(
    root=mnist_root,
    train=False,
    transform=transforms.ToTensor(),
)

In [4]:
# Sanity check: dimensions of the training images and of their labels.
print(train_dataset.data.shape)
print(train_dataset.targets.shape)

torch.Size([60000, 28, 28])
torch.Size([60000])


In [5]:
# Sanity check: dimensions of the test images and of their labels.
print(test_dataset.data.shape)
print(test_dataset.targets.shape)

torch.Size([10000, 28, 28])
torch.Size([10000])


#### Make dataset iterable

In [0]:
# Training budget is expressed in optimizer steps (n_iters) and converted to
# whole epochs from the number of batches one pass over the training set yields.
batch_size, n_iters = 100, 3000
iters_per_epoch = len(train_dataset) / batch_size
num_epochs = int(n_iters / iters_per_epoch)

In [0]:
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

#### Create model class

In [0]:
class RNNModel(nn.Module):
    """Sequence classifier: a `DeepRNN` followed by a linear read-out layer.

    Args:
        input_dim: passed to `DeepRNN` as its first argument.
        output_dim: number of output features of the final linear layer.
        hidden_layers: list forwarded to `DeepRNN`; its last entry is used
            as the input width of the linear layer.
    """

    def __init__(self, input_dim, output_dim, hidden_layers: list):
        super().__init__()

        self.rnn = DeepRNN(input_dim, hidden_layers, batch_first=True, nonlinearity='relu')
        # The read-out layer is sized from the last entry of `hidden_layers`
        # (presumably the width of the final recurrent layer — confirm in CustomRNN).
        readout_width = hidden_layers[-1]
        self.fc = nn.Linear(readout_width, output_dim)

    def forward(self, x):
        """Run `x` through the RNN and classify from the last index of axis 1.

        The RNN output is indexed as a 3-D tensor; with `batch_first=True`
        axis 1 is assumed to be the time axis — confirm against `DeepRNN`.
        """
        rnn_out = self.rnn(x)
        last_step = rnn_out[:, -1, :]
        return self.fc(last_step)

#### Instantiate model class

In [0]:
# input_dim: size of the last axis the images are reshaped to before entering the model.
# output_dim: number of scores the model emits per sample (one per MNIST digit class).
input_dim, output_dim = 28, 10

In [0]:
# The [100, 100] list is forwarded to DeepRNN as its hidden-layer specification.
model = RNNModel(
    input_dim=input_dim,
    output_dim=output_dim,
    hidden_layers=[100, 100],
)

#### Instantiate loss class

In [0]:
# Cross-entropy over raw class scores, averaged over the batch
# ('mean' is the library default, spelled out here for clarity).
criterion = nn.CrossEntropyLoss(reduction='mean')

#### Instantiate optimizer class

In [0]:
# Plain stochastic gradient descent over every parameter of the model.
learning_rate = 0.01
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
)

#### Train model

In [0]:
# Size of the middle axis when image batches are reshaped to
# (-1, seq_dim, input_dim) before being fed to the model.
seq_dim = 28

In [14]:
%%time
def _test_accuracy_percent(model, loader, seq_dim, input_dim):
    """Return the model's accuracy over `loader` as a whole-number percentage.

    Puts `model` in eval mode (the caller is responsible for switching back
    to training mode) and runs every batch without gradient tracking.

    Args:
        model: module mapping a (-1, seq_dim, input_dim) batch to per-class scores.
        loader: iterable yielding (images, labels) batches.
        seq_dim, input_dim: target sizes of the last two axes for the reshape.

    Returns:
        int: 100 * correct // total, i.e. the accuracy truncated to a whole percent.
    """
    model.eval()
    correct = 0
    total = 0
    # No backward pass happens here, so skip autograd bookkeeping entirely;
    # this saves both memory and time on the full test set.
    with torch.no_grad():
        for test_images, test_labels in loader:
            test_images = test_images.view(-1, seq_dim, input_dim)
            _, predicted = torch.max(model(test_images), 1)
            total += test_labels.size(0)
            # .item() keeps the running count a plain Python int, so the
            # percentage below does not depend on tensor-division semantics.
            correct += (predicted == test_labels).sum().item()
    # Integer floor division: always prints a whole percent, regardless of
    # whether the installed PyTorch truncates or true-divides integer tensors.
    return 100 * correct // total


# Named `iteration` rather than `iter` so the built-in iter() stays usable
# in the rest of the kernel session.
iteration = 0
model.train()
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Reshape the image batch to (batch, seq_dim, input_dim) for the RNN.
        # Inputs do not need gradients; only the parameters are optimized.
        images = images.view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iteration += 1
        if iteration % 500 == 0:
            accuracy = _test_accuracy_percent(model, test_loader, seq_dim, input_dim)
            model.train()  # evaluation switched to eval mode; resume training mode
            # `loss` is the loss of the most recent *training* batch.
            print('Iteration {}:\tLoss {:.5f}, Accuracy {}%'.format(iteration, loss.item(), accuracy))

Iteration 500:	Loss 2.28075, Accuracy 16%
Iteration 1000:	Loss 1.75996, Accuracy 32%
Iteration 1500:	Loss 1.42914, Accuracy 55%
Iteration 2000:	Loss 1.29020, Accuracy 62%
Iteration 2500:	Loss 0.91775, Accuracy 65%
Iteration 3000:	Loss 0.96114, Accuracy 71%
CPU times: user 2min 33s, sys: 4.62 s, total: 2min 38s
Wall time: 2min 39s
