In [3]:
!pip install -q torchmetrics

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchmetrics

In [4]:
# Recurrent neural network: stacked nn.RNN layers followed by a linear classifier
class RNN(nn.Module):
  """Many-to-one sequence classifier.

  The input sequence is run through `num_layers` stacked `nn.RNN` layers and
  the output of the last time step is projected to `num_classes` logits.

  Args:
    input_size: number of features in each time step.
    hidden_size: size of the hidden state of every recurrent layer.
    num_layers: number of stacked recurrent layers.
    num_classes: number of logits produced per sequence.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super().__init__()
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    # batch_first=True: tensors are laid out as (batch, seq_len, features)
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return class logits for a batch of sequences.

    Args:
      x: tensor of shape (batch, seq_len, input_size).

    Returns:
      Tensor of shape (batch, num_classes) holding unnormalised scores.
    """
    # Zero initial hidden state, created on the same device and with the same
    # dtype as the input so the module does not depend on a global `device`.
    h0 = torch.zeros(
      self.num_layers, x.size(0), self.hidden_size,
      device=x.device, dtype=x.dtype,
    )
    # Forward propagate RNN: out is (batch, seq_len, hidden_size)
    out, _ = self.rnn(x, h0)
    # Decode the hidden state of the last time step
    out = out[:, -1, :]
    return self.fc(out)

In [5]:
# Device configuration: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [6]:
# Hyper-parameters
seed = 42             # fixes weight initialisation and DataLoader shuffling
num_classes = 10      # number of output logits (digits 0-9)
num_epochs = 2        # full passes over the training set
batch_size = 64       # samples per optimisation step
learning_rate = 0.001 # step size passed to the optimizer
input_size = 28       # features per time step (one image row)
sequence_length = 28  # time steps per sample (number of image rows)
hidden_size = 128     # width of the RNN hidden state
num_layers = 2        # stacked recurrent layers

# Seed PyTorch's random number generators before the model and the data
# loaders are created, so that Restart & Run All gives repeatable results.
torch.manual_seed(seed)

In [7]:
# Build the classifier from the hyper-parameters, then move it to the chosen device
model = RNN(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  num_classes=num_classes,
)
model = model.to(device)
# Last expression of the cell: shows the module summary
model

RNN(
  (rnn): RNN(28, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

In [8]:
# MNIST dataset
# Training portion of MNIST (60000 images); downloaded into dataset/ if missing.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
# Batch the training set and reshuffle it every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Test portion of MNIST (10000 images).
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Evaluation does not depend on sample order, so the test set is batched
# without shuffling to keep the pass deterministic.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 12.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 347kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.14MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.36MB/s]


In [9]:
# Loss and optimizer
# Cross-entropy is applied directly to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam updates every trainable parameter of the model.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [10]:
# Train the model
n_total_steps = len(train_loader)
# Put the model in training mode explicitly, so the cell also behaves
# correctly when it is re-run after an evaluation pass.
model.train()
for epoch in range(num_epochs):
  # The epoch label is shown on the progress bar instead of a separate print.
  progress = tqdm(train_loader, desc=f"Epoch [{epoch + 1}/{num_epochs}]")
  for i, (images, labels) in enumerate(progress):
    # origin shape: [N, 1, 28, 28] -> resized: [N, 28, 28]
    # (each image row becomes one time step of the sequence)
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)
    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 100 == 0:
      # tqdm.write logs the message without breaking up the progress bar,
      # which a plain print() inside a tqdm loop does.
      tqdm.write(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/2]


 11%|█         | 103/938 [00:06<00:59, 13.98it/s]

Epoch [1/2], Step [100/938], Loss: 1.2375


 22%|██▏       | 204/938 [00:13<00:43, 16.88it/s]

Epoch [1/2], Step [200/938], Loss: 0.7891


 32%|███▏      | 303/938 [00:17<00:29, 21.52it/s]

Epoch [1/2], Step [300/938], Loss: 0.6782


 43%|████▎     | 403/938 [00:21<00:18, 29.60it/s]

Epoch [1/2], Step [400/938], Loss: 0.7588


 54%|█████▎    | 503/938 [00:24<00:14, 29.32it/s]

Epoch [1/2], Step [500/938], Loss: 0.7210


 64%|██████▍   | 603/938 [00:27<00:12, 26.86it/s]

Epoch [1/2], Step [600/938], Loss: 0.5148


 75%|███████▍  | 703/938 [00:32<00:09, 24.31it/s]

Epoch [1/2], Step [700/938], Loss: 0.2867


 86%|████████▌ | 806/938 [00:35<00:04, 30.64it/s]

Epoch [1/2], Step [800/938], Loss: 0.4374


 97%|█████████▋| 906/938 [00:39<00:01, 30.25it/s]

Epoch [1/2], Step [900/938], Loss: 0.1752


100%|██████████| 938/938 [00:40<00:00, 23.41it/s]


Epoch [2/2]


 11%|█         | 105/938 [00:04<00:28, 29.07it/s]

Epoch [2/2], Step [100/938], Loss: 0.2086


 22%|██▏       | 203/938 [00:07<00:23, 31.12it/s]

Epoch [2/2], Step [200/938], Loss: 0.3615


 32%|███▏      | 303/938 [00:11<00:20, 30.37it/s]

Epoch [2/2], Step [300/938], Loss: 0.3600


 43%|████▎     | 403/938 [00:14<00:26, 20.42it/s]

Epoch [2/2], Step [400/938], Loss: 0.2032


 54%|█████▎    | 503/938 [00:18<00:13, 31.09it/s]

Epoch [2/2], Step [500/938], Loss: 0.3120


 64%|██████▍   | 605/938 [00:22<00:11, 29.64it/s]

Epoch [2/2], Step [600/938], Loss: 0.1357


 75%|███████▌  | 705/938 [00:25<00:07, 29.74it/s]

Epoch [2/2], Step [700/938], Loss: 0.0928


 86%|████████▌ | 806/938 [00:29<00:04, 29.03it/s]

Epoch [2/2], Step [800/938], Loss: 0.1734


 96%|█████████▋| 905/938 [00:32<00:01, 29.88it/s]

Epoch [2/2], Step [900/938], Loss: 0.3345


100%|██████████| 938/938 [00:33<00:00, 27.59it/s]


In [11]:
# Test the model
# Switch to evaluation mode for the measurement and remember the previous mode
# so it can be restored afterwards.
was_training = model.training
model.eval()
n_correct = 0
n_samples = 0
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
  for images, labels in test_loader:
    # Same reshape as in training: [N, 1, 28, 28] -> [N, 28, 28]
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)
    outputs = model(images)
    # Predicted class = index of the largest logit along the class dimension
    predicted = outputs.argmax(dim=1)
    n_samples += labels.size(0)
    n_correct += (predicted == labels).sum().item()
# Leave the model in the mode it was in before this cell ran.
model.train(was_training)

acc = 100.0 * n_correct / n_samples
# Report the number of samples actually evaluated rather than a hard-coded count.
print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 94.38 %
