In [1]:
# Imports

import torch

import torchvision

import torch.nn as nn  # All neural network modules: nn.Linear, nn.Conv2d, BatchNorm, loss functions

import torch.optim as optim  # All optimization algorithms: SGD, Adam, etc.

import torch.nn.functional as F  # Functional (parameter-free) versions of the nn operations

from torch.utils.data import (

    DataLoader,

)  # Gives easier dataset management and creates mini-batches

import torchvision.datasets as datasets  # Standard datasets that can be imported in a convenient way

import torchvision.transforms as transforms  # Transformations that can be applied to a dataset

In [2]:
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling are repeatable across "Restart Kernel -> Run All".
# Best effort only: some CUDA kernels stay non-deterministic.
seed = 42
torch.manual_seed(seed)

# Hyperparameters
input_size = 28  # features per time step (one image row is fed per step)
hidden_size = 256  # size of the hidden state of each LSTM layer
num_layers = 2  # number of stacked LSTM layers
num_classes = 10  # number of scores the classifier outputs per sample
sequence_length = 28  # time steps per sample (the linear head is sized from this)
learning_rate = 0.005  # learning rate handed to the optimizer
batch_size = 64  # samples per mini-batch in both data loaders
num_epochs = 10  # passes over the training set performed by one training cell

In [5]:
# Recurrent neural network with LSTM (many-to-one)

class RNN_LSTM(nn.Module):
    """LSTM classifier whose linear head reads the outputs of *all* time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of scores produced per sample.
        sequence_length: Number of time steps in every input sequence. The
            linear head has ``hidden_size * sequence_length`` inputs, so every
            batch must contain exactly this many steps. Defaults to 28, the
            value of the notebook's ``sequence_length`` hyperparameter; pass it
            explicitly when that hyperparameter is changed.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super(RNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The head consumes the flattened outputs of every time step, not only
        # the last one, hence the hidden_size * sequence_length input width.
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, sequence_length, input_size) to scores of shape (batch, num_classes)."""
        # Zero initial hidden and cell states, allocated with x's device and
        # dtype so the module does not rely on a notebook-level `device`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # Forward propagate LSTM.
        # out: (batch, sequence_length, hidden_size) because batch_first=True
        out, _ = self.lstm(x, (h0, c0))

        # Flatten all time steps into one feature vector per sample.
        out = out.reshape(out.shape[0], -1)

        # Decode the concatenated per-step outputs into class scores.
        return self.fc(out)


In [6]:

# Load Data
# Both MNIST splits are stored under dataset/ (download=True) and share the
# same ToTensor transform.
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)

# Shuffle only the training batches. Evaluation does not benefit from
# shuffling, and a fixed order keeps test-time iteration deterministic.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)



In [7]:
# Build the network from the hyperparameters above, then move its
# parameters to the selected device.
model = RNN_LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

In [8]:
# Adam updates every parameter of the model with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Cross-entropy between the model's class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

In [9]:
# Optimise the model for num_epochs passes over the training batches.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the selected device; squeeze(1) drops the size-1
        # dimension at index 1 so each sample becomes a 2-D sequence.
        data, targets = data.to(device).squeeze(1), targets.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass followed by the Adam parameter update.
        loss.backward()
        optimizer.step()

In [17]:
# Train Network
# NOTE(review): this loop repeats the training loop of the earlier training
# cell verbatim. Running both cells trains the same model for
# 2 * num_epochs epochs in total -- keep only one if that is not intended.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Send inputs and labels to the selected device (GPU when available).
        targets = targets.to(device)
        data = data.to(device).squeeze(1)  # drop the size-1 dim at index 1

        # Reset gradients before computing the new ones.
        optimizer.zero_grad()

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward, then gradient descent / Adam step
        loss.backward()
        optimizer.step()



# Check accuracy on training & test data to see how good our model is
def check_accuracy(loader, model):
    """Print how many samples yielded by ``loader`` the model classifies correctly.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches with a ``dataset``
            attribute. ``loader.dataset.train`` selects whether the run is
            reported as training or test data.
        model: ``nn.Module`` mapping a batch of inputs to scores whose
            dimension 1 indexes the classes.

    Inputs and labels are moved to the device of the model's parameters. The
    model is put in eval mode while scoring and afterwards restored to the
    mode it was in before the call, even if scoring raises.
    """
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0

    # Follow the model's own device instead of a notebook-level global; a
    # parameter-free model leaves the batches where they are.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    # Remember the current mode so it can be restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.squeeze(1)  # drop the size-1 dimension at index 1
                if model_device is not None:
                    x = x.to(device=model_device)
                    y = y.to(device=model_device)

                scores = model(x)
                _, predictions = scores.max(1)  # index of the best score per sample
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Set model back to the mode it had on entry
        model.train(was_training)

    # The running count is a 0-dim tensor once a batch was seen; make it a plain int.
    num_correct = int(num_correct)

    if num_samples == 0:
        # Avoid dividing by zero when the loader yields nothing.
        print("Got 0 / 0: the loader produced no samples")
    else:
        # The original literal contained "\a" (the BEL escape), which swallowed
        # the "a" of "accuracy" in the printed message.
        print(
            f"Got {num_correct} / {num_samples} with accuracy {num_correct / num_samples * 100:.2f}"
        )



In [18]:
# Report accuracy on the training split first, then on the held-out test split.
for _eval_loader in (train_loader, test_loader):
    check_accuracy(_eval_loader, model)

Checking accuracy on training data
Got 59862 / 60000 with ccuracy 99.77
Checking accuracy on test data
Got 9897 / 10000 with ccuracy 98.97
