In [2]:
import os
import torch
import numpy as np 
import pandas as pd 

from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms 

In [3]:
# define hyperparamters - ones that do not change through training

batchsize = 100       # number of samples to work through before updating the model's internal parameters
sequence_len = 28     # how much information the model can retain from the past and how long it can backpropagate gradients through time
input_len = 28        # sequential data in the form of fixed-length input sequences
hidden_size = 128     # number of features that are used to compute each hidden state
num_layers = 2        # stacking two LSTMs together to form a stacked LSTM
num_classes = 10      # number of distinct classes in the classification task. this determines the size of the output layer of the network
num_epochs = 5        # number of times the entire training dataset will pass through the model
learning_rate = 0.01  # step size used for updating the model parameters during training


In [4]:
# Loading the training and testing data
#
#   datasets.FashionMNIST function from the torchvision.datasets module that is used to download and load the FashionMNIST dataset
#   root: specifies the directory where the dataset is stored 
#   train: whether or not the dataset will that is being loaded is the training set
#   transform: applies a transformation to the data, converting them to the specified form (PyTorch tensors in this case)
#
training_data = datasets.FashionMNIST(root="/Users/jaimejacob/Documents/ml_practice/data", train=True, transform=transforms.ToTensor())
test_data = datasets.FashionMNIST(root="/Users/jaimejacob/Documents/ml_practice/data", train=False, transform=transforms.ToTensor())


# Creating the Training and Testing DataLoader
#
#   DataLoader: a PyTorch utility that wraps the dataset and provides an iterable over the dataset
#   first parameter (either training_data or test_data): the dataset to be wrapped by the DataLoader
#   batch_size: number of samples that will be loaded per batch
#
train_dataloader = DataLoader(training_data, batch_size=batchsize)
test_dataloader = DataLoader(test_data, batch_size=batchsize)

In [5]:
# Define LSTM class
class LSTM(nn.Module):  # defines a new class 'LSTM' that inherits from 'torch.nn.module' which is the base class for all neural networks and models

    # Initializer method defining the layers and parameters of the network
    def __init__(self, input_len, hidden_size, num_class, num_layers):
        super(LSTM, self).__init__()    # calls the initializer of nn.module to ensure the module is set up correctly 
        self.hidden_size = hidden_size  # stores the hidden_size parameter
        self.num_layers = num_layers    # stores the num_layers parameter
        self.lstm = nn.LSTM(input_len, hidden_size, num_layers, batch_first=True)  # defines an LSTM layer with the specified input length, hidden size, and num layers
        self.output_layer = nn.Linear(hidden_size, num_class)  # defines a linear layer that maps the hidden state output of the LSTM to the desired number of classes

    # Defines the forward pass of the network
    #   X : input tensor
    def forward(self, X):
        hidden_states = torch.zeros(self.num_layers, X.size(0), self.hidden_size)  # initializes the hidden sates with zeros
        cell_states  =  torch.zeros(self.num_layers, X.size(0), self.hidden_size)  # initializes the cell states with zeros
       
        # Pass the input tensor, X, and the intiial hidden and cell states through the LSTM layer
        #   Out contains the output features from the LSTM for each time step
        #   _ contains the hidden and cell states
        out, _ = self.lstm(X, (hidden_states, cell_states))

        # Takes the output from the last time step of the LSTM and passes it through the fully connected layer to get the final output
        #   Size output is the number of classes, as defined in initialization
        out = self.output_layer(out[:, -1, :])

        return out  # returns the final output 

In [6]:
# Create an instance of the LSTM model we defined above
#   Using the hyperparameters we defined earlier as well
model = LSTM(input_len, hidden_size, num_classes, num_layers)
print(model)

LSTM(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=128, out_features=10, bias=True)
)


In [10]:
# Defining more hyperparameters

# Loss Function - quantifies the difference between the predicted outputs of a machine learning algorithm and the actual target values
#
#   Cross Entropy is a commonly used loss function for multi-class classification tasks
#   Loss function of choice can change depending on objective when building LSTM
#
loss_func = nn.CrossEntropyLoss()

# Optimizer - function or an algorithm that adjusts the attributes of the neural network
#
#   Optimizers aid in reducing the overall loss (calculated by the loss function) and improving accuracy
#   Adam is an optimization alogorithm well-suited for training deep neural networks 
#   Uses Stochastic Gradient Descent but allows for different step sizes for different parameters 
#   
#   model.parameters(): Provides the parameters (weights and biases) of the model we instanstiated with our defined LSTM model that need to be optimized during training 
#   lr: specifies the learning rate, which controls the step size in the parameter space update
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
# Define training function
#
#   num_epochs: number of times to iterate through the entire dataset
#   model: the neural network to be trained
#   train_dataloader: PyTorch DataLoader object that provides batches of training data
#   loss_func: Loss function used to compute the loss between predictions and actual outputs
#
def train(num_epochs, model, train_dataloader, loss_func):
    total_steps = len(train_dataloader)

    # Iterate over the specified number of epochs
    for epoch in range(num_epochs):

        # Iterate over batches of data provided by train_dataloader
        for batch, (image, labels) in enumerate(train_dataloader):

            image = image.reshape(-1, sequence_len, input_len)  # reshapes the input data to match the expected shape 

            output = model(image)  # passes the reshaped input through the model to get predictions
            loss = loss_func(output, labels)  # computes the loss between the model's predictions and the actual labels

            optimizer.zero_grad()  # clears the gradients of all optimized parameters, ensures the gradients are not accumualted and are reset to zero before backpropagation
            loss.backward()   # computes gradients of the loss with respect to all model parameters
            optimizer.step()  # updates the model parameters based on the computed gradients and the optimizer's update rule

            # Print the current epoch, batch number, total number of batches, and the current loss every 100 batches
            if (batch+ 1) % 100 == 0:
                print(f"Epoch:{epoch}; Batch {batch + 1} / {total_steps}; Loss: {loss.item():>4f}")

In [14]:
# Train our model using the train function we just defined with previously defined parameters
train(num_epochs, model, train_dataloader, loss_func)

Epoch:0; Batch 99 / 600; Loss: 0.909919
Epoch:0; Batch 199 / 600; Loss: 0.769844
Epoch:0; Batch 299 / 600; Loss: 0.576003
Epoch:0; Batch 399 / 600; Loss: 0.528650
Epoch:0; Batch 499 / 600; Loss: 0.631615
Epoch:0; Batch 599 / 600; Loss: 0.439263
Epoch:1; Batch 99 / 600; Loss: 0.470505
Epoch:1; Batch 199 / 600; Loss: 0.360890
Epoch:1; Batch 299 / 600; Loss: 0.382278
Epoch:1; Batch 399 / 600; Loss: 0.327946
Epoch:1; Batch 499 / 600; Loss: 0.541091
Epoch:1; Batch 599 / 600; Loss: 0.262329
Epoch:2; Batch 99 / 600; Loss: 0.310740
Epoch:2; Batch 199 / 600; Loss: 0.332401
Epoch:2; Batch 299 / 600; Loss: 0.316494
Epoch:2; Batch 399 / 600; Loss: 0.318100
Epoch:2; Batch 499 / 600; Loss: 0.491146
Epoch:2; Batch 599 / 600; Loss: 0.278895
Epoch:3; Batch 99 / 600; Loss: 0.337027
Epoch:3; Batch 199 / 600; Loss: 0.264335
Epoch:3; Batch 299 / 600; Loss: 0.349030
Epoch:3; Batch 399 / 600; Loss: 0.286830
Epoch:3; Batch 499 / 600; Loss: 0.429619
Epoch:3; Batch 599 / 600; Loss: 0.272788
Epoch:4; Batch 99 / 

In [16]:
# Define two new variables using the dataloader we defined earlier 
#   test_images will contain a batch of test images
#   test_labels will contain the corresponding labels for those test images 
test_images, test_labels = next(iter(test_dataloader))
test_labels

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2])

In [17]:
# Perform a forward pass of the test_images through your neural network model
#   view reshapes test_images insto a 3D tensor with dimensions (batch_size, sequence_len, input_len)
test_output = model(test_images.view(-1, 28,28))

In [20]:
# Obtain the predicted class labels from test_output
#   torch.max is a PyTorch function that retrurns the maximum value of elements along a given dimension of a tensor 
_, predicted = torch.max(test_output, 1)
predicted

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 5, 3, 4, 1, 2, 2, 8, 0, 2, 5, 7, 5,
        1, 2, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 0, 1, 6, 7, 6, 7, 2, 1,
        2, 6, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 0, 3, 7, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2])

In [22]:
# Find the length to compare, ensures that you are only comparing labels for which predictions are available in both sets
length = min(len(predicted), len(test_labels))

# Iterate over indices up to length 
#   Check if the predicted label matches the actual label
#   If correct, append 1 to the correct list, otherwise 0
correct = [1 for i in range(length) if predicted[i] == test_labels[i]]

# Calculate percentage correct
#   Sum the 1s in correct giving the total number of correct predictions
#   Divide by the total number of entries to find the percentage correct 
percentage_correct = sum(correct)/length
percentage_correct

0.91