In [44]:
import idx2numpy
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [72]:
# Load train and test data
train_images = idx2numpy.convert_from_file('datasets/train-images-idx3-ubyte/train-images-idx3-ubyte')
train_labels = idx2numpy.convert_from_file('datasets/train-labels-idx1-ubyte/train-labels-idx1-ubyte')
test_images = idx2numpy.convert_from_file('datasets/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte')
test_labels = idx2numpy.convert_from_file('datasets/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte')

# Convert numpy arrays to PyTorch tensors
train_data = torch.tensor(train_images, dtype=torch.float32)
train_labels = torch.tensor(train_labels, dtype=torch.long)
test_data = torch.tensor(test_images, dtype=torch.float32)
test_labels = torch.tensor(test_labels, dtype=torch.long)

In [73]:
# Check shapes
print("Train data shape:", train_data.shape)  # should be (60000, 28, 28)
print("Train labels shape:", train_labels.shape)  # should be (60000,)
print("Test data shape:", test_data.shape)  # should be (10000, 28, 28)
print("Test labels shape:", test_labels.shape)  # should be (10000,)

Train data shape: torch.Size([60000, 28, 28])
Train labels shape: torch.Size([60000])
Test data shape: torch.Size([10000, 28, 28])
Test labels shape: torch.Size([10000])


In [74]:
#convert pytorch tensors to pytorch datasets
train_data = TensorDataset(train_data, train_labels)
test_data = TensorDataset(test_data, test_labels)

#now translate into dataloader objects
batchsize = 128
train_loader = DataLoader(train_data, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=test_data.tensors[0].shape[0])

In [75]:
#create model function
def mnistModel(o='SGD', lr=0.01):

    class mnistNet(nn.Module):
        def __init__(self):
            super().__init__()

            #input layer
            self.input = nn.Linear(784, 64)
            
            #hidden layer
            self.fc1 = nn.Linear(64, 32)
            self.fc2 = nn.Linear(32, 32)

            #output layer
            self.output = nn.Linear(32, 10)

        def forward(self, x):
            x = F.relu(self.input(x))
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return self.output(x)

    #create model instance
    net = mnistNet()

    #loss function
    lossfun = nn.CrossEntropyLoss()

    #optimizer
    optifun = getattr(torch.optim, o)
    optimizer = optifun(net.parameters(), lr=lr)

    return net, lossfun, optimizer


In [76]:
#model training function
def trainModel(o, lr):
    epochs = 100

    #create a new model
    net, lossfun, optimizer = mnistModel(o, lr) 

    #initialize losses
    losses = torch.zeros(epochs)
    trainAcc = []
    testAcc = []

    #looping over epochs
    for epochi in range(epochs):
        batchAcc = []
        batchLoss = []
        
        for X, y in train_loader:
            #forward pass and loss
            yHat = net(X)
            loss = lossfun(yHat, y)
            
            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # loss from this batch
            batchLoss.append(loss.item())

            # compute accuracy
            matches = torch.argmax(yHat,axis=1) == y     # booleans (false/true)
            matchesNumeric = matches.float()             # convert to numbers (0/1)
            accuracyPct = 100*torch.mean(matchesNumeric) # average and x100
            batchAcc.append( accuracyPct )               # add to list of accuracies
            # end of batch loop..

        #get their average training accuracy
        trainAcc.append( np.mean(batchAcc) )
        
        # and get average losses across the batches
        losses[epochi] = np.mean(batchLoss)
        
        # test accuracy
        X,y = next(iter(test_loader)) # extract X,y from test dataloader
        with torch.no_grad(): # deactivates autograd
            yHat = net(X)

        # compare the following really long line of code to the training accuracy lines
        testAcc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()) )

    # function output
    return trainAcc,testAcc,losses,net

In [77]:
learningRates = [.0001] #add learning rate
optimTypes = ['Adam'] #add optimizers here

#initialize performance matrix
finalPerformance = np.zeros((len(learningRates),len(optimTypes)))

# now for the experiment!
for idx_o, o in enumerate(optimTypes):
  for idx_lr, lr in enumerate(learningRates):
    trainAcc,testAcc,losses,net = trainModel(o,lr)
    finalPerformance[idx_lr,idx_o] = np.mean(testAcc[-10:])

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3584x28 and 784x64)

In [None]:
# plot the results!
plt.plot(learningRates,finalPerformance,'o-',linewidth=2)
plt.legend(optimTypes)
plt.xscale('log')
plt.xlabel('Learning rates')
plt.ylabel('Test accuracy (ave. last 10 epochs)')
plt.title('Comparison of optimizers by learning rate')
plt.show()