In [209]:
# To do
# Add regularization
# Add hyper-parameter tuning
# Run

In [210]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [211]:
# Install idx2numpy package for extracting data
!pip install idx2numpy



In [212]:
# Import packages
import gzip
import torch
import torchvision
import numpy as np 

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [213]:
# Check for GPUs
torch.device('cpu'), 
torch.cuda.device('cuda'), 
torch.cuda.device('cuda:1'), torch.cuda.device_count()

(<torch.cuda.device at 0x7f95b9bc22e8>, 1)

In [214]:
# Set device
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [215]:
def load_one_dataset(path):
    '''
    Convenience function to load a single dataset
    '''
    f = gzip.open(path, 'rb')
    data = torch.from_numpy(idx2numpy.convert_from_file(f))
    f.close()
    
    return(data)


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size):
    '''
    Load training as well as test images here
    '''
    train_images = load_one_dataset(train_imgs).type(torch.float32)
    train_labels = load_one_dataset(train_labs).type(torch.long)
    train = list(zip(train_images, train_labels))

    test_images = load_one_dataset(test_imgs).type(torch.float32)
    test_labels = load_one_dataset(test_labs).type(torch.long)
    test = list(zip(test_images, test_labels))
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)
    
    return(train_loader, test_loader)

In [216]:
class Net(nn.Module):
  
  def __init__(self, nb_units, input_dim, output_dim):
    '''
    Declare the network architecture here
    '''
    super(Net, self).__init__()
    
    # Initialize a list to store layers
    fc = []

    # Add input and output dimensions to layer list
    self.nb_units = [input_dim] + nb_units + [output_dim]

    # Now compute the total no. of layers
    self.nb_layers = len(self.nb_units)

    # Now append the hidden layers
    for i in range(1, self.nb_layers):
      fc.append(nn.Linear(self.nb_units[i-1], self.nb_units[i]))
    
    # Wrap this in a module list 
    self.fc = nn.ModuleList(fc)
    
  
  def forward(self, x):
    '''
    Send input forward through 
    the network
    '''
    # Reshape 28X28 images to be 784 X 784
    x = x.view(-1, 28*28)

    # Send example through network
    for layer in self.fc: x = F.relu(layer(x))
    
    return x

In [217]:
def train(nb_units=[128, 64, 32, 16], input_dim=784, output_dim = 10, 
          epochs=2, lr=0.001, momentum=0.9, batch_size=256):
    '''
    This is the main training loop
    '''
    
    # Set paths to datasets
    paths = {
        
        'train_imgs': '/content/drive/MyDrive/data/train-images-idx3-ubyte.gz',
        'train_labs': '/content/drive/MyDrive/data/train-labels-idx1-ubyte.gz',
        'test_imgs': '/content/drive/MyDrive/data/t10k-images-idx3-ubyte.gz',
        'test_labs': '/content/drive/MyDrive/data/t10k-labels-idx1-ubyte.gz'
    }
    
    # Load datasets
    train_loader, test_loader = load_all_datasets(**paths, batch_size = batch_size)
    
    # Set parameters
    net = Net(nb_units, input_dim, output_dim)
    
    # We use the cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # We use mini-batch stochastic gradient descent with momentum
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    
    # Loop over the dataset multiple times
    for epoch in range(epochs):  
        
        # Initialize running loss
        running_loss = 0.0
        running_accuracy = 0.0

        # Initialize the validation running loss
        val_running_loss = 0.0
        val_running_accuracy = 0.0
        
        # Iterate through data now
        for i, data in enumerate(train_loader):
            
            # Get the inputs: data is a list of [inputs, labels]
            inputs, labels = data

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(inputs)
            
            # Calculate loss
            loss = criterion(outputs, labels)

            # Get predictions
            _, preds = torch.max(outputs.data, 1)
            
            # Backward
            loss.backward()
            
            # Optimize
            optimizer.step()

            # Add to running loss
            running_loss += loss.item()

            # Add to running accuracy
            running_accuracy += (preds == labels).float().sum()
        
        # Loop through the validation data
        for j, data in enumerate(test_loader):
          
          # No need to calculate gradients for validation set
          with torch.no_grad():

              # Get the data item 
              val_inputs, val_labels = data

              # Send the data item through the network to get output
              val_outputs = net(val_inputs)

              # Compute the loss
              val_loss = criterion(val_outputs, val_labels)

              # Get predictions
              _, val_preds = torch.max(val_outputs.data, 1)

              # Add to running loss
              val_running_loss += val_loss.item()

              # Add to running accuracy
              val_running_accuracy += (val_preds == val_labels).float().sum()
        
        # Rescale the training and validation perfomance metrics
        running_loss = running_loss/len(train_loader)
        running_accuracy = running_accuracy/(len(train_loader)*batch_size)
        
        # Rescale the validation loss
        val_running_loss = val_running_loss/len(test_loader)
        val_running_accuracy = val_running_accuracy/(len(test_loader)*batch_size)
        
        # Make print message format string
        msg = "Data: {}, Epoch:[{}], Loss:[{}], Accuracy:[{}]," "\n"


        # Print performance
        print(msg.format('Train', epoch, running_loss, running_accuracy))
        print(msg.format('Val.', epoch, val_running_loss, val_running_accuracy))

    # Print message
    print('Done training...')

In [208]:
train(epochs=100)

torch.Size([60000, 28, 28])
torch.Size([60000])
Data: Train, Epoch:[0], Loss:[1.0703337042889696], Accuracy:[0.6120179295539856],

Data: Val., Epoch:[0], Loss:[0.7867661744356156], Accuracy:[0.6942383050918579],

Data: Train, Epoch:[1], Loss:[0.7020378597239231], Accuracy:[0.7363696694374084],

Data: Val., Epoch:[1], Loss:[0.7176669597625732], Accuracy:[0.711621105670929],

Data: Train, Epoch:[2], Loss:[0.650341910757917], Accuracy:[0.7541389465332031],

Data: Val., Epoch:[2], Loss:[0.6768500924110412], Accuracy:[0.73046875],

Data: Train, Epoch:[3], Loss:[0.6217785052796627], Accuracy:[0.7625166177749634],

Data: Val., Epoch:[3], Loss:[0.6559620723128319], Accuracy:[0.7367187738418579],

Data: Train, Epoch:[4], Loss:[0.5979095597216424], Accuracy:[0.7705119848251343],

Data: Val., Epoch:[4], Loss:[0.6573981806635857], Accuracy:[0.7337890863418579],

Data: Train, Epoch:[5], Loss:[0.5826126132873779], Accuracy:[0.7757646441459656],

Data: Val., Epoch:[5], Loss:[0.6696657985448837], Accu