In [129]:
# To do
# Test out variable layers
# Add regularization
# Add hyper-parameter tuning
# Check everything
# Run

In [130]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [131]:
# Install idx2numpy package for extracting data
!pip install idx2numpy



In [132]:
# Import packages
import gzip
import torch
import torchvision
import numpy as np 

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [133]:
# Mount Google drive to access data from Colab
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [134]:
def load_one_dataset(path):
    '''
    Convenience function to load a single dataset
    '''
    f = gzip.open(path, 'rb')
    data = torch.from_numpy(idx2numpy.convert_from_file(f))
    f.close()
    
    return(data)


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size):
    '''
    Load training as well as test images here
    '''
    
    train_images = load_one_dataset(train_imgs).type(torch.float32)
    train_labels = load_one_dataset(train_labs).type(torch.long)
    train = list(zip(train_images, train_labels))
    
    test_images = load_one_dataset(test_imgs).type(torch.float32)
    test_labels = load_one_dataset(test_labs).type(torch.long)
    test = list(zip(test_images, test_labels))
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)
    
    return(train_loader, test_loader)

In [135]:
class Net(nn.Module):
  
  def __init__(self, nb_units, input_dim, output_dim):
    '''
    Declare the network architecture here
    '''
    super(Net, self).__init__()
    
    # Initialize a list to store layers
    fc = []

    # Add input and output dimensions to layer list
    self.nb_units = [input_dim] + nb_units + [output_dim]

    # Now compute the total no. of layers
    self.nb_layers = len(self.nb_units)

    # Now append the hidden layers
    for i in range(1, self.nb_layers):
      fc.append(nn.Linear(self.nb_units[i-1], self.nb_units[i]))
    
    # Wrap this in a module list 
    self.fc = nn.ModuleList(fc)
    
  
  def forward(self, x):
    '''
    Send input forward through 
    the network
    '''
    # Reshape 28X28 images to be 784 X 784
    x = x.view(-1, 28*28)

    # Send example through network
    for layer in self.fc: x = F.relu(layer(x))
    
    return x

In [136]:
def train(nb_units=[256, 128, 64, 32, 16], input_dim=784, output_dim = 10, 
          epochs=2, lr=0.001, momentum=0.9, batch_size=256):
    '''
    This is the main training loop
    '''
    
    # Set paths to datasets
    paths = {
        
        'train_imgs': '/content/gdrive/MyDrive/data/train-images-idx3-ubyte.gz',
        'train_labs': '/content/gdrive/MyDrive/data/train-labels-idx1-ubyte.gz',
        'test_imgs': '/content/gdrive/MyDrive/data/t10k-images-idx3-ubyte.gz',
        'test_labs': '/content/gdrive/MyDrive/data/t10k-labels-idx1-ubyte.gz'
    }
    
    # Load datasets
    train_loader, test_loader = load_all_datasets(**paths, batch_size = batch_size)
    
    # Set parameters
    net = Net(nb_units, input_dim, output_dim)
    
    # We use the cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # We use mini-batch stochastic gradient descent with momentum
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    
    # Loop over the dataset multiple times
    for epoch in range(epochs):  
        
        # Initialize running loss
        running_loss = 0.0
        
        # Iterate through data now
        for i, data in enumerate(train_loader):
            
            # Get the inputs: data is a list of [inputs, labels]
            inputs, labels = data

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(inputs)
            
            # Calculate loss
            loss = criterion(outputs, labels)
            
            # Backward
            loss.backward()
            
            # Optimize
            optimizer.step()

            # Print statistics
            running_loss += loss.item()
        
        # Initialize the validation running loss
        val_running_loss = 0.0
        
        # Loop through the validation data
        for j, data in enumerate(test_loader):
          
          # No need to calculate gradients for validation set
          with torch.no_grad():

              # Get the data item 
              val_inputs, val_labels = data

              # Send the data item through the network to get output
              val_outputs = net(val_inputs)

              # Compute the loss
              val_loss = criterion(val_outputs, val_labels)

              # Add to the running validation loss
              val_running_loss += val_loss.item()
            
        # Print train loss
        print("The train loss on epoch {} is {}...".format(epoch, running_loss))
        
        # Print validation loss
        print("The validation loss on epoch {} is {}...".format(epoch, val_running_loss))
    
    # Print message
    print('Done training...')

In [137]:
train(epochs=100)

The train loss on epoch 0 is 303.3239602446556...
The validation loss on epoch 0 is 38.306807816028595...
The train loss on epoch 1 is 212.50751334428787...
The validation loss on epoch 1 is 35.803361654281616...
The train loss on epoch 2 is 199.51816880702972...
The validation loss on epoch 2 is 34.94192507863045...
The train loss on epoch 3 is 192.23022776842117...
The validation loss on epoch 3 is 33.41094583272934...
The train loss on epoch 4 is 187.6843690276146...
The validation loss on epoch 4 is 32.732335805892944...
The train loss on epoch 5 is 184.3571653366089...
The validation loss on epoch 5 is 33.24167376756668...
The train loss on epoch 6 is 181.8697247505188...
The validation loss on epoch 6 is 31.863867074251175...
The train loss on epoch 7 is 177.92119485139847...
The validation loss on epoch 7 is 32.69438564777374...
The train loss on epoch 8 is 175.32000172138214...
The validation loss on epoch 8 is 31.670001447200775...
The train loss on epoch 9 is 173.835130810737