In [91]:
# Mount Google Drive at /content/drive. The default data, figure and
# checkpoint locations used by the functions in this notebook all sit
# under /content/drive/MyDrive, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [92]:
# Install idx2numpy (used below to read the gzip-compressed dataset files)
# and livelossplot.
# NOTE(review): livelossplot is not imported in the visible cells - confirm
# it is still needed before keeping this install.
!pip install idx2numpy --quiet
!pip install livelossplot --quiet

In [93]:
# Import packages
import os
import json
import gzip
import torch
import torchvision
import numpy as np 

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [94]:
def get_best_results(results):
  '''
  Summarise a training run by the epoch with the lowest validation loss.

  Parameters
  ----------
  results : dict
      Per-epoch metric sequences stored under the keys 'train_loss',
      'train_accuracy', 'val_loss' and 'val_accuracy'.

  Returns
  -------
  list
      [best_epoch, val_loss, val_accuracy, train_loss, train_accuracy],
      where best_epoch is the 0-based index of the minimum of
      results['val_loss'] and the four metrics are read at that index.
      Every entry is a plain Python int/float so the list can be written
      with json.dump.

  Raises
  ------
  ValueError
      If results['val_loss'] is empty.
  '''
  val_losses = results['val_loss']
  if len(val_losses) == 0:
    raise ValueError("results['val_loss'] is empty; there is no best epoch")

  # Epoch on which the minimum validation loss was reached
  best_val_epoch = int(np.argmin(np.asarray(val_losses)))

  def metric_at_best(key):
    # float() turns NumPy scalars into JSON-serialisable Python floats
    return float(results[key][best_val_epoch])

  # The validation accuracy must come from 'val_accuracy'; reading
  # 'train_accuracy' here would report the training accuracy twice.
  return [best_val_epoch,
          metric_at_best('val_loss'),
          metric_at_best('val_accuracy'),
          metric_at_best('train_loss'),
          metric_at_best('train_accuracy')]


def plot_loss(results, lab, model_num, fig_dir='/content/drive/MyDrive/figs'):
  '''
  Plot the training and validation curves of one metric and save the
  figure to disk.

  Parameters
  ----------
  results : dict
      Must contain the sequences results['train_' + lab] and
      results['val_' + lab], one value per epoch.
  lab : str
      Metric name, e.g. 'loss' or 'accuracy'. Used to pick the dictionary
      keys, the plot title and the file name.
  model_num : int or str
      Identifier placed at the start of the file name.
  fig_dir : str, optional
      Directory the figure is written to. It is created if missing.

  Returns
  -------
  None
      The figure is written to '<fig_dir>/<model_num>_<lab>_results.png'.
  '''
  # Store model name and destination path
  model_name = str(model_num) + '_' + lab + '_results.png'
  path = os.path.join(fig_dir, model_name)

  # Make sure the target directory exists before saving into it
  os.makedirs(fig_dir, exist_ok=True)

  # Draw on a dedicated figure so nothing leaks to or from pyplot's
  # global "current figure"
  fig, ax = plt.subplots()
  ax.plot(results['train_' + lab], label='Train')
  ax.plot(results['val_' + lab], label='Validation')

  # Add annotations
  ax.legend()
  ax.set_title(lab.title() + ' by Epoch')
  ax.set_xlabel('Epoch')

  # Save the figure and release it; this function can be called once
  # per hyper-parameter combination, so figures must not pile up
  fig.savefig(path)
  plt.close(fig)

In [95]:
def load_one_dataset(path):
    '''
    Read one gzip-compressed dataset file and return it as a torch tensor.

    Parameters
    ----------
    path : str
        Location of the .gz file. Its decompressed content is handed to
        idx2numpy.convert_from_file.

    Returns
    -------
    torch.Tensor
        Tensor built with torch.from_numpy from the decoded array.
    '''
    # The context manager closes the file even when decoding raises,
    # which a manual open()/close() pair does not guarantee
    with gzip.open(path, 'rb') as f:
        data = torch.from_numpy(idx2numpy.convert_from_file(f))

    return data


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size):
    '''
    Build the training and test DataLoaders from four dataset files.

    Parameters
    ----------
    train_imgs, train_labs : str
        Paths of the training images file and training labels file.
    test_imgs, test_labs : str
        Paths of the test images file and test labels file.
    batch_size : int
        Batch size used by both loaders.

    Returns
    -------
    tuple
        (train_loader, test_loader). Only the training loader shuffles.
    '''
    def _paired_examples(img_path, lab_path):
        # Images are cast to float32 and divided by 255; labels are cast
        # to long. Each example is an (image, label) pair.
        pixels = load_one_dataset(img_path).type(torch.float32)/255.0
        targets = load_one_dataset(lab_path).type(torch.long)
        return list(zip(pixels, targets))

    train_pairs = _paired_examples(train_imgs, train_labs)
    test_pairs = _paired_examples(test_imgs, test_labs)

    make_loader = torch.utils.data.DataLoader
    train_loader = make_loader(train_pairs, batch_size=batch_size,
                               shuffle=True, num_workers=2)
    test_loader = make_loader(test_pairs, batch_size=batch_size,
                              shuffle=False, num_workers=2)

    return train_loader, test_loader

In [96]:
class Net(nn.Module):
  '''
  Fully connected network: a stack of nn.Linear layers with ReLU between
  them and no activation after the last layer.
  '''

  def __init__(self, nb_units, input_dim, output_dim):
    '''
    Declare the network architecture.

    Parameters
    ----------
    nb_units : sequence of int
        Width of each hidden layer, in order.
    input_dim : int
        Number of features per example after flattening.
    output_dim : int
        Number of output scores per example.
    '''
    super(Net, self).__init__()

    # Full list of layer widths: input, hidden layers, output
    self.nb_units = [input_dim] + list(nb_units) + [output_dim]

    # Number of entries in the width list (one more than the number of
    # Linear layers)
    self.nb_layers = len(self.nb_units)

    # One Linear layer per consecutive pair of widths
    fc = [nn.Linear(n_in, n_out)
          for n_in, n_out in zip(self.nb_units[:-1], self.nb_units[1:])]

    # Wrap in a ModuleList so the parameters are registered
    self.fc = nn.ModuleList(fc)


  def forward(self, x):
    '''
    Send input forward through the network.

    Each example is flattened to input_dim features. Hidden layers are
    followed by ReLU; the last layer returns its raw scores, which is
    the input nn.CrossEntropyLoss expects (it applies log-softmax itself).
    '''
    # Flatten every example to a vector of input_dim features,
    # e.g. 28x28 images become rows of 784 values
    x = x.view(-1, self.nb_units[0])

    # ReLU after every layer except the last: clamping the output scores
    # at zero would remove all negative logits
    last = len(self.fc) - 1
    for i, layer in enumerate(self.fc):
      x = layer(x)
      if i < last:
        x = F.relu(x)

    return x

In [97]:
def train(config, input_dim=784, output_dim = 10, epochs=2,
          data_dir = '/content/drive/MyDrive/data'):
    '''
    Train one network for a single hyper-parameter combination.

    Parameters
    ----------
    config : dict
        Must provide 'batch_size', 'nb_units', 'lr', 'momentum',
        'weight_decay' and 'model_num'.
    input_dim : int
        Number of input features passed to Net.
    output_dim : int
        Number of output scores passed to Net.
    epochs : int
        Number of passes over the training data.
    data_dir : str
        Directory holding the four .gz dataset files.

    Returns
    -------
    tuple
        (results, best_results). results maps 'train_loss',
        'train_accuracy', 'val_loss' and 'val_accuracy' to lists with one
        float per epoch; best_results is the output of get_best_results.
        The "validation" metrics are computed on the loader built from
        the t10k files.
    '''
    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set paths to datasets
    paths = {
        'train_imgs': os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
        'train_labs': os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
        'test_imgs': os.path.join(data_dir,'t10k-images-idx3-ubyte.gz'),
        'test_labs': os.path.join(data_dir,'t10k-labels-idx1-ubyte.gz')
    }

    # Load datasets
    train_loader, test_loader = load_all_datasets(**paths, batch_size = config['batch_size'])

    # Build the network and move it to the device
    net = Net(config['nb_units'], input_dim, output_dim)
    net.to(device)

    # We use the cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # We use mini-batch stochastic gradient descent with momentum
    optimizer = optim.SGD(net.parameters(), lr=config['lr'], momentum=config['momentum'],
                          weight_decay=config['weight_decay'])

    # Store results here
    results = {
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }

    # Format string for the per-epoch report
    msg = "{}, Epoch:{}, Loss:{}, Accuracy:{}," "\n"

    # Loop over the dataset multiple times
    for epoch in range(epochs):

        # ---- Training pass ----
        net.train()
        running_loss = 0.0
        train_correct = 0
        train_seen = 0

        for inputs, labels in train_loader:

            # Send the batch to the memory of the device
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward, loss, backward, optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate plain Python numbers so no tensor (or autograd
            # graph) is kept alive across batches
            running_loss += loss.item()
            preds = outputs.detach().argmax(dim=1)
            train_correct += (preds == labels).sum().item()
            train_seen += labels.size(0)

        # ---- Validation pass ----
        net.eval()
        val_running_loss = 0.0
        val_correct = 0
        val_seen = 0

        # No need to calculate gradients for the validation set
        with torch.no_grad():
            for val_inputs, val_labels in test_loader:
                val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)

                val_outputs = net(val_inputs)
                val_loss = criterion(val_outputs, val_labels)

                val_running_loss += val_loss.item()
                val_preds = val_outputs.argmax(dim=1)
                val_correct += (val_preds == val_labels).sum().item()
                val_seen += val_labels.size(0)

        # Loss: mean of the per-batch losses
        running_loss = running_loss/max(len(train_loader), 1)
        val_running_loss = val_running_loss/max(len(test_loader), 1)

        # Accuracy: divide by the number of examples actually seen.
        # len(loader)*batch_size overcounts whenever the last batch is
        # smaller than batch_size.
        running_accuracy = train_correct/max(train_seen, 1)
        val_running_accuracy = val_correct/max(val_seen, 1)

        # Append to the results tracker (builtin float; the np.float alias
        # no longer exists in current NumPy releases)
        results['train_loss'].append(float(running_loss))
        results['train_accuracy'].append(float(running_accuracy))
        results['val_loss'].append(float(val_running_loss))
        results['val_accuracy'].append(float(val_running_accuracy))

        # Print performance
        print(msg.format("Training", epoch, running_loss, running_accuracy))
        print(msg.format("Validation", epoch, val_running_loss, val_running_accuracy))

    # Print message
    print('Done training...')

    # Get the best results and print
    best_results = get_best_results(results)
    print(best_results)

    # Plot the losses
    plot_loss(results, lab='loss', model_num=config['model_num'])
    plot_loss(results, lab='accuracy', model_num=config['model_num'])

    return results, best_results

In [99]:
def get_grid(archs, learning_rates, decays, momentums, batch_size = 64,
             checkpoints_dir = "/content/drive/MyDrive/checkpoints"):
  '''
  Build the full grid of hyper-parameter combinations and save it as a
  new experiment file.

  Every combination of architecture, learning rate, weight decay and
  momentum becomes one config dictionary with 'done' set to 0.

  Parameters
  ----------
  archs : list of list of int
      Hidden-layer widths, one list per architecture. The position of an
      architecture in this list is stored as 'model_num'.
  learning_rates, decays, momentums : iterable of float
      Values to combine with every architecture.
  batch_size : int
      Stored unchanged in every config.
  checkpoints_dir : str
      Directory the experiment file is written to; created if missing.

  Returns
  -------
  str
      The experiment ID. It is also the name of the JSON file inside
      checkpoints_dir, so it can be passed to run_grid_search as is.
  '''
  os.makedirs(checkpoints_dir, exist_ok=True)

  # Count the files already in checkpoints_dir. The names returned by
  # os.listdir are relative, so they must be joined with the directory
  # before testing them (otherwise they are resolved against the cwd).
  n_existing = sum(
      os.path.isfile(os.path.join(checkpoints_dir, name))
      for name in os.listdir(checkpoints_dir))

  # Next free ID; skip forward so an existing experiment is never
  # overwritten (e.g. after an older file was deleted)
  experiment_num = n_existing + 1
  while os.path.exists(os.path.join(checkpoints_dir, str(experiment_num))):
    experiment_num += 1
  experiment_id = str(experiment_num)

  # Store the grid and (later) the results in this dictionary.
  # NOTE(review): 'model_num' only identifies the architecture, so every
  # combination sharing an architecture shares the same model_num.
  experiment = {
      'params': [
          {'lr': lr,
           'batch_size': batch_size,
           'weight_decay': d,
           'nb_units': arch,
           'momentum': m,
           'done': 0,
           'model_num': i}
          for i, arch in enumerate(archs)
          for lr in learning_rates
          for d in decays
          for m in momentums],
      'best_results': []
  }

  # os.path.join only accepts strings, hence the str() conversion above
  path = os.path.join(checkpoints_dir, experiment_id)

  # We write the experiment dictionary as a JSON file
  with open(path, 'w', encoding='utf-8') as f:
    json.dump(experiment, f, ensure_ascii=False, indent=4)

  return experiment_id

In [100]:
def run_grid_search(experiment_id, checkpoints_dir = "/content/drive/MyDrive/checkpoints",
                    max_epochs = None, save_every = 15):
  '''
  Train every not-yet-tested combination of an experiment file and write
  the outcome back to that file.

  Parameters
  ----------
  experiment_id : str or int
      Name of the experiment file inside checkpoints_dir.
  checkpoints_dir : str
      Directory that holds the experiment file.
  max_epochs : int, optional
      Number of epochs passed to train. When omitted, a module-level
      variable named max_epochs is used if one exists; otherwise train
      runs with its own default.
  save_every : int
      The file is rewritten whenever the combination index is a multiple
      of this value, and once more after the last combination.

  Returns
  -------
  None
      Results are appended to experiment['best_results'] and each
      processed config gets 'done' set to 1 in the JSON file.
  '''
  # Set the seed so each experiment is reproducible
  np.random.seed(21390)
  torch.manual_seed(10394)

  # os.path.join only accepts strings, so integer IDs are converted
  path = os.path.join(checkpoints_dir, str(experiment_id))

  # Load the experiment data file
  with open(path, 'r', encoding='utf-8') as f:
    experiment = json.load(f)

  def save_checkpoint():
    with open(path, 'w', encoding='utf-8') as f:
      json.dump(experiment, f, ensure_ascii=False, indent=4)

  # Earlier versions of this function read a notebook-level max_epochs
  # variable; keep honouring it when no explicit value is passed
  if max_epochs is None:
    max_epochs = globals().get('max_epochs')

  # Loop through the combinations in the experiment file
  for i, config in enumerate(experiment['params']):

    # Print progress report
    print("This is combination {} of experiment {}".format(i, experiment_id))

    # Run only those parameter combinations which have not been tested
    if config['done'] == 0:
      print(config)
      if max_epochs is None:
        _, best_results = train(config)
      else:
        _, best_results = train(config, epochs=max_epochs)

      # json.dump cannot serialise NumPy scalars or 0-d arrays, so
      # convert every entry to a plain Python number first
      experiment['best_results'].append(
          [np.asarray(value).item() for value in best_results])
      config['done'] = 1

    # Periodic checkpoint
    if i % save_every == 0:
      save_checkpoint()

  # Final write: without it, everything after the last periodic
  # checkpoint would never reach the file
  save_checkpoint()

In [None]:
# Initialize grid: eight architectures (lists of hidden-layer widths),
# combined with every learning rate, weight decay and momentum below
archs = [[512, 256, 128, 64, 32, 16], [512, 256, 128, 64, 32], [512, 256, 128, 64], [512, 256, 128],
         [256, 128, 64, 32, 16], [256, 128, 64, 32], [256, 128, 64], [128, 64, 32]]

learning_rates = [0.1, 0.3, 0.5, 0.7, 0.9]
decays = [0.001, 0.01, 0.1]
momentums = [0.2, 0.4, 0.6, 0.8, 0.9]