In [64]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [65]:
# Install idx2numpy package for extracting data
!pip install idx2numpy



In [66]:
# Import packages
import os
import json
import gzip
import torch
import torchvision
import numpy as np 
import pandas as pd

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [67]:
def load_one_dataset(path):
    '''
    Convenience function to load a single dataset
    '''
    f = gzip.open(path, 'rb')
    data = torch.from_numpy(idx2numpy.convert_from_file(f))
    f.close()
    
    return(data)


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size):
    '''
    Load training as well as test images here
    '''
    train_images = load_one_dataset(train_imgs).type(torch.float32)
    train_labels = load_one_dataset(train_labs).type(torch.long)
    train = list(zip(train_images, train_labels))

    test_images = load_one_dataset(test_imgs).type(torch.float32)
    test_labels = load_one_dataset(test_labs).type(torch.long)
    test = list(zip(test_images, test_labels))
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)
    
    return(train_loader, test_loader)

In [68]:
class Net(nn.Module):
  
  def __init__(self, nb_units, input_dim, output_dim):
    '''
    Declare the network architecture here
    '''
    super(Net, self).__init__()
    
    # Initialize a list to store layers
    fc = []

    # Add input and output dimensions to layer list
    self.nb_units = [input_dim] + nb_units + [output_dim]

    # Now compute the total no. of layers
    self.nb_layers = len(self.nb_units)

    # Now append the hidden layers
    for i in range(1, self.nb_layers):
      fc.append(nn.Linear(self.nb_units[i-1], self.nb_units[i]))
    
    # Wrap this in a module list 
    self.fc = nn.ModuleList(fc)
    
  
  def forward(self, x):
    '''
    Send input forward through 
    the network
    '''
    # Reshape 28X28 images to be 784 X 784
    x = x.view(-1, 28*28)

    # Send example through network
    for layer in self.fc: x = F.relu(layer(x))
    
    # Return statement
    return x

In [69]:
def train(config, input_dim=784, output_dim = 10, epochs=2,
          data_dir = '/content/drive/MyDrive/data'):
    '''
    This is the main training loop
    '''
    # Set device
    if torch.cuda.is_available():
      device = torch.device("cuda")
    else:
      device = torch.device("cpu")
    
    # Set paths to datasets
    paths = {
        'train_imgs': os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
        'train_labs': os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
        'test_imgs': os.path.join(data_dir,'t10k-images-idx3-ubyte.gz'),
        'test_labs': os.path.join(data_dir,'t10k-labels-idx1-ubyte.gz')
    }

    # Load datasets
    train_loader, test_loader = load_all_datasets(**paths, batch_size = config['batch_size'])
    
    # Set parameters
    net = Net(config['nb_units'], input_dim, output_dim)
    
    # Send net object to device memory
    net.to(device)
    
    # We use the cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # We use mini-batch stochastic gradient descent with momentum
    optimizer = optim.SGD(net.parameters(), lr=config['lr'], momentum=config['momentum'])

    # Store results here
    results = {
      'train_loss': [], 
      'train_accuracy': [],
      'val_loss': [], 
      'val_accuracy': []
      }

    # Loop over the dataset multiple times
    for epoch in range(epochs):  
        
        # Initialize running loss
        running_loss = 0.0
        running_accuracy = 0.0

        # Initialize the validation running loss
        val_running_loss = 0.0
        val_running_accuracy = 0.0
        
        # Iterate through data now
        for i, data in enumerate(train_loader):
            
            # Get the inputs: data is a list of [inputs, labels]
            inputs, labels = data
            
            # Send the inputs to the memory of the device
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(inputs)
            
            # Calculate loss
            loss = criterion(outputs, labels)

            # Get predictions
            _, preds = torch.max(outputs.data, 1)
            
            # Backward
            loss.backward()
            
            # Optimize
            optimizer.step()

            # Add to running loss
            running_loss += loss.item()

            # Add to running accuracy
            running_accuracy += (preds == labels).float().sum()
        
        # Loop through the validation data
        for j, data in enumerate(test_loader):
          
          # No need to calculate gradients for validation set
          with torch.no_grad():

              # Get the data item 
              val_inputs, val_labels = data
              val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)

              # Send the data item through the network to get output
              val_outputs = net(val_inputs)

              # Compute the loss
              val_loss = criterion(val_outputs, val_labels)

              # Get predictions
              _, val_preds = torch.max(val_outputs.data, 1)

              # Add to running loss
              val_running_loss += val_loss.item()

              # Add to running accuracy
              val_running_accuracy += (val_preds == val_labels).float().sum()
        
        # Rescale the training and validation perfomance metrics
        running_loss = running_loss/len(train_loader)
        running_accuracy = running_accuracy/(len(train_loader)*config['batch_size'])
        
        # Rescale the validation loss
        val_running_loss = val_running_loss/len(test_loader)
        val_running_accuracy = val_running_accuracy/(len(test_loader)*config['batch_size'])
        
        # Append to the results tracker
        results['train_loss'].append(np.float(running_loss))
        results['train_accuracy'].append(np.float(running_accuracy))
        results['val_loss'].append(np.float(val_running_loss))
        results['val_accuracy'].append(np.float(val_running_accuracy))

        # Make print message format string
        msg = "{}, Epoch:{}, Loss:{}, Accuracy:{}," "\n"

        # Print performance
        print(msg.format("Training", epoch, running_loss, running_accuracy))
        print(msg.format("Validation", epoch, val_running_loss, val_running_accuracy))
        
    # Print message
    print('Done training...')
    
    # Return statement
    return(results)

In [70]:
def get_random_hyper_parameters(n_layers=4):
    '''
    Randomly select hyper-parameters to train with
    '''
    config = {
        
      'lr': np.random.uniform(1e-4, 1e-1),
      'batch_size': 512,
      'weight_decay': np.random.uniform(1e-3, 1e-1),
      'nb_units': [2**np.random.randint(2,9) for i in range(n_layers)],
      'momentum': np.random.choice([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
      'done': 0
    }
    
    return(config)

In [71]:
def get_random_grid(n_samples=500, n_layers=4):
  '''
  Get a random set of hyper-parameter combinations
  to test with for a given number of layers. 
  '''
  
  # Store the grid and results in this dictionary
  experiment = {
      
      'params': [], 
      'results': []

  }
  
  # This will generate the list of parameter combinations to search
  for i in range(n_samples):   
    config = get_random_hyper_parameters(n_layers)
    experiment['params'].append(config)

  # This is the experiment label
  experiment_name = '{}_samples_{}_layers.json'.format(n_samples, n_layers)

  # This is the experiment path
  path = os.path.join("/content/drive/MyDrive/checkpoints", experiment_name)

  # We write the experiment dictionary as a .json file
  with open(path, 'w', encoding='utf-8') as f:
    json.dump(experiment, f, ensure_ascii=False, indent=4)


In [72]:
def run_grid_search(n_samples=100, n_layers=4, max_epochs=100):
  '''
  Run the grid search
  '''

  # First we load the experiment dictionary
  experiment_name = '{}_samples_{}_layers.json'.format(n_samples, n_layers)
  
  # Create the complete path
  path = os.path.join("/content/drive/MyDrive/checkpoints", experiment_name)
  
  # Load the experiment data file
  with open(path, 'r', encoding='utf-8') as f:
    experiment = json.load(f)

  # Loop through experiments data file
  for i, config in enumerate(experiment['params']):
    
    # Print progress report
    print("This is experiment {}".format(i))

    # Run experiments only for those parameter combinations which have not been tested
    if config['done'] == 0:
      results = train(config, epochs=max_epochs)
      experiment['results'].append(results) 
      config['done'] = 1
    
    # We update the .json dictionary every 15 iterations
    if i%15 == 0:
      with open(path, 'w', encoding='utf-8') as f:
        json.dump(experiment, f, ensure_ascii=False, indent=4)

In [73]:
# Create a new random grid if needed
np.random.seed(129031908)
new = 1
num_samples = 200
n_layers = 3
if new == 1: 
  get_random_grid(num_samples, n_layers)

In [None]:
# Run the grid search
run_grid_search(num_samples, n_layers)

This is experiment 0
Training, Epoch:0, Loss:478.9507746777292, Accuracy:0.09864936769008636,

Validation, Epoch:0, Loss:2.303194487094879, Accuracy:0.09765625,

Training, Epoch:1, Loss:2.303028242062714, Accuracy:0.09879833459854126,

Validation, Epoch:1, Loss:2.302754592895508, Accuracy:0.09765625,

Training, Epoch:2, Loss:2.3027376663886896, Accuracy:0.09931144118309021,

Validation, Epoch:2, Loss:2.302633059024811, Accuracy:0.09765625,

Training, Epoch:3, Loss:2.302641618049751, Accuracy:0.09931144118309021,

Validation, Epoch:3, Loss:2.3025971055030823, Accuracy:0.09765625,

Training, Epoch:4, Loss:2.3026158163103005, Accuracy:0.09881488233804703,

Validation, Epoch:4, Loss:2.3025922894477846, Accuracy:0.09765625,

Training, Epoch:5, Loss:2.302593109971386, Accuracy:0.09931144118309021,

Validation, Epoch:5, Loss:2.302589166164398, Accuracy:0.09765625,

Training, Epoch:6, Loss:2.3025915137792037, Accuracy:0.09931144118309021,

Validation, Epoch:6, Loss:2.302585744857788, Accuracy: