In [212]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [213]:
# Install idx2numpy package for extracting data
!pip install idx2numpy --quiet
!pip install livelossplot --quiet

In [214]:
# Import packages
import os
import json
import gzip
import torch
import random
import torchvision
import numpy as np 

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [215]:
def get_best_results(results):
  '''
  Summarise a training run at its best validation epoch.

  Parameters
  ----------
  results : dict
      Must hold the keys 'train_loss', 'train_accuracy',
      'val_loss' and 'val_accuracy', each a sequence with one
      number per epoch.

  Returns
  -------
  list
      [epoch, val_loss, val_accuracy, train_loss, train_accuracy]
      read at the epoch with the lowest validation loss (the first
      such epoch on ties). The entries are plain Python int/float
      so the list can be written straight to JSON.

  Raises
  ------
  ValueError
      If `results` holds no epochs.
  '''
  val_losses = np.asarray(results['val_loss'], dtype=float)
  if val_losses.size == 0:
    raise ValueError('results contains no epochs; cannot pick a best one')

  # Find the epoch on which the minimum validation loss was reached
  best_val_epoch = int(np.argmin(val_losses))

  # The builtin int()/float() replace the np.int/np.float aliases,
  # which no longer exist in current NumPy releases
  best_results = [best_val_epoch,
                  float(results['val_loss'][best_val_epoch]),
                  float(results['val_accuracy'][best_val_epoch]),
                  float(results['train_loss'][best_val_epoch]),
                  float(results['train_accuracy'][best_val_epoch])]

  # Return statement
  return best_results


def plot_loss(results, lab, model_num, experiment_id, figs_dir=None):
  '''
  Plot the per-epoch training and validation curves of one metric
  and save the figure as a PNG.

  Parameters
  ----------
  results : dict
      Must contain 'train_<lab>' and 'val_<lab>' sequences.
  lab : str
      Metric name used to pick the two series and to title the
      plot (train() calls this with 'loss' and 'accuracy').
  model_num, experiment_id
      Used only to build the file name
      '<experiment_id>_<model_num>_<lab>_results.png'.
  figs_dir : str, optional
      Destination folder. Defaults to '/content/drive/MyDrive/1_figs';
      it is created if it does not exist yet.
  '''
  # Store model name and destination path
  if figs_dir is None:
    figs_dir = os.path.join('/content/drive/MyDrive/', '1_figs')
  os.makedirs(figs_dir, exist_ok=True)
  model_name = str(experiment_id) + '_' + str(model_num) + '_' + lab + '_results.png'
  path = os.path.join(figs_dir, model_name)

  # Draw on a dedicated figure so nothing left over on the implicit
  # "current" figure can leak into the saved image
  fig, ax = plt.subplots()
  ax.plot(results['train_' + lab], label='Train')
  ax.plot(results['val_' + lab], label='Validation')

  # Add annotations
  ax.set_xlabel('Epoch')
  ax.set_ylabel(lab.title())
  ax.set_title(lab.title() + ' by Epoch')
  ax.legend()

  # Save the figure and release it
  fig.savefig(path)
  plt.close(fig)

In [216]:
def load_one_dataset(path):
    '''
    Read one gzip-compressed IDX file and return it as a torch tensor.

    The file is opened with gzip, parsed by
    idx2numpy.convert_from_file and wrapped with torch.from_numpy.
    The context manager closes the file even if parsing raises.
    '''
    with gzip.open(path, 'rb') as f:
        data = torch.from_numpy(idx2numpy.convert_from_file(f))

    return data


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size,
                      train_size=50000, val_size=10000, seed=123809):
    '''
    Build the train / validation / test DataLoaders.

    Parameters
    ----------
    train_imgs, train_labs, test_imgs, test_labs : str
        Paths to the gzip-compressed IDX files, read through
        load_one_dataset.
    batch_size : int
        Batch size used by all three loaders.
    train_size, val_size : int
        Sizes of the random train/validation split of the training
        file. random_split requires them to add up to the number of
        training examples.
    seed : int
        Seed of the generator that draws the split. It is private to
        this function, so the caller's global torch seed is no longer
        overwritten as a side effect of loading data.

    Returns
    -------
    tuple
        (train_loader, val_loader, test_loader). Images are float32
        scaled by 1/255, labels are int64. Only the test loader
        iterates in a fixed order.
    '''
    train_images = load_one_dataset(train_imgs).type(torch.float32)/255.0
    train_labels = load_one_dataset(train_labs).type(torch.long)
    test_images = load_one_dataset(test_imgs).type(torch.float32)/255.0
    test_labels = load_one_dataset(test_labs).type(torch.long)

    # TensorDataset indexes the tensors directly instead of first
    # materialising a Python list with one tuple per example
    full_train = torch.utils.data.TensorDataset(train_images, train_labels)
    test = torch.utils.data.TensorDataset(test_images, test_labels)

    # Reproducible split drawn from a local generator
    split_rng = torch.Generator().manual_seed(seed)
    train, val = torch.utils.data.random_split(full_train, [train_size, val_size],
                                               generator=split_rng)

    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)

    return (train_loader, val_loader, test_loader)

In [217]:
class Net(nn.Module):
  '''
  Fully connected feed-forward classifier.

  The network is a chain of nn.Linear layers with sizes
  [input_dim] + nb_units + [output_dim]. ReLU follows every layer
  except the last one, so forward() returns unbounded scores.
  '''

  def __init__(self, nb_units, input_dim, output_dim):
    '''
    Declare the network architecture here

    nb_units   : sequence of hidden-layer widths (may be empty)
    input_dim  : number of features per flattened example
    output_dim : number of output scores (classes)
    '''
    super(Net, self).__init__()

    # Add input and output dimensions to layer list
    self.nb_units = [input_dim] + list(nb_units) + [output_dim]

    # Total number of entries in the width list (layers + 1)
    self.nb_layers = len(self.nb_units)

    # One linear layer per consecutive pair of widths
    fc = [nn.Linear(n_in, n_out)
          for n_in, n_out in zip(self.nb_units[:-1], self.nb_units[1:])]

    # Wrap this in a module list so the parameters are registered
    self.fc = nn.ModuleList(fc)


  def forward(self, x):
    '''
    Send input forward through the network and return the raw
    output-layer scores, shape (batch, output_dim).
    '''
    # Flatten each example to a vector of input_dim features
    # (28x28 images become rows of 784 values)
    x = x.reshape(-1, self.nb_units[0])

    # ReLU only between layers: rectifying the final layer would
    # clamp every score to >= 0 before the loss sees it
    last = len(self.fc) - 1
    for i, layer in enumerate(self.fc):
      x = layer(x)
      if i < last:
        x = F.relu(x)

    # Return statement
    return x

In [218]:
def _run_epoch(net, loader, criterion, device, optimizer=None):
    '''
    Run one full pass over `loader`.

    With an optimizer the pass trains the network (zero_grad,
    backward, step per batch); without one it only evaluates, with
    gradient tracking switched off.

    Returns (mean loss per example, accuracy) as Python floats. Both
    are divided by the number of examples actually seen, so a smaller
    final batch does not skew them.
    '''
    training = optimizer is not None
    net.train(training)

    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:

            # Send the batch to the memory of the device
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean, so weight it by the
            # batch size before accumulating
            batch_n = labels.size(0)
            total_loss += loss.item() * batch_n
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            total_seen += batch_n

    return total_loss / total_seen, total_correct / total_seen


def train(config, experiment_id, input_dim=784, output_dim = 10, epochs=2,
          data_dir = '/content/drive/MyDrive/data', checkpoints_dir = "/content/drive/MyDrive/1_checkpoints/", 
          opt='sgd', save=0):
    '''
    This is the main training loop

    Parameters
    ----------
    config : dict
        Reads 'batch_size', 'nb_units', 'lr', 'weight_decay',
        'model_num' and, for opt='sgd', 'momentum'.
    experiment_id
        Only used to name the saved plots.
    input_dim, output_dim : int
        Passed to Net.
    epochs : int
        Number of passes over the training split.
    data_dir : str
        Folder holding the four gzip-compressed IDX files.
    checkpoints_dir : str
        Folder that receives 'best_model.pth' when save == 1.
    opt : str
        'sgd' or 'adam'.
    save : int
        When 1, the weights are written each time the validation
        loss improves, so the file holds the best epoch of this run.

    Returns
    -------
    tuple
        (results, best_results): the per-epoch history dict and the
        list produced by get_best_results.

    Raises
    ------
    ValueError
        If `opt` is not one of the supported optimizers.
    '''
    if opt not in ('sgd', 'adam'):
        raise ValueError("opt must be 'sgd' or 'adam', got {!r}".format(opt))

    # Set the seed so each experiment is reproducible
    np.random.seed(21390)
    torch.manual_seed(10394)

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set paths to datasets
    paths = {
        'train_imgs': os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
        'train_labs': os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
        'test_imgs': os.path.join(data_dir,'t10k-images-idx3-ubyte.gz'),
        'test_labs': os.path.join(data_dir,'t10k-labels-idx1-ubyte.gz')
    }

    # Load datasets (the test loader is not needed while training)
    train_loader, val_loader, _ = load_all_datasets(**paths, batch_size = config['batch_size'])

    # Build the network and send it to device memory
    net = Net(config['nb_units'], input_dim, output_dim)
    net.to(device)

    # We use the cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # Mini-batch SGD with momentum, or Adam (which takes no momentum)
    if opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=config['lr'], momentum=config['momentum'],
                              weight_decay=config['weight_decay'])
    else:
        optimizer = optim.Adam(net.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])

    # Store results here
    results = {
      'train_loss': [],
      'train_accuracy': [],
      'val_loss': [],
      'val_accuracy': []
      }

    # Make print message format string
    msg = "{}, Epoch:{}, Loss:{}, Accuracy:{}," "\n"

    # Lowest validation loss seen so far; decides when to checkpoint
    best_val_loss = float('inf')

    # Loop over the dataset multiple times
    for epoch in range(epochs):

        running_loss, running_accuracy = _run_epoch(net, train_loader, criterion, device, optimizer)
        val_running_loss, val_running_accuracy = _run_epoch(net, val_loader, criterion, device)

        # Append to the results tracker
        results['train_loss'].append(running_loss)
        results['train_accuracy'].append(running_accuracy)
        results['val_loss'].append(val_running_loss)
        results['val_accuracy'].append(val_running_accuracy)

        # Print performance
        print(msg.format("Training", epoch, running_loss, running_accuracy))
        print(msg.format("Validation", epoch, val_running_loss, val_running_accuracy))

        # Save only on improvement, otherwise "best_model.pth" would
        # simply hold whatever the last epoch produced
        if save == 1 and val_running_loss < best_val_loss:
            best_val_loss = val_running_loss
            torch.save(net.state_dict(), os.path.join(checkpoints_dir, 'best_model.pth'))

    # Print message
    print('Done training...')

    # Get the best results and print
    best_results = get_best_results(results)
    print(best_results)

    # Plot the losses
    plot_loss(results, lab='loss', model_num=config['model_num'], experiment_id=experiment_id)
    plot_loss(results, lab='accuracy', model_num=config['model_num'], experiment_id=experiment_id)

    # Return statement
    return(results, best_results)

In [269]:
from sklearn.metrics import confusion_matrix
import pandas as pd

def test(data_dir = '/content/drive/MyDrive/data', 
         checkpoints_dir="/content/drive/MyDrive/1_checkpoints/", 
         model_name='best_model.pth',input_dim=784, 
         output_dim=10, batch_size=512, nb_units = [479, 444]):
  '''
  Evaluate a saved model on the test set.

  Rebuilds Net(nb_units, input_dim, output_dim), loads the weights
  stored at checkpoints_dir/model_name, runs the test loader through
  it on the CPU and prints, in this order: the confusion matrix, the
  per-class accuracy in percent, and the overall accuracy.

  `nb_units`, `input_dim` and `output_dim` have to match the network
  that produced the checkpoint, otherwise load_state_dict raises.
  '''
  # Set paths to datasets
  paths = {
      'train_imgs': os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
      'train_labs': os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
      'test_imgs': os.path.join(data_dir,'t10k-images-idx3-ubyte.gz'),
      'test_labs': os.path.join(data_dir,'t10k-labels-idx1-ubyte.gz')
  }

  # Load datasets
  _, _, test_loader = load_all_datasets(**paths, batch_size = batch_size)

  # map_location lets a checkpoint written from a GPU run load on a
  # CPU-only machine; the model itself is evaluated on the CPU here
  net = Net(nb_units, input_dim, output_dim)
  state = torch.load(os.path.join(checkpoints_dir, model_name), map_location='cpu')
  net.load_state_dict(state)
  net.eval()

  # Collect per-batch tensors and concatenate once at the end
  pred_batches = []
  lbl_batches = []

  # No need to calculate gradients for test set
  with torch.no_grad():
    for test_inputs, test_labels in test_loader:

      # Send the batch through the network and take the top score
      test_preds = net(test_inputs).argmax(dim=1)

      pred_batches.append(test_preds.view(-1).cpu())
      lbl_batches.append(test_labels.view(-1).cpu())

  pred_list = torch.cat(pred_batches) if pred_batches else torch.zeros(0, dtype=torch.long)
  lbl_list = torch.cat(lbl_batches) if lbl_batches else torch.zeros(0, dtype=torch.long)

  conf_mat=confusion_matrix(lbl_list.numpy(), pred_list.numpy())
  print(pd.DataFrame(conf_mat))

  # Fraction of correct predictions over the examples actually seen;
  # len(test_loader)*batch_size would overcount a partial last batch
  test_running_accuracy = (pred_list == lbl_list).float().mean().item()

  # Print per class accuracy
  class_accuracy=100*conf_mat.diagonal()/conf_mat.sum(1)
  print(pd.DataFrame(class_accuracy).T)

  # Print the overall accuracy
  print(test_running_accuracy)

In [292]:
def plot_image(imgs):
  '''
  Display a batch of images in the notebook, five per row.

  imgs : iterable of torch tensors, each holding 784 values that
         are reshaped to 28x28 for display.
  '''
  columns = 5

  # plt.subplot needs an integer row count; floor division keeps the
  # same grid the float expression len(imgs) / columns + 1 described
  rows = len(imgs) // columns + 1

  plt.figure(figsize=(20,10))

  for i, img in enumerate(imgs):
    plt.subplot(rows, columns, i + 1)
    plt.imshow(img.cpu().numpy().reshape(28,28))

In [296]:
def plot_low_accuracy_classes(label, n, data_dir = '/content/drive/MyDrive/data', batch_size=6):
  '''
  Plot up to `n` test-set images whose true label equals `label`.

  Parameters
  ----------
  label : int
      Class to display.
  n : int
      Maximum number of images; the first matches in test-set order
      are used.
  data_dir : str
      Folder holding the four gzip-compressed IDX files.
  batch_size : int
      Batch size of the loader used to scan the test set.

  Returns
  -------
  torch.FloatTensor
      The plotted images, flattened to shape (count, pixels).

  Raises
  ------
  ValueError
      If no image is selected (no match for `label`, or n < 1).
  '''
  # Set paths to datasets
  paths = {
      'train_imgs': os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
      'train_labs': os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
      'test_imgs': os.path.join(data_dir,'t10k-images-idx3-ubyte.gz'),
      'test_labs': os.path.join(data_dir,'t10k-labels-idx1-ubyte.gz')
  }

  # Load datasets
  _, _, test_loader = load_all_datasets(**paths, batch_size = batch_size)

  matches = []
  n_found = 0

  # Scan the test data in order and stop once enough images are found
  if n >= 1:
    for test_inputs, test_labels in test_loader:
      hits = test_inputs[test_labels == label]
      if hits.size(0) > 0:
        matches.append(hits)
        n_found += hits.size(0)
      if n_found >= n:
        break

  if not matches:
    raise ValueError('no test images selected for label {!r} with n={!r}'.format(label, n))

  imgs_to_plot = torch.cat(matches, dim=0)[:n]
  imgs_to_plot = imgs_to_plot.view(imgs_to_plot.size(0),-1).type(torch.FloatTensor)
  plot_image(imgs_to_plot)

  return(imgs_to_plot)
    

In [219]:
def get_grid(archs, learning_rates, decays, momentums, batch_sizes,
             checkpoints_dir = "/content/drive/MyDrive/1_checkpoints"):
  '''
  Write a new experiment file containing every combination of the
  given hyper-parameter values (a full grid, not a random sample).

  Parameters
  ----------
  archs : list of lists of hidden-layer widths
  learning_rates, decays, momentums, batch_sizes : lists of values
  checkpoints_dir : folder that receives '<experiment_id>.json';
      created if missing.

  Returns
  -------
  int
      The id of the experiment that was written. It starts at
      (number of entries in checkpoints_dir) + 1 and is bumped until
      the file name is unused, so an existing experiment is never
      overwritten.

  The JSON holds 'params' (one config dict per combination, with
  'done' set to 0 and a running 'model_num') and an empty
  'best_results' list.
  '''
  from itertools import product

  os.makedirs(checkpoints_dir, exist_ok=True)

  # product() varies the last argument fastest, which matches nesting
  # the loops as arch > lr > decay > momentum > batch size
  combos = product(archs, learning_rates, decays, momentums, batch_sizes)

  # Store the grid and results in this dictionary
  experiment = {
      'params': [
          {'lr': lr,
           'batch_size': batch_size,
           'weight_decay': d,
           'nb_units': arch,
           'momentum': m,
           'done': 0,
           'model_num': model_num}
          for model_num, (arch, lr, d, m, batch_size) in enumerate(combos)
      ],
      'best_results': []
  }

  # Create experiment ID, skipping ids whose file already exists
  experiment_id = len(os.listdir(checkpoints_dir)) + 1
  path = os.path.join(checkpoints_dir, str(experiment_id) + '.json')
  while os.path.exists(path):
    experiment_id += 1
    path = os.path.join(checkpoints_dir, str(experiment_id) + '.json')

  # We write the experiment dictionary as a .json file
  with open(path, 'w', encoding='utf-8') as f:
    json.dump(experiment, f, ensure_ascii=False, indent=4)

  return experiment_id

In [220]:
def run_grid_search(experiment_id, 
                    checkpoints_dir = "/content/drive/MyDrive/1_checkpoints", 
                    max_epochs=100, opt = 'sgd', save=0):
  '''
  Train every not-yet-finished configuration of an experiment.

  The experiment is read from '<checkpoints_dir>/<experiment_id>.json'.
  Each config whose 'done' flag is 0 is passed to train(); its best
  results are appended to 'best_results' and the flag is set to 1.
  The file is rewritten after every finished config, so an
  interrupted search resumes where it stopped and the last result is
  never lost.

  max_epochs, opt and save are forwarded to train().
  '''
  # Create the complete path
  path = os.path.join(checkpoints_dir, str(experiment_id) + '.json')

  # Load the experiment data file
  with open(path, 'r', encoding='utf-8') as f:
    experiment = json.load(f)

  # Loop through experiments data file
  for i, config in enumerate(experiment['params']):

    # Print progress report
    print("This is combination {} of experiment {}".format(i, experiment_id))

    # Skip parameter combinations which have already been tested
    if config['done'] != 0:
      continue

    print(config)
    results, best_results = train(config, epochs=max_epochs, experiment_id = experiment_id, opt=opt, save=save)
    experiment['best_results'].append(best_results)
    config['done'] = 1

    # Persist right away: one JSON write is negligible next to a
    # training run, and nothing is lost if the session dies
    with open(path, 'w', encoding='utf-8') as f:
      json.dump(experiment, f, ensure_ascii=False, indent=4)

In [221]:
def get_experiment_results(experiment_id, checkpoints_dir = "/content/drive/MyDrive/1_checkpoints", param='nb_units'):
  '''
  Look up the winning configuration of a finished experiment.

  Reads '<checkpoints_dir>/<experiment_id>.json', picks the entry of
  'best_results' with the smallest value in column 1 (the validation
  loss as stored by get_best_results) and returns a tuple of
  (value of `param` in the matching config, its 'model_num', the
  winning row as a NumPy array). Row i of 'best_results' is taken to
  belong to config i of 'params'.
  '''
  json_path = os.path.join(checkpoints_dir, '{}.json'.format(experiment_id))

  # Read the stored experiment
  with open(json_path, 'r', encoding='utf-8') as handle:
    experiment = json.load(handle)

  # One row per trained config
  score_table = np.array(experiment['best_results'])
  winner = np.argmin(score_table[:, 1])
  winning_config = experiment['params'][winner]

  return (winning_config[param], winning_config['model_num'], score_table[winner, :])

In [222]:
# Architecture search: compare network depths at several starting widths
def run_architecture_search(experiment_id=2, new_experiment=0):
  '''
  Grid-search over fifteen hidden-layer stacks in which each layer is
  half as wide as the previous one. Learning rate, weight decay,
  momentum and batch size are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  candidate_archs = [
      # first hidden layer of 512 units
      [512, 256, 128, 64, 32, 16],
      [512, 256, 128, 64, 32],
      [512, 256, 128, 64],
      [512, 256, 128],
      [512, 256],
      # first hidden layer of 256 units
      [256, 128, 64, 32, 16],
      [256, 128, 64, 32],
      [256, 128, 64],
      [256, 128],
      # first hidden layer of 128 units
      [128, 64, 32, 16],
      [128, 64, 32],
      # smaller networks
      [64, 32, 16],
      [64, 32],
      [32, 16],
      [32],
  ]

  # Every other hyper-parameter is pinned to one value
  grid_kwargs = {
      'archs': candidate_archs,
      'learning_rates': [0.3],
      'decays': [0],
      'momentums': [0],
      'batch_sizes': [64],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search
  run_grid_search(experiment_id=experiment_id, max_epochs=100)

In [223]:
# Width search: compare two-hidden-layer networks of different widths
def run_width_search(experiment_id=3, new_experiment=0):
  '''
  Grid-search over 36 two-hidden-layer networks. The candidates fall
  into three groups: a wide first layer followed by a narrower one,
  a narrow first layer followed by a wider one, and widths that are
  not powers of two. Learning rate, weight decay, momentum and batch
  size are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  wide_to_narrow = [
      [512, 128], [512, 64], [512, 32], [512, 16],
      [256, 64], [256, 32], [256, 16],
      [128, 64], [128, 32], [128, 16],
      [64, 16],
  ]
  narrow_to_wide = [
      [16, 32], [16, 64], [16, 128], [16, 256], [16, 512],
      [32, 64], [32, 128], [32, 256], [32, 512],
      [64, 128], [64, 256], [64, 512],
      [128, 256], [128, 512],
      [256, 512],
  ]
  off_power_of_two = [
      [300, 200], [200, 100], [403, 202], [123, 86], [702, 333],
      [479, 444], [333, 222], [204, 52], [502, 114], [668, 45],
  ]

  # Every other hyper-parameter is pinned to one value
  grid_kwargs = {
      'archs': wide_to_narrow + narrow_to_wide + off_power_of_two,
      'learning_rates': [0.3],
      'decays': [0],
      'momentums': [0],
      'batch_sizes': [64],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search
  run_grid_search(experiment_id=experiment_id, max_epochs=100)

In [224]:
def run_decay_search(experiment_id=4, new_experiment=0):
  '''
  Coarse weight-decay sweep on the [479, 444] architecture.

  The decay values come from np.arange(0, 0.021, 0.001) with the
  leading 0 dropped. Learning rate, momentum and batch size are each
  held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  # Only the weight decay varies in this experiment
  decay_values = np.arange(0, 0.021, 0.001)[1:].tolist()

  grid_kwargs = {
      'archs': [[479, 444]],
      'learning_rates': [0.3],
      'decays': decay_values,
      'momentums': [0],
      'batch_sizes': [64],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search
  run_grid_search(experiment_id=experiment_id, max_epochs=100)

In [225]:
def run_decay_search_2(experiment_id=5, new_experiment=0):
  '''
  Fine weight-decay sweep on the [479, 444] architecture.

  The decay values come from np.arange(0, 0.0021, 0.0001) with the
  leading 0 dropped. Learning rate, momentum and batch size are each
  held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  # Only the weight decay varies in this experiment
  archs = [[479, 444]]

  # Set other parameters
  batch_size = [64]
  learning_rates = [0.3]
  decays = np.arange(0, 0.0021, 0.0001)[1:].tolist()
  momentums = [0]

  # Recreate the grid if needed. get_grid's keyword is `batch_sizes`;
  # passing `batch_size=` raised a TypeError
  if new_experiment == 1:
    get_grid(archs, learning_rates, decays, momentums, batch_sizes=batch_size)

  # Run the search
  run_grid_search(experiment_id = experiment_id, max_epochs=100)

In [226]:
def run_lr_search(experiment_id=6, new_experiment=0):
  '''
  Coarse learning-rate sweep on the [479, 444] architecture.

  Eight learning rates between 0.1 and 0.9 are tried (0.3 is not in
  the list). Weight decay, momentum and batch size are each held at a
  single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  # Only the learning rate varies in this experiment
  archs = [[479, 444]]

  # Set other parameters
  batch_size = [64]
  learning_rates = [0.1, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  decays = [0.0004]
  momentums = [0]

  # Recreate the grid if needed. get_grid's keyword is `batch_sizes`;
  # passing `batch_size=` raised a TypeError
  if new_experiment == 1:
    get_grid(archs, learning_rates, decays, momentums, batch_sizes=batch_size)

  # Run the search
  run_grid_search(experiment_id = experiment_id, max_epochs=100)

In [227]:
def run_lr_search_2(experiment_id=7, new_experiment=0):
  '''
  Fine learning-rate sweep on the [479, 444] architecture.

  The learning rates come from np.arange(0.21, 0.3, 0.01). Weight
  decay, momentum and batch size are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  # Only the learning rate varies in this experiment
  archs = [[479, 444]]

  # Set other parameters
  batch_size = [64]
  learning_rates = np.arange(0.21, 0.3, 0.01).tolist()
  decays = [0.0004]
  momentums = [0]

  # Recreate the grid if needed. get_grid's keyword is `batch_sizes`;
  # passing `batch_size=` raised a TypeError
  if new_experiment == 1:
    get_grid(archs, learning_rates, decays, momentums, batch_sizes=batch_size)

  # Run the search
  run_grid_search(experiment_id = experiment_id, max_epochs=100)

  

In [228]:
def run_momentum_search(experiment_id=8, new_experiment=0):
  '''
  Momentum sweep on the [479, 444] architecture.

  Momentum takes the values 0.1, 0.2, ..., 0.9. Learning rate, weight
  decay and batch size are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration.
  '''
  # Only the momentum varies in this experiment
  grid_kwargs = {
      'archs': [[479, 444]],
      'learning_rates': [0.22],
      'decays': [0.0004],
      'momentums': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
      'batch_sizes': [64],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search
  run_grid_search(experiment_id=experiment_id, max_epochs=100)

In [229]:
def run_batch_size_search(experiment_id=9, new_experiment=0):
  '''
  Batch-size sweep on the [479, 444] architecture.

  Batch sizes 32, 64, 128, 256 and 512 are tried. Learning rate,
  weight decay and momentum are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  80 epochs per configuration.
  '''
  # Only the batch size varies in this experiment
  grid_kwargs = {
      'archs': [[479, 444]],
      'learning_rates': [0.22],
      'decays': [0.0004],
      'momentums': [0.1],
      'batch_sizes': [32, 64, 128, 256, 512],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search (shorter than the other sweeps: 80 epochs)
  run_grid_search(experiment_id=experiment_id, max_epochs=80)

In [230]:
def run_adam_search(experiment_id=11, new_experiment=0):
  '''
  Learning-rate sweep with the Adam optimizer on the [479, 444]
  architecture.

  The learning rates come from np.arange(0.01, 0.1, 0.01). Weight
  decay, momentum and batch size are each held at a single value.

  With new_experiment == 1 a fresh grid file is written through
  get_grid first; the search is then run on `experiment_id` for
  100 epochs per configuration with opt='adam'.
  '''
  # Only the learning rate varies in this experiment
  lr_values = np.arange(0.01, 0.1, 0.01).tolist()

  grid_kwargs = {
      'archs': [[479, 444]],
      'learning_rates': lr_values,
      'decays': [0.0004],
      'momentums': [0.1],
      'batch_sizes': [512],
  }

  # Recreate the grid if needed
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Run the search with Adam instead of the default SGD
  run_grid_search(experiment_id=experiment_id, max_epochs=100, opt='adam')

In [None]:
def run_best_model(experiment_id = 12, new_experiment=1):
  '''
  Train the single selected configuration and save its weights.

  The grid holds exactly one combination: architecture [479, 444],
  learning rate 0.22, weight decay 0.0004, momentum 0.1, batch size
  512. Note that new_experiment defaults to 1 here, so a grid file is
  written through get_grid unless the caller passes another value.
  The run on `experiment_id` then uses 85 epochs with save=1.
  '''
  # A one-point "grid": the chosen hyper-parameters
  grid_kwargs = {
      'archs': [[479, 444]],
      'learning_rates': [0.22],
      'decays': [0.0004],
      'momentums': [0.1],
      'batch_sizes': [512],
  }

  # Write the grid file when requested
  if new_experiment == 1:
    get_grid(**grid_kwargs)

  # Train and checkpoint
  run_grid_search(experiment_id=experiment_id, max_epochs=85, save=1)

In [231]:
# Run every search stage in order
def full_pipeline(new_experiment=0):
  '''
  Run all hyper-parameter searches one after another and print the
  winning setting of the stages whose results are looked up.

  `new_experiment` is forwarded, by keyword, to the architecture and
  width searches only. Passed positionally it would land in their
  first parameter, `experiment_id`. The later stages keep the
  new_experiment values that are written out explicitly below.
  '''
  # Depth, then width of the network
  run_architecture_search(new_experiment=new_experiment)
  print(get_experiment_results(experiment_id=2))

  run_width_search(new_experiment=new_experiment)
  print(get_experiment_results(experiment_id=3))

  # Weight decay: coarse sweep, then fine sweep
  run_decay_search(experiment_id=4, new_experiment=0)
  # NOTE(review): this lookup used experiment_id=1 before; 4 is the
  # decay experiment that was just run - confirm this is what was meant
  print(get_experiment_results(experiment_id=4, param='weight_decay'))
  run_decay_search_2(experiment_id=5, new_experiment=0)

  # Learning rate: coarse sweep, then fine sweep
  run_lr_search(experiment_id=6, new_experiment=0)
  run_lr_search_2(new_experiment=1)
  print(get_experiment_results(experiment_id=7, param='lr'))

  # Momentum
  run_momentum_search(experiment_id = 8, new_experiment=0)
  print(get_experiment_results(experiment_id=8, param='momentum'))

  # Batch size
  run_batch_size_search(new_experiment=0)
  print(get_experiment_results(experiment_id = 9, param = 'batch_size'))

  # Adam comparison and the final model
  run_adam_search(experiment_id=11, new_experiment=1)
  run_best_model(experiment_id=12, new_experiment=1)

In [None]:
# Evaluate the saved checkpoint on the test set, using test()'s default arguments
test()

In [None]:
# Show up to 10 test images whose label is 1
plot_low_accuracy_classes(label = 1, n = 10)