# Constants and Setup

In [1]:
import os

# whether to commit and push to git after each optimization. intended for long runs
PUSH_TO_GIT = False  

# parent output directory
EXPERIMENTS_DIR = os.path.join("experiments", "bayes_opt_2")

Optimizer to use. Choose by commenting. (Intent is to use train different ones on differen t machines for efficiency.)

In [2]:
import torch

CHOSEN_OPTIMIZER = torch.optim.SGD
# CHOSEN_OPTIMIZER = torch.optim.Adam

Number of iterations for Bayesian Optimization

In [3]:
MAX_ITERATIONS = 10

NUM_TRAIN_EPOCHS = 30
EARLY_STOP = 7

Models to test for BO. All in the list will be optimized.

In [4]:
from torchvision import models as tvm
import pretrainedmodels as ptm

# The models we will test
MODELS = (
    ptm.alexnet, # gets maximum recursion limit exceeded exceptions
    ptm.se_resnet50,
    ptm.se_resnet101,
    ptm.inceptionresnetv2,
    ptm.inceptionv4,
    ptm.vgg16,
    ptm.vgg19,
    tvm.resnet101,
    ptm.senet154,
    ptm.nasnetalarge
)

Setup: Make sure Jupyter shows all output

In [5]:
# show more than one output in cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# plot charts in our notebook
%matplotlib inline

import subprocess
import sys
sys.setrecursionlimit(3000) # for some reason AlexNet requires more recursive depth

## Helper Functions

In [6]:
import src.utils as utils
from src.trainable import Trainable
import torch


CRITERION = torch.nn.CrossEntropyLoss() # we'll always use CE for the loss function
 

def train(params):
    """
    Set up a trainable and train it using the given parameters.
    """
    print(params_to_meta_dict(params))
    batch_size, lr_factor, optim_params = parse_train_params(params)
    
    # make an output directory using the model, dataset, and BO iteration
    outdir = make_outdir_name(data_dir, utils.get_model_name_from_fn(chosen_model), 
                              prepend=EXPERIMENTS_DIR,
                              append=str(iteration))
    
    
    image_size = utils.determine_image_size(utils.get_model_name_from_fn(chosen_model))
    dataloaders = utils.get_train_val_dataloaders(
        datadir=data_dir,
        val_proportion=0.15,
        image_size=image_size, 
        batch_size=batch_size
    )
    
    model = build_model(chosen_model)
    utils.fit_model_last_to_dataset(model, dataloaders['train'].dataset)
    
    
    optimizer = CHOSEN_OPTIMIZER(model.parameters(), *optim_params)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='max', 
        factor=lr_factor, 
        patience=1
    )
    
    trainable = Trainable(dataloaders, model, CRITERION, optimizer, lr_scheduler, outdir=outdir)
    trainable.train(num_epochs=NUM_TRAIN_EPOCHS, early_stop_limit=EARLY_STOP, verbose=False)

    return trainable


def parse_train_params(params):
    """
    Parses train parameters by converting batch size to an int, and the betas to a tuple for Adam.
    """
    batch_size, lr_factor, optim_params = int(params[0]), params[1], params[2:]
    if CHOSEN_OPTIMIZER is torch.optim.Adam:  # turn b1 and b2 into a tuple
        optim_params = optim_params[:3] + tuple(optim_params[3:5]) + optim_params[5:]
    return batch_size, lr_factor, optim_params

    
def build_model(model_fn):
    """
    Build a pretrained model class from a model function. Passes 
    in the appropriate pretrained arg based on the model function's 
    parent module.
    """
    if 'pretrainedmodels' in model_fn.__module__:
        model = model_fn(num_classes=1000, pretrained='imagenet')
    else:
        model = model_fn(pretrained=True)
    return model


def make_key_from_params(params):
    """
    Makes a unique key (as a tuple) from a given list of parameters.
    For storing associated Trainable objects.
    
    """
    return tuple(round(param, 10) for param in params)


def make_outdir_name(datadir, model_name, prepend="", append=""):
    """
    Make the output directory name based on dataset, model name, and any extra info.
    """
    dataset_name = os.path.basename(datadir)
    return os.path.join(prepend, dataset_name, model_name, append) 


# Bayesian Optimization

## Define the Problem

### Domain

In [7]:
BASE_DOMAIN = [
    {'name': 'batch_size', 'type': 'discrete', 'domain': (1, 4, 8, 12)},  # DEBUG for GPUs with insufficient memory
#     {'name': 'batch_size', 'type': 'discrete', 'domain': (16, 24, 32, 48, 64)},
    {'name': 'lr_decay', 'type': 'continuous', 'domain': (0.03, 0.3)},
]
# BASE_DOMAIN = 

ADAM_DOMAIN = BASE_DOMAIN + [
    {'name': 'adam_lr', 'type': 'continuous', 'domain': (0.001, 0.1)},
    {'name': 'adam_beta1', 'type': 'continuous', 'domain': (0.8, .99)},
    {'name': 'adam_beta2', 'type': 'continuous', 'domain': (0.95, .9999)},
    {'name': 'adam_wtdecay', 'type': 'continuous', 'domain': (0, 1)}
]
# TODO: have to figure out how to set a starting default
# default_input = [32, 0.001, 0.9, 0.999, 0] 

SGD_DOMAIN = BASE_DOMAIN + [
    {'name': 'lr', 'type': 'continuous', 'domain': (0.001, 0.1)},
    {'name': 'momentum', 'type': 'continuous', 'domain': (0.5, .99)},
    {'name': 'weight_decay', 'type': 'continuous', 'domain': (0, 1)}
]

### Function to optimize

In [8]:
def f(x):
    """ Value function to maximize for bayesian optimization """
    params = x.flatten()
    
    trainable = train(params)
    val_acc = trainable.best_val_accuracy
    
    return val_acc

## Do BO on all models on both datasets.

In [9]:
from GPyOpt.methods import BayesianOptimization
from predict import create_predictions
from metrics import create_all_metrics

### BO helper functions

In [10]:
def reset_globals(datadir):
    global iteration  # keep track of our optimization iterations for directory output
    iteration = 0   # but reset to 0 each train run
    global data_dir
    data_dir = datadir
    
    # reset the global trainables produced by BO
    global trainables
    trainables = {}

def get_domain():
    return ADAM_DOMAIN if CHOSEN_OPTIMIZER is torch.optim.Adam else SGD_DOMAIN
    
def perform_bayesian_optimization():
    """
    Construct the problem and run the optimization.
    """
    domain = get_domain()
    problem = BayesianOptimization(
        f=f,
        domain=domain,
        maximize=True
    )
    problem.run_optimization(max_iter=MAX_ITERATIONS)
    return problem

def plot_bo_results(problem):
    """
    Graph the acquisition function and convergence
    """
    print('Best params:', problem.opt_x)
    problem.plot_acquisition()
    problem.plot_convergence()
    
    
def params_to_meta_dict(params):
    """
    Returns a dictionary of named parameters
    """
    domain = get_domain()
    meta = { d['name']: params[i] for i, d in enumerate(domain)}
    return meta


def generate_test_metrics(trainable):
    """
    Create an itemized predictions file and metrics for the test set.
    """
    predictions_file = create_predictions(
        outdir=trainable.outdir,
        subset='test',
        data_dir=data_dir,
        model=best_trainable.model
    )
    create_all_metrics(predictions_file, trainable.outdir, 'test')

### Main loop

In [None]:
for model_fn in MODELS:  # iterate over all models
    # set the model
    global chosen_model
    chosen_model = model_fn
    
    # iterate over both binary and quaternary datasets
    for data_dir in (
        os.path.join('data', 'die_vs_all_tt'), 
        os.path.join('data', '4_class_tt')
    ):
        print(utils.get_model_name_from_fn(chosen_model), data_dir)
        reset_globals(data_dir)  # reset some globals used for iteration tracking
        
        try:
            # define and optimize the problem
            optimized = perform_bayesian_optimization()
            # plot the results
            plot_bo_results(optimized)
            # get and save the best trainable
            best_params = optimized.x_opt.flatten()
            best_trainable = train(best_params)
            best_trainable.save(extra_meta=params_to_meta_dict(best_params))
            # evalute on the test set using the best model
            generate_test_metrics(best_trainable)
            
        # if something bad happens, skip it so we can let the others run
        except Exception as e:
            print('Skipping because', e)
#             import traceback
#             traceback.print_exc()
            continue
            
        # commit & push only if we can connect to internet
        if PUSH_TO_GIT:
            subprocess.check_call(['git', 'add', 'experiments'])
            subprocess.check_call(['git', 'commit', '-am', 
                                   f'Results from {utils.get_model_name_from_fn(chosen_model)} {data_dir}'])
            subprocess.check_call(['git', 'push'])

alexnet data\die_vs_all_tt
{'batch_size': 1.0, 'lr_decay': 0.16699831689724726, 'lr': 0.042715851890708985, 'momentum': 0.9145557761910726, 'weight_decay': 0.17517534571112614}


                                                                                                                       

Skipping because 'Trainable' object has no attribute 'verbose'
alexnet data\4_class_tt
{'batch_size': 12.0, 'lr_decay': 0.08625439462524992, 'lr': 0.03512263667603768, 'momentum': 0.5870795087047069, 'weight_decay': 0.45984973142176566}


Epoch 1, Train, best val=0.000000:   0%|                                                  | 0/1017 [00:00<?, ?images/s]

# Final Commit and Push

In [None]:
if PUSH_TO_GIT:
    import time
    time.sleep(120) # wait for two minutes to let everything rendering
    _ = subprocess.check_call(["spd-say", "Your code has finished running"])
    _ = subprocess.check_call(['git', 'commit', '-am', "BO final commit"])
    _ = subprocess.check_call(['git', 'push'])
    
print("Done")