# Constants and Setup

In [1]:
import os

# Whether to commit and push results to git after each optimization run.
# Intended for long, unattended runs.
PUSH_TO_GIT = True

# Parent output directory under which every run's results are written.
EXPERIMENTS_DIR = os.path.join("experiments", "bayes_opt_2")

Bayesian optimization and training parameters

In [2]:
MAX_ITERATIONS = 10     # max iterations for bayesian optimization

NUM_TRAIN_EPOCHS = 30   # total number of epochs to train for
EARLY_STOP = 7          # give up training if validation accuracy doesn't improve after this many epochs

Optimizers to use. Choose by commenting entries in or out. According to Wilson et al. (2018), SGD supposedly generalizes better than adaptive methods such as Adam.

In [3]:
import torch.optim as optim

# Optimizer classes to evaluate; the main loop runs a full Bayesian
# optimization for every entry. Comment an entry out to skip it.
OPTIMIZERS = (
    optim.Adam,
    optim.SGD,
)

Models to test for BO. All in the list will be optimized.

In [4]:
from torchvision import models as tvm
import pretrainedmodels as ptm

# Model constructor functions to test. `ptm.*` come from the `pretrainedmodels`
# package and `tvm.*` from `torchvision.models`; build_model() chooses the
# pretrained-weights argument based on which package a constructor belongs to.
MODELS = (
    ptm.alexnet,  # hits "maximum recursion depth exceeded" unless the recursion limit is raised (see setup cell)
#     se_resnext101_32x4d,  # disabled: input size doesn't work
    ptm.dpn98,
    ptm.se_resnet50,
    ptm.se_resnet101,
    ptm.inceptionresnetv2,
    ptm.inceptionv4,
    ptm.vgg16,
    ptm.vgg19,
    tvm.resnet101,
    tvm.resnet152,
    ptm.senet154,
    ptm.nasnetalarge
)

Setup: Make sure Jupyter shows all output

In [5]:
# Show the value of every expression statement in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Render matplotlib charts inline in the notebook.
%matplotlib inline

import subprocess  # for command line control (git commit/push, notifications)
import sys
# Raise Python's recursion limit: AlexNet otherwise fails with recursion-depth
# errors (root cause not investigated).
sys.setrecursionlimit(3000)

## Helper Functions

In [6]:
import src.utils as utils
from src.trainable import Trainable
import torch


# Loss function shared by every training run: always cross-entropy.
CRITERION = torch.nn.CrossEntropyLoss()
 

def _optimizer_kwargs(optimizer_cls, optim_params):
    """
    Map the parsed optimizer hyper-parameters onto explicit keyword arguments.

    Passing the values positionally is wrong for both supported optimizers:
    the third positional argument of ``torch.optim.Adam`` is ``eps`` and the
    third hyper-parameter of ``torch.optim.SGD`` is ``dampening``, so the
    searched weight decay would silently land on the wrong setting.

    :param optimizer_cls: the optimizer class being instantiated
    :param optim_params: for Adam ``(lr, (beta1, beta2), weight_decay)``,
        otherwise ``(lr, momentum, weight_decay)`` as returned by
        parse_train_params()
    :return: dict of keyword arguments for the optimizer constructor
    """
    if optimizer_cls is torch.optim.Adam:
        lr, betas, weight_decay = optim_params
        return {
            'lr': float(lr),
            'betas': tuple(float(beta) for beta in betas),
            'weight_decay': float(weight_decay),
        }
    lr, momentum, weight_decay = optim_params
    return {
        'lr': float(lr),
        'momentum': float(momentum),
        'weight_decay': float(weight_decay),
    }


def train(params):
    """
    Set up a trainable and train it using the given parameters.

    Reads the notebook globals ``chosen_model``, ``chosen_optimizer``,
    ``data_dir`` and ``iteration``; ``iteration`` is advanced by one on every
    call so each run gets its own output directory.

    :param params: flat sequence ``[batch_size, lr_decay, *optimizer_params]``
        in the order of the active search domain
    :return: the Trainable after training has finished
    """
    global iteration
    current_iteration = iteration
    # Advance immediately so that even a failed run never shares its output
    # directory with the next one.
    iteration += 1

    print('Iteration:', current_iteration, end=', ')
    batch_size, lr_factor, optim_params = parse_train_params(params)

    # make an output directory using the dataset name, model name, and BO iteration
    outdir = make_outdir_name(
        data_dir,
        utils.get_model_name(chosen_model),
        chosen_optimizer.__name__,
        str(current_iteration),
        prepend=EXPERIMENTS_DIR
    )

    # make our data loaders based on the image size
    image_size = utils.determine_image_size(utils.get_model_name(chosen_model))
    # get_train_val_dataloaders() makes a stratified random partition into a train and validation set
    dataloaders = utils.get_train_val_dataloaders(
        datadir=data_dir,
        val_proportion=0.15,
        image_size=image_size,
        batch_size=batch_size
    )
    # build our model
    model = build_model(chosen_model)
    utils.fit_model_last_to_dataset(model, dataloaders['train'].dataset)
    # build the optimizer with explicit keywords (see _optimizer_kwargs)
    optimizer = chosen_optimizer(
        model.parameters(),
        **_optimizer_kwargs(chosen_optimizer, optim_params)
    )
    # "ReduceOnPlateau" is "dev-decay" as recommended by Wilson et al. 2018
    # "The Marginal Value of Adaptive Gradient Methods"
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',
        factor=lr_factor,
        patience=1
    )

    trainable = Trainable(dataloaders, model, CRITERION, optimizer, lr_scheduler, outdir=outdir)
    trainable.train(num_epochs=NUM_TRAIN_EPOCHS, early_stop_limit=EARLY_STOP, verbose=False)

    return trainable


def parse_train_params(params):
    """
    Split a flat parameter vector into (batch_size, lr_factor, optim_params).

    The batch size is cast to int. When the active optimizer is Adam, the two
    beta values are packed into one tuple, giving
    ``(lr, (beta1, beta2), weight_decay)``; for any other optimizer the
    remaining values are returned untouched.
    """
    batch_size = int(params[0])
    lr_factor = params[1]
    remaining = params[2:]

    if chosen_optimizer is not torch.optim.Adam:
        return batch_size, lr_factor, remaining

    # NOTE: the positions below follow the layout of ADAM_DOMAIN
    learning_rate = remaining[0]
    betas = tuple(remaining[1:3])
    weight_decay = remaining[3]
    return batch_size, lr_factor, (learning_rate, betas, weight_decay)

    
def build_model(model_fn):
    """
    Instantiate a pretrained model from its constructor function.

    Constructors whose module path contains 'pretrainedmodels' are called with
    ``num_classes=1000, pretrained='imagenet'``; every other constructor is
    called with ``pretrained=True``.
    """
    is_ptm = 'pretrainedmodels' in model_fn.__module__
    if is_ptm:
        ctor_kwargs = {'num_classes': 1000, 'pretrained': 'imagenet'}
    else:
        ctor_kwargs = {'pretrained': True}
    return model_fn(**ctor_kwargs)


def make_key_from_params(params):
    """
    Build a hashable key (a tuple) from a list of parameters by rounding each
    value to 10 decimal places. Intended for storing associated Trainable
    objects.
    """
    rounded = []
    for value in params:
        rounded.append(round(value, 10))
    return tuple(rounded)


def make_outdir_name(data_dir, *append, prepend=""):
    """
    Compose an output directory path: ``prepend``, then the last path
    component of ``data_dir`` (the dataset name), then every extra component
    given in ``append``.
    """
    components = [prepend, os.path.basename(data_dir)]
    components.extend(append)
    return os.path.join(*components)


# Bayesian Optimization

## Define the Problem

### Domain

In [7]:
# Search-space definitions handed to BayesianOptimization(domain=...).
# ORDER MATTERS: parse_train_params() reads the sampled vector positionally
# (index 0 = batch size, index 1 = LR-scheduler factor, the rest = optimizer
# hyper-parameters), so entries must not be reordered without updating it.

# Parameters shared by every optimizer.
BASE_DOMAIN = [
#     {'name': 'batch_size', 'type': 'discrete', 'domain': (1, 4, 8, 12)},  # DEBUG for GPUs with insufficient memory
    {'name': 'batch_size', 'type': 'discrete', 'domain': (16, 24, 32, 48, 64)},
    # used as the `factor` of the ReduceLROnPlateau scheduler in train()
    {'name': 'lr_decay', 'type': 'continuous', 'domain': (0.03, 0.3)},
]

# Adam: learning rate, beta1, beta2, weight decay (in that order).
ADAM_DOMAIN = BASE_DOMAIN + [
    {'name': 'adam_lr', 'type': 'continuous', 'domain': (0.001, 0.1)},
    {'name': 'adam_beta1', 'type': 'continuous', 'domain': (0.8, .99)},
    {'name': 'adam_beta2', 'type': 'continuous', 'domain': (0.95, .9999)},
    {'name': 'adam_wtdecay', 'type': 'continuous', 'domain': (0, 1)}
]
# TODO: have to figure out how to set a starting default
# default_input = [32, 0.001, 0.9, 0.999, 0] 

# SGD (and any non-Adam optimizer, see get_domain()): learning rate, momentum,
# weight decay (in that order).
SGD_DOMAIN = BASE_DOMAIN + [
    {'name': 'lr', 'type': 'continuous', 'domain': (0.001, 0.1)},
    {'name': 'momentum', 'type': 'continuous', 'domain': (0.5, .99)},
    {'name': 'weight_decay', 'type': 'continuous', 'domain': (0, 1)}
]

### Function to optimize

In [8]:
def f(x):
    """
    Objective for the Bayesian optimization: train with the flattened
    parameter array and report the best validation accuracy reached.
    """
    return train(x.flatten()).best_val_accuracy

## Do BO on all models on both datasets.

In [9]:
from GPyOpt.methods import BayesianOptimization
from predict import create_predictions
from metrics import create_all_metrics

### BO helper functions

In [10]:
def reset_globals(datadir):
    """
    Prepare the notebook globals for a fresh optimization run: the iteration
    counter (used in output directory names) goes back to 0 and the active
    data directory is set to ``datadir``.
    """
    global iteration, data_dir
    iteration, data_dir = 0, datadir
    

def get_domain():
    """
    Return the search domain matching the currently chosen optimizer:
    ADAM_DOMAIN for Adam, SGD_DOMAIN for anything else.
    """
    if chosen_optimizer is torch.optim.Adam:
        return ADAM_DOMAIN
    return SGD_DOMAIN
    
def perform_bayesian_optimization():
    """
    Build the maximization problem for the active optimizer's domain, run it
    for at most MAX_ITERATIONS iterations, and return the problem object.
    """
    bo_problem = BayesianOptimization(f=f, domain=get_domain(), maximize=True)
    bo_problem.run_optimization(max_iter=MAX_ITERATIONS)
    return bo_problem

def plot_bo_results(problem):
    """
    Print the best parameters found and graph the acquisition function and
    convergence of a finished optimization.

    :param problem: the optimized problem object; must expose ``x_opt``,
        ``plot_acquisition()`` and ``plot_convergence()``
    """
    # The best input is exposed as `x_opt` (as used by the main loop);
    # `opt_x` does not exist and raised AttributeError.
    print('Best params:', problem.x_opt)
    problem.plot_acquisition()
    problem.plot_convergence()
    
    
def params_to_meta_dict(params):
    """
    Label a positional parameter list: each entry of the active domain
    contributes its 'name' as the key for the value at the same index.
    """
    named = {}
    for index, spec in enumerate(get_domain()):
        named[spec['name']] = params[index]
    return named


def generate_test_metrics(trainable):
    """
    Create an itemized predictions file and metrics for the test set.

    :param trainable: object providing ``outdir`` (where the files are
        written) and ``model`` (used to make the predictions)
    """
    predictions_file = create_predictions(
        outdir=trainable.outdir,
        subset='test',
        data_dir=data_dir,
        # Use the argument's model; the previous code read the unrelated
        # global `best_trainable`, which only worked by accident.
        model=trainable.model
    )
    create_all_metrics(predictions_file, trainable.outdir, 'test')

### Main loop

In [11]:
import gc  # for manual garbage collection, reduce memory consumption
import traceback  # to show why a configuration was skipped

# Run one Bayesian optimization per (optimizer, model, dataset) combination.
for chosen_optimizer in OPTIMIZERS:
    for chosen_model in MODELS:  # iterate over all models

        # iterate over both binary and quaternary datasets
        for data_dir in (
            os.path.join('data', 'die_vs_all_tt'),
            os.path.join('data', '4_class_tt')
        ):
            print(utils.get_model_name(chosen_model), chosen_optimizer.__name__, data_dir, end="; ")
            reset_globals(data_dir)  # reset some globals used for iteration tracking

            try:
                # define and optimize the problem
                optimized = perform_bayesian_optimization()
                # plot the results
                plot_bo_results(optimized)
                # get and save the best trainable
                best_params = optimized.x_opt.flatten()
                print('Best params:', params_to_meta_dict(best_params))
                best_trainable = train(best_params)
                best_trainable.save(extra_meta=params_to_meta_dict(best_params))
                # evaluate on the test set using the best model
                generate_test_metrics(best_trainable)

            # if something bad happens, skip it so we can let the others run,
            # but show the traceback so real bugs are not silently hidden
            except Exception as e:
                print('Skipping because', e)
                traceback.print_exc()
                continue
            finally:
                gc.collect()  # reduce memory usage, also after a failed run

            # commit & push; a git failure (nothing to commit, no network, ...)
            # must not abort the remaining optimization runs
            if PUSH_TO_GIT:
                try:
                    subprocess.check_call(['git', 'add', 'experiments'])
                    subprocess.check_call(['git', 'commit', '-am',
                                           "Results from " +
                                           f'{data_dir} {utils.get_model_name(chosen_model)} ' +
                                           f'{chosen_optimizer.__name__}'])
                    subprocess.check_call(['git', 'push'])
                except (subprocess.CalledProcessError, OSError) as e:
                    print('Git commit/push failed, continuing:', e)

alexnet Adam data/die_vs_all_tt; Iteration: 0, 

Epoch 4, Train, best val=0.566667:  57%|█████▋    | 576/1018 [00:08<00:06, 66.97images/s] 

Sigint caught!
Training will stop after this epoch and the best model so far will be saved.
OR press Ctrl-C again to quit immediately without saving.Sigint caught!
Training will stop after this epoch and the best model so far will be saved.
OR press Ctrl-C again to quit immediately without saving.
Sigint caught!
Training will stop after this epoch and the best model so far will be saved.
OR press Ctrl-C again to quit immediately without saving.
Sigint caught!
Training will stop after this epoch and the best model so far will be saved.
OR press Ctrl-C again to quit immediately without saving.



                                                                                         

Sigint caught!
Training will stop after this epoch and the best model so far will be saved.
OR press Ctrl-C again to quit immediately without saving.
Stopping...
Stopping...




SystemExit: 1

Stopping...
Stopping...
Stopping...




# Final Commit and Push

In [None]:
if PUSH_TO_GIT:
    import time
    time.sleep(120)  # wait two minutes to let everything finish rendering
    # The spoken notification is optional: a missing or failing `spd-say`
    # must not prevent the final commit and push below.
    try:
        _ = subprocess.check_call(["spd-say", "Your code has finished running"])
    except (OSError, subprocess.CalledProcessError) as e:
        print("Could not play the spoken notification:", e)
    _ = subprocess.check_call(['git', 'commit', '-am', "BO final commit"])
    _ = subprocess.check_call(['git', 'push'])

print("Done")