# OneShotDL Prototype

This notebook shows the general experiment setup and code architecture that should be used within the project. It serves as a blueprint for the more advanced models and settings that we aim to develop.

In [None]:
import numpy as np
from pySOT import SyncStrategyNoConstraints, LatinHypercube, RBFInterpolant, CandidateDYCORS, CubicKernel, LinearTail
import csv
from threading import Thread, current_thread
from datetime import datetime
from poap.controller import ThreadController, BasicWorkerThread, SerialController
import os.path
import time
import h5py

# use keras for the cnn tuning example
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.losses import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

# helper classes of OneShotDL
from helpers import load_mnist, split_and_select_random_data

### 1. Minimal example of using pySOT to implement HORD

We aim to use the method proposed in <i>Efficient Hyperparameter Optimization of Deep Learning Algorithms Using
Deterministic RBF Surrogates</i> (Ilievski et al., 2017) to tune the models. The paper shows it outperforms popular Bayesian Optimization approaches like SMAC and TPE, especially when the number of parameters to tune is large.

The paper: https://arxiv.org/pdf/1607.08316.pdf 

Their implementation uses the pySOT library. A minimal example of how to use that package is shown below. It requires a class to be written with an objective function and allowed ranges for parameters.

In [None]:
class Test():
    """Toy optimization problem used to demonstrate the pySOT interface.

    The instance exposes the search-space attributes (`xlow`, `xup`,
    `continuous`, `integer`, `dim`) together with an `objfunction` to
    evaluate. The bounds are hard-coded for two variables, so they only
    line up with `continuous` and `dim` when `dim` is left at 2.
    """

    def __init__(self, dim=2):
        """Store the search-space description and reset the call counter.

        Args:
            dim: number of variables; all of them are listed as continuous.
        """
        # number of objective evaluations performed so far
        self.counter = 0

        # these attributes are required by pySOT
        self.dim = dim
        self.integer = np.array([])
        self.continuous = np.arange(0, dim)
        self.xup = np.array([-0.01, 3.1])
        self.xlow = np.array([-2, 2])

    def objfunction(self, x):
        """Evaluate the illustrative objective `2 - x[0]**2 + x[1]`.

        Within the bounds set in `__init__` the smallest value is 0,
        reached at x = [-2, 2]. Every call increments `self.counter` and
        prints the evaluation.

        Args:
            x: indexable holding the two variable values.

        Returns:
            The objective value at `x`.
        """
        self.counter += 1
        first_squared = np.square(x[0])
        score = (2 - first_squared) + x[1]
        message = "Experiment {}. Params: {}. Score: {}.".format(self.counter, x, score)
        print(message)
        return score

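Run the optimization with POAP's `ThreadController`. With `nthreads = 1` and `nsamples = 1`, as set below, evaluations still run one at a time.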

In [None]:
# Problem instance plus the evaluation budget and parallelism settings
data = Test()
maxeval = 100  # passed to the strategy as maxeval and to the surrogate as maxp
nsamples = 1  # passed to the strategy as nsamples
nthreads = 1  # number of worker threads launched below

# Create a strategy and a controller
controller = ThreadController()
# The strategy combines an initial experimental design (Latin hypercube with
# 2*(dim+1) points), an RBF surrogate with a cubic kernel, and DYCORS
# candidate sampling with 100*dim candidates.
controller.strategy = SyncStrategyNoConstraints(worker_id=0, 
                                                data=data,
                                                maxeval=maxeval, 
                                                nsamples=nsamples,
                                                exp_design=LatinHypercube(dim=data.dim, npts=2*(data.dim+1)),
                                                response_surface=RBFInterpolant(kernel=CubicKernel, maxp=maxeval),
                                                sampling_method=CandidateDYCORS(data=data, numcand=100*data.dim))

# Launch the threads and give them access to the objective function
for _ in range(nthreads):
    worker = BasicWorkerThread(controller, data.objfunction)
    controller.launch_worker(worker)

# Run the optimization strategy
result = controller.run()

# Report the best objective value and the first entry of result.params
# (presumably the parameter vector that produced it -- confirm against POAP)
print('Best value found: {0}'.format(result.value))
print('Best solution found: {0}\n'.format(
    np.array_str(result.params[0], max_line_width=np.inf,
                 precision=5, suppress_small=True)))

### 2. Prototype code for tuning a One Shot CNN on Fashion MNIST

To use this approach for tuning Deep Learning models, we need classes that train and evaluate different architectures of Neural Networks. This prototype shows how such a class could look. It trains a simple CNN on one randomly chosen example from each of five randomly chosen classes (five images in total) and evaluates the resulting model on the test images of those same five classes. This process is repeated a number of times for every combination of parameter values to achieve a form of cross validation.

In [None]:
class OneShotPrototype():
    """Problem definition for tuning a small one-shot CNN with pySOT.

    Each call to `objfunction` builds a Keras CNN from a hyperparameter
    vector, trains and evaluates it `nfolds` times on freshly sampled data,
    and returns the negated mean test accuracy so that a minimizer can use it.

    The hyperparameter vector is ordered as in `self.hyperparams`:
    [num_conv_layers, num_dense_layers, neurons_conv, neurons_dense, dropout_rate].
    """

    def __init__(self, dim=5):
        """Define the search space, the fixed settings and load the data.

        Args:
            dim: length of the hyperparameter vector. The bounds below are
                hard-coded for the five entries of `self.hyperparams`.
        """
        self.hyperparams = ['num_conv_layers', 'num_dense_layers', 'neurons_conv', 'neurons_dense', 'dropout_rate']
        # maps a hyperparameter name to its position in the parameter vector
        self.hyper_map = {name: index for index, name in enumerate(self.hyperparams)}

        # search-space attributes read by pySOT; this may need a more intuitive structure
        self.xlow = np.array([1, 1, 8, 8, 0.0])
        self.xup = np.array([4, 4, 64, 128, 0.75])
        self.continuous = np.arange(4, dim)  # only the dropout rate is continuous
        self.integer = np.arange(0, 4)       # layer counts and layer widths
        self.dim = dim

        # fixed parameters
        self.batchsize = 128
        self.epochs = 200
        self.nfolds = 5  # for cross validation

        # data
        self.x_train, self.y_train = load_mnist("./Data/", kind='train')
        self.x_test, self.y_test = load_mnist("./Data/", kind='test')
        # assumes the labels are one-hot encoded, one column per class -- TODO confirm in helpers.load_mnist
        self.num_classes = self.y_test.shape[1]

        # counter
        self.exp_number = 0

    def _int_param(self, params, name):
        """Return the hyperparameter called `name` from `params`, cast to int."""
        return int(params[self.hyper_map[name]])

    def _define_model(self, params):
        """Create the compiled Keras model and its data generator.

        The network is: `num_conv_layers` 3x3 convolutions with `neurons_conv`
        filters each, max pooling, dropout, flatten, `num_dense_layers` dense
        layers of `neurons_dense` units, and a softmax classification layer.

        Args:
            params: hyperparameter vector ordered as in `self.hyperparams`.

        Returns:
            Tuple `(model, datagen)`.
        """
        num_conv_layers = self._int_param(params, 'num_conv_layers')
        num_dense_layers = self._int_param(params, 'num_dense_layers')
        neurons_conv = self._int_param(params, 'neurons_conv')
        neurons_dense = self._int_param(params, 'neurons_dense')
        dropout_rate = params[self.hyper_map['dropout_rate']]

        model = Sequential()

        # add first convolutional layer and specify input shape
        model.add(Conv2D(neurons_conv,
                         kernel_size=(3, 3), activation='relu',
                         input_shape=(28, 28, 1), data_format="channels_last"))

        # possibly add more; the range is empty when only one conv layer is requested
        for _ in range(1, num_conv_layers):
            model.add(Conv2D(neurons_conv, (3, 3), activation='relu'))

        # max pool > dropout > flatten
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(dropout_rate))
        model.add(Flatten())

        # add dense layers before the classification layer
        for _ in range(num_dense_layers):
            model.add(Dense(neurons_dense, activation='relu'))

        # classification layer
        model.add(Dense(self.num_classes, activation='softmax'))

        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        # create data generator, later including augmentations
        datagen = ImageDataGenerator()

        return model, datagen

    def _cross_validate(self, x, y, xtest, ytest, params, n):
        """Cross validate with random sampling.

        Every repetition draws a new random task with
        `split_and_select_random_data` (5 target classes, 1 example per
        class), trains a fresh model on it and records its test accuracy.

        Args:
            x, y: training images and labels to sample from.
            xtest, ytest: test images and labels to sample from.
            params: hyperparameter vector ordered as in `self.hyperparams`.
            n: number of repetitions.

        Returns:
            List with the `n` test accuracies.
        """
        print("Cross validating..")
        scores = []
        for i in range(n):
            x_target_labeled, y_target, x_test, y_test, _, _, _ = \
                split_and_select_random_data(x, y, xtest, ytest,
                                             num_target_classes=5, num_examples_per_class=1)
            model, datagen = self._define_model(params)
            print("fit {}:".format(i+1))
            # fits the model on batches with real-time data augmentation;
            # the whole labeled set forms a single batch per epoch
            model.fit_generator(datagen.flow(x_target_labeled, y_target, batch_size=x_target_labeled.shape[0]),
                                steps_per_epoch=1, epochs=self.epochs, verbose=0)

            # evaluate in one batch sized to the evaluation set itself
            loss, accuracy = model.evaluate(x_test, y_test, verbose=0, batch_size=x_test.shape[0])
            print("test accuracy: {}%.".format(round(accuracy*100, 2)))
            scores.append(accuracy)

            # A new model is built for every fold of every experiment. Drop it
            # and reset the backend session so graph state does not pile up
            # over the whole search. Guarded because the reset is issued only
            # for the TensorFlow backend.
            del model
            if K.backend() == 'tensorflow':
                K.clear_session()

        return scores

    def objfunction(self, params):
        """ The overall objective function to provide to pySOT's black box optimization.

        Args:
            params: hyperparameter vector ordered as in `self.hyperparams`.

        Returns:
            The negated mean test accuracy over `self.nfolds` repetitions.
        """
        self.exp_number += 1
        print("--------------\nExperiment {}.\n--------------".format(self.exp_number))

        print("params: {}.".format(params))
        scores = self._cross_validate(self.x_train, self.y_train, self.x_test, self.y_test, params, self.nfolds)
        print("Scores: {}.\nMean: {}%. Standard deviation: {}%".format(scores, round(np.mean(scores)*100, 2), round(np.std(scores)*100, 2)))

        # to minimize, return this to pySOT
        return -np.mean(scores)

##### Testing the prototype class
Test the prototype class for a single set of parameters.

In [None]:
# Evaluate the objective once for a hand-picked parameter vector, ordered as in
# OneShotPrototype.hyperparams: num_conv_layers=2, num_dense_layers=2,
# neurons_conv=16, neurons_dense=16, dropout_rate=0.5.
test = OneShotPrototype()
score = test.objfunction([2, 2, 16, 16, 0.5])  # negated mean test accuracy

Now try tuning the model with HORD.

In [None]:
# Problem instance and optimization budget
data = OneShotPrototype()
maxeval = 15
nsamples = 1 # one experiment at a time

# create the controller; it evaluates the objective serially in this thread
controller = SerialController(data.objfunction)
# experiment design
exp_des = LatinHypercube(dim=data.dim, npts=2*data.dim+1)
# Use a cubic RBF interpolant with a linear tail
surrogate = RBFInterpolant(kernel=CubicKernel, tail=LinearTail, maxp=maxeval)
# Use DYCORS with 100d candidate points
adapt_samp = CandidateDYCORS(data=data, numcand=100*data.dim)

# pass the nsamples setting declared above instead of a hard-coded literal,
# so that changing the setting actually takes effect
strategy = SyncStrategyNoConstraints(worker_id=0, data=data, maxeval=maxeval, nsamples=nsamples,
                                     exp_design=exp_des, response_surface=surrogate,
                                     sampling_method=adapt_samp)
controller.strategy = strategy

# Run the optimization strategy
start_time = datetime.now()
result = controller.run()
# capture the end time right after the run, before any reporting
end_time = datetime.now()

print('Best value found: {0}'.format(result.value))
print('Best solution found: {0}\n'.format(
    np.array_str(result.params[0], max_line_width=np.inf,
                 precision=5, suppress_small=True)))

# NOTE(review): millis is the current Unix time in milliseconds, not the
# elapsed run time -- confirm that this is what should be reported.
millis = int(round(time.time() * 1000))
print('Started: '+str(start_time)+'. Ended: ' + str(end_time) + ' (' + str(millis) + ')')

Note that the multi-threaded controller does not work for this class: it conflicts with Keras, which already uses all available cores.