### Examples of hyperparameter optimization in keras

#### Function definitions

In [1]:
from typing import List

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras import optimizers

## function to setup model (assuming multiclass classification problem)
# topo -> topology graph
# input and output size are not hyperparameters to be optimized and are intrinsic to the data
# can also add functions as a hyperparameter such as scoring functions or activation functions

def setup_model(topo, dropout_rate, input_size, output_size,activation_func='softmax'):
    model = Sequential()    
    model.add(Dense(topo[0], activation="relu", input_dim = input_size))
    if dropout_rate > 0: model.add(Dropout(dropout_rate))
    for i in range(1,len(topo)):        
        model.add(Dense(topo[i], activation="relu"))
        if dropout_rate > 0: model.add(Dropout(dropout_rate))    
    model.add(Dense(output_size))
    model.add(Activation(activation_func))
    
    return model

## training the DNN - takes algorithm (string) and learning rate; data (X, y), epochs and batch size
def train_dnn(model, alg, lr, x_train, y_train, epochs = 5, batch_size = 64, metric_function: List = ['accuracy', 'mse']):
    if alg == "adam":
        optimizer = optimizers.Adam(lr = lr)
    elif alg == "rmsprop":
        optimizer = optimizers.RMSprop(lr = lr)
    elif alg == "sgd_momentum":
        optimizer = optimizers.SGD(lr = lr, momentum = 0.9)
    else: optimizer = optimizers.SGD(lr = lr)
        
    model.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics = metric_function)
    model.fit(x_train, y_train, epochs = epochs, batch_size = batch_size, verbose = 0)
    
    return model

## optimizing parameters: topology, algorithm, learning rate, dropout
## randomized search optimization with maximum iterations
## takes as input: dictionary with params to optimizae and possible values; training data(X,y), validation data (X,y), iterations, epochs for training
def dnn_optimization(opt_params, x_train, y_train, x_valid, y_valid, iterations = 10, epochs = 5, verbose = True):
    from random import choice
  
    if verbose: 
        print("Topology\tDropout\tAlgorithm\tLRate\tValLoss\tValAcc\n")
    best_acc = None
    
    input_size = x_train.shape[1]
    output_size = y_train.shape[1]
    
    if "topology" in opt_params:
        topologies = opt_params["topology"]
    else: topologies = [[100]]
    if "algorithm" in opt_params:
        algs = opt_params["algorithm"]
    else: algs = ["adam"]
    if "lr" in opt_params:
        lrs = opt_params["lr"]
    else: lrs = [0.001]
    if "dropout" in opt_params:
        dropouts = opt_params["dropout"]
    else: dropouts= [0.0]
    
    for it in range(iterations):
        topo = choice(topologies)
        dropout_rate = choice(dropouts)
        dnn = setup_model (topo, dropout_rate, input_size, output_size)
        alg = choice(algs)
        lr = choice(lrs)
        dnn = train_dnn(dnn, alg, lr, x_train, y_train, epochs, 128)
        val_loss, val_acc = dnn.evaluate(x_valid, y_valid, verbose = 0)
        
        if verbose: 
            print(topo, "\t", dropout_rate, "\t", alg, "\t", lr, "\t", val_loss, "\t", val_acc)
        
        if best_acc is None or val_acc > best_acc:
            best_acc = val_acc
            best_config = (topo, dropout_rate, alg, lr)
        
    return best_config, best_acc

#### Example with MNIST dataset - DNNs with hyperparameters optimized

In [2]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

print(train_images.shape, test_images.shape)
print(len(train_labels), len(test_labels))

train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28))
X_test = test_images.astype('float32') / 255

train_labels = to_categorical(train_labels)
y_test = to_categorical(test_labels)

X_tr = train_images[:50000,]
X_val = train_images[50000:,]
y_tr = train_labels[:50000]
y_val = train_labels[50000:,]

print(X_tr.shape, X_val.shape, X_test.shape)
print(len(y_tr), len(y_val), len(y_test))

(60000, 28, 28) (10000, 28, 28)
60000 10000
(50000, 784) (10000, 784) (10000, 784)
50000 10000 10000


In [3]:
opt_pars = {"topology":[[100], [100,50], [250], [250,100]],
            "algorithm": [ "adam", "rmsprop", "sgd_momentum"],
            "lr": [0.01, 0.001],
            "dropout": [0, 0.2, 0.5]}

best_config, best_val_acc = dnn_optimization(opt_pars, X_tr, y_tr, X_val, y_val, 20)  
print("Best configuration:", best_config)
print("Best validation accuracy:", best_val_acc) 

Topology	Dropout	Algorithm	LRate	ValLoss	ValAcc

[100, 50] 	 0.5 	 adam 	 0.01 	 0.19035480916500092 	 0.9517999887466431
[250] 	 0 	 adam 	 0.001 	 0.0788038820028305 	 0.9767000079154968
[100, 50] 	 0.2 	 rmsprop 	 0.01 	 0.14397594332695007 	 0.9674000144004822
[250] 	 0 	 sgd_momentum 	 0.01 	 0.15039779245853424 	 0.9588000178337097
[100] 	 0.5 	 sgd_momentum 	 0.01 	 0.16686806082725525 	 0.954800009727478
[100, 50] 	 0.2 	 adam 	 0.001 	 0.09524358808994293 	 0.9718000292778015
[100] 	 0.5 	 rmsprop 	 0.001 	 0.1300782710313797 	 0.9635999798774719
[100, 50] 	 0.2 	 sgd_momentum 	 0.001 	 0.34068548679351807 	 0.9067000150680542
[100] 	 0.5 	 adam 	 0.01 	 0.14045625925064087 	 0.963100016117096
[100, 50] 	 0.5 	 sgd_momentum 	 0.001 	 0.4231471121311188 	 0.897599995136261
[250, 100] 	 0.5 	 sgd_momentum 	 0.01 	 0.12872694432735443 	 0.9642999768257141
[100] 	 0.5 	 rmsprop 	 0.01 	 0.20177757740020752 	 0.9534000158309937
[250, 100] 	 0.5 	 rmsprop 	 0.001 	 0.109310708940029

In [4]:
# take best configuration and retrain with whole training set
# evaluate error on holdout test set
best_model = setup_model(best_config[0], best_config[1], X_tr.shape[1], y_tr.shape[1])
best_model = train_dnn(best_model, best_config[2], best_config[3], train_images, train_labels)

test_loo, test_acc = best_model.evaluate(X_test, y_test, verbose = 0)
print("Test set metrics:", test_loo, test_acc)

Test set metrics: 0.07228674739599228 0.9787999987602234
