# Hperparameter optimization with Optuna
Zur Suchraum eingrenzung


In [94]:
# all imports
import optuna

import numpy as np
import pandas as pd
import tensorflow as tf

# Keras Imports
from keras import backend
from keras.layers.core import Activation
from keras.callbacks import EarlyStopping

import os


In [95]:
# Data constants
DATASET_ID = 1
DATA_PATH = f"/home/feline/Documents/KIT/WS22_23/LAMA/GA/GA_hyperparameter_optimization/data/random{DATASET_ID}.csv"

#ATA_PATH = f"{os.getcwd()}/belle_neural_trigger/data/random{DATASET_ID}.csv"
DATA_COUNT = 5000

# Random seed
RANDOM_SEED = 42

METRIC = "accuracy"


# mlp setup constants
MIN_HIDDENLAYERS = 1
MAX_HIDDENLAYERS = 10

MIN_NEURON_PER_LAYER = 1
MAX_NEURON_PER_LAYER = 300

WEIGHT_THRESH = 0.05

NUM_EPOCHS = 500


#Training
N_TRAILS = 100
N_TRAILS_2 = 50


In [96]:
data = pd.read_csv(DATA_PATH, delimiter='\t');    
data = data.iloc[:DATA_COUNT, 9:]

input = data.iloc[:, :-9]
target = data.iloc[:, -9:-7]


In [97]:
input.head()

Unnamed: 0,SL0-relID,SL0-driftT,SL0-alpha,SL1-relID,SL1-driftT,SL1-alpha,SL2-relID,SL2-driftT,SL2-alpha,SL3-relID,...,SL5-alpha,SL6-relID,SL6-driftT,SL6-alpha,SL7-relID,SL7-driftT,SL7-alpha,SL8-relID,SL8-driftT,SL8-alpha
26,0.0,0.019646,0.518803,-0.175781,0.030929,0.418625,0.0,0.039879,0.265662,-0.140625,...,0.0,0.0,0.0,-0.497786,-0.929688,0.094763,0.48377,0.0,0.107461,0.000216
26,0.0,0.019646,0.518803,-0.175781,0.030929,0.418625,0.0,0.039879,0.265662,-0.140625,...,0.0,0.0,0.0,-0.497786,-0.929688,0.094763,0.48377,0.0,0.107461,0.000216
26,0.003906,-0.043702,0.372666,1.0,-0.068906,0.0,0.0,0.0,-0.307539,-0.015625,...,0.608852,0.0,-0.189303,-0.187435,0.0,-0.21598,0.319851,0.125,-0.242333,0.000308
26,0.003906,-0.043702,0.372666,1.0,-0.068906,0.0,0.0,0.0,-0.307539,-0.015625,...,0.608852,0.0,-0.189303,-0.187435,0.0,-0.21598,0.319851,0.125,-0.242333,0.000308
26,0.003906,-0.041512,0.166792,1.0,-0.06544,0.049572,0.0,-0.088371,-0.55425,-0.015625,...,0.11339,0.0,-0.179397,-0.301857,0.0,-0.204515,0.119405,0.125,-0.229258,0.000308


In [98]:
target.head()

Unnamed: 0,RecoZ,RecoTheta
26,0.065285,0
26,0.065285,0
26,0.08051,0
26,0.08051,0
26,0.08051,0


In [99]:
# activation function: tanh(x/2)
def tanh(x):
    return backend.tanh(x/2)

In [100]:
def bias_(model):
    for layer in model.layers:
            if isinstance(layer, tf.keras.layers.Dense):
                weights = layer.get_weights()
                
                weight = weights[0]
                bias = weights[1]
        
                weight_threshold = np.mean(np.abs(weight)) * 0.1
                bias_threshold = np.mean(np.abs(bias)) * 0.05

                amount_dead_weights = len(np.where(weight < weight_threshold)[0])
                amount_dead_bias = len(np.where(bias< bias_threshold)[0])
                
    return amount_dead_weights,amount_dead_bias



In [101]:
# print out current architecture
def mlp_arcitecture(num_neurons_per_layer):
    num_of_neurons = sum(num_neurons_per_layer)
    num_neurons_per_layer = np.array(num_neurons_per_layer)
    
    print(f'------numbers of neuron per layer-----')
    print(num_neurons_per_layer)
    print(num_of_neurons)
    print(f'------')

In [102]:
def build_model(num_neurons_per_layer):
    
    # activation function
    activation_function = Activation(tanh)

     # create model: 
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(num_neurons_per_layer[0],
            input_shape = (27,),  
            activation=activation_function)
    ])

    for layer in range(1, len(num_neurons_per_layer)):
        #num_neurons = int(num_neurons_per_layer[layer])
        model.add(
            tf.keras.layers.Dense(
                num_neurons_per_layer[layer],
                activation = activation_function))

    model.add(tf.keras.layers.Dense(
        2, 
        activation = activation_function
        ))
    
    # Specify the loss fuction, optimizer, metrics
    model.compile(
        loss = tf.keras.losses.MeanSquaredError(),
        optimizer = "sgd",
        metrics = [METRIC]
    )
    return model
    

In [103]:
def pruning(trial,acc):
    #value = trail.report(value, step= epoch)
    if trial.should_prune( acc):
        raise optuna.TrialPruned()

In [104]:
def mlp_weights(model):
    weights = model.get_weights()
    amount_dead_weights = 0
    for layer in model.layers:
        weights = layer.get_weights()
        amount_dead_weights = amount_dead_weights + len(np.where(abs(weights[0]) < WEIGHT_THRESH)[0])
    return amount_dead_weights

In [108]:
# Define the objective function, with on hyperparameter
def objective(trial):

    number_of_layer = trial.suggest_int("number_of_layer", MIN_HIDDENLAYERS, MAX_HIDDENLAYERS)
    num_neurons_per_layer = []

    for i in range(number_of_layer):
        layer_ = trial.suggest_int('layer_{}'.format(i), MIN_NEURON_PER_LAYER, MAX_NEURON_PER_LAYER)
        num_neurons_per_layer.append(layer_)
    

    num_neurons_per_layer = np.array(num_neurons_per_layer)
    
    # build model
    model = build_model(num_neurons_per_layer)

    # add early stopping callback to save time
    es = EarlyStopping(monitor = f'val_{METRIC}', mode ='max', patience = 5)
   
    # Train the model
    history = model.fit(input, target, epochs= NUM_EPOCHS, validation_split = 0.2, batch_size= 2, callbacks = [es], use_multiprocessing=True, verbose=0)
        
    # accurcy
    last_result = history.history[f"val_{METRIC}"][-1:][0]

    return last_result

In [109]:
sampler = optuna.samplers.TPESampler(seed=RANDOM_SEED)

# creat study with one hyperparameter : accuracy
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(objective, n_trials= N_TRAILS)



# Print the best hyperparameters and corresponding accuracy
print('Best hyperparameters: {}'.format(study.best_params))
print('Best accuracy: {}'.format(study.best_value))


[32m[I 2023-02-27 12:58:25,599][0m A new study created in memory with name: no-name-a458e477-271d-423a-a52b-22af64a129b4[0m
[32m[I 2023-02-27 12:58:53,102][0m Trial 0 finished with value: 0.9359999895095825 and parameters: {'number_of_layer': 4, 'layer_0': 286, 'layer_1': 220, 'layer_2': 180, 'layer_3': 47}. Best is trial 0 with value: 0.9359999895095825.[0m
[32m[I 2023-02-27 12:59:09,607][0m Trial 1 finished with value: 0.9419999718666077 and parameters: {'number_of_layer': 2, 'layer_0': 18, 'layer_1': 260}. Best is trial 1 with value: 0.9419999718666077.[0m
[32m[I 2023-02-27 12:59:21,132][0m Trial 2 finished with value: 0.6710000038146973 and parameters: {'number_of_layer': 7, 'layer_0': 213, 'layer_1': 7, 'layer_2': 291, 'layer_3': 250, 'layer_4': 64, 'layer_5': 55, 'layer_6': 56}. Best is trial 1 with value: 0.9419999718666077.[0m
[32m[I 2023-02-27 12:59:44,664][0m Trial 3 finished with value: 0.9390000104904175 and parameters: {'number_of_layer': 4, 'layer_0': 158, '

Best hyperparameters: {'number_of_layer': 3, 'layer_0': 16, 'layer_1': 148, 'layer_2': 40}
Best accuracy: 0.9440000057220459


In [111]:
# Ploting Slice Plot: First fig - accuracy over the number of layer in given MLP
#                     Secend fig - accuracy over number of layer and number of neuron per layer in MLP

fig_num_of_layer = optuna.visualization.plot_slice(study, ["number_of_layer"],target_name=METRIC)
fig_all_layer = optuna.visualization.plot_slice(study,target_name=METRIC)
fig_num_of_layer.show()
fig_all_layer.show()


In [112]:
# plots conection number of layers with corresponding accuracy
fig_number_of_layer= optuna.visualization.plot_parallel_coordinate(study,params= ["number_of_layer","layer_1"],target_name=METRIC)
fig_number_of_layer.show()
fig_layer_conection = optuna.visualization.plot_parallel_coordinate(study,params= ["layer_0","layer_1","number_of_layer"],target_name=METRIC)
fig_layer_conection.show()

In [113]:
# Plotting accuracy over each trial
fig = optuna.visualization.plot_optimization_history(study,target_name=METRIC)
fig.show()

In [114]:
# objective function with multi hyperparameter: accuracy, the amount of dead weights (weights < 0.05), amount of neurons in the hidden layer
def objective_(trial):

    number_of_layer = trial.suggest_int("number_of_layer", 1, 5)
    num_neurons_per_layer = []

    for i in range(number_of_layer):
        layer_ = trial.suggest_int('layer_{}'.format(i), 1, 20)
        num_neurons_per_layer.append(layer_)
    
    num_of_neurons = sum(num_neurons_per_layer)
    num_neurons_per_layer = np.array(num_neurons_per_layer)
    
    
    model = build_model(num_neurons_per_layer)

    # add early stopping callback to save time
    es = EarlyStopping(monitor = f'val_{METRIC}', mode ='max', patience = 5)
    
    # Train the model
    history = model.fit(input, target, epochs= NUM_EPOCHS, validation_split = 0.2, batch_size= 2, callbacks = [es], use_multiprocessing=True, verbose=0)

    amount_dead_weights = mlp_weights(model)
    
   #print(f'amount of dead weight{amount_dead_weights} and amount of dead bias {amount_dead_bias}')
    last_result = history.history[f"val_{METRIC}"][-1:][0]

    return last_result, amount_dead_weights, num_of_neurons

In [115]:
# creating study for multipel hyperparameter optimization
study_multiple_hyperparameter = optuna.create_study(directions=["maximize", "minimize","minimize"], sampler= sampler)
study_multiple_hyperparameter.optimize(objective_, n_trials=N_TRAILS)


[32m[I 2023-02-27 13:41:18,590][0m A new study created in memory with name: no-name-be9b8b4b-084f-4f3a-8459-a583191ac5d8[0m
[32m[I 2023-02-27 13:41:34,637][0m Trial 0 finished with values: [0.9449999928474426, 28.0, 23.0] and parameters: {'number_of_layer': 2, 'layer_0': 6, 'layer_1': 17}. [0m
[32m[I 2023-02-27 13:41:55,808][0m Trial 1 finished with values: [0.9380000233650208, 28.0, 17.0] and parameters: {'number_of_layer': 2, 'layer_0': 6, 'layer_1': 11}. [0m
[32m[I 2023-02-27 13:42:14,180][0m Trial 2 finished with values: [0.9290000200271606, 68.0, 17.0] and parameters: {'number_of_layer': 1, 'layer_0': 17}. [0m
[32m[I 2023-02-27 13:42:32,310][0m Trial 3 finished with values: [0.9399999976158142, 90.0, 20.0] and parameters: {'number_of_layer': 1, 'layer_0': 20}. [0m
[32m[I 2023-02-27 13:42:40,485][0m Trial 4 finished with values: [0.6710000038146973, 52.0, 37.0] and parameters: {'number_of_layer': 4, 'layer_0': 4, 'layer_1': 1, 'layer_2': 17, 'layer_3': 15}. [0m
[

In [123]:
# finding best 
trial_with_highest_accuracy = max(study_multiple_hyperparameter.best_trials, key=lambda t: t.values[1])
print(f"Trial with highest accuracy: ")
print(f"\tnumber: {trial_with_highest_accuracy.number}")
print(f"\tparams: {trial_with_highest_accuracy.params}")
print(f"\tvalues: {trial_with_highest_accuracy.values}")



Trial with highest accuracy: 
	number: 46
	params: {'number_of_layer': 4, 'layer_0': 20, 'layer_1': 13, 'layer_2': 5, 'layer_3': 9}
	values: [0.949999988079071, 114.0, 47.0]


In [119]:
# plotting pareto front
fig_1 = optuna.visualization.plot_pareto_front(study_multiple_hyperparameter, target_names=[METRIC,"dead_weights","num_neuron"])
fig_1.show()


In [124]:
hyperparameter_importances = optuna.visualization.plot_param_importances(
    study_multiple_hyperparameter, target=lambda t: t.values[0], target_name="accuracy"
)
hyperparameter_importances.show()
