In [223]:
# Jupyter packagesb
from __future__ import print_function
from keras.callbacks import EarlyStopping
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import optuna
import optuna.multi_objective

import plotly

import sklearn.datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.layers.core import Activation
from keras import backend


# Common packages, you know them from before 
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import math
import copy
from typing import *

import pickle

# Setup jupyter
%matplotlib inline



Set constants:

In [224]:
# Data constants
DATASET_ID = 3
DATA_PATH = f"/home/feline/Documents/KIT/WS22_23/LAMA/belle_neural_trigger/data/random{DATASET_ID}.csv"
DATASET_NAME = f'random{DATASET_ID}'
DATA_COUNT = 500

# Random seed
RANDOM_SEED = 42


# mlp setup constants
MIN_HIDDENLAYERS = 1
MAX_HIDDENLAYERS = 20

MIN_NEURON_PER_LAYER = 1
MAX_NEURON_PER_LAYER = 300

WEIGHT_THRESH = 0.05,

NUM_EPOCHS = 500 #100 # plot showed that no more than 40 epochs are necessary, else the network will be overfitting 

METRIC = "accuracy"


#Training
N_TRAILS = 20




Read the data:

In [225]:
data = pd.read_csv(DATA_PATH, delimiter='\t');    
data = data.iloc[:DATA_COUNT, 9:]

input = data.iloc[:, :-9]
target = data.iloc[:, -9:-7]

In [226]:
input.head()

Unnamed: 0,SL0-relID,SL0-driftT,SL0-alpha,SL1-relID,SL1-driftT,SL1-alpha,SL2-relID,SL2-driftT,SL2-alpha,SL3-relID,...,SL5-alpha,SL6-relID,SL6-driftT,SL6-alpha,SL7-relID,SL7-driftT,SL7-alpha,SL8-relID,SL8-driftT,SL8-alpha
26,0.0,-0.048085,0.466089,0.0625,-0.071386,0.005157,0.0,-0.098006,-0.61497,0.0,...,0.587446,0.113281,-0.209359,-0.413887,-0.074219,-0.239288,0.0,0.0,0.0,-0.00047
26,0.0,-0.048085,0.466089,0.0625,-0.071386,0.005157,0.0,-0.098006,-0.61497,0.0,...,0.587446,0.113281,-0.209359,-0.413887,-0.074219,-0.239288,0.0,0.0,0.0,-0.00047
26,0.0,-0.048085,0.466089,0.0625,-0.071386,0.005157,0.0,-0.098006,-0.61497,0.0,...,0.587446,0.113281,-0.209359,-0.413887,-0.074219,-0.239288,0.0,0.0,0.0,-0.00047
26,0.0,-0.048085,0.466089,0.0625,-0.071386,0.005157,0.0,-0.098006,-0.61497,0.0,...,0.587446,0.113281,-0.209359,-0.413887,-0.074219,-0.239288,0.0,0.0,0.0,-0.00047
26,0.0,0.0,-0.208208,0.027344,-0.06544,-0.1822,0.0,-0.084502,0.026128,1.0,...,0.11339,0.0625,-0.179397,0.110643,0.292969,-0.204515,0.019405,0.0,-0.229258,-0.00047


In [227]:
X_train, X_test = train_test_split(input, random_state=RANDOM_SEED)
y_train, y_test = train_test_split(target, random_state=RANDOM_SEED)


Method for plotting the accuracy history from the model training history:

In [228]:
# Method for plotting the accuracy history from the model training history
def plot_metric_history(history: tf.keras.callbacks.History, metric: str):
    n_epochs = len(history[metric])

    plt.plot(range(1, n_epochs + 1), history[metric], label=f'train {metric}')
    plt.plot(range(1, n_epochs + 1), history[f'val_{metric}'], label=f'validation {metric}')
    plt.legend()
    plt.title(f'Training and validation {metric} for {n_epochs} epochs of training.')
    plt.show()


In [240]:
def plot_dead_weights(weight_values: List):
    plt.scatter(weight_values, label='Amount ofWeight Values')
    plt.xlabel('Layer Index')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

In [241]:
def plot_dead_bias(bias_values: List):
    plt.scatter(bias_values, label='Amount of bias Values')
    plt.xlabel('Layer Index')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

## Bayesian Optimization overview steps:
<ul>
<ls>1.Build a surrogate probability model of the objektiv function</ls>
</ls>2. Find the hyperparameters that perfomr best on the surrogate</ls>
</ls>3. Applay the hyperparamerter to the thrue objectiv function</ls>
</ls>4. Update th surrgate midel incorporating th enew results</ls>
</ls>5. Repeat steps 2- 4 until max interation or time is reached</ls>
</ul>



In [231]:
# activation function: tanh(x/2)
def tanh(x):
    return backend.tanh(x/2)

In [242]:
def objective(trial):

    number_of_layer = trial.suggest_int("number_of_layer", MIN_HIDDENLAYERS, MAX_HIDDENLAYERS)
    num_neurons_per_layer = []
    weights_history =[]
    bias_history =[]

    for layer in range(number_of_layer):

        layer_id = "layer_" + str(layer)
        layer_ = trial.suggest_int(layer_id, MIN_NEURON_PER_LAYER, MAX_NEURON_PER_LAYER)
        num_neurons_per_layer.append(layer_)

    num_neurons_per_layer = np.array(num_neurons_per_layer)
        

    
    print(f'------numbers of neuron per layer-----')
    print(num_neurons_per_layer)
    print(f'------')
    fitness_value: float
    verbose = False
    plot = False
    
    # sort the num_neurons parameter list
    num_neurons_per_layer_without_zeros = num_neurons_per_layer[num_neurons_per_layer != 0]
    num_neurons_per_layer_sorted = np.zeros(MAX_HIDDENLAYERS)

    for i in range(len(num_neurons_per_layer_without_zeros)):
        num_neurons_per_layer_sorted[i] = int(num_neurons_per_layer_without_zeros[i])
    
    activation_function = Activation(tanh)
    num_neurons_per_layer = num_neurons_per_layer_sorted

    # create model: 
    cnn_model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(
            max(int(num_neurons_per_layer_without_zeros[0]), 1),
            input_shape = (27,),  
            activation=activation_function)
    ])

    for layer in range(1, len(num_neurons_per_layer_without_zeros)):
        num_neurons = int(num_neurons_per_layer_without_zeros[layer])
        cnn_model.add(
            tf.keras.layers.Dense(
                num_neurons,
                activation = activation_function))

    # The sigmoid activation function outputs a value between 0 and 1, 
    # which can be interpreted as the probability of the positive class
    cnn_model.add(tf.keras.layers.Dense(
        2, 
        activation = 'sigmoid'))


    # Specify the loss fuction, optimizer, metrics
    cnn_model.compile(
        loss = tf.keras.losses.MeanSquaredError(),
        optimizer = "sgd",#'adam', #'sdg'
        metrics = [METRIC]
    )

    
    # add early stopping callback to save time
    es = EarlyStopping(monitor = f'val_{METRIC}', mode ='max', patience = 5)
    # Train the model
    history = cnn_model.fit(X_train, y_train, epochs= NUM_EPOCHS, validation_split = 0.2, batch_size= 2, callbacks = [es], use_multiprocessing=True, verbose=0)
    
    bias_values = []
    weight_values = []
    bias_values_deat = []
    weight_values_deat = []

    for layer in cnn_model.layers:
        if isinstance(layer, tf.keras.layers.Dense):
            weights = layer.get_weights()
            
            weight = weights[0]
            bias = weights[1]

            bias_values.append(np.sum(np.abs(bias)))
            weight_values.append(np.sum(np.abs(weight)))

            weight_threshold = np.mean(np.abs(weight)) * 0.1
            bias_threshold = np.mean(np.abs(bias)) * 0.05

            amount_dead_weights = len(np.where(weight < weight_threshold)[0])
            amount_dead_bias = len(np.where(bias< weight_threshold)[0])

            bias_values_deat.append(amount_dead_weights)
            weight_values_deat.append(amount_dead_bias)

            print(f'amount of dead weight{amount_dead_weights} and amount of dead bias {amount_dead_bias}')
            #weight[np.abs(weight) < weight_threshold] = 0
            #bias[np.abs(bias) < bias_threshold] = 0
            #layer.set_weights([weight, weights[1]])

    plot_dead_weights(weight_values)
    plot_dead_bias(bias_values)

    plot_dead_weights(bias_values_deat)
    plot_dead_bias(weight_values_deat)

    # TODO choose the best
    last_result = history.history[f"val_{METRIC}"][-1:][0]
    best_result = max(history.history[f"val_{METRIC}"])

   
    #weights = cnn_model.get_weights()
    #amount_dead_weights = len(np.where(abs(weights[0]) < WEIGHT_THRESH)[0])
    
    

    if (plot):
        plot_metric_history(history.history, METRIC)
    if (verbose):
        #print (cnn_model.summary())
        #print (history.history)
        print (f"last {METRIC}: {last_result:0.05f}, best {METRIC}: {best_result:0.05f}, parameter: {num_neurons_per_layer}")
       
    return last_result, -amount_dead_weights, -amount_dead_bias
    

    MedianPruner: This pruner stops trials that are unlikely to beat the median performance of all trials run so far.

    HyperbandPruner: This pruner is based on the Hyperband algorithm and is designed for early stopping of poorly performing trials.

    SuccessiveHalvingPruner: This pruner is based on the successive halving algorithm and is designed for early stopping of poorly performing trials.

    NopPruner: This pruner does not stop any trials and is useful for debugging or for evaluating the performance of the optimization algorithm without the influence of pruning.

    TakestepPruner: This pruner allows you to customize the pruning strategy by providing a callback function that can stop trials based on any criteria you define.

    ThresholdPruner: This pruner stops trials that have objective values worse than a threshold.

In [233]:
#ensemble pruning = useing multiple pruners in an ensemble, the trial is stopped if any of the pruners request it.
#from optuna.pruners import MedianPruner, HyperbandPruner, SuccessiveHalvingPruner, NopPruner, TakestepPruner, ThresholdPruner

#pruner_1 = MedianPruner()
#j
#pruner_2 = HyperbandPruner()
#study = create_study(pruner=[pruner_1, pruner_2])

In [243]:
print("-----------")
#sampler = optuna.samplers.TPESampler(seed=10) TODO
#sampler = optuna.samplers.RandomSampler(seed=RANDOM_SEED)
sampler = optuna.samplers.NSGAIISampler()
pruner  = optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=30, interval_steps=10)

    
study = optuna.create_study(directions=["maximize","maximize"], sampler=sampler, pruner= pruner)
study.optimize(objective, n_trials= N_TRAILS)
#study.optimize(objective, n_trials=N_TRAILS)
print("----------j-")


[32m[I 2023-02-10 10:53:03,636][0m A new study created in memory with name: no-name-b08576e2-269e-412c-94c0-6cf29bccc472[0m


-----------
------numbers of neuron per layer-----
[170 271  14  24 298 119 150 107  99 151 139 278   1  40  17 232 260  72
  48 170]
------


[33m[W 2023-02-10 10:53:07,895][0m Trial 0 failed with parameters: {'number_of_layer': 20, 'layer_0': 170, 'layer_1': 271, 'layer_2': 14, 'layer_3': 24, 'layer_4': 298, 'layer_5': 119, 'layer_6': 150, 'layer_7': 107, 'layer_8': 99, 'layer_9': 151, 'layer_10': 139, 'layer_11': 278, 'layer_12': 1, 'layer_13': 40, 'layer_14': 17, 'layer_15': 232, 'layer_16': 260, 'layer_17': 72, 'layer_18': 48, 'layer_19': 170} because of the following error: TypeError("scatter() missing 1 required positional argument: 'y'").[0m
Traceback (most recent call last):
  File "/home/feline/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_41421/1856321188.py", line 99, in objective
    plot_dead_weights(weight_values)
  File "/tmp/ipykernel_41421/4113070843.py", line 2, in plot_dead_weights
    plt.scatter(weight_values, label='Amount ofWeight Values')
TypeError: scatter() missing 1 required positional argument: 'y

amount of dead weight2368 and amount of dead bias 170
amount of dead weight24172 and amount of dead bias 271
amount of dead weight1960 and amount of dead bias 14
amount of dead weight193 and amount of dead bias 24
amount of dead weight3734 and amount of dead bias 298
amount of dead weight18625 and amount of dead bias 119
amount of dead weight9343 and amount of dead bias 150
amount of dead weight8453 and amount of dead bias 107
amount of dead weight5483 and amount of dead bias 99
amount of dead weight7810 and amount of dead bias 151
amount of dead weight11055 and amount of dead bias 139
amount of dead weight20271 and amount of dead bias 278
amount of dead weight139 and amount of dead bias 1
amount of dead weight24 and amount of dead bias 40
amount of dead weight382 and amount of dead bias 17
amount of dead weight2100 and amount of dead bias 231
amount of dead weight31720 and amount of dead bias 221
amount of dead weight9860 and amount of dead bias 46
amount of dead weight1821 and amount

TypeError: scatter() missing 1 required positional argument: 'y'

In [244]:

#pareto_front = study.get_pareto_front_trials()
pareto_front = study.best_trials

#frozen trial= snapshot of a trial at a specific point in time
for trial in pareto_front:
    print("Trial number:", trial.number)
    print("state:", trial.params)
    #objective_values = trial.values
    #objective_values = [obj.value for obj in trial.val]
    print("Objective values:", trial.values)
    
    #trial.report()
    #print("Objective intermediate_values:", trial.intermediate_values)
    # Access other information of the trial as needed.


#trial = study.best_trial
    print('Accuracy: {}'.format(trial.values))
    print("Best hyperparameters: {}".format(trial.params))
    print("-----------")

target_index = 0
#optuna.visualization.plot_optimization_history(study=study,target = target_index, target_name="Objective 1")
#plt.show()
print("-----------")
#optuna.visualization.plot_slice(study)
#optuna.visualization.plot_pareto_front(study)

optuna.visualization.plot_pareto_front(study, target_names=["accuracy", "weights","bias"]).show()
optuna.visualization.plot_param_importances( study, target=lambda t: t.values[0], target_name="accurcy").show()


[33m[W 2023-02-10 11:56:39,751][0m Your study does not have any completed trials.[0m


-----------


[33m[W 2023-02-10 11:56:39,767][0m Study instance does not contain completed trials.[0m


The set of all non-dominated solutions is referred to as the Pareto front. The Pareto front represents the trade-off between the objectives and provides insight into the optimal solutions. Solutions on the Pareto front are often called Pareto-optimal solutions.

In [236]:
# Narrowest search space but it doesn't include the global optimum point.
#study2 = optuna.create_study(study_name="x=[1,3), y=[1,3)", sampler=sampler)
#study2.optimize(lambda t: objective(t, 1, 5, MIN_NEURON_PER_LAYER,MAX_NEURON_PER_LAYER), n_trials=10)

#study3 = optuna.create_study(study_name="x=[1,3), y=[1,3)", sampler=sampler)
#study3.optimize(lambda t: objective(t, 0, 5, MIN_NEURON_PER_LAYER,MAX_NEURON_PER_LAYER), n_trials=500)


#fig = optuna.visualization.plot_edf([study,study2])
#fig.show()

In [237]:
#Plot intermediate values of all trials in a study
#fig = optuna.visualization.plot_intermediate_values(study)
#fig.show()

In [238]:
#Plot optimization history of all trials in a study
#fig = optuna.visualization.plot_optimization_history(study)
#fig.show()

In [239]:
#Plot the high-dimensional parameter relationships in a study
fig = optuna.visualization.plot_parallel_coordinate(study,params = ["number_of_layer","layer_0", "layer_1","layer_2", "layer_3","layer_4", "layer_5","layer_6", "layer_7","layer_8", "layer_9"])
fig.show()


#Plot the high-dimensional parameter relationships in a study
fig = optuna.visualization.plot_parallel_coordinate(study,params = ["number_of_layer","layer_0", "layer_1","layer_2", "layer_3","layer_4", "layer_5","layer_6", "layer_7","layer_8"])
fig.show()

fig = optuna.visualization.plot_parallel_coordinate(study,params = ["number_of_layer"])
fig.show()

ValueError: If the `study` is being used for multi-objective optimization, please specify the `target`.

In [None]:
#fPlot the Pareto front of a study
fig = optuna.visualization.plot_pareto_front(study, target_names=["accuracy", "v1"])
fig.show()