# Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import tensorflow as tf
from tensorflow import keras
import optuna
import datetime
import os
import random

%load_ext tensorboard

# Pin every RNG the notebook touches so that repeated runs start from the
# same state: Python's stdlib generator, NumPy's legacy global generator,
# and TensorFlow's global seed (which deliberately uses a different value).
random.seed(42)
np.random.seed(42)
tf.random.set_seed(234)

# Functions

## Data

In [None]:
def load_data(data_dir=''):
    """Load the train/validation/test tensors from disk as NumPy arrays.

    Parameters
    ----------
    data_dir : str, optional
        Directory that contains ``X_train.pt``, ``X_valid.pt``, ``X_test.pt``,
        ``y_train.pt``, ``y_valid.pt`` and ``y_test.pt``. Defaults to ``''``,
        i.e. the current working directory.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_valid, X_test, y_train, y_valid, y_test)``. Feature
        arrays keep the dtype they were saved with; label arrays are cast to
        ``float64``.
    """
    def _load(filename):
        # map_location='cpu' makes tensors that were saved from a GPU loadable
        # on any machine and guarantees that `.numpy()` below is legal.
        # NOTE(security): torch.load unpickles the file; only point `data_dir`
        # at trusted data.
        return torch.load(os.path.join(data_dir, filename), map_location='cpu')

    splits = ('train', 'valid', 'test')
    features = [_load(f'X_{split}.pt').detach().numpy() for split in splits]
    labels = [
        _load(f'y_{split}.pt').to(torch.float64).detach().numpy()
        for split in splits
    ]
    return (*features, *labels)

# Materialise all six splits once at notebook level so the inspection cells
# below can look at them without touching the disk again.
(X_train, X_valid, X_test,
 y_train, y_valid, y_test) = load_data()

In [None]:
# Sanity check: features and labels of each split must agree on the number
# of rows. Printed in the order train, test, valid.
for features, labels in ((X_train, y_train), (X_test, y_test), (X_valid, y_valid)):
    print(features.shape, labels.shape)

## Model

In [None]:
# Search-space choices shared by every Dense layer.
_ACTIVATIONS = ['relu', 'leaky_relu', 'elu', 'sigmoid', 'tanh', 'gelu']
_REGULARIZER_KINDS = ['l1', 'l2', 'l1l2', None]
_KERNEL_INITIALIZERS = ['glorot_uniform', 'glorot_normal',
                        'he_uniform', 'he_normal',
                        'lecun_uniform', 'lecun_normal']


def _suggest_regularizer(trial, target, layer_i, low):
    """Sample an optional L1 / L2 / L1L2 regulariser for one layer.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw the hyperparameters.
    target : str
        ``'bias'`` or ``'kernel'``; used as the prefix of the parameter names.
    layer_i : int
        Index of the layer, used as the suffix of the parameter names.
    low : float
        Lower bound of the regularisation strength (upper bound is 0.5).

    Returns
    -------
    tf.keras.regularizers.Regularizer or None
        ``None`` when the sampled kind is ``None``.
    """
    kind = trial.suggest_categorical(f'{target}_reg_init_layer_{layer_i}', _REGULARIZER_KINDS)
    strength_name = f'{target}_reg_layer_{layer_i}'
    if kind == 'l1':
        return tf.keras.regularizers.l1(trial.suggest_float(strength_name, low, 0.5))
    if kind == 'l2':
        return tf.keras.regularizers.l2(trial.suggest_float(strength_name, low, 0.5))
    if kind == 'l1l2':
        # Draw l1 before l2 so the order of suggest calls stays stable.
        l1_strength = trial.suggest_float(f'{strength_name}_l1', low, 0.5)
        l2_strength = trial.suggest_float(f'{strength_name}_l2', low, 0.5)
        return tf.keras.regularizers.L1L2(l1_strength, l2_strength)
    return None


def create_model(trial, input_shape, n_classes=10):
    """Build a Sequential MLP whose architecture is sampled from `trial`.

    The network is: an input Dense layer (layer 0), then 0-4 hidden stages of
    ``[BatchNormalization] -> Dropout -> Dense``, then an optional final
    BatchNormalization and a softmax output layer. Batch normalisation is a
    single on/off switch shared by all stages.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw every architectural hyperparameter.
    input_shape : tuple
        Shape of one input sample (without the batch dimension).
    n_classes : int, optional
        Width of the softmax output layer. Defaults to 10.

    Returns
    -------
    tf.keras.models.Sequential
        The uncompiled model.
    """
    model = tf.keras.models.Sequential()

    # layer 0
    # NOTE: the key 'nunints_layer_0' is misspelt (hidden layers use
    # 'nunits_layer_{i}'). It is kept as-is so that parameter names stay
    # compatible with trials already stored in existing studies.
    n_units_0 = trial.suggest_int('nunints_layer_0', 128, 2048, step=128)
    actv_func_0 = trial.suggest_categorical('actv_func_layer_0', _ACTIVATIONS)
    bias_reg_0 = _suggest_regularizer(trial, 'bias', 0, 0.0001)
    kernel_reg_0 = _suggest_regularizer(trial, 'kernel', 0, 0.0001)
    kernel_initializer_0 = trial.suggest_categorical('kernel_initializer_layer_0', _KERNEL_INITIALIZERS)

    model.add(tf.keras.layers.Dense(
        units=n_units_0, activation=actv_func_0,
        kernel_initializer=kernel_initializer_0,
        bias_regularizer=bias_reg_0, kernel_regularizer=kernel_reg_0,
        input_shape=input_shape))

    # hidden layers
    num_layers = trial.suggest_int('num_layers', 0, 4)
    batch_norm = trial.suggest_categorical('batch_norm', [True, False])
    for layer_i in range(1, num_layers + 1):
        n_units = trial.suggest_int(f'nunits_layer_{layer_i}', 128, 2048, step=128)
        actv_func = trial.suggest_categorical(f'actv_func_layer_{layer_i}', _ACTIVATIONS)
        bias_reg = _suggest_regularizer(trial, 'bias', layer_i, 0.0001)
        # Hidden-layer kernel regularisers start at 0.001 (layer 0 starts at
        # 0.0001); the bound is kept unchanged to preserve the search space.
        kernel_reg = _suggest_regularizer(trial, 'kernel', layer_i, 0.001)
        kernel_initializer = trial.suggest_categorical(
            f'kernel_initializer_layer_{layer_i}', _KERNEL_INITIALIZERS)

        if batch_norm:
            model.add(tf.keras.layers.BatchNormalization())
        dropout_rate = trial.suggest_float(f'dropout_rate_layer_{layer_i}', 0.0, 0.999)
        model.add(tf.keras.layers.Dropout(dropout_rate))
        model.add(tf.keras.layers.Dense(
            n_units, actv_func,
            kernel_initializer=kernel_initializer,
            bias_regularizer=bias_reg, kernel_regularizer=kernel_reg))

    if batch_norm:
        model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))
    return model

In [None]:
def create_optimizer(trial):
    """Sample an optimizer type and its hyperparameters from `trial`.

    One of SGD, Adam, Nadam or Adamax is chosen. Every choice tunes a
    log-scaled learning rate (`opt_lr`); SGD additionally tunes a log-scaled
    momentum (`opt_momentum`) and whether Nesterov momentum is used
    (`opt_nesterov`).

    Returns the instantiated optimizer looked up by name on `tf.optimizers`.
    """
    optimizer_name = trial.suggest_categorical('optimizer', ['SGD', 'Adam', 'Nadam', 'Adamax'])

    # The learning-rate search range is identical for all four optimizers.
    hyperparams = {
        'learning_rate': trial.suggest_float('opt_lr', 1e-5, 1e-1, log=True),
    }
    if optimizer_name == 'SGD':
        hyperparams['momentum'] = trial.suggest_float('opt_momentum', 1e-5, 0.1, log=True)
        hyperparams['nesterov'] = trial.suggest_categorical('opt_nesterov', [True, False])

    optimizer_cls = getattr(tf.optimizers, optimizer_name)
    return optimizer_cls(**hyperparams)

## Objective / train

In [None]:
def objective(trial):
    """Optuna objective: train one sampled model and score it on validation data.

    Samples a batch size, a model (`create_model`), an optimizer
    (`create_optimizer`) and a learning-rate decay factor, trains for up to
    500 epochs with early stopping, and logs the run to TensorBoard under
    ``logs/optuna/<timestamp>``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw all hyperparameters.

    Returns
    -------
    float
        The highest micro-averaged validation F1 score reached in any epoch.
    """
    # Up to 1000 models are built in this one process. Reset Keras' global
    # state first, otherwise memory use keeps growing from trial to trial.
    tf.keras.backend.clear_session()

    X_train, X_valid, _, y_train, y_valid, _ = load_data()

    BATCH_SIZE = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512, 1024])
    PREFETCH = 1
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE).prefetch(PREFETCH)
    valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, y_valid)).batch(BATCH_SIZE).prefetch(PREFETCH)

    model = create_model(trial, X_train.shape[1:])
    optimizer = create_optimizer(trial)
    # The bare name `F1Score` is not imported or defined anywhere in this
    # notebook, so the Keras metric is referenced explicitly (needs a
    # TensorFlow release that ships tf.keras.metrics.F1Score). Its default
    # name 'f1_score' is what the 'val_f1_score' monitors below rely on.
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=optimizer,
                  metrics=[tf.keras.metrics.F1Score(average='micro')])

    # callbacks
    logdir = os.path.join("logs/optuna", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
    earlystopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_f1_score', patience=25, mode='max')
    # The LR scheduler's patience (15) is shorter than early stopping's (25),
    # so the learning rate is reduced at least once before training is stopped.
    lr_scheduler_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_f1_score', patience=15, mode='max',
                                                                 factor=trial.suggest_float('lr_scheduler_factor', 0.1, 0.75))

    history = model.fit(train_dataset, epochs=500,
                        validation_data=valid_dataset,
                        callbacks=[tensorboard_callback, lr_scheduler_callback, earlystopping_callback],
                        verbose=0)
    print('\n')
    return np.max(history.history['val_f1_score'])

# Run

## Start 24h

In [None]:
# Seeded, univariate TPE sampler.
sampler = optuna.samplers.TPESampler(
    n_startup_trials=40, n_ei_candidates=48,
    multivariate=False, seed=42
)
# The study is persisted in SQLite under a fixed name. load_if_exists=True
# makes this cell re-runnable: without it a second execution raises
# DuplicatedStudyError, and an interrupted 24 h search could not be resumed.
study = optuna.create_study(
    direction='maximize', sampler=sampler,
    study_name='study', storage='sqlite:///db.sqlite3',
    load_if_exists=True,
)
# Stops at whichever limit is hit first. n_trials counts the trials started
# by this call, not trials already stored in the study.
study.optimize(
    objective, n_trials=1000,
    timeout=24*3600, # in seconds
    n_jobs=1,
    show_progress_bar=True
)

In [None]:
# Objective value (best validation F1 per trial, as returned by `objective`)
# over the course of the study.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Estimated importance of each hyperparameter for the objective value.
optuna.visualization.plot_param_importances(study)

In [None]:
# Empirical distribution function of the objective values across trials.
optuna.visualization.plot_edf(study)

In [None]:
# Interaction between the LR-decay factor ('lr_scheduler_factor', sampled in
# `objective`) and the initial learning rate ('opt_lr', sampled in
# `create_optimizer`).
optuna.visualization.plot_contour(study, params=['lr_scheduler_factor', 'opt_lr'])

In [None]:
# Per-hyperparameter slice plots of sampled value vs. objective value.
optuna.visualization.plot_slice(study)

In [None]:
# Report the winning hyperparameter set, followed by the objective value it
# achieved.
best_trial = study.best_trial
print(best_trial.params)
print(best_trial.value)

In [None]:
# Same best-trial parameters as a bare expression, so the notebook shows the
# dict as the cell's output.
study.best_trial.params