In [1]:
import itertools
import os

# comet_ml stays ahead of the keras/tensorflow imports, as in the original order.
from comet_ml import Experiment
import keras_tuner as kt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, ReLU
from keras.models import Sequential
from scikeras.wrappers import KerasClassifier

from keras_metrics_module import f1_1_func, f1_0_func, recall_func, precision_func, f1_macro_func
from modules import json_module, h5py_module, dirs_module
from modules.testing_module import metrics_report

# Best-effort: ask TensorFlow to grow GPU memory on demand instead of
# reserving it all up front. Applied to every visible GPU; a machine with no
# GPU simply skips the loop.
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as gpu_config_error:
    # Still non-fatal, but no longer silent, and KeyboardInterrupt/SystemExit
    # are no longer swallowed as they were by the bare `except`.
    print(f'Could not enable GPU memory growth: {gpu_config_error}')

In [2]:
# --- Experiment identification ---------------------------------------------
SPLIT_ID = 'S3'
DURATION = '05S'
# REGIME is reassigned in a later cell; EXPERIMENT_ID below captures the value
# it has at this point.
REGIME = 'all'
MACHINE_ID = ''
FEATURE = ''
ADDITIONAL_INFO = 'COMBINED'

# Project name; it is also interpolated into the training output paths.
COMET_PROJECT = '0917_1s_n15_m07_f10'

# Underscore-joined identifier; empty parts leave consecutive underscores.
EXPERIMENT_ID = '_'.join([SPLIT_ID, DURATION, REGIME, MACHINE_ID, ADDITIONAL_INFO])

# --- Input data ------------------------------------------------------------
data_dir = r'G:\data\splits\s3_1s_bayesian'

In [4]:
# Dataset variant tag used in the .npy file names (replaces the earlier value).
REGIME = 'N15_M07_F10'

# FFT settings, one (bin_width, zero_padding_multiple) pair per entry.
fft_pairwise_bin_width_zero_padding_multiple = [(15, 1)]
# Wavelet settings; each pair is read below as (levels, bin_width).
wavelet_pairwise_bin_width_levels_multiple = [(3, 700)]

# Only the first pair of each list is used.
fft_bin_width = fft_pairwise_bin_width_zero_padding_multiple[0][0]
fft_zero_padding_multiple = fft_pairwise_bin_width_zero_padding_multiple[0][1]
wavelet_levels = wavelet_pairwise_bin_width_levels_multiple[0][0]
wavelet_bin_width = wavelet_pairwise_bin_width_levels_multiple[0][1]

# Suffix shared by every x_*/y_* file of this feature combination.
combined_id = f'fft_{fft_bin_width}_{fft_zero_padding_multiple}_wavelet_{wavelet_bin_width}_{wavelet_levels}'

In [None]:
def _class_weight_from_labels(labels):
    """Derive binary class weights from label frequencies.

    Class 0 is weighted by count(1) / count(0) and class 1 by 1, so that both
    classes contribute the same total weight.

    Args:
        labels: iterable of labels containing both 0 and 1.

    Returns:
        dict mapping 0 and 1 to their weights.

    Raises:
        ValueError: if either class is absent, since the ratio is undefined.
    """
    label_counts = pd.Series(list(labels)).value_counts()
    missing_labels = [label for label in (0, 1) if label not in label_counts.index]
    if missing_labels:
        raise ValueError(f'Cannot derive class weights: labels {missing_labels} are absent.')
    return {
        0: label_counts[1] / label_counts[0],
        1: 1
    }


# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on label files produced by this project.
x_train = np.load(f'{data_dir}/x_train_{REGIME}_{combined_id}.npy')
x_train = x_train.astype('float32')
y_train = np.load(f'{data_dir}/y_train_{REGIME}_{combined_id}.npy', allow_pickle = True)

# Weights passed to training to balance the two classes.
class_weight = _class_weight_from_labels(y_train)

x_val = np.load(f'{data_dir}/x_val_{REGIME}_{combined_id}.npy')
x_val = x_val.astype('float32')
y_val = np.load(f'{data_dir}/y_val_{REGIME}_{combined_id}.npy', allow_pickle = True)

# The validation split gets its own weights, expanded to one weight per sample.
val_class_weight = _class_weight_from_labels(y_val)
val_sample_weights = np.array([val_class_weight[y_i_val] for y_i_val in y_val])

# Feature vectors are flat: one input dimension per column of x_train.
INPUT_SHAPE = (x_train.shape[1],)

In [4]:
# Class distribution of the training labels; the bare last expression makes
# it the cell's displayed output.
y_value_counts = pd.Series([label for label in y_train]).value_counts()
y_value_counts

0    17460
1    15340
dtype: int64

In [5]:
def _dense_stage(num_nodes, l2_lambda, batch_norm, dropout, **dense_kwargs):
    """Return the layers of one hidden stage: Dense -> ReLU -> [BatchNorm] -> [Dropout(0.1)]."""
    stage_layers = [Dense(num_nodes,
                          kernel_regularizer = tf.keras.regularizers.l2(l2 = l2_lambda),
                          **dense_kwargs),
                    ReLU()]
    if batch_norm:
        stage_layers.append(BatchNormalization())
    if dropout:
        stage_layers.append(Dropout(0.1))
    return stage_layers


def build_model(hp):
    """Build and compile a dense binary classifier from tuner hyperparameters.

    The network is an input stage of `first_dense_layer_num_nodes` units
    followed by `num_dense_layers` further stages, each `shrinkage_factor`
    times as wide as the previous one, and a single sigmoid output unit.

    Args:
        hp: hyperparameter container exposing `Int`, `Float` and `Choice`.

    Returns:
        The compiled `Sequential` model.

    Raises:
        ValueError: if the configured optimizer name is not supported.
    """
    num_dense_layers = hp.Int('num_dense_layers',2,7)
    first_dense_layer_num_nodes = hp.Int('first_dense_layer_num_nodes', 64, INPUT_SHAPE[0], step = 32)
    dense_layers_shrinkage_factor = hp.Float('shrinkage_factor', 0.25, 1, step = 0.125)
    # NOTE(review): 'categorical_hinge' is applied here to a single sigmoid
    # output; confirm this is intended rather than 'hinge'.
    loss_name = hp.Choice('loss_name', ['binary_crossentropy','categorical_hinge'])
    batch_norm = True
    optimizer_name = 'adam'
    learning_rate = hp.Float('learning_rate', 0.001, 0.2, sampling = 'log')
    dropout = hp.Choice('dropout', [True, False])
    l2_lambda = hp.Float('l2_lambda', 0.01, 1, step = 0.05)

    layers_list = _dense_stage(first_dense_layer_num_nodes, l2_lambda, batch_norm, dropout,
                               input_shape = INPUT_SHAPE)

    dense_layer_current_num_nodes = first_dense_layer_num_nodes
    for _ in range(num_dense_layers):
        dense_layer_current_num_nodes = dense_layer_current_num_nodes * dense_layers_shrinkage_factor
        # Repeated shrinking yields a float that can drop below 1 (e.g.
        # 64 * 0.25**4); truncate to an int and keep at least one unit so a
        # stage never ends up with a fractional or zero width.
        stage_num_nodes = max(1, int(dense_layer_current_num_nodes))
        layers_list.extend(_dense_stage(stage_num_nodes, l2_lambda, batch_norm, dropout))

    if dropout:
        # Drop the trailing Dropout so none sits directly before the output unit.
        layers_list = layers_list[:-1]
    layers_list.append(Dense(1, activation = 'sigmoid'))

    if optimizer_name.lower() == 'adam':
        optimizer = tf.optimizers.Adam(learning_rate = learning_rate)
    else:
        # Fail with a clear message instead of an unbound `optimizer` below.
        raise ValueError(f'Unsupported optimizer: {optimizer_name!r}')

    built_model = Sequential(layers_list)
    built_model.compile(loss = loss_name, optimizer = optimizer,
                        metrics = ['accuracy', f1_macro_func, f1_1_func, f1_0_func, recall_func, precision_func])
    return built_model

In [6]:
# TensorBoard logs for the hyperparameter search.
tensorboard_log_dir = f'G:/training/{COMET_PROJECT}/tensorboard'

tb_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_log_dir,
    histogram_freq=100,
    embeddings_freq=100,
    write_graph=True,
    update_freq='batch'
)

# NOTE(review): early stopping watches val_loss while the tuner objective
# below is val_f1_macro_func; confirm the mismatch is intentional.
stop_callback = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='auto')

# Bayesian search over the space defined in build_model, maximising the
# validation macro-F1. Each trial is trained three times.
# NOTE(review): 'kerass_tuner' looks like a typo but is left unchanged because
# the directory presumably already holds earlier trial results.
tuner = kt.BayesianOptimization(
    build_model,
    objective = kt.Objective('val_f1_macro_func', direction="max"),
    max_trials = 1000,
    executions_per_trial = 3,
    num_initial_points = 10,
    directory=f'G:/training/{COMET_PROJECT}/kerass_tuner',
    project_name="initial_val",
)

# Extra keyword arguments are forwarded to the model's fit call.
# `use_multiprocessing` was dropped: it only applies to generator/Sequence
# inputs, not to the in-memory arrays used here, and newer Keras versions no
# longer accept it.
tuner.search(x_train, y_train,
             batch_size = 16,
             epochs = 20,
             class_weight = class_weight,
             verbose = 10,
             validation_data = (x_val, y_val, val_sample_weights),
             callbacks= [tb_callback, stop_callback],
             shuffle = True,
             )

Trial 30 Complete [00h 20m 34s]
val_f1_macro_func: 0.3995252748330434

Best val_f1_macro_func So Far: 0.4278547763824463
Total elapsed time: 06h 25m 27s

Search: Running Trial #31

Hyperparameter    |Value             |Best Value So Far 
num_dense_layers  |7                 |2                 
first_dense_lay...|896               |288               
shrinkage_factor  |1                 |0.25              
loss_name         |binary_crossent...|categorical_hinge 
learning_rate     |0.0010981         |0.015958          
dropout           |1                 |0                 
l2_lambda         |0.16              |0.51              

Epoch 1/20
Epoch 2/20


KeyboardInterrupt: 

In [7]:
# Held-out test split: features cast to float32, labels loaded as stored
# (pickled objects allowed, as for the other splits).
x_test = np.load(f'{data_dir}/x_test_{REGIME}_{combined_id}.npy').astype('float32')
y_test = np.load(
    f'{data_dir}/y_test_{REGIME}_{combined_id}.npy',
    allow_pickle = True,
)


In [8]:
# Large upper bound; presumably meant to retrieve every model the tuner has.
best_models = tuner.get_best_models(10000)

results_list = list()
models_dir = 'saved_models'
# Create the target directory up front so saving does not fail on a fresh
# checkout where it does not exist yet.
os.makedirs(models_dir, exist_ok = True)

# Files are numbered from 1 in the order the tuner returned the models.
for i, model_i in enumerate(best_models, start = 1):
    model_i.save(f'{models_dir}/model_{i}.h5')

    # Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
    y_pred = model_i.predict(x_test)
    y_pred = np.where(y_pred > 0.5, 1, 0).ravel()

    test_metrics = metrics_report(y_test, y_pred)
    print(test_metrics)
    results_list.append(test_metrics)

# One row per model, in the same order as the saved files.
pd.DataFrame(results_list).to_excel('results_best_models.xlsx')

{'accuracy': 0.7392, 'f1_macro': 0.7276, 'f1_weighted': 0.7245, 'f1_micro': 0.7392, 'f1_1': 0.6714, 'f1_0': 0.7838, 'precision': 1.0, 'recall': 0.5054, 'specificity': 1.0, 'npv': 0.6445, 'TP': 6560, 'FP': 0, 'FN': 6420, 'TN': 11640}
{'accuracy': 0.9041, 'f1_macro': 0.904, 'f1_weighted': 0.9037, 'f1_micro': 0.9041, 'f1_1': 0.9, 'f1_0': 0.9079, 'precision': 1.0, 'recall': 0.8182, 'specificity': 1.0, 'npv': 0.8314, 'TP': 10620, 'FP': 0, 'FN': 2360, 'TN': 11640}
{'accuracy': 0.7939, 'f1_macro': 0.789, 'f1_weighted': 0.7873, 'f1_micro': 0.7939, 'f1_1': 0.757, 'f1_0': 0.821, 'precision': 1.0, 'recall': 0.609, 'specificity': 1.0, 'npv': 0.6964, 'TP': 7905, 'FP': 0, 'FN': 5075, 'TN': 11640}
{'accuracy': 0.7403, 'f1_macro': 0.7247, 'f1_weighted': 0.7283, 'f1_micro': 0.7403, 'f1_1': 0.7901, 'f1_0': 0.6593, 'precision': 0.6882, 'recall': 0.9274, 'specificity': 0.5315, 'npv': 0.8679, 'TP': 12038, 'FP': 5453, 'FN': 942, 'TN': 6187}
{'accuracy': 0.6366, 'f1_macro': 0.6169, 'f1_weighted': 0.6216, 'f1

  precision = round(true_positives / (true_positives + false_positives), 4)
