In [18]:
import os

import numpy as np
import pandas as pd
from scipy.stats import median_absolute_deviation
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras 
import tensorflow_addons as tfa
import random
from keras.regularizers import l2
random_state = 1234
from sklearn.metrics import roc_curve, auc
from matplotlib import pyplot as plt
import optuna
from pathlib import Path
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [20]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [21]:
parent_dir = Path(os.getcwd()).parent.parent
cross_validation_dir = os.path.join(parent_dir, "Data", "train_test_indices.npy")
# dataset = "MSK_Impact_train"
dataset = "BCAST_train"
data_dir = os.path.join(parent_dir, "Data" , dataset)
train_test_indices = np.load(cross_validation_dir, allow_pickle = True)

In [22]:
def list_files_in_dir(dirname):
    dir_files = list()
    for root, _, files in os.walk(dirname):
        for file in files:
            dir_files.append(os.path.join(root, file))
    
    return dir_files

In [23]:
def build_mlp_model(input_shape=(96,), n_hidden_layers=2, n_hidden_nodes=16, 
                    activation="relu", learning_rate=0.001, weight_decay = 0, l2_kernel = 0.01, l2_bias = 0.01):
    # optimizer parameters
    loss = "binary_crossentropy"
    optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay = weight_decay)
    metrics = keras.metrics.AUC(name='auc')
    
    # ANN model
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=input_shape))
    for _ in range(n_hidden_layers):
        model.add(keras.layers.Dense(n_hidden_nodes, activation=activation, kernel_regularizer=l2(l2_kernel), bias_regularizer=l2(l2_bias)))
    
    model.add(keras.layers.Dense(1, activation="sigmoid"))
    
    # optimizer
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    
    return model

In [24]:
def get_row_indices_with_sum_zero(X):
    return X.index[(X.sum(axis=1) == 0)].tolist()

In [32]:
def scale_rows(X):
    return X.div(X.sum(axis=1), axis=0)

In [26]:
def dataset_generator(data_dir, num_files, y_col):

  for datafile in random.sample(list_files_in_dir(data_dir), num_files):
      data = pd.read_csv(datafile)
      X = data.iloc[:, :96]
      y = (data[y_col]).astype(np.int_)
      X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
#           print(X.sum(axis=0))
      X = scale_rows(X)
#           print(X.sum(axis=1))
      yield X, y

In [27]:
def scale_data(df):
    # Define the scaler 
    scaler = StandardScaler().fit(df)
    # Scales each individual row   
    df[df.columns] = scaler.fit_transform(df[df.columns])
    
    return df
    

In [28]:
def train_val_test_generator(data_dir, num_samples, y_col, test_frac=0.1, n_folds = 10):
    val_frac = test_frac/(1.0 - test_frac)
    
    for X, y in dataset_generator(data_dir, num_samples, y_col):
        fold_data = list()
        skf = StratifiedKFold(n_splits=n_folds, shuffle = True)
#         skf = StratifiedKFold(n_splits=num_folds)
        
        for train_val_index, test_index in skf.split(X, y):
            X_test, y_test = X.iloc[test_index, :], y.iloc[test_index]
            X_train, X_val, y_train, y_val = train_test_split(
                X.iloc[train_val_index,:], y.iloc[train_val_index], 
                test_size=val_frac, 
                random_state=random_state, 
                stratify=y.iloc[train_val_index]
            )
            
            #X_train, X_val, X_test = scale_columns(X_train, X_val, X_test)
            fold_data.append(((X_train, y_train), 
                              (X_val, y_val), 
                              (X_test, y_test)))
            
        yield fold_data

In [29]:
def train_val_test_generator_default(data_dir, num_samples, y_col, test_frac=0.1):
    
    val_frac = test_frac/(1.0 - test_frac)
    
    for X, y in dataset_generator(data_dir, num_samples, y_col):
        fold_data = list()
        for train_val_index, test_index in train_test_indices:
            X_test, y_test = X.iloc[test_index, :], y.iloc[test_index]
            X_train, X_val, y_train, y_val = train_test_split(
                X.iloc[train_val_index,:], y.iloc[train_val_index], 
                test_size=val_frac, 
                random_state=random_state, 
                stratify=y.iloc[train_val_index]
            )
#             print(train_val_index, test_index)
            
            #X_train, X_val, X_test = scale_columns(X_train, X_val, X_test)
            fold_data.append(((X_train, y_train), 
                              (X_val, y_val), 
                              (X_test, y_test)))
            
        yield fold_data

In [30]:
def compute_mlp_performance(trial, input_shape=(96,), data_dir=data_dir, n_folds=10, class_weight_0 = 1, class_weight_1 = 1, num_samples = 1, y_col = 'is_sig3_20'):
    aucs = list()
    models = list()
    
    # 60-20-20 split
    test_frac=1.0/float(n_folds)
#     fpr_list = []
#     tpr_list = []
#     roc_auc_list = []
    
    for folds_data in train_val_test_generator(data_dir, num_samples=num_samples, y_col=y_col):
        fold_aucs = list()
        fold_models = list()
        
        for fold_data in folds_data:
            # get data
            (X_train, y_train), (X_val, y_val), (X_test, y_test) = fold_data
            # build model and ensure that parameters passed in are within the normal range
            # if we don't type cast as integers, bayesian optimizer will guess float values
            model = build_mlp_model(input_shape, 
                                    trial.suggest_int('n_hidden_layers', 1,3), 
                                    trial.suggest_int('n_hidden_nodes', 20, 300), 
                                    trial.suggest_categorical("activation", ["relu", "sigmoid", "softmax"]), 
                                    trial.suggest_float('learning_rate', 1e-9, 1e-1),
                                    weight_decay = trial.suggest_float('weight_decay', 0, 5e-1),
                                    l2_kernel = trial.suggest_float('l2_kernel', 0, 5e-1),
                                    l2_bias = trial.suggest_float('l2_bias', 0, 5e-1))
            model.fit(X_train, y_train, 
                      validation_data=(X_val, y_val), 
                      epochs=1000, batch_size=32, verbose=0,
                      class_weight= {0 : trial.suggest_float('class_weight_0', 0, 5), 1 : trial.suggest_float('class_weight_1', 0, 5)},
                      callbacks=[keras.callbacks.EarlyStopping(monitor='val_auc', patience=5)])
            
            # evaluate
            y_score = model.evaluate(X_test, y_test, verbose=0)[1]
            fold_aucs.append(y_score)
            fold_models.append(model)
        aucs.append(fold_aucs)
        models.append(fold_models)
        
    # Gets median index value for all the different samples (rows)  
    medianIndices = [indices[len(aucs[0])//2] for indices in np.argsort(aucs, axis=1)]
    medianValues = [values[index] for values, index in zip(aucs, medianIndices)]
    
    # Gets the file which contains the median of median value
    fileInd = np.argsort(medianValues)[len(medianValues)//2]
    
    aucs = np.array(aucs)
    
    median_of_median_model = models[fileInd][medianIndices[fileInd]]
    median_of_median_auc = np.median(np.median(aucs, axis=1))
    mad_of_mad_auc = median_absolute_deviation(aucs, axis=1)
#     return median_of_median_auc, mad_of_mad_auc
    return median_of_median_auc



In [37]:
BCAST_CV_indices = []
for X, y in dataset_generator(data_dir, 1, 'is_sig3'):
    fold_data = list()
    skf = StratifiedKFold(n_splits=10, shuffle = True)
#         skf = StratifiedKFold(n_splits=num_folds)

    for train_val_index, test_index in skf.split(X, y):
        BCAST_CV_indices.append([train_val_index, test_index])
        

In [44]:
BCAST_CV_indices

[[array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  14,
          15,  17,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
          30,  31,  32,  33,  35,  36,  38,  40,  41,  42,  43,  44,  45,
          46,  47,  49,  50,  51,  53,  54,  55,  57,  58,  61,  62,  64,
          65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
          78,  80,  81,  82,  83,  84,  86,  87,  89,  90,  91,  92,  93,
          94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
         107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120,
         121, 123, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136,
         138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
         151, 152, 153, 154, 155, 156, 157, 159, 160, 161, 163, 164, 165,
         166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
         179, 180, 181, 183, 184, 185, 187, 188, 189, 190, 191, 192, 193,
         194, 195, 196, 197, 200, 201,

In [40]:
np.save('BCAST_CV_indices.npy', BCAST_CV_indices)

In [42]:
old_indices = np.load(r"D:\NIH\Mutational-Spectrum-master\Data\BCAST_CV_indices.npy", allow_pickle=True)

In [43]:
old_indices

array([[array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  14,
                15,  17,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
                30,  31,  32,  33,  35,  36,  38,  40,  41,  42,  43,  44,  45,
                46,  47,  49,  50,  51,  53,  54,  55,  57,  58,  61,  62,  64,
                65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
                78,  80,  81,  82,  83,  84,  86,  87,  89,  90,  91,  92,  93,
                94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
               107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120,
               121, 123, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136,
               138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
               151, 152, 153, 154, 155, 156, 157, 159, 160, 161, 163, 164, 165,
               166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
               179, 180, 181, 183, 184, 

In [33]:
study_is_sig3 = optuna.create_study(direction='maximize')
study_is_sig3.optimize(compute_mlp_performance, n_trials=300)

[32m[I 2022-07-14 12:50:41,330][0m A new study created in memory with name: no-name-5b2ef980-61ea-473b-80a8-6045046a062e[0m
[33m[W 2022-07-14 12:50:42,062][0m Trial 0 failed because of the following error: InvalidArgumentError()[0m
Traceback (most recent call last):
  File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\aaron\AppData\Local\Temp\ipykernel_6800\2296759501.py", line 28, in compute_mlp_performance
    model.fit(X_train, y_train,
  File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\tensorflow\python\eager\execute.py", line 54, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_im

InvalidArgumentError: Graph execution error:

Detected at node 'assert_greater_equal/Assert/AssertGuard/Assert' defined at (most recent call last):
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\traitlets\config\application.py", line 972, in launch_instance
      app.start()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
      await result
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in run_cell
      result = self._run_cell(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 2935, in _run_cell
      return runner(coro)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3134, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3337, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3397, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\aaron\AppData\Local\Temp\ipykernel_6800\3244150248.py", line 2, in <cell line: 2>
      study_is_sig3.optimize(compute_mlp_performance, n_trials=300)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\study.py", line 400, in optimize
      _optimize(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 66, in _optimize
      _optimize_sequential(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 163, in _optimize_sequential
      trial = _run_trial(study, func, catch)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
      value_or_values = func(trial)
    File "C:\Users\aaron\AppData\Local\Temp\ipykernel_6800\2296759501.py", line 28, in compute_mlp_performance
      model.fit(X_train, y_train,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 894, in train_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 987, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\compile_utils.py", line 501, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\metrics_utils.py", line 70, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\metrics\base_metric.py", line 140, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\metrics\metrics.py", line 1759, in update_state
      return metrics_utils.update_confusion_matrix_variables(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\metrics_utils.py", line 602, in update_confusion_matrix_variables
      tf.debugging.assert_greater_equal(
Node: 'assert_greater_equal/Assert/AssertGuard/Assert'
Detected at node 'assert_greater_equal/Assert/AssertGuard/Assert' defined at (most recent call last):
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\traitlets\config\application.py", line 972, in launch_instance
      app.start()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
      await result
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in run_cell
      result = self._run_cell(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 2935, in _run_cell
      return runner(coro)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3134, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3337, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\IPython\core\interactiveshell.py", line 3397, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\aaron\AppData\Local\Temp\ipykernel_6800\3244150248.py", line 2, in <cell line: 2>
      study_is_sig3.optimize(compute_mlp_performance, n_trials=300)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\study.py", line 400, in optimize
      _optimize(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 66, in _optimize
      _optimize_sequential(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 163, in _optimize_sequential
      trial = _run_trial(study, func, catch)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
      value_or_values = func(trial)
    File "C:\Users\aaron\AppData\Local\Temp\ipykernel_6800\2296759501.py", line 28, in compute_mlp_performance
      model.fit(X_train, y_train,
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 894, in train_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\training.py", line 987, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\engine\compile_utils.py", line 501, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\metrics_utils.py", line 70, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\metrics\base_metric.py", line 140, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\metrics\metrics.py", line 1759, in update_state
      return metrics_utils.update_confusion_matrix_variables(
    File "C:\Users\aaron\anaconda3\envs\machinelearning\lib\site-packages\keras\utils\metrics_utils.py", line 602, in update_confusion_matrix_variables
      tf.debugging.assert_greater_equal(
Node: 'assert_greater_equal/Assert/AssertGuard/Assert'
2 root error(s) found.
  (0) INVALID_ARGUMENT:  assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential_4/dense_14/Sigmoid:0) = ] [[0.49993217][0.49993217][0.49993217]...] [y (Cast_3/x:0) = ] [0]
	 [[{{node assert_greater_equal/Assert/AssertGuard/Assert}}]]
	 [[assert_less_equal/Assert/AssertGuard/pivot_f/_13/_43]]
  (1) INVALID_ARGUMENT:  assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential_4/dense_14/Sigmoid:0) = ] [[0.49993217][0.49993217][0.49993217]...] [y (Cast_3/x:0) = ] [0]
	 [[{{node assert_greater_equal/Assert/AssertGuard/Assert}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_6181]

In [22]:
study_is_sig3.optimize(compute_mlp_performance, n_trials=150)

[32m[I 2022-07-06 10:39:04,871][0m Trial 150 finished with value: 0.6679738759994507 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 179, 'activation': 'relu', 'learning_rate': 0.004618475069081576, 'weight_decay': 0.029733993392913993, 'l2_kernel': 0.020818081350416715, 'l2_bias': 0.4933002261074192, 'class_weight_0': 3.9720038812960907, 'class_weight_1': 2.5278886719603872}. Best is trial 41 with value: 0.6877726316452026.[0m
[32m[I 2022-07-06 10:39:17,529][0m Trial 151 finished with value: 0.6476271152496338 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 184, 'activation': 'relu', 'learning_rate': 0.005043740187310367, 'weight_decay': 0.02269038397548642, 'l2_kernel': 0.02192158583791648, 'l2_bias': 0.49999111268295743, 'class_weight_0': 3.9047414194656396, 'class_weight_1': 2.502705394404022}. Best is trial 41 with value: 0.6877726316452026.[0m
[32m[I 2022-07-06 10:39:28,525][0m Trial 152 finished with value: 0.6519607901573181 and parameters: {'n_hidden_

[32m[I 2022-07-06 10:42:44,862][0m Trial 169 finished with value: 0.6835464537143707 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 191, 'activation': 'relu', 'learning_rate': 0.0058370878455874945, 'weight_decay': 0.024154589821024287, 'l2_kernel': 0.002164225446048027, 'l2_bias': 0.4983499167377951, 'class_weight_0': 3.7297156308973793, 'class_weight_1': 3.018583390289908}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:42:57,935][0m Trial 170 finished with value: 0.6347222328186035 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 202, 'activation': 'relu', 'learning_rate': 0.006794709972378983, 'weight_decay': 0.0007281237633305522, 'l2_kernel': 0.0015541500628536483, 'l2_bias': 0.4992512749504899, 'class_weight_0': 3.728321855023454, 'class_weight_1': 3.0722630078869937}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:43:10,506][0m Trial 171 finished with value: 0.6787581741809845 and parameters: {'n_hi

[32m[I 2022-07-06 10:46:23,287][0m Trial 188 finished with value: 0.6832281351089478 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 180, 'activation': 'relu', 'learning_rate': 0.008169614897660112, 'weight_decay': 0.015467096479912472, 'l2_kernel': 0.0018660283710596262, 'l2_bias': 0.4907811615863089, 'class_weight_0': 3.5553321768693196, 'class_weight_1': 2.9743254442047466}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:46:33,953][0m Trial 189 finished with value: 0.6672842800617218 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 181, 'activation': 'relu', 'learning_rate': 0.006923711075494429, 'weight_decay': 0.047654135933400565, 'l2_kernel': 0.0015000673398591277, 'l2_bias': 0.48791546449196577, 'class_weight_0': 3.2713695681748622, 'class_weight_1': 3.2538818159543945}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:46:45,185][0m Trial 190 finished with value: 0.6439018249511719 and parameters: {'n_

[32m[I 2022-07-06 10:49:54,516][0m Trial 207 finished with value: 0.6929667592048645 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 182, 'activation': 'relu', 'learning_rate': 0.009987153300015414, 'weight_decay': 9.293409668638397e-05, 'l2_kernel': 0.021257257760529724, 'l2_bias': 0.49943904713103116, 'class_weight_0': 3.719589713143314, 'class_weight_1': 3.520024046656335}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:50:06,833][0m Trial 208 finished with value: 0.6653594672679901 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 183, 'activation': 'relu', 'learning_rate': 0.01055576511361343, 'weight_decay': 0.008957035072695374, 'l2_kernel': 0.000186615093848784, 'l2_bias': 0.4816176488289812, 'class_weight_0': 3.748009828351408, 'class_weight_1': 3.3393933515044347}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:50:19,094][0m Trial 209 finished with value: 0.6687580049037933 and parameters: {'n_hidde

[32m[I 2022-07-06 10:53:30,060][0m Trial 226 finished with value: 0.6266979277133942 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 198, 'activation': 'relu', 'learning_rate': 0.014625222290178564, 'weight_decay': 0.03630462341388156, 'l2_kernel': 0.007982171998646384, 'l2_bias': 0.48259481629228634, 'class_weight_0': 4.011177232362251, 'class_weight_1': 3.052536619238545}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:53:41,293][0m Trial 227 finished with value: 0.606535941362381 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 170, 'activation': 'relu', 'learning_rate': 0.008201811100530714, 'weight_decay': 0.017588930158422545, 'l2_kernel': 0.36806886887158546, 'l2_bias': 0.4673263529230254, 'class_weight_0': 3.6610401069946126, 'class_weight_1': 2.9489940290176397}. Best is trial 157 with value: 0.7009804248809814.[0m
[32m[I 2022-07-06 10:53:53,107][0m Trial 228 finished with value: 0.6506252288818359 and parameters: {'n_hidden_

[32m[I 2022-07-06 10:57:01,367][0m Trial 245 finished with value: 0.6362225711345673 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 217, 'activation': 'relu', 'learning_rate': 0.01617817252306975, 'weight_decay': 0.01708081643087322, 'l2_kernel': 0.019366794562048397, 'l2_bias': 0.49962117702311604, 'class_weight_0': 3.6940511803461447, 'class_weight_1': 3.3927505781419685}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 10:57:12,598][0m Trial 246 finished with value: 0.673437088727951 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 241, 'activation': 'relu', 'learning_rate': 0.014055647492035264, 'weight_decay': 0.03263578886311959, 'l2_kernel': 0.03265866576591006, 'l2_bias': 0.4919666366294139, 'class_weight_0': 3.8531830632976645, 'class_weight_1': 4.111123358943887}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 10:57:23,325][0m Trial 247 finished with value: 0.6554472148418427 and parameters: {'n_hidden_l

[32m[I 2022-07-06 11:00:32,666][0m Trial 264 finished with value: 0.6525007486343384 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 205, 'activation': 'relu', 'learning_rate': 0.011208836418435012, 'weight_decay': 0.31402016733861804, 'l2_kernel': 0.009217036703823766, 'l2_bias': 0.4901296119309874, 'class_weight_0': 3.580911553645616, 'class_weight_1': 2.3475565602240316}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 11:00:43,568][0m Trial 265 finished with value: 0.6671994924545288 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 220, 'activation': 'relu', 'learning_rate': 0.016443908353681376, 'weight_decay': 0.0005612803096973693, 'l2_kernel': 0.018434959344957143, 'l2_bias': 0.49981169684423316, 'class_weight_0': 3.7186978924791054, 'class_weight_1': 2.9931436650782994}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 11:00:55,597][0m Trial 266 finished with value: 0.6685222089290619 and parameters: {'n_hid

[32m[I 2022-07-06 11:04:04,438][0m Trial 283 finished with value: 0.5 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 181, 'activation': 'relu', 'learning_rate': 0.002396199661334114, 'weight_decay': 0.3411623317569552, 'l2_kernel': 0.21767398167062485, 'l2_bias': 0.44648528668185034, 'class_weight_0': 2.4090916789677577, 'class_weight_1': 2.9362187629126235}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 11:04:16,472][0m Trial 284 finished with value: 0.6340437531471252 and parameters: {'n_hidden_layers': 1, 'n_hidden_nodes': 175, 'activation': 'relu', 'learning_rate': 0.0125787655335805, 'weight_decay': 0.0001917602639008205, 'l2_kernel': 0.01157940090876966, 'l2_bias': 0.48912577050865863, 'class_weight_0': 3.9501922917701906, 'class_weight_1': 2.8312053008890965}. Best is trial 244 with value: 0.7147058844566345.[0m
[32m[I 2022-07-06 11:04:26,404][0m Trial 285 finished with value: 0.6480392515659332 and parameters: {'n_hidden_layers': 1, 'n

In [23]:
study_is_sig3.best_trial

FrozenTrial(number=244, values=[0.7147058844566345], datetime_start=datetime.datetime(2022, 7, 6, 10, 56, 39, 441230), datetime_complete=datetime.datetime(2022, 7, 6, 10, 56, 50, 432565), params={'n_hidden_layers': 1, 'n_hidden_nodes': 224, 'activation': 'relu', 'learning_rate': 0.014405369664987436, 'weight_decay': 0.008844031348287134, 'l2_kernel': 0.0008442313215686649, 'l2_bias': 0.49960347582667186, 'class_weight_0': 3.6971304115389914, 'class_weight_1': 3.2478186786138186}, distributions={'n_hidden_layers': IntUniformDistribution(high=3, low=1, step=1), 'n_hidden_nodes': IntUniformDistribution(high=300, low=20, step=1), 'activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'softmax')), 'learning_rate': UniformDistribution(high=0.1, low=1e-09), 'weight_decay': UniformDistribution(high=0.5, low=0.0), 'l2_kernel': UniformDistribution(high=0.5, low=0.0), 'l2_bias': UniformDistribution(high=0.5, low=0.0), 'class_weight_0': UniformDistribution(high=5.0, low=0.0), 'class_wei