In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Preliminary Data Cleaning
1. Load dataset in
2. Drop NA
3. Drop Outlier
4. Train test split
5. Scale the Age variable

In [2]:
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

df = pd.read_csv('lung cancer survey.csv')
df_no_na = df.dropna()

df_age = df_no_na[df_no_na["AGE"] > 21]
df_age

# Assuming 'age' is in the first column (index 0) of X
age_index = 1

X = df_age.drop('LUNG_CANCER', axis=1).values
y = df_age['LUNG_CANCER'].values
# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler only on the age column in X_train
X_train_age = X_train[:, age_index].reshape(-1, 1)
scaler.fit(X_train_age)

# Transform the age column in X_train and X_val
X_train[:, age_index] = scaler.transform(X_train_age).flatten()
X_val[:, age_index] = scaler.transform(X_val[:, age_index].reshape(-1, 1)).flatten()

X_train

array([[ 1.        ,  1.16977204,  0.        , ...,  1.        ,
         0.        ,  1.        ],
       [ 1.        , -1.2946817 ,  1.        , ...,  1.        ,
         0.        ,  1.        ],
       [ 0.        ,  0.64167481,  1.        , ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  1.25778824,  0.        , ...,  1.        ,
         1.        ,  1.        ],
       [ 1.        , -0.32650345,  0.        , ...,  1.        ,
         0.        ,  1.        ],
       [ 1.        , -1.11864929,  1.        , ...,  1.        ,
         0.        ,  1.        ]])

# Base Neural Network Model

The model below considers 3 hidden layers, each holding 10 nodes.

This is just a baseline model to understand the model parameters better

In [3]:
# Set seeds for reproducibility
random.seed(888)
tf.random.set_seed(888)
np.random.seed(888)

# Creating the model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(15,)),  # Adjust input shape based on feature count
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

opt = tf.keras.optimizers.Adam(learning_rate=0.01)
# Compile the model
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

# Train the model
history = model.fit(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val), verbose=0)


In [4]:
# Get model predictions for validation set
y_val_pred = model.predict(X_val)
y_val_pred = (y_val_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate Precision, Recall, and F1-score
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)
f1 = f1_score(y_val, y_val_pred)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Precision: 0.8863
Recall: 0.9853
F1-score: 0.9332


In [34]:
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad

class MyHyperModel(HyperModel):
    def __init__(self, input_dim):
        self.input_dim = input_dim

    def build(self, hp):
        model = Sequential()
        model.add(layers.Input(shape=(self.input_dim,)))

        # First hidden layer with tunable units and activation
        model.add(Dense(
            units=hp.Int('units_1', min_value=8, max_value=45, step=3),
            activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid', 'linear'])
        ))

        # Adding up to 5 more hidden layers based on the hp.Int range from 1 to 5 additional layers
        for i in range(hp.Int('num_layers', 1, 5)):  # This creates 2 to 6 total hidden layers
            model.add(Dense(
                units=hp.Int(f'units_{i+2}', min_value=8, max_value=45, step=3),
                activation=hp.Choice(f'activation_{i+2}', values=['relu', 'tanh', 'sigmoid', 'linear'])
            ))

        # Output layer for binary classification
        model.add(Dense(1, activation='sigmoid'))

        # Choosing the optimizer and learning rate as hyperparameters
        learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-1, sampling='log')
        optimizer_choice = hp.Choice('optimizer', values=['sgd', 'adam', 'rmsprop', 'adagrad'])

        # Define optimizer based on choice
        if optimizer_choice == 'sgd':
            opt = SGD(learning_rate=learning_rate)
        elif optimizer_choice == 'adam':
            opt = Adam(learning_rate=learning_rate)
        elif optimizer_choice == 'rmsprop':
            opt = RMSprop(learning_rate=learning_rate)
        elif optimizer_choice == 'adagrad':
            opt = Adagrad(learning_rate=learning_rate)
        else:
            raise ValueError("Optimizer not recognized.")

        # Compile model with selected optimizer
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        
        return model


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = BayesianOptimization(
    hypermodel,
    objective='val_accuracy',
    max_trials= 1000,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir',
    project_name='binary_classification'
)

# Start search
tuner.search(X_train, y_train, epochs=2000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 773 Complete [00h 00m 47s]
val_accuracy: 0.8955555558204651

Best val_accuracy So Far: 0.9005555510520935
Total elapsed time: 12h 34m 22s

Search: Running Trial #774

Value             |Best Value So Far |Hyperparameter
26                |38                |units_1
tanh              |linear            |activation_1
4                 |5                 |num_layers
26                |23                |units_2
sigmoid           |linear            |activation_2
0.1               |0.04038           |learning_rate
adam              |adam              |optimizer
23                |8                 |units_3
linear            |relu              |activation_3
44                |8                 |units_4
relu              |relu              |activation_4
8                 |8                 |units_5
linear            |relu              |activation_5
44                |8                 |units_6
linear            |relu              |activation_6

Epoch 1/2000
[1m1/1[0m [32m━━━━━━━━━━━━

In [14]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)




[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 830us/step
Best model Precision on validation set: 0.8881987577639752
Best model Recall on validation set: 0.9986033519553073
Best model F1 score on validation set: 0.9401709401709402
Best Hyperparameters: {'units_1': 26, 'activation_1': 'linear', 'num_layers': 5, 'units_2': 29, 'activation_2': 'relu', 'learning_rate': 0.0005543780243486915, 'optimizer': 'rmsprop', 'units_3': 44, 'activation_3': 'relu', 'units_4': 23, 'activation_4': 'sigmoid', 'units_5': 44, 'activation_5': 'sigmoid', 'units_6': 29, 'activation_6': 'tanh'}


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad
# Set seeds for reproducibility
random.seed(888)
tf.random.set_seed(888)
np.random.seed(888)

learning_rate = 0.04038
# Creating the model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(15,)),  # Adjust input shape based on feature count
    tf.keras.layers.Dense(38, activation='linear'),
    tf.keras.layers.Dense(23, activation='linear'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

opt = Adam(learning_rate=learning_rate)
# Compile the model
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=1)

# Display optimized weights and biases after training (useful for analysis)
for layer in model.layers:
    weights, biases = layer.get_weights()
    print(f"Weights for {layer.name}:\n{weights}\nBiases:\n{biases}")

# Evaluate model on validation data (i.e., calculate loss function & accuracy performance metric of fitted model on validation data)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch 1/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 906ms/step - accuracy: 0.1926 - loss: 0.7341
Epoch 2/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8074 - loss: 0.6709
Epoch 3/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8074 - loss: 0.6471
Epoch 4/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8074 - loss: 0.6236
Epoch 5/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8074 - loss: 0.6000
Epoch 6/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8074 - loss: 0.5770
Epoch 7/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8074 - loss: 0.5554
Epoch 8/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8074 - loss: 0.5359
Epoch 9/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

In [7]:
# Get model predictions for validation set
y_val_pred = model.predict(X_val)
y_val_pred = (y_val_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate Precision, Recall, and F1-score
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)
f1 = f1_score(y_val, y_val_pred)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443us/step
Precision: 0.7956
Recall: 1.0000
F1-score: 0.8861


In [4]:
# Simplify parameters with 6 layers
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad

class MyHyperModel(HyperModel):
    def __init__(self, input_dim):
        self.input_dim = input_dim

    def build(self, hp):
        model = Sequential()
        model.add(layers.Input(shape=(self.input_dim,)))

        # First hidden layer with tunable units and activation
        model.add(Dense(
            units=hp.Int('units_1', min_value=8, max_value=45),
            activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid', 'linear'])
        ))

        # Adding up to 5 more hidden layers based on the hp.Int range from 1 to 5 additional layers
        for i in range(hp.Int('num_layers', 1, 5)):  # This creates 2 to 6 total hidden layers
            model.add(Dense(
                units=hp.Int(f'units_{i+2}', min_value=8, max_value=45),
                activation=hp.Choice(f'activation_{i+2}', values=['relu', 'tanh', 'sigmoid', 'linear'])
            ))

        # Output layer for binary classification
        model.add(Dense(1, activation='sigmoid'))

        # Choosing the optimizer and learning rate as hyperparameters
        learning_rate = hp.Float('learning_rate', min_value=1e-2, max_value=5e-1, sampling='log')
        optimizer_choice = hp.Choice('optimizer', values=['sgd', 'adam', 'rmsprop', 'adagrad'])

        # Define optimizer based on choice
        if optimizer_choice == 'sgd':
            opt = SGD(learning_rate=learning_rate)
        elif optimizer_choice == 'adam':
            opt = Adam(learning_rate=learning_rate)
        elif optimizer_choice == 'rmsprop':
            opt = RMSprop(learning_rate=learning_rate)
        elif optimizer_choice == 'adagrad':
            opt = Adagrad(learning_rate=learning_rate)
        else:
            raise ValueError("Optimizer not recognized.")

        # Compile model with selected optimizer
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        
        return model

In [27]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 50,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir12',
    project_name='binary_classification',
    seed = 888
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val), verbose = 0)


In [28]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 40
activation_1: linear
num_layers: 1
units_2: 45
activation_2: relu
learning_rate: 0.4083861903475116
optimizer: sgd
units_3: 42
activation_3: tanh
units_4: 14
activation_4: linear
units_5: 37
activation_5: linear
units_6: 27
activation_6: tanh
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 548us/step
Best model Precision on validation set: 0.8888198757763975
Best model Recall on validation set: 0.9993016759776536
Best model F1 score on validation set: 0.9408284023668639


In [6]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 1000,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir13',
    project_name='binary_classification',
    seed = 888
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val), verbose = 0)


In [8]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 8
activation_1: linear
num_layers: 2
units_2: 23
activation_2: relu
learning_rate: 0.026526058661021926
optimizer: adam
units_3: 8
activation_3: sigmoid
units_4: 14
activation_4: relu
units_5: 38
activation_5: sigmoid
units_6: 31
activation_6: tanh
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 415us/step
Best model Precision on validation set: 0.8888888888888888
Best model Recall on validation set: 1.0
Best model F1 score on validation set: 0.9411764705882353


  saveable.load_own_variables(weights_store.get(inner_path))


# Simplified Hyperparameter tuning

In [3]:
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad

class MyHyperModel(HyperModel):
    def __init__(self, input_dim):
        self.input_dim = input_dim

    def build(self, hp):
        model = Sequential()
        model.add(layers.Input(shape=(self.input_dim,)))

        # First hidden layer with tunable units and activation
        model.add(Dense(
            units=hp.Int('units_1', min_value=8, max_value=45, step=3),
            activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid', 'linear'])
        ))

        # Adding up to 2 more hidden layers based on the hp.Int range from 1 to 3 additional layers
        for i in range(hp.Int('num_layers', 1, 2)):  # This creates 2 to 3 total hidden layers
            model.add(Dense(
                units=hp.Int(f'units_{i+2}', min_value=8, max_value=45, step=3),
                activation=hp.Choice(f'activation_{i+2}', values=['relu', 'tanh', 'sigmoid', 'linear'])
            ))

        # Output layer for binary classification
        model.add(Dense(1, activation='sigmoid'))

        # Choosing the optimizer and learning rate as hyperparameters
        learning_rate = hp.Float('learning_rate', min_value=1e-2, max_value=5e-1, sampling='log') # 0.01 to 0.5
        optimizer_choice = hp.Choice('optimizer', values=['sgd', 'adam', 'rmsprop', 'adagrad'])

        # Define optimizer based on choice
        if optimizer_choice == 'sgd':
            opt = SGD(learning_rate=learning_rate)
        elif optimizer_choice == 'adam':
            opt = Adam(learning_rate=learning_rate)
        elif optimizer_choice == 'rmsprop':
            opt = RMSprop(learning_rate=learning_rate)
        elif optimizer_choice == 'adagrad':
            opt = Adagrad(learning_rate=learning_rate)
        else:
            raise ValueError("Optimizer not recognized.")

        # Compile model with selected optimizer
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        
        return model

In [4]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 50,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir2',
    project_name='binary_classification',
    seed = 888
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 48 Complete [00h 00m 28s]
val_accuracy: 0.8600000143051147

Best val_accuracy So Far: 0.8999999761581421
Total elapsed time: 00h 31m 21s

Search: Running Trial #49

Value             |Best Value So Far |Hyperparameter
14                |17                |units_1
sigmoid           |linear            |activation_1
1                 |2                 |num_layers
35                |17                |units_2
sigmoid           |sigmoid           |activation_2
0.011416          |0.026317          |learning_rate
rmsprop           |adam              |optimizer
20                |11                |units_3
linear            |tanh              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401ms/step - accuracy: 0.1926 - loss: 1.0970 - val_accuracy: 0.7956 - val_loss: 0.6368
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.8074 - loss: 0.6340 - val_accuracy: 0.7956 - val_loss: 0.5364
Epoch 3/1000

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [13]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)


Best Hyperparameters:
units_1: 17
activation_1: linear
num_layers: 2
units_2: 17
activation_2: sigmoid
learning_rate: 0.02631659804964201
optimizer: adam
units_3: 11
activation_3: tanh
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Best model Precision on validation set: 0.8888198757763975
Best model Recall on validation set: 0.9993016759776536
Best model F1 score on validation set: 0.9408284023668639


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 10,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir3',
    project_name='binary_classification',
    seed = 42
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 9 Complete [00h 00m 29s]
val_accuracy: 0.8311111330986023

Best val_accuracy So Far: 0.898888885974884
Total elapsed time: 00h 04m 30s

Search: Running Trial #10

Value             |Best Value So Far |Hyperparameter
17                |14                |units_1
sigmoid           |relu              |activation_1
2                 |1                 |num_layers
26                |41                |units_2
relu              |sigmoid           |activation_2
0.073537          |0.19566           |learning_rate
sgd               |adam              |optimizer
44                |14                |units_3
linear            |tanh              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315ms/step - accuracy: 0.8074 - loss: 0.5680 - val_accuracy: 0.7956 - val_loss: 0.5566
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.8074 - loss: 0.5488 - val_accuracy: 0.7956 - val_loss: 0.5426
Epoch 3/1000


In [15]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 14
activation_1: relu
num_layers: 1
units_2: 41
activation_2: sigmoid
learning_rate: 0.19565526996745297
optimizer: adam
units_3: 14
activation_3: tanh
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552us/step
Best model Precision on validation set: 0.888681592039801
Best model Recall on validation set: 0.9979050279329609
Best model F1 score on validation set: 0.9401315789473684


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 10,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir4',
    project_name='binary_classification',
    seed = 999
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 5 Complete [00h 00m 30s]
val_accuracy: 0.894444465637207

Best val_accuracy So Far: 0.9005555510520935
Total elapsed time: 00h 02m 37s

Search: Running Trial #6

Value             |Best Value So Far |Hyperparameter
17                |38                |units_1
linear            |linear            |activation_1
2                 |2                 |num_layers
41                |26                |units_2
tanh              |tanh              |activation_2
0.01362           |0.051937          |learning_rate
adam              |adam              |optimizer
44                |8                 |units_3
tanh              |relu              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 654ms/step - accuracy: 0.3376 - loss: 0.8152 - val_accuracy: 0.7967 - val_loss: 0.5058
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.8071 - loss: 0.4915 - val_accuracy: 0.7956 - val_loss: 0.5337
Epoch 3/1000
[

In [21]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 38
activation_1: linear
num_layers: 2
units_2: 26
activation_2: tanh
learning_rate: 0.051937206105072076
optimizer: adam
units_3: 8
activation_3: relu
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 627us/step
Best model Precision on validation set: 0.8888888888888888
Best model Recall on validation set: 1.0
Best model F1 score on validation set: 0.9411764705882353


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 10,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir5',
    project_name='binary_classification',
    seed = 777
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 8 Complete [00h 00m 27s]
val_accuracy: 0.894444465637207

Best val_accuracy So Far: 0.897777795791626
Total elapsed time: 00h 03m 42s

Search: Running Trial #9

Value             |Best Value So Far |Hyperparameter
23                |14                |units_1
linear            |sigmoid           |activation_1
1                 |2                 |num_layers
38                |20                |units_2
sigmoid           |sigmoid           |activation_2
0.24098           |0.028809          |learning_rate
adam              |rmsprop           |optimizer
41                |20                |units_3
relu              |relu              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 621ms/step - accuracy: 0.8071 - loss: 0.6102 - val_accuracy: 0.7956 - val_loss: 1.6672
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8074 - loss: 1.5653 - val_accuracy: 0.7956 - val_loss: 0.5226
Epoch 3/1000
[1

In [27]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 23
activation_1: linear
num_layers: 1
units_2: 38
activation_2: sigmoid
learning_rate: 0.24097772968347564
optimizer: adam
units_3: 41
activation_3: relu
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 547us/step
Best model Precision on validation set: 0.8888888888888888
Best model Recall on validation set: 1.0
Best model F1 score on validation set: 0.9411764705882353


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 20,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir5',
    project_name='binary_classification',
    seed = 88
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))


Trial 14 Complete [00h 00m 26s]
val_accuracy: 0.8966666460037231

Best val_accuracy So Far: 0.9005555510520935
Total elapsed time: 00h 14m 21s

Search: Running Trial #15

Value             |Best Value So Far |Hyperparameter
23                |23                |units_1
linear            |linear            |activation_1
1                 |1                 |num_layers
35                |38                |units_2
sigmoid           |sigmoid           |activation_2
0.010046          |0.24098           |learning_rate
rmsprop           |adam              |optimizer
32                |41                |units_3
relu              |relu              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398ms/step - accuracy: 0.8074 - loss: 0.5556 - val_accuracy: 0.7956 - val_loss: 0.5183
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8074 - loss: 0.4973 - val_accuracy: 0.7956 - val_loss: 0.4856
Epoch 3/1000

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.9119 - loss: 0.2868 - val_accuracy: 0.8972 - val_loss: 0.3221
Epoch 493/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.9114 - loss: 0.2883 - val_accuracy: 0.8956 - val_loss: 0.3296
Epoch 494/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.9129 - loss: 0.2860 - val_accuracy: 0.8939 - val_loss: 0.3345
Epoch 495/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.9117 - loss: 0.2872 - val_accuracy: 0.8961 - val_loss: 0.3221
Epoch 496/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.9078 - loss: 0.2938 - val_accuracy: 0.8967 - val_loss: 0.3227
Epoch 497/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9107 - loss: 0.2888 - val_accuracy: 0.8906 - val_loss: 0.3523
Epoch 498/1000
[1m1/1[0m [32m━━━━━

In [29]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 23
activation_1: linear
num_layers: 1
units_2: 38
activation_2: sigmoid
learning_rate: 0.24097772968347564
optimizer: adam
units_3: 41
activation_3: relu
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552us/step
Best model Precision on validation set: 0.8888888888888888
Best model Recall on validation set: 1.0
Best model F1 score on validation set: 0.9411764705882353


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 10,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir6',
    project_name='binary_classification',
    seed = 50
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val))

Trial 5 Complete [00h 00m 29s]
val_accuracy: 0.8611111044883728

Best val_accuracy So Far: 0.8955555558204651
Total elapsed time: 00h 02m 27s

Search: Running Trial #6

Value             |Best Value So Far |Hyperparameter
41                |23                |units_1
sigmoid           |tanh              |activation_1
2                 |2                 |num_layers
20                |38                |units_2
relu              |relu              |activation_2
0.040319          |0.028924          |learning_rate
adam              |rmsprop           |optimizer
11                |11                |units_3
sigmoid           |tanh              |activation_3

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 646ms/step - accuracy: 0.1926 - loss: 0.7708 - val_accuracy: 0.7956 - val_loss: 0.5327
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.8074 - loss: 0.5230 - val_accuracy: 0.7956 - val_loss: 0.5064
Epoch 3/1000


In [None]:
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad
# Set seeds for reproducibility
random.seed(888)
tf.random.set_seed(888)
np.random.seed(888)

learning_rate = 0.028924
# Creating the model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(15,)),  # Adjust input shape based on feature count
    tf.keras.layers.Dense(23, activation='tanh'),
    tf.keras.layers.Dense(38, activation='relu'),
    tf.keras.layers.Dense(11, activation='tanh'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

opt = Adam(learning_rate=learning_rate)
# Compile the model
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=1)

# Display optimized weights and biases after training (useful for analysis)
for layer in model.layers:
    weights, biases = layer.get_weights()
    print(f"Weights for {layer.name}:\n{weights}\nBiases:\n{biases}")

# Evaluate model on validation data (i.e., calculate loss function & accuracy performance metric of fitted model on validation data)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch 1/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 524ms/step - accuracy: 0.7658 - loss: 0.6040
Epoch 2/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8074 - loss: 0.4853
Epoch 3/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8074 - loss: 0.5007
Epoch 4/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8074 - loss: 0.4848
Epoch 5/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8074 - loss: 0.4691
Epoch 6/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8074 - loss: 0.4693
Epoch 7/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8074 - loss: 0.4631
Epoch 8/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8074 - loss: 0.4440
Epoch 9/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

In [None]:
# Get model predictions for validation set
y_val_pred = model.predict(X_val)
y_val_pred = (y_val_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate Precision, Recall, and F1-score
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)
f1 = f1_score(y_val, y_val_pred)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Changing hyper model parameters

In [4]:
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad

class MyHyperModel(HyperModel):
    def __init__(self, input_dim):
        self.input_dim = input_dim

    def build(self, hp):
        model = Sequential()
        model.add(layers.Input(shape=(self.input_dim,)))

        # First hidden layer with tunable units and activation
        model.add(Dense(
            units=hp.Int('units_1', min_value=8, max_value=45),
            activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid', 'linear'])
        ))

        # Adding up to 3 more hidden layers based on the hp.Int range from 1 to 2 additional layers
        for i in range(hp.Int('num_layers', 1, 2)):  # This creates 2 to 3 total hidden layers
            model.add(Dense(
                units=hp.Int(f'units_{i+2}', min_value=8, max_value=45),
                activation=hp.Choice(f'activation_{i+2}', values=['relu', 'tanh', 'sigmoid', 'linear'])
            ))

        # Output layer for binary classification
        model.add(Dense(1, activation='sigmoid'))

        # Choosing the optimizer and learning rate as hyperparameters
        learning_rate = hp.Float('learning_rate', min_value=1e-2, max_value=5e-1, sampling='log') # 0.01 to 0.5
        optimizer_choice = hp.Choice('optimizer', values=['sgd', 'adam', 'rmsprop', 'adagrad'])

        # Define optimizer based on choice
        if optimizer_choice == 'sgd':
            opt = SGD(learning_rate=learning_rate)
        elif optimizer_choice == 'adam':
            opt = Adam(learning_rate=learning_rate)
        elif optimizer_choice == 'rmsprop':
            opt = RMSprop(learning_rate=learning_rate)
        elif optimizer_choice == 'adagrad':
            opt = Adagrad(learning_rate=learning_rate)
        else:
            raise ValueError("Optimizer not recognized.")

        # Compile model with selected optimizer
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        
        return model

In [24]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 20,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir10',
    project_name='binary_classification',
    seed = 222
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val), verbose = 0)

In [25]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 28
activation_1: sigmoid
num_layers: 2
units_2: 18
activation_2: sigmoid
learning_rate: 0.038088885261179824
optimizer: adam
units_3: 8
activation_3: relu
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 619us/step
Best model Precision on validation set: 0.888268156424581
Best model Recall on validation set: 0.9993016759776536
Best model F1 score on validation set: 0.9405192244495564


  saveable.load_own_variables(weights_store.get(inner_path))


# Improvement from caleb brute force method.

# Final Model


In [14]:
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad

class MyHyperModel(HyperModel):
    def __init__(self, input_dim):
        self.input_dim = input_dim

    def build(self, hp):
        model = Sequential()
        model.add(layers.Input(shape=(self.input_dim,)))

        # First hidden layer with tunable units and activation
        model.add(Dense(
            units=hp.Int('units_1', min_value=8, max_value=45),
            activation=hp.Choice('activation_1', values=['sigmoid'])
        ))
        model.add(Dense(
            units=hp.Int('units_2', min_value=8, max_value=45),
            activation=hp.Choice('activation_2', values=['sigmoid'])
        ))
        model.add(Dense(
            units=hp.Int('units_3', min_value=8, max_value=45),
            activation=hp.Choice('activation_3', values=['sigmoid'])
        ))
        # Output layer for binary classification
        model.add(Dense(1, activation='sigmoid'))

        # Choosing the optimizer and learning rate as hyperparameters
        learning_rate = 0.5
        opt = Adam(learning_rate=learning_rate)
        # Compile model with selected optimizer
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        
        return model

In [15]:
# Assuming input_dim is the number of features (e.g., 15)
hypermodel = MyHyperModel(input_dim=15)

# Define the tuner
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials= 10,  # You can adjust the number of trials
    executions_per_trial=1,
    directory='my_dir14',
    project_name='binary_classification',
    seed = 888
)

# Start search
tuner.search(X_train, y_train, epochs=1000, batch_size=len(X_train), validation_data=(X_val, y_val), verbose = 0)

In [16]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Get the best model from the tuner
best_model = tuner.get_best_models(num_models=1)[0]  # Gets the top model

# Retrieve the best hyperparameters from the tuner
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Display the best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hyperparameters.values.items():
    print(f"{param}: {value}")
    
# Make predictions on the validation set
y_val_pred = (best_model.predict(X_val) > 0.5).astype("int32")  # Threshold at 0.5 for binary classification

# Calculate F1 score
f1 = f1_score(y_val,y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)

print("Best model Precision on validation set:", precision)
print("Best model Recall on validation set:", recall)
print("Best model F1 score on validation set:", f1)

Best Hyperparameters:
units_1: 11
activation_1: sigmoid
units_2: 9
activation_2: sigmoid
units_3: 16
activation_3: sigmoid
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 627us/step
Best model Precision on validation set: 0.7955555555555556
Best model Recall on validation set: 1.0
Best model F1 score on validation set: 0.8861386138613861


  saveable.load_own_variables(weights_store.get(inner_path))
