## Experiment with TensorFlow

1. **Import necessary TensorFlow modules.**
2. **Define the model architecture.** Assuming a simple architecture with two hidden layers.
3. **Compile the model** with an appropriate optimizer, loss function, and metrics.
4. **Prepare the data** similarly to how it's prepared for the `MLPClassifier`, ensuring input features are correctly scaled and split into training and test sets.
5. **Train the model** on the data.

Note the following:

- **Architecture**: This example uses two dense layers with 128 and 64 units, respectively, and ReLU activation functions. The output layer uses a sigmoid activation function for binary classification. Adjust the layer sizes and quantities to match your specific needs or the original `MLPClassifier` configuration.
- **Compilation**: The model is compiled with the Adam optimizer and binary cross-entropy loss, which are standard for binary classification tasks.
- **Training**: The model is trained for 100 epochs with a validation split. Adjust the number of epochs and validation split according to your specific requirements.

## Imports Feature Selection 

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import keras_tuner as kt
from tensorflow.keras.metrics import TruePositives, TrueNegatives, FalsePositives, FalseNegatives
from tabulate import tabulate


df = pd.read_csv("./data/diabetes_binary_5050split_health_indicators_BRFSS2015.csv")

# change columns to lowercase
df.columns = df.columns.str.lower()
df.columns

# create interaction terms
df['bmi_highbp_diffwalk_interaction'] = df['bmi'] * df['highbp'] * df['diffwalk']
df['age_highchol_heartdiseaseorattack_interaction'] = df['age'] * df['highchol'] * df['heartdiseaseorattack']
df['genhlth_physhlth_interaction'] = df['genhlth'] * df['physhlth']


# Assuming df is your DataFrame and has already been loaded
X = df.drop('diabetes_binary', axis=1).values
y = df['diabetes_binary'].values

# drop columns that are poorly correlated with 'diabetes_binary'
df.drop(['smoker', 'fruits', 'veggies', 'hvyalcoholconsump', 'anyhealthcare', 'nodocbccost', 'menthlth', 'sex'], axis=1, inplace=True)


# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scaling the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Defining the model architecture
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Using sigmoid for binary classification

# Compiling the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['recall'])

# Training the model
history = model.fit(X_train_scaled, y_train, epochs=100, validation_split=0.2, verbose=1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-04-11 14:13:36.155348: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-11 14:13:36.155370: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-04-11 14:13:36.155375: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-04-11 14:13:36.155775: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-11 14:13:36.155809: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/100


2024-04-11 14:13:36.701473: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - loss: 0.5264 - recall: 0.7811 - val_loss: 0.5056 - val_recall: 0.7929
Epoch 2/100
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 11ms/step - loss: 0.5018 - recall: 0.8043 - val_loss: 0.5101 - val_recall: 0.8332
Epoch 3/100
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - loss: 0.5005 - recall: 0.8016 - val_loss: 0.5053 - val_recall: 0.8332
Epoch 4/100
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - loss: 0.4951 - recall: 0.8074 - val_loss: 0.5021 - val_recall: 0.7879
Epoch 5/100
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - loss: 0.4923 - recall: 0.8051 - val_loss: 0.5033 - val_recall: 0.8022
Epoch 6/100
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - loss: 0.4938 - recall: 0.8093 - val_loss: 0.5029 - val_recall: 0.7952
Epoch 7/100
[1m1414/1414[0m [32

## Tabulate pre-tuning scores

In [2]:
# Initialize the list to store metric results
metric_results = []

# Assuming y_test and X_test_scaled are your test labels and features
# First, get the model's predictions
y_pred = model.predict(X_test_scaled)
y_pred_classes = (y_pred > 0.5).astype("int32")

# Now, compute the metrics
tp = TruePositives()(y_test, y_pred_classes).numpy()
tn = TrueNegatives()(y_test, y_pred_classes).numpy()
fp = FalsePositives()(y_test, y_pred_classes).numpy()
fn = FalseNegatives()(y_test, y_pred_classes).numpy()

accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)  # Sensitivity
specificity = tn / (tn + fp)
f1_score = 2 * (precision * recall) / (precision + recall)

# Assuming `metric_results` is your existing list of model performances
metric_results.append(["TensorFlow Model", accuracy, precision, recall, specificity, f1_score])

# Print the table
print(tabulate(metric_results, headers=["Model", "Accuracy", "Precision", "Sensitivity", "Specificity", "F1-Score"]))


[1m442/442[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 953us/step
Model               Accuracy    Precision    Sensitivity    Specificity    F1-Score
----------------  ----------  -----------  -------------  -------------  ----------
TensorFlow Model    0.698352     0.687483        0.72726       0.669448    0.706812


## Hyperparameter tuning using Keras optimizer

In [3]:
def model_builder(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(X_train_scaled.shape[1],)))
    
    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(tf.keras.layers.Dense(units=hp_units, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    
    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model

tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')

# Create a callback to stop training early after reaching a certain value for the validation loss
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(X_train_scaled, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

# Build the model with the optimal hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hps)
model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.2)


Trial 30 Complete [00h 02m 59s]
val_accuracy: 0.7553708553314209

Best val_accuracy So Far: 0.757050633430481
Total elapsed time: 00h 32m 10s

The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 352 and the optimal learning rate for the optimizer
is 0.001.

Epoch 1/10
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 12ms/step - accuracy: 0.7401 - loss: 0.5229 - val_accuracy: 0.7508 - val_loss: 0.5052
Epoch 2/10
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 11ms/step - accuracy: 0.7517 - loss: 0.5018 - val_accuracy: 0.7514 - val_loss: 0.5024
Epoch 3/10
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 12ms/step - accuracy: 0.7526 - loss: 0.5034 - val_accuracy: 0.7517 - val_loss: 0.5028
Epoch 4/10
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 14ms/step - accuracy: 0.7570 - loss: 0.4984 - val_accuracy: 0.7555 - val_loss: 0.5020
Epoch 5/10
[1m1414/141

<keras.src.callbacks.history.History at 0x3b4c88310>

## Tabulate best score

In [4]:
# 1. Evaluate the model to get the loss and accuracy
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

# 2. Predict on the test set
y_pred = model.predict(X_test_scaled)
y_pred_classes = (y_pred > 0.5).astype("int32")

# 3. Calculate metrics
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)  # Sensitivity
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_classes).ravel()
specificity = tn / (tn+fp)
f1 = f1_score(y_test, y_pred_classes)

# Prepare results for tabulation
metric_results = [["Best HyperTuned Model", test_accuracy, precision, recall, specificity, f1]]

# 4. Print the table
print(tabulate(metric_results, headers=["Model", "Accuracy", "Precision", "Sensitivity", "Specificity", "F1-Score"]))


[1m442/442[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Model                    Accuracy    Precision    Sensitivity    Specificity    F1-Score
---------------------  ----------  -----------  -------------  -------------  ----------
Best HyperTuned Model    0.750336     0.732066       0.789645       0.711033    0.759766


## Run best model on full dataset

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the dataset
df_full = pd.read_csv("./data/diabetes_binary_health_indicators_BRFSS2015.csv")

# Change columns to lowercase
df_full.columns = df_full.columns.str.lower()

# Create interaction terms
df_full['bmi_highbp_diffwalk_interaction'] = df_full['bmi'] * df_full['highbp'] * df_full['diffwalk']
df_full['age_highchol_heartdiseaseorattack_interaction'] = df_full['age'] * df_full['highchol'] * df_full['heartdiseaseorattack']
df_full['genhlth_physhlth_interaction'] = df_full['genhlth'] * df_full['physhlth']

# Drop columns that are poorly correlated with 'diabetes_binary'
df_full.drop(['smoker', 'fruits', 'veggies', 'hvyalcoholconsump', 'anyhealthcare', 'nodocbccost', 'menthlth', 'sex'], axis=1, inplace=True)

# Assuming 'scaler' is your StandardScaler instance used previously and 'model' is your trained best model

# Prepare the dataset for prediction
X_full = df_full.drop('diabetes_binary', axis=1).values  # Use the correct case for column names

# Scale the features
X_full_scaled = scaler.transform(X_full)

# Predict
y_pred_full = model.predict(X_full_scaled)
y_pred_classes_full = (y_pred_full > 0.5).astype("int32")

# Now, y_pred_classes_full contains the binary predictions for your full dataset


[1m7928/7928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2ms/step


## Validate prediction against actual value 

In [21]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Actual vs. Predicted
y_true = df_full['diabetes_binary']
y_pred = df_full['predicted_diabetes']

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)  # Also known as sensitivity
f1 = f1_score(y_true, y_pred)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
specificity = tn / (tn + fp)

# Summarize metrics in a DataFrame for a nice table view
import pandas as pd

metrics_summary = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall (Sensitivity)", "Specificity", "F1-Score"],
    "Score": [accuracy, precision, recall, specificity, f1]
})

metrics_summary


Unnamed: 0,Metric,Score
0,Accuracy,0.740062
1,Precision,0.319432
2,Recall (Sensitivity),0.765631
3,Specificity,0.735923
4,F1-Score,0.450789
