## Set-up ##

In [1]:
# Import packages 
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import pandas as pd
import csv
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
import keras_tuner
import datetime

## Loading the dataset and splitting it into features and target

In [2]:
# Load the raw table from disk. Row 0 of the file supplies the column names and
# na_filter=False turns off pandas' missing-value detection while parsing.
trn = pd.read_csv(
    "df2.csv",
    header=0,
    na_filter=False,
)
trn

Unnamed: 0.1,Unnamed: 0,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,1,0,1,1,1,40,1,0,0,0,...,1,0,5,18,15,1,0,9,4,3
1,2,0,0,0,0,25,1,0,0,1,...,0,1,3,0,0,0,0,7,6,1
2,3,0,1,1,1,28,0,0,0,0,...,1,1,5,30,30,1,0,9,4,8
3,4,0,1,0,1,27,0,0,0,1,...,1,0,2,0,0,0,0,11,3,6
4,5,0,1,1,1,24,0,0,0,1,...,1,0,2,3,0,0,0,11,5,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253675,253676,0,1,1,1,45,0,0,0,0,...,1,0,3,0,5,0,1,5,6,7
253676,253677,1,1,1,1,18,0,0,0,0,...,1,0,4,0,0,1,0,11,2,4
253677,253678,0,0,0,1,28,0,0,0,1,...,1,0,1,0,0,0,0,2,5,2
253678,253679,0,1,0,1,23,0,0,0,0,...,1,0,3,0,0,0,1,7,5,1


In [3]:
# List the column names present in the raw frame
print(trn.columns.tolist())

['Unnamed: 0', 'Diabetes_binary', 'HighBP', 'HighChol', 'CholCheck', 'BMI', 'Smoker', 'Stroke', 'HeartDiseaseorAttack', 'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'GenHlth', 'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex', 'Age', 'Education', 'Income']


In [4]:
# Drop the redundant 'Unnamed: 0' column that came in with the CSV.
# It is removed by name rather than by position: a positional drop would
# silently discard a real feature (or the target) if the file's column
# layout ever changed, whereas a missing name fails loudly with a KeyError.
trn_new = trn.drop(columns=["Unnamed: 0"])

In [5]:
# Preview the first five rows after the redundant column has been removed
trn_new.head(n=5)

Unnamed: 0,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0,1,1,1,40,1,0,0,0,0,...,1,0,5,18,15,1,0,9,4,3
1,0,0,0,0,25,1,0,0,1,0,...,0,1,3,0,0,0,0,7,6,1
2,0,1,1,1,28,0,0,0,0,1,...,1,1,5,30,30,1,0,9,4,8
3,0,1,0,1,27,0,0,0,1,1,...,1,0,2,0,0,0,0,11,3,6
4,0,1,1,1,24,0,0,0,1,1,...,1,0,2,3,0,0,0,11,5,4


In [6]:
# Build the feature matrix: every column except the Diabetes_binary target
trn_new2 = trn_new.drop(columns="Diabetes_binary")

In [7]:
# Display the feature matrix (trn_new without the Diabetes_binary column)
trn_new2

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,1,1,1,40,1,0,0,0,0,1,...,1,0,5,18,15,1,0,9,4,3
1,0,0,0,25,1,0,0,1,0,0,...,0,1,3,0,0,0,0,7,6,1
2,1,1,1,28,0,0,0,0,1,0,...,1,1,5,30,30,1,0,9,4,8
3,1,0,1,27,0,0,0,1,1,1,...,1,0,2,0,0,0,0,11,3,6
4,1,1,1,24,0,0,0,1,1,1,...,1,0,2,3,0,0,0,11,5,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253675,1,1,1,45,0,0,0,0,1,1,...,1,0,3,0,5,0,1,5,6,7
253676,1,1,1,18,0,0,0,0,0,0,...,1,0,4,0,0,1,0,11,2,4
253677,0,0,1,28,0,0,0,1,1,0,...,1,0,1,0,0,0,0,2,5,2
253678,1,0,1,23,0,0,0,0,1,1,...,1,0,3,0,0,0,1,7,5,1


In [8]:
# Select the target column (nothing is sorted here): the Diabetes_binary label
y = trn_new.loc[:, "Diabetes_binary"]

In [9]:
# Display the target; this is a pandas Series, not a dataframe
y

0         0
1         0
2         0
3         0
4         0
         ..
253675    0
253676    1
253677    0
253678    0
253679    1
Name: Diabetes_binary, Length: 253680, dtype: int64

## Early Stopping ##

In [10]:
# Early-stopping callback, passed to both the static fit and the tuner search.
# Training stops once val_accuracy has not improved by at least 0.001 for
# 20 consecutive epochs.
custom_early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    min_delta=0.001,
    mode='max',
    # Without this flag the model is left with the weights of the final epoch,
    # i.e. those reached after `patience` epochs with no improvement, instead
    # of the weights from the best-scoring epoch.
    restore_best_weights=True,
)

## Static model ##

In [11]:
# Baseline network with a fixed architecture: 21 inputs -> 21 -> 10 -> 1.
# The hidden layers use ReLU; the single output unit uses a sigmoid.
model = Sequential(
    [
        Input(shape=(21,)),
        Dense(21, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
)

In [12]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Turn the feature frame and the label Series into TensorFlow tensors.
# Both names are rebound, so every later cell receives tensors, not pandas objects.
trn_new2, y = (tf.convert_to_tensor(obj) for obj in (trn_new2, y))

In [14]:
# Train the baseline model for at most 1500 epochs in batches of 500 rows,
# holding out 20% of the data for validation; the early-stopping callback may
# end training long before the epoch cap is reached.
# NOTE(review): Keras appears to take validation_split from the end of the
# arrays without shuffling first — confirm the CSV rows are not ordered.
model.fit(
    trn_new2,
    y,
    validation_split=0.2,
    batch_size=500,
    epochs=1500,
    callbacks=[custom_early_stopping],
)

Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500


<keras.callbacks.History at 0x1964c3e9fd0>

## Model function ##

In [15]:
# Hypermodel builder handed to Keras Tuner
def get_model(hp):
    """Build and compile one candidate network from the tuner's hyperparameters.

    Hyperparameters requested from ``hp``:
      * ``num_layers`` - number of hidden Dense layers, 1 to 7.
      * ``units_{i}``  - width of hidden layer ``i``, 10 to 28 in steps of 3.
      * ``activation`` - ``relu`` or ``tanh``; the same hyperparameter name is
        requested for every hidden layer.
      * ``lr``         - Adam learning rate, log-sampled between 1e-4 and 1e-2.

    Args:
        hp: the hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``Sequential`` model with a 21-feature input, the sampled
        hidden layers and a single sigmoid output unit.
    """
    net = Sequential()
    net.add(Input(shape=(21,)))

    n_hidden = hp.Int("num_layers", min_value=1, max_value=7)
    for idx in range(n_hidden):
        width = hp.Int(f"units_{idx}", min_value=10, max_value=28, step=3)
        act = hp.Choice("activation", ["relu", "tanh"])
        net.add(Dense(units=width, activation=act))

    net.add(Dense(1, activation='sigmoid'))

    adam = tf.keras.optimizers.Adam(
        learning_rate=hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    )
    net.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return net

## TensorBoard callback

In [16]:
# TensorBoard callback. Each execution of this cell logs into its own
# time-stamped sub-directory of ./logs, so event files from earlier runs are
# neither overwritten nor mixed with the current run. Launching TensorBoard
# with `--logdir ./logs` still shows every run, since sub-directories are scanned.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=f"./logs/{datetime.datetime.now():%Y%m%d-%H%M%S}"
)

## Keras Tuner

In [17]:
# Random-search tuner over the get_model search space: 30 trials, one training
# run per trial, ranked by validation accuracy. overwrite=True discards any
# earlier results stored for this project under ./benmk/.
tuner = keras_tuner.RandomSearch(
    hypermodel=get_model,
    objective="val_accuracy",
    max_trials=30,
    executions_per_trial=1,
    # Fixed seed so the same hyperparameter combinations are drawn on every
    # run of the notebook (weight initialisation is still random).
    seed=42,
    overwrite=True,
    directory="./benmk/",
    project_name="diabetes",
)

In [18]:
# Print the hyperparameters currently registered in the tuner's search space
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 7, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 28, 'step': 3, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
lr (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [19]:
# Run the hyperparameter search (this is not a single model fit). The data and
# training settings match the baseline fit, with TensorBoard logging added.
tuner.search(
    trn_new2,
    y,
    validation_split=0.2,
    batch_size=500,
    epochs=1500,
    callbacks=[custom_early_stopping, tensorboard_callback],
)

Trial 30 Complete [00h 00m 42s]
val_accuracy: 0.8649479746818542

Best val_accuracy So Far: 0.8654603958129883
Total elapsed time: 00h 20m 50s
INFO:tensorflow:Oracle triggered exit


## Launching TensorBoard

In [24]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [27]:
# Reload the TensorBoard extension, as the "already loaded" message suggests
%reload_ext tensorboard

In [25]:
# Open TensorBoard inline, pointing it at the ./logs directory used by the callback
%tensorboard --logdir "./logs"

Reusing TensorBoard on port 6006 (pid 1108), started 3 days, 16:35:42 ago. (Use '!kill 1108' to kill it.)

In [29]:
# Alternative launch: same log directory, with the host given explicitly as localhost
%tensorboard --logdir "./logs"  --host localhost

Reusing TensorBoard on port 6006 (pid 8548), started 22:12:07 ago. (Use '!kill 8548' to kill it.)