## Set-up

In [1]:
# Import packages 
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import pandas as pd
import csv
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner

## Dataset Splitting and defining target and sample data

In [2]:
# Read in Training dataset
trn = pd.read_csv("downsample.csv", header = 0, na_filter = False)
trn

Unnamed: 0,X,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,96300,0,1,1,1,28,0,0,0,1,...,1,0,3,0,5,0,1,8,4,7
1,219896,0,0,0,1,19,0,0,0,1,...,1,0,1,2,5,0,1,1,3,8
2,106786,0,1,1,1,28,0,0,0,1,...,1,0,3,0,5,1,0,9,5,6
3,35130,0,0,0,1,21,0,0,0,0,...,1,0,2,0,0,0,0,9,4,5
4,227116,0,1,0,1,34,1,0,1,0,...,1,0,5,30,15,1,0,4,5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70687,253660,1,0,1,1,37,0,0,0,0,...,1,0,4,0,0,0,0,6,4,1
70688,253669,1,0,1,1,29,1,0,1,0,...,1,0,2,0,0,1,1,10,3,6
70689,253671,1,1,1,1,25,0,0,1,0,...,1,0,5,15,0,1,0,13,6,4
70690,253677,1,1,1,1,18,0,0,0,0,...,1,0,4,0,0,1,0,11,2,4


In [3]:
# Check features in dataframe
print(list(trn))

['X', 'Diabetes_binary', 'HighBP', 'HighChol', 'CholCheck', 'BMI', 'Smoker', 'Stroke', 'HeartDiseaseorAttack', 'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'GenHlth', 'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex', 'Age', 'Education', 'Income']


In [4]:
# Drop redundant columns 
trn_new = trn.drop(trn.columns[[0]],
                       axis = 1)

In [5]:
# View the cleaned dataframe
trn_new.head()

Unnamed: 0,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0,1,1,1,28,0,0,0,1,1,...,1,0,3,0,5,0,1,8,4,7
1,0,0,0,1,19,0,0,0,1,0,...,1,0,1,2,5,0,1,1,3,8
2,0,1,1,1,28,0,0,0,1,0,...,1,0,3,0,5,1,0,9,5,6
3,0,0,0,1,21,0,0,0,0,0,...,1,0,2,0,0,0,0,9,4,5
4,0,1,0,1,34,1,0,1,0,1,...,1,0,5,30,15,1,0,4,5,3


In [6]:
# Drop the predictor feature from the dataframe
trn_new2 = trn_new.drop(['Diabetes_binary'], axis = 1)

In [7]:
# View the sample dataframe
trn_new2

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,1,1,1,28,0,0,0,1,1,1,...,1,0,3,0,5,0,1,8,4,7
1,0,0,1,19,0,0,0,1,0,1,...,1,0,1,2,5,0,1,1,3,8
2,1,1,1,28,0,0,0,1,0,1,...,1,0,3,0,5,1,0,9,5,6
3,0,0,1,21,0,0,0,0,0,0,...,1,0,2,0,0,0,0,9,4,5
4,1,0,1,34,1,0,1,0,1,1,...,1,0,5,30,15,1,0,4,5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70687,0,1,1,37,0,0,0,0,0,1,...,1,0,4,0,0,0,0,6,4,1
70688,0,1,1,29,1,0,1,0,1,1,...,1,0,2,0,0,1,1,10,3,6
70689,1,1,1,25,0,0,1,0,1,0,...,1,0,5,15,0,1,0,13,6,4
70690,1,1,1,18,0,0,0,0,0,0,...,1,0,4,0,0,1,0,11,2,4


In [8]:
# Sort the predictor variable
y = trn_new['Diabetes_binary']

In [9]:
# View the dataframe
y

0        0
1        0
2        0
3        0
4        0
        ..
70687    1
70688    1
70689    1
70690    1
70691    1
Name: Diabetes_binary, Length: 70692, dtype: int64

## Early-Stopping

In [10]:
# Creating early stopping
custom_early_stopping = EarlyStopping(
    monitor='val_accuracy', 
    patience=20, 
    min_delta=0.001, 
    mode='max'
)

## Static model

In [11]:
# Define the keras model
model = Sequential()
model.add(Input(shape=(21,)))
model.add(Dense(21, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [12]:
# Compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
# Converting dataframes to tensor
trn_new2 = tf.convert_to_tensor(trn_new2)
y = tf.convert_to_tensor(y)

In [14]:
# Fit the keras model on the dataset
model.fit(trn_new2, y, epochs=1500, batch_size=200, callbacks = [custom_early_stopping], validation_split=0.2)

Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500


<keras.callbacks.History at 0x22bbdab8fd0>

## Model function

In [15]:
# Creating model function
def get_model(hp):
    model = Sequential()
    model.add(Input(shape=(21,)))
    for i in range(hp.Int("num_layers",1,7)):
        model.add(Dense(units = hp.Int(f"units_{i}",min_value = 10, max_value = 28, step = 3), activation = hp.Choice("activation",["relu","tanh"]),))
    model.add(Dense(1, activation='sigmoid'))
    learning_rate = hp.Float("lr",min_value = 1e-4,max_value = 1e-2,sampling="log")
    model.compile(loss = 'binary_crossentropy', optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate), metrics = ['accuracy'])
    return model

## Tensorboard

In [16]:
# Setting tensorboard callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs2")

## Keras tuner

In [17]:
# Defining Keras tuner
tuner = keras_tuner.RandomSearch(hypermodel=get_model,objective="val_accuracy",max_trials=30,executions_per_trial=1,overwrite=True,directory="./benmk/",project_name="diabetes2")

In [18]:
# Whats being captured by the Keras tuner
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 7, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 28, 'step': 3, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
lr (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [19]:
# Fit the keras model on the dataset
tuner.search(trn_new2, y, epochs=1500, batch_size=200, callbacks = [custom_early_stopping, tensorboard_callback], validation_split=0.2)

Trial 30 Complete [00h 00m 26s]
val_accuracy: 0.825093686580658

Best val_accuracy So Far: 0.8376122713088989
Total elapsed time: 00h 15m 31s
INFO:tensorflow:Oracle triggered exit


# Tensorboard

In [20]:
# Load the extension for tensorboard
%load_ext tensorboard

In [None]:
# Reload extension for tensorboard
%reload_ext tensorboard

In [21]:
# Launch tensorboard
%tensorboard --logdir "./logs2"

Reusing TensorBoard on port 6006 (pid 14996), started 3 days, 14:10:44 ago. (Use '!kill 14996' to kill it.)

In [22]:
# Launch tensorboard alternative way
%tensorboard --logdir "./logs2"  --host localhost