In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
import keras
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import BatchNormalization, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [3]:
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
X = df.drop('Outcome', axis=1)
Y = df['Outcome']

In [5]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

In [8]:
def L2_model():
    
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(64,input_shape=(8,),
                kernel_regularizer=keras.regularizers.L2(l2=1e-3),
                activity_regularizer=keras.regularizers.L2(1e-3),
                activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def L1_model():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(64,input_shape=(8,),
                kernel_regularizer=keras.regularizers.L1(0.01),
                activity_regularizer=keras.regularizers.L1(0.01),
                activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def dropout_model():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(64,input_shape=(8,),activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [10]:
kfold = StratifiedKFold(n_splits=10, shuffle=True)

In [11]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=L2_model, epochs=50, batch_size=10, verbose=0)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("L2_model: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
L2_model_results = results

L2_model: 69.79% (4.24%)


In [12]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=L1_model, epochs=50, batch_size=5, verbose=0)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("L1_model: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
L1_model_results = results

L1_model: 67.06% (2.20%)


In [13]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=dropout_model, epochs=50, batch_size=10, verbose=0)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("dropout_model: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
dropout_model_results = results

dropout_model: 68.36% (3.75%)


In [26]:
def early_stopping_model():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(64,input_shape=(8,),activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=1)
mc = keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

In [27]:
history = early_stopping_model().fit(X, Y, validation_data=(X, Y), epochs=100, verbose=0, callbacks=[es, mc])


Epoch 00001: val_accuracy improved from -inf to 0.60547, saving model to best_model.h5

Epoch 00002: val_accuracy improved from 0.60547 to 0.62760, saving model to best_model.h5

Epoch 00003: val_accuracy improved from 0.62760 to 0.66927, saving model to best_model.h5
Epoch 00003: early stopping


In [37]:
# evaluate model with standardized dataset
early_stopping = keras.models.load_model('best_model.h5')
_, train_acc = early_stopping.evaluate(X, Y, verbose=0)
_, test_acc = early_stopping.evaluate(X, Y, verbose=0)
print("dropout_model: %.2f%% (%.2f%%)" % (train_acc, test_acc))
dropout_model_results = "dropout_model: %.2f%% (%.2f%%)" % (train_acc, test_acc)

dropout_model: 0.67% (0.67%)
