<h2>Iris dataset in Keras library</h2>
This code is a modified version of the code from this tutorial at <a href="https://machinelearningmastery.com/multi-class-classification-tutorial-keras-deep-learning-library/">MachineLearningMastery</a>.

<h2>Load data</h2>

In [1]:
# Ignore future warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import numpy as np
import tensorflow as tf
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

# Seed the TensorFlow and NumPy random number generators
# Note that setting seed for Keras with TensorFlow backend seems to be problematic
tf.set_random_seed(42)
np.random.seed(42)

# Load data
df = pd.read_csv('data/iris.csv')
np_data = df.values

# Split data into X (every column except the last) and raw labels (last column).
# The label column holds class-name strings, so np_data is presumably an
# object-dtype array; cast the feature slice to float so the model is fed a
# genuinely numeric array rather than an array of Python objects.
X = np_data[:, 0:-1].astype(float)
Y_raw = np_data[:, -1]

# Convert class label strings to integers
encoder = LabelEncoder()
Y_int = encoder.fit_transform(Y_raw)

# Convert the 1D integer label array to a 2D one-hot matrix with 3 columns
# (one column per class)
Y = np_utils.to_categorical(Y_int, 3)

Using TensorFlow backend.


<h2>Define and evaluate model</h2>

In [2]:
from keras.models import Sequential
from keras.layers import Dense
import time
from sklearn.metrics import accuracy_score

def build_model():
    """Build and compile the feed-forward classifier used in this notebook.

    The network takes 4 input features, passes them through two ReLU hidden
    layers (12 and 8 units) and ends in a 3-unit softmax output layer. It is
    compiled with categorical cross-entropy loss, the Adam optimizer and
    accuracy as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    # Layer stack, listed in the order data flows through it
    layers = [
        Dense(12, input_dim=4, activation='relu'),
        Dense(8, activation='relu'),
        Dense(3, activation='softmax'),
    ]
    network = Sequential(layers)

    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Start timer
start = time.time()

# Build model
model = build_model()
# Train the model
model.fit(X, Y, epochs=300, batch_size=20, verbose=0)
# Evaluate on the same data the model was trained on. The model is compiled
# with metrics=['accuracy'], so score[1] is read as the accuracy below.
score = model.evaluate(X, Y, verbose=0)

# Stop timer (the elapsed time covers build + train + evaluate)
end = time.time()

# Print results; the score computed above is reused instead of running an
# identical second evaluation pass
print("\nAccuracy: {0:0.2f}%".format(score[1] * 100))
print("Time elapsed: {0:0.2f} sec".format(end - start))


Accuracy: 97.33%
Time elapsed: 3.90 sec


<h2>10-fold Cross Validation</h2>

In [3]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from keras.wrappers.scikit_learn import KerasClassifier

# Wrap the Keras model builder so scikit-learn can use it as an estimator
estimator = KerasClassifier(
    build_fn=build_model,
    epochs=300,
    batch_size=20,
    verbose=0,
)

# Shuffled 10-way split with a fixed random_state
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# Score the estimator on each fold, then summarise the scores as percentages
results = cross_val_score(estimator, X, Y, cv=kfold)
mean_pct = results.mean() * 100
std_pct = results.std() * 100
print("Average accuracy: %.2f%% (stdev = %.2f%%)" % (mean_pct, std_pct))

Average accuracy: 98.00% (stdev = 3.06%)


<h2>Predictions</h2>

In [4]:
from sklearn.metrics import accuracy_score

# Run the trained model on the training inputs
predictions = model.predict(X)
# The labels (Y) are one-hot rows and the predictions are per-class score rows,
# so take the argmax along axis 1 to get a single class index per sample
true_classes = np.argmax(Y, axis=1)
predicted_classes = np.argmax(predictions, axis=1)
accuracy = accuracy_score(true_classes, predicted_classes)
print("Accuracy: {0:0.2f}%".format(accuracy * 100))

Accuracy: 97.33%
