In [1]:
import pandas as pd

## Download PIMA Indians Diabetes dataset

1. Number of times pregnant 
2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test 
3. Diastolic blood pressure (mm Hg) 
4. Triceps skin fold thickness (mm) 
5. 2-Hour serum insulin (mu U/ml) 
6. Body mass index (weight in kg/(height in m)^2) 
7. Diabetes pedigree function 
8. Age (years) 
9. Class variable (0 or 1) 

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataset = pd.read_csv(url, names=names)

## Summarize Data

In [3]:
print(dataset.shape)
print(dataset.dtypes)

(768, 9)
preg       int64
plas       int64
pres       int64
skin       int64
test       int64
mass     float64
pedi     float64
age        int64
class      int64
dtype: object


In [4]:
print(dataset.head(20))

    preg  plas  pres  skin  test  mass   pedi  age  class
0      6   148    72    35     0  33.6  0.627   50      1
1      1    85    66    29     0  26.6  0.351   31      0
2      8   183    64     0     0  23.3  0.672   32      1
3      1    89    66    23    94  28.1  0.167   21      0
4      0   137    40    35   168  43.1  2.288   33      1
5      5   116    74     0     0  25.6  0.201   30      0
6      3    78    50    32    88  31.0  0.248   26      1
7     10   115     0     0     0  35.3  0.134   29      0
8      2   197    70    45   543  30.5  0.158   53      1
9      8   125    96     0     0   0.0  0.232   54      1
10     4   110    92     0     0  37.6  0.191   30      0
11    10   168    74     0     0  38.0  0.537   34      1
12    10   139    80     0     0  27.1  1.441   57      0
13     1   189    60    23   846  30.1  0.398   59      1
14     5   166    72    19   175  25.8  0.587   51      1
15     7   100     0     0     0  30.0  0.484   32      1
16     0   118

# The first model

![title](img/MLP1.PNG)

In [5]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

Using Theano backend.


In [6]:
seed = 7
np.random.seed(seed)

In [7]:
array = dataset.values
X = array[:,0:8]
Y = array[:,8]

In [8]:
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, init='uniform', activation='relu'))
model.add(Dense(8, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))

In [9]:
# compile model
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [10]:
# fit model
model.fit(X, Y, nb_epoch=150, batch_size=10,verbose=0)

<keras.callbacks.History at 0x11490cbd0>

In [11]:
# evaluate model
scores = model.evaluate(X,Y)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

 32/768 [>.............................] - ETA: 0sacc: 78.91%


# Use a validation dataset

In [12]:
from sklearn import cross_validation



In [13]:
array = dataset.values
X = array[:,0:8]
Y = array[:,8]

validation_size = 0.20
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=validation_size, random_state=seed)

In [14]:
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, init='uniform', activation='relu'))
model.add(Dense(8, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))

In [15]:
# compile model
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [16]:
# fit model
model.fit(X_train, Y_train, validation_data=(X_test,Y_test), nb_epoch=150, batch_size=10)

Train on 614 samples, validate on 154 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
E

<keras.callbacks.History at 0x1155a1410>

# Manual k-Fold CV

In [17]:
from sklearn.model_selection import StratifiedKFold

In [18]:
# define 4-fold cross validation test harness
kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=seed)
cvscores = []

In [19]:
for train, test in kfold.split(X, Y):
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init= 'uniform' , activation= 'relu' ))
    model.add(Dense(8, init= 'uniform' , activation= 'relu' ))
    model.add(Dense(1, init= 'uniform' , activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])
    # Fit the model
    model.fit(X[train], Y[train], nb_epoch=150, batch_size=10, verbose=0)
    # evaluate the model
    scores = model.evaluate(X[test], Y[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

acc: 70.83%
acc: 83.85%
acc: 75.00%
acc: 70.83%


In [20]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

75.13% (+/- 5.32%)


# Tune Hyperparameters using Grid Search

In [21]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [22]:
validation_size = 0.20
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=validation_size, random_state=seed)

In [23]:
scaler = StandardScaler().fit(X_train)
rescaledX_train = scaler.transform(X_train)

In [24]:
# Function to create model, required for KerasClassifier
def create_model(optimizer= 'rmsprop' , init= 'uniform' ):
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init=init, activation= 'relu' ))
    model.add(Dense(8, init=init, activation= 'relu' ))
    model.add(Dense(1, init=init, activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer=optimizer, metrics=[ 'accuracy' ])
    return model

In [25]:
# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

In [26]:
# grid search epochs, batch size and optimizer
optimizers = ['rmsprop','adam']
init = ['normal','uniform']
epochs = np.array([150])
batches = np.array([10])
param_grid = dict(optimizer=optimizers, nb_epoch=epochs, batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(rescaledX_train, Y_train)

In [27]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.763844 using {'init': 'normal', 'optimizer': 'adam', 'nb_epoch': 150, 'batch_size': 10}


In [28]:
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

0.754073 (0.021914) with: {'init': 'normal', 'optimizer': 'rmsprop', 'nb_epoch': 150, 'batch_size': 10}
0.763845 (0.013953) with: {'init': 'normal', 'optimizer': 'adam', 'nb_epoch': 150, 'batch_size': 10}
0.762227 (0.007933) with: {'init': 'uniform', 'optimizer': 'rmsprop', 'nb_epoch': 150, 'batch_size': 10}
0.754065 (0.004912) with: {'init': 'uniform', 'optimizer': 'adam', 'nb_epoch': 150, 'batch_size': 10}




# Multiclass Classification

## Download Iris Flowers Dataset

1. sepal length in cm
2. sepal width in cm
3. petal length in cm
4. petal width in cm
5. class:
-- Iris Setosa
-- Iris Versicolour
-- Iris Virginica

In [29]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['seplen', 'sepwid', 'petlen', 'petwid', 'class']
iris = pd.read_csv(url, names=names)

In [30]:
print(iris.shape)
print(iris.dtypes)
iris.head()

(150, 5)
seplen    float64
sepwid    float64
petlen    float64
petwid    float64
class      object
dtype: object


Unnamed: 0,seplen,sepwid,petlen,petwid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [31]:
X = iris.values[:,0:4].astype(float)
Y = iris.values[:,4]

## Encode the Outcome

In [32]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

In [33]:
Y[0:5]

array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa'], dtype=object)

In [34]:
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert int to dummy variables (one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

In [35]:
dummy_y[0:5]

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.]])

## Define the NN

4 inputs -> [4 hidden nodes] -> 3 outputs

In [36]:
# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, init= 'normal' , activation= 'relu' ))
    model.add(Dense(3, init= 'normal' , activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'categorical_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
    return model

## Evalute the Model using k-Fold CV

In [37]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [38]:
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 96.67% (4.47%)


# Improve Model Performance

## The Sonar Object Classification Dataset

- A dataset that describes sonar chirp returns bouncing off different surfaces. 
- 60 input variables: the strength of the returns at different angles, all continuous, generally range from 0-1
- binary classification problem (rocks vs. metal cylinders)
- 208 observations

In [39]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
#names = ['seplen', 'sepwid', 'petlen', 'petwid', 'class']
sonar = pd.read_csv(url,header=None)

In [40]:
print(sonar.shape)
print(sonar.dtypes)
sonar.head()

(208, 61)
0     float64
1     float64
2     float64
3     float64
4     float64
5     float64
6     float64
7     float64
8     float64
9     float64
10    float64
11    float64
12    float64
13    float64
14    float64
15    float64
16    float64
17    float64
18    float64
19    float64
20    float64
21    float64
22    float64
23    float64
24    float64
25    float64
26    float64
27    float64
28    float64
29    float64
       ...   
31    float64
32    float64
33    float64
34    float64
35    float64
36    float64
37    float64
38    float64
39    float64
40    float64
41    float64
42    float64
43    float64
44    float64
45    float64
46    float64
47    float64
48    float64
49    float64
50    float64
51    float64
52    float64
53    float64
54    float64
55    float64
56    float64
57    float64
58    float64
59    float64
60     object
dtype: object


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [41]:
X = sonar.values[:,0:60].astype(float)
Y = sonar.values[:,60]

In [42]:
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

In [43]:
Y[0:5]

array(['R', 'R', 'R', 'R', 'R'], dtype=object)

In [44]:
encoded_Y[0:5]

array([1, 1, 1, 1, 1])

## Baseline Model

60 inputs -> [60 hidden nodes] -> 1 output

In [45]:
# baseline model
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init= 'normal' , activation= 'relu' ))
    model.add(Dense(1, init= 'normal' , activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])
    return model

In [46]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, nb_epoch=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 81.23% (7.89%)


## Improve model performance with data preparation

In [47]:
from sklearn.pipeline import Pipeline

In [48]:
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, nb_epoch=100,batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 84.61% (4.14%)


## Evaluate a smaller network

60 inputs -> [30] -> 1 output

In [49]:
def create_smaller():
    # create model
    model = Sequential()
    model.add(Dense(30, input_dim=60, init= 'normal' , activation= 'relu' ))
    model.add(Dense(1, init= 'normal' , activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
    return model

In [50]:
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, nb_epoch=100,batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 85.56% (5.69%)


## Evaluate a larger network

60 inputs -> [60 -> 30] -> 1 output

In [51]:
def create_larger():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init= 'normal' , activation= 'relu' ))
    model.add(Dense(30, init= 'normal' , activation= 'relu' ))
    model.add(Dense(1, init= 'normal' , activation= 'sigmoid' ))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
    return model

In [52]:
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, nb_epoch=100,batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Larger: 85.54% (4.39%)


## Add Dropout and Max Norm Constraints

60 inputs (dropout 0.2) -> [60] -> 1 output

In [53]:
from keras.layers import Dropout
from keras.constraints import maxnorm

In [54]:
def create_dropout1():
    # create model
    model = Sequential()
    model.add(Dropout(0.2, input_shape=(60,)))
    model.add(Dense(30, init= 'normal' , activation= 'relu', W_constraint=maxnorm(3) ))
    model.add(Dense(1, init= 'normal' , activation= 'sigmoid'))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
    return model

In [55]:
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_dropout1, nb_epoch=100,batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 86.00% (3.58%)


60 inputs -> [60 (dropout 0.2)] -> 1 output

In [56]:
def create_dropout2():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init= 'normal' , activation= 'relu', W_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(1, init= 'normal' , activation= 'sigmoid'))
    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
    return model

In [57]:
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_dropout2, nb_epoch=100,batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 85.11% (6.53%)
