# Machine Learning in Python - Keras & Scikit-Learn

In [1]:
import pandas as pd
import numpy as np

from IPython.display import display, HTML, Image
from IPython.display import SVG

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')

from TAS_Python_Utilities import data_viz
from TAS_Python_Utilities import visualize_tree

from sklearn.tree import export_graphviz
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 

from keras.models import Sequential
from keras.layers import Dense, Activation, Merge, Dropout
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import model_to_dot
from keras.wrappers.scikit_learn import KerasClassifier
 

Using TensorFlow backend.


## Dataset Pre-processing

Read in a dataset

In [2]:
abt = pd.read_csv("mnist_train_small.csv", encoding = "ISO-8859-1")

# Put all but the target variable into the descriptive features array
X = abt[abt.columns.difference(["value"])]
Y = abt["value"]

# Use a range scaling to scale all variables to between 0 and 1
min_max_scaler = preprocessing.MinMaxScaler()
cols = X.columns
X = pd.DataFrame(min_max_scaler.fit_transform(X), columns = cols) # Watch out for putting back in columns here

X_train_plus_valid, X_test, y_train_plus_valid, y_test = train_test_split(X, Y, random_state=0, test_size = 0.30, train_size = 0.7)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_plus_valid, y_train_plus_valid, random_state=0, test_size = 0.199/0.7, train_size = 0.5/0.7)

# Convert the singl column label into a dummy coded label
y_train_wide = to_categorical(np.asarray(y_train))
y_train_plus_valid_wide = to_categorical(np.asarray(y_train_plus_valid))
y_valid_wide = to_categorical(np.asarray(y_valid))

## Model Tuning

Specfiy the structure of the neural network model and training parameters in a function

In [3]:
# Function to create model, required for KerasClassifier
def create_model(optimiser = "rmsprop", hidden_units = 512):
    # create model
    model = Sequential()
    model.add(Dense(input_dim=784, units=hidden_units))
    model.add(Activation("sigmoid"))
    model.add(Dense(units=10))
    model.add(Activation("softmax"))
    # Compile model
    model.compile(optimizer=optimiser,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return model


Perform a grid search

In [4]:
param_grid ={'optimiser': ['rmsprop', 'adam'], \
             'hidden_units':[100, 200, 300],
#            'epochs': [20, 50, 100, 150], \
              'epochs': [4, 8], \
             'batch_size': [8, 16, 32, 64]}
#             'batch_size': [8, 16]}

model = KerasClassifier(build_fn=create_model, verbose=0)
my_tuned_model = GridSearchCV(estimator=model, param_grid=param_grid, verbose = 2, cv = 2)
my_tuned_model.fit(np.asfarray(X_train_plus_valid), np.asfarray(y_train_plus_valid))

# summarize results
print("Best: %f using %s" % (my_tuned_model.best_score_, my_tuned_model.best_params_))
means = my_tuned_model.cv_results_['mean_test_score']
stds = my_tuned_model.cv_results_['std_test_score']
params = my_tuned_model.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    

Fitting 2 folds for each of 48 candidates, totalling 96 fits
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop, total=  16.0s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop .....


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   17.0s remaining:    0.0s


[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop, total=  13.3s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=adam, total=  13.1s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=adam, total=  13.1s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop, total=  13.6s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop, total=  14.5s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=adam, total=  14.4s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=adam, total=  15.4s
[CV] batch_size=8, 

[Parallel(n_jobs=1)]: Done  96 out of  96 | elapsed: 25.4min finished


Best: 0.916143 using {'batch_size': 8, 'epochs': 8, 'hidden_units': 300, 'optimiser': 'adam'}
0.902286 (0.006000) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 100, 'optimiser': 'rmsprop'}
0.899429 (0.000286) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 100, 'optimiser': 'adam'}
0.899857 (0.004714) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 200, 'optimiser': 'rmsprop'}
0.904286 (0.000857) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 200, 'optimiser': 'adam'}
0.904143 (0.002714) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 300, 'optimiser': 'rmsprop'}
0.906286 (0.001429) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 300, 'optimiser': 'adam'}
0.909714 (0.002000) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 100, 'optimiser': 'rmsprop'}
0.913143 (0.003143) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 100, 'optimiser': 'adam'}
0.910857 (0.000571) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 200, 'optimiser': 'rmsprop'}


In [8]:
print("Best: %f using %s" % (my_tuned_model.best_score_, my_tuned_model.best_params_))


Best: 0.916143 using {'batch_size': 8, 'epochs': 8, 'hidden_units': 300, 'optimiser': 'adam'}


Draw the model

In [9]:
SVG(model_to_dot(my_tuned_model).create(prog='dot', format='svg'))

AttributeError: 'GridSearchCV' object has no attribute 'layers'

Evaluate the model on a test dataset

In [11]:
print("****** Test Data ********")

# Make a set of predictions for the validation data
y_pred = my_tuned_model.predict(np.asfarray(X_test))

# Print performance details
print("Accuracy : - " , metrics.accuracy_score(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

# Print confusion matrix
print("Confusion Matrix")
display(pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True))

****** Test Data ********
Accuracy : -  0.9393333333333334
             precision    recall  f1-score   support

          0       0.96      0.97      0.96       289
          1       0.98      0.98      0.98       355
          2       0.96      0.90      0.93       303
          3       0.94      0.93      0.94       303
          4       0.97      0.90      0.93       269
          5       0.92      0.90      0.91       286
          6       0.92      0.97      0.94       310
          7       0.95      0.96      0.95       299
          8       0.88      0.94      0.91       287
          9       0.92      0.94      0.93       299

avg / total       0.94      0.94      0.94      3000

Confusion Matrix


Predicted,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,280,0,0,0,0,0,1,2,5,1,289
1,0,347,0,1,0,0,0,1,5,1,355
2,3,4,274,4,1,2,5,2,7,1,303
3,1,0,3,282,0,7,0,4,4,2,303
4,0,2,0,0,241,0,7,1,4,14,269
5,3,0,1,6,2,257,8,0,6,3,286
6,2,0,1,0,0,6,300,0,1,0,310
7,0,0,6,1,3,0,0,286,0,3,299
8,1,1,1,3,0,5,6,0,269,1,287
9,3,0,0,2,2,1,0,5,4,282,299
