# Machine Learning in Python - Keras & Scikit-Learn

In [1]:
import pandas as pd
import numpy as np

from IPython.display import display, HTML, Image
from IPython.display import SVG

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')

from TAS_Python_Utilities import data_viz
from TAS_Python_Utilities import visualize_tree

from sklearn.tree import export_graphviz
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 

from keras.models import Sequential
from keras.layers import Dense, Activation, Merge, Dropout
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import model_to_dot
from keras.wrappers.scikit_learn import KerasClassifier
 

Using TensorFlow backend.


## Dataset Pre-processing

Read in a dataset

In [2]:
abt = pd.read_csv("mnist_train_small.csv", encoding = "ISO-8859-1")

# Put all but the target variable into the descriptive features array
X = abt[abt.columns.difference(["value"])]
Y = abt["value"]

# Use a range scaling to scale all variables to between 0 and 1
min_max_scaler = preprocessing.MinMaxScaler()
cols = X.columns
X = pd.DataFrame(min_max_scaler.fit_transform(X), columns = cols) # Watch out for putting back in columns here

X_train_plus_valid, X_test, y_train_plus_valid, y_test = train_test_split(X, Y, random_state=0, test_size = 0.30, train_size = 0.7)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_plus_valid, y_train_plus_valid, random_state=0, test_size = 0.199/0.7, train_size = 0.5/0.7)

# Convert the singl column label into a dummy coded label
y_train_wide = to_categorical(np.asarray(y_train))
y_train_plus_valid_wide = to_categorical(np.asarray(y_train_plus_valid))
y_valid_wide = to_categorical(np.asarray(y_valid))

## Model Tuning

Specfiy the structure of the neural network model and training parameters in a function

In [3]:
# Function to create model, required for KerasClassifier
def create_model(optimiser = "rmsprop", hidden_units = 512):
    # create model
    model = Sequential()
    model.add(Dense(input_dim=784, units=hidden_units))
    model.add(Activation("sigmoid"))
    model.add(Dense(units=10))
    model.add(Activation("softmax"))
    # Compile model
    model.compile(optimizer=optimiser,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return model

Perform a grid search

In [4]:
param_grid ={'optimiser': ['rmsprop', 'adam'], \
             'hidden_units':[100, 200, 300],
#            'epochs': [20, 50, 100, 150], \
              'epochs': [4, 8], \
#             'batch_size': [8, 16, 32, 64]}
             'batch_size': [8, 16]}
 
model = KerasClassifier(build_fn=create_model, verbose=0)
my_tuned_model = GridSearchCV(estimator=model, param_grid=param_grid, verbose = 2, cv = 2)
my_tuned_model.fit(np.asfarray(X_train_plus_valid), np.asfarray(y_train_plus_valid))

# summarize results
print("Best: %f using %s" % (my_tuned_model.best_score_, my_tuned_model.best_params_))
means = my_tuned_model.cv_results_['mean_test_score']
stds = my_tuned_model.cv_results_['std_test_score']
params = my_tuned_model.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    

Fitting 2 folds for each of 24 candidates, totalling 48 fits
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop, total=   4.1s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop .....


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.3s remaining:    0.0s


[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=rmsprop, total=   3.9s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=adam, total=   4.6s
[CV] batch_size=8, epochs=4, hidden_units=100, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=100, optimiser=adam, total=   4.8s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop, total=   5.2s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop .....
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=rmsprop, total=   5.1s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=adam, total=   5.5s
[CV] batch_size=8, epochs=4, hidden_units=200, optimiser=adam ........
[CV]  batch_size=8, epochs=4, hidden_units=200, optimiser=adam, total=   5.8s
[CV] batch_size=8, 

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:  5.7min finished


Best: 0.916000 using {'batch_size': 8, 'epochs': 8, 'hidden_units': 100, 'optimiser': 'adam'}
0.898000 (0.001143) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 100, 'optimiser': 'rmsprop'}
0.901429 (0.004857) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 100, 'optimiser': 'adam'}
0.903143 (0.004571) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 200, 'optimiser': 'rmsprop'}
0.904429 (0.000714) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 200, 'optimiser': 'adam'}
0.899429 (0.000857) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 300, 'optimiser': 'rmsprop'}
0.904143 (0.003286) with: {'batch_size': 8, 'epochs': 4, 'hidden_units': 300, 'optimiser': 'adam'}
0.908000 (0.004286) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 100, 'optimiser': 'rmsprop'}
0.916000 (0.002286) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 100, 'optimiser': 'adam'}
0.913143 (0.003714) with: {'batch_size': 8, 'epochs': 8, 'hidden_units': 200, 'optimiser': 'rmsprop'}


Draw the model

In [6]:
SVG(model_to_dot(my_tuned_model).create(prog='dot', format='svg'))

AttributeError: 'KerasClassifier' object has no attribute 'layers'

Evaluate the model on a test dataset

In [7]:
print("****** Test Data ********")

# Make a set of predictions for the validation data
y_pred = my_tuned_model.predict(np.asfarray(X_test))

# Print performance details
print(metrics.classification_report(y_test, y_pred))

# Print confusion matrix
print("Confusion Matrix")
display(pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True))

****** Test Data ********
             precision    recall  f1-score   support

          0       0.92      0.97      0.94       289
          1       0.97      0.99      0.98       355
          2       0.92      0.92      0.92       303
          3       0.96      0.91      0.93       303
          4       0.94      0.93      0.94       269
          5       0.91      0.93      0.92       286
          6       0.92      0.94      0.93       310
          7       0.95      0.94      0.95       299
          8       0.93      0.92      0.92       287
          9       0.95      0.90      0.92       299

avg / total       0.94      0.94      0.94      3000

Confusion Matrix


Predicted,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,281,1,1,0,1,0,0,1,4,0,289
1,0,351,2,0,0,0,0,0,1,1,355
2,4,3,280,2,1,0,6,2,5,0,303
3,2,0,4,277,0,12,0,5,2,1,303
4,1,3,0,0,251,1,4,1,3,5,269
5,4,1,2,4,2,265,8,0,0,0,286
6,4,2,4,0,2,6,290,0,2,0,310
7,1,0,7,1,2,0,0,282,0,6,299
8,3,1,2,3,0,6,6,1,264,1,287
9,6,1,2,3,8,2,0,5,3,269,299
