In [1]:
import tensorflow as tf
import keras

Using TensorFlow backend.


### Drill: 99% MLP
- Try and tune the MLP model from above to achieve 99% accuracy
- How does its runtime compare to the CNN/RNNs?

In [2]:
# import dataset
from keras.datasets import mnist

# model building imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers import LSTM, Input, TimeDistributed
from keras.models import Model
from keras.optimizers import RMSprop

# import backend
from keras import backend as K

In [3]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [4]:
# 60,000 train images, 10,000 test images
# Images are 28*28, array length should be 784

X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)

# Convert to float32 for type consistency
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize values from 0-255 (256 values of pixels) to 1
X_train /= 255
X_test /= 255

# print sample sizes
print('X_train shape: {}\nX_test shape: {}'.format(X_train.shape,
                                                   X_test.shape))

# Convert class vectors to binary class matrices
# Instead of one column with 10 values, create 10 binary columns
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# instantiate model
model = Sequential()

# add dense layers to create fully connected MLP
# note: input shape is specified only for the first layer
model.add(Dense(64, activation='relu', input_shape=(784,)))
model.add(Dropout(0.1)) # dropout layers removing features to reduce overfitting

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(10, activation='softmax')) # set last layer units equal to number of classes

model.summary()

# compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy']
)
# train/evaluate
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test)
)
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss: {}'.format(score[0]))
print('test accuracy: {}'.format(score[1]))

X_train shape: (60000, 784)
X_test shape: (10000, 784)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                50240     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10


In [17]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
import time

In [23]:
def create_mlp(optimizer='rmsprop', loss='categorical_crossentropy'):
    model = Sequential()

    # add layers
    model.add(Dense(64, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.1)) # dropout layers removing features to reduce overfitting

    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))

    model.add(Dense(10, activation='softmax')) # set last layer units equal to number of classes

    #model.summary()

    # compile model
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=['accuracy']
    )
    
    return model

In [26]:
neural_net = KerasClassifier(build_fn=create_mlp, verbose=0)

optimizers = ['adagrad', 'adamax']
loss = ['poisson']
batches = [128]
epochs = [100]

params = dict(optimizer=optimizers, loss=loss, batch_size=batches, epochs=epochs)
grid = GridSearchCV(estimator=neural_net, param_grid=params)

In [27]:
start_time = time.time()
grid_results = grid.fit(X_train, y_train)

print('params: {}'.format(grid_results.best_params_))
print('score: {}'.format(grid_results.best_score_))
print('gridsearch runtime: {}'.format(time.time() - start_time))

params: {'batch_size': 128, 'epochs': 100, 'loss': 'poisson', 'optimizer': 'adamax'}
score: 0.97355
gridsearch runtime: 937.8041977882385


In [29]:
# compile model
model.compile(
    loss='poisson',
    optimizer='adagrad',
    metrics=['accuracy']
)
# train/evaluate
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    verbose=0,
    validation_data=(X_test, y_test)
)
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss: {}'.format(score[0]))
print('test accuracy: {}'.format(score[1]))

test loss: 0.11076272282600402
test accuracy: 0.9787


In [30]:
# compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adamax',
    metrics=['accuracy']
)
# train/evaluate
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=100,
    verbose=0,
    validation_data=(X_test, y_test)
)
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss: {}'.format(score[0]))
print('test accuracy: {}'.format(score[1]))

test loss: 0.11936670499945304
test accuracy: 0.9787
