# Keras Training: Multi-Layer Perceptron (MLP) on the MNIST dataset, using the functional API

## Import Libraries

In [20]:
import keras
from keras.utils import to_categorical

# Can plot model and save it.
from keras.utils import plot_model
from keras.datasets import mnist

# keras.models has two main types of models: Sequential and the Model class used with the functional API
# We will focus on the functional API model in this notebook
from keras.models import Model

# Dense: fully connected layer
# Activation: activation function such as relu,sigmoid,softmax...(look up Keras docs for more)
from keras.layers import Input, Dense, Activation, Dropout

# Many optimizers can be chosen: Adagrad, RMSprop, SGD...(look up Keras docs for more)
from keras.optimizers import Adagrad, RMSprop, SGD
import numpy as np
import matplotlib.pyplot as plt

# In order to save and load model
import h5py
from keras.models import load_model

## Useful parameters

In [2]:
# Number of target classes (one per digit)
num_class = 10

# Each image is 28 x 28 pixels, flattened into one vector
image_size_flat = 28 * 28

# Mini-batch size used for training and evaluation (Keras default is 32)
batch = 64

# Number of training epochs
epoch = 10

## Load MNIST dataset (Also preprocessing it!)

In [3]:
# Load the MNIST train/test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to a vector of length image_size_flat, convert to
# float32 and divide by 255 to rescale the pixel values.
x_train_flat = x_train.reshape(-1, image_size_flat).astype('float32') / 255.0
x_test_flat = x_test.reshape(-1, image_size_flat).astype('float32') / 255.0

# One-hot encode the integer labels to match the softmax output / categorical
# cross-entropy loss used below.
y_train_onehot = to_categorical(y_train, num_classes=num_class)
y_test_onehot = to_categorical(y_test, num_classes=num_class)

## Define useful methods (draw_image and draw_prob from version1)

In [4]:
def draw_image(images, y_label, y_pred = None):
    """Show nine images in a 3x3 grid, captioned with their labels.

    Args:
        images: Sequence of exactly nine images; each one is reshaped to
            28x28 before being drawn.
        y_label: Sequence of the nine true labels, aligned with ``images``.
        y_pred: Optional sequence of predicted labels. When given, each
            caption also shows the prediction.

    Raises:
        AssertionError: If ``images`` and ``y_label`` do not both have
            length 9.
    """
    assert len(images) == len(y_label) and len(y_label) == 9

    _, grid = plt.subplots(3, 3)
    for idx, panel in enumerate(grid.flat):
        panel.imshow(images[idx].reshape(28, 28), cmap='binary')

        # Build the caption once; append the prediction only when available.
        caption = "True: " + str(y_label[idx])
        if y_pred is not None:
            caption += "  Pred: " + str(y_pred[idx])
        panel.set_xlabel(caption)

        # Tick marks carry no information for an image.
        panel.set_xticks([])
        panel.set_yticks([])

    plt.show()

In [5]:
def draw_prob(images, y_label, y_prob):
    """Plot nine images next to their highest predicted class probabilities.

    The figure has nine rows and two columns: the left cell shows the image
    with its true label, the right cell is a horizontal bar chart of the (up
    to) five largest class probabilities for that image.

    Args:
        images: Sequence of exactly nine images; each one is reshaped to
            28x28 before being drawn.
        y_label: Sequence of the nine true labels, aligned with ``images``.
        y_prob: Sequence of per-class probability vectors, one per image.
            Entry ``k`` of a vector is treated as the probability of class
            ``k``.

    Raises:
        AssertionError: If ``images`` and ``y_label`` do not both have
            length 9.
    """
    assert(len(images) == len(y_label) == 9)
    top_k = 5  # Show the five top probabilities

    fig, axes = plt.subplots(9, 2, figsize=(10, 20))
    fig.subplots_adjust(hspace=1.0, wspace=0.3)

    # Each row of `axes` is an (image axis, probability axis) pair.
    for row, (image_ax, prob_ax) in enumerate(axes):
        image_ax.imshow(images[row].reshape(28, 28), cmap='binary')
        image_ax.set_xlabel("True: " + str(y_label[row]))
        image_ax.set_xticks([])
        image_ax.set_yticks([])

        # Rank (class index, probability) pairs from most to least likely.
        # sorted() is stable, so ties keep ascending class order.
        ranked = sorted(
            enumerate(y_prob[row]), key=lambda pair: pair[1], reverse=True
        )[:top_k]
        top_labels = [label for label, _ in ranked]
        top_probs = [prob for _, prob in ranked]

        # Size the bar positions from the data so vectors with fewer than
        # top_k classes still plot.
        y_pos = np.arange(len(ranked))
        prob_ax.barh(y_pos, top_probs, color='green')
        prob_ax.set_yticks(y_pos)
        prob_ax.set_yticklabels(top_labels)
        prob_ax.set_xlabel("Probability")

    # Render explicitly, matching draw_image.
    plt.show()

## Construct MLP model

### Build model

In [10]:
# Functional API: each layer is called on the tensor produced by the previous
# one, starting from a symbolic input for the flattened image.
inputs = Input(shape=(image_size_flat,), name='Input_image_flat')

# First hidden block: 256 ReLU units followed by dropout at rate 0.2.
hidden1 = Dense(units=256, activation='relu', name='Dense1')(inputs)
drop1 = Dropout(rate=0.2, name='Dropout1')(hidden1)

# Second hidden block: 512 ReLU units followed by dropout at rate 0.2.
hidden2 = Dense(units=512, activation='relu', name='Dense2')(drop1)
drop2 = Dropout(rate=0.2, name='Dropout2')(hidden2)

# Softmax output layer with one unit per class.
outputs = Dense(units=num_class, activation='softmax', name='Output')(drop2)

# Tie the input and output tensors together into a Model.
model = Model(inputs=inputs, outputs=outputs)

### Show model info

In [11]:
# Show model summary. summary() prints the table itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input_image_flat (InputLayer (None, 784)               0         
_________________________________________________________________
Dense1 (Dense)               (None, 256)               200960    
_________________________________________________________________
Dropout1 (Dropout)           (None, 256)               0         
_________________________________________________________________
Dense2 (Dense)               (None, 512)               131584    
_________________________________________________________________
Dropout2 (Dropout)           (None, 512)               0         
_________________________________________________________________
Output (Dense)               (None, 10)                5130      
Total params: 337,674
Trainable params: 337,674
Non-trainable params: 0
_________________________________________________________________
None

In [12]:
# Render the layer graph of the model to a PNG file
plot_model(model=model, to_file='mnist_mlp_v2_graph.png')

### Compile model

In [13]:
# Configure training: RMSprop with learning rate 1e-3, categorical
# cross-entropy on the one-hot labels, and accuracy tracked as 'acc'.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(lr=1e-3),
    metrics=['acc'],
)

## Train model

In [14]:
# Train the model, holding out 30% of the training data for validation.
# The returned History object is kept so the per-epoch metrics can be
# inspected or plotted later, instead of the cell echoing its repr.
history = model.fit(
    x = x_train_flat,
    y = y_train_onehot,
    batch_size = batch,
    epochs = epoch,
    validation_split = 0.3,
    verbose = 1,
    shuffle = True
)

Train on 42000 samples, validate on 18000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1295089b0>

## Evaluate model

In [15]:
# Score the trained model on the test set. The cell below reads result[0] as
# the loss and result[1] as the accuracy.
result = model.evaluate(x_test_flat, y_test_onehot, batch_size=batch, verbose=1)



In [16]:
# print() joins its arguments with a single space and applies str() to each,
# so the output text is the same as manual concatenation.
print("Test loss:", result[0])
print("Test accuracy:", result[1])

Test loss: 0.103196964479897
Test accuracy: 0.9777


## Save model

In [17]:
# Write the trained model to an HDF5 file so it can be reloaded later
model.save(filepath='mnist_mlp_v2.h5')

## Try loading model

In [24]:
# Remove the in-memory model so that the following cells can only succeed by
# loading it back from disk.
# NOTE: running this cell again before `model` is re-created raises NameError.
del model

In [26]:
# Restore the model from the HDF5 file written by model.save() above
model = load_model(filepath='mnist_mlp_v2.h5')

### Evaluate again with loaded model

In [27]:
# Evaluate the reloaded model on the same test set; the numbers should match
# the evaluation done before saving.
result = model.evaluate(x_test_flat, y_test_onehot, batch_size=batch, verbose=1)



In [28]:
# print() joins its arguments with a single space and applies str() to each,
# so the output text is the same as manual concatenation.
print("Test loss:", result[0])
print("Test accuracy:", result[1])

Test loss: 0.103196964479897
Test accuracy: 0.9777
