In [None]:
# Repeating the MNIST experiment
""" An attempt to obtain impressive results on MNIST.
    Code extracted from: https://github.com/shahariarrabby/Mnist_cnn_Swish
    Expected results > 99.5%
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

np.random.seed(2)

import keras
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Activation

# For adding new activation function
from keras import backend as K
from keras.datasets import mnist
from keras.utils.generic_utils import get_custom_objects
from keras.utils import np_utils
import tensorflow as tf

Using TensorFlow backend.


In [2]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print("X_train original shape", X_train.shape)
print("y_train original shape", Y_train.shape)

X_train original shape (60000, 28, 28)
y_train original shape (60000,)


In [3]:
# Normalization
nb_classes = 10
# Normalize the data
X_train = X_train / 255.0
X_test = X_test / 255.0

In [4]:
# Encode labels to one hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

In [5]:
# Set the random seed
random_seed = 2

In [7]:
# Reshape image in 3 dimensions (height = 28px, width = 28px , canal = 1)
X_train = X_train.reshape(-1,28,28,1)
X_test = X_test.reshape(-1,28,28,1)

In [8]:
def e_swish_2(x):
    sigmoid = K.sigmoid(x)
    return K.maximum(x*sigmoid, x*(2-sigmoid))

In [25]:
# Set the CNN model 
# my CNN architechture is In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 -> Flatten -> Dense -> Dropout -> Out
def create(act):
    with tf.device("/cpu:0"):
        model = Sequential()
        # First conv block
        model.add(Conv2D(filters=32, kernel_size=(5,5), padding='Same', input_shape = (28,28,1)))
        model.add(Activation(act))
        model.add(Conv2D(filters=32, kernel_size=(5, 5), padding="Same"))
        model.add(Activation(act))
        model.add(MaxPool2D(pool_size=(2,2)))
        model.add(Dropout(0.25))
        # Second conv block
        model.add(Conv2D(filters=64, kernel_size=(3,3), padding='Same'))
        model.add(Activation(act))
        model.add(Conv2D(filters=64, kernel_size=(3,3), padding='Same'))
        model.add(Activation(act))
        model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
        model.add(Dropout(0.25))
        # Classification
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation(act))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation("sigmoid"))
    
    return model

In [27]:
opt = keras.optimizers.Adam()
epochs = 1    # Turn epochs to 30 to get 0.9967 accuracy
batch_size = 64
model = create(e_swish_2)
model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
with tf.device("/cpu:0"):
    his = model.fit(X_train, Y_train, epochs = epochs, verbose=1)
print(his.history)
scores = model.evaluate(X_test, Y_test)
print(scores)

Epoch 1/1
{'loss': [0.17691457835470015], 'acc': [0.95025000000000004]}
[0.031686499312717935, 0.98980000000000001]
