***Import Required Libraries***

In [0]:
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): only NumPy is seeded here; the Keras backend may keep its own RNG
# state (weight init, dropout) - confirm whether it needs seeding as well.
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.optimizers import RMSprop, Adam, Adadelta, Adagrad, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import keras
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Activation
from keras import backend as K
from keras.backend import sigmoid
from keras.utils.generic_utils import get_custom_objects
from keras.callbacks import EarlyStopping
# Early-stopping callback: watches the validation loss (lower is better) and
# stops training once it has failed to improve for 4 consecutive epochs.
es = EarlyStopping(
    monitor='val_loss',
    patience=4,
    mode='min',
    verbose=1,
)

**Swish Activation Function**

I also tried training the model with the Swish activation function, but it did not help much.

In [0]:
def swish(x, beta = 1):
    """Swish activation: ``x * sigmoid(beta * x)``.

    Args:
        x: Input tensor (anything the backend ``sigmoid`` accepts).
        beta: Scale applied to ``x`` inside the sigmoid; defaults to 1.

    Returns:
        The input multiplied element-wise by its sigmoid gate.
    """
    gate = sigmoid(beta * x)
    return x * gate

In [0]:
# Register an Activation layer wrapping `swish` under the key 'swish' in
# Keras' custom-object registry. The model built below uses 'relu' throughout,
# so this entry is only relevant if a layer is switched to 'swish'.
get_custom_objects()['swish'] = Activation(swish)

In [0]:
# Number of target classes used when one-hot encoding the labels
num_classes = 10

# Input image dimensions in pixels
img_rows = 28
img_cols = 28

In [0]:
# Load MNIST via keras.datasets. The call returns two (images, labels) pairs,
# unpacked here into the train split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [0]:
# Rescale both image arrays by 1/255 (assumes 8-bit pixel values, giving a
# 0-1 range - TODO confirm against the loaded dtype).
x_train, x_test = x_train / 255.0, x_test / 255.0

In [0]:
# Add a trailing single-channel axis so each array has shape
# (n_samples, img_rows, img_cols, 1), matching the Conv2D input_shape used below.
# The configured image dimensions are used instead of repeating the literal 28s.
x_train = x_train.reshape(-1, img_rows, img_cols, 1)
x_test = x_test.reshape(-1, img_rows, img_cols, 1)

In [136]:
# Convert class vectors to binary (one-hot) class matrices.
# The ndim guard keeps this cell idempotent: it also rebuilds the model, so it
# gets re-run often, and encoding labels that are already one-hot would
# corrupt their shape.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

# Build a fresh CNN each time the cell runs.
model = Sequential()

# Block 1: two 5x5 convolutions (32 then 64 filters), 2x2 max-pool, batch norm.
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'same',
                 activation = 'relu', input_shape = (img_rows, img_cols, 1)))
model.add(Conv2D(filters = 64, kernel_size = (5,5), padding = 'same',
                 activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())


# Block 2: two 3x3 convolutions (64 filters each), 2x2 max-pool, batch norm.
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation = 'relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model.add(BatchNormalization())


# Classifier head: flatten, 256-unit dense layer, batch norm, softmax over the classes.
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(BatchNormalization())
model.add(Dense(num_classes, activation = "softmax"))
model.summary()

Model: "sequential_236"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_941 (Conv2D)          (None, 28, 28, 32)        832       
_________________________________________________________________
conv2d_942 (Conv2D)          (None, 28, 28, 64)        51264     
_________________________________________________________________
max_pooling2d_471 (MaxPoolin (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 14, 14, 64)        256       
_________________________________________________________________
conv2d_943 (Conv2D)          (None, 14, 14, 64)        36928     
_________________________________________________________________
conv2d_944 (Conv2D)          (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_472 (MaxPoolin (None, 7, 7, 64)       

In [0]:
# Adam optimizer with an initial learning rate of 1e-3 and no built-in decay;
# learning-rate reduction is handled by the ReduceLROnPlateau callback instead.
optimizer = Adam(
    lr=0.001,
    epsilon=1e-08,
    decay=0.0,
)

In [0]:
# Configure training: categorical cross-entropy on the one-hot labels,
# tracking accuracy as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [0]:
# Learning-rate annealer: halve the learning rate (factor=0.5) whenever
# 'val_acc' has not improved for 3 epochs, never going below 1e-5.
# 'val_acc' is the metric name that appears in the training log of this notebook.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [0]:
# Training schedule: maximum number of epochs and mini-batch size
epochs, batch_size = 10, 64

In [0]:
# Data augmentation with ImageDataGenerator.
# Only small geometric perturbations are enabled; all normalisation, whitening
# and flipping options are explicitly switched off.

datagen = ImageDataGenerator(
        # --- active augmentations ---
        rotation_range=15,        # random rotation, in degrees
        zoom_range=0.1,           # random zoom
        width_shift_range=0.1,    # random horizontal shift (fraction of total width)
        height_shift_range=0.1,   # random vertical shift (fraction of total height)
        # --- disabled options ---
        featurewise_center=False,             # no dataset-wide mean centring
        samplewise_center=False,              # no per-sample mean centring
        featurewise_std_normalization=False,  # no division by dataset std
        samplewise_std_normalization=False,   # no division by per-sample std
        zca_whitening=False,                  # no ZCA whitening
        horizontal_flip=False,                # no horizontal flips
        vertical_flip=False)                  # no vertical flips


# NOTE(review): with every feature-wise / ZCA option disabled, this fit() call
# probably has no statistics to compute - confirm against the Keras docs
# before removing it.
datagen.fit(x_train)

In [142]:
# Train on augmented batches drawn from the training set.
# NOTE(review): (x_test, y_test) is passed as validation data and therefore
# drives both callbacks; the same arrays are used for the final evaluation,
# so the reported test accuracy is not from fully held-out data.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
batches_per_epoch = len(x_train) // batch_size
training_callbacks = [learning_rate_reduction, es]

history = model.fit_generator(
    train_batches,
    steps_per_epoch=batches_per_epoch,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=training_callbacks,
    verbose=2,
)

Epoch 1/10
 - 26s - loss: 0.1394 - acc: 0.9564 - val_loss: 0.0822 - val_acc: 0.9746
Epoch 2/10
 - 19s - loss: 0.0584 - acc: 0.9823 - val_loss: 0.0498 - val_acc: 0.9837
Epoch 3/10
 - 19s - loss: 0.0457 - acc: 0.9859 - val_loss: 0.1321 - val_acc: 0.9576
Epoch 4/10
 - 20s - loss: 0.0405 - acc: 0.9871 - val_loss: 0.0338 - val_acc: 0.9899
Epoch 5/10
 - 20s - loss: 0.0379 - acc: 0.9881 - val_loss: 0.0476 - val_acc: 0.9867
Epoch 6/10
 - 20s - loss: 0.0318 - acc: 0.9901 - val_loss: 0.0200 - val_acc: 0.9934
Epoch 7/10
 - 20s - loss: 0.0314 - acc: 0.9901 - val_loss: 0.0246 - val_acc: 0.9920
Epoch 8/10
 - 20s - loss: 0.0308 - acc: 0.9903 - val_loss: 0.0173 - val_acc: 0.9947
Epoch 9/10
 - 20s - loss: 0.0259 - acc: 0.9920 - val_loss: 0.0182 - val_acc: 0.9935
Epoch 10/10
 - 20s - loss: 0.0263 - acc: 0.9918 - val_loss: 0.0176 - val_acc: 0.9940


In [0]:
# Evaluate the trained model on the test arrays without progress output.
# The model was compiled with metrics=["accuracy"]; the result is indexed
# as score[1] in the next cell to read the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)

In [144]:
# Print the accuracy entry of the evaluation result (score[0] is presumably the loss).
print(score[1])

0.994


## Conclusion:

This is my ***second submission***, which exceeds the time limit of 1 hour. Here I have tried to make the code work better than in my last submission. Although I have tried many options, as explained above, other possibilities still exist, e.g., using Hyperopt to find the best hyperparameter combinations, using OpenCV to preprocess the image data, and your additional requirement, i.e., using classes to improve the aesthetics of the code.