Build and train a CNN+MLP deep learning model with Keras on the MNIST dataset, using the following layers:
```
1. Conv2D(32, kernel_size=(3, 3), activation='relu')
2. Conv2D(64, kernel_size=(3, 3), activation='relu')
3. MaxPooling2D(pool_size=(2, 2))
4. Dense(128, activation='relu')
5. Dense(num_classes, activation='softmax')
```
Also build a second model that adds BatchNormalization and Dropout.
Compare the performance of the two models on the test data.

## Import Packages

In [1]:
import keras
from keras import backend as K
# CNN and MLP architecture
from keras.models import Sequential
from keras.layers import (
    Dense,
    Conv2D,
    MaxPooling2D,
    UpSampling2D,
    Dropout,
    Flatten
)
from keras.models import Model
from keras import backend as K
import numpy as np
from keras.optimizers import SGD
from keras.initializers import RandomNormal
# MNIST
from keras.datasets import mnist
# Data normalization
from sklearn.preprocessing import StandardScaler

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## 1 - Data Preparation

In [2]:
# Height and width (in pixels) that every image is reshaped to
img_rows, img_cols = 28, 28

# Load MNIST; the loader hands back separate train and test tuples
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Number of distinct labels found in the training targets
num_classes = np.unique(y_train).size

# Pick the per-sample shape: the single channel axis goes first or last
# depending on the backend's image data format
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Give both arrays the explicit channel axis, keeping the sample count
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Show the resulting dimensions
print(f'Shape of X_train: {x_train.shape}')
print(f'Shape of X_test: {x_test.shape}')

Shape of X_train: (60000, 28, 28, 1)
Shape of X_test: (10000, 28, 28, 1)


## 2 - Data Normalization

In [3]:
# Rescale pixel values by the largest value found in the training set.
# The maximum is captured once, *before* x_train is overwritten: taking
# np.max(x_train) again after rescaling would yield 1.0 and leave x_test
# on its original scale, inconsistent with the training data.
pixel_max = np.max(x_train)
x_train = x_train / pixel_max
x_test = x_test / pixel_max

## 3 - One Hot Encoding

In [4]:
# Convert integer labels to one-hot vectors. The vector width comes from
# num_classes (derived from the training labels) rather than a literal,
# so it always matches the size of the model's softmax output layer.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

## 4 - Define Model (No Batch Normalization/Dropout)

In [6]:
# Instantiate a model using the Sequential API
fully_connected = Sequential()

# Convolutional Layers
# input_shape is the variable computed during data preparation, so the
# first layer agrees with the reshaped data for either image data format
# (channels_first or channels_last) instead of assuming (28, 28, 1).
fully_connected.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                           input_shape=input_shape))
fully_connected.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
fully_connected.add(MaxPooling2D(pool_size=(2, 2)))  # no learning params
# Flatten the feature maps into one vector per sample for the Dense layers
fully_connected.add(Flatten())

# MLP Layers
fully_connected.add(Dense(128, activation='relu'))
# One softmax output unit per class
fully_connected.add(Dense(num_classes, activation='softmax'))

# Compile Model
# categorical_crossentropy matches the one-hot encoded targets
fully_connected.compile(loss=keras.losses.categorical_crossentropy,
                        optimizer=keras.optimizers.Adadelta(),
                        metrics=['accuracy'])

# Print Summary
fully_connected.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dense_4 (Dense)              (None, 10)                1290      
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Fit the baseline network: 3 passes over the training data in
# mini-batches of 100 samples. The test split is supplied as validation
# data, and verbose=0 turns off per-epoch progress output.
fully_connected.fit(
    x=x_train,
    y=y_train,
    batch_size=100,
    epochs=3,
    verbose=0,
    validation_data=(x_test, y_test),
)

<keras.callbacks.History at 0x14ba7ebd0>

## 5 - Define Better Model 