## Step 1: Import libraries and modules

In [None]:
#Import libraries
import numpy as np
import theano
import keras
from keras.models import Sequential

#These are the layers that are used in almost any neural network:
from keras.layers import Dense, Dropout, Activation, Flatten

#import the CNN layers from Keras
from keras.layers import Convolution2D, MaxPooling2D

# Utilities that help us transform our data
from keras.utils import np_utils

from matplotlib import pyplot as plt
from keras.optimizers import SGD

# Confusion-matrix computation and visualization
import seaborn as sn
import pandas  as pd
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Seed NumPy's global random generator so random draws repeat between runs.
RANDOM_SEED = 123
np.random.seed(RANDOM_SEED)

## Step 2: Load image data from MNIST


In [None]:
# The MNIST digits ship with Keras' bundled dataset helpers.
from keras.datasets import mnist

# load_data() returns a (train, test) pair; each half is an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

print(X_train.shape)
# (60000, 28, 28)


# Show one randomly chosen training image for each of the ten digit classes,
# laid out on a 2 x 5 grid without axis ticks.
num_classes = 10
fig = plt.figure(figsize=(3, 3))
for digit in range(num_classes):
    ax = fig.add_subplot(2, 5, 1 + digit, xticks=[], yticks=[])
    # Indices of every training sample labelled with this digit.
    class_rows = np.where(y_train == digit)[0]
    class_images = X_train[class_rows]
    pick = np.random.randint(class_images.shape[0])
    # The images are plain 2-D arrays at this point, so they are drawn as-is:
    # transposing a 2-D image would mirror the digit across its diagonal.
    ax.imshow(class_images[pick])
plt.show()

## Step 3: Preprocess input data for Keras

In [None]:
# Give every image an explicit leading channel axis: (samples, 1, 28, 28).
image_shape = (1, 28, 28)
X_train = X_train.reshape((X_train.shape[0],) + image_shape)
X_test = X_test.reshape((X_test.shape[0],) + image_shape)

print(X_train.shape)
# (60000, 1, 28, 28)

In [None]:
# Cast the pixel data to float32 and divide by 255 to rescale it.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

## Step 4: Preprocess class labels for Keras.

In [None]:
# The training labels are still a one-dimensional array at this stage.
label_shape = y_train.shape
print(label_shape)
# (60000,)

In [None]:
# Peek at the first ten training labels.
first_labels = y_train[:10]
print(first_labels)
# [5 0 4 1 9 2 1 3 1 4]

In [None]:
# One-hot encode the labels: each 1-dimensional class array becomes a
# 10-column class matrix.
n_labels = 10
Y_train, Y_test = (
    np_utils.to_categorical(labels, n_labels) for labels in (y_train, y_test)
)

print(Y_train.shape)
# print(Y_train[:10])

# (60000, 10)

### Sub-Step: Encoder

## Step 5: Define model architecture

In [None]:
# Declare a sequential model; layers are appended to it one after another.
cnn = Sequential()

# Input convolution layer with ReLU activation ( https://keras.io/activations/ ).
# Keras 1.x equivalent:
#   cnn.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))
# The input is declared channels-first (1, 28, 28), so every spatial layer
# below is given the same data_format; otherwise the later layers fall back to
# the globally configured default, which may treat a different axis as channels.
cnn.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1, 28, 28),
                      data_format='channels_first'))

# Inspect the output shape of the model built so far (the variable is `cnn`).
print(cnn.output_shape)
# (None, 32, 26, 26)

# Keras 1.x equivalent: cnn.add(Convolution2D(32, 3, 3, activation='relu'))
# Keras 2.0 form:
cnn.add(Convolution2D(32, (3, 3), data_format='channels_first'))
cnn.add(Activation('relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_first'))

cnn.add(Dropout(0.25))

# Fully connected dense layers, ending in a 10-way softmax (one unit per digit).
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(10, activation='softmax'))

# Optional custom optimizer (not used; the model is compiled with 'adam'):
#sgd=SGD(lr = 0.1, decay=1e-6, momentum=0.9 ,nesterov=True)


## Step 6: Compile model

In [None]:
# Configure training: the optimizer (SGD, Adam, etc.), the loss function,
# and the metrics reported alongside the loss.
cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


## Step 7: Fit model on training data.


In [None]:
# Train the network on the training images and their one-hot labels.
# The object returned by fit() is kept in cnnFit; its .history is read later.
cnnFit = cnn.fit(
    X_train,
    Y_train,
    batch_size=32,
    epochs=1,
    verbose=1,
)

## Step 8: Evaluate model on test data.
#### Evaluate our model on the test data


In [None]:
# Print a summary of the network.
cnn.summary()

# Evaluate on the held-out test set; the second entry of the result is
# reported as the accuracy.
score = cnn.evaluate(X_test, Y_test, verbose=1)
test_accuracy_pct = score[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_pct)

### After training, we can plot the loss and accuracy curves using the code below

In [None]:
# Per-epoch metrics recorded during training.
history = cnnFit.history

# Older Keras releases store accuracy under 'acc'; newer ones use 'accuracy'.
acc_key = 'acc' if 'acc' in history else 'accuracy'
print(history[acc_key])

# Set the default figure size before the figures are created so it applies.
plt.rcParams['figure.figsize'] = (8, 6)

# Derive the x ticks from the number of epochs actually run.
epochs_run = len(history['loss'])
tick_step = max(1, epochs_run // 10)

curves = [
    (0, acc_key, "Accuracy", "Training Accuracy vs Validation Accuracy"),
    (1, 'loss', "Loss", "Training Loss vs Validation Loss"),
]
for fig_num, key, y_label, title in curves:
    plt.figure(fig_num)
    plt.plot(history[key], 'r')
    legend_labels = ['train']
    # Validation metrics are stored under 'val_<name>' and exist only when
    # validation data was supplied to fit(); plot the real validation series
    # instead of drawing the training curve a second time.
    val_key = 'val_' + key
    if val_key in history:
        plt.plot(history[val_key], 'g')
        legend_labels.append('validation')
    plt.xticks(np.arange(0, epochs_run, tick_step))
    plt.xlabel("Num of Epochs")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend(legend_labels)

plt.show()

In [None]:
### Confusion matrix result

In [None]:
# Predict class probabilities for the test set and reduce both predictions
# and one-hot targets to class indices.
Y_pred = cnn.predict(X_test, verbose=2)
y_pred = np.argmax(Y_pred, axis=1)
y_true = np.argmax(Y_test, axis=1)

# Compute the confusion matrix once and reuse it, instead of rebuilding it
# on every loop iteration.
cm = confusion_matrix(y_true, y_pred)

# Row sums: number of test samples whose true class is `ix`.
for ix in range(10):
    print(ix, cm[ix].sum())
#print(cm)

# Render the matrix as an annotated heatmap.
df_cm = pd.DataFrame(cm, range(10), range(10))
plt.figure(figsize=(10, 7))
sn.set(font_scale=1.4)  # label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 12})  # annotation font size
plt.show()

## Save the CNN model

In [None]:
from keras.models import load_model

# Write the trained network to the HDF5 file 'cnnFitMnist.h5'.
model_path = 'cnnFitMnist.h5'
cnn.save(model_path)

# Drop the in-memory model so the reload below starts from the file alone.
del cnn

# Read the saved model back from disk into `model`.
model = load_model(model_path)
