### Loading important libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import ReduceLROnPlateau
from keras.datasets import mnist
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten, Activation
from keras.layers import MaxPool2D
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils import np_utils
from sklearn import metrics
from sklearn.model_selection import train_test_split

### loading datasets

In [None]:
# Read the labelled training frame and the competition test frame, in that order.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/digit-recognizer/train.csv',
                     '../input/digit-recognizer/test.csv')
)

In [None]:
# Print the (rows, columns) tuple, then let the cell render the first five rows.
print(train.shape)

train.head(n=5)

In [None]:
# Print the (rows, columns) tuple, then let the cell render the first five rows.
print(test.shape)

test.head(n=5)

### Data Pre-processing

In [None]:
# Column 0 holds the digit label; every remaining column is a pixel value.
y_train = train.iloc[:, 0].values   # labels
X_train = train.iloc[:, 1:].values  # pixel values

In [None]:
# Cast pixel arrays to float32 and labels to int32.
# np.asarray accepts both the original DataFrame and an already-converted
# ndarray, so re-running this cell no longer raises AttributeError on `.values`.
X_train = X_train.astype('float32')
y_train = y_train.astype('int32')
test = np.asarray(test, dtype='float32')

In [None]:
# Scale pixel intensities from the 0-255 range into 0-1.
# NOTE: not idempotent - running this cell twice divides by 255 twice.
X_train, test = (np.divide(pixels, 255.0) for pixels in (X_train, test))

In [None]:
# Report the array shapes as they are before the image reshape.
for caption, array in (("X_train shape", X_train),
                       ("y_train shape", y_train),
                       ("test shape", test)):
    print(caption, array.shape)

In [None]:
# Give each flat pixel row an explicit image layout: (samples, 28, 28, 1).
# The trailing axis of size 1 is the single channel of a grayscale image.
X_train = X_train.reshape(-1, 28, 28, 1)
test = test.reshape(-1, 28, 28, 1)

In [None]:
# Report the shapes that will be fed to the network.
for caption, array in (("Train matrix shape", X_train),
                       ("Test matrix shape", test)):
    print(caption, array.shape)


### Splitting the Labelled Data into Training and Hold-out (Test) Sets

In [None]:
# Hold out 20% of the labelled samples; random_state fixes the shuffle.
# NOTE: re-running this cell splits the already-reduced training set again.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X_train, y_train, test_size=0.2, random_state=2,
)

### One Hot Encoding of labels

In [None]:
# Convert integer class ids into one-hot rows with Keras' helper.
n_classes = 10
print("Shape before one-hot encoding: ", y_train.shape)
y_train, y_test = (
    np_utils.to_categorical(labels, n_classes) for labels in (y_train, y_test)
)
print("Shape after one-hot encoding: ", y_train.shape)

### simple cnn model with one conv2D layer

In [None]:
# Baseline network: one 5x5 convolution, pooling and dropout, then a dense head.
model = Sequential([
    Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),  # one output per digit class
])
# Categorical cross-entropy pairs with the one-hot labels; accuracy is tracked per epoch.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the baseline, passing the hold-out split as validation data.
# Note: `model1` is the History object returned by fit(), not the network itself.
model1 = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=2,
)

In [None]:
# Score the current model on the hold-out split; scores[0] is the loss and
# scores[1] the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)

for caption, value in zip(("Test Loss", "Test Accuracy"), scores):
    print(caption, value)

# Error rate is the complement of accuracy, expressed in percent.
print("CNN Error: %.2f%%" % (100-scores[1]*100))

In [None]:
# Accuracy per epoch for the baseline run (training vs. hold-out split).
# Keras before 2.3 logs the metric as 'acc'/'val_acc'; later releases use
# 'accuracy'/'val_accuracy'. Pick whichever key exists so this cell does not
# raise KeyError after a Keras upgrade.
run_log = model1.history
acc_key = 'acc' if 'acc' in run_log else 'accuracy'

fig = plt.figure(figsize=(9, 3))
plt.plot(run_log[acc_key])
plt.plot(run_log['val_' + acc_key])
plt.title('accuracy vs epoch')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Loss per epoch for the baseline run: training curve first, hold-out curve second.
for curve in ('loss', 'val_loss'):
    plt.plot(model1.history[curve])
plt.title('loss vs epoch ')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

### CNN model with multiple convolutional layers

In [None]:
# Two convolution/pooling stages with 'same' padding, then a wide dense head.
model = Sequential([
    Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), padding="same"),
    Conv2D(16, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), padding="same"),
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(rate=0.25),
    Dense(10, activation='softmax'),  # one output per digit class
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the two-stage CNN; `model2` is the History object returned by fit().
model2 = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=2,
)

In [None]:
# Score the current model on the hold-out split; scores[0] is the loss and
# scores[1] the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)

for caption, value in zip(("Test Loss", "Test Accuracy"), scores):
    print(caption, value)

# Error rate is the complement of accuracy, expressed in percent.
print("CNN Error: %.2f%%" % (100-scores[1]*100))

In [None]:
# Accuracy (top panel) and loss (bottom panel) per epoch for the second model.
# Keras before 2.3 logs the metric as 'acc'/'val_acc'; later releases use
# 'accuracy'/'val_accuracy'. Pick whichever key exists so this cell does not
# raise KeyError after a Keras upgrade.
run_log = model2.history
acc_key = 'acc' if 'acc' in run_log else 'accuracy'

fig = plt.figure()
plt.subplot(2,1,1)
plt.plot(run_log[acc_key])
plt.plot(run_log['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.subplot(2,1,2)
plt.plot(run_log['loss'])
plt.plot(run_log['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.tight_layout()

### Larger CNN model (adding more layers)

In [None]:
# Deeper CNN: two convolutional stages (32/32 and 64/128 filters), each closed
# by max-pooling and dropout, followed by a 512-unit dense head.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=(28,28,1)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    # Stage 2
    Conv2D(64,(3, 3)),
    Activation('relu'),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    Flatten(),
    # Fully connected head
    Dense(512),
    Activation('relu'),
    Dropout(0.25),
    Dense(10),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the deeper CNN for 20 epochs; `model3` is the History object returned by fit().
model3 = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=200,
)

In [None]:
# Score the current model on the hold-out split; scores[0] is the loss and
# scores[1] the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)

for caption, value in zip(("Test Loss", "Test Accuracy"), scores):
    print(caption, value)

# Error rate is the complement of accuracy, expressed in percent.
print("CNN Error: %.2f%%" % (100-scores[1]*100))

In [None]:
# Accuracy (top panel) and loss (bottom panel) per epoch for the third model.
# Keras before 2.3 logs the metric as 'acc'/'val_acc'; later releases use
# 'accuracy'/'val_accuracy'. Pick whichever key exists so this cell does not
# raise KeyError after a Keras upgrade.
run_log = model3.history
acc_key = 'acc' if 'acc' in run_log else 'accuracy'

fig = plt.figure()
plt.subplot(2,1,1)
plt.plot(run_log[acc_key])
plt.plot(run_log['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.subplot(2,1,2)
plt.plot(run_log['loss'])
plt.plot(run_log['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.tight_layout()

In [None]:
# Class-probability rows for every hold-out image, from the most recently trained model.
pred_val = model.predict(x=X_test)

In [None]:
# Collapse each probability row to the index of its largest entry (the predicted digit).
# NOTE: not re-runnable - a second run would call argmax on an already 1-D array.
pred_val = pred_val.argmax(axis=1)

In [None]:
# Display the predicted class index for each hold-out sample.
pred_val

In [None]:
# Undo the one-hot encoding: the position of the 1 in each row is the true digit.
val = y_test.argmax(axis=1)

In [None]:
# Display the true class index for each hold-out sample.
val

In [None]:
# Confusion matrix for the hold-out split: true labels first, predictions second,
# so rows index the true digit and columns the predicted digit.
# `metrics` is imported with the other dependencies at the top of the notebook.
matrix = metrics.confusion_matrix(val, pred_val)

In [None]:
# Print the confusion matrix held in `matrix`.
print(matrix)

In [None]:
# Text report of the per-class scores for the hold-out predictions.
m = metrics.classification_report(y_true=val, y_pred=pred_val)

In [None]:
# Print the classification report text held in `m`.
print(m)

In [None]:
# Predict a digit for every competition image and write the submission file.
pred = model.predict(test)
y_classes = pred.argmax(axis=-1)  # index of the most probable class per image

# ImageId is the 1-based row number. Derive it from the number of predictions
# instead of hard-coding 28000, so a differently sized test set does not raise
# a length-mismatch error when the Label column is attached.
result = pd.DataFrame({
    'ImageId': np.arange(1, len(y_classes) + 1),
    'Label': y_classes,
})
result.to_csv("output.csv", index = False)

### Adding Extra layer and BatchNormalization

To improve validation accuracy:
* added an extra convolutional layer
* added normalization layers (Batch Normalization)
* used the RMSprop optimizer
* added a callback that reduces the learning rate when validation accuracy plateaus

In [None]:
# Five 'Same'-padded convolutions, each followed by batch normalisation, with
# pooling/dropout between stages and a 256-unit dense head.
model = Sequential([
    # Stage 1: two 5x5 convolutions, 32 filters each
    Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same', activation ='relu', input_shape = (28,28,1)),
    BatchNormalization(),
    Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions, 64 filters each
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2), strides=(2,2)),
    Dropout(0.25),

    # Stage 3: one more 3x3 convolution, no pooling
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(256, activation = "relu"),
    BatchNormalization(),
    Dropout(0.25),
    Dense(10, activation = "softmax"),
])


In [None]:
# RMSprop with every hyper-parameter spelled out: learning rate 1e-3, rho 0.9,
# epsilon 1e-8 and no learning-rate decay.
optimizer = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)

In [None]:
# Compile with the RMSprop instance defined in the previous cell and track accuracy.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)


In [None]:
# Halve the learning rate after 3 epochs without improvement in the monitored
# metric, never going below 1e-5; verbose=1 logs each reduction.
# NOTE(review): 'val_acc' is the key logged by older Keras; newer releases log
# 'val_accuracy' - confirm against the installed version.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc', factor=0.5, patience=3, min_lr=0.00001, verbose=1,
)

In [None]:
# Train the batch-normalised CNN with the learning-rate callback attached.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=128,
    callbacks=[learning_rate_reduction],
)

In [None]:
# Score the current model on the hold-out split; scores[0] is the loss and
# scores[1] the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)

for caption, value in zip(("Test Loss", "Test Accuracy"), scores):
    print(caption, value)

# Error rate is the complement of accuracy, expressed in percent.
print("CNN Error: %.2f%%" % (100-scores[1]*100))

In [None]:
# Accuracy (top panel) and loss (bottom panel) per epoch for the final model.
# Keras before 2.3 logs the metric as 'acc'/'val_acc'; later releases use
# 'accuracy'/'val_accuracy'. Pick whichever key exists so this cell does not
# raise KeyError after a Keras upgrade.
run_log = history.history
acc_key = 'acc' if 'acc' in run_log else 'accuracy'

fig = plt.figure()
plt.subplot(2,1,1)
plt.plot(run_log[acc_key])
plt.plot(run_log['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.subplot(2,1,2)
plt.plot(run_log['loss'])
plt.plot(run_log['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.tight_layout()

In [None]:
# Predicted digit for each hold-out image: index of the largest output per row.
pred_val = model.predict(X_test).argmax(axis=1)
pred_val

In [None]:
# Undo the one-hot encoding: the position of the 1 in each row is the true digit.
val = y_test.argmax(axis=1)
val

In [None]:
# Confusion matrix for the hold-out split: true labels first, predictions second,
# so rows index the true digit and columns the predicted digit.
# `metrics` is imported with the other dependencies at the top of the notebook,
# so the duplicate mid-notebook import is no longer needed here.
matrix = metrics.confusion_matrix(val, pred_val)
matrix

In [None]:
# Text report of the per-class scores for the hold-out predictions.
m = metrics.classification_report(y_true=val, y_pred=pred_val)

In [None]:
# Print the classification report text held in `m`.
print(m)

In [None]:
# Predict a digit for every competition image and write the final submission file.
pred = model.predict(test)
y_classes = pred.argmax(axis=-1)  # index of the most probable class per image

# ImageId is the 1-based row number. Derive it from the number of predictions
# instead of hard-coding 28000, so a differently sized test set does not raise
# a length-mismatch error when the Label column is attached.
result = pd.DataFrame({
    'ImageId': np.arange(1, len(y_classes) + 1),
    'Label': y_classes,
})
result.to_csv("final_result.csv", index = False)