# CNN
CNN model on Digit Dataset

Read the below link again 
https://www.kaggle.com/chapagain/digit-recognizer-beginner-s-guide-mlp-cnn-keras
https://keras.io/models/model/
read more about fit and evaluate function

In [1]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set() # setting seaborn default for plots

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from keras.utils import np_utils
from keras.datasets import mnist

# for Convolutional Neural Network (CNN) model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint

from keras import backend as K
K.set_image_dim_ordering('th')

Using TensorFlow backend.


# Loading Dataset
Make sure to change the path before running

In [2]:
path_dir = 'C:/Users/212727435/Downloads/MNIST_Digit/data_mnist'
train_path = path_dir + '/train.csv'
test_path =  path_dir + '/test.csv'

In [3]:
train = pd.read_csv(train_path)
print (train.shape)
train.head()

(42000, 785)


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
test = pd.read_csv(test_path)
print(test.shape)
test.head()

(28000, 784)


Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Separating the labels from training dataset and making it as x_label
y_train = train['label']
x_train = train.drop(labels=['label'],axis=1)
x_test = test

# Frequency of the digits in the dataset
#print(y_train.value_counts())

In [6]:
# Set values of the Data
x_train = x_train.values.astype('float32') # pixel values of all images in train set
y_train = y_train.values.astype('int32') # labels of all images
x_test = test.values.astype('float32') # pixel values of all images in test set

In [7]:
# fix random seed for reproducibility
random_seed = 7
np.random.seed(random_seed)

# Normalizing the Input

In [8]:
# normalize inputs from 0-255 to 0-1 as pixel values are gray scale between 0 and 255
x_train = x_train / 255
x_test = x_test / 255

# Converting Output into one hot code
A one hot encoding is a representation of categorical variables as binary vectors. This first requires that the categorical values be mapped to integer values. Then, each integer value is represented as a binary vector that is all zero values except the index of the integer, which is marked with a 1 and as this is a multi classification problem so we can convert the output class values into one-hot format which is simply a binary matrix, i.e.

value 0 will be converted to one-hot format as [1, 0, 0, 0, 0, 0, 0, 0, 0]

value 1 will be converted to one-hot format as [0, 1, 0, 0, 0, 0, 0, 0, 0] etc

In [9]:
# one hot encode outputs'
Y_train = np_utils.to_categorical(y_train)
num_classes = Y_train.shape[1]

# Splitting Dataset Into Training and Validation
We split the train dataset into two parts in 9:1 ratio. 90% will be the actual training set and the remaining 10% will be the validation/testing set.

In [10]:
# Split in Training set and Validation set
x_train2, x_val, y_train2, y_val = train_test_split(x_train, Y_train, test_size = 0.10, random_state=random_seed)
print (x_train2.shape, y_train2.shape, x_val.shape, y_val.shape)
num_pixels = x_train.shape[1]

(37800, 784) (37800, 10) (4200, 784) (4200, 10)


In [11]:
# converting one-hot format of digits to normal values/labels
print(y_val)
Y_val = np.argmax(y_val, 1) # reverse of to_categorical
print (Y_val)
# Note that: small y_val contains values in one-hot format and capital Y_val contains normal digit values

[[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]]
[1 1 4 ... 0 2 2]


In [12]:
# Reshaping the Image for CNN 2-dimesional input in [samples][pixels][width][height]
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28).astype('float32')
x_train2 = x_train2.reshape(x_train2.shape[0], 1, 28, 28).astype('float32')
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28).astype('float32')
x_val = x_val.reshape(x_val.shape[0], 1, 28, 28).astype('float32')

print (num_pixels, x_train2.shape, x_test.shape, x_val.shape)

784 (37800, 1, 28, 28) (28000, 1, 28, 28) (4200, 1, 28, 28)


# CNN Model
Model idea and code from [here](https://www.kaggle.com/cdeotte/25-million-images-0-99757-mnist#)

In [13]:
def model():
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(1, 28, 28), activation='relu',data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, kernel_size = 5, strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size = 5, strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(128, kernel_size = 4, activation='relu'))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='softmax'))

        # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Training the model

In [14]:
# With data augmentation to prevent overfitting
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images


datagen.fit(x_train)

In [15]:
# DECREASE LEARNING RATE EACH EPOCH
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
    
# descriptive weight file naming
checkpointer = ModelCheckpoint(filepath=('mnist_digit.h5'), 
                               verbose=1, save_best_only=True)

callbacks = [annealer,early_stopping,checkpointer]
epochs = 100

In [16]:
model = model()
history = model.fit_generator(datagen.flow(x_train2,y_train2, batch_size=64),
                              epochs = epochs, validation_data = (x_val,y_val),
                              verbose = 2, steps_per_epoch=(len(x_train)//64),validation_steps=(len(x_val)//64),callbacks=callbacks)

Epoch 1/100
 - 22s - loss: 0.4576 - acc: 0.8563 - val_loss: 0.0828 - val_acc: 0.9781

Epoch 00001: val_loss improved from inf to 0.08279, saving model to mnist_digit.h5
Epoch 2/100
 - 20s - loss: 0.1341 - acc: 0.9605 - val_loss: 0.0478 - val_acc: 0.9881

Epoch 00002: val_loss improved from 0.08279 to 0.04776, saving model to mnist_digit.h5
Epoch 3/100
 - 20s - loss: 0.0931 - acc: 0.9718 - val_loss: 0.0361 - val_acc: 0.9881

Epoch 00003: val_loss improved from 0.04776 to 0.03605, saving model to mnist_digit.h5
Epoch 4/100
 - 20s - loss: 0.0770 - acc: 0.9768 - val_loss: 0.0429 - val_acc: 0.9867

Epoch 00004: val_loss did not improve from 0.03605
Epoch 5/100
 - 20s - loss: 0.0655 - acc: 0.9800 - val_loss: 0.0648 - val_acc: 0.9850

Epoch 00005: val_loss did not improve from 0.03605
Epoch 6/100
 - 20s - loss: 0.0650 - acc: 0.9804 - val_loss: 0.0332 - val_acc: 0.9914

Epoch 00006: val_loss improved from 0.03605 to 0.03321, saving model to mnist_digit.h5
Epoch 7/100
 - 20s - loss: 0.0529 - ac

In [17]:
scores = model.evaluate(x_val, y_val, verbose=0)
print (scores)
print ('Score: {}'.format(scores[0]))
print ('Accuracy: {}'.format(scores[1]))

[0.029838296078336757, 0.9935714285714285]
Score: 0.029838296078336757
Accuracy: 0.9935714285714285
