In [2]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first tensors, i.e. (channels, rows, cols), which is the layout the
# (1, 28, 28) reshapes and input_shape further down rely on.
# set_image_data_format('channels_first') is the Keras 2 spelling of the legacy
# set_image_dim_ordering('th') call, which is no longer available in later Keras 2.x releases.
K.set_image_data_format('channels_first')

In [3]:
# Fix the random seed for reproducibility.
# numpy is imported under the alias `np`, so the global generator must be reached as
# `np.random`; the bare name `numpy` is not bound by the imports above and would raise NameError.
seed = 7
np.random.seed(seed)

In [4]:
# Load the MNIST digits, already split into a training set and a test set.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Reshape the images so they are suitable for training a CNN. With the channels-first
# ordering selected in this notebook, the Keras 2D convolution layers expect each sample
# laid out as [channels][rows][cols].
# For RGB images the channel dimension would be 3 (red, green and blue components);
# MNIST pixel values are gray scale, so the channel dimension is 1.
X_train = X_train.reshape((len(X_train), 1, 28, 28)).astype('float32')
X_test = X_test.reshape((len(X_test), 1, 28, 28)).astype('float32')

In [5]:
# Scale the pixel intensities from the 0-255 range down to 0-1.
X_train, X_test = X_train / 255, X_test / 255
# One-hot encode the integer class labels.
y_train, y_test = np_utils.to_categorical(y_train), np_utils.to_categorical(y_test)
# The width of the one-hot label matrix is the number of classes.
num_classes = y_test.shape[-1]
# Up to here we have preprocessed the data; the steps are the same as for the MLP.

In [6]:
# We now create our simple convolutional neural network model.
# Convolutional neural networks are more complex than standard multi-layer perceptrons, so we start with a simple
# structure that still uses all of the elements needed for state-of-the-art (i.e. excellent) results.
# The network architecture is summarized below.
# The first hidden layer is a convolutional layer called Conv2D. It has 32 feature maps, each computed with a 5×5 kernel, and a
# relu activation function. Next we define a pooling layer that takes the max, called MaxPooling2D. It is configured with a pool
# size of 2×2. The next layer is a regularization layer using dropout, called Dropout. It is configured to randomly exclude 20% of
# the neurons in the layer in order to reduce overfitting. Next is a layer called Flatten that converts the 2D matrix data to a vector.
# It allows the output to be processed by standard fully connected layers. Next comes a fully connected layer with 128 neurons and a
# relu activation function. Finally, the output layer has 10 neurons for the 10 classes and a softmax activation function to output
# probability-like predictions for each class.
# As before, the model is trained using logarithmic loss (categorical_crossentropy) and the ADAM gradient descent algorithm.

def baseline_model():
	"""Build and compile the baseline CNN.

	Layers, in order: a 32-filter 5x5 convolution with relu activation on a
	(1, 28, 28) input, 2x2 max pooling, 20% dropout, a flatten step, a 128-unit
	relu dense layer, and a softmax dense layer with `num_classes` units
	(`num_classes` is read from the notebook's global scope).

	Returns:
		The Sequential model, compiled with categorical cross-entropy loss,
		the adam optimizer and accuracy as the tracked metric.
	"""
	# Assemble the whole stack up front and hand it to Sequential in one go.
	layers = [
		Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'),
		MaxPooling2D(pool_size=(2, 2)),
		Dropout(0.2),
		Flatten(),
		Dense(128, activation='relu'),
		Dense(num_classes, activation='softmax'),
	]
	model = Sequential(layers)
	model.compile(
		loss='categorical_crossentropy',
		optimizer='adam',
		metrics=['accuracy'],
	)
	return model

In [7]:
# Build the network.
model = baseline_model()
# Train it; the test split is passed as validation data and reported after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=2,
)
# Final evaluation of the model on the test split.
scores = model.evaluate(X_test, y_test, verbose=0)
# scores[1] is read as the accuracy fraction; report its complement as an error percentage.
error_percent = 100 - scores[1] * 100
print("CNN Error: %.2f%%" % error_percent)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 208s - loss: 0.2328 - acc: 0.9342 - val_loss: 0.0811 - val_acc: 0.9742
Epoch 2/10
 - 202s - loss: 0.0739 - acc: 0.9781 - val_loss: 0.0464 - val_acc: 0.9845
Epoch 3/10
 - 204s - loss: 0.0534 - acc: 0.9840 - val_loss: 0.0431 - val_acc: 0.9862
Epoch 4/10
 - 199s - loss: 0.0405 - acc: 0.9878 - val_loss: 0.0404 - val_acc: 0.9873
Epoch 5/10
 - 200s - loss: 0.0336 - acc: 0.9894 - val_loss: 0.0353 - val_acc: 0.9878
Epoch 6/10
 - 206s - loss: 0.0276 - acc: 0.9916 - val_loss: 0.0313 - val_acc: 0.9896
Epoch 7/10
 - 221s - loss: 0.0233 - acc: 0.9928 - val_loss: 0.0359 - val_acc: 0.9883
Epoch 8/10
 - 200s - loss: 0.0205 - acc: 0.9936 - val_loss: 0.0331 - val_acc: 0.9886
Epoch 9/10
 - 220s - loss: 0.0168 - acc: 0.9944 - val_loss: 0.0300 - val_acc: 0.9902
Epoch 10/10
 - 257s - loss: 0.0142 - acc: 0.9957 - val_loss: 0.0307 - val_acc: 0.9905
CNN Error: 0.95%


In [2]:
# This model has 5 hidden layers and 1 output layer, so it is a 6-layer neural network. We found the error to be 0.95%,
# which is lower than the MLP's, and we will try to reduce it further. Accuracy obtained = 99.05% (greater than the MLP's).
