# MNIST - Keras: Convolutional Layers

In [1]:
# We classify the MNIST digits using Keras convolutional layers.
#
# We build a model with three convolutional layers (Conv2D), each followed by
# a MaxPooling2D layer. Dropout layers are added to reduce over-fitting.
# Core (Dense) layers are used as the last two layers of the network.
# 
# The resulting test accuracy (~0.99) is much higher than that obtained using core layers only.
#
# The data set can be downloaded from the Kaggle website at:
# https://www.kaggle.com/apallekonda/keras-mnist

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Open the MNIST archive; the individual arrays are pulled out of it in the next cell.
data = np.load(file='mnist.npz')

In [4]:
# Pull the image arrays (X) and label arrays (y) for both splits out of the archive.
X_train, y_train = data['x_train'], data['y_train']
X_test, y_test = data['x_test'], data['y_test']

In [5]:
# Reshape both image arrays to (samples, 28, 28, 1): the trailing axis of
# size 1 matches the input_shape=(28,28,1) declared on the first Conv2D layer.
X_train = X_train.reshape((-1, 28, 28, 1))
X_test = X_test.reshape((-1, 28, 28, 1))
print(X_train.shape, X_test.shape)

(60000, 28, 28, 1) (10000, 28, 28, 1)


In [6]:
# Cast to float32 and divide by 255.0 (presumably mapping 8-bit pixel
# intensities into the [0, 1] range -- confirm against the raw data).
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [7]:
# The labels must be converted to one-hot encoded form, which is what the
# categorical cross-entropy loss used at compile time expects.
from keras.utils import to_categorical

# Pin the one-hot width to the 10 digit classes instead of letting it be
# inferred from the largest label present, so both label arrays always
# match the 10-unit softmax output layer.
y_train=to_categorical(y_train, num_classes=10)
y_test=to_categorical(y_test, num_classes=10)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training samples as a validation set; the fixed
# random_state keeps the split repeatable between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=21,
)

In [9]:
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential
#from keras.layers.advanced_activations import LeakyReLU

# Model

In [10]:
# CNN classifier: three Conv2D -> MaxPooling2D -> Dropout stages followed by
# a small dense head ending in a 10-way softmax.
model=Sequential()

# Stage 1. Only the first layer of a Sequential model declares the input
# shape; every later layer takes its input shape from the layer before it.
model.add(Conv2D(32,kernel_size=(3,3),activation='relu',kernel_initializer='glorot_normal',
                 bias_initializer='zeros',padding='same',input_shape=(28,28,1)))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.25))

# Stage 2. No input_shape here: this layer receives the 14x14x32 output of
# stage 1, so a (28,28,1) declaration would be ignored and misleading.
model.add(Conv2D(64,kernel_size=(3,3),activation='tanh',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.25))

# Stage 3. Receives the 7x7x64 output of stage 2.
model.add(Conv2D(128,kernel_size=(3,3),activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.25))

# Classifier head: flatten the feature maps, one hidden dense layer with
# dropout, then one softmax unit per digit class.
model.add(Flatten())
model.add(Dense(64,activation='tanh'))
model.add(Dropout(0.3))
model.add(Dense(10,activation='softmax'))

In [11]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
__________

In [12]:
from keras.callbacks import EarlyStopping

# Stop training once the validation loss has gone 3 epochs without improving.
early_stopping_monitor = EarlyStopping(
    monitor='val_loss',
    patience=3,
)

In [13]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot
# encoded); accuracy is reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Train for at most 50 epochs in mini-batches of 128, validating on the
# held-out split after each epoch; the early-stopping callback may end the
# run sooner. The returned History object is kept so the per-epoch metrics
# stay available afterwards and its bare repr is not echoed as cell output.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=128,
    verbose=1,
    callbacks=[early_stopping_monitor],
    validation_data=(X_valid, y_valid),
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50


<keras.callbacks.History at 0x143d4998320>

In [15]:
# Score the trained model on the test set; the displayed result is the
# list [loss, accuracy], matching the loss and metric given at compile time.
model.evaluate(x=X_test, y=y_test)



[0.023568003817554563, 0.9929]