In [2]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first tensors, i.e. (channels, rows, cols): the reshape to
# (samples, 1, 28, 28) and the input_shape=(1, 28, 28) used further down rely on it.
# set_image_data_format('channels_first') is the Keras 2 spelling of the legacy
# set_image_dim_ordering('th') call, which newer Keras releases no longer provide.
K.set_image_data_format('channels_first')


In [3]:
# Seed NumPy's global random generator with a fixed value so that anything
# drawing from np.random starts from the same state on every run.
seed = 7
np.random.seed(seed=seed)


In [4]:
# Fetch MNIST as two (images, labels) pairs: one for training, one for testing.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Insert a singleton channel axis so each array becomes
# (samples, channels, 28, 28), then cast the pixel values to float32.
X_train = X_train.reshape((len(X_train), 1, 28, 28)).astype(np.float32)
X_test = X_test.reshape((len(X_test), 1, 28, 28)).astype(np.float32)


In [5]:
# Scale pixel intensities from [0, 255] down to [0, 1]. The max() guards keep
# this cell idempotent: re-running it must not divide already-normalised data
# by 255 a second time.
if X_train.max() > 1:
    X_train = X_train / 255
if X_test.max() > 1:
    X_test = X_test / 255

# One-hot encode the integer class labels. Both splits are encoded with the same
# explicit width, so their column counts cannot disagree if one split happens to
# lack the highest class. The ndim guard skips labels that were already encoded
# by an earlier run of this cell.
if y_train.ndim == 1 and y_test.ndim == 1:
    label_count = int(max(y_train.max(), y_test.max())) + 1
    y_train = np_utils.to_categorical(y_train, num_classes=label_count)
    y_test = np_utils.to_categorical(y_test, num_classes=label_count)

# Width of the one-hot encoding; the model builders size their output layer from it.
num_classes = y_test.shape[1]


In [6]:
def baseline_model():
    """Build and compile the small, single-convolution CNN.

    Layers, in order: a 32-filter 5x5 ReLU convolution over (1, 28, 28) inputs,
    2x2 max pooling, dropout at rate 0.2, flatten, a 128-unit ReLU dense layer
    and a softmax output layer with ``num_classes`` units (``num_classes`` is
    read from the notebook's global namespace).

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the 'adam' optimizer and accuracy as the tracked metric.
    """
    layer_stack = [
        Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    net = Sequential(layer_stack)
    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net


In [7]:
def larger_model():
    """Build and compile the deeper CNN with two convolution/pooling stages.

    Layers, in order: a 30-filter 5x5 ReLU convolution over (1, 28, 28) inputs,
    2x2 max pooling, a 15-filter 3x3 ReLU convolution, 2x2 max pooling, dropout
    at rate 0.2, flatten, ReLU dense layers of 128 and 50 units, and a softmax
    output layer with ``num_classes`` units (``num_classes`` is read from the
    notebook's global namespace).

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the 'adam' optimizer and accuracy as the tracked metric.
    """
    layer_stack = (
        # Convolutional feature extractor.
        Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(15, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        # Fully connected classifier head.
        Flatten(),
        Dense(128, activation='relu'),
        Dense(50, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )

    net = Sequential()
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net


In [8]:
# Train the baseline CNN for 10 epochs in mini-batches of 200 samples;
# verbose=2 logs one summary line per epoch.
# NOTE(review): the test split doubles as the validation data here, so the
# reported error is not measured on data the training loop never saw reported
# on — consider carving a separate validation split out of the training set.
model = baseline_model()
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)

# The model is compiled with metrics=['accuracy'], so index 1 of the evaluate()
# result is the accuracy; its complement is reported as an error percentage.
scores = model.evaluate(X_test, y_test, verbose=0)

# print() is called as a function: the bare `print "..."` statement is a
# SyntaxError on Python 3, and the function form matches the print() call used
# for the larger model later in the notebook.
print("CNN Error -- Smaller Model : %.2f%%" % (100 - scores[1] * 100))


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 189s - loss: 0.2321 - acc: 0.9343 - val_loss: 0.0815 - val_acc: 0.9745
Epoch 2/10
 - 162s - loss: 0.0737 - acc: 0.9781 - val_loss: 0.0459 - val_acc: 0.9842
Epoch 3/10
 - 155s - loss: 0.0530 - acc: 0.9838 - val_loss: 0.0436 - val_acc: 0.9854
Epoch 4/10
 - 146s - loss: 0.0401 - acc: 0.9876 - val_loss: 0.0413 - val_acc: 0.9864
Epoch 5/10
 - 146s - loss: 0.0335 - acc: 0.9892 - val_loss: 0.0335 - val_acc: 0.9885
Epoch 6/10
 - 147s - loss: 0.0272 - acc: 0.9917 - val_loss: 0.0313 - val_acc: 0.9893
Epoch 7/10
 - 145s - loss: 0.0228 - acc: 0.9930 - val_loss: 0.0358 - val_acc: 0.9877
Epoch 8/10
 - 146s - loss: 0.0200 - acc: 0.9939 - val_loss: 0.0325 - val_acc: 0.9886
Epoch 9/10
 - 147s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0302 - val_acc: 0.9899
Epoch 10/10
 - 146s - loss: 0.0144 - acc: 0.9956 - val_loss: 0.0315 - val_acc: 0.9911
CNN Error -- Smaller Model : 0.89%


In [9]:
# Repeat the experiment with the deeper network, using the same data,
# epoch count and batch size as the baseline run.
model = larger_model()
model.fit(
    X_train,
    y_train,
    batch_size=200,
    epochs=10,
    validation_data=(X_test, y_test),
)

# scores[1] holds the accuracy metric; print its complement as an error percentage.
scores = model.evaluate(X_test, y_test, verbose=0)
print("Large CNN Error: %.2f%%" % (100 - scores[1] * 100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Large CNN Error: 0.83%


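## The larger model reached a 0.83% test error versus 0.89% for the smaller baseline, so the extra capacity bought only a marginal improvement here. A larger model doesn't always guarantee better accuracy: it helps most of the time, but there are many proven exceptions.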