In [0]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Activation, GlobalAveragePooling2D, BatchNormalization, Dropout
from keras.datasets import mnist 
from keras.utils import np_utils

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(X_train.shape[0], 28, 28,1)
X_test = X_test.reshape(X_test.shape[0], 28, 28,1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


### For Total number of trainable params < 15K
----

In [35]:
model = Sequential()
model.add(Conv2D(16, 3, use_bias=False, input_shape=(28,28,1), activation='relu')) #26
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(32, 3, use_bias=False, activation='relu')) #24
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(MaxPooling2D(pool_size=(2, 2))) #12


model.add(Conv2D(16, 1, use_bias=False, activation='relu')) #12

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #10
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #8
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #6
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #4
model.add(BatchNormalization())

model.add(Conv2D(10, 1, use_bias=False, activation='relu'))

model.add(GlobalAveragePooling2D())
model.add(Activation('softmax'))

model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_77 (Conv2D)           (None, 26, 26, 16)        144       
_________________________________________________________________
batch_normalization_30 (Batc (None, 26, 26, 16)        64        
_________________________________________________________________
dropout_25 (Dropout)         (None, 26, 26, 16)        0         
_________________________________________________________________
conv2d_78 (Conv2D)           (None, 24, 24, 32)        4608      
_________________________________________________________________
batch_normalization_31 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
dropout_26 (Dropout)         (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 32)      

In [36]:
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr):
  return round(0.003 * 1/(1 + 0.319 * epoch), 10)

model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.003), metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=128, epochs=20, verbose=1, validation_data=(X_test, Y_test), callbacks=[LearningRateScheduler(scheduler, verbose=1)])

Train on 60000 samples, validate on 10000 samples
Epoch 1/20

Epoch 00001: LearningRateScheduler setting learning rate to 0.003.
Epoch 2/20

Epoch 00002: LearningRateScheduler setting learning rate to 0.0022744503.
Epoch 3/20

Epoch 00003: LearningRateScheduler setting learning rate to 0.0018315018.
Epoch 4/20

Epoch 00004: LearningRateScheduler setting learning rate to 0.0015329586.
Epoch 5/20

Epoch 00005: LearningRateScheduler setting learning rate to 0.0013181019.
Epoch 6/20

Epoch 00006: LearningRateScheduler setting learning rate to 0.0011560694.
Epoch 7/20

Epoch 00007: LearningRateScheduler setting learning rate to 0.0010295127.
Epoch 8/20

Epoch 00008: LearningRateScheduler setting learning rate to 0.0009279307.
Epoch 9/20

Epoch 00009: LearningRateScheduler setting learning rate to 0.0008445946.
Epoch 10/20

Epoch 00010: LearningRateScheduler setting learning rate to 0.0007749935.
Epoch 11/20

Epoch 00011: LearningRateScheduler setting learning rate to 0.0007159905.
Epoch 12/

<keras.callbacks.History at 0x7f7d82595710>

In [37]:

score = model.evaluate(X_test, Y_test, verbose=0)
print(score)

[0.01818742549381859, 0.9947]


### For Total number of params < 15K (final model and training)
----

Since the Total number of parameters exceeded 15K, I dropped batch normalisation and then trained the model.


In [42]:
model = Sequential()
model.add(Conv2D(16, 3, use_bias=False, input_shape=(28,28,1), activation='relu')) #26
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(32, 3, use_bias=False, activation='relu')) #24
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(MaxPooling2D(pool_size=(2, 2))) #12


model.add(Conv2D(16, 1, use_bias=False, activation='relu')) #12

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #10
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #8
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #6
#model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16, 3, use_bias=False, activation='relu')) #4

model.add(Conv2D(10, 1, use_bias=False, activation='relu'))

model.add(GlobalAveragePooling2D())
model.add(Activation('softmax'))

model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_101 (Conv2D)          (None, 26, 26, 16)        144       
_________________________________________________________________
batch_normalization_45 (Batc (None, 26, 26, 16)        64        
_________________________________________________________________
dropout_39 (Dropout)         (None, 26, 26, 16)        0         
_________________________________________________________________
conv2d_102 (Conv2D)          (None, 24, 24, 32)        4608      
_________________________________________________________________
batch_normalization_46 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
dropout_40 (Dropout)         (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 12, 12, 32)      

In [43]:
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr):
  return round(0.003 * 1/(1 + 0.319 * epoch), 10)

model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.003), metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=128, epochs=20, verbose=1, validation_data=(X_test, Y_test), callbacks=[LearningRateScheduler(scheduler, verbose=1)])

Train on 60000 samples, validate on 10000 samples
Epoch 1/20

Epoch 00001: LearningRateScheduler setting learning rate to 0.003.
Epoch 2/20

Epoch 00002: LearningRateScheduler setting learning rate to 0.0022744503.
Epoch 3/20

Epoch 00003: LearningRateScheduler setting learning rate to 0.0018315018.
Epoch 4/20

Epoch 00004: LearningRateScheduler setting learning rate to 0.0015329586.
Epoch 5/20

Epoch 00005: LearningRateScheduler setting learning rate to 0.0013181019.
Epoch 6/20

Epoch 00006: LearningRateScheduler setting learning rate to 0.0011560694.
Epoch 7/20

Epoch 00007: LearningRateScheduler setting learning rate to 0.0010295127.
Epoch 8/20

Epoch 00008: LearningRateScheduler setting learning rate to 0.0009279307.
Epoch 9/20

Epoch 00009: LearningRateScheduler setting learning rate to 0.0008445946.
Epoch 10/20

Epoch 00010: LearningRateScheduler setting learning rate to 0.0007749935.
Epoch 11/20

Epoch 00011: LearningRateScheduler setting learning rate to 0.0007159905.
Epoch 12/

<keras.callbacks.History at 0x7f7d81ddc4a8>

In [44]:

score = model.evaluate(X_test, Y_test, verbose=0)
print(score)

[0.020135334616147155, 0.9951]
