In [1]:
# import os
# import tensorflow as tf
# from tensorflow.keras.layers import Conv2D, ZeroPadding2D
# from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
# from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.losses import categorical_crossentropy
# from tensorflow.keras.optimizers import Adadelta
# from tensorflow.keras import backend as K
# import torch
import keras
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Flatten, Dense, Dropout
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras import backend as K
# import matplotlib.pyplot as plt
from src import utils
from src.utils import Plot
# from src.utils import Plot

# os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# import importlib
# importlib.reload(utils)

Using TensorFlow backend.


A common CNN feature learning architecture is similar to that shown above, where feature maps are constructed with ReLU-activated convolutional layers and intermittent max pooling. It's also fairly common to normalize the batch of input layers prior to convolution in the feature learning segment.

## Classification

In the classification segment of the network, we flatten the final layer of feature maps and shift our focus to classification using fully connected (FC) layers.

### Fully Connected (FC) Layers

In [2]:
# Fetch the MNIST splits and the input shape from the project helper.
x_train, y_train, x_test, y_test, input_shape = utils.load_mnist(method='keras')

# Run every split through the Keras backend's `eval` in one pass
# (presumably turning backend tensors into NumPy arrays — TODO confirm
# against `utils.load_mnist`).
x_train, y_train, x_test, y_test = map(K.eval, (x_train, y_train, x_test, y_test))

$$\begin{aligned} \underset{30 \times 30 \times 1}{ \ \ \textsf{input}^+} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{30 \times 30 \times 32}{ \ \ \textsf{conv1}^+} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{28 \times 28 \times 64}{\textsf{conv2}} \ \ _{ \ast \ \ \text{max pool}_{2 \times 2} \ \ \longmapsto}\\[5pt]
\underset{16 \times 16 \times 64}{ \, \ \textsf{pool1}^\ddagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{14 \times 14 \times 64}{\textsf{conv3}} \ \ _{ \ast \ \ \text{max pool}_{2 \times 2} \ \ \longmapsto} \ \ \underset{8 \times 8 \times 64}{ \, \ \textsf{pool2}^\ddagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto}\\[5pt]
\underset{7 \times 7 \times 64}{ \, \ \textsf{conv4}^\dagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{5 \times 5 \times 64}{ \, \ \textsf{conv5}^\dagger} \ \ _{ + \ \ \text{flatten}_{1600} \ + \ \text{dropout}_{0.25} \ + \ \text{dense}_{128} \ + \ \text{ReLU} \ \ \longmapsto}\\[10pt]
\underset{1 \times 1 \times 128}{\textsf{FC6}} \ \ _{ + \ \ \text{dropout}_{0.5} \ + \ \text{dense}_{128} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{1 \times 1 \times 128}{\textsf{FC7}} \ \ _{ + \ \ \text{dropout}_{0.5} \ + \ \text{dense}_{10} \ + \ \text{Softmax} \ \ \longmapsto} \ \ \underset{1 \times 1 \times 10}{\textsf{output}} \end{aligned}$$

Here, we introduce FC layers by building a network beginning with feature learning. We initialize a Keras sequential model object as follows:

Beginning with the architecture

$$\underset{30 \times 30 \times 1}{ \ \ \textsf{input}^+} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{30 \times 30 \times 32}{ \ \ \textsf{conv1}^+} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{28 \times 28 \times 64}{\textsf{conv2}} \ \ _{ \ast \ \ \text{max pool}_{2 \times 2} \ \ \longmapsto} \ \ \underset{16 \times 16 \times 64}{ \, \ \textsf{pool1}^\ddagger}$$

we add layers as follows:

In [3]:
# Start from an empty Sequential container; the following cells append layers to it in order.
model = Sequential()

In [4]:
# First feature-learning block: two same-padded 3x3 ReLU convolutions with
# 16 filters each, a 2x2 max pool, then batch normalization. The first layer
# also declares the 28x28 single-channel input shape.
conv_opts = dict(kernel_size=3, padding='same', activation='relu')
for layer in (
    Conv2D(16, input_shape=(28, 28, 1), **conv_opts),
    Conv2D(16, **conv_opts),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
):
    model.add(layer)

$$\underset{16 \times 16 \times 64}{ \, \ \textsf{pool1}^\ddagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{14 \times 14 \times 64}{\textsf{conv3}} \ \ _{ \ast \ \ \text{max pool}_{2 \times 2} \ \ \longmapsto} \ \ \underset{8 \times 8 \times 64}{ \, \ \textsf{pool2}^\ddagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{7 \times 7 \times 64}{ \, \ \textsf{conv4}^\dagger}$$

In [5]:
# Second feature-learning block: two same-padded 3x3 ReLU convolutions with
# 32 filters each, a 2x2 max pool and batch normalization, followed by the
# first of the 64-filter convolutions.
conv_opts = dict(kernel_size=3, padding='same', activation='relu')
for layer in (
    Conv2D(32, **conv_opts),
    Conv2D(32, **conv_opts),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(64, **conv_opts),
):
    model.add(layer)

Here we switch over to classification by vectorizing `conv5` as follows:

$$\underset{7 \times 7 \times 64}{ \, \ \textsf{conv4}^\dagger} \ \ _{\circledast \ \ \text{conv}_{3 \times 3} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{5 \times 5 \times 64}{ \, \ \textsf{conv5}^\dagger} \ \ _{ + \ \ \text{flatten}_{1600} \ + \ \text{dropout}_{0.25} \ + \ \text{dense}_{128} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{1 \times 1 \times 128}{\textsf{FC6}}$$

In [6]:
# Finish feature learning with two more same-padded 3x3 ReLU convolutions
# (64 filters each) and batch normalization, then switch to classification:
# flatten the feature maps, apply 25% dropout, and feed a 128-unit ReLU
# dense layer.
conv_opts = dict(kernel_size=3, padding='same', activation='relu')
for layer in (
    Conv2D(64, **conv_opts),
    Conv2D(64, **conv_opts),
    BatchNormalization(),
    Flatten(),
    Dropout(rate=0.25),
    Dense(units=128, activation='relu'),
):
    model.add(layer)

$$\underset{1 \times 1 \times 128}{\textsf{FC6}} \ \ _{ + \ \text{dropout}_{0.5} \ + \ \text{dense}_{128} \ + \ \text{ReLU} \ \ \longmapsto} \ \ \underset{1 \times 1 \times 128}{\textsf{FC7}} \ \ _{ + \ \text{dropout}_{0.5} \ + \ \text{dense}_{10} \ + \ \text{Softmax} \ \ \longmapsto} \ \ \underset{1 \times 1 \times 10}{\textsf{output}}$$

In [7]:
# Classification head: 50% dropout before a second 128-unit ReLU dense layer,
# then 50% dropout before the 10-way softmax output.
for layer in (
    Dropout(rate=0.5),
    Dense(units=128, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
):
    model.add(layer)

In [8]:
# Print each layer's output shape and parameter count to check the assembled architecture.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 16)        160       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 16)        2320      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 14, 14, 16)        64        
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 32)        4640      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 32)         

In [None]:
# Path the best weights are written to. Later cells pass `checkpoint` to
# `model.fit` and reload `fp` before evaluation, so both names must be defined
# for the notebook to run from a fresh kernel.
# NOTE(review): assumes the `models/` directory already exists — confirm.
fp = 'models/best_weights.hdf5'

# Save weights only (not the full model), and overwrite the file only when the
# monitored validation accuracy improves on the best value seen so far.
checkpoint = ModelCheckpoint(fp, monitor='val_accuracy', save_best_only=True,
                             save_weights_only=True, mode='max')

In [9]:
# Configure training: Nadam optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy'],
)

In [10]:
# Initial training pass: one epoch, holding out 20% of the training data for
# validation.
# NOTE(review): the follow-up cells resume at initial_epoch=10, which presumes
# ten epochs were run here (with the checkpoint callback attached); this cell
# currently runs only one epoch without it — confirm which is intended.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    initial_epoch=0,
    epochs=1,
    validation_split=0.2,
    verbose=1,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/1


<keras.callbacks.callbacks.History at 0x658efcb90>

In [None]:
# Resume training at epoch index 10 and stop once epoch 20 is reached,
# validating on the test split and invoking the checkpoint callback.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    initial_epoch=10,
    epochs=20,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint],
    verbose=1,
)

In [None]:
# Resume training at epoch index 20 and stop once epoch 30 is reached,
# validating on the test split and invoking the checkpoint callback.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    initial_epoch=20,
    epochs=30,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint],
    verbose=1,
)

In [None]:
# Resume training at epoch index 30 and stop once epoch 40 is reached,
# validating on the test split and invoking the checkpoint callback.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    initial_epoch=30,
    epochs=40,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint],
    verbose=1,
)

In [None]:
# Resume training at epoch index 40 and stop once epoch 50 is reached,
# validating on the test split and invoking the checkpoint callback.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    initial_epoch=40,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint],
    verbose=1,
)

In [None]:
# Restore the weights stored at `fp` before scoring the model.
model.load_weights(fp)

# `evaluate` returns the loss followed by the compiled metrics (accuracy here).
# NOTE(review): the resumed training cells validate on (x_test, y_test) while
# checkpointing, so the weights loaded here were chosen using this same data;
# the reported test accuracy is therefore likely optimistic — confirm intent.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)