* Train a simple convnet on the MNIST dataset the first 5 digits [0..4].

* Freeze convolutional layers and fine-tune dense layers
   for the classification of digits [5..9].

* Results:
    * 99.8% test accuracy after 5 epochs for the first five digits classifier
    * 99.2% for the last five digits after transfer + fine-tuning.

In [9]:
from __future__ import print_function

import datetime
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
now = datetime.datetime.now

batch_size = 128
num_classes = 5
epochs = 5

In [2]:
# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size
kernel_size = 3

if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

In [10]:
def train_model(model, train, test, num_classes):
    x_train = train[0].reshape((train[0].shape[0],) + input_shape)
    x_test = test[0].reshape((test[0].shape[0],) + input_shape)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = tf.keras.utils.to_categorical(train[1], num_classes)
    y_test = tf.keras.utils.to_categorical(test[1], num_classes)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    t = now()
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    print('Training time: %s' % (now() - t))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

the data, split between train and test sets

In [11]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

create two datasets one with digits below 5 and one with 5 and above

In [12]:
x_train_lt5 = x_train[y_train < 5]
y_train_lt5 = y_train[y_train < 5]
x_test_lt5 = x_test[y_test < 5]
y_test_lt5 = y_test[y_test < 5]

x_train_gte5 = x_train[y_train >= 5]
y_train_gte5 = y_train[y_train >= 5] - 5
x_test_gte5 = x_test[y_test >= 5]
y_test_gte5 = y_test[y_test >= 5] - 5

define two groups of layers: feature (convolutions) and classification (dense)

In [13]:
feature_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax')
]

create complete model

In [14]:
model = Sequential(feature_layers + classification_layers)

train model for 5-digit classification [0..4]

In [15]:
train_model(model,
            train=(x_train_lt5, y_train_lt5),
            test = (x_test_lt5, y_test_lt5), num_classes)

x_train shape: (30596, 28, 28, 1)
30596 train samples
5139 test samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:02:16.734974
Test score: 1.3828550577163696
Test accuracy: 0.8256469964981079


freeze feature layers and rebuild model

In [16]:
for l in feature_layers:
    l.trainable = False

transfer: train dense layers for new classification task [5..9]

In [17]:
train_model(model=model,
            train=(x_train_gte5, y_train_gte5),
            test=(x_test_gte5, y_test_gte5),
            num_classes=num_classes)

x_train shape: (29404, 28, 28, 1)
29404 train samples
4861 test samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:00:31.369513
Test score: 1.4185786247253418
Test accuracy: 0.6776383519172668


In [18]:
# The predict_classes function outputs the highest probability class
# according to the trained classifier for each input example.
predicted_classes = model.predict_classes(x_test)

# Check which items we got right / wrong
correct_indices = np.nonzero(predicted_classes == y_test_gte5)[0]
incorrect_indices = np.nonzero(predicted_classes != y_test_gte5)[0]



ValueError: in user code:

    C:\Users\jose.velasco\Anaconda3\lib\site-packages\keras\engine\training.py:1544 predict_function  *
        return step_function(self, iterator)
    C:\Users\jose.velasco\Anaconda3\lib\site-packages\keras\engine\training.py:1527 run_step  *
        outputs = model.predict_step(data)
    C:\Users\jose.velasco\Anaconda3\lib\site-packages\keras\engine\training.py:1500 predict_step  *
        return self(x, training=False)
    C:\Users\jose.velasco\Anaconda3\lib\site-packages\keras\engine\base_layer.py:989 __call__  *
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\jose.velasco\Anaconda3\lib\site-packages\keras\engine\input_spec.py:227 assert_input_compatibility  *
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_1 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, 28, 28)
