Originally from: https://raw.githubusercontent.com/fchollet/keras/master/examples/mnist_transfer_cnn.py


Transfer learning toy example.

1 - Train a simple convnet on the first 5 digits [0..4] of the MNIST dataset.
2 - Freeze convolutional layers and fine-tune dense layers
   for the classification of digits [5..9].

Get to 99.8% test accuracy after 5 epochs
for the first five digits classifier
and 99.2% for the last five digits after transfer + fine-tuning.

NB: Keras had to be upgraded from version 1.2.2 to the current version (as of 14/11/2017):
sudo pip install keras --upgrade

In [6]:
from __future__ import print_function
import datetime
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
# Record which Keras release produced the outputs shown in this notebook.
print("keras.__version__:", keras.__version__)

keras.__version__: 2.1.0


In [2]:
from keras import backend as K
import os

#https://keras.io/backend/

def set_keras_backend(backend):
    """Switch the active Keras backend to `backend`, if it is not active already.

    When ``K.backend()`` already equals `backend` nothing happens. Otherwise
    the ``KERAS_BACKEND`` environment variable is set, the image dimension
    ordering is set to ``'th'`` and the backend module ``K`` is reloaded so it
    re-reads its configuration.

    Args:
        backend: Backend name to compare against ``K.backend()``,
            e.g. ``"theano"``.

    Raises:
        AssertionError: if ``K.backend()`` still differs from `backend`
            after the reload.
    """
    if K.backend() == backend:
        return

    # `reload` is a builtin on Python 2 only; Python 3 provides it as
    # importlib.reload. Resolve whichever one this interpreter has.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module

    os.environ['KERAS_BACKEND'] = backend
    # NOTE(review): 'th' is applied for every backend and *before* the reload;
    # the reload may reset it from the Keras config file -- confirm intent.
    K.set_image_dim_ordering('th')
    reload_module(K)
    assert K.backend() == backend, (
        "Keras backend is %r after reload, expected %r" % (K.backend(), backend))

# Make Theano the active backend for the rest of the notebook.
set_keras_backend(backend="theano")


Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5110)


In [3]:
# Shorthand used for timing: now() returns the current datetime.
now = datetime.datetime.now

# Mini-batch size, number of output classes and number of training epochs.
batch_size, num_classes, epochs = 128, 5, 5


In [4]:
# Height and width of one input image.
img_rows = 28
img_cols = 28

# Convolution settings: filter count and kernel size.
filters, kernel_size = 32, 3

# Size of the max-pooling window.
pool_size = 2


In [9]:
# The single image channel goes first or last depending on the data format
# the backend reports.
print("K.image_data_format():", K.image_data_format())
input_shape = (
    (1, img_rows, img_cols)
    if K.image_data_format() == 'channels_first'
    else (img_rows, img_cols, 1)
)
print("input_shape:", input_shape)

K.image_data_format(): channels_last
input_shape: (28, 28, 1)


In [11]:
def train_model(model, train, test, num_classes):
    """Compile `model`, fit it on `train` and print its score on `test`.

    Args:
        model: Keras model; it is (re)compiled and trained in place.
        train: Indexable ``(images, labels)`` pair used for fitting.
        test: Indexable ``(images, labels)`` pair used both as validation
            data during fitting and for the final evaluation.
        num_classes: Number of columns in the one-hot label matrices.

    Reads the notebook-level names `input_shape`, `batch_size`, `epochs`
    and `now`. Returns nothing; shapes, timing and scores are printed.
    """
    print("train[0]:", train[0].shape)
    print("test[0]:", test[0].shape)

    def as_unit_floats(images):
        # Give each sample the layout in `input_shape`, cast to float32 and
        # rescale from the 0-255 range to the 0-1 range.
        shaped = images.reshape((images.shape[0],) + input_shape)
        return shaped.astype('float32') / 255

    x_train = as_unit_floats(train[0])
    x_test = as_unit_floats(test[0])

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # One-hot encode the integer class labels.
    y_train = keras.utils.to_categorical(train[1], num_classes)
    y_test = keras.utils.to_categorical(test[1], num_classes)
    for one_hot in (y_train, y_test):
        print(type(one_hot), one_hot.shape)

    # Compiling on every call means the layers' current `trainable` flags
    # are the ones in effect for this run.
    model.compile(optimizer='adadelta',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Time the fit() call only.
    fit_started = now()
    model.fit(x_train, y_train,
              validation_data=(x_test, y_test),
              epochs=epochs,
              batch_size=batch_size,
              verbose=1)
    print('Training time: %s' % (now() - fit_started))

    # evaluate() yields the loss first, then the compiled accuracy metric.
    results = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', results[0])
    print('Test accuracy:', results[1])


In [17]:
def showTypeShape(name, input):
    """Print one line of the form ``<name>:<type of input> <input.shape>``.

    `name` is the label to print; `input` is any object that has a
    ``shape`` attribute.
    """
    pieces = [name, ":", str(type(input)), " ", str(input.shape)]
    print("".join(pieces))

In [19]:
# Load MNIST, which arrives already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

showTypeShape("x_train", x_train)
showTypeShape("y_train", y_train)
showTypeShape("x_test", x_test)
showTypeShape("y_test", y_test)
print()

# First task: keep only the samples whose label is below 5.
train_is_lt5 = y_train < 5
test_is_lt5 = y_test < 5
x_train_lt5, y_train_lt5 = x_train[train_is_lt5], y_train[train_is_lt5]
x_test_lt5, y_test_lt5 = x_test[test_is_lt5], y_test[test_is_lt5]

showTypeShape("x_train_lt5", x_train_lt5)
showTypeShape("y_train_lt5", y_train_lt5)
showTypeShape("x_test_lt5", x_test_lt5)
showTypeShape("y_test_lt5", y_test_lt5)
print()

# Second task: samples labelled 5 and above. Subtracting 5 shifts the labels
# so they start at 0, like those of the first task.
train_is_gte5 = y_train >= 5
test_is_gte5 = y_test >= 5
x_train_gte5, y_train_gte5 = x_train[train_is_gte5], y_train[train_is_gte5] - 5
x_test_gte5, y_test_gte5 = x_test[test_is_gte5], y_test[test_is_gte5] - 5

showTypeShape("x_train_gte5", x_train_gte5)
showTypeShape("y_train_gte5", y_train_gte5)
showTypeShape("x_test_gte5", x_test_gte5)
showTypeShape("y_test_gte5", y_test_gte5)


x_train:<type 'numpy.ndarray'> (60000, 28, 28)
y_train:<type 'numpy.ndarray'> (60000,)
x_test:<type 'numpy.ndarray'> (10000, 28, 28)
y_test:<type 'numpy.ndarray'> (10000,)

x_train_lt5:<type 'numpy.ndarray'> (30596, 28, 28)
y_train_lt5:<type 'numpy.ndarray'> (30596,)
x_test_lt5:<type 'numpy.ndarray'> (5139, 28, 28)
y_test_lt5:<type 'numpy.ndarray'> (5139,)

x_train_gte5:<type 'numpy.ndarray'> (29404, 28, 28)
y_train_gte5:<type 'numpy.ndarray'> (29404,)
x_test_gte5:<type 'numpy.ndarray'> (4861, 28, 28)
y_test_gte5:<type 'numpy.ndarray'> (4861,)


In [23]:
# The network is defined as two groups of layers. This first group is the
# feature extractor (convolutions); the dense classifier is defined separately.
feature_layers = [
    Conv2D(filters=filters,
           kernel_size=kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation(activation='relu'),
    Conv2D(filters=filters, kernel_size=kernel_size),
    Activation(activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(rate=0.25),
    Flatten(),
]

# Show the container type, the layer count and each layer's class.
print(type(feature_layers), len(feature_layers))
for layer in feature_layers:
    print(type(layer))

<type 'list'> 7
<class 'keras.layers.convolutional.Conv2D'>
<class 'keras.layers.core.Activation'>
<class 'keras.layers.convolutional.Conv2D'>
<class 'keras.layers.core.Activation'>
<class 'keras.layers.pooling.MaxPooling2D'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.core.Flatten'>


In [34]:
# Classification head: a hidden dense layer followed by a softmax output
# with one unit per class.
classification_layers = [
    Dense(units=128),
    Activation(activation='relu'),
    Dropout(rate=0.5),
    Dense(units=num_classes),
    Activation(activation='softmax'),
]

# Show each layer's class, its trainable flag and its full configuration.
print(type(classification_layers), len(classification_layers))
for layer in classification_layers:
    print(type(layer), "trainable=", layer.trainable, layer.get_config(), "\n")

<type 'list'> 5
<class 'keras.layers.core.Dense'> trainable= True {'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'distribution': 'uniform', 'scale': 1.0, 'seed': None, 'mode': 'fan_avg'}}, 'name': 'dense_9', 'kernel_constraint': None, 'bias_regularizer': None, 'bias_constraint': None, 'activation': 'linear', 'trainable': True, 'kernel_regularizer': None, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'units': 128, 'use_bias': True, 'activity_regularizer': None} 

<class 'keras.layers.core.Activation'> trainable= True {'activation': 'relu', 'trainable': True, 'name': 'activation_17'} 

<class 'keras.layers.core.Dropout'> trainable= True {'noise_shape': None, 'rate': 0.5, 'trainable': True, 'seed': None, 'name': 'dropout_9'} 

<class 'keras.layers.core.Dense'> trainable= True {'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'distribution': 'uniform', 'scale': 1.0, 'seed': None, 'mode': 'fan_avg'}}, 'name': 'dense_10', 'kernel_constraint

In [25]:
# Stack the feature layers and the classification layers into one model.
model = Sequential(layers=feature_layers + classification_layers)
print(type(model.layers), len(model.layers))

In [28]:
# Train the model on the first task: classifying the digits [0..4].
# train_model() prints its own "Training time" for the fit() call alone; the
# timer here wraps the whole call, so it also covers preprocessing, compiling
# and the final evaluation.
print("started")
startTime = now()
train_model(model,
            (x_train_lt5, y_train_lt5),
            (x_test_lt5, y_test_lt5), num_classes)
timeElapsed = now() - startTime
print('Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed))


started
train[0]: (30596, 28, 28)
test[0]: (5139, 28, 28)
x_train shape: (30596, 28, 28, 1)
30596 train samples
5139 test samples
<type 'numpy.ndarray'> (30596, 5)
<type 'numpy.ndarray'> (5139, 5)
Train on 30596 samples, validate on 5139 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:01:26.420527
Test score: 0.0130250662213
Test accuracy: 0.99435687877
Time elpased (hh:mm:ss.ms) 0:01:27.195205


In [29]:
# Freeze the feature layers. Nothing is rebuilt in this cell: train_model()
# calls model.compile() on its next run, which is when Keras is expected to
# pick up the changed `trainable` flags.
for layer in feature_layers:
    layer.trainable = False
print(type(feature_layers), len(feature_layers))

<type 'list'> 7


In [30]:
# Transfer step: train the same model on the second task, digits [5..9]
# (their labels were shifted down by 5 when the dataset was split).
train_model(
    model,
    train=(x_train_gte5, y_train_gte5),
    test=(x_test_gte5, y_test_gte5),
    num_classes=num_classes,
)
# NB: the feature layers are no longer trainable, so this run is faster.

train[0]: (29404, 28, 28)
test[0]: (4861, 28, 28)
x_train shape: (29404, 28, 28, 1)
29404 train samples
4861 test samples
<type 'numpy.ndarray'> (29404, 5)
<type 'numpy.ndarray'> (4861, 5)
Train on 29404 samples, validate on 4861 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:00:15.815848
Test score: 0.0399413917625
Test accuracy: 0.987039703654
