originally from > https://raw.githubusercontent.com/fchollet/keras/master/examples/mnist_transfer_cnn.py


Transfer learning toy example.

1 - Train a simple convnet on the MNIST dataset the first 5 digits [0..4].
2 - Freeze convolutional layers and fine-tune dense layers
   for the classification of digits [5..9].

Get to 99.8% test accuracy after 5 epochs
for the first five digits classifier
and 99.2% for the last five digits after transfer + fine-tuning.

NB: needed to update keras from 1.2.2 version to current version @ 14/11/2017
sudo pip install keras --upgrade

In [32]:
from __future__ import print_function
import datetime
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
print ("keras.__version__:", keras.__version__)

keras.__version__: 2.1.0


In [2]:
from keras import backend as K
import os

#https://keras.io/backend/

def set_keras_backend(backend):

    if K.backend() != backend:
        os.environ['KERAS_BACKEND'] = backend
        K.set_image_dim_ordering('th')
        reload(K)
        assert K.backend() == backend

set_keras_backend("theano")


Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5110)


In [13]:
now = datetime.datetime.now

batch_size = 128
num_classes = 10
epochs = 5


In [14]:
# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size
kernel_size = 3


In [15]:
print ("K.image_data_format():", K.image_data_format())
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
print ("input_shape:", input_shape)

K.image_data_format(): channels_last
input_shape: (28, 28, 1)


In [16]:
def train_model(model, train, test, num_classes):
    
    print ("train[0]:", train[0].shape)
    print ("test[0]:", test[0].shape)
    
    #reshape input data.
    x_train = train[0].reshape((train[0].shape[0],) + input_shape)
    x_test = test[0].reshape((test[0].shape[0],) + input_shape)
    
    #convert values from int to float32
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    #convert data from 0-255 range to 0-1 range.
    x_train /= 255
    x_test /= 255
    
    #
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(train[1], num_classes)
    y_test = keras.utils.to_categorical(test[1], num_classes)
    print (type(y_train), y_train.shape)
    print (type(y_test), y_test.shape)

    #
    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    t = now()
    
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    
    print('Training time: %s' % (now() - t))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])


In [17]:
def showTypeShape(name, input):
    print (name+":"+str(type(input))+" "+str(input.shape))

In [18]:
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()


showTypeShape("x_train", x_train)
showTypeShape("y_train", y_train)
showTypeShape("x_test", x_test)
showTypeShape("y_test", y_test)
print()


x_train:<type 'numpy.ndarray'> (60000, 28, 28)
y_train:<type 'numpy.ndarray'> (60000,)
x_test:<type 'numpy.ndarray'> (10000, 28, 28)
y_test:<type 'numpy.ndarray'> (10000,)



In [19]:
# define two groups of layers: feature (convolutions) and classification (dense)
feature_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

print (type(feature_layers), len(feature_layers))
for layer in feature_layers:
    print (type(layer))

<type 'list'> 7
<class 'keras.layers.convolutional.Conv2D'>
<class 'keras.layers.core.Activation'>
<class 'keras.layers.convolutional.Conv2D'>
<class 'keras.layers.core.Activation'>
<class 'keras.layers.pooling.MaxPooling2D'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.core.Flatten'>


In [45]:
classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax')
]
print (type(classification_layers), len(classification_layers))
for layer in classification_layers:
    print (type(layer), "trainable=", layer.trainable)#
    print (layer.get_config(), "\n")

<type 'list'> 5
<class 'keras.layers.core.Dense'> trainable= True
{'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'distribution': 'uniform', 'scale': 1.0, 'seed': None, 'mode': 'fan_avg'}}, 'name': 'dense_7', 'kernel_constraint': None, 'bias_regularizer': None, 'bias_constraint': None, 'activation': 'linear', 'trainable': True, 'kernel_regularizer': None, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'units': 128, 'use_bias': True, 'activity_regularizer': None} 

<class 'keras.layers.core.Activation'> trainable= True
{'activation': 'relu', 'trainable': True, 'name': 'activation_11'} 

<class 'keras.layers.core.Dropout'> trainable= True
{'noise_shape': None, 'rate': 0.5, 'trainable': True, 'seed': None, 'name': 'dropout_6'} 

<class 'keras.layers.core.Dense'> trainable= True
{'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'distribution': 'uniform', 'scale': 1.0, 'seed': None, 'mode': 'fan_avg'}}, 'name': 'dense_8', 'kernel_constraint'

In [21]:
# create complete model
model = Sequential(feature_layers + classification_layers)
print (type(model.layers), len(model.layers))

<type 'list'> 12


In [22]:
# train model 
#(x_train, y_train), (x_test, y_test)
print ("started")
startTime= now()
train_model(model,
            (x_train, y_train),
            (x_test, y_test), num_classes)
timeElapsed=now()-startTime
print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))


started
train[0]: (60000, 28, 28)
test[0]: (10000, 28, 28)
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
<type 'numpy.ndarray'> (60000, 10)
<type 'numpy.ndarray'> (10000, 10)
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:02:29.537740
Test score: 0.0417779098911
Test accuracy: 0.9862
Time elpased (hh:mm:ss.ms) 0:02:31.030766


In [25]:
print (type(model), type(model.layers), len(model.layers))

<class 'keras.models.Sequential'> <type 'list'> 12


In [33]:
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

pred = model.predict(x_test, batch_size = batch_size, verbose = 0)
print (type(pred), pred.shape)
print (pred[0])
pred_class = np.argmax(pred,axis=-1)
#x_test, y_test
print (pred_class[0:20])
print (y_test[0:20])

<type 'numpy.ndarray'> (10000, 10)
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [35]:
match = 0.
for i in range(0, y_test.shape[0]):
    if y_test[i]==pred_class[i]:
        match +=1
accuracy = match/y_test.shape[0]
print (accuracy)

#Test accuracy: 0.9862
#0.9838
#NB: discrepency between methods due to sept 'np.argmax(pred2,axis=-1)' 
#using the highest probability to return index of the category. 


0.9838


In [36]:
# freeze feature layers and rebuild model
for l in feature_layers:
    l.trainable = False
print (type(feature_layers), len(feature_layers))

<type 'list'> 7


In [37]:
# transfer: train dense layers for new classification task
train_model(model,
            (x_train, y_train),
            (x_test, y_test), num_classes)
#NB: since feature_layers are now not trainable, model runs faster.

train[0]: (60000, 28, 28)
test[0]: (10000, 28, 28, 1)
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
<type 'numpy.ndarray'> (60000, 10)
<type 'numpy.ndarray'> (10000, 10)
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:00:29.802153
Test score: 0.0324378654635
Test accuracy: 0.9888


In [43]:
pred2 = model.predict(x_test, batch_size = batch_size, verbose = 0)
#insert step here to assess accuracy of the predictions.
print (type(pred2), pred2.shape)
print (pred2[0])
pred_class2 = np.argmax(pred2,axis=-1)
#x_test, y_test
print ("predicted:", pred_class2[0:20])
print ("   actual:", y_test[0:20])

<type 'numpy.ndarray'> (10000, 10)
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
predicted: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4]
   actual: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [41]:
match = 0.
for i in range(0, y_test.shape[0]):
    if y_test[i]==pred_class2[i]:
        match +=1
accuracy = match/y_test.shape[0]
print (accuracy)
#NB: discrepency between methods due to sept 'np.argmax(pred2,axis=-1)' 
#using the highest probability to return index of the category. 
#Test accuracy: 0.9888
#0.9863

0.9863
