In [1]:
%matplotlib inline

# Transfer Learning
In this assignment, we will use the weights of a network pre-trained in a particular problem as starting point to train our CNN to a different problem. As training a network from scratch is time-consuming and demands a lot of data, this is a frequent strategy, specially if both datasets (the one used for pre-training and the target) shares similar structures/elements/concepts. 

This is specially true when working with images. Most filters learned in initial convolutional layers will detect low-level elements, such as borders, corners and color blobs, which are common to most problems in the image domain. 

In this notebook, we will load the SqueezeNet architecture trained in the ImageNet dataset and fine-tune it to CIFAR-10.

## Imports

In [2]:
import os
import numpy as np
import pandas as pd
from random import sample, seed
seed(42)
np.random.seed(42)

import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

# Keras imports
from keras.layers import Input, Convolution2D, MaxPooling2D, Activation, concatenate, Dropout, GlobalAveragePooling2D, Flatten, Dense
from keras.models import Model
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.preprocessing.image import load_img, img_to_array
from keras.datasets import cifar10
from keras.callbacks import TensorBoard
from sklearn.cross_validation import StratifiedShuffleSplit
from keras import models
from keras import optimizers
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from scipy.misc import imresize
import tensorflow as tf




#Utility to plot
def plotImages(imgList):
    for i in range(len(imgList)):
        plotImage(imgList[i])
        
        
def plotImage(img):
    fig = plt.figure(figsize=(3,3))
    ax = fig.add_subplot(111)

    ax.imshow(np.uint8(img), interpolation='nearest')
    plt.show()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
import keras.callbacks as callbacks
tbCallBack = callbacks.TensorBoard(log_dir = "./tensorboard/")

In [4]:
df = pd.read_csv('./all/fer2013/fer2013.csv', dtype={'emotion':np.int32, 'pixels':str, 'Usage':str})

In [5]:
df['pixels'] = df['pixels'].apply(lambda x: np.fromstring(x,sep=' '))

In [6]:
train = df.loc[df['Usage'] == 'Training']
validation = df.loc[df['Usage'] == 'PublicTest']
test = df.loc[df['Usage'] == 'PrivateTest']

y_train = pd.get_dummies(train['emotion'])
y_train.columns = ['Angry','Disgust','Fear','Happy','Sad','Surprise','Neutral']

y_val = pd.get_dummies(validation['emotion'])
y_val.columns = ['Angry','Disgust','Fear','Happy','Sad','Surprise','Neutral']

y_test = pd.get_dummies(test['emotion'])
y_test.columns = ['Angry','Disgust','Fear','Happy','Sad','Surprise','Neutral']

In [7]:
x_train = np.vstack(train['pixels'].values)
x_validation = np.vstack(validation['pixels'].values)
x_test = np.vstack(test['pixels'].values)

In [8]:
X_train = np.stack((np.reshape(x_train,(-1, 48, 48, 1)),)*3, axis=-2).squeeze()
X_val = np.stack((np.reshape(x_validation,(-1, 48, 48, 1)),)*3, axis=-2).squeeze()
X_test =  np.stack((np.reshape(x_test,(-1, 48, 48, 1)),)*3, axis=-2).squeeze()

In [9]:
X_test.shape

(3589, 48, 48, 3)

In [10]:
with tf.device('/cpu:0'):
    tf_x_train = tf.placeholder(tf.float32, shape=(28709, 48, 48, 3))
    tf_x_validation = tf.placeholder(tf.float32, shape=(3589, 48, 48, 3))
    tf_x_test = tf.placeholder(tf.float32, shape=(3589, 48, 48, 3))
    
    tf_x_train_resized = tf.image.resize_images(tf_x_train,  size=[71,71])
    tf_x_validation_resized = tf.image.resize_images(tf_x_validation,  size=[71,71])
    tf_x_test_resized = tf.image.resize_images(tf_x_test,  size=[71,71])

In [11]:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    X_train,X_val,X_test = sess.run([tf_x_train_resized,tf_x_validation_resized,tf_x_test_resized], feed_dict={tf_x_train: X_train,
                                                   tf_x_validation: X_val,
                                                   tf_x_test: X_test
                                                  })

In [12]:
np.shape(X_train)

(28709, 71, 71, 3)

In [13]:
datagen = ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=False)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(X_train)

In [14]:
model_imagenet = applications.xception.Xception(include_top=False, weights='imagenet', input_shape=(71, 71, 3))

In [15]:
model_imagenet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 71, 71, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 35, 35, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 35, 35, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 35, 35, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

In [16]:
model_imagenet.layers[-1].name

'block14_sepconv2_act'

In [17]:
#Add new classification layers
x = model_imagenet.layers[-1].output
x = Flatten()(x)
x = Dense(7)(x)
x = Activation('softmax', name='new_loss')(x)

#new Model
model = Model(model_imagenet.inputs, x, name='model_new')

In [18]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 71, 71, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 35, 35, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 35, 35, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 35, 35, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

In [19]:
#freeze layers
#for layer in model.layers[:-2]:
#    layer.trainable = False

for layer in model.layers:
    print(layer.name, layer.trainable)

input_1 False
block1_conv1 True
block1_conv1_bn True
block1_conv1_act True
block1_conv2 True
block1_conv2_bn True
block1_conv2_act True
block2_sepconv1 True
block2_sepconv1_bn True
block2_sepconv2_act True
block2_sepconv2 True
block2_sepconv2_bn True
conv2d_1 True
block2_pool True
batch_normalization_1 True
add_1 True
block3_sepconv1_act True
block3_sepconv1 True
block3_sepconv1_bn True
block3_sepconv2_act True
block3_sepconv2 True
block3_sepconv2_bn True
conv2d_2 True
block3_pool True
batch_normalization_2 True
add_2 True
block4_sepconv1_act True
block4_sepconv1 True
block4_sepconv1_bn True
block4_sepconv2_act True
block4_sepconv2 True
block4_sepconv2_bn True
conv2d_3 True
block4_pool True
batch_normalization_3 True
add_3 True
block5_sepconv1_act True
block5_sepconv1 True
block5_sepconv1_bn True
block5_sepconv2_act True
block5_sepconv2 True
block5_sepconv2_bn True
block5_sepconv3_act True
block5_sepconv3 True
block5_sepconv3_bn True
add_4 True
block6_sepconv1_act True
block6_sepconv1 

In [20]:
# squeezeNetModel.compile(loss = "categorical_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])
# squeezeNetModel.fit(trainVal_data, trainVal_label, epochs=20)

In [21]:
print(model_imagenet.output.shape)
print(model.output.shape)

(?, 3, 3, 2048)
(?, 7)


Now, we compile our model and train it:

In [22]:
batch_size_val = 2**7

In [23]:
batch_size_val

128

In [24]:
# Compile model and train it.

model.compile(loss = "categorical_crossentropy", optimizer = optimizers.Adam(lr = 0.0001), metrics=["accuracy"])

history = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size_val), validation_data=datagen.flow(X_val, y_val, batch_size=batch_size_val), steps_per_epoch=len(X_train) / batch_size_val, epochs=100, callbacks=[tbCallBack])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
 21/224 [=>............................] - ETA: 2:06 - loss: 0.5020 - acc: 0.8415

KeyboardInterrupt: 

Finally, let's evaluate on our test set:

In [25]:
# Evaluate on validation:
# ...
print(model.metrics_names)
print(model.evaluate_generator(datagen.flow(X_val, y_val, batch_size=batch_size_val), steps=len(X_val)/batch_size_val))

['loss', 'acc']
[1.1716699262587398, 0.6074115352631944]


In [26]:
# Evaluate on validation:
# ...
print(model.metrics_names)
print(model.evaluate_generator(datagen.flow(X_test, y_test, batch_size=batch_size_val), steps=len(X_test)/batch_size_val))

['loss', 'acc']
[1.1521542078607028, 0.6224575090886624]


In [27]:
model.save('xception_gray.h5')  # creates a HDF5 file 'my_model.h5'