In [2]:
%matplotlib inline

# Transfer Learning
In this assignment, we will use the weights of a network pre-trained on a particular problem as a starting point to train our CNN for a different problem. As training a network from scratch is time-consuming and demands a lot of data, this is a frequent strategy, especially if both datasets (the one used for pre-training and the target) share similar structures/elements/concepts. 

This is especially true when working with images. Most filters learned in the initial convolutional layers will detect low-level elements, such as borders, corners and color blobs, which are common to most problems in the image domain. 

In this notebook, we will load the SqueezeNet architecture trained on the ImageNet dataset and fine-tune it on CIFAR-10.

## Imports

In [3]:
import os
import numpy as np
from random import sample, seed
seed(42)
np.random.seed(42)

import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

# Keras imports
from keras.layers import Input, Convolution2D, MaxPooling2D, Activation, concatenate, Dropout, GlobalAveragePooling2D, Flatten, Dense
from keras.models import Model
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.preprocessing.image import load_img, img_to_array
from keras.datasets import cifar10
from keras.callbacks import TensorBoard
from sklearn.cross_validation import StratifiedShuffleSplit
from keras import models
from keras import optimizers


#Utility to plot
def plotImages(imgList):
    """Display every image in ``imgList`` in order, one figure per image."""
    for img in imgList:
        plotImage(img)
        
        
def plotImage(img):
    """Show a single image in its own 3x3-inch figure.

    The image data is cast to ``uint8`` before being handed to ``imshow``,
    and nearest-neighbour interpolation is used for display.
    """
    figure = plt.figure(figsize=(3, 3))
    axes = figure.add_subplot(1, 1, 1)

    pixels = np.uint8(img)
    axes.imshow(pixels, interpolation='nearest')
    plt.show()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
import keras.callbacks as callbacks

# TensorBoard callback handed to training further below; it is configured
# to write its logs under ./tensorboard/.
tbCallBack = callbacks.TensorBoard(
    log_dir="./tensorboard/",
)

## SqueezeNet definition
These methods define our architecture and load the weights obtained using ImageNet data.

In [5]:
# Fire Module Definition
# Name fragments used to build the per-layer names inside each fire module.
sq1x1 = "squeeze1x1"
exp1x1 = "expand1x1"
exp3x3 = "expand3x3"
relu = "relu_"


def fire_module(x, fire_id, squeeze=16, expand=64):
    """Append one "fire" module to the tensor ``x`` and return its output.

    The module applies a 1x1 convolution with ``squeeze`` filters followed
    by ReLU, then feeds the result to two parallel branches -- a 1x1 and a
    3x3 (same-padded) convolution with ``expand`` filters each, both followed
    by ReLU -- and concatenates the two branches along axis 3.

    Every layer is named ``'fire<fire_id>/<suffix>'``.

    Args:
        x: input tensor (the concatenation on axis 3 presumes a
            channels-last layout).
        fire_id: number used in the layer-name prefix.
        squeeze: filter count of the squeeze convolution.
        expand: filter count of each expand convolution.

    Returns:
        The concatenated output tensor of the two expand branches.
    """
    prefix = 'fire{}/'.format(fire_id)

    # Squeeze stage.
    squeezed = Convolution2D(squeeze, (1, 1), padding='valid', name=prefix + sq1x1)(x)
    squeezed = Activation('relu', name=prefix + relu + sq1x1)(squeezed)

    # 1x1 expand branch.
    branch_1x1 = Convolution2D(expand, (1, 1), padding='valid', name=prefix + exp1x1)(squeezed)
    branch_1x1 = Activation('relu', name=prefix + relu + exp1x1)(branch_1x1)

    # 3x3 expand branch ('same' padding keeps it the same spatial size as the 1x1 branch).
    branch_3x3 = Convolution2D(expand, (3, 3), padding='same', name=prefix + exp3x3)(squeezed)
    branch_3x3 = Activation('relu', name=prefix + relu + exp3x3)(branch_3x3)

    return concatenate([branch_1x1, branch_3x3], axis=3, name=prefix + 'concat')

#SqueezeNet model definition
def SqueezeNet(input_shape, weights_path='./squeezenet_weights_tf_dim_ordering_tf_kernels.h5'):
    """Build the SqueezeNet classifier and optionally load pre-trained weights.

    The network is: conv1 -> pool1 -> fire2-3 -> pool3 -> fire4-5 -> pool5
    -> fire6-9 -> dropout -> 1x1 conv with 1000 filters -> global average
    pooling -> softmax.

    Args:
        input_shape: shape of one input image, without the batch dimension
            (e.g. ``(32, 32, 3)``).
        weights_path: path of a local weights file that is loaded into the
            model. Defaults to the ImageNet weights file expected in the
            working directory. Pass ``None`` to return the model with its
            freshly initialised weights instead.

    Returns:
        A Keras ``Model`` named ``'squeezenet'`` with a 1000-way softmax
        output.
    """
    img_input = Input(shape=input_shape) #placeholder

    x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
    x = Activation('relu', name='relu_conv1')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
    x = fire_module(x, fire_id=9, squeeze=64, expand=256)

    x = Dropout(0.5, name='drop9')(x)

    # Classification head: one 1x1 filter per class, averaged over space.
    x = Convolution2D(1000, (1, 1), padding='valid', name='conv10')(x)
    x = Activation('relu', name='relu_conv10')(x)
    x = GlobalAveragePooling2D()(x)
    x = Activation('softmax', name='loss')(x)

    model = Model(img_input, x, name='squeezenet')

    # Load the pre-trained weights from a local file. Nothing is downloaded
    # here: the file must already exist at ``weights_path``.
    if weights_path is not None:
        model.load_weights(weights_path)

    return model

## CIFAR-10

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. The classes are **airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck**.

In [6]:
# Load CIFAR-10. The training portion is kept together as "trainVal" here
# (it is split into train/validation sets further below); the test portion
# is loaded separately.
(trainVal_data, trainVal_label), (X_test, y_test) = cifar10.load_data()
# Print the array shapes as a quick sanity check.
print("Train/Val data. X: ", trainVal_data.shape, ", Y: ", trainVal_label.shape)
print("Test data. X: ", X_test.shape, ", Y: ", y_test.shape)

Train/Val data. X:  (50000, 32, 32, 3) , Y:  (50000, 1)
Test data. X:  (10000, 32, 32, 3) , Y:  (10000, 1)


In [7]:
# Prepare the data
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# One-hot encode the class labels.
# NOTE(review): both names are overwritten in place, so re-running this cell
# without reloading the data would encode the already-encoded labels again.
trainVal_label = to_categorical(trainVal_label)
y_test = to_categorical(y_test)

# Hold out 30% of the trainVal samples for validation; random_state pins the
# split so it is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(trainVal_data, trainVal_label, test_size=0.3, random_state=1)

In [8]:
# Batch generator configuration: feature-wise centering and std normalization
# plus random horizontal flips; vertical flips are disabled.
# NOTE(review): this same generator is also used for the validation batches
# further below, so the random flips are applied to validation data as well.
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=False)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
# Only the training split is used, so no validation/test statistics leak in.
datagen.fit(X_train)

-----------------
## SqueezeNet with frozen layers
Our initial attempt will be to remove SqueezeNet's top layers --- responsible for the classification into ImageNet classes --- and train a new set of layers for our CIFAR-10 classes. We will also freeze the layers before `drop9`. Our architecture will look like this:

<img src="frozenSqueezeNet.png" width=70% height=70%>

In [9]:
# Base network sized for 32x32 RGB inputs.
squeezeNetModel = SqueezeNet(input_shape=(32, 32, 3))

In [10]:
#Add new classification layers
# Branch off at the 'drop9' layer, i.e. discard the original 1000-class head
# (conv10 ... loss). The layer is looked up by name rather than by a
# positional index so the cut point stays correct if the base model's layer
# list ever changes.
x = squeezeNetModel.get_layer('drop9').output
# New head: one 1x1 filter per CIFAR-10 class, averaged over space, softmax.
x = Convolution2D(10, (1, 1), padding='valid', name='new_conv10')(x)
x = Activation('relu', name='new_relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='new_loss')(x)

#new Model
# Shares the input and all layers up to 'drop9' with squeezeNetModel.
model = Model(squeezeNetModel.inputs, x, name='squeezenet_new')

In [11]:
# Print the layer-by-layer structure of the new model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 15, 15, 64)   1792        input_1[0][0]                    
__________________________________________________________________________________________________
relu_conv1 (Activation)         (None, 15, 15, 64)   0           conv1[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 7, 7, 64)     0           relu_conv1[0][0]                 
__________________________________________________________________________________________________
fire2/sque

In [12]:
#freeze layers
# The last five layers of `model` are drop9 plus the four new head layers;
# everything before them keeps its pre-trained weights fixed.
for layer in model.layers[:-5]:
    layer.trainable = False

# Verify the result on the model that is actually trained: the frozen base
# layers should report False and drop9 / the new head layers True.
for layer in model.layers:
    print(layer.name, layer.trainable)

input_1 False
conv1 False
relu_conv1 False
pool1 False
fire2/squeeze1x1 False
fire2/relu_squeeze1x1 False
fire2/expand1x1 False
fire2/expand3x3 False
fire2/relu_expand1x1 False
fire2/relu_expand3x3 False
fire2/concat False
fire3/squeeze1x1 False
fire3/relu_squeeze1x1 False
fire3/expand1x1 False
fire3/expand3x3 False
fire3/relu_expand1x1 False
fire3/relu_expand3x3 False
fire3/concat False
pool3 False
fire4/squeeze1x1 False
fire4/relu_squeeze1x1 False
fire4/expand1x1 False
fire4/expand3x3 False
fire4/relu_expand1x1 False
fire4/relu_expand3x3 False
fire4/concat False
fire5/squeeze1x1 False
fire5/relu_squeeze1x1 False
fire5/expand1x1 False
fire5/expand3x3 False
fire5/relu_expand1x1 False
fire5/relu_expand3x3 False
fire5/concat False
pool5 False
fire6/squeeze1x1 False
fire6/relu_squeeze1x1 False
fire6/expand1x1 False
fire6/expand3x3 False
fire6/relu_expand1x1 False
fire6/relu_expand3x3 False
fire6/concat False
fire7/squeeze1x1 False
fire7/relu_squeeze1x1 False
fire7/expand1x1 False
fire7/ex

In [13]:
# squeezeNetModel.compile(loss = "categorical_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])
# squeezeNetModel.fit(trainVal_data, trainVal_label, epochs=20)

In [14]:
# Compare the output shapes of the original model and the new model, and
# show the name of the base-model layer the new head was attached to.
print(squeezeNetModel.output.shape)
print(model.output.shape)
squeezeNetModel.layers[-5].name

(?, 1000)
(?, 10)


'drop9'

Now, we compile our model and train it:

In [15]:
# Sanity check: the 4th layer from the end (new_conv10) must still be trainable.
model.layers[-4].trainable

True

In [16]:
# Shape of the one-hot encoded training labels.
np.shape(y_train)

(35000, 10)

In [17]:
# Batch size shared by the training, validation and evaluation generators.
batch_size_val = 32

In [18]:
# Compile model and train it.

model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=0.0001), metrics=["accuracy"])

# Batches per pass over each split. These must be whole numbers: a plain
# division yields a float (e.g. 35000 / 32 = 1093.75), so round up to make
# sure the final, partial batch is included.
train_steps = int(np.ceil(len(X_train) / float(batch_size_val)))
val_steps = int(np.ceil(len(X_val) / float(batch_size_val)))

history = model.fit_generator(
    datagen.flow(X_train, y_train, batch_size=batch_size_val),
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=datagen.flow(X_val, y_val, batch_size=batch_size_val),
    validation_steps=val_steps,  # explicit, rather than inferred from the generator
    callbacks=[tbCallBack])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Finally, let's evaluate on our validation set:

In [19]:
# Evaluate on validation:
# `steps` must be a whole number of batches; round up so that the final,
# partial batch of the validation split is included.
eval_steps = int(np.ceil(len(X_val) / float(batch_size_val)))
print(model.metrics_names)
print(model.evaluate_generator(datagen.flow(X_val, y_val, batch_size=batch_size_val), steps=eval_steps))

['loss', 'acc']
[1.5857155921300252, 0.44633333331743874]
