In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os
import numpy as np
from random import sample, seed
seed(42)
np.random.seed(42)
import gc
import re

import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

# Keras imports
from keras.layers import Input, Convolution2D, MaxPooling2D, Activation, concatenate, Dropout, GlobalAveragePooling2D
from keras.models import Model, load_model
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.preprocessing.image import load_img, img_to_array
from keras.datasets import cifar10
from keras.callbacks import TensorBoard, Callback, EarlyStopping
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn.cross_validation import StratifiedShuffleSplit

def plotImages(imgList):
    """Display every image in ``imgList``, one figure per image.

    Args:
        imgList: Iterable of images; each element is handed to ``plotImage``
            in iteration order. Any iterable works (list, array, generator),
            and an empty one simply draws nothing.
    """
    # Iterate directly instead of indexing by position, so inputs without
    # ``len()`` / integer indexing (e.g. generators) are accepted too.
    for img in imgList:
        plotImage(img)
        
        
def plotImage(img):
    """Show a single image in its own 3x3 figure.

    The pixel data is converted with ``np.uint8`` before being drawn, and
    the image is rendered with ``interpolation='nearest'``.

    Args:
        img: Image data accepted by ``np.uint8`` and ``Axes.imshow``.
    """
    axes = plt.figure(figsize=(3,3)).add_subplot(111)
    pixels = np.uint8(img)
    axes.imshow(pixels, interpolation='nearest')
    plt.show()

## SqueezeNet definition

In [None]:
# Fire Module Definition
# Name fragments that fire_module() joins with a 'fire<id>/' prefix to build
# its Keras layer names, e.g. 'fire2/squeeze1x1' or 'fire2/relu_expand3x3'.
sq1x1 = "squeeze1x1"
exp1x1 = "expand1x1"
exp3x3 = "expand3x3"
relu = "relu_"

def fire_module(x, fire_id, squeeze=16, expand=64):
    """Append one SqueezeNet "fire" module to the tensor ``x``.

    A 1x1 "squeeze" convolution with ``squeeze`` filters feeds two parallel
    "expand" convolutions (1x1 with 'valid' padding and 3x3 with 'same'
    padding, ``expand`` filters each). Every convolution is followed by a
    ReLU, and the two expand outputs are concatenated along axis 3.

    Args:
        x: Input tensor.
        fire_id: Identifier used in the layer-name prefix ``'fire<fire_id>/'``.
        squeeze: Number of filters of the squeeze convolution.
        expand: Number of filters of each expand convolution.

    Returns:
        The concatenated output tensor of the two expand branches.
    """
    prefix = 'fire{}/'.format(fire_id)

    # Squeeze stage: 1x1 convolution + ReLU.
    squeezed = Convolution2D(squeeze, (1, 1), padding='valid', name=prefix + sq1x1)(x)
    squeezed = Activation('relu', name=prefix + relu + sq1x1)(squeezed)

    # Expand stage: build the 1x1 branch first, then the 3x3 branch.
    branches = []
    for kernel, pad, tag in (((1, 1), 'valid', exp1x1), ((3, 3), 'same', exp3x3)):
        branch = Convolution2D(expand, kernel, padding=pad, name=prefix + tag)(squeezed)
        branch = Activation('relu', name=prefix + relu + tag)(branch)
        branches.append(branch)

    # Axis 3 is the last axis of a 4-D tensor (channels for channels-last data).
    return concatenate(branches, axis=3, name=prefix + 'concat')

#SqueezeNet model definition
def SqueezeNet(input_shape, weights_path='./squeezenet_weights_tf_dim_ordering_tf_kernels.h5'):
    """Build the SqueezeNet classifier and optionally load saved weights.

    Layer order: conv1 -> pool1 -> fire2, fire3 -> pool3 -> fire4, fire5 ->
    pool5 -> fire6 .. fire9 -> drop9 -> conv10 (1000 filters) -> global
    average pooling -> softmax.

    Args:
        input_shape: Shape of a single input sample, passed to ``Input``.
            The fire modules concatenate on axis 3, so a
            (height, width, channels) layout is expected.
        weights_path: Path to a local weights file handed to
            ``model.load_weights``. Nothing is downloaded; the file must
            already exist. Pass ``None`` to skip loading and return the
            model with freshly initialised weights.

    Returns:
        The Keras ``Model`` named ``'squeezenet'``.
    """
    img_input = Input(shape=input_shape) #placeholder

    x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
    x = Activation('relu', name='relu_conv1')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
    x = fire_module(x, fire_id=9, squeeze=64, expand=256)

    x = Dropout(0.5, name='drop9')(x)

    # Classification head: 1x1 convolution with one filter per class, averaged
    # over the spatial dimensions and turned into probabilities by softmax.
    x = Convolution2D(1000, (1, 1), padding='valid', name='conv10')(x)
    x = Activation('relu', name='relu_conv10')(x)
    x = GlobalAveragePooling2D()(x)
    x = Activation('softmax', name='loss')(x)

    model = Model(img_input, x, name='squeezenet')

    # Load weights from a local file (this does not download anything).
    if weights_path is not None:
        model.load_weights(weights_path)

    return model

## CIFAR-10

Data: 60000 32x32 images, 10 classes, evenly distributed.

In [None]:
#Load data
# cifar10.load_data() yields (train images, train labels), (test images, test labels).
(trainVal_data, trainVal_label), (X_test, y_test) = cifar10.load_data()
# Report the label array's shape for Y (the images' shape was printed twice before).
print("Train/Val data. X: ", trainVal_data.shape, ", Y: ", trainVal_label.shape)
print("Test data. X: ", X_test.shape, ", Y: ", y_test.shape)

In [None]:
# One-hot encode the test labels; the models below are trained and evaluated
# with the 'categorical_crossentropy' loss on one-hot targets.
y_test_oh = to_categorical(y_test)

In [None]:
# Rescale pixel values by 1/255 (assumes 8-bit image data, giving a [0, 1] range).
X_train = trainVal_data / 255.
X_test = X_test / 255.

# Keep the integer class labels plus a one-hot encoded copy for training.
y_train = trainVal_label
y_train_oh = to_categorical(y_train)

# The raw names are no longer needed: drop them and run the garbage collector.
del trainVal_data, trainVal_label
gc.collect()


In [None]:
# Build SqueezeNet for 32x32 inputs with 3 channels and print its layer table.
squeezeNetModel = SqueezeNet(input_shape=(32, 32, 3))
squeezeNetModel.summary()

## SqueezeNet with frozen layers

In [None]:
#freeze layers
# Every layer of the loaded SqueezeNet is frozen; only the new head defined
# below will be trained.
for layer in squeezeNetModel.layers:
    layer.trainable = False

#Add new classification layers
# Attach a 10-class head to the dropout layer 'drop9', i.e. the last layer
# before SqueezeNet's original 1000-class head. The layer is looked up by name
# rather than by position (layers[-5]) so the selection does not depend on how
# many layers follow it.
x = Convolution2D(10, (1, 1), padding='valid', name='conv10')(squeezeNetModel.get_layer('drop9').output)
x = Activation('relu', name='relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='loss')(x)

#new Model
model = Model(squeezeNetModel.inputs, x, name='freeze_1')
model.summary()

Now, we compile our model and train it:

In [None]:
# File that receives the best checkpoint of this run.
model_name = './freeze_1'
model_file = model_name + '.model'

epochs = 8
batch_size = 32

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

# Save the model only when the monitored validation accuracy improves.
cb = [
    ModelCheckpoint(
        model_file,
        monitor='val_acc',
        verbose=0,
        save_best_only=True,
        mode='auto',
        period=1,
    ),
]

# 20% of the training data is held out for validation via validation_split.
hist = model.fit(
    X_train,
    y_train_oh,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    verbose=1,
    callbacks=cb,
)

In [None]:
# List the metric names recorded per epoch by model.fit().
hist.history.keys()

In [None]:
# Show the recorded validation loss values of the last fit() call.
hist.history['val_loss']

Training and validation loss curves:

In [None]:
# Loss curves of the fully frozen run, one point per epoch.
plt.plot(hist.history['loss'], label='Training Loss')
plt.plot(hist.history['val_loss'], label='Validation Loss')

# Axis labels and legend.
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Log Loss', fontsize=16)
plt.legend(fontsize=16)

# Write the figure to disk before displaying it.
plt.savefig('freeze_1', dpi=200)
plt.show()

In [None]:
# Evaluate on the held-out test set (X_test / y_test_oh), not on the
# validation split used during training.
# NOTE(review): "(NORMALIZED)" presumably refers to the /255 input scaling - confirm.
test_loss, test_acc = model.evaluate(X_test, y_test_oh)
print('\nTest loss: {}'.format(test_loss))
print('Test accuracy (NORMALIZED): {}'.format(test_acc))

-----------------
-----------------

# Training last 2 Fire Modules + classification layers
As we saw, the fully frozen network performed very poorly. With every pretrained layer frozen, SqueezeNet cannot adapt its weights to the features present in CIFAR-10.

Let's try to unfreeze the last two fire modules and train once more. The architecture will be:
<img src="partFrozenSqueezeNet.png" width=70% height=70%>

In [None]:
squeezeNetModel = SqueezeNet((32,32,3))

# Freeze everything except fire modules 8 and 9. fire_module() names its
# layers 'fire<id>/...', so anchoring on that prefix selects exactly those two
# modules instead of any layer whose name happens to contain an 8 or a 9.
for layer in squeezeNetModel.layers:
    if not re.match(r'fire[89]/', layer.name):
        layer.trainable = False

# Attach a 10-class head to the dropout layer 'drop9', i.e. the last layer
# before SqueezeNet's original 1000-class head. The layer is looked up by name
# rather than by position (layers[-5]).
x = Convolution2D(10, (1, 1), padding='valid', name='conv10')(squeezeNetModel.get_layer('drop9').output)
x = Activation('relu', name='relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='loss')(x)


#new Model
model = Model(squeezeNetModel.inputs, x, name='freeze_2')

model.summary()

Now, we compile our model and train it:

In [None]:
# File that receives the best checkpoint of this run.
model_name = './freeze_2'
model_file = model_name + '.model'

epochs = 15
batch_size = 32

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

# Save the model only when the monitored validation accuracy improves.
cb = [
    ModelCheckpoint(
        model_file,
        monitor='val_acc',
        verbose=0,
        save_best_only=True,
        mode='auto',
        period=1,
    )
]

# 20% of the training data is held out for validation via validation_split.
hist = model.fit(
    X_train,
    y_train_oh,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    verbose=1,
    callbacks=cb,
)

In [None]:
# Loss curves of the partially frozen run, one point per epoch.
plt.plot(hist.history['loss'], label='Training Loss')
plt.plot(hist.history['val_loss'], label='Validation Loss')

# Axis labels and legend.
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Log Loss', fontsize=16)
plt.legend(fontsize=16)

# Write the figure to disk before displaying it.
plt.savefig('freeze_2', dpi=200)
plt.show()

Finally, let's evaluate on our test set:

In [None]:
# Evaluate on the held-out test set (X_test / y_test_oh), not on the
# validation split used during training.
# NOTE(review): "(NORMALIZED)" presumably refers to the /255 input scaling - confirm.
test_loss, test_acc = model.evaluate(X_test, y_test_oh)
print('\nTest loss: {}'.format(test_loss))
print('Test accuracy (NORMALIZED): {}'.format(test_acc))

# Fine-tuning all layers

In [None]:
squeezeNetModel = SqueezeNet((32,32,3))

for layer in squeezeNetModel.layers:
    layer.trainable = True       #by default they are all trainable, but just for clarification

# Attach a 10-class head to the dropout layer 'drop9', i.e. the last layer
# before SqueezeNet's original 1000-class head. The layer is looked up by name
# rather than by position (layers[-5]).
x = Convolution2D(10, (1, 1), padding='valid', name='conv10')(squeezeNetModel.get_layer('drop9').output)
x = Activation('relu', name='relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='loss')(x)

#new Model
model = Model(squeezeNetModel.inputs, x, name='squeezenet_new')

model.summary()

Now, we compile our model and train it:

In [None]:
from time import time

epochs = 10
batch_size = 32

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

# NOTE(review): model_file is not assigned in this cell. When the notebook runs
# top to bottom it still holds './freeze_2.model' from the previous experiment,
# so this run's checkpoints overwrite that file - confirm this is intended.
cb = [
    ModelCheckpoint(
        model_file,
        monitor='val_acc',
        verbose=0,
        save_best_only=True,
        mode='auto',
        period=1,
    ),
    # Each run logs to its own timestamped directory under ./logs.
    TensorBoard(log_dir="./logs/{}".format(time()), write_graph=True),
]

# 20% of the training data is held out for validation via validation_split.
hist_plus = model.fit(
    X_train,
    y_train_oh,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    verbose=1,
    callbacks=cb,
)

Finally, let's plot the loss curves and evaluate on our test set:

In [None]:
# NOTE(review): the fine-tuning cell stores its history in hist_plus, so when
# the notebook runs top to bottom `hist` is still the 15-epoch history of the
# partially frozen model - confirm this is the history meant here.
hist_15 = hist

In [None]:
# Concatenate the per-epoch losses of hist_15 and hist_plus into one curve each.
# NOTE(review): when run top to bottom these two histories come from different
# models (partially frozen vs. all layers trainable) - confirm the joined
# curve is what should be shown.
plt.plot(hist_15.history['loss'] + hist_plus.history['loss'], label='Training Loss')
plt.plot(hist_15.history['val_loss'] + hist_plus.history['val_loss'], label='Validation Loss')

# Axis labels and legend.
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Log Loss', fontsize=16)
plt.legend(fontsize=16)

# Write the figure to disk before displaying it.
plt.savefig('freeze_3', dpi=200)
plt.show()

In [None]:
# Reload the checkpoint written by ModelCheckpoint and score it on the test set.
# NOTE(review): both the partially frozen run and the fine-tuning run above
# checkpoint to model_file ('./freeze_2.model'), so this file holds whichever
# run wrote it last - confirm which model is meant to be evaluated here.
best_model = load_model('freeze_2.model')
test_loss, test_acc = best_model.evaluate(X_test, y_test_oh)
print('\nTest loss: {}'.format(test_loss))
print('Test accuracy (NORMALIZED): {}'.format(test_acc))