# Fine-tune MobileNetV2 on a new set of classes

In [1]:
import keras
from keras.applications.inception_v3 import InceptionV3
from keras.applications.mobilenetv2 import MobileNetV2
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
import pandas as pd

Using TensorFlow backend.


In [2]:
# Exploratory: build InceptionV3 with random weights (weights=None) and without
# its classification head (include_top=False), then show the last five layers
# to see where the headless network ends.
# NOTE(review): this model is not referenced by the later cells shown in this notebook.
new_inceptionv3_model = InceptionV3(weights=None, include_top=False)
new_inceptionv3_model.layers[-5:]

[<keras.layers.core.Activation at 0x1d3591bbfd0>,
 <keras.layers.merge.Concatenate at 0x1d359551940>,
 <keras.layers.merge.Concatenate at 0x1d3599c89b0>,
 <keras.layers.core.Activation at 0x1d359aa0f98>,
 <keras.layers.merge.Concatenate at 0x1d359b02c88>]

In [4]:
# Exploratory: same InceptionV3 architecture but with the classification head
# kept (include_top=True); the last five layers shown below end in the
# GlobalAveragePooling2D + Dense pair that include_top=False leaves out.
base_inceptionv3_model = InceptionV3(weights=None, include_top=True)
base_inceptionv3_model.layers[-5:]

[<keras.layers.merge.Concatenate at 0x2562c5e17f0>,
 <keras.layers.core.Activation at 0x2562c6afbe0>,
 <keras.layers.merge.Concatenate at 0x2562c723f28>,
 <keras.layers.pooling.GlobalAveragePooling2D at 0x2562c6e2e80>,
 <keras.layers.core.Dense at 0x2562c6e2e10>]

# Trying to load model without top layers

### Base model

In [3]:
# Reference MobileNetV2 (224x224x3 input, alpha=1.4) loaded with ImageNet
# weights and its original classification head (include_top=True); used to
# compare against the headless model built in the next section.
base_model= MobileNetV2(input_shape=(224, 224, 3), alpha=1.4, depth_multiplier=1.0, include_top=True, weights='imagenet')
base_model.layers[-5:]

[<keras.layers.convolutional.Conv2D at 0x1dc41f0c208>,
 <keras.layers.normalization.BatchNormalization at 0x1dc41fc3d68>,
 <keras.layers.advanced_activations.ReLU at 0x1dc4200ba90>,
 <keras.layers.pooling.GlobalAveragePooling2D at 0x1dc42071f60>,
 <keras.layers.core.Dense at 0x1dc4209cef0>]

# Total number of layers in the reference model, including its classification head.
len(base_model.layers)

### Our new model

In [3]:
# Create the base pre-trained model: same MobileNetV2 configuration as
# base_model, but without the classification head (include_top=False).
# pooling=None means no pooling is applied here; it is added explicitly in the
# "Add new layers" cell.
# NOTE(review): the name `new_model` is rebound to the full classifier in a
# later cell, so this cell must be re-run before re-running the cell that adds
# the new layers.
new_model = MobileNetV2(input_shape=(224, 224, 3), alpha=1.4, depth_multiplier=1.0, include_top=False, weights='imagenet', 
                        pooling=None)
new_model.layers[-5:]

[<keras.layers.convolutional.Conv2D at 0x1d35dfc8b38>,
 <keras.layers.normalization.BatchNormalization at 0x1d35e0153c8>,
 <keras.layers.convolutional.Conv2D at 0x1d35e0bdf98>,
 <keras.layers.normalization.BatchNormalization at 0x1d35e1306a0>,
 <keras.layers.advanced_activations.ReLU at 0x1d35e180400>]

### Add new layers

In [4]:
# Add a global spatial average pooling layer on top of the headless base
# model's output.
x = GlobalAveragePooling2D()(new_model.output)
# Optional intermediate fully-connected layer (currently disabled):
# x = Dense(1024, activation='relu')(x)
# Softmax classification layer; the 10 units correspond to the 10 classes
# reported by the data generators for this dataset.
predictions = Dense(10, activation='softmax', name='softmax')(x)
# Disabled alternative output head:
# output = Reshape((num_classes,))(x)

In [5]:
# Assemble the trainable classifier: the base model's input wired through to
# the new softmax output.
# NOTE(review): this rebinds `new_model` from the headless base to the full
# model, so the cell is not idempotent when re-run on its own.
new_model = Model(inputs=new_model.input, outputs=predictions)
new_model.layers[-5:]

[<keras.layers.convolutional.Conv2D at 0x1d35e0bdf98>,
 <keras.layers.normalization.BatchNormalization at 0x1d35e1306a0>,
 <keras.layers.advanced_activations.ReLU at 0x1d35e180400>,
 <keras.layers.pooling.GlobalAveragePooling2D at 0x1d360743cc0>,
 <keras.layers.core.Dense at 0x1d35e2343c8>]

### Compare models

# Print the input and output tensors of both models, one per line, so their
# shapes can be compared side by side.
print(base_model.input, base_model.output, new_model.input, new_model.output, sep="\n")

### Create generator

In [6]:
from keras.preprocessing.image import ImageDataGenerator
import os

# add callbacks
# highly optional in case of xiauchus GitHub
# Stop training when validation accuracy ('val_acc') has not improved for
# `patience` consecutive epochs.
# NOTE(review): patience=30 is larger than the epochs=5 used in the training
# cell, so as configured this callback can never stop the run early.
from keras.callbacks import EarlyStopping
earlystop = EarlyStopping(monitor='val_acc', patience=30, verbose=0, mode='auto')

def generate(batch, size):
    """Build the training and validation image generators.

    Training images read from 'data/train' are augmented (shear, zoom,
    rotation, shifts, horizontal flip); validation images read from
    'data/validation' are only rescaled.

    # Arguments
        batch: Integer, batch size.
        size: Integer, image size; images are resized to (size, size).

    # Returns
        train_generator: train set generator
        validation_generator: validation set generator
        count1: Integer, number of images in the train set.
        count2: Integer, number of images in the validation set.
    """
    ptrain = 'data/train'
    pval = 'data/validation'

    # Data augmentation is applied to the training data only.
    # NOTE(review): the ImageNet MobileNetV2 weights are normally paired with
    # Keras' MobileNetV2 `preprocess_input`, which scales pixels to [-1, 1];
    # rescale=1/255 yields [0, 1] -- confirm this is intended.
    datagen1 = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=90,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    # Validation data is rescaled the same way but never augmented.
    datagen2 = ImageDataGenerator(rescale=1. / 255)

    train_generator = datagen1.flow_from_directory(
        ptrain,
        target_size=(size, size),
        batch_size=batch,
        class_mode='categorical')

    validation_generator = datagen2.flow_from_directory(
        pval,
        target_size=(size, size),
        batch_size=batch,
        class_mode='categorical')

    # Take the counts from the generators instead of walking the directories:
    # a directory walk counts every file (hidden files, thumbnails, other
    # non-images), while `samples` is the number of images the iterator
    # actually found and will yield, which is what the step computation needs.
    count1 = train_generator.samples
    count2 = validation_generator.samples

    return train_generator, validation_generator, count1, count2

# Batch size and image side length; size=224 matches the (224, 224, 3)
# input_shape the models were built with.
#batch = 128
batch = 256
size = 224
# count1 / count2 are the train / validation counts later used to compute
# steps_per_epoch and validation_steps.
train_generator, validation_generator, count1, count2 = generate(batch, size)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.


In [7]:
# Total number of layers in the assembled model (base network + new head);
# used to decide how many layers to freeze below.
len(new_model.layers)

157

In [8]:
# Train only the newly added head: freeze every layer except the last two
# (with 157 layers that is the first 155) and unfreeze the final
# GlobalAveragePooling2D + Dense pair.
for layer in new_model.layers[:-2]:
    layer.trainable = False
for layer in new_model.layers[-2:]:
    layer.trainable = True

In [None]:
# Once the new top layers have been trained, the convolutional blocks of
# MobileNetV2 can be fine-tuned as well by freezing only the bottom N layers
# and training the rest.

# Print every layer's index and name to help decide how many layers
# to freeze for that later fine-tuning step:
for i, layer in enumerate(new_model.layers):
    print(i, layer.name)

0 input_4
1 Conv1_pad
2 Conv1
3 bn_Conv1
4 Conv1_relu
5 expanded_conv_depthwise
6 expanded_conv_depthwise_BN
7 expanded_conv_depthwise_relu
8 expanded_conv_project
9 expanded_conv_project_BN
10 block_1_expand
11 block_1_expand_BN
12 block_1_expand_relu
13 block_1_pad
14 block_1_depthwise
15 block_1_depthwise_BN
16 block_1_depthwise_relu
17 block_1_project
18 block_1_project_BN
19 block_2_expand
20 block_2_expand_BN
21 block_2_expand_relu
22 block_2_depthwise
23 block_2_depthwise_BN
24 block_2_depthwise_relu
25 block_2_project
26 block_2_project_BN
27 block_2_add
28 block_3_expand
29 block_3_expand_BN
30 block_3_expand_relu
31 block_3_pad
32 block_3_depthwise
33 block_3_depthwise_BN
34 block_3_depthwise_relu
35 block_3_project
36 block_3_project_BN
37 block_4_expand
38 block_4_expand_BN
39 block_4_expand_relu
40 block_4_depthwise
41 block_4_depthwise_BN
42 block_4_depthwise_relu
43 block_4_project
44 block_4_project_BN
45 block_4_add
46 block_5_expand
47 block_5_expand_BN
48 block_5_exp

In [9]:
# compile the model (should be done *after* setting layers to non-trainable)
new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Create the output directory *before* training so that a filesystem problem
# surfaces immediately rather than after the run has finished. exist_ok=True
# replaces the racy "check os.path.exists, then os.makedirs" sequence.
os.makedirs('model', exist_ok=True)

# train the model on the new data for a few epochs
# steps are floor(count / batch), i.e. a trailing partial batch is not
# counted towards an epoch.
hist = new_model.fit_generator(train_generator,
                               steps_per_epoch=count1 // batch,
                               epochs=5,
                               verbose=1,
                               validation_data=validation_generator,
                               validation_steps=count2 // batch,
                               callbacks=[earlystop],
                               class_weight=None,
                               max_queue_size=10,
                               workers=1,
                               use_multiprocessing=False,
                               shuffle=True,
                               initial_epoch=0)

# Persist the per-epoch metrics (one column per metric in hist.history) and
# the trained weights.
df = pd.DataFrame.from_dict(hist.history)
df.to_csv('model/new_model_hist.csv', encoding='utf-8', index=False)
new_model.save_weights('model/new_model_weights.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
