# Fine-tuning with Birds Data Set and Replacing All Classes

In [42]:
import h5py
import numpy as np
import shutil

from misc_utils.tensor_sampling_utils import sample_tensors

from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

from eval_utils.average_precision_evaluator import Evaluator

import os
root_dir = os.getcwd()

## 1. Load the trained weights file and make a copy
Load the VOC pre-trained weights and make a copy for modification. As before, the weights file can be downloaded here: https://drive.google.com/file/d/1vtNI6kSnv7fkozl7WxyhGyReB6JvDM41/view

In [43]:
# Paths of the VOC-pre-trained source weights and of the working copy that the
# following cells modify. `os` and `root_dir` are defined in the imports cell.
weights_source_path = os.path.join(root_dir, 'VGG_VOC0712Plus_SSD_300x300_iter_240000.h5')
weights_destination_path = os.path.join(root_dir, 'Birds_training_dest.h5')

# Make a copy of the weights file so the original download is never modified.
shutil.copy(weights_source_path, weights_destination_path)

# The source is only ever read from.
weights_source_file = h5py.File(weights_source_path, 'r')
# The destination must be opened writable explicitly: h5py >= 3.0 opens files
# read-only when no mode is given, which would make the dataset deletion and
# re-creation in the re-initialization cell fail. Mode 'a' (read/write, create
# if missing) matches the implicit default of older h5py versions.
weights_destination_file = h5py.File(weights_destination_path, 'a')

## 2. Re-initialize classification layers
There are 20 classes + 1 background class in the VOC pre-trained model. We want to replace all 20 of these classes with 20 species of birds, similar to assignment 3. Therefore, we will re-initialize all the classification layers so they can be re-trained on the new data set.

In [44]:
# Names of the six confidence-predictor layers whose kernel and bias are
# replaced in the destination weights file by the next cell.
classifier_names = [
    '{}_mbox_conf'.format(source_layer)
    for source_layer in ('conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2')
]

In [45]:
# Number of classes the source weights were trained for (20 VOC classes + background).
n_classes_source = 21

# Either an integer (number of classes to keep per box) or a list/tuple of
# source class indices to keep.
classes_of_interest = 21

for name in classifier_names:
    # Get the trained weights for this layer from the source HDF5 weights file.
    # `Dataset.value` was removed in h5py 3.0; indexing with `[()]` reads the
    # whole dataset into a NumPy array in both old and new h5py versions.
    source_layer = weights_source_file[name][name]
    kernel = source_layer['kernel:0'][()]
    bias = source_layer['bias:0'][()]

    # Get the shape of the kernel. We're interested in sub-sampling
    # the last dimension, 'o'.
    height, width, in_channels, out_channels = kernel.shape

    if isinstance(classes_of_interest, (list, tuple)):
        # Repeat the selected class indices once per block of
        # `n_classes_source` output channels.
        subsampling_indices = []
        for i in range(out_channels // n_classes_source):
            indices = np.array(classes_of_interest) + i * n_classes_source
            subsampling_indices.append(indices)
        subsampling_indices = list(np.concatenate(subsampling_indices))
    elif isinstance(classes_of_interest, int):
        # Total number of output channels to request from `sample_tensors`.
        subsampling_indices = int(classes_of_interest * (out_channels / n_classes_source))
    else:
        raise ValueError("`classes_of_interest` must be either an integer or a list/tuple.")

    # Re-initialize weights of classifier layers.
    # NOTE(review): with `classes_of_interest == n_classes_source` the requested
    # size equals the existing number of output channels. Confirm that
    # `sample_tensors` really draws fresh 'gaussian'/'zeros' values in that case
    # rather than handing back the original VOC weights.
    new_kernel, new_bias = sample_tensors(weights_list=[kernel, bias],
                                          sampling_instructions=[height, width, in_channels, subsampling_indices],
                                          axes=[[3]], # The one bias dimension corresponds to the last kernel dimension.
                                          init=['gaussian', 'zeros'],
                                          mean=0.0,
                                          stddev=0.005)

    # Delete the old weights from the destination file.
    destination_layer = weights_destination_file[name][name]
    del destination_layer['kernel:0']
    del destination_layer['bias:0']
    # Create new datasets for the sub-sampled weights.
    destination_layer.create_dataset(name='kernel:0', data=new_kernel)
    destination_layer.create_dataset(name='bias:0', data=new_bias)

# Make sure all data is written to our output file before this sub-routine exits.
weights_destination_file.flush()

## 3. Use the Re-initialized Weights for New Training on Birds Data Set

### 3.1 Set the model configuration parameters

In [46]:
# --- Input image geometry ---
# Height, width and number of color channels of the model input images.
img_height = 300
img_width = 300
img_channels = 3

# --- Input pre-processing ---
# The per-channel mean of the images in the dataset. Do not change this value
# if you're using any of the pre-trained weights.
mean_color = [123, 117, 104]
# The color channel order in the original SSD is BGR, so we'll have the model
# reverse the color channel order of the input images.
swap_channels = [2, 1, 0]

# --- Classes ---
# Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
n_classes = 20

# --- Anchor boxes ---
# The anchor box scaling factors used in the original SSD300 for the
# Pascal VOC datasets and for the MS COCO datasets, respectively.
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
scales = scales_pascal
# The anchor box aspect ratios used in the original SSD300, one list per
# predictor layer; the order matters.
aspect_ratios = [
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5],
]
two_boxes_for_ar1 = True
# The space between two adjacent anchor box center points for each predictor layer.
steps = [8, 16, 32, 64, 100, 300]
# The offsets of the first anchor box center points from the top and left
# borders of the image as a fraction of the step size for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
# Whether or not to clip the anchor boxes to lie entirely within the image boundaries.
clip_boxes = False
# The variances by which the encoded target coordinates are divided as in the
# original implementation.
variances = [0.1, 0.1, 0.2, 0.2]
normalize_coords = True

### 3.2 Build Model and Load Weights

In [47]:
# 1: Build the Keras model.

K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

# 2: Load the re-initialized weights.
# Use the file that the re-initialization step actually wrote
# (`weights_destination_path`, i.e. 'Birds_training_dest.h5'). The previously
# hard-coded 'Birds_training_load.h5' is never created by this notebook, so a
# fresh "Restart & Run All" could not find it.
weights_path = weights_destination_path

model.load_weights(weights_path, by_name=True)

# 3: Instantiate an optimizer and the SSD loss function and compile the model.
# Adam optimizer is used here since it yields better result. The `lr` passed
# here is only a starting value: the LearningRateScheduler callback defined
# later in this notebook sets the learning rate at the start of every epoch.

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

### 3.3 Set up the data generators for the training

In [48]:
# Directory that holds the bird images (also passed as `images_dir` below).
bird_dir = os.path.join(root_dir,'dataset/bird_data')

# Class names: index 0 is the background, followed by the 20 bird species
# that replace the original Pascal VOC classes.
classes = ['background',
           'Black_footed_Albatross', 'Laysan_Albatross', 'Sooty_Albatross', 'Groove_Billed_Ani',
           'Crested_Auklet', 'Least_Auklet', 'Parakeet_Auklet', 'Rhinoceros_Auklet',
           'Brewer_Blackbird', 'Red_winged_Blackbird', 'Rusty_Blackbird', 'Yellow_headed_Blackbird',
           'Bobolink', 'Indigo_Bunting', 'Lazuli_Bunting', 'Painted_Bunting',
           'Cardinal', 'Spotted_Catbird', 'Gray_Catbird', 'Yellow_breasted_Chat']

# Column order of the label CSV files, shared by the training and validation sets.
csv_input_format = ['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id']

# 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
# Neither loads the images into memory nor reads from an HDF5 dataset.
train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# 2: Parse the image and label lists for the training and validation datasets.
# This can take a while. The CSV files were modified such that a background
# class is added to them.
train_labels_csv = os.path.join(root_dir,'dataset/bird_data/train_20birds.csv')
val_labels_csv = os.path.join(root_dir,'dataset/bird_data/val_20birds.csv')

train_dataset.parse_csv(images_dir=bird_dir,
                        labels_filename=train_labels_csv,
                        input_format=list(csv_input_format))

val_dataset.parse_csv(images_dir=bird_dir,
                      labels_filename=val_labels_csv,
                      input_format=list(csv_input_format),
                      include_classes='all')

# after adding birds data:  743
# after adding birds data:  372


In [49]:
# 3: Set the batch size.

batch_size = 16 # Change the batch size if you like, or if you run into GPU memory issues.

# 4: Set the image transformations for pre-processing and data augmentation options.

# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)

# For the validation generator (no augmentation, only channel conversion and resizing):
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)

# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [model.get_layer(layer_name).output_shape[1:3]
                   for layer_name in ('conv4_3_norm_mbox_conf',
                                      'fc7_mbox_conf',
                                      'conv6_2_mbox_conf',
                                      'conv7_2_mbox_conf',
                                      'conv8_2_mbox_conf',
                                      'conv9_2_mbox_conf')]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)

# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

# The training data is shuffled so that minibatches are not drawn in the fixed
# order of the label CSV file; without shuffling every pass would present the
# images in exactly the same (possibly class-sorted) sequence.
train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[ssd_data_augmentation],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

# The validation data keeps its order so that validation runs are comparable.
val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)

# Dataset sizes; the validation size determines the number of validation steps.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size   = val_dataset.get_dataset_size()

### 3.4 Set the remaining training parameters

In [51]:
#Learning rate scheduler 

def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    Step-wise decay: 0.001 for epochs below 5, 0.0001 for epochs below 15,
    and 0.00001 for every later epoch.
    """
    # (exclusive upper epoch bound, learning rate) pairs, checked in order.
    for upper_bound, rate in ((5, 0.001), (15, 0.0001)):
        if epoch < upper_bound:
            return rate
    return 0.00001

In [54]:
# Define model callbacks.

# Stop training as soon as the loss becomes NaN.
terminate_on_nan = TerminateOnNaN()

# Apply the step-wise learning rate schedule defined by `lr_schedule`.
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)

# Append the per-epoch metrics to a CSV log file.
csv_logger = CSVLogger(filename='Birds_tuning_log.csv', separator=',', append=True)

# Save the full model (not just the weights) whenever the validation loss improves.
checkpoint_path = os.path.join(root_dir,'with_bird.h5')
model_checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)

# The order of the callbacks in this list is unchanged.
callbacks = [
    model_checkpoint,
    csv_logger,
    learning_rate_scheduler,
    terminate_on_nan,
]

### 3.5 Train

In [56]:
# Training time around 2 hours on GeForce GTX 1050.
# Set `initial_epoch` and `final_epoch` accordingly for resuming.
# NOTE(review): `initial_epoch = 2` makes this run start at "Epoch 3/30";
# presumably a leftover from resuming an earlier run. Confirm whether a fresh
# run should use 0.
initial_epoch   = 2
final_epoch     = 30
steps_per_epoch = 20

# Number of validation batches needed to cover the whole validation set once.
validation_steps = ceil(val_dataset_size/batch_size)

history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=final_epoch,
    callbacks=callbacks,
    validation_data=val_generator,
    validation_steps=validation_steps,
    initial_epoch=initial_epoch,
)

Epoch 3/30

Epoch 00003: LearningRateScheduler setting learning rate to 0.001.

Epoch 00003: val_loss improved from 12.32413 to 10.58504, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 4/30

Epoch 00004: LearningRateScheduler setting learning rate to 0.001.

Epoch 00004: val_loss improved from 10.58504 to 9.52443, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 5/30

Epoch 00005: LearningRateScheduler setting learning rate to 0.001.

Epoch 00005: val_loss improved from 9.52443 to 7.93813, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 6/30

Epoch 00006: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00006: val_loss improved from 7.93813 to 7.75550, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 7/30

Epoch 00007: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00007: val_loss improved from 7.75550 to 7.58016, saving model to


Epoch 00013: val_loss improved from 7.24824 to 7.20753, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 14/30

Epoch 00014: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00014: val_loss improved from 7.20753 to 7.16289, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 15/30

Epoch 00015: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00015: val_loss improved from 7.16289 to 7.12141, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 16/30

Epoch 00016: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00016: val_loss improved from 7.12141 to 7.09835, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 17/30

Epoch 00017: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00017: val_loss improved from 7.09835 to 7.09483, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 18/30

Epoch


Epoch 00023: val_loss improved from 7.05308 to 7.04513, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 24/30

Epoch 00024: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00024: val_loss did not improve from 7.04513
Epoch 25/30

Epoch 00025: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00025: val_loss improved from 7.04513 to 7.03810, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 26/30

Epoch 00026: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00026: val_loss did not improve from 7.03810
Epoch 27/30

Epoch 00027: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00027: val_loss improved from 7.03810 to 7.02476, saving model to C:\Users\haili Calnielmon\Desktop\myKeras_ssd\with_bird.h5
Epoch 28/30

Epoch 00028: LearningRateScheduler setting learning rate to 1e-05.

Epoch 00028: val_loss did not improve from 7.02476
Epoch 29/30

Epoch 00029: LearningRateScheduler 