# Train YOLOv2 on LISA Dataset
-----------------------------------------------


In [1]:
import sys
print(sys.version) # Check Python Version
import numpy as np
import os
from keras.optimizers import Adam
from utils.parse_input import load_data    # Data handler for LISA dataset
from cfg import *

2.7.13 |Anaconda 4.4.0 (64-bit)| (default, Dec 20 2016, 23:09:15) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]


Using TensorFlow backend.


### Prepare LISA Dataset

In [2]:
# Location of the pre-trained Darknet-19 weights.
# The DARKNET19_WEIGHTS environment variable overrides it, so the notebook can run on
# another machine; the fallback is the path originally hard-coded here.
pretrained_path = os.environ.get("DARKNET19_WEIGHTS",
                                 "/home/ubuntu/dataset/darknet19_544.weights")

# load_data returns the image list and the matching labels; column 1 of y_train is
# the class name (it is what np.unique is applied to below).
x_train, y_train = load_data('data/training.txt')
labels           = np.unique(y_train[:, 1])
num_classes      = len(labels)            # Count number of classes in the dataset
print("Train: {} samples\nNumber of classes: {}".format(len(x_train), num_classes))
print("\nLabel Sample: \n{}".format(y_train[0]))

# Anchor boxes as (width, height) pairs relative to the image size
# (values come from K-means clustering with K=5, per the message printed below).
ANCHORS = np.array((
    (0.023717899133663362, 0.035715759075907606),
    (0.059577141608391594, 0.08738709207459215),
    (0.08816276658767774, 0.1294924960505529),
    (0.03283318210930825, 0.0483890193566751),
    (0.04450034340659346, 0.064308608058608),
))
print("\n\nRelative Anchors using K-mean clustering [K=5]\n {}".format(ANCHORS))


Number of ground truth boxes: 3672 boxes
Train: 3672 samples
Number of classes: 31

Label Sample: 
[1093.5 408.0 45.0 48.0 'doNotEnter']


Relative Anchors using K-mean clustering [K=5]
 [[ 0.0237179   0.03571576]
 [ 0.05957714  0.08738709]
 [ 0.08816277  0.1294925 ]
 [ 0.03283318  0.04838902]
 [ 0.04450034  0.06430861]]


### Construct YOLOv2 On Keras

In [3]:
from model.yolov2 import YOLOv2, darknet19
import keras.backend as K

# Reset the Keras session first so that re-running this cell does not leave a
# duplicate model behind.
K.clear_session()

# Feature extractor, built from the pre-trained weights file.
# NOTE: this rebinds the imported `darknet19` factory name to the object it returns.
darknet19 = darknet19(pretrained_path, freeze_layers=True)

# Detection network on top of the feature extractor.
yolov2 = YOLOv2(
    feature_extractor=darknet19,
    num_anchors=len(ANCHORS),
    num_classes=N_CLASSES,
)
model = yolov2.model

Pre-trained weights have been loaded into model


In [14]:
import keras.backend as K
import tensorflow as tf

# @TODO:
#    - processed boxes are ALL relative; no sqrt is applied to w
#    - Data generator label: [batch, 36]

def _process_gt(y_true, output_shape):
    """
    Broadcast the ground-truth label over the whole output grid and split it.

    y_true:       label tensor whose last axis is [xc, yc, w, h, conf, class_prob...]
                  (N_CLASSES + 5 values). It is tiled with a 4-element multiple, so it
                  must be rank 4 -- presumably [BATCH, 1, 1, N_CLASSES + 5]; TODO confirm
                  against the data generator.
    output_shape: the two grid dimensions of the prediction, i.e. K.shape(y_pred)[1:3].

    Returns (true_boxes, true_conf, true_clf):
        true_boxes  [..., 4]  xc, yc, w, h divided by the input image size
        true_conf   [...]     objectness target
        true_clf    [..., N_CLASSES] class targets

    Axis convention: axis 1 of the grid is treated as rows (y) and axis 2 as columns (x),
    matching _create_offset_map, where the x offset `cx` runs along axis 2. The previous
    code divided x by the axis-1 extent, which is only correct for square grids.
    NOTE(review): confirm the model really emits [BATCH, rows, cols, ...].
    """
    n_rows = tf.cast(output_shape[0], tf.int32)
    n_cols = tf.cast(output_shape[1], tf.int32)

    # Input resolution recovered from the grid size; the factor 32 is the ratio between
    # input and output resolution assumed by this code.
    input_h = tf.cast(32 * n_rows, tf.float32)
    input_w = tf.cast(32 * n_cols, tf.float32)

    # Repeat the single label once per (cell, anchor) so it lines up with the prediction.
    y_true = K.tile(y_true, (1, 1, 1, n_rows * n_cols * N_ANCHORS))
    y_true = K.reshape(y_true, [-1, n_rows, n_cols, N_ANCHORS, N_CLASSES + 5])

    # Adjust ground truth to relative size: x and w by image width, y and h by image height.
    image_size = K.reshape([input_w, input_h], [1, 1, 1, 1, 2])
    true_xy    = y_true[..., 0:2] / image_size
    true_wh    = y_true[..., 2:4] / image_size
    true_boxes = tf.concat([true_xy, true_wh], 4)
    true_conf  = y_true[..., 4]
    true_clf   = y_true[..., 5:]

    return true_boxes, true_conf, true_clf


def _process_prediction(y_pred):
    """
    Decode the raw network output into relative boxes, objectness and class scores.

    y_pred: network output; axes 1 and 2 are the grid dimensions. It is reshaped here to
            [-1, dim1, dim2, N_ANCHORS, N_CLASSES + 5].

    Returns (pred_boxes, pred_conf, pred_clf):
        pred_boxes [..., 4]  (bx, by, bw, bh) as fractions of the image size
        pred_conf  [...]     sigmoid of the objectness logit
        pred_clf   [..., N_CLASSES] raw (un-normalised) class scores

    Fixes relative to the previous version:
      * `cx` indexes axis 2 and `cy` indexes axis 1 (see _create_offset_map), so bx is
        now divided by the axis-2 size and by by the axis-1 size. The old code had them
        swapped, which is wrong for any non-square grid.
      * ANCHORS (float) used to be multiplied by an int32 tensor, which either truncates
        the anchors to 0 or raises a dtype error. ANCHORS are already relative sizes, so
        they are used directly as float32; this equals the old "scale by grid, then
        divide by grid" round trip.
    """
    output_shape = K.shape(y_pred)[1:3]
    n_rows = tf.cast(output_shape[0], tf.float32)   # size of axis 1
    n_cols = tf.cast(output_shape[1], tf.float32)   # size of axis 2

    anchor_tensor = K.reshape(tf.cast(ANCHORS, tf.float32), [1, 1, 1, N_ANCHORS, 2])
    y_pred        = K.reshape(y_pred, [-1, output_shape[0], output_shape[1], N_ANCHORS, N_CLASSES + 5])

    # Per-cell offsets: cx is the index along axis 2, cy the index along axis 1.
    cx, cy = _create_offset_map(K.shape(y_pred))
    pw = anchor_tensor[..., 0]
    ph = anchor_tensor[..., 1]

    # Calculate prediction in relative position (fraction of the image).
    bx = (tf.sigmoid(y_pred[..., 0]) + cx) / n_cols
    by = (tf.sigmoid(y_pred[..., 1]) + cy) / n_rows
    bw = pw * tf.exp(y_pred[..., 2])
    bh = ph * tf.exp(y_pred[..., 3])

    pred_boxes = tf.stack([bx, by, bw, bh], -1)
    pred_conf  = tf.sigmoid(y_pred[..., 4])
    pred_clf   = y_pred[..., 5:]

    return pred_boxes, pred_conf, pred_clf
    
                           
def _calc_iou(true_boxes, pred_boxes):
    """
    Element-wise intersection-over-union of ground-truth and predicted boxes.

    Both tensors hold (xc, yc, w, h) in their last axis. Before the corners are
    computed, both are multiplied by the sizes of axes 1 and 2 of `pred_boxes`.

    Returns the IoU tensor with the last (coordinate) axis removed.
    """
    grid_dims = K.shape(pred_boxes)[1:3]
    scale     = K.cast(K.reshape([grid_dims[0], grid_dims[1]], [1, 1, 1, 1, 2]), tf.float32)

    # Centres and sizes after scaling.
    p_center, p_size = pred_boxes[..., :2] * scale, pred_boxes[..., 2:4] * scale
    t_center, t_size = true_boxes[..., :2] * scale, true_boxes[..., 2:4] * scale

    p_area = p_size[..., 0] * p_size[..., 1]
    t_area = t_size[..., 0] * t_size[..., 1]

    p_half = 0.5 * p_size
    t_half = 0.5 * t_size

    # Overlap rectangle: the innermost of the two upper-left / lower-right corners.
    upper_left   = tf.maximum(p_center - p_half, t_center - t_half)
    lower_right  = tf.minimum(p_center + p_half, t_center + t_half)
    overlap_wh   = tf.maximum(lower_right - upper_left, 0.0)   # zero when boxes are disjoint
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    union_area = t_area + p_area - overlap_area
    return tf.truediv(overlap_area, union_area)

    
def _create_offset_map(output_shape):
    """
    Build the per-cell grid offsets that are added to the sigmoid-activated box centres.

    output_shape: shape tensor of the reshaped prediction; entries 1 and 2 are read as
                  the two grid dimensions.

    Returns (cx, cy), both float32 and reshaped to [-1, dim1, dim2, len(ANCHORS)]:
        cx[., i, j, .] == j   (index along axis 2)
        cy[., i, j, .] == i   (index along axis 1)
    """
    dim1      = tf.cast(output_shape[1], tf.int32)
    dim2      = tf.cast(output_shape[2], tf.int32)
    n_anchors = len(ANCHORS)

    # cx: a column vector 0..dim2-1, repeated dim1 times down and n_anchors times across.
    axis2_idx = K.expand_dims(tf.cast(K.arange(0, stop=dim2), dtype=tf.float32), -1)
    cx = K.reshape(K.tile(axis2_idx, (dim1, n_anchors)), [-1, dim1, dim2, n_anchors])

    # cy: each index 0..dim1-1 repeated across one full row of (dim2 * n_anchors) entries.
    axis1_idx = K.reshape(K.cast(K.arange(0, stop=dim1), dtype=tf.float32), [-1, 1])
    cy = K.reshape(K.tile(axis1_idx, [1, n_anchors * dim2]), [-1, dim1, dim2, n_anchors])

    return cx, cy


def custom_loss(y_true, y_pred):
    """
    Sum-of-squares detection loss (coordinates + objectness + classes).

    y_true: ground-truth label, last axis [xc, yc, w, h, conf, class_prob...]
            (N_CLASSES + 5 values); see _process_gt for the rank it expects.
    y_pred: network output; axes 1 and 2 are the grid dimensions. _process_prediction
            reshapes it to [BATCH, dim1, dim2, N_ANCHORS, N_CLASSES + 5].

    Returns a scalar: 0.5 * mean over the batch of the per-sample weighted squared error.

    Fixes relative to the previous version:
      * The objectness and class terms subtracted the prediction from itself and were
        therefore always zero; they now compare against the ground truth.
      * The per-sample sum no longer hard-codes a 30x40 grid; it reduces over all
        non-batch axes, which gives the same value for 30x40 and works for any grid.

    NOTE(review): class scores are compared as raw network outputs (no softmax), as before.
    """
    pred_shape = K.shape(y_pred)[1:3]

    true_boxes, true_conf, true_cls = _process_gt(y_true, pred_shape)
    pred_boxes, pred_conf, pred_cls = _process_prediction(y_pred)

    # Per cell, keep only the anchor with the highest IoU against the ground truth.
    iou       = _calc_iou(true_boxes, pred_boxes)
    best_box  = tf.to_float(tf.equal(iou, tf.reduce_max(iou, [3], True)))
    true_conf = tf.expand_dims(best_box * true_conf, -1)
    pred_conf = tf.expand_dims(pred_conf, -1)

    # Coordinate loss: only boxes selected above contribute (weight 5).
    weight_coor = 5.0 * tf.concat(4 * [true_conf], 4)
    coord_loss  = tf.pow(pred_boxes - true_boxes, 2) * weight_coor

    # Objectness loss: weight 0.5 where the target is 0 and 5 where it is 1.
    weight_conf = 0.5 * (1. - true_conf) + 5.0 * true_conf
    obj_loss    = tf.pow(pred_conf - true_conf, 2) * weight_conf

    # Classification loss: only boxes selected above contribute.
    weight_prob = 1.0 * tf.concat(N_CLASSES * [true_conf], 4)
    clf_loss    = tf.pow(pred_cls - true_cls, 2) * weight_prob

    # Total loss: sum everything per sample, then average over the batch.
    loss = tf.concat([coord_loss, obj_loss, clf_loss], 4)
    loss = tf.reduce_sum(loss, [1, 2, 3, 4])
    loss = .5 * tf.reduce_mean(loss)
    return loss


In [15]:
import os
import keras
import keras
from utils.data_generator import flow_from_list
from sklearn.model_selection import train_test_split
from utils.multi_gpu import make_parallel, get_gpus

# HYPER-PARAMETERS
BATCH_SIZE      = 8      # per-GPU batch size; multiplied by the GPU count below
EPOCHS          = 10     # was defined as 5 but never used; 10 is what fit_generator actually ran
STEPS_PER_EPOCH = 100
LEARN_RATE      = 1e-5

# TRAIN ON MULTI-GPUS
# Decide the effective batch size BEFORE building the generator; previously the
# generator was created first and never saw the scaled batch size.
n_gpus = get_gpus()
if n_gpus > 1:
    BATCH_SIZE = n_gpus * BATCH_SIZE
    model_par = make_parallel(model, n_gpus)
else:
    model_par = model

# Data Generator
train_data_gen = flow_from_list(x_train, y_train, ANCHORS, batch_size=BATCH_SIZE, augment_data=True)

# For Debugging purpose
tf_board   = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)
early_stop = keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.001, patience=3, mode='min', verbose=1)
# Checkpoint after every epoch. Weights are saved from `model`, not `model_par`.
save_moldel= keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch,logs: model.save_weights('yolov2-epoch%s-loss:%s.weights'%(epoch, str(logs.get('loss')))))

model_par.compile(optimizer=Adam(LEARN_RATE),loss=custom_loss)
hist =  model_par.fit_generator(generator   = train_data_gen,
                            steps_per_epoch = STEPS_PER_EPOCH,
                            epochs          = EPOCHS,
                            callbacks       = [tf_board, early_stop, save_moldel],
                            workers=1, verbose=1)

model.save_weights('yolov2.weights')

There is(are) 1 GPU(s) on device.
Epoch 1/10

KeyboardInterrupt: 

## Multi-GPUs Training - Data Parallelism Approach

* Each GPU holds its own copy of the model
* During training, the gradients from all GPUs are averaged, and the mean is used to update the model
<img style="width:40%" src="https://www.tensorflow.org/images/Parallelism.png">

### Visualize training process using Tensorboard
Open `http://<public-dns>:6006`

In [None]:
import cv2
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
%matplotlib inline

def extract_sign(img, bbox, output_size=(32, 32)):
    """
    Crop the region described by `bbox` out of `img` and resize it.

    img:         image array indexed as img[y, x].
    bbox:        object with centre coordinates `x`, `y` and size `w`, `h`, in pixels.
    output_size: size passed to cv2.resize.

    Returns the resized crop.

    The corners are clamped to the image bounds. Without this, a box that reaches past
    the top or left edge yields a negative start index, which NumPy interprets as
    counting from the end and so returns a wrong (usually empty) crop.
    """
    xc, yc, w, h = bbox.x, bbox.y, bbox.w, bbox.h
    img_h, img_w = img.shape[:2]

    x1 = max(int(xc - w/2), 0)
    y1 = max(int(yc - h/2), 0)
    x2 = min(int(xc + w/2), img_w)
    y2 = min(int(yc + h/2), img_h)

    roi = img[y1:y2, x1:x2]
    return cv2.resize(roi, output_size)
# Shuffle images and labels together (this rebinds x_train / y_train).
x_train, y_train = shuffle(x_train, y_train)

# One thumbnail per class, laid out on a 4 x 8 grid.
fig = plt.figure(figsize=(17, 8))
for position, label in enumerate(labels, 1):
    ax = fig.add_subplot(4, 8, position, xticks=[], yticks=[])

    # First sample (after shuffling) that carries this label.
    first_match = np.where(y_train[:, 1] == label)[0][0]
    bgr_image   = cv2.imread(x_train[first_match])
    rgb_image   = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Keep only the sign itself.
    sign_only = extract_sign(rgb_image, y_train[first_match][0], (32, 32))

    ax.set_title(label)
    ax.imshow(sign_only)
plt.show()

In [None]:
import tensorflow as tf 


def softmax(x):
    """
    Numerically stable softmax along axis 0.

    x: array-like of scores.

    Returns an array of the same shape whose entries along axis 0 sum to 1.

    The maximum along axis 0 is subtracted before exponentiating. This leaves the result
    mathematically unchanged but stops np.exp from overflowing to inf (and the result
    from becoming NaN) for large scores.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise via NumPy."""
    denominator = 1. + np.exp(-x)
    return 1. / denominator

def interpret_netout(image, netout):
    """
    Decode one network output into boxes, suppress overlaps, and draw the survivors.

    image:  image array (indexed [y, x]); it is drawn on in place and returned.
    netout: NumPy array whose first two axes are the output grid; it is reshaped to
            [rows, cols, len(ANCHORS), -1], where the last axis is
            [tx, ty, tw, th, conf, class scores...].

    Relies on names defined outside this function: ANCHORS, N_CLASSES, THRESHOLD, LABELS
    and BoundBox (an object with x, y, w, h, c, probs attributes and an iou() method).

    Fixes relative to the previous version:
      * The grid size is read with np.shape instead of evaluating K.shape in a TF
        session through an undefined `sess`.
      * box.x is normalised by the number of columns and box.y by the number of rows.
        Before, x (a column index) was divided by the row count, which is wrong for
        non-square grids.
      * `xrange` is replaced by constructs that work on Python 2 and 3.
      * Non-max suppression skips boxes that are already suppressed, avoiding
        redundant IoU calls.
      * The random colour is passed to OpenCV as a tuple of plain ints.
    """
    n_rows, n_cols = np.shape(netout)[:2]
    n_anchors = len(ANCHORS)

    netout = np.reshape(netout, [n_rows, n_cols, n_anchors, -1])
    print(n_rows, n_cols)
    print(np.shape(netout))

    # interpret the output by the network
    boxes = []
    for row in range(n_rows):
        for col in range(n_cols):
            for b in range(n_anchors):
                box = BoundBox(N_CLASSES)

                # first 5 values are x, y, w, h and confidence
                box.x, box.y, box.w, box.h, box.c = netout[row, col, b, :5]
                box.x = (col + sigmoid(box.x)) / n_cols
                box.y = (row + sigmoid(box.y)) / n_rows
                # ANCHORS are used as-is here (no division by the grid size).
                box.w = ANCHORS[b][0] * np.exp(box.w)
                box.h = ANCHORS[b][1] * np.exp(box.h)
                box.c = sigmoid(box.c)

                # remaining values are class scores
                classes = netout[row, col, b, 5:]
                box.probs = softmax(classes) * box.c
                box.probs *= box.probs > THRESHOLD   # zero out scores below the threshold
                boxes.append(box)

    # suppress non-maximal boxes, one class at a time
    for c in range(N_CLASSES):
        order = np.argsort([box.probs[c] for box in boxes])[::-1]   # highest score first

        for i, index_i in enumerate(order):
            if boxes[index_i].probs[c] == 0:
                continue
            for index_j in order[i + 1:]:
                if boxes[index_j].probs[c] == 0:
                    continue   # already suppressed; nothing to do
                if boxes[index_i].iou(boxes[index_j]) >= 0.4:
                    boxes[index_j].probs[c] = 0

    # draw the boxes using a threshold
    for box in boxes:
        max_indx = np.argmax(box.probs)
        max_prob = box.probs[max_indx]
        if max_prob > THRESHOLD:
            # boxes are relative; scale back to pixels
            xmin = int((box.x - box.w/2) * image.shape[1])
            xmax = int((box.x + box.w/2) * image.shape[1])
            ymin = int((box.y - box.h/2) * image.shape[0])
            ymax = int((box.y + box.h/2) * image.shape[0])
            print((xmin, ymin), (xmax, ymax))
            print(max_indx)
            color = tuple(int(v) for v in np.random.randint(0, 255, [3]))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 3)
            cv2.putText(image, LABELS[max_indx], (xmin, ymin - 12), 0, 1e-3 * image.shape[0], (0,255,0), 2)
    return image