# YOLO Train

## Read data

In [1]:
import os
import xml.etree.ElementTree as ET
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
from utils import box_iou, letterbox_image

In [2]:
# Directory holding the annotation XML files and directory holding the images.
xml_path = '../oxford_data/annotations/xmls'
image_path = '../oxford_data/images/'
# One entry per file in the annotation directory (os.listdir gives no ordering guarantee).
file_list = os.listdir(xml_path)
# Number of object classes the detector has to distinguish.
num_classes = 2

In [4]:
# Class labels; the list index is the class id (0 = cat, 1 = dog).
classes_name = ['cat', 'dog']
# Number of anchors predicted by each output layer.
num_anchors = 3

In [3]:
# Load the anchor sizes: the first line of the file is a comma-separated
# list of numbers, regrouped below into one (w, h) pair per row.
with open('yolo_anchors.txt') as f:
    anchor_values = [float(x) for x in f.readline().split(',')]
anchors = np.array(anchor_values).reshape(-1, 2)

### Read images

In [4]:
def read_image(file_list, image_path):
    """
    Load the images that belong to the given annotation files as one batch.

    Arguments:
        file_list: xml file names; the matching image has the same name with
            the extension replaced by '.jpg'.
        image_path: image file path (directory that contains the images).

    Returns:
        X: float32 array with one letterboxed image per entry of file_list,
            stacked along axis 0 (shape(0, 416, 416, 3) when file_list is empty).
        image_wh: list with the original size of every image, taken from
            PIL's Image.size before letterboxing.
    """
    image_wh = []
    images = []
    for file in file_list:
        stem = os.path.splitext(file)[0]
        # The context manager closes the underlying file once the pixels are copied.
        with Image.open(os.path.join(image_path, stem + '.jpg')) as image:
            image_wh.append(image.size)
            resized = letterbox_image(image, (416, 416))
            images.append(np.array(resized, dtype='float32'))

    if not images:
        # Nothing to stack: return an empty batch instead of failing on an unbound name.
        return np.zeros((0, 416, 416, 3), dtype='float32'), image_wh

    # A single stack at the end avoids re-copying the whole batch for every image.
    return np.stack(images, axis=0), image_wh

In [5]:
# Load the first 10 listed files as a sample batch and check the batch shape.
X, image_wh = read_image(file_list[:10], image_path)
print(X.shape)

(10, 416, 416, 3)


### Read boxes

In [6]:
def read_boxes(file_list, xml_path):
    """
    Read the bounding boxes of every annotation file.

    Arguments:
        file_list: xml file names.
        xml_path: xml file path (directory that contains the xml files).

    Returns:
        boxes: list with one array per file, shape(num_objects, 4); every row
            is (xmin, ymin, xmax, ymax), the column order boxes_to_y expects.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    boxes = []
    for file in file_list:
        tree = ET.ElementTree(file=os.path.join(xml_path, file))
        # Read the four corners object by object so that the values of one row
        # always come from the same <bndbox>.
        rows = [
            [int(bndbox.findtext(tag)) for tag in corner_tags]
            for bndbox in tree.iterfind('object/bndbox')
        ]
        # reshape keeps the (?, 4) layout even when the file has no objects.
        boxes.append(np.array(rows).reshape(-1, 4))

    return boxes

In [7]:
# Parse the boxes of every annotation file; boxes[i] belongs to file_list[i].
boxes = read_boxes(file_list, xml_path)
print(len(boxes))

3686


### Read y classes

In [8]:
# Derive the class id from the file name: names starting with a lowercase
# letter get class 1, every other name stays class 0.
y_classes = np.array(
    [1 if file[0].islower() else 0 for file in file_list],
    dtype='int32',
)

print(y_classes.shape)

(3686,)


## Convert boxes to y format

In [10]:
def boxes_to_y(true_boxes, box_class, anchors, num_classes, image_wh):
    """
    Encode the ground-truth boxes of one image as YOLO training targets.

    Arguments:
        true_boxes: absolute pixel boxes in the original image, one row per box
            as (xmin, ymin, xmax, ymax), shape(?, 4).
        box_class: class id shared by all boxes of the image, 0/1 - cat/dog.
        anchors: anchor box sizes, shape(num_anchors, 2). Anchors 0-2 are
            assigned to the 52x52 layer, 3-5 to the 26x26 layer and 6-8 to the
            13x13 layer.
        num_classes: total class num.
        image_wh: original image size (w, h).

    Returns:
        y_true: list of 3 float32 arrays, one per output layer, with shapes
            (13, 13, 3, 5+num_classes), (26, 26, 3, 5+num_classes) and
            (52, 52, 3, 5+num_classes). For the cell and anchor slot matched to
            a box the last axis holds: [0:2] box centre offset inside the cell,
            [2:4] box w,h divided by the matched anchor, [4] objectness 1,
            [5+box_class] 1. Everything else stays 0.
    """
    # Map the corners into the 416x416 network frame: uniform scale plus centring offset.
    # NOTE(review): presumably mirrors what letterbox_image does to the pixels - confirm.
    net_size = np.array([416, 416])
    scale = np.min(net_size/image_wh)
    image_wh = np.array(image_wh)
    resized_wh = np.round(image_wh * scale)  # one side is 416, the other is <= 416.
    pad = (net_size - resized_wh)//2

    corner_min = true_boxes[:, 0:2] * scale + pad
    corner_max = true_boxes[:, 2:4] * scale + pad

    # Box size and centre in network pixels; the centre is then normalised to range(0, 1).
    boxes_wh = corner_max - corner_min
    centers = (corner_min + boxes_wh//2) / net_size

    # One grid per output stride: [[13, 13], [26, 26], [52, 52]].
    grid_wh = [net_size//stride for stride in (32, 16, 8)]
    scaled_centers = [centers * grid for grid in grid_wh]  # range(0, grid_wh).
    grid_index = [np.floor(scaled) for scaled in scaled_centers]  # cell that owns the centre.
    cell_xy = [scaled - cell for scaled, cell in zip(scaled_centers, grid_index)]  # range(0, 1).

    # Anchors and boxes are compared as if both were centred on the origin,
    # so the IoU only depends on their sizes.
    half_anchor = anchors/2
    anchor_corners = np.concatenate([-half_anchor, half_anchor], axis=-1)
    half_box = boxes_wh/2

    y_true = [
        np.zeros((grid[1], grid[0], 3, 5+num_classes), dtype='float32')
        for grid in grid_wh
    ]

    for n in range(true_boxes.shape[0]):
        box_corners = np.concatenate([-half_box[n], half_box[n]]).reshape(1, -1)
        iou = box_iou(box_corners, anchor_corners)

        # The best matching anchor decides both the output layer and the slot inside it.
        best = np.argmax(iou)
        layer = 2 - best//3
        slot = best % 3

        col = grid_index[layer][n, 0].astype('int32')
        row = grid_index[layer][n, 1].astype('int32')

        layer_y = y_true[layer]
        layer_y[row, col, slot, :2] = cell_xy[layer][n]
        layer_y[row, col, slot, 2:4] = boxes_wh[n]/anchors[best]
        layer_y[row, col, slot, 4:5] = 1
        layer_y[row, col, slot, 5+box_class] = 1

    return y_true

### Get y_true

In [14]:
# Build the training targets for the same 10 samples that were loaded into X.
per_image = []
for box, box_class, wh in zip(boxes[:10], y_classes, image_wh):
    layers = boxes_to_y(box, box_class, anchors, num_classes, wh)
    # Merge the anchor axis into the value axis so each target has one channel
    # axis of size 3 * (5 + num_classes), like the Conv2D outputs of the model.
    per_image.append([layer.reshape(layer.shape[0], layer.shape[1], -1) for layer in layers])

# Stack once per output layer along a new batch axis instead of re-copying the
# growing batch for every image; with no samples y_true stays an empty list.
y_true = [np.stack(layer_targets, axis=0) for layer_targets in zip(*per_image)]
    

In [15]:
# Sanity check: one target array per output layer, each with the batch axis first.
print(y_true[0].shape, y_true[1].shape, y_true[2].shape)

(10, 13, 13, 21) (10, 26, 26, 21) (10, 52, 52, 21)


## Build new model

In [1]:
from keras.layers import Conv2D
from keras.models import Model, load_model
from keras.regularizers import l2
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
# Load the saved network and freeze every existing layer so that only the new
# output convolutions added below are trainable.
model = load_model('yolo.h5')
for layer in model.layers:
    layer.trainable = False

# Intermediate outputs that the new heads are attached to.
# NOTE(review): presumably the feature maps that fed the three original
# detection layers - confirm the indices against model.summary().
y = [model.layers[index].output for index in (-4, -5, -6)]

# One 1x1 convolution per output layer with num_anchors * (num_classes + 5) channels.
y = [
    Conv2D(num_anchors*(num_classes+5), (1, 1),
           padding='same',
           kernel_regularizer=l2(5e-4),
           name='conv2d_%d'%(59+i*8))(features)
    for i, features in enumerate(y)
]

model = Model(model.input, y)



In [9]:
# Print the layer-by-layer structure of the rebuilt model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 416, 416, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 416, 416, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
zero_paddi

## YOLO loss

In [None]:
def yolo_loss(feats, y_true, anchors, num_classes):
    """
    Placeholder for the YOLO loss; not implemented yet.

    The body only loops over the 3 output layers without computing anything,
    so the function currently ignores all of its arguments and returns None.
    """
    # iterate along 3 layers.
    for i in range(3):
        pass