 # Table of Contents
<div class="toc" style="margin-top: 1em;"><ul class="toc-item" id="toc-level0"><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#YOLO" data-toc-modified-id="YOLO-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>YOLO</a></span><ul class="toc-item"><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#Import-packages" data-toc-modified-id="Import-packages-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Import packages</a></span></li><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#Define-and-initialize-global-variables" data-toc-modified-id="Define-and-initialize-global-variables-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Define and initialize global variables</a></span></li><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#Construct-the-Network" data-toc-modified-id="Construct-the-Network-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Construct the Network</a></span><ul class="toc-item"><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#Input" data-toc-modified-id="Input-1.3.1"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>Input</a></span></li></ul></li><li><span><a href="http://localhost:8888/notebooks/barebone-yolo.ipynb#Load-Pretrained-weights" data-toc-modified-id="Load-Pretrained-weights-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Load Pretrained weights</a></span></li></ul></li></ul></div>

# YOLO

## Import packages

In [1]:
from keras import models
from keras import layers
from keras import callbacks
from keras import optimizers
from keras.utils.vis_utils import plot_model
import keras.backend as K
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('seaborn')
import numpy as np
import os
import cv2
import imgaug as ia
from imgaug import augmenters as iaa
from preprocessing import parse_annotation, BatchGenerator
from utils import WeightReader, decode_netout, draw_boxes

Using TensorFlow backend.


## Define and initialize global variables

In [2]:
LABELS = ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

IMAGE_H, IMAGE_W = 416, 416
GRID_H,  GRID_W  = 13 , 13
BOX              = 5
CLASS            = len(LABELS)
CLASS_WEIGHTS    = np.ones(CLASS, dtype='float32')
THRESHOLD        = 0.3
ANCHORS          = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

NO_OBJECT_SCALE  = 1.0
OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
CLASS_SCALE      = 1.0

BATCH_SIZE       = 6
WARM_UP_BATCHES  = 0
TRUE_BOX_BUFFER  = 50



ALPHA = 0.1

In [3]:
pre_trained_weights='weights/yolo-voc.weights'
coco_train_path = ''
coco_valid_path = ''

## Construct the Network

In [4]:
# the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K)
def space_to_depth_x2(x):
    return tf.space_to_depth(x, block_size=2)

### Input

In [5]:
input_image = layers.Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes  = layers.Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))

In [6]:
def yolo():
    
    input_image = layers.Input(shape=(IMAGE_H, IMAGE_W, 3))
    true_boxes  = layers.Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))
 
    # Layer 1
    x = layers.Conv2D(32, (3, 3), strides=(1, 1), 
                        padding='same', name='conv_1', use_bias=False)(input_image)
    x = layers.BatchNormalization(name='norm_1')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    x = layers.MaxPool2D(pool_size=(2,2))(x)
    
    # Layer 2
    x = layers.Conv2D(64, (3, 3), strides=(1, 1), padding='same', name='conv_2', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_2')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    
    
    # Layer 3
    x = layers.Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_3', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_3')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 4 
    x = layers.Conv2D(64, (1, 1), strides=(1, 1), padding='same', name='conv_4', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_4')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 5
    x = layers.Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_5', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_5')(x)
    x= layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    
    # Layer 6
    x = layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_6', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_6')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    
    # Layer 7
    x = layers.Conv2D(128, (1, 1), strides=(1, 1), padding='same', name='conv_7', use_bias=False)(x)
    x= layers.BatchNormalization(name='norm_7')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 8
    x = layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_8', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_8')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    
    # Layer 9
    x = layers.Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_9', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_9')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 10
    x = layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_10', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_10')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 11
    x = layers.Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_11', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_11')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    
    # Layer 12
    x = layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_12', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_12')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 13
    x = layers.Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_13', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_13')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    
    skip_connection = x
    
    x = layers.MaxPool2D(pool_size=(2, 2))(x)
    
    # Layer 14
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_14', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_14')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 15
    x = layers.Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_15', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_15')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 16
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_16', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_16')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 17
    x = layers.Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_17', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_17')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 18
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_18', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_18')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 19
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_19', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_19')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 20
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_20', use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_20')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    
    # Layer 21
    skip_connection = layers.Conv2D(64, (1, 1), strides=(1, 1), 
                                padding='same', name='conv_21', use_bias=False)(skip_connection)
    skip_connection = layers.BatchNormalization(name='norm_21')(skip_connection)
    skip_connection = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(skip_connection)
    skip_connection = layers.Lambda(space_to_depth_x2)(skip_connection)
    
    x = layers.concatenate([skip_connection, x])
    
    # Layer 22
    x = layers.Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_22',
                     use_bias=False)(x)
    x = layers.BatchNormalization(name='norm_22')(x)
    x = layers.advanced_activations.LeakyReLU(alpha=ALPHA)(x)
    
    # Layer 23
    x = layers.Conv2D((4 + 1 + CLASS) * 5, (1,1), strides=(1,1), padding='same', name='conv_23')(x)
    output = layers.Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)
    
    # small hack to allow true_boxes to be registered when Keras build the model 
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = layers.Lambda(lambda args: args[0])([output, true_boxes])
    
    model = models.Model([input_image, true_boxes], output)
                                    
    
    return model


In [7]:
model = yolo()
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_3 (InputLayer)             (None, 416, 416, 3)   0                                            
____________________________________________________________________________________________________
conv_1 (Conv2D)                  (None, 416, 416, 32)  864         input_3[0][0]                    
____________________________________________________________________________________________________
norm_1 (BatchNormalization)      (None, 416, 416, 32)  128         conv_1[0][0]                     
____________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)        (None, 416, 416, 32)  0           norm_1[0][0]                     
___________________________________________________________________________________________

In [8]:
plot_model(model, to_file='model.png')

![](model.png)

## Load Pretrained weights

Load the weights originally provided by YOLO

In [9]:
weight_reader = WeightReader(pre_trained_weights)

In [10]:
weight_reader.reset()
nb_conv = 23

In [11]:
for i in range(1, nb_conv+1):
    conv_layer = model.get_layer('conv_' + str(i))
    print(i)
    
    if i < nb_conv:
        norm_layer = model.get_layer('norm_' + str(i))
        
        size = np.prod(norm_layer.get_weights()[0].shape)
        
        beta = weight_reader.read_bytes(size)
        gamma = weight_reader.read_bytes(size)
        mean = weight_reader.read_bytes(size)
        var = weight_reader.read_bytes(size)
        
        weights = norm_layer.set_weights([gamma, beta, mean, var])
        
    if len(conv_layer.get_weights()) > 1:
        bias = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2,3,1,0])
        conv_layer.set_weights([kernel, bias])
        
    else:
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2,3,1,0])
        conv_layer.set_weights([kernel])

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


ValueError: cannot reshape array of size 9827453 into shape (1024,1280,3,3)

In [None]:
%load_ext version_information
%version_information keras