# YOLO Train

## Read data

In [1]:
import os
import numpy as np
import random
from utils import box_iou, read_boxes, read_image, read_anchors

%load_ext autoreload
%autoreload 2

In [2]:
xml_path = 'E:/data/The Oxford-IIIT Pet Dataset/annotations/xmls'
image_path = 'E:/data/The Oxford-IIIT Pet Dataset/images/'
file_list = os.listdir(xml_path)
num_classes = 2
classes_name = {'cat': 0, 'dog': 1}

Random sample data when testing

In [3]:
random.seed(2018)
choose_file_index = random.sample([i for i in range(len(file_list))], 1000)
file_list = [file_list[i] for i in choose_file_index]
print(len(file_list))
print(file_list[:10])

1000
['miniature_pinscher_185.xml', 'Bengal_120.xml', 'american_bulldog_112.xml', 'havanese_158.xml', 'pug_175.xml', 'Russian_Blue_158.xml', 'Maine_Coon_203.xml', 'english_cocker_spaniel_128.xml', 'beagle_189.xml', 'japanese_chin_181.xml']


### Read anchors

In [4]:
anchors = read_anchors('model/pet_anchors.txt')
num_anchors = len(anchors) // 3

### Read images

In [5]:
X = read_image(file_list, image_path)
print(X.shape)

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:29<00:00, 34.42it/s]


(1000, 416, 416, 3)


### Read boxes

In [6]:
y_true = read_boxes(file_list, xml_path, classes_name, anchors)
print(len(y_true))
print(y_true[0].shape, y_true[1].shape, y_true[2].shape)

100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 193.46it/s]


3
(1000, 13, 13, 3, 7) (1000, 26, 26, 3, 7) (1000, 52, 52, 3, 7)


## Build new model

In [7]:
from yolo_model import yolo_model, yolo_loss
from keras.layers import Input, Lambda
from keras.models import Model, load_model
from keras.regularizers import l2
from keras.optimizers import Adam
from keras import backend as K

Using TensorFlow backend.


In [8]:
def build_model():
    inputs = Input((416, 416, 3), dtype='float32')
    y1, y2, y3 = yolo_model(inputs, num_anchors, num_classes)
    model = Model(inputs, [y1, y2, y3])
    model.load_weights("model/yolo_base.h5", by_name=True)

    for layer in model.layers[:-3]:
        layer.trainable = False
        
    y_tensor = [Input(shape=(13, 13, num_anchors, num_classes+5), name='y_input_1'),
                Input(shape=(26, 26, num_anchors, num_classes+5), name='y_input_2'),
                Input(shape=(52, 52, num_anchors, num_classes+5), name='y_input_3')]
    loss_layer = Lambda(yolo_loss, name='yolo_loss', 
                        arguments={'anchors': anchors, 'num_classes': num_classes})([*model.output, *y_tensor])

    train_model = Model([model.input, *y_tensor], loss_layer)
    
    return train_model

In [21]:
K.clear_session()
train_model = build_model()
train_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 416, 416, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 416, 416, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
zero_paddi

__________________________________________________________________________________________________
leaky_re_lu_22 (LeakyReLU)      (None, 52, 52, 256)  0           batch_normalization_22[0][0]     
__________________________________________________________________________________________________
add_9 (Add)                     (None, 52, 52, 256)  0           add_8[0][0]                      
                                                                 leaky_re_lu_22[0][0]             
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 52, 52, 128)  32768       add_9[0][0]                      
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 52, 52, 128)  512         conv2d_23[0][0]                  
__________________________________________________________________________________________________
leaky_re_l

zero_padding2d_5 (ZeroPadding2D (None, 27, 27, 512)  0           add_19[0][0]                     
__________________________________________________________________________________________________
conv2d_44 (Conv2D)              (None, 13, 13, 1024) 4718592     zero_padding2d_5[0][0]           
__________________________________________________________________________________________________
batch_normalization_44 (BatchNo (None, 13, 13, 1024) 4096        conv2d_44[0][0]                  
__________________________________________________________________________________________________
leaky_re_lu_44 (LeakyReLU)      (None, 13, 13, 1024) 0           batch_normalization_44[0][0]     
__________________________________________________________________________________________________
conv2d_45 (Conv2D)              (None, 13, 13, 512)  524288      leaky_re_lu_44[0][0]             
__________________________________________________________________________________________________
batch_norm

batch_normalization_69 (BatchNo (None, 52, 52, 128)  512         conv2d_71[0][0]                  
__________________________________________________________________________________________________
leaky_re_lu_69 (LeakyReLU)      (None, 52, 52, 128)  0           batch_normalization_69[0][0]     
__________________________________________________________________________________________________
conv2d_72 (Conv2D)              (None, 52, 52, 256)  294912      leaky_re_lu_69[0][0]             
__________________________________________________________________________________________________
batch_normalization_70 (BatchNo (None, 52, 52, 256)  1024        conv2d_72[0][0]                  
__________________________________________________________________________________________________
leaky_re_lu_70 (LeakyReLU)      (None, 52, 52, 256)  0           batch_normalization_70[0][0]     
__________________________________________________________________________________________________
conv2d_73 

## Train

In [22]:
# train_model.compile(optimizer=Adam(lr=0.001, clipnorm=1.), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
train_model.compile(optimizer=Adam(lr=0.001), loss={'yolo_loss': lambda y_true, y_pred: y_pred})

In [None]:
train_model.fit([X, *y_true], np.zeros(len(X)), batch_size=64, epochs=10, validation_split=0.1)

Train on 900 samples, validate on 100 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [16]:
train_model.save('model/yolo_train.h5')

### Test loss

In [10]:
data_index = 0
input_tensor = np.expand_dims(X[data_index], 0)
y_tensor = [np.expand_dims(y_true[0][data_index], 0), np.expand_dims(y_true[1][data_index], 0), np.expand_dims(y_true[2][data_index], 0)]


In [17]:
loss = train_model.output
sess = K.get_session()
loss_info = sess.run(loss, feed_dict={train_model.input[0]: input_tensor, 
                                      train_model.input[1]: y_tensor[0], 
                                      train_model.input[2]: y_tensor[1], 
                                      train_model.input[3]: y_tensor[2]})

In [18]:
print(loss_info)

[3.7198381  2.7847905  0.47783184 2.864696   3.0248    ]
