# Trening modelu pochodzącego od yolo

In [1]:
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam
from keras.preprocessing.image import img_to_array, load_img
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
%matplotlib inline

Using TensorFlow backend.


### Wczytaj dane treningowe

In [2]:
train_set_labels = pd.read_csv("../data/yolo_train/labels.csv")
#test_set_labels = pd.read_csv("../data/yolo_test/labels.csv")

train_set_labels = train_set_labels.iloc[:40000]

print(train_set_labels.tail())
#test_set_labels.head()

       Unnamed: 0      file          cx   cy      width  height classes
39995       39995  9998.png  105.064378  3.5  20.600858      53       b
39996       39996  9999.png    3.000000 -2.5  30.000000      65       q
39997       39997  9999.png   27.000000  3.5  30.000000      53       f
39998       39998  9999.png   52.000000  7.5  25.000000      45       5
39999       39999  9999.png   99.000000  7.5  25.000000      45       s


In [3]:
train_set_labels.describe()

Unnamed: 0.1,Unnamed: 0,cx,cy,width,height
count,40000.0,40000.0,40000.0,40000.0,40000.0
mean,19999.5,60.929345,3.065012,26.230247,53.869975
std,11547.14972,37.371776,3.621453,4.317903,7.242905
min,0.0,2.0,-6.5,15.444015,45.0
25%,9999.75,27.317073,0.0,23.121387,45.0
50%,19999.5,60.224719,3.5,25.411765,53.0
75%,29999.25,94.065934,7.5,30.0,60.0
max,39999.0,139.230769,7.5,35.0,73.0


In [4]:
def load_data(path):
    data = []
    num = 0
    for i in range(10000):
        image = load_img(path + str(i) + ".png")
        arr = img_to_array(image)
        data.append(arr)
    return np.true_divide(np.array(data), 255)

In [5]:
train_set = load_data("../data/yolo_train/")
train_set.shape

(10000, 60, 160, 3)

#### Określenie szerkości siatki, rozmiaru batcha, liczby klas i wag poszczególnych części funkcji strat

In [6]:
HEIGHT = 60
WIDTH = 160
GRID_H = 8
GRID_W = 8

BATCH = 8
CLASS_NUM = 62
INPUT_SHAPE = train_set[1,:,:,:].shape

COORD_SCALE, PROB_SCALE, OBJ_SCALE, NOOB_SCALE = 5.0, 1.0, 5.0, 0.1
print(INPUT_SHAPE)

(60, 160, 3)


#### Definicja modelu, na którym były trenowane wagi części konwolucyjnej (klasyfikacja liczb i liter)
Jest nam to potrzebne, żeby móc wczytać wagi wyuczone na zbiorze pojedynczych znaków

In [7]:
model = Sequential()

# 1
model.add(Conv2D(16, (3,3), strides=(1,1), padding='same', use_bias=False, input_shape=INPUT_SHAPE))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))

# 2
model.add(Conv2D(32, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))

# 3
model.add(Conv2D(64, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

# 4
model.add(Conv2D(128, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same'))

# 5
model.add(Conv2D(256, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

# 6
model.add(Conv2D(256, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

# 7
model.add(Conv2D(CLASS_NUM, (8, 8), strides=(1, 1), kernel_initializer='he_normal'))
model.add(Activation('softmax'))
#model.add(Reshape((CLASS_NUM,)))

# 7
#model.add(Conv2D(4 + 1 + CLASS_NUM, (1, 1), strides=(1, 1), kernel_initializer='he_normal'))
#model.add(Activation('linear'))

Instructions for updating:
Colocations handled automatically by placer.


#### Wczytanie wag modelu i modyfikacja sieci

- Warstwy wcześniej przeuczone zostają wyłączone z dalszego treningu
- Dwie ostatnia warstwa zostaje zastąpiona przez trzy nowe nie przeuczone warstwy

In [8]:
model.load_weights("../pretrain.hdf5")

In [9]:
for layer in model.layers:
    layer.trainable = False

In [10]:
connecting_layer = model.layers[-3].output
top_model = Conv2D(512, (3,3), strides=(1,1), padding='same', use_bias=False)(connecting_layer)
top_model = LeakyReLU(alpha=0.1)(top_model)
top_model = Conv2D(512, (3,3), strides=(1,1), padding='same', use_bias=False)(top_model)
top_model = LeakyReLU(alpha=0.1)(top_model)
top_model = Conv2D(512, (1,1), strides=(1,1), padding='same')(top_model)
top_model = LeakyReLU(alpha=0.1)(top_model)
top_model = Conv2D(512, (1,1), strides=(1,1), padding='same')(top_model)
top_model = LeakyReLU(alpha=0.1)(top_model)
top_model = Conv2D(4 + 1 + CLASS_NUM, (1, 1), strides=(1, 1), kernel_initializer='he_normal')(top_model)
top_model = Activation('linear')(top_model)

In [11]:
new_model = Model(model.input, top_model)
new_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1_input (InputLayer)  (None, 60, 160, 3)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 60, 160, 16)       432       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 60, 160, 16)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 80, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 80, 32)        4608      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 30, 80, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 40, 32)        0         
__________

#### Zmiana formatu zbioru etykiet
Chcemy, żeby był taki sam jak wyjście z sieci

In [12]:
train_set_labels.tail()

Unnamed: 0.1,Unnamed: 0,file,cx,cy,width,height,classes
39995,39995,9998.png,105.064378,3.5,20.600858,53,b
39996,39996,9999.png,3.0,-2.5,30.0,65,q
39997,39997,9999.png,27.0,3.5,30.0,53,f
39998,39998,9999.png,52.0,7.5,25.0,45,5
39999,39999,9999.png,99.0,7.5,25.0,45,s


In [13]:
gt = train_set_labels.loc[:, ["cx", "cy", "width", "height"]]
tmp = pd.get_dummies(train_set_labels.loc[:, "classes"])
gt["confidence"]=1
gt = gt.loc[:, ["confidence", "cy", "cx", "height", "width"]]
gt2 = gt.merge(tmp, left_index=True, right_index=True)
print(gt2.head().values[1,:])
gt2.head()

[ 1.          3.5        56.08247423 53.         24.74226804  0.
  0.          0.          0.          0.          0.          0.
  0.          1.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.        ]


Unnamed: 0,confidence,cy,cx,height,width,0,1,2,3,4,...,q,r,s,t,u,v,w,x,y,z
0,1,0.0,28.041237,60,28.041237,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,3.5,56.082474,53,24.742268,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,7.5,86.597938,45,20.618557,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,7.5,115.463918,45,20.618557,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1,-0.5,3.855422,61,32.771084,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
d = gt2.values
cy = d[:,1] + np.true_divide(d[:,3],2)
cx = d[:,2] + np.true_divide(d[:,4],2)
bynum = np.floor(np.divide(cy,GRID_H))
bxnum = np.floor(np.divide(cx,GRID_W))
by = np.true_divide(cy-np.multiply(bynum, GRID_H), GRID_H)
bx = np.true_divide(cx-np.multiply(bxnum, GRID_W), GRID_W)
bh = np.true_divide(d[:,3],GRID_H)
bw = np.true_divide(d[:,4],GRID_W)
bynum = bynum.astype(int)
bxnum = bxnum.astype(int)

d[:,1] = by
d[:,2] = bx
d[:,3] = bh
d[:,4] = bw

tsgt = np.zeros((10000, 8, 20, 67)) # zamien na numclass
for i in range(10000):
    for j in range(4):
        tsgt[i,bynum[i+j],bxnum[i+j],:] = d[i+j,:]
tsgt[0,3,:,0:6]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [1.        , 0.75      , 0.25773196, 7.5       , 3.50515464,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [1.        , 0.75      , 0.55670103, 6.625     , 3.09278351,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0

#### Definicja funkcji straty

In [15]:
def my_loss(real_y, pred_y):
    pred_box_conf = tf.expand_dims(tf.sigmoid(pred_y[:,:,:,0]), -1)
    
    pred_box_xy = tf.sigmoid(pred_y[:,:,:,1:3])
    real_box_xy = real_y[:,:,:,1:3]
    
    pred_box_wh = tf.exp(pred_y[:,:,:,3:5])
    real_box_wh = real_y[:,:,:,3:5]
    
    pred_class_prob = tf.nn.softmax(pred_y[:,:,:,5:])
    real_class_prob = real_y[:,:,:,5:]
    
    pred_y = tf.concat([pred_box_conf, pred_box_xy, pred_box_wh, pred_class_prob], 3)
    
    pred_box_ul = pred_box_xy - 0.5 * pred_box_wh
    pred_box_br = pred_box_xy + 0.5 * pred_box_wh
    pred_box_area = pred_box_wh[:,:,:,0] * pred_box_wh[:,:,:,1]
    pred_box_wh = tf.sqrt(pred_box_wh)
    
    real_box_ul = real_box_xy - 0.5 * real_box_wh
    real_box_br = real_box_xy + 0.5 * real_box_wh
    real_box_area = real_box_wh[:,:,:,0] * real_box_wh[:,:,:,1]
    real_box_wh = tf.sqrt(real_box_wh)
    
    intersect_ul = tf.maximum(pred_box_ul, real_box_ul) 
    intersect_br = tf.minimum(pred_box_br, real_box_br)
    intersect_wh = intersect_br - intersect_ul
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect_area = intersect_wh[:,:,:,0] * intersect_wh[:,:,:,1]
    
    iou = tf.truediv(intersect_area, pred_box_area + real_box_area - intersect_area)
    real_box_conf = tf.expand_dims(iou * real_y[:,:,:,0], -1)
    
    coord_w = tf.concat(4 * [tf.expand_dims(real_y[:,:,:,0],-1)], 3)
    coord_w = COORD_SCALE * coord_w
    
    conf_w = NOOB_SCALE * (1. - tf.expand_dims(real_y[:,:,:,0],-1)) + OBJ_SCALE * tf.expand_dims(real_y[:,:,:,0],-1)
    
    prob_w = tf.concat(CLASS_NUM * [tf.expand_dims(real_y[:,:,:,0],-1)], 3) 
    prob_w = PROB_SCALE * prob_w 
    
    real_y = tf.concat([real_box_conf, real_box_xy, real_box_wh, real_class_prob], 3)
    weights = tf.concat([conf_w, coord_w, prob_w], 3)
    
    loss = tf.pow(pred_y - real_y, 2)
    loss = loss * weights
    loss = tf.reshape(loss, [-1, int(GRID_W*GRID_H*(4 + 1 + CLASS_NUM)*2.5)])
    loss = tf.reduce_sum(loss, 1)
    loss = .5 * tf.reduce_mean(loss)
    
    #print(loss.eval(session=tf.Session()))
    return loss

#### W tej komórce testowałem funkcję błędu (debug)

In [16]:
real = np.zeros((2, 8, 20, 7))
real[0, 1, 3, :] = [1, 0.375, 0.9375, 1.75, 6.625, 0, 1]
real[1, 1, 3, :] = [1, 0.375, 0.9375, 1.75, 6.625, 1, 0]
pred = np.zeros((2, 8, 20, 7))
pred[0, 1, 3, :] = [4.6, -0.5, 2.71, 0.56, 1.89, 0, 1]
pred[1, 1, 3, :] = [4.6, -0.5, 2.71, 0.56, 1.89, 1, 0]
print(tf.convert_to_tensor(real).eval(session=tf.Session())[0,1,3,:])
print(tf.convert_to_tensor(pred).eval(session=tf.Session())[0,1,3,:])
#my_loss(tf.convert_to_tensor(real), tf.convert_to_tensor(pred))

[1.     0.375  0.9375 1.75   6.625  0.     1.    ]
[ 4.6  -0.5   2.71  0.56  1.89  0.    1.  ]


#### Wybór optymalizatora, ustawień uczenia i sposobu zapisywania wyników

In [17]:
#new_model.load_weights('../mean_yolo.14.hdf5')

In [18]:
sgd = SGD(lr=0.000001, decay=0.0005, momentum=0.9)
adam = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.00005)
new_model.compile(loss=my_loss, optimizer=adam)

filepath="../checkpoints/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

#### Trening

In [19]:
new_model.fit(train_set, tsgt, epochs=1, batch_size=2, shuffle = True, validation_split=0.1, callbacks=callbacks_list)

Instructions for updating:
Use tf.cast instead.
Train on 9000 samples, validate on 1000 samples
Epoch 1/1

Epoch 00001: val_loss improved from inf to 4.24630, saving model to ../checkpoints/weights-improvement-01-4.25.hdf5


OSError: Unable to create file (unable to open file: name = '../checkpoints/weights-improvement-01-4.25.hdf5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

#### Sprawdzenie wyników predykcji na pierwszym przykładzie ze zbioru treningowego
Pozwala to ocenić wyłącznie jak dobrze model przystosował się do tego konkretnego zbioru danych

In [None]:
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

In [None]:
p = new_model.predict(train_set[:1,:,:,:])
print(np.amax(tsgt[:,:,:,0]))
print(sigmoid(np.amax(p[:,:,:,0])))
print("confidence:")
print(tsgt[0,3,[5,8,12,15],0])
print(sigmoid(p[:,3,[5,8,12,15],0]))
print("cy:")
print(tsgt[0,3,[5,8,12,15],1])
print(sigmoid(p[:,3,[5,8,12,15],1]))
print("cx:")
print(tsgt[0,3,[5,8,12,15],2])
print(sigmoid(p[:,3,[5,8,12,15],2]))
print("height:")
print(tsgt[0,3,[5,8,12,15],3])
print(np.exp(p[:,3,[5,8,12,15],3]))
print("width:")
print(tsgt[0,3,[5,8,12,15],4])
print(np.exp(p[:,3,[5,8,12,15],4]))
print("probs:")
print(tsgt[0,3,[5,8,12,15],5:])
print((p[:,3,[5,8,12,15],5:]))