In [1]:
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam
from keras.preprocessing.image import img_to_array, load_img
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
%matplotlib inline

Using TensorFlow backend.


In [2]:
train_set_labels = pd.read_csv("../data/yolo_train/labels.csv")
test_set_labels = pd.read_csv("../data/yolo_test/labels.csv")

train_set_labels = train_set_labels.iloc[:40000]

print(train_set_labels.tail())
test_set_labels.head()

       Unnamed: 0      file          cx   cy      width  height classes
39995       39995  9998.png  105.064378  3.5  20.600858      53       b
39996       39996  9999.png    3.000000 -2.5  30.000000      65       q
39997       39997  9999.png   27.000000  3.5  30.000000      53       f
39998       39998  9999.png   52.000000  7.5  25.000000      45       5
39999       39999  9999.png   99.000000  7.5  25.000000      45       s


Unnamed: 0.1,Unnamed: 0,file,cx,cy,width,height,classes
0,0,0.png,26.666667,7.5,20.20202,45,n
1,1,0.png,67.070707,7.5,20.20202,45,P
2,2,0.png,87.272727,2.5,20.20202,55,p
3,3,0.png,113.939394,3.5,24.242424,53,v
4,4,1.png,30.0,3.5,30.0,53,W


In [3]:
train_set_labels.describe()

Unnamed: 0.1,Unnamed: 0,cx,cy,width,height
count,40000.0,40000.0,40000.0,40000.0,40000.0
mean,19999.5,60.929345,3.065012,26.230247,53.869975
std,11547.14972,37.371776,3.621453,4.317903,7.242905
min,0.0,2.0,-6.5,15.444015,45.0
25%,9999.75,27.317073,0.0,23.121387,45.0
50%,19999.5,60.224719,3.5,25.411765,53.0
75%,29999.25,94.065934,7.5,30.0,60.0
max,39999.0,139.230769,7.5,35.0,73.0


In [4]:
def load_data(path):
    data = []
    num = 0
    for i in range(10000):
    #while(os.path.isfile(path + str(num) + ".png")):
        image = load_img(path + str(i) + ".png")
        arr = img_to_array(image)
        data.append(arr)
    return np.true_divide(np.array(data), 255)

In [5]:
train_set = load_data("../data/yolo_train/")
train_set.shape

(10000, 60, 160, 3)

In [6]:
HEIGHT = 60
WIDTH = 160
GRID_H = 8
GRID_W = 20

BATCH = 10
CLASS_NUM = 62
INPUT_SHAPE = train_set[1,:,:,:].shape

COORD_SCALE, PROB_SCALE, OBJ_SCALE, NOOB_SCALE = 5.0, 1.0, 5.0, 0.5
print(INPUT_SHAPE)
print(np.amin(train_set))
print((GRID_H, GRID_W, 4 + 1 + CLASS_NUM))

(60, 160, 3)
0.0
(8, 20, 67)


In [7]:
model = Sequential()

# 1
model.add(Conv2D(16, (3,3), strides=(1,1), padding='same', use_bias=False, input_shape=INPUT_SHAPE))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))

# 2
model.add(Conv2D(32, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))

# 3
model.add(Conv2D(64, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

# 4
model.add(Conv2D(128, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same'))

# 5
model.add(Conv2D(256, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

# 6
model.add(Conv2D(256, (3,3), strides=(1,1), padding='same', use_bias=False))
#model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

# 7
model.add(Conv2D(CLASS_NUM, (8, 8), strides=(1, 1), kernel_initializer='he_normal'))
model.add(Activation('softmax'))
#model.add(Reshape((CLASS_NUM,)))

# 7
#model.add(Conv2D(4 + 1 + CLASS_NUM, (1, 1), strides=(1, 1), kernel_initializer='he_normal'))
#model.add(Activation('linear'))

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
model.load_weights("./model.hdf5")

In [9]:
for layer in model.layers:
    layer.trainable = False

In [10]:
connecting_layer = model.layers[-4].output
top_model = Conv2D(4 + 1 + CLASS_NUM, (1, 1), strides=(1, 1), kernel_initializer='he_normal')(connecting_layer)
top_model = Activation('linear')(top_model)

In [11]:
new_model = Model(model.input, top_model)
new_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1_input (InputLayer)  (None, 60, 160, 3)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 60, 160, 16)       432       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 60, 160, 16)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 80, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 80, 32)        4608      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 30, 80, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 40, 32)        0         
__________

In [12]:
train_set_labels.tail()

Unnamed: 0.1,Unnamed: 0,file,cx,cy,width,height,classes
39995,39995,9998.png,105.064378,3.5,20.600858,53,b
39996,39996,9999.png,3.0,-2.5,30.0,65,q
39997,39997,9999.png,27.0,3.5,30.0,53,f
39998,39998,9999.png,52.0,7.5,25.0,45,5
39999,39999,9999.png,99.0,7.5,25.0,45,s


In [13]:
gt = train_set_labels.loc[:, ["cx", "cy", "width", "height"]]
tmp = pd.get_dummies(train_set_labels.loc[:, "classes"])
gt["confidence"]=1
gt = gt.loc[:, ["confidence", "cx", "cy", "width", "height"]]
gt2 = gt.merge(tmp, left_index=True, right_index=True)
print(gt2.head().values[:,2])
gt2.head()

[ 0.   3.5  7.5  7.5 -0.5]


Unnamed: 0,confidence,cx,cy,width,height,0,1,2,3,4,...,q,r,s,t,u,v,w,x,y,z
0,1,28.041237,0.0,28.041237,60,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,56.082474,3.5,24.742268,53,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,86.597938,7.5,20.618557,45,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,115.463918,7.5,20.618557,45,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1,3.855422,-0.5,32.771084,61,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
d = gt2.values
#d = np.array([[1., 10., 5., 35., 53., 0., 1.], [1., 50., 3., 33., 56., 1., 0.]])
cx = d[:,1] + np.true_divide(d[:,3],2)
cy = d[:,2] + np.true_divide(d[:,4],2)
bxnum = np.floor(np.divide(cx,GRID_W))
bynum = np.floor(np.divide(cy,GRID_H))
bx = np.true_divide(cx-np.multiply(bxnum, GRID_W), GRID_W)
by = np.true_divide(cy-np.multiply(bynum, GRID_H), GRID_H)
bw = np.true_divide(d[:,3],GRID_W)
bh = np.true_divide(d[:,4],GRID_H)
bxnum = bxnum.astype(int)
bynum = bynum.astype(int)

d[:,1] = bx
d[:,2] = by
d[:,3] = bw
d[:,4] = bh

print(bxnum[3])
print(bynum[3])

tsgt = np.zeros((10000, 8, 20, 67)) # zamien na numclass
for i in range(10000):
    for j in range(4):
        tsgt[i,bxnum[i+j],bynum[i+j],:] = d[i+j,:]
tsgt[0,2,3,:]

6
3


array([1.        , 0.10309278, 0.75      , 1.40206186, 7.5       ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        ])

In [15]:
def my_loss(real_y, pred_y):
    pred_box_conf = tf.expand_dims(tf.sigmoid(pred_y[:,:,:,0]), -1)
    
    pred_box_xy = tf.sigmoid(pred_y[:,:,:,1:3])
    real_box_xy = real_y[:,:,:,1:3]
    
    pred_box_wh = tf.exp(pred_y[:,:,:,3:5])
    real_box_wh = real_y[:,:,:,3:5]
    
    pred_class_prob = tf.nn.softmax(pred_y[:,:,:,5:])
    real_class_prob = real_y[:,:,:,5:]
    
    pred_y = tf.concat([pred_box_conf, pred_box_xy, pred_box_wh, pred_class_prob], 3)
    
    pred_box_ul = pred_box_xy - 0.5 * pred_box_wh
    pred_box_br = pred_box_xy + 0.5 * pred_box_wh
    pred_box_area = pred_box_wh[:,:,:,0] * pred_box_wh[:,:,:,1]
    pred_box_wh = tf.sqrt(pred_box_wh)
    
    real_box_ul = real_box_xy - 0.5 * real_box_wh
    real_box_br = real_box_xy + 0.5 * real_box_wh
    real_box_area = real_box_wh[:,:,:,0] * real_box_wh[:,:,:,1]
    real_box_wh = tf.sqrt(real_box_wh)
    
    intersect_ul = tf.maximum(pred_box_ul, real_box_ul) 
    intersect_br = tf.minimum(pred_box_br, real_box_br)
    intersect_wh = intersect_br - intersect_ul
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect_area = intersect_wh[:,:,:,0] * intersect_wh[:,:,:,1]
    
    iou = tf.truediv(intersect_area, pred_box_area + real_box_area - intersect_area)
    real_box_conf = tf.expand_dims(tf.round(iou) * real_y[:,:,:,0], -1)
    
    real_y = tf.concat([real_box_conf, real_box_xy, real_box_wh, real_class_prob], 3)
    
    coord_w = tf.concat(4 * [real_box_conf], 3)
    coord_w = COORD_SCALE * coord_w
    
    conf_w = NOOB_SCALE * (1. - real_box_conf) + OBJ_SCALE * real_box_conf
    
    prob_w = tf.concat(CLASS_NUM * [real_box_conf], 3) 
    prob_w = PROB_SCALE * prob_w 
    
    weights = tf.concat([conf_w, coord_w, prob_w], 3)
    
    loss = tf.pow(pred_y - real_y, 2)
    loss = loss * weights
    loss = tf.reshape(loss, [-1, GRID_W*GRID_H*(4 + 1 + CLASS_NUM)])
    loss = tf.reduce_sum(loss, 1)
    loss = .5 * tf.reduce_mean(loss)
    
    #print(loss.eval(session=tf.Session()))
    return loss

In [None]:
real = np.zeros((2, 8, 20, 7))
real[0, 1, 3, :] = [1, 0.375, 0.9375, 1.75, 6.625, 0, 1]
real[1, 1, 3, :] = [1, 0.375, 0.9375, 1.75, 6.625, 1, 0]
pred = np.zeros((2, 8, 20, 7))
pred[0, 1, 3, :] = [4.6, -0.5, 2.71, 0.56, 1.89, 0, 1]
pred[1, 1, 3, :] = [4.6, -0.5, 2.71, 0.56, 1.89, 1, 0]
print(tf.convert_to_tensor(real).eval(session=tf.Session())[0,1,3,:])
print(tf.convert_to_tensor(pred).eval(session=tf.Session())[0,1,3,:])
#my_loss(tf.convert_to_tensor(real), tf.convert_to_tensor(pred))

In [17]:
sgd = SGD(lr=0.00001, decay=0.0005, momentum=0.9)

new_model.compile(loss=my_loss, optimizer=sgd, metrics=["accuracy"])

In [18]:
new_model.fit(train_set, tsgt, epochs=10, batch_size=32, shuffle = True, validation_split=0.2)

Instructions for updating:
Use tf.cast instead.
Train on 8000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4cae9e7128>

In [24]:
p = model.predict(train_set[:1,:,:,:])
print(np.amax(tsgt[:,:,:,0]))
np.amax(p[:,:,:,5:])

1.0


0.99990976

In [None]:
def dummy():
    print(OBJ_SCALE)

In [None]:
dummy()