In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display
import glob

In [2]:
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
_MODEL_SIZE = (416, 416)

In [3]:
def batch_norm(inputs, training, data_format):
    return tf.layers.batch_normalization(
        inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
        momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON,
        scale=True, training=training)

In [4]:
def fixed_padding(inputs, kernel_size, data_format):
    print('[INFO] fixed padding')
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    if data_format == 'channels_first':
        padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                                        [pad_beg, pad_end],
                                        [pad_beg, pad_end]])
    else:
        padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                        [pad_beg, pad_end], [0, 0]])
    return padded_inputs

In [5]:
def conv2d_fixed_padding(inputs, filters, kernel_size, data_format, strides=1):
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides, padding=('SAME' if strides == 1 else 'VALID'),
        use_bias=False, data_format=data_format)

In [6]:
def darknet53_residual_block(inputs, filters, training, data_format,
                             strides=1):
    shortcut = inputs

    inputs = conv2d_fixed_padding(
        inputs, filters=filters, kernel_size=1, strides=strides,
        data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(
        inputs, filters=2 * filters, kernel_size=3, strides=strides,
        data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs += shortcut

    return inputs

In [7]:
def darknet53(inputs, training, data_format):
    inputs = conv2d_fixed_padding(inputs, filters=32, kernel_size=3,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    inputs = conv2d_fixed_padding(inputs, filters=64, kernel_size=3,
                                  strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    inputs = darknet53_residual_block(inputs, filters=32, training=training,
                                      data_format=data_format)
    print(inputs.get_shape())
    inputs = conv2d_fixed_padding(inputs, filters=128, kernel_size=3,
                                  strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    for _ in range(2):
        inputs = darknet53_residual_block(inputs, filters=64,
                                          training=training,
                                          data_format=data_format)
    print(inputs.get_shape())
    inputs = conv2d_fixed_padding(inputs, filters=256, kernel_size=3,
                                  strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    for _ in range(8):
        inputs = darknet53_residual_block(inputs, filters=128,
                                          training=training,
                                          data_format=data_format)

    print(inputs.get_shape())
    route1 = inputs

    inputs = conv2d_fixed_padding(inputs, filters=512, kernel_size=3,
                                  strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    for _ in range(8):
        inputs = darknet53_residual_block(inputs, filters=256,
                                          training=training,
                                          data_format=data_format)

    route2 = inputs
    print(inputs.get_shape())
    inputs = conv2d_fixed_padding(inputs, filters=1024, kernel_size=3,
                                  strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    print(inputs.get_shape())
    for _ in range(4):
        inputs = darknet53_residual_block(inputs, filters=512,
                                          training=training,
                                          data_format=data_format)
    print(inputs.get_shape())
    return route1, route2, inputs

In [8]:
def yolo_convolution_block(inputs, filters, training, data_format):
    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(inputs, filters=2 * filters, kernel_size=3,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(inputs, filters=2 * filters, kernel_size=3,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    route = inputs

    inputs = conv2d_fixed_padding(inputs, filters=2 * filters, kernel_size=3,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    return route, inputs

In [9]:
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
    """Creates Yolo final detection layer.

    Detects boxes with respect to anchors.

    Args:
        inputs: Tensor input.
        n_classes: Number of labels.
        anchors: A list of anchor sizes.
        img_size: The input size of the model.
        data_format: The input format.

    Returns:
        Tensor output.
    """
    n_anchors = len(anchors)
    shape = inputs.get_shape().as_list()
    print("input : ",shape)
    # 나오는 값의 크기 : n_anchors * (5 + n_classes)
    inputs = tf.layers.conv2d(inputs, filters=n_anchors * (5 + n_classes),
                              kernel_size=1, strides=1, use_bias=True,
                              data_format=data_format)
    shape = inputs.get_shape().as_list()
    print("n_anchors * (5 + n_classes) : ",shape)
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
    print("grid : ",grid_shape)
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs = tf.reshape(inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1],
                                 5 + n_classes])
    print("reshape : ",inputs.get_shape().as_list())
    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])
    print("output strides",strides)
    box_centers, box_shapes, confidence, classes = \
        tf.split(inputs, [2, 2, 1, n_classes], axis=-1)
    print('box center : ',box_centers.get_shape().as_list())
    print('box shape : ',box_shapes.get_shape().as_list())
    print('box confidence : ',confidence.get_shape().as_list())
    print('box classes : ',classes.get_shape().as_list())
    x = tf.range(grid_shape[0], dtype=tf.float32)
    print('grid x : ',x.get_shape().as_list())
    y = tf.range(grid_shape[1], dtype=tf.float32)
    print('grid y : ',y.get_shape().as_list())
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])
    print('grid offset : ',x_y_offset.get_shape().as_list())
    box_centers = tf.nn.sigmoid(box_centers)
    print('sigmoid box center : ',box_centers.get_shape().as_list())
    box_centers = (box_centers + x_y_offset) * strides
    print('offset + box center : ',box_centers.get_shape().as_list())
    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
    print('anchors',anchors.get_shape().as_list())
    box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)
    with tf.Session() as sess:
        print(sess.run(anchors))
    print('box shape : ',box_shapes.get_shape().as_list())
    confidence = tf.nn.sigmoid(confidence)
    print('box confidence : ',confidence.get_shape().as_list())
    classes = tf.nn.sigmoid(classes)
    print('box classes : ',classes.get_shape().as_list())
    inputs = tf.concat([box_centers, box_shapes,
                        confidence, classes], axis=-1)
    print('result : ',inputs.get_shape().as_list())
    return inputs

In [10]:
def upsample(inputs, out_shape, data_format):
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
        new_height = out_shape[3]
        new_width = out_shape[2]
    else:
        new_height = out_shape[2]
        new_width = out_shape[1]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    return inputs

In [15]:
def box_preprocessing(gt_boxes,input_shape,anchors, num_classes):
    '''
    gt_boxes : true box, shape = (m,T,5)
        x_min, y_min, x_max, y_max, class_id
    anchors : shape = (N,2) 
        width,height
    '''
    
    num_layers = len(anchors)//3
    anchor_mask = [[6,7,8],[3,4,5],[0,1,2]] if num_layers==3 else [[3,4,5],[1,2,3]]
    
    gt_boxes = np.array(gt_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    
    boxes_xy = (gt_boxes[...,0:2] + gt_boxes[...,2:4]) // 2
    boxes_wh = gt_boxes[...,2:4] - gt_boxes[...,0:2]
    
    gt_boxes[..., 0:2] = boxes_xy/input_shape[::-1] # 1x1
    gt_boxes[..., 2:4] = boxes_wh/input_shape[::-1] # 1x1
    
    m = gt_boxes.shape[0]
    
    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
    
    y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
        dtype='float32') for l in range(num_layers)]
    
    print(y_true[0].shape) # (batch, grid , grid , anchor , [x,y,w,h,c])
    
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0]>0 # 유효성 검사 w > 0
    
    # 최적의 anchor를 찾고 그 anchor를 이용해 실제 y를 (batch, grid, grid , anchor , 85)(grid = 13,26,52)의 형태로 만든다.
    for b in range(m):
        wh = boxes_wh[b, valid_mask[b]] # (batch,num_valid_boxes,2)
        if len(wh)==0: 
            continue
        
        wh = np.expand_dims(wh, -2) # (batch,num_valid_boxes,1,2)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[...,0] + intersect_wh[...,1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        # Find best anchor for each true box
        
        print(iou.shape)
        
        best_anchor = np.argmax(iou, axis=-1)
        
        print(best_anchor.shape)
        
        count = 0
        for t, n in enumerate(best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    i = np.floor(gt_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
                    j = np.floor(gt_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)
                    c = gt_boxes[b,t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = gt_boxes[b,t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5+c] = 1

#[batch_size,layers,feat_size,feat_size,num_of_anchors each layer,c+5]
    return y_true

In [16]:
gt = np.random.rand(2,10,5)

y_true = box_preprocessing(gt,_MODEL_SIZE,_ANCHORS,80)
print(y_true[0].shape)

(2, 13, 13, 3, 85)
(3, 9)
(3,)
(6, 9)
(6,)
(2, 13, 13, 3, 85)


In [15]:
def iou(pre_boxes, true_boxes):
    pred_xy = pre_boxes[..., :2]
    pred_wh = pre_boxes[..., 2:4]
    true_xy = true_boxes[..., :2]
    true_wh = true_boxes[..., 2:4]

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)

    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_score = tf.truediv(intersect_areas, union_areas)

    return iou_score

In [34]:
def yolov3_loss(y_pred, y_true, anchors, input_shape):
    
    '''
    detect1 = (batch, 13 , 13 , 3, 85)
    detect2 = (batch, 26 , 26 , 3, 85)
    detect3 = (batch, 52 , 52 , 3, 85)
    '''
    
    # expanding
    anchors = tf.constant(anchors, dtype='float', shape=[1, 1, 1, 9, 2])
    sess = tf.Session()
    print('anchors : ',sess.run(anchors))
    
    C = 80
    ignore_thresh = 0.5
    batch_size = 1
    
    cellbase_x = tf.to_float(tf.reshape(tf.tile(tf.range(52), [52]), (1, 52, 52, 1, 1)))
    print(sess.run(cellbase_x).shape)
    cellbase_y = tf.transpose(cellbase_x, (0, 2, 1, 3, 4))
    print(sess.run(cellbase_y).shape)
    cellbase_grid = tf.tile(tf.concat([cellbase_x, cellbase_y], -1), [batch_size, 1, 1, 3, 1])
    print(sess.run(cellbase_grid).shape)
    
    img_w = input_shape[0]
    img_h = input_shape[1]
    img_factor = tf.reshape(tf.cast([img_w, img_h], tf.float32), [1, 1, 1, 1, 2])
    print('shape : ',sess.run(img_factor))
    
    loss = 0
    sum_loss_xy = 0
    sum_loss_wh = 0
    sum_loss_c = 0
    sum_loss_class = 0
    
    for i in range(1):
        anchor = anchors[..., 3 * i:3 * (i + 1), :]
        print('anchor : ',sess.run(anchor))
        object_mask = y_true[i][...,4:5]
        
        grid_w = tf.shape(y_pred[i])[1]  # 13
        grid_h = tf.shape(y_pred[i])[2]  # 13
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1, 1, 1, 1, 2])
        print('grid_factor : ',sess.run(grid_factor))
        
        # 예측한 box를 새롭게 조절된 box로 만든다.
        net_out_reshape = tf.reshape(y_pred[i], [-1, grid_w, grid_h, 3, (4 + 1 + C)])
        adjusted_out_xy = (cellbase_grid[:, :grid_w, :grid_h, :, :] + tf.sigmoid(
            net_out_reshape[..., :2])) / grid_factor
        adjusted_out_wh = tf.exp(net_out_reshape[..., 2:4]) * anchor / img_factor
        adjusted_out_c = tf.expand_dims(tf.sigmoid(net_out_reshape[..., 4]), axis=-1)
        adjusted_out_class = tf.sigmoid(net_out_reshape[..., 5:])
        adjusted_net_out = tf.concat([adjusted_out_xy, adjusted_out_wh, adjusted_out_c, adjusted_out_class],axis=-1)
        
        pred_boxes = tf.expand_dims(adjusted_net_out[..., 0:4], 4)
        
        # 실제 box를 새롭게 조절된 box로 만든다.
        adjusted_true_xy = y_true[i][..., :2] * grid_factor - cellbase_grid[:, :grid_w, :grid_h, :, :]
        adjusted_true_wh = tf.log(y_true[i][..., 2:4] / anchor * img_factor + 1e-9)  # 1e-9 just avoid log(0) = -inf

        adjusted_true_c = y_true[i][..., 4:5]
        adjusted_true_class = y_true[i][..., 5:]
        
        # TODO i don't like for loop
        
        ignore_masks = list()
        for k in range(batch_size):
            origin_box = tf.boolean_mask(y_true[i][k, ..., :4], tf.cast(y_true[i][k, ..., 4], dtype=bool))
            origin_box = tf.tile(tf.reshape(origin_box, shape=[1, 1, 1, -1, 4]), [grid_w, grid_h, 3, 1, 1])
            iou_scores = iou(pred_boxes[k], origin_box)
            best_ious = tf.reduce_max(iou_scores, axis=-1)
            ignore_mask = tf.expand_dims(tf.to_float(best_ious < ignore_thresh), -1)
            ignore_masks.append(ignore_mask)
        ignore_masks = tf.stack(ignore_masks)
        xywh_scale = 2 - y_true[i][..., 2:3] * y_true[i][..., 3:4]
        
        loss_xy = tf.reduce_sum(
            object_mask * xywh_scale * tf.nn.sigmoid_cross_entropy_with_logits(logits=net_out_reshape[..., :2],
                                                                               labels=adjusted_true_xy)) / batch_size
        loss_wh = tf.reduce_sum(
            object_mask * xywh_scale * 0.5 * tf.square(net_out_reshape[..., 2:4] - adjusted_true_wh)) / batch_size
        
        loss_c = tf.reduce_sum(
            object_mask * tf.nn.sigmoid_cross_entropy_with_logits(logits=net_out_reshape[..., 4:5],
                                                                  labels=adjusted_true_c) + (
                    1 - object_mask) * tf.nn.sigmoid_cross_entropy_with_logits(logits=net_out_reshape[..., 4:5],
                                                                               labels=adjusted_true_c) * ignore_masks) / batch_size
        loss_class = tf.reduce_sum(
            object_mask * tf.nn.sigmoid_cross_entropy_with_logits(logits=net_out_reshape[..., 5:],
                                                                  labels=adjusted_true_class)) / batch_size

        sum_loss_xy += loss_xy
        sum_loss_wh += loss_wh
        sum_loss_c += loss_c
        sum_loss_class += loss_class
        loss += loss_xy + loss_wh + loss_c + loss_class

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('loss_xy', sum_loss_xy)
    tf.summary.scalar('loss_wh', sum_loss_wh)
    tf.summary.scalar('loss_c', sum_loss_c)
    tf.summary.scalar('loss_class', sum_loss_class)
    
    sess.close()
    
    return loss,sum_loss_xy,sum_loss_wh,sum_loss_c,sum_loss_class
        

In [35]:
yolov3_loss(y_true,y_true,_ANCHORS,_MODEL_SIZE)

anchors :  [[[[[ 10.  13.]
    [ 16.  30.]
    [ 33.  23.]
    [ 30.  61.]
    [ 62.  45.]
    [ 59. 119.]
    [116.  90.]
    [156. 198.]
    [373. 326.]]]]]
(1, 52, 52, 1, 1)
(1, 52, 52, 1, 1)
(1, 52, 52, 3, 2)
shape :  [[[[[416. 416.]]]]]
anchor :  [[[[[10. 13.]
    [16. 30.]
    [33. 23.]]]]]
grid_factor :  [[[[[13. 13.]]]]]


(<tf.Tensor 'add_124:0' shape=() dtype=float32>,
 <tf.Tensor 'add_117:0' shape=() dtype=float32>,
 <tf.Tensor 'add_118:0' shape=() dtype=float32>,
 <tf.Tensor 'add_119:0' shape=() dtype=float32>,
 <tf.Tensor 'add_120:0' shape=() dtype=float32>)

In [17]:
num_anchors = len(anchors)//3

inputs = tf.placeholder(tf.float32, [None, 416, 416, 3],name='inputs')
true_detect1 = tf.placeholder(tf.float32, [None, 13 , 13 , 3, 42])
true_detect2 = tf.placeholder(tf.float32, [None, 26 , 26 , 3, 42])
true_detect3 = tf.placeholder(tf.float32, [None, 52 , 52 , 3, 42])
is_training = tf.placeholder(tf.bool,name='training')
# layer = inputs 

'''
route1 = (batch , 13 , 13 , 256)
route2 = (batch , 26 , 26 , 512)
inputs = (batch , 13 , 13 , 1024)
'''

route1, route2, inputs = darknet53(inputs, training=training,
                                               data_format=self.data_format)

route, inputs = yolo_convolution_block(
                inputs, filters=512, training=training,
                data_format=self.data_format)
# detection
target_detect1 = yolo_layer(inputs, n_classes=self.n_classes,
                     anchors=_ANCHORS[6:9],
                     img_size=self.model_size,
                     data_format=self.data_format)

inputs = conv2d_fixed_padding(route, filters=256, kernel_size=1,
                              data_format=self.data_format)
inputs = batch_norm(inputs, training=training,
                    data_format=self.data_format)
inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
upsample_size = route2.get_shape().as_list()
inputs = upsample(inputs, out_shape=upsample_size,
                  data_format=self.data_format)
axis = 1 if self.data_format == 'channels_first' else 3
inputs = tf.concat([inputs, route2], axis=axis)

route, inputs = yolo_convolution_block(
    inputs, filters=256, training=training,
    data_format=self.data_format)
target_detect2 = yolo_layer(inputs, n_classes=self.n_classes,
                     anchors=_ANCHORS[3:6],
                     img_size=self.model_size,
                     data_format=self.data_format)

inputs = conv2d_fixed_padding(route, filters=128, kernel_size=1,
                              data_format=self.data_format)
inputs = batch_norm(inputs, training=training,
                    data_format=self.data_format)
inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
upsample_size = route1.get_shape().as_list()
inputs = upsample(inputs, out_shape=upsample_size,
                  data_format=self.data_format)
inputs = tf.concat([inputs, route1], axis=axis)
route, inputs = yolo_convolution_block(
    inputs, filters=128, training=training,
    data_format=self.data_format)
target_detect3 = yolo_layer(inputs, n_classes=self.n_classes,
                     anchors=_ANCHORS[0:3],
                     img_size=self.model_size,
                     data_format=self.data_format)

#target = tf.concat([target_detect1, target_detect2, target_detect3], axis=1)

target_detect1 = tf.reshape(target_detect1,(-1,13,13,3,85))
target_detect2 = tf.reshape(target_detect2,(-1,26,26,3,85))
target_detect3 = tf.reshape(target_detect3,(-1,52,52,3,85))

y_pred = [target_detect1,target_detect2,target_detect3]
y_true = [true_detect1,true_detect2,true_detect3]


learning_rate = 0.0001

loss,loss_xy,loss_wh,loss_c,loss_class = yolo3_loss(y_pred, y_true,_ANCHORS)


'''
---------------------------------------------------------- 
여기 까지 완료
아래는 학습
----------------------------------------------------------
'''

update_opts = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies([tf.group(*update_opts)]):
    train_opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(FLAGS.log_dir,sess.graph)
    saver = tf.train.Saver(max_to_keep=5)
    img_path, annotation = get_path_and_annotation('./data/train_ocr.txt')
    train_feeder = read_data(img_path,annotation,FLAGS.batch_size,False)
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if ckpt:
        saver.restore(sess,ckpt)
        print('Restore from the checkpoint {0}'.format(ckpt))
    else:
        print('Train yolo from start')
    for i in range(50000):
        image_batch, annotation_batch = next(train_feeder)
        image_batch, annotation_batch = resize_images_boxes(image_batch,annotation_batch,15)
        y_true = preprocess_true_boxes(annotation_batch, (FLAGS.image_size,FLAGS.image_size), anchors, FLAGS.num_classes)
        [_,total_loss,current_lossxy,current_losswh,current_lossc,current_loss_class,temp_pred] = sess.run([train_opt,loss,loss_xy,loss_wh,loss_c,loss_class,y_pred],feed_dict={inputs:image_batch,y_true_1:y_true[0],y_true_2:y_true[1],y_true_3:y_true[2],is_training: True})
        if(i%500 == 0):
            saver.save(sess,os.path.join(FLAGS.checkpoint_dir,'OCR-'),global_step=i)
            print('Model saved!!')
            
            #######################################################################
            constant_graph = convert_variables_to_constants(sess, sess.graph_def, ['boxes','scores','classes'])
            with tf.gfile.FastGFile('./model.pb', mode='wb') as f:
                f.write(constant_graph.SerializeToString())
            ######################################################################
            
            
            
        print('Batch: '+str(i)+'   Current total loss: '+str(total_loss)+'  loss_xy: '+str(current_lossxy)+'  loss_wh: '+str(current_losswh)+'  loss_c: '+str(current_lossc)+' loss_class: '+str(current_loss_class))

SyntaxError: invalid syntax (<ipython-input-17-900522f3e1d7>, line 19)

In [18]:
def read_data(img_list,annotation,batch_size,aug):
    num_batch = len(img_list)/batch_size
    count=0
    while(True):
        image_data = []
        annotation_data = []
        for i in range(batch_size):
            temp_index = i+count*batch_size
            temp_index %=len(img_list) 
            image = cv2.imread(img_list[temp_index])
            if aug:
                image = data_augmentation(image)
            image = image[:,:,::-1]
            image = image.astype(np.float32)
            image_shape = image.shape
#             image = cv2.resize(image,(FLAGS.image_size,FLAGS.image_size))
            
#             image = image/255
            image_data.append(image)
            annotation_data.append(np.array(annotation[temp_index]).astype(np.int32))
        count+=1
        image_data = np.array(image_data)
        yield image_data,annotation_data

def get_path_and_annotation(file_path):
    annotation = []
    img_path = []
    line_list = []
    with open(file_path,'r') as f:
        for line in f:
            temp=[]
            line = line.strip('\n')
            line_list.append(line)
        random.shuffle(line_list)

    for i in line_list:
        line = i.split(' ')
        img_path.append(line[0])
        temp = []
        temp_inner= []
        for j in range(1,len(line)):
            temp.append(line[j].split(','))
        annotation.append(temp)
    return img_path,annotation

def resize_images_boxes(image_batch,annotation_batch,max_num_box):
    batch_size = image_batch.shape[0]
    bbox = np.zeros((batch_size,max_num_box,5))
#     target_size = FLAGS.image_size
    image_size = (image_batch.shape[1],image_batch.shape[2])
    x_ratio = FLAGS.image_size/image_size[1]
    y_ratio = FLAGS.image_size/image_size[0]
    temp_image = []
    for i in range(batch_size):
        temp_image.append(cv2.resize(image_batch[i],(FLAGS.image_size,FLAGS.image_size)))
        for j in range(len(annotation_batch[i])):
            bbox[i][j][0]= np.round(annotation_batch[i][j][0]*x_ratio)
            bbox[i][j][1]= np.round(annotation_batch[i][j][1]*y_ratio)
            bbox[i][j][2]= np.round(annotation_batch[i][j][2]*x_ratio)
            bbox[i][j][3]= np.round(annotation_batch[i][j][3]*y_ratio)
            bbox[i][j][4]= annotation_batch[i][j][4]
    return np.array(temp_image,dtype=np.float32),bbox

def data_augmentation(img,annotation):
    for anno in annotation:
        coords = []
        xywh_t = []
        for i in anno[...,0:4]:
            coords.append(tl.prepro.obj_box_coord_upleft_butright_to_centroid(i))
        im_flip, coords = tl.prepro.obj_box_left_right_flip(img,coords,is_rescale=False,is_center=True)
        for i in range(len(coords)):
            xywh_t.append(tl.prepro.obj_box_coord_centroid_to_upleft_butright(coords[i]))
    return xywh_t