In [1]:
import tensorflow as tf
import os
import sys
add_dir = os.path.abspath('./data/')
sys.path.append(add_dir)
import tf_extended as tfe
import numpy as np
import cv2
import tensorflow.contrib.slim as slim
from tensorflow.python.ops import random_ops,control_flow_ops
from tensorflow.python.keras.initializers import he_normal
import math
import random
from data import ssd_common
from data import loss_function
from matplotlib import pyplot as plt
from data import custom_layers
import tensorlayer as tl
from tensorflow.python.framework.graph_util import convert_variables_to_constants

  from ._conv import register_converters as _register_converters


In [2]:
# Command-line style configuration shared by the whole notebook (read via FLAGS.<name>).
tf.app.flags.DEFINE_integer('image_size', 300, "Needs to provide same value as in training.")
tf.app.flags.DEFINE_integer('batch_size', 16, "Batch size for training.")
tf.app.flags.DEFINE_integer('num_class', 21, "Actual num of class +1.")
tf.app.flags.DEFINE_string('log_dir','./SSD_Billy/log','tensorboard directory')
tf.app.flags.DEFINE_string('checkpoint_dir','./SSD_Billy/checkpoint/','The directory where to save the parameters of the network')
FLAGS = tf.app.flags.FLAGS
# NOTE(review): presumably defined so the `-f <file>` argument that Jupyter passes
# to the kernel does not break flag parsing -- confirm.
tf.app.flags.DEFINE_string('f', '', 'kernel')

In [3]:
def random_flip(image):
    """Flip an image along a randomly chosen axis.

    A value is drawn uniformly from [-1, 1) and split into three equal bands:
    the lower band flips both axes (flip code -1), the middle band flips
    around the x-axis (flip code 0) and the upper band flips around the
    y-axis (flip code 1).

    Arguments:
      image: image array accepted by ``cv2.flip``.

    Return:
      The flipped image.
    """
    rand_num = random.random()*2-1
    # The three branches are exhaustive, so the flip code handed to cv2.flip
    # is always one of the integers -1, 0 or 1 (previously a value exactly on
    # a band boundary fell through to cv2.flip(image, rand_num) with a float).
    if rand_num < (-1/3):
        flip_code = -1
    elif rand_num < (1/3):
        flip_code = 0
    else:
        flip_code = 1
    return cv2.flip(image, flip_code)
    
def random_crop(image):
    """Trim a random margin off every side of the image.

    The margin is a fraction in [0.05, 0.10) of the corresponding dimension,
    applied to each of the four sides, so 10% to 20% of the height and of the
    width is removed in total.

    Arguments:
      image: array indexed as [row, column, ...].

    Return:
      The cropped slice of ``image``.
    """
    margin = (random.random()+1)*0.1*0.5
    height, width = image.shape[0], image.shape[1]
    top, bottom = int(height*margin), int(height*(1-margin))
    left, right = int(width*margin), int(width*(1-margin))
    return image[top:bottom, left:right]
    
def random_rotate_image(image):
    """Rotate the image by a random angle in [0, 360) degrees about its centre.

    The output keeps the input width and height; pixels that fall outside the
    source are filled by replicating the border.
    """
    height, width = image.shape[:2]
    angle = random.random()*360
    rotation = cv2.getRotationMatrix2D((width//2, height//2), angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height), borderMode=cv2.BORDER_REPLICATE)

def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
    """Randomly jitter hue, saturation and exposure of an image in HSV space.

    Arguments:
      image: image passed to ``cv2.cvtColor`` with ``COLOR_RGB2HSV``.
      hue: the hue channel is shifted by a value drawn from [-hue, hue].
      saturation: upper bound of the random saturation gain; the gain is
        either s or 1/s with s drawn from [1, saturation].
      exposure: same as ``saturation`` but applied to the value channel.

    Return:
      The distorted image converted back with ``COLOR_HSV2RGB`` as uint8.
    """
    def _rand_scale(scale):
        # Draw a gain in [1, scale] and invert it half of the time.
        factor = np.random.uniform(1, scale)
        if np.random.randint(2) == 0:
            return factor
        return 1. / factor

    # Draw the three random factors (order of the draws is kept as-is).
    hue_shift = np.random.uniform(-hue, hue)
    sat_gain = _rand_scale(saturation)
    val_gain = _rand_scale(exposure)

    # Work on a float copy in HSV space.
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')
    hue_plane = hsv[:, :, 0]  # view into hsv, edited in place below

    # Scale saturation and value.
    hsv[:, :, 1] *= sat_gain
    hsv[:, :, 2] *= val_gain

    # Shift the hue and wrap values that left the [0, 180] interval.
    hue_plane += hue_shift
    hue_plane -= (hue_plane > 180) * 180
    hue_plane += (hue_plane < 0) * 180

    # Clamp before the uint8 cast so nothing wraps around.
    np.clip(hsv, 0, 255, out=hsv)
    return cv2.cvtColor(hsv.astype('uint8'), cv2.COLOR_HSV2RGB)

def data_augmentation(image):
    """Apply the per-image augmentation chain: crop, flip, rotate, colour jitter.

    NOTE(review): a later cell of this notebook defines another
    ``data_augmentation`` taking (image_batch, annotation_batch, cls_batch);
    once that cell has run, this single-image version is shadowed.
    """
    for transform in (random_crop, random_flip, random_rotate_image):
        image = transform(image)
    return random_distort_image(image, hue=18, saturation=1.5, exposure=1.5)

In [4]:
def fully_connected(prev_layer, num_units, is_training=False):
    """Dense layer followed by batch normalization and ReLU.

    Arguments:
      prev_layer: input tensor.
      num_units: number of output units of the dense layer.
      is_training: forwarded to batch normalization as ``training``.

    Return:
      The activated output tensor.
    """
    dense_out = tf.layers.dense(prev_layer, num_units, use_bias=True, activation=None)
    normalized = tf.layers.batch_normalization(dense_out, training=is_training)
    return tf.nn.relu(normalized)

In [5]:
def conv_layer_conv(prev_layer, layer_depth, is_training=False):
    """One 3x3 conv -> batch norm -> ReLU stage followed by 2x2 max pooling.

    Arguments:
      prev_layer: input feature map.
      layer_depth: number of convolution filters.
      is_training: forwarded to batch normalization as ``training``.

    Return:
      The pooled feature map (stride-2, 'same' padding).
    """
    # NOTE(review): he_normal is given seed=0.01; seeds are normally integers -- confirm intent.
    features = tf.layers.conv2d(
        prev_layer, layer_depth, [3,3], strides=1, padding='same',
        use_bias=True,
        kernel_initializer=he_normal(seed=0.01),
        activation=None,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
    features = tf.layers.batch_normalization(features, training=is_training)
    features = tf.nn.relu(features)
    return tf.layers.max_pooling2d(features, [2,2], strides=2, padding='same')

In [6]:
def conv_layer_2conv(prev_layer, layer_depth, is_training=False):
    """Two stacked 3x3 conv -> batch norm -> ReLU stages, then 2x2 max pooling.

    Arguments:
      prev_layer: input feature map.
      layer_depth: number of filters of both convolutions.
      is_training: forwarded to batch normalization as ``training``.

    Return:
      (pooled, unpooled): the max-pooled output and the activation of the
      second stage before pooling.
    """
    activations = prev_layer
    # Layers are created in the same order as before: conv, bn, relu per stage.
    for _ in range(2):
        activations = tf.layers.conv2d(
            activations, layer_depth, [3,3], strides=1, padding='same',
            use_bias=True,
            kernel_initializer=he_normal(seed=0.01),
            activation=None,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
        activations = tf.layers.batch_normalization(activations, training=is_training)
        activations = tf.nn.relu(activations)

    pooled = tf.layers.max_pooling2d(activations, [2,2], strides=2, padding='same')
    return pooled, activations

In [7]:
def conv_layer_3conv(prev_layer, layer_depth, is_training=False):
    """Three stacked 3x3 conv -> batch norm -> ReLU stages, then 2x2 max pooling.

    Arguments:
      prev_layer: input feature map.
      layer_depth: number of filters of all three convolutions.
      is_training: forwarded to batch normalization as ``training``.

    Return:
      (pooled, unpooled): the max-pooled output and the activation of the
      third stage before pooling.
    """
    activations = prev_layer
    # Layers are created in the same order as before: conv, bn, relu per stage.
    for _ in range(3):
        activations = tf.layers.conv2d(
            activations, layer_depth, [3,3], strides=1, padding='same',
            use_bias=True,
            kernel_initializer=he_normal(seed=0.01),
            activation=None,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
        activations = tf.layers.batch_normalization(activations, training=is_training)
        activations = tf.nn.relu(activations)

    pooled = tf.layers.max_pooling2d(activations, [2,2], strides=2, padding='same')
    return pooled, activations

In [8]:
# len(anchor_sizes[0])+len(anchor_ratios[0])

In [9]:
def ssd_multibox_layer(layer,anchor_sizes,anchor_ratios,feat_shapes,normalization=False):
    """Build the class and box prediction head for one feature layer.

    A single 3x3 convolution produces, per spatial cell, 4 box offsets and
    FLAGS.num_class class logits for every anchor; the result is split and
    reshaped into the two outputs.

    Arguments:
      layer: feature map the head is attached to.
      anchor_sizes: anchor sizes of this layer.
      anchor_ratios: anchor aspect ratios of this layer.
      feat_shapes: (height, width) of the feature map.
      normalization: if > 0 the feature map is L2-normalized (with learned
        scaling) before the convolution.

    Return:
      (cls_pred, loc_pred) with shapes
      [-1, height, width, num_anchors, FLAGS.num_class] and
      [-1, height, width, num_anchors, 4].
    """
    if normalization > 0:
        layer = custom_layers.l2_normalization(layer, scaling=True)
    # Same count as ssd_anchor_one_layer: len(sizes) + len(ratios) anchors per cell.
    num_anchors = len(anchor_sizes)+len(anchor_ratios)
    num_loc_pred = num_anchors*4
    num_cls_pred = num_anchors*FLAGS.num_class
    # Use the real width instead of reusing the height for both spatial dims,
    # so non-square feature maps reshape correctly.
    feat_h, feat_w = feat_shapes[0], feat_shapes[1]

    pred = tf.layers.conv2d(layer, num_loc_pred+num_cls_pred, [3,3], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
    print(pred.shape)
    # First 4*num_anchors channels are box offsets, the remainder class logits.
    loc_pred = tf.reshape(pred[...,0:num_loc_pred],[-1,feat_h,feat_w,num_anchors,4])
    cls_pred = tf.reshape(pred[...,num_loc_pred::],[-1,feat_h,feat_w,num_anchors,FLAGS.num_class])
    return cls_pred,loc_pred

In [10]:
def ssd_anchor_one_layer(img_shape,   # shape of the original image
                         feat_shape,  # shape of the feature map
                         sizes,       # preset box sizes
                         ratios,      # aspect ratios
                         step,        # multiplier turning a grid index into image pixels
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Determines the relative position grid of the anchor centres and the
    relative height and width of every anchor shape.

    Arguments:
      img_shape: image shape (height, width) used to express everything
        relative to the image;
      feat_shape: feature map shape used to build the position grid;
      sizes: absolute reference sizes;
      ratios: aspect ratios to use on this layer;
      step: factor applied to (grid index + offset) before dividing by the
        image size;
      offset: grid offset;
      dtype: dtype of the returned arrays.

    Return:
      y, x, h, w: centre grids of shape feat_shape + (1,), and heights and
      widths of shape (len(sizes) + len(ratios),).

    Example (first SSD-300 layer): feat_shape=(38, 38), sizes=(21., 45.),
    ratios=[2, .5], step=8 gives y = (row + 0.5) * 8 / 300 with shape
    (38, 38, 1) and h, w of shape (4,).
    """
    img_h, img_w = img_shape[0], img_shape[1]

    # Centre grid, following the SSD-Caffe convention based on step values
    # rather than dividing by the feature map size.
    rows, cols = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # Trailing axis added so the grids broadcast against h and w.
    y = np.expand_dims((rows.astype(dtype) + offset) * step / img_h, axis=-1)
    x = np.expand_dims((cols.astype(dtype) + offset) * step / img_w, axis=-1)

    # Relative heights and widths, in the order of the original SSD code.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)

    # Slot 0: square anchor of the base size, e.g. 21/300.
    base = sizes[0]
    h[0] = base / img_h
    w[0] = base / img_w
    next_slot = 1
    if len(sizes) > 1:
        # Slot 1: square anchor at the geometric mean, e.g. sqrt(21*45)/300.
        geometric_mean = math.sqrt(base * sizes[1])
        h[1] = geometric_mean / img_h
        w[1] = geometric_mean / img_w
        next_slot += 1
    # Remaining slots: base size stretched by each aspect ratio.
    for slot, ratio in enumerate(ratios, next_slot):
        root = math.sqrt(ratio)
        h[slot] = base / img_h / root
        w[slot] = base / img_w * root
    return y, x, h, w

In [11]:
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute the anchor boxes of every feature layer.

    The per-layer sequences are read at the same index as ``layers_shape``.

    Return:
      A list with one ``ssd_anchor_one_layer`` result per entry of
      ``layers_shape``.
    """
    return [
        ssd_anchor_one_layer(img_shape, shape,
                             anchor_sizes[idx],
                             anchor_ratios[idx],
                             anchor_steps[idx],
                             offset=offset, dtype=dtype)
        for idx, shape in enumerate(layers_shape)
    ]

In [12]:
def read_data(img_list,annotation,batch_size,aug):
    """Endless generator yielding batches of images and their annotations.

    Images are read with OpenCV, optionally augmented, converted from BGR to
    RGB channel order and scaled to float32 values in [0, 1]. Indices wrap
    around the end of ``img_list`` so every batch has ``batch_size`` items.

    Arguments:
      img_list: list of image file paths.
      annotation: list of annotations, aligned with ``img_list``.
      batch_size: number of images per yielded batch.
      aug: if true, apply the per-image crop / flip / rotate / colour-jitter
        chain to every image.

    Yields:
      (image_data, annotation_data): two lists of length ``batch_size``.
      Images are not resized here, so they may differ in shape.

    Raises:
      IOError: if an image file cannot be read.
    """
    count = 0
    while True:
        image_data = []
        annotation_data = []
        for i in range(batch_size):
            temp_index = (i + count*batch_size) % len(img_list)
            image = cv2.imread(img_list[temp_index])
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise IOError('Could not read image: {0}'.format(img_list[temp_index]))
            if aug:
                # The single-image transforms are called directly: the name
                # data_augmentation is rebound later in the notebook to a
                # batch-level function with a different signature.
                image = random_crop(image)
                image = random_flip(image)
                image = random_rotate_image(image)
                image = random_distort_image(image, hue=18, saturation=1.5, exposure=1.5)
            # Reverse the channel axis (BGR -> RGB) and scale to [0, 1].
            image = image[:,:,::-1]
            image = image.astype(np.float32)
            image = image/255
            image_data.append(image)
            annotation_data.append(annotation[temp_index])
        count += 1
        yield image_data,annotation_data

def get_path_and_annotation(file_path):
    """Load a shuffled list of image paths and box annotations from a text file.

    Each non-empty line has the form
    ``<image path> <v,v,...> <v,v,...> ...``: the image path followed by one
    comma-separated group per box, all separated by whitespace. Blank lines
    are skipped and repeated or trailing whitespace is ignored.

    Arguments:
      file_path: path of the annotation file.

    Return:
      (img_path, annotation): the image paths in random order and, aligned
      with them, a list of boxes per image where each box is the list of its
      comma-separated fields as strings.
    """
    with open(file_path,'r') as f:
        # strip() also drops '\r' and stray spaces, not just the newline.
        line_list = [line.strip() for line in f]
    line_list = [line for line in line_list if line]
    random.shuffle(line_list)

    img_path = []
    annotation = []
    for line in line_list:
        # split() without argument collapses runs of whitespace, so no empty
        # fields end up as bogus boxes.
        fields = line.split()
        img_path.append(fields[0])
        annotation.append([box.split(',') for box in fields[1:]])
    return img_path,annotation

In [13]:
def resize_img_bbox(original_img,target_size,annotation):
    """Resize an image to a square and rescale its boxes.

    Arguments:
      original_img: image array; ``shape[0]`` is the height, ``shape[1]``
        the width.
      target_size: side length of the resized square image.
      annotation: iterable of boxes whose first four entries are
        xmin, ymin, xmax, ymax (numbers or numeric strings).

    Return:
      (img, xmin, ymin, xmax, ymax, ratio_list): the resized image, four
      lists with the rescaled integer coordinates, and one [x_ratio, y_ratio]
      pair per box.

    NOTE(review): the box scale is based on a hard-coded 300 rather than on
    ``target_size``; data_preprocessing divides by the same 300, so the two
    stay consistent. Confirm before changing either.
    """
    img = cv2.resize(original_img,(target_size,target_size))

    # The ratios depend only on the image, so compute them once.
    x_ratio = 300/original_img.shape[1]
    y_ratio = 300/original_img.shape[0]

    xmin_resized = []
    ymin_resized = []
    xmax_resized = []
    ymax_resized = []
    ratio_list = []
    for box in annotation:
        # Parse all four corners the same way so values such as "12.0" are
        # accepted for xmax/ymax as they already were for xmin/ymin.
        xmin, ymin, xmax, ymax = (int(float(box[k])) for k in range(4))
        ratio_list.append([x_ratio,y_ratio])
        xmin_resized.append(int(np.round(xmin*x_ratio)))
        ymin_resized.append(int(np.round(ymin*y_ratio)))
        xmax_resized.append(int(np.round(xmax*x_ratio)))
        ymax_resized.append(int(np.round(ymax*y_ratio)))
    return img,xmin_resized,ymin_resized,xmax_resized,ymax_resized,ratio_list

In [14]:
def data_preprocessing(image_batch,annotation_batch,cls_batch,target_size):
    """Resize a batch and build padded, normalized box and class arrays.

    Arguments:
      image_batch: list of images.
      annotation_batch: per image, a list of [xmin, ymin, xmax, ymax] boxes.
      cls_batch: per image, a list of class ids aligned with the boxes.
      target_size: side length the images are resized to.

    Return:
      (image_batch, anno_batch_list, cls_batch_list): a float32 array of the
      resized images, and two lists with one array per image holding the
      boxes as [ymin, xmin, ymax, xmax] (float32) and the class ids (int32).
      Images with fewer than 60 boxes are padded with all-zero boxes of
      class 0 up to 60 entries.
    """
    coord_scale = 300   # resize_img_bbox returns coordinates on a 300-pixel scale
    max_boxes = 60      # boxes per image after padding

    images = []
    anno_batch_list = []
    cls_batch_list = []
    for img_idx, annotations in enumerate(annotation_batch):
        resized, xmin, ymin, xmax, ymax, _ = resize_img_bbox(image_batch[img_idx],target_size,annotations)
        images.append(resized)

        num_boxes = len(annotations)
        boxes = [[ymin[j]/coord_scale, xmin[j]/coord_scale, ymax[j]/coord_scale, xmax[j]/coord_scale]
                 for j in range(num_boxes)]
        classes = [int(cls_batch[img_idx][j]) for j in range(num_boxes)]

        # Pad up to max_boxes; images with more boxes are left unpadded.
        padding = max(0, max_boxes - num_boxes)
        boxes.extend([0,0,0,0] for _ in range(padding))
        classes.extend([0] * padding)

        anno_batch_list.append(np.array(boxes,dtype=np.float32))
        cls_batch_list.append(np.array(classes,dtype=np.int32))

    image_batch = np.array(images,dtype=np.float32)
    return image_batch,anno_batch_list,cls_batch_list

In [15]:
def stack_tensor(tensor):
    """Regroup per-sample, per-layer tensors into one stacked tensor per layer.

    ``tensor[b][k]`` is read for the first FLAGS.batch_size samples ``b`` and
    for the 6 layers ``k``; for each layer the samples are stacked along a
    new leading axis.

    Return:
      A list of 6 stacked tensors.
    """
    return [
        tf.stack([tensor[sample][layer_idx] for sample in range(FLAGS.batch_size)], axis=0)
        for layer_idx in range(6)
    ]

In [16]:
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    """Build the SSD loss: positive / negative cross-entropy and localization.

    The per-layer inputs are flattened and concatenated so that every anchor
    of every layer and sample becomes one row.

    Arguments:
      logits: list (one per feature layer) of class logit tensors.
      localisations: list of predicted box offset tensors.
      gclasses: list of ground-truth class tensors.
      glocalisations: list of ground-truth (encoded) box tensors.
      gscores: list of ground-truth matching score tensors.
      match_threshold: anchors with a score above this value are positives.
      negative_ratio: number of negatives kept per positive.
      alpha: weight applied to the localization loss.
      label_smoothing: not referenced in the body.
      device: not referenced in the body.
      scope: optional name scope.

    Return:
      (cross_entropy_pos_loss, cross_entropy_neg_loss, localization_loss).
      Each term is a sum over the selected anchors divided by the batch size
      and is also registered through tf.losses.add_loss.
    """
    with tf.name_scope(scope, 'ssd_losses'):
        # Last dim of the first logits tensor is the class count, first dim the batch size.
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # Concatenate the flattened tensors of all layers.
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        #################################someone on github#########
#         pmask = tf.concat(axis=0, values=[pmask[:tf.argmax(gscores)],[True],pmask[tf.argmax(gscores)+1:]])
        ###########################################################
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        # no_classes is 0 for every non-positive anchor; it is the label used
        # by the negative cross-entropy term below.
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        # Candidate negatives: not positive and score above -0.5.
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        # Class-0 probability for candidates, 1.0 for everything else
        # (fnmask is 0 wherever nmask is false).
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # top_k on the negated values picks the n_neg smallest class-0
        # probabilities; the last one is used as the selection threshold.
        # NOTE(review): val[-1] presumably fails if n_neg evaluates to 0 -- confirm.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        # Strict comparison: entries equal to the threshold are not kept.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), tf.cast(batch_size,dtype), name='value')
            tf.losses.add_loss(loss)
            cross_entropy_pos_loss = loss
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), tf.cast(batch_size,dtype), name='value')
            tf.losses.add_loss(loss)
            cross_entropy_neg_loss = loss
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: alpha on positive anchors, 0 elsewhere.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), tf.cast(batch_size,dtype), name='value')
            tf.losses.add_loss(loss)
            localization_loss = loss
        return cross_entropy_pos_loss,cross_entropy_neg_loss,localization_loss

In [17]:
# some constant values
# Names of the end points the prediction heads are attached to.
feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
# Spatial (height, width) of each of those feature maps.
feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
# Not referenced elsewhere in the visible part of the notebook.
anchor_size_bounds=[0.15, 0.90]
# Per layer: the anchor sizes in pixels handed to ssd_anchor_one_layer.
anchor_sizes=[(21., 45.),
                      (45., 99.),
                      (99., 153.),
                      (153., 207.),
                      (207., 261.),
                      (261., 315.)]
# Per layer: additional aspect ratios; each adds one anchor per cell.
anchor_ratios=[[2, .5],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5],
                       [2, .5]]                                                                      
# Per layer: multiplier converting a grid index to image pixels.
anchor_steps=[8, 16, 32, 64, 100, 300]
# The cells below pass offset=0.5 literally instead of reading this value.
anchor_offset=0.5
# A value > 0 makes ssd_multibox_layer L2-normalize that layer (block4 only).
normalizations=[20, -1, -1, -1, -1, -1]
# The decode call below passes the same list literally instead of this name.
prior_scaling=[0.1, 0.1, 0.2, 0.2]


In [18]:
#####################the start of bottle neck##################
# Graph inputs: image batch, per-image class ids and per-image boxes.
inputs = tf.placeholder(tf.float32, [None, FLAGS.image_size, FLAGS.image_size, 3],name='inputs')
labels = tf.placeholder(tf.int32, [None,None])
bboxes = tf.placeholder(tf.float32, [None,None,4])
# keep_prob = tf.placeholder(tf.float32)
# Switches batch normalization between training and inference behaviour.
is_training = tf.placeholder(tf.bool,name='is_training')
# Feature maps the prediction heads attach to, keyed by the names in feat_layers.
end_points={}
layer = inputs 
# Backbone: conv1-conv5 blocks, two 1024-channel layers (FC6/FC7) and the
# extra layers conv8-conv11. The six tensors stored in end_points feed the
# prediction heads.
#conv1
layer,_ = conv_layer_2conv(layer, 64, is_training=is_training)
#conv2
layer,_ = conv_layer_2conv(layer, 128, is_training=is_training)
#conv3
layer,_ = conv_layer_3conv(layer, 256, is_training=is_training)
#conv4
# block4 is the conv4 activation before pooling.
layer,foot_stage1 = conv_layer_3conv(layer, 512, is_training=is_training)
end_points['block4'] = foot_stage1
#conv5
layer = tf.layers.conv2d(layer, 512, [3,3], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
layer = tf.layers.conv2d(layer, 512, [3,3], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
# 3x3 pooling with stride 1 keeps the spatial size; here ReLU comes after the pool.
layer = tf.layers.max_pooling2d(layer,[3,3],strides=1,padding='same')
layer = tf.nn.relu(layer)

#FC6
# Dilated (rate 6) 3x3 convolution built with slim; unlike the other layers
# no batch normalization follows it.
layer = slim.conv2d(layer, 1024, [3, 3], rate=6, scope='conv6')

#FC7
layer = tf.layers.conv2d(layer, 1024, [1,1], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
end_points['block7'] = layer

#conv8
# 1x1 channel reduction followed by a stride-2 3x3 convolution.
layer = tf.layers.conv2d(layer, 256, [1,1], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
layer = tf.layers.conv2d(layer, 512, [3,3], 2, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
end_points['block8'] = layer

#conv9
# Same pattern as conv8 with fewer channels.
layer = tf.layers.conv2d(layer, 128, [1,1], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
layer = tf.layers.conv2d(layer, 256, [3,3], 2, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
end_points['block9'] = layer

#conv10
# The 3x3 convolution uses 'valid' padding with stride 1, shrinking each side by 2.
layer = tf.layers.conv2d(layer, 128, [1,1], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
layer = tf.layers.conv2d(layer, 256, [3,3], 1, 'valid', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
end_points['block10'] = layer

#conv11
# Same pattern as conv10.
layer = tf.layers.conv2d(layer, 128, [1,1], 1, 'same', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
layer = tf.layers.conv2d(layer, 256, [3,3], 1, 'valid', use_bias=True,kernel_initializer=he_normal(seed=0.01),activation=None,kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
layer = tf.layers.batch_normalization(layer, training=is_training)
layer = tf.nn.relu(layer)
end_points['block11'] = layer



# One prediction head per feature layer; results are collected per layer.
predictions =[]
logits = []
localisations=[]
# Note: the loop variable rebinds `layer` from a tensor to the layer name.
for i,layer in enumerate(feat_layers):
    with tf.variable_scope(layer + '_box'):
        p, l = ssd_multibox_layer(end_points[layer],anchor_sizes[i],anchor_ratios[i],feat_shapes[i],normalizations[i])
        predictions.append(tf.contrib.slim.softmax(p))
        logits.append(p)########(layers,batch_size,featuremap,featuremap,boxes_layer,num_class)
        localisations.append(l)#####(layers,batch_size,featuremap,featuremap,boxes_layer,4)
        
# generate the anchor boxes for each feature layer
anchor_layers_original = ssd_anchors_all_layers((FLAGS.image_size,FLAGS.image_size),
                           feat_shapes,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32)

################     encode bboxes to each layer      #################
# The encoding is unrolled over the first FLAGS.batch_size rows of the
# labels / bboxes placeholders, one call per sample.
feat_labels = []
feat_localizations = []
feat_scores = []
for i in range(FLAGS.batch_size):
    # NOTE(review): meaning of the trailing positional 0 is defined in ssd_common -- confirm.
    feat_labels_temp, feat_localizations_temp, feat_scores_temp = ssd_common.tf_ssd_bboxes_encode(labels[i],bboxes[i],anchor_layers_original,FLAGS.num_class,0)
    feat_labels.append(feat_labels_temp)
    feat_localizations.append(feat_localizations_temp)
    feat_scores.append(feat_scores_temp) 

# Turn the per-sample lists into one stacked tensor per feature layer.
feat_labels = stack_tensor(feat_labels)
feat_localizations = stack_tensor(feat_localizations)
feat_scores = stack_tensor(feat_scores)

(?, 38, 38, 100)
(?, 19, 19, 150)
(?, 10, 10, 150)
(?, 5, 5, 150)
(?, 3, 3, 100)
(?, 1, 1, 100)


In [19]:
##################################This part is used for inference #########################
# Decode the predicted offsets of every layer against that layer's anchors.
decoded_boxes = ssd_common.tf_ssd_bboxes_decode(localisations,
                         anchor_layers_original,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         scope='ssd_bboxes_decode')

def reshape_tensors(localisations_layer,logits_layer,predictions_layer):
    """Flatten one layer's boxes, logits and class probabilities.

    Every input is reshaped to a leading dimension of 1 followed by one row
    per box: (1, -1, 4) for the boxes and (1, -1, FLAGS.num_class) for the
    logits and predictions.

    Return:
      (localisations_layer, logits_layer, predictions_layer) reshaped.
    """
    # Use the configured class count (default 21) rather than a literal, in
    # line with ssd_multibox_layer which sizes the heads from the same flag.
    num_class = FLAGS.num_class
    localisations_layer = tf.reshape(localisations_layer,(1,-1,4))
    logits_layer = tf.reshape(logits_layer,(1,-1,num_class))
    predictions_layer = tf.reshape(predictions_layer,(1,-1,num_class))
    return localisations_layer,logits_layer,predictions_layer

# Flatten every layer and concatenate along the box axis so that all
# candidate boxes sit in one tensor with a leading dimension of 1.
reshaped_locals = []
reshaped_logits = []
reshaped_predictions = []
for i in range(len(feat_layers)):
    temp_locals,temp_logits,temp_predictions = reshape_tensors(decoded_boxes[i],logits[i],predictions[i])
    reshaped_locals.append(temp_locals)
    reshaped_logits.append(temp_logits)
    reshaped_predictions.append(temp_predictions)
reshaped_locals = tf.concat(reshaped_locals,axis=1)
reshaped_logits = tf.concat(reshaped_logits,axis=1)
reshaped_predictions = tf.concat(reshaped_predictions,axis=1)

# Best class and its probability for every box.
classes = tf.cast(tf.argmax(reshaped_predictions,axis=2),tf.int32)
scores = tf.reduce_max(reshaped_predictions,axis=2)
#remove boxes belongs to background
scores = scores * tf.cast(classes >0, scores.dtype)
#remove boxes with low scores
mask = tf.greater(scores, 0.5)
classes = classes * tf.cast(mask, classes.dtype)
scores = scores * tf.cast(mask, scores.dtype)
# Clip the box corners to the [0, 1] range.
ymin = tf.zeros_like(reshaped_locals[...,0])
xmin = tf.zeros_like(reshaped_locals[...,1])
ymax = tf.ones_like(reshaped_locals[...,2])
xmax = tf.ones_like(reshaped_locals[...,3])

ymin = tf.maximum(reshaped_locals[...,0],ymin)
xmin = tf.maximum(reshaped_locals[...,1],xmin)
ymax = tf.minimum(reshaped_locals[...,2],ymax)
xmax = tf.minimum(reshaped_locals[...,3],xmax)
boxes = tf.stack([ymin,xmin,ymax,xmax],axis = -1)

#start to remove boxes
# Indices (taken from row 0) of boxes that are non-background with score >= 0.5.
mask_score = tf.greater_equal(scores,0.5)
mask_class = tf.not_equal(classes, 0)
total_mask = tf.logical_and(mask_score,mask_class)
total_index = tf.where(total_mask[0])
#keep only non-background boxes whose score passed the threshold
scores = tf.gather(scores,total_index[:,0],axis=1)
classes = tf.gather(classes,total_index[:,0],axis=1)
boxes = tf.gather(boxes,total_index[:,0],axis=1)
# Non-maximum suppression: at most 50 boxes, IoU threshold 0.5.
nms_index = tf.image.non_max_suppression(boxes[0], scores[0],50, iou_threshold=0.5)
#keep only the boxes selected by NMS; these named tensors are the outputs
scores = tf.gather(scores[0],nms_index,name='scores')
classes = tf.gather(classes[0],nms_index,name='classes')
boxes = tf.gather(boxes[0],nms_index,name='boxes')

In [20]:
# Collects, per sample, the labels/boxes whose third coordinate exceeds the first.
# NOTE(review): temp_label / temp_box are rebuilt on every iteration and are
# not read anywhere else in the visible notebook.
# NOTE(review): the appends index labels[j] / bboxes[j] with the box index j
# only, not [i][j] -- looks unintended, confirm.
# NOTE(review): index[j] is a graph tensor; comparing it to True with `==`
# presumably does not evaluate the tensor's value in TF 1.x -- confirm.
for i in range(FLAGS.batch_size):
    index = (bboxes[i][...,2]-bboxes[i][...,0])>0
    temp_label =[]
    temp_box = []
    for j in range(60):
        if(index[j] == True):
            temp_label.append(labels[j])
            temp_box.append(bboxes[j])

In [21]:
###########need to be modified############################################

# for k in range(FLAGS.batch_size):
#     loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = temp_logits[k],labels = feat_labels[k])
pos_loss,neg_loss,loc_loss = ssd_losses(logits, # predicted classes
                   localisations,# predicted locations
                   feat_labels, # ground truth classes
                   feat_localizations, # ground truth locations
                   feat_scores,# ground truth scores
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   scope='ssd_losses')
# Training objective: plain sum of the three terms returned above.
# NOTE(review): the kernel_regularizer terms of the conv layers are not added
# here -- confirm whether that is intended.
total_losses = pos_loss+neg_loss+loc_loss

In [22]:
# Load the (shuffled) training list and create the endless batch generator;
# the final False disables per-image augmentation inside read_data.
img_path, annotation = get_path_and_annotation('./VOC2007/train/train_OCR.txt')
train_feeder = read_data(img_path,annotation,FLAGS.batch_size,False)

In [23]:
#########
def split_cls_anno(annotation_batch):
    """Split raw annotations into integer boxes and integer class ids.

    Arguments:
      annotation_batch: per image, a list of entries whose first four fields
        are xmin, ymin, xmax, ymax and whose fifth field is the class id.

    Return:
      (boxes, classes): per image, the list of [xmin, ymin, xmax, ymax]
      boxes and the list of class ids, all converted with ``int``.
    """
    boxes_per_image = []
    classes_per_image = []
    for image_entries in annotation_batch:
        image_boxes = []
        image_classes = []
        for entry in image_entries:
            image_boxes.append([int(entry[k]) for k in range(4)])  # xmin,ymin,xmax,ymax
            image_classes.append(int(entry[4]))
        boxes_per_image.append(image_boxes)
        classes_per_image.append(image_classes)
    return boxes_per_image, classes_per_image
def data_augmentation(image_batch,annotation_batch,cls_batch):
    """Randomly shift and zoom a batch of images together with their boxes.

    Boxes are converted from corner form to [x, y, w, h] for the
    tensorlayer helpers and back to corner form afterwards. The first
    FLAGS.batch_size samples of the inputs are processed.

    NOTE(review): this definition reuses the name of the single-image
    ``data_augmentation(image)`` defined in an earlier cell and replaces it.

    Arguments:
      image_batch: list of images.
      annotation_batch: per image, a list of [xmin, ymin, xmax, ymax] boxes.
      cls_batch: per image, the class ids aligned with the boxes.

    Return:
      (images, boxes, classes) after augmentation, boxes again as
      [xmin, ymin, xmax, ymax].
    """
    # Corner form -> [x_left, y_top, width, height] as used by tensorlayer here.
    xywh_batch = [
        [[int(b[0]), int(b[1]), (int(b[2])-int(b[0])), (int(b[3])-int(b[1]))] for b in boxes]
        for boxes in annotation_batch
    ]

    aug_images = []
    aug_coords = []
    aug_classes = []
    for idx in range(FLAGS.batch_size):
        img, cls, coords = tl.prepro.obj_box_shift(image_batch[idx], cls_batch[idx],coords=xywh_batch[idx],fill_mode='constant',is_rescale=False)
        img, cls, coords = tl.prepro.obj_box_zoom(img,cls,coords=coords,fill_mode='constant',is_rescale=False)
        aug_images.append(img)
        aug_coords.append(coords)
        aug_classes.append(cls)

    # [x, y, w, h] -> corner form.
    corner_batch = [
        [[b[0], b[1], b[2]+b[0], b[3]+b[1]] for b in boxes]
        for boxes in aug_coords
    ]
    return aug_images,corner_batch,aug_classes



In [24]:
def test_aug_func(image_batch,annotation_batch):
###show whether the image and the annotation are compatible after data augmentation
    anno_batch = np.array(annotation_batch)*300
    for index_img in range(FLAGS.batch_size):
        for i in range(len(anno_batch[index_img])):
            plt.hlines(int(anno_batch[index_img][i][0]),int(anno_batch[index_img][i][1]),int(anno_batch[index_img][i][3]),colors='red')
            plt.hlines(int(anno_batch[index_img][i][2]),int(anno_batch[index_img][i][1]),int(anno_batch[index_img][i][3]),colors='red')
            plt.vlines(int(anno_batch[index_img][i][1]),int(anno_batch[index_img][i][0]),int(anno_batch[index_img][i][2]),colors='red')
            plt.vlines(int(anno_batch[index_img][i][3]),int(anno_batch[index_img][i][0]),int(anno_batch[index_img][i][2]),colors='red')
        plt.imshow(image_batch[index_img])
        plt.show()

In [25]:
# [image_batch,annotation_batch] = next(train_feeder)
# anno_batch,cls_batch = split_cls_anno(annotation_batch)
# if(random.random()>0.5):
#     image_batch,anno_batch,cls_batch = data_augmentation(image_batch,anno_batch,cls_batch)
# image_batch,anno_batch,cls_batch = data_preprocessing(image_batch,anno_batch,cls_batch,FLAGS.image_size)
# test_aug_func(image_batch,anno_batch)

In [27]:
# Step size handed to the Adam optimizer below.
learning_rate = 0.0001
# Register the total loss as a scalar summary tagged 'loss'.
tf.summary.scalar('loss',total_losses)
# Ops registered under UPDATE_OPS (typically batch-norm moving-average
# updates -- confirm against the network definition).
update_opts = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Make the training op depend on those update ops so they run on every step.
with tf.control_dependencies([tf.group(*update_opts)]):
    train_opt = tf.train.AdamOptimizer(learning_rate).minimize(total_losses)

In [None]:
with tf.Session() as sess:
    # Merge every summary registered on the graph (the 'loss' scalar) so the
    # FileWriter below actually receives events. merge_all() returns None when
    # no summaries exist.
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir,sess.graph)
    saver = tf.train.Saver(max_to_keep=10)
    sess.run(tf.global_variables_initializer())
    # Saver.save() does not create a missing checkpoint directory.
    tf.gfile.MakeDirs(FLAGS.checkpoint_dir)
    ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if ckpt:
        saver.restore(sess,ckpt)
        print('Restore from the checkpoint {0}'.format(ckpt))
    else:
        print('Train ssd from start')
    # NOTE(review): the loop counter restarts at 0 after a restore, so the
    # checkpoint step numbers do not continue from the restored checkpoint.
    fetches = [train_opt,total_losses,pos_loss,neg_loss,loc_loss,feat_labels,feat_scores,feat_localizations]
    if summary_op is not None:
        fetches.append(summary_op)
    try:
        for i in range(50000):
            [image_batch,annotation_batch] = next(train_feeder)
            anno_batch,cls_batch = split_cls_anno(annotation_batch)
            # Augment roughly half of the batches.
            if(random.random()>0.5):
                image_batch,anno_batch,cls_batch = data_augmentation(image_batch,anno_batch,cls_batch)
            image_batch,anno_batch,cls_batch = data_preprocessing(image_batch,anno_batch,cls_batch,FLAGS.image_size)
            results = sess.run(fetches,feed_dict={inputs:image_batch,bboxes:anno_batch,labels:cls_batch,is_training: True})
            [_,temp_loss,temp_pos_loss,temp_neg_loss,temp_loc_loss,temp_label,temp_score,temp_loc] = results[:8]
            if summary_op is not None:
                # Log the merged summaries for this step to TensorBoard.
                train_writer.add_summary(results[8],i)
            print('Batch: '+str(i)+'   Current total loss: '+str(temp_loss)+' pos_loss: '+str(temp_pos_loss)+' neg_loss: '+str(temp_neg_loss)+' loc_loss: '+str(temp_loc_loss))
            if(temp_loss==0):
                break
            if(i%500==0):
                # Checkpoint used to resume training.
                saver.save(sess,os.path.join(FLAGS.checkpoint_dir,'OCR-'),global_step=i)
                # Frozen graph used for inference: variables reachable from
                # the listed output nodes are converted to constants.
                constant_graph = convert_variables_to_constants(sess, sess.graph_def, ['boxes','scores','classes'])
                # GFile replaces the deprecated FastGFile.
                with tf.gfile.GFile('./model.pb', mode='wb') as f:
                    f.write(constant_graph.SerializeToString())
                print('Model saved !!!')
    finally:
        # Flush pending events and release the log file even when training
        # stops early or is interrupted.
        train_writer.close()

INFO:tensorflow:Restoring parameters from ./SSD_Billy/checkpoint/OCR--6000
Restore from the checkpoint ./SSD_Billy/checkpoint/OCR--6000
Batch: 0   Current total loss: 89.62239 pos_loss: 49.481537 neg_loss: 13.133539 loc_loss: 27.007313
INFO:tensorflow:Froze 141 variables.
INFO:tensorflow:Converted 141 variables to const ops.
Model saved !!!
Batch: 1   Current total loss: 69.196785 pos_loss: 37.56295 neg_loss: 11.07383 loc_loss: 20.560005
Batch: 2   Current total loss: 66.40064 pos_loss: 38.33261 neg_loss: 11.254723 loc_loss: 16.81331
Batch: 3   Current total loss: 88.99429 pos_loss: 49.19397 neg_loss: 13.165367 loc_loss: 26.634958
Batch: 4   Current total loss: 77.55124 pos_loss: 38.896046 neg_loss: 11.854639 loc_loss: 26.800556
Batch: 5   Current total loss: 80.783424 pos_loss: 43.609238 neg_loss: 11.30732 loc_loss: 25.866867
Batch: 6   Current total loss: 68.222206 pos_loss: 34.283813 neg_loss: 11.484272 loc_loss: 22.45412
Batch: 7   Current total loss: 62.215393 pos_loss: 31.832573 

Batch: 80   Current total loss: 81.077255 pos_loss: 39.33818 neg_loss: 14.254686 loc_loss: 27.484388
Batch: 81   Current total loss: 76.90115 pos_loss: 42.336395 neg_loss: 12.457512 loc_loss: 22.107243
Batch: 82   Current total loss: 123.037704 pos_loss: 64.438515 neg_loss: 19.064995 loc_loss: 39.534195
Batch: 83   Current total loss: 114.19418 pos_loss: 63.285526 neg_loss: 16.925566 loc_loss: 33.98309
Batch: 84   Current total loss: 82.41275 pos_loss: 42.48179 neg_loss: 14.542297 loc_loss: 25.388664
Batch: 85   Current total loss: 81.37343 pos_loss: 38.696774 neg_loss: 15.6295395 loc_loss: 27.047112
Batch: 86   Current total loss: 76.98892 pos_loss: 36.855556 neg_loss: 14.66167 loc_loss: 25.471693
Batch: 87   Current total loss: 44.93946 pos_loss: 23.996805 neg_loss: 9.610575 loc_loss: 11.332081
Batch: 88   Current total loss: 104.486694 pos_loss: 54.04097 neg_loss: 17.019035 loc_loss: 33.42669
Batch: 89   Current total loss: 90.458084 pos_loss: 48.06028 neg_loss: 15.479527 loc_loss: 

Batch: 161   Current total loss: 66.976036 pos_loss: 32.96934 neg_loss: 12.550634 loc_loss: 21.456062
Batch: 162   Current total loss: 65.25969 pos_loss: 31.773296 neg_loss: 12.311502 loc_loss: 21.174885
Batch: 163   Current total loss: 75.86702 pos_loss: 36.60206 neg_loss: 14.705458 loc_loss: 24.559502
Batch: 164   Current total loss: 58.415497 pos_loss: 29.285196 neg_loss: 10.186981 loc_loss: 18.943321
Batch: 165   Current total loss: 52.24426 pos_loss: 22.746368 neg_loss: 10.982 loc_loss: 18.515888
Batch: 166   Current total loss: 96.293655 pos_loss: 48.49903 neg_loss: 16.33751 loc_loss: 31.457119
Batch: 167   Current total loss: 70.24863 pos_loss: 36.03975 neg_loss: 13.257427 loc_loss: 20.951454
Batch: 168   Current total loss: 73.22569 pos_loss: 36.92392 neg_loss: 13.07002 loc_loss: 23.23175
Batch: 169   Current total loss: 66.13958 pos_loss: 36.4268 neg_loss: 11.672482 loc_loss: 18.040297
Batch: 170   Current total loss: 60.973522 pos_loss: 32.315914 neg_loss: 11.883071 loc_loss:

Batch: 242   Current total loss: 84.80569 pos_loss: 43.032307 neg_loss: 14.097737 loc_loss: 27.675646
Batch: 243   Current total loss: 72.952255 pos_loss: 39.079704 neg_loss: 13.5714855 loc_loss: 20.301067
Batch: 244   Current total loss: 55.243652 pos_loss: 29.001678 neg_loss: 11.300584 loc_loss: 14.941389
Batch: 245   Current total loss: 92.9991 pos_loss: 46.305176 neg_loss: 16.214825 loc_loss: 30.479103
Batch: 246   Current total loss: 43.67484 pos_loss: 19.072723 neg_loss: 10.182182 loc_loss: 14.419933
Batch: 247   Current total loss: 57.01858 pos_loss: 26.276794 neg_loss: 11.974293 loc_loss: 18.767494
Batch: 248   Current total loss: 75.778496 pos_loss: 40.183773 neg_loss: 12.643701 loc_loss: 22.951023
Batch: 249   Current total loss: 100.71057 pos_loss: 51.122658 neg_loss: 16.600208 loc_loss: 32.9877
Batch: 250   Current total loss: 84.9966 pos_loss: 41.010998 neg_loss: 14.490461 loc_loss: 29.495142
Batch: 251   Current total loss: 87.86108 pos_loss: 44.90435 neg_loss: 14.767557 

Batch: 323   Current total loss: 81.69571 pos_loss: 40.69918 neg_loss: 14.313991 loc_loss: 26.682533
Batch: 324   Current total loss: 56.14097 pos_loss: 27.156004 neg_loss: 11.1118965 loc_loss: 17.873068
Batch: 325   Current total loss: 41.58114 pos_loss: 19.104109 neg_loss: 10.2452965 loc_loss: 12.231735
Batch: 326   Current total loss: 49.276222 pos_loss: 24.247168 neg_loss: 11.048037 loc_loss: 13.98102
Batch: 327   Current total loss: 55.590267 pos_loss: 27.311869 neg_loss: 11.571335 loc_loss: 16.707066
Batch: 328   Current total loss: 58.85713 pos_loss: 27.66961 neg_loss: 12.837869 loc_loss: 18.349651
Batch: 329   Current total loss: 51.018124 pos_loss: 29.008524 neg_loss: 9.352968 loc_loss: 12.656632
Batch: 330   Current total loss: 106.2487 pos_loss: 56.10235 neg_loss: 17.419788 loc_loss: 32.726562
Batch: 331   Current total loss: 75.78421 pos_loss: 36.213657 neg_loss: 14.574968 loc_loss: 24.99558
Batch: 332   Current total loss: 47.015938 pos_loss: 24.620832 neg_loss: 8.614192 l

Batch: 404   Current total loss: 83.84166 pos_loss: 42.006493 neg_loss: 15.650294 loc_loss: 26.184868
Batch: 405   Current total loss: 67.469376 pos_loss: 31.35815 neg_loss: 13.538634 loc_loss: 22.57259
Batch: 406   Current total loss: 90.28976 pos_loss: 44.163925 neg_loss: 17.648516 loc_loss: 28.477318
Batch: 407   Current total loss: 72.83812 pos_loss: 34.89322 neg_loss: 14.654341 loc_loss: 23.290562
Batch: 408   Current total loss: 96.01322 pos_loss: 46.54315 neg_loss: 18.236317 loc_loss: 31.233757
Batch: 409   Current total loss: 61.807503 pos_loss: 31.273693 neg_loss: 10.548139 loc_loss: 19.985672
Batch: 410   Current total loss: 72.29126 pos_loss: 36.907608 neg_loss: 11.669362 loc_loss: 23.714287
Batch: 411   Current total loss: 67.60034 pos_loss: 35.351288 neg_loss: 12.768734 loc_loss: 19.480324
Batch: 412   Current total loss: 39.884064 pos_loss: 21.926506 neg_loss: 9.012698 loc_loss: 8.944859
Batch: 413   Current total loss: 87.66872 pos_loss: 40.55926 neg_loss: 14.945772 loc_

Batch: 485   Current total loss: 93.3303 pos_loss: 48.394806 neg_loss: 16.49788 loc_loss: 28.437614
Batch: 486   Current total loss: 66.451965 pos_loss: 32.535812 neg_loss: 12.768059 loc_loss: 21.14809
Batch: 487   Current total loss: 58.739883 pos_loss: 32.221497 neg_loss: 10.360068 loc_loss: 16.158316
Batch: 488   Current total loss: 65.80395 pos_loss: 34.98018 neg_loss: 11.060583 loc_loss: 19.763184
Batch: 489   Current total loss: 93.80365 pos_loss: 49.529106 neg_loss: 14.553515 loc_loss: 29.721033
Batch: 490   Current total loss: 68.37135 pos_loss: 33.15762 neg_loss: 13.842711 loc_loss: 21.371017
Batch: 491   Current total loss: 54.410652 pos_loss: 26.74206 neg_loss: 11.305384 loc_loss: 16.363207
Batch: 492   Current total loss: 54.19674 pos_loss: 27.04227 neg_loss: 11.558609 loc_loss: 15.595859
Batch: 493   Current total loss: 100.8327 pos_loss: 52.024185 neg_loss: 17.24998 loc_loss: 31.55853
Batch: 494   Current total loss: 84.88823 pos_loss: 42.84957 neg_loss: 13.407888 loc_los

Batch: 565   Current total loss: 89.77366 pos_loss: 44.538532 neg_loss: 16.588642 loc_loss: 28.646484
Batch: 566   Current total loss: 57.220314 pos_loss: 26.214296 neg_loss: 11.931675 loc_loss: 19.07434
Batch: 567   Current total loss: 62.667038 pos_loss: 32.052834 neg_loss: 11.203445 loc_loss: 19.410759
Batch: 568   Current total loss: 57.917442 pos_loss: 26.582607 neg_loss: 11.495725 loc_loss: 19.839111
Batch: 569   Current total loss: 39.34737 pos_loss: 16.460726 neg_loss: 10.677675 loc_loss: 12.20897
Batch: 570   Current total loss: 57.41555 pos_loss: 30.260784 neg_loss: 10.989801 loc_loss: 16.164963
Batch: 571   Current total loss: 59.89202 pos_loss: 28.757816 neg_loss: 13.256561 loc_loss: 17.877644
Batch: 572   Current total loss: 99.14696 pos_loss: 52.836365 neg_loss: 16.020323 loc_loss: 30.290268
Batch: 573   Current total loss: 60.88717 pos_loss: 31.775572 neg_loss: 9.957527 loc_loss: 19.15407
Batch: 574   Current total loss: 63.04441 pos_loss: 31.684752 neg_loss: 12.299416 l

# Corner coordinates from centre/size values: offset each centre by half the
# extent. x, y, w and h must already be defined by an earlier cell.
ymin, ymax = y - h / 2, y + h / 2
xmin, xmax = x - w / 2, x + w / 2

In [None]:
def jaccard_with_anchors(bbox,ymin,xmin,ymax,xmax):
    """Compute the Jaccard score (IoU) between one box and the anchors.

    Args:
        bbox: box indexed as ``[ymin, xmin, ymax, xmax]``.
        ymin, xmin, ymax, xmax: anchor corner coordinates; scalars or arrays
            that broadcast against each other.

    Returns:
        Intersection area divided by union area for every anchor. Anchors
        whose union area with ``bbox`` is not positive (degenerate boxes)
        score 0 instead of NaN/inf.
    """
    # Corners of the intersection rectangle.
    inter_ymin = np.maximum(ymin, bbox[0])
    inter_xmin = np.maximum(xmin, bbox[1])
    inter_ymax = np.minimum(ymax, bbox[2])
    inter_xmax = np.minimum(xmax, bbox[3])
    # Clamp at zero so disjoint boxes contribute no intersection.
    inter_h = np.maximum(inter_ymax - inter_ymin, 0.)
    inter_w = np.maximum(inter_xmax - inter_xmin, 0.)
    # Areas.
    inter_area = inter_h * inter_w
    anchor_area = (xmax - xmin) * (ymax - ymin)
    bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
    union_area = anchor_area - inter_area + bbox_area
    # Silence the divide warnings; the invalid entries are replaced below.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.divide(inter_area, union_area)
    # [()] unwraps the 0-d array np.where produces for scalar inputs and
    # leaves real arrays untouched.
    return np.where(union_area > 0, ratio, 0.)[()]

In [None]:
# Score the first box of the first sample against the anchor corners
# (ymin/xmin/ymax/xmax) currently in the notebook namespace.
jaccard_with_anchors(anno_batch[0][0],ymin,xmin,ymax,xmax)

In [None]:
plt.figure(figsize=(20,20))
# Box columns of the first sample, each multiplied by 300; the plotting calls
# below use them as ymin, xmin, ymax, xmax.
_box_ymin = anno_batch[0][...,0]*300
_box_xmin = anno_batch[0][...,1]*300
_box_ymax = anno_batch[0][...,2]*300
_box_xmax = anno_batch[0][...,3]*300
# Top and bottom edges, then left and right edges, of every box.
plt.hlines(_box_ymin,_box_xmin,_box_xmax,colors='red')
plt.hlines(_box_ymax,_box_xmin,_box_xmax,colors='red')
plt.vlines(_box_xmin,_box_ymin,_box_ymax,colors='red')
plt.vlines(_box_xmax,_box_ymin,_box_ymax,colors='red')

plt.imshow(image_batch[0])