In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os 
import sys
import math
from tensorflow.python.keras.initializers import he_normal

In [None]:
# Configuration flags (TF1 `tf.app.flags`) read by the cells below through `FLAGS`.
# `log_dir` is declared but not read by any cell visible in this notebook.
tf.app.flags.DEFINE_integer('image_size', 416, "Needs to provide same value as in training.")
tf.app.flags.DEFINE_string('checkpoint_dir', './checkpoint', 'the checkpoint dir')
tf.app.flags.DEFINE_string('log_dir', './log', 'the logging dir')
tf.app.flags.DEFINE_integer('num_classes', 37,'The num of classes need to predict')
FLAGS = tf.app.flags.FLAGS
# NOTE(review): presumably a dummy flag so that flag parsing tolerates the
# `-f <connection file>` argument the Jupyter kernel is started with — confirm.
tf.app.flags.DEFINE_string('f', '', 'kernel')

In [None]:
def bottle_neck(layer,is_training):
    """Build the Darknet-53 style backbone and expose three feature maps.

    Layers are created in a fixed order: a stride-1 3x3 convolution with 32
    filters, followed by five down-sampling stages. Every stage pads one
    leading element on axes 1 and 2, applies a stride-2 3x3 convolution and
    then runs a stage-specific number of residual blocks.

    Args:
        layer: input tensor fed to the first convolution.
        is_training: forwarded to every batch-normalization layer.

    Returns:
        Tuple ``(X, scale1, scale2, scale3)``: ``X`` and ``scale1`` are both
        the output of the last (1024-filter) stage, ``scale2`` is the output
        of the 512-filter stage and ``scale3`` that of the 256-filter stage.
    """
    # (filters of the stride-2 convolution, number of residual blocks that follow it)
    stages = [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)]
    leading_pad = [[0,0],[1,0],[1,0],[0,0]]

    X = conv2d_bn_leak(layer,32,1,[3,3],is_training)

    stage_outputs = {}
    for filters, num_blocks in stages:
        X = tf.pad(X, leading_pad)
        X = conv2d_bn_leak(X, filters, 2, [3,3], is_training)
        for _ in range(num_blocks):
            # each residual block squeezes to half the stage width and expands back
            X = identity_block_2(X, [filters // 2, filters], is_training)
        stage_outputs[filters] = X

    scale3 = stage_outputs[256]
    scale2 = stage_outputs[512]
    scale1 = X
    return X,scale1,scale2,scale3

In [None]:
def identity_block_2(layer,layer_depth,is_training):
    """Two-convolution residual block with an identity shortcut.

    Pipeline: 1x1 conv -> batch norm -> ReLU -> 3x3 conv -> batch norm,
    then the block input is added and a leaky ReLU (alpha=0.1) is applied.

    Args:
        layer: input tensor; it is added element-wise to the output of the
            second convolution.
        layer_depth: pair ``(depth1, depth2)`` giving the filter counts of the
            1x1 and the 3x3 convolution respectively.
        is_training: forwarded to both batch-normalization layers.

    Returns:
        The activated sum of the shortcut and the residual branch.
    """
    squeeze_filters, expand_filters = layer_depth

    # NOTE(review): `seed=0.01` is a non-integer seed and the first activation is a
    # plain ReLU while the second is leaky; both are kept exactly as found — confirm intent.
    squeezed = tf.layers.conv2d(
        inputs=layer, filters=squeeze_filters, kernel_size=[1,1], strides=1, padding='same',
        use_bias=True, kernel_initializer=he_normal(seed=0.01), activation=None,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
    squeezed = tf.layers.batch_normalization(squeezed, training=is_training)
    squeezed = tf.nn.relu(squeezed)

    expanded = tf.layers.conv2d(
        inputs=squeezed, filters=expand_filters, kernel_size=[3,3], strides=1, padding='same',
        use_bias=True, kernel_initializer=he_normal(seed=0.01), activation=None,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
    expanded = tf.layers.batch_normalization(expanded, training=is_training)

    return tf.nn.leaky_relu(layer + expanded, alpha=0.1)

In [None]:
def conv2d_bn_leak(layer,depth,stride,kernel_size,is_training):
    """Convolution followed by batch normalization and leaky ReLU (alpha=0.1).

    Args:
        layer: input tensor.
        depth: number of convolution filters.
        stride: convolution stride; stride 1 uses 'same' padding, any other
            stride uses 'valid' padding.
        kernel_size: convolution kernel size.
        is_training: forwarded to the batch-normalization layer.

    Returns:
        The activated output tensor.
    """
    padding = 'same' if stride == 1 else 'valid'
    conv = tf.layers.conv2d(
        inputs=layer,
        filters=depth,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        use_bias=True,
        kernel_initializer=he_normal(seed=0.01),
        activation=None,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(5e-4))
    normalized = tf.layers.batch_normalization(conv,training=is_training)
    return tf.nn.leaky_relu(normalized,alpha=0.1)

In [None]:
def scale(X,scale,depth,num_anchors,num_class,is_training):
    """Build one detection scale: refine the running feature map and emit raw predictions.

    When ``depth`` is not 512 the running map ``X`` is first convolved (3x3,
    ``depth`` filters), up-sampled by 2 with nearest-neighbour interpolation
    and concatenated with ``scale`` on axis 3. ``X`` then goes through five
    alternating 1x1 / 3x3 convolutions.

    NOTE(review): the prediction tensor is computed from ``scale`` (the
    feature map passed in), not from the refined ``X`` — confirm this is
    intended; it is preserved here exactly as found.

    Args:
        X: running feature map carried from the previous scale.
        scale: feature map for this scale.
        depth: base filter count for this scale.
        num_anchors: anchors predicted per grid cell.
        num_class: number of classes.
        is_training: forwarded to the batch-normalization layers.

    Returns:
        Tuple ``(X, predictions)`` where ``predictions`` has
        ``num_anchors * (num_class + 5)`` channels.
    """
    if depth != 512:
        X = conv2d_bn_leak(X,depth,1,[3,3],is_training)
        doubled_size = (X.shape[1]*2, X.shape[2]*2)
        X = tf.image.resize_nearest_neighbor(X, doubled_size)
        X = tf.concat([X,scale],axis=3)

    # five alternating 1x1 (depth) and 3x3 (2*depth) convolutions
    refinement = [
        (depth, [1,1]),
        (depth*2, [3,3]),
        (depth, [1,1]),
        (depth*2, [3,3]),
        (depth, [1,1]),
    ]
    for filters, kernel in refinement:
        X = conv2d_bn_leak(X, filters, 1, kernel, is_training)

    predictions = conv2d_bn_leak(scale,depth*2,1,[3,3],is_training)
    predictions = tf.layers.conv2d(
        inputs=predictions, filters=num_anchors*(num_class+5), kernel_size=[1,1], strides=1,
        padding='same', use_bias=True, kernel_initializer=he_normal(seed=0.01), activation=None,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01))
    return X,predictions

In [None]:
def yolo_head_2(net_out, anchors, n_class, input_shape,
                score_threshold=0.5, iou_threshold=0.5, max_boxes=20):
    """Decode raw multi-scale predictions into detections and run per-class NMS.

    Args:
        net_out: sequence of three prediction tensors; entry ``i`` is decoded
            with anchors ``3*i .. 3*i+2``. Each entry is reshaped to
            ``(-1, rows, cols, 3, n_class + 5)``.
        anchors: nine ``(w, h)`` anchor sizes (18 values in total), in the
            same pixel units as ``input_shape``.
        n_class: number of classes.
        input_shape: two-element size of the network input. It is reversed to
            normalise the ``(w, h)`` predictions and used as-is to scale the
            ``(y, x)`` box corners back to pixels, i.e. it is treated as
            ``(height, width)``.
        score_threshold: minimum ``objectness * class probability`` for a box
            to be considered.
        iou_threshold: IoU threshold for non-max suppression.
        max_boxes: maximum number of boxes kept per class.

    Returns:
        Tuple ``(boxes_, scores_, classes_)``: boxes as
        ``(y_min, x_min, y_max, x_max)`` in pixels of ``input_shape``, their
        scores and their int32 class indices.

    Note:
        Predictions of all images in the batch are flattened together before
        NMS, so the result is only meaningful for a batch of one image.
    """
    anchors = tf.constant(anchors, dtype=tf.float32, shape=[1, 1, 1, 9, 2])
    # (w, h) of the network input, matching the (w, h) ordering of the size predictions
    input_wh = tf.cast(input_shape[::-1], tf.float32)
    # Scale factors for (y_min, x_min, y_max, x_max); previously hard-coded to 416.
    input_hw = tf.cast(input_shape, tf.float32)
    box_rescale = tf.concat([input_hw, input_hw], axis=-1)

    boxes = []
    box_scores = []
    for i in range(3):  # one pass per detection scale
        anchor = anchors[..., 3 * i:3 * (i + 1), :]
        feats = net_out[i]

        grid_rows = tf.shape(feats)[1]
        grid_cols = tf.shape(feats)[2]
        feats = tf.reshape(feats, [-1, grid_rows, grid_cols, 3, n_class + 5])

        # Per-cell (x, y) = (column, row) offsets, built for the actual grid size
        # instead of slicing a fixed 52x52 table.
        col_index, row_index = tf.meshgrid(tf.range(grid_cols), tf.range(grid_rows))
        cell_offsets = tf.reshape(
            tf.cast(tf.stack([col_index, row_index], axis=-1), tf.float32),
            [1, grid_rows, grid_cols, 1, 2])
        # Divide x by the number of columns and y by the number of rows. The
        # original reversed the size-1 leading axis, which was a no-op.
        grid_wh = tf.reshape(tf.cast([grid_cols, grid_rows], tf.float32), [1, 1, 1, 1, 2])

        # Adjust predictions to each spatial grid point and anchor size.
        box_xy = (tf.sigmoid(feats[..., :2]) + cell_offsets) / grid_wh
        box_wh = tf.exp(feats[..., 2:4]) * anchor / input_wh
        box_confidence = tf.sigmoid(feats[..., 4:5])
        box_class_probs = tf.sigmoid(feats[..., 5:])

        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        box_mins = box_yx - (box_hw / 2.)
        box_maxes = box_yx + (box_hw / 2.)
        # (y_min, x_min, y_max, x_max), still normalised to [0, 1]
        _boxes = tf.concat([box_mins, box_maxes], axis=-1)

        # Scale boxes back to the network input size.
        _boxes *= box_rescale
        _boxes = tf.reshape(_boxes, [-1, 4])

        _box_scores = box_confidence * box_class_probs
        _box_scores = tf.reshape(_box_scores, [-1, n_class])
        boxes.append(_boxes)
        box_scores.append(_box_scores)

    boxes = tf.concat(boxes, axis=0)
    box_scores = tf.concat(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_num_boxes = tf.constant(max_boxes, dtype=tf.int32)

    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(n_class):
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_num_boxes, iou_threshold=iou_threshold)
        class_boxes = tf.gather(class_boxes, nms_index)
        class_box_scores = tf.gather(class_box_scores, nms_index)
        classes = tf.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = tf.concat(boxes_, axis=0)
    scores_ = tf.concat(scores_, axis=0)
    classes_ = tf.concat(classes_, axis=0)
    return boxes_,scores_,classes_

In [None]:
# Parse the anchor boxes. Only the first line of the file is used; it is split
# into pairs on ", " and each pair into its two values on ",".
with open('./data/yolo_anchors.txt','r') as f:
    anchor_lines = [line.strip('\n') for line in f.readlines()]
anchor_pairs = [pair.split(',') for pair in anchor_lines[0].split(', ')]
anchors = np.array(anchor_pairs).astype(np.float32)
# the anchors are shared equally between the three detection scales
num_anchors = len(anchors)//3

In [None]:
# Network input: a batch of images already resized to image_size x image_size.
# The size comes from FLAGS.image_size (default 416) so that the placeholder,
# the preprocessing and the box decoding all agree.
inputs = tf.placeholder(tf.float32, [None, FLAGS.image_size, FLAGS.image_size, 3])
is_training = tf.placeholder(tf.bool)
layer = inputs

# Backbone; scale1/scale2/scale3 are its feature maps at strides 32/16/8.
X,scale1,scale2,scale3 = bottle_neck(layer,is_training)

# Detection scales, coarsest first. The creation order fixes the variable names
# the checkpoint is restored into, so it must not be changed.
X,scale1 = scale(X,scale1,512,num_anchors,FLAGS.num_classes,is_training)
X,scale2 = scale(X,scale2,256,num_anchors,FLAGS.num_classes,is_training)
X,scale3 = scale(X,scale3,128,num_anchors,FLAGS.num_classes,is_training)

# Grid sizes for strides 32, 16 and 8 (13, 26 and 52 for a 416 input).
grid_sizes = [FLAGS.image_size // stride for stride in (32, 16, 8)]
scale1 = tf.reshape(scale1,(-1,grid_sizes[0],grid_sizes[0],num_anchors,FLAGS.num_classes+5))
scale2 = tf.reshape(scale2,(-1,grid_sizes[1],grid_sizes[1],num_anchors,FLAGS.num_classes+5))
scale3 = tf.reshape(scale3,(-1,grid_sizes[2],grid_sizes[2],num_anchors,FLAGS.num_classes+5))

# Finest grid first: yolo_head_2 pairs entry i with anchors 3*i .. 3*i+2.
y_pred = [scale3,scale2,scale1]
boxes_,scores_,classes_ = yolo_head_2(y_pred, anchors, FLAGS.num_classes, (FLAGS.image_size,FLAGS.image_size))

In [None]:
from matplotlib import pyplot as plt
import random

In [None]:
def get_path_and_annotation(file_path):
    """Load image paths and their annotations from a list file, in shuffled order.

    Every non-blank line has the form ``<image_path> <box> <box> ...`` with
    fields separated by spaces; each ``<box>`` is a comma-separated list of
    values that is returned as a list of strings. Lines are shuffled with the
    global ``random`` state before they are parsed.

    Args:
        file_path: path of the annotation list file.

    Returns:
        Tuple ``(img_path, annotation)`` of parallel lists: the image path of
        each line and the list of its boxes.
    """
    with open(file_path,'r') as f:
        # Blank lines are skipped: they would otherwise yield an empty image path.
        line_list = [line.strip() for line in f if line.strip()]
    random.shuffle(line_list)

    img_path = []
    annotation = []
    for line in line_list:
        fields = line.split(' ')
        img_path.append(fields[0])
        # Empty fields (from repeated spaces) carry no box and are dropped.
        annotation.append([field.split(',') for field in fields[1:] if field])
    return img_path,annotation

In [None]:
# Load the shuffled test image paths and their annotations from the test list file.
img_path,annotation = get_path_and_annotation('./data/test_ocr.txt')

In [None]:
# Number of entries loaded from the test list file.
len(img_path)

In [None]:
# Class index -> character: 0-9 map to the digits, 10-35 to the letters A-Z, 36 to '-'.
corresponding_dict = dict(enumerate('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-'))


In [None]:
# Stray inspection of `box_class` removed: the name is only assigned inside the
# inference loop further down, so evaluating it here raised NameError on a fresh kernel.

In [None]:
# Create the session and initialise every variable; a restored checkpoint
# overwrites these initial values below.
sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if ckpt:
    saver.restore(sess,ckpt)
    print('restore from the checkpoint {0}'.format(ckpt))
else:
    # Make the failure visible: without a checkpoint every prediction below
    # comes from randomly initialised weights.
    print('WARNING: no checkpoint found in {0}; running with randomly initialised weights'.format(FLAGS.checkpoint_dir))

In [None]:
import time

# Run the detector on at most the first 50 listed images and draw the predicted
# boxes and characters on top of each original image.
for temp_count in range(min(50, len(img_path))):
    image = cv2.imread(img_path[temp_count])
    if image is None:
        # cv2.imread signals an unreadable or missing file by returning None
        print('could not read image {0}, skipping'.format(img_path[temp_count]))
        continue
    image = image[:,:,::-1]  # reverse the channel axis (OpenCV loads BGR)
    image_temp = image.astype(np.float32)
    image_temp = cv2.resize(image_temp,(FLAGS.image_size,FLAGS.image_size))
    image_temp = np.expand_dims(image_temp,axis=0)

    start = time.time()
    [b,s,c,temp_feat] = sess.run([boxes_,scores_,classes_,y_pred],feed_dict={inputs:image_temp,is_training: False})
    elapsed = time.time()-start
    print(elapsed)
    box_class = [corresponding_dict[i] for i in c]

    # Map boxes (y_min, x_min, y_max, x_max) from network-input pixels back to the
    # original image. The ratios come from the actual image size instead of
    # assuming 1280x1024 originals.
    orig_h, orig_w = image.shape[:2]
    y_ratio = float(FLAGS.image_size)/orig_h
    x_ratio = float(FLAGS.image_size)/orig_w
    b_transformed = b / np.array([y_ratio, x_ratio, y_ratio, x_ratio], dtype=b.dtype)

    plt.figure(figsize=(20,20))
    plt.hlines(b_transformed[...,0],b_transformed[...,1],b_transformed[...,3],colors='r')
    plt.hlines(b_transformed[...,2],b_transformed[...,1],b_transformed[...,3],colors='r')
    plt.vlines(b_transformed[...,1],b_transformed[...,0],b_transformed[...,2],colors='r')
    plt.vlines(b_transformed[...,3],b_transformed[...,0],b_transformed[...,2],colors='r')
    for i in range(b_transformed.shape[0]):
        # label each box near its centre with the predicted character
        plt.text((b_transformed[i][3]+b_transformed[i][1])/2-10,((b_transformed[i][2]+b_transformed[i][0])/2)-30,box_class[i],fontsize=50,color='r')
    plt.imshow(image)
    plt.show()

In [None]:
import tensorlayer as tl
import xml.etree.ElementTree as ET

In [None]:
# Load one sample image (channel axis reversed) and its XML annotation, then
# collect the class name and box corners of every child of the root element
# that has a <name> child.
img = cv2.imread('./data/JPEGImages/1.jpg')
img = img[:,:,::-1]
tree = ET.ElementTree(file = './data/Annotations/1.xml')
root = tree.getroot()

xmin, xmax, ymin, ymax, temp_class = [], [], [], [], []
for i in root:
    name_node = i.find('name')
    if name_node is None:
        continue
    temp_class.append(name_node.text)
    bndbox = i.find('bndbox')
    xmin.append(int(bndbox.find('xmin').text))
    xmax.append(int(bndbox.find('xmax').text))
    ymin.append(int(bndbox.find('ymin').text))
    ymax.append(int(bndbox.find('ymax').text))


In [None]:
# Corner boxes [xmin, ymin, xmax, ymax] of the sample image.
coords_original = [[xmin[k], ymin[k], xmax[k], ymax[k]] for k in range(len(xmin))]

# Convert to centroid form, mirror image and boxes left-right, convert back to corners.
coords = [tl.prepro.obj_box_coord_upleft_butright_to_centroid(box) for box in coords_original]
im_flip, coords = tl.prepro.obj_box_left_right_flip(img,coords,is_rescale=False,is_center=True)
xywh_t = np.array(
    [tl.prepro.obj_box_coord_centroid_to_upleft_butright(box) for box in coords]
).astype(np.int32)

# NOTE(review): show_box is defined in a later cell of this notebook; on a fresh
# kernel that cell has to be run first or this call raises NameError.
show_box(xywh_t,im_flip)

In [None]:
# coords = tl.prepro.obj_box_coords_rescale(coords=coords, shape=[1280, 1024])


In [None]:
# Inspect the corner boxes of the flipped sample.
xywh_t

In [None]:
# Drop all size-1 axes from the flipped boxes. This overwrites xywh_t in place,
# so with a single box the array becomes 1-D.
xywh_t = np.squeeze(xywh_t)

In [None]:
# `xywh` is never assigned anywhere in this notebook (NameError on a fresh kernel).
# Show the unflipped corner boxes [xmin, ymin, xmax, ymax] instead.
coords_original

In [None]:
def show_box(b_transformed,image):
    """Draw axis-aligned boxes in red on top of an image.

    Args:
        b_transformed: array whose last axis holds
            ``(x_left, y_top, x_right, y_bottom)`` for each box.
        image: image passed to ``plt.imshow``.
    """
    x_left = b_transformed[...,0]
    y_top = b_transformed[...,1]
    x_right = b_transformed[...,2]
    y_bottom = b_transformed[...,3]

    plt.figure(figsize=(20,20))
    # top and bottom edges, then left and right edges
    for y_edge in (y_top, y_bottom):
        plt.hlines(y_edge,x_left,x_right,colors='r')
    for x_edge in (x_left, x_right):
        plt.vlines(x_edge,y_top,y_bottom,colors='r')
    plt.imshow(image)

In [None]:
# Draw the unflipped corner boxes on the original sample image. The previous
# argument `xywh` is never assigned in this notebook; `coords_original` holds the
# [xmin, ymin, xmax, ymax] boxes that belong to `img`.
show_box(np.array(coords_original),img)

In [None]:
# NOTE(review): bare string literal with no effect other than being echoed as the
# cell output; looks like a leftover absolute local path — consider removing.
'/home/xinje/Desktop/'