Large diffs are not rendered by default.

@@ -0,0 +1,255 @@
from __future__ import print_function

import numpy as np
import tensorflow as tf
import cv2
import time
import sys



class YOLO_TF:
    """YOLO_small object detector on TensorFlow 1.x.

    Builds the YOLO_small graph at construction, restores weights from a
    checkpoint, and detects the 20 PASCAL-VOC classes in images, printing,
    drawing, and/or saving the results depending on the output flags.
    """

    # Defaults; overridable via "-flag value" pairs (see argv_parser).
    fromfile = "test/2008_000090.jpg"
    tofile_img = 'test/output.jpg'
    tofile_txt = 'test/output.txt'
    imshow = True
    filewrite_img = False
    filewrite_txt = False
    disp_console = True
    weights_file = 'weights/YOLO_small.ckpt'
    alpha = 0.1            # leaky-ReLU negative slope
    threshold = 0.2        # minimum confidence to keep a box
    iou_threshold = 0.5    # overlap above which NMS suppresses a box
    num_class = 20
    num_box = 2
    grid_size = 7
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

    # Original image size; overwritten per image in detect_from_cvmat.
    w_img = 640
    h_img = 480

    def __init__(self, argvs=None):
        """Parse CLI flags, build/restore the network, then detect on
        self.fromfile if it is set."""
        # None default instead of a mutable [] default argument.
        self.argv_parser([] if argvs is None else argvs)
        self.build_networks()
        print(self.fromfile)
        if self.fromfile is not None:
            self.detect_from_file(self.fromfile)

    def argv_parser(self, argvs):
        """Consume "-flag value" pairs from an argv-style list."""
        for i in range(1, len(argvs), 2):
            if argvs[i] == '-fromfile':
                self.fromfile = argvs[i + 1]
            if argvs[i] == '-tofile_img':
                self.tofile_img = argvs[i + 1]
                self.filewrite_img = True
            if argvs[i] == '-tofile_txt':
                self.tofile_txt = argvs[i + 1]
                self.filewrite_txt = True
            if argvs[i] == '-imshow':
                self.imshow = argvs[i + 1] == '1'
            if argvs[i] == '-disp_console':
                self.disp_console = argvs[i + 1] == '1'

    def build_networks(self):
        """Assemble the YOLO_small graph (conv/pool stack + 3 FC layers
        producing a 1470-float output) and restore checkpoint weights."""
        if self.disp_console:
            print("Building YOLO_small graph...")
        self.x = tf.placeholder('float32', [None, 448, 448, 3])
        self.conv_1 = self.conv_layer(1, self.x, 64, 7, 2)
        self.pool_2 = self.pooling_layer(2, self.conv_1, 2, 2)
        self.conv_3 = self.conv_layer(3, self.pool_2, 192, 3, 1)
        self.pool_4 = self.pooling_layer(4, self.conv_3, 2, 2)
        self.conv_5 = self.conv_layer(5, self.pool_4, 128, 1, 1)
        self.conv_6 = self.conv_layer(6, self.conv_5, 256, 3, 1)
        self.conv_7 = self.conv_layer(7, self.conv_6, 256, 1, 1)
        self.conv_8 = self.conv_layer(8, self.conv_7, 512, 3, 1)
        self.pool_9 = self.pooling_layer(9, self.conv_8, 2, 2)
        self.conv_10 = self.conv_layer(10, self.pool_9, 256, 1, 1)
        self.conv_11 = self.conv_layer(11, self.conv_10, 512, 3, 1)
        self.conv_12 = self.conv_layer(12, self.conv_11, 256, 1, 1)
        self.conv_13 = self.conv_layer(13, self.conv_12, 512, 3, 1)
        self.conv_14 = self.conv_layer(14, self.conv_13, 256, 1, 1)
        self.conv_15 = self.conv_layer(15, self.conv_14, 512, 3, 1)
        self.conv_16 = self.conv_layer(16, self.conv_15, 256, 1, 1)
        self.conv_17 = self.conv_layer(17, self.conv_16, 512, 3, 1)
        self.conv_18 = self.conv_layer(18, self.conv_17, 512, 1, 1)
        self.conv_19 = self.conv_layer(19, self.conv_18, 1024, 3, 1)
        self.pool_20 = self.pooling_layer(20, self.conv_19, 2, 2)
        self.conv_21 = self.conv_layer(21, self.pool_20, 512, 1, 1)
        self.conv_22 = self.conv_layer(22, self.conv_21, 1024, 3, 1)
        self.conv_23 = self.conv_layer(23, self.conv_22, 512, 1, 1)
        self.conv_24 = self.conv_layer(24, self.conv_23, 1024, 3, 1)
        self.conv_25 = self.conv_layer(25, self.conv_24, 1024, 3, 1)
        self.conv_26 = self.conv_layer(26, self.conv_25, 1024, 3, 2)
        self.conv_27 = self.conv_layer(27, self.conv_26, 1024, 3, 1)
        self.conv_28 = self.conv_layer(28, self.conv_27, 1024, 3, 1)
        self.fc_29 = self.fc_layer(29, self.conv_28, 512, flat=True, linear=False)
        self.fc_30 = self.fc_layer(30, self.fc_29, 4096, flat=False, linear=False)
        # skip dropout_31 (inference only)
        self.fc_32 = self.fc_layer(32, self.fc_30, 1470, flat=False, linear=True)
        self.sess = tf.Session()
        # BUG FIX: tf.initialize_all_variables() is deprecated and removed in
        # later TF releases; use the modern equivalent (this also matches the
        # YOLO_tiny variant of this class).
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.weights_file)
        if self.disp_console:
            print("Loading complete!" + '\n')

    def conv_layer(self, idx, inputs, filters, size, stride):
        """Convolution (explicit zero-padding + VALID), bias, leaky ReLU."""
        channels = inputs.get_shape()[3]
        weight = tf.Variable(tf.truncated_normal([size, size, int(channels), filters], stddev=0.1))
        biases = tf.Variable(tf.constant(0.1, shape=[filters]))

        # Symmetric zero-padding of size//2 on the two spatial dimensions.
        pad_size = size // 2
        pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
        inputs_pad = tf.pad(inputs, pad_mat)

        conv = tf.nn.conv2d(inputs_pad, weight, strides=[1, stride, stride, 1], padding='VALID', name=str(idx) + '_conv')
        conv_biased = tf.add(conv, biases, name=str(idx) + '_conv_biased')
        if self.disp_console:
            print(' Layer %d : Type = Conv, Size = %d * %d, Stride = %d, Filters = %d, Input channels = %d' % (idx, size, size, stride, filters, int(channels)))
        # Leaky ReLU: max(alpha * x, x).
        return tf.maximum(self.alpha * conv_biased, conv_biased, name=str(idx) + '_leaky_relu')

    def pooling_layer(self, idx, inputs, size, stride):
        """Max pooling with SAME padding."""
        if self.disp_console:
            print(' Layer %d : Type = Pool, Size = %d * %d, Stride = %d' % (idx, size, size, stride))
        return tf.nn.max_pool(inputs, ksize=[1, size, size, 1], strides=[1, stride, stride, 1], padding='SAME', name=str(idx) + '_pool')

    def fc_layer(self, idx, inputs, hiddens, flat=False, linear=False):
        """Fully connected layer; flattens NHWC input in (N, C, H, W) order
        when flat=True and applies leaky ReLU unless linear=True."""
        input_shape = inputs.get_shape().as_list()
        if flat:
            dim = input_shape[1] * input_shape[2] * input_shape[3]
            inputs_transposed = tf.transpose(inputs, (0, 3, 1, 2))
            inputs_processed = tf.reshape(inputs_transposed, [-1, dim])
        else:
            dim = input_shape[1]
            inputs_processed = inputs
        weight = tf.Variable(tf.truncated_normal([dim, hiddens], stddev=0.1))
        biases = tf.Variable(tf.constant(0.1, shape=[hiddens]))
        if self.disp_console:
            print(' Layer %d : Type = Full, Hidden = %d, Input dimension = %d, Flat = %d, Activation = %d' % (idx, hiddens, int(dim), int(flat), 1 - int(linear)))
        if linear:
            return tf.add(tf.matmul(inputs_processed, weight), biases, name=str(idx) + '_fc')
        ip = tf.add(tf.matmul(inputs_processed, weight), biases)
        return tf.maximum(self.alpha * ip, ip, name=str(idx) + '_fc')

    def detect_from_cvmat(self, img):
        """Run detection on a BGR OpenCV image; stores detections in
        self.result and routes them through show_results."""
        s = time.time()
        self.h_img, self.w_img, _ = img.shape
        img_resized = cv2.resize(img, (448, 448))
        img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        img_resized_np = np.asarray(img_RGB)
        inputs = np.zeros((1, 448, 448, 3), dtype='float32')
        inputs[0] = (img_resized_np / 255.0) * 2.0 - 1.0  # scale pixels to [-1, 1]
        in_dict = {self.x: inputs}
        net_output = self.sess.run(self.fc_32, feed_dict=in_dict)
        self.result = self.interpret_output(net_output[0])
        self.show_results(img, self.result)
        strtime = str(time.time() - s)
        if self.disp_console:
            print('Elapsed time : ' + strtime + ' secs' + '\n')

    def detect_from_file(self, filename):
        """Read an image file with OpenCV and run detection on it."""
        if self.disp_console:
            print('Detect from ' + filename)
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising;
            # fail loudly instead of crashing later in detect_from_cvmat.
            raise IOError('Could not read image: ' + filename)
        self.detect_from_cvmat(img)

    def detect_from_crop_sample(self):
        """Debug helper: run the net on a raw float dump of a crop."""
        self.w_img = 640
        self.h_img = 420
        # Close the file deterministically (it was previously leaked).
        with open('person_crop.txt', 'r') as fh:
            f = np.array(fh.readlines(), dtype='float32')
        # Values are stored channel-major (c, y, x); this reshape/transpose is
        # the vectorized equivalent of the original triple pixel loop.
        inputs = np.zeros((1, 448, 448, 3), dtype='float32')
        inputs[0] = f.reshape(3, 448, 448).transpose(1, 2, 0)
        in_dict = {self.x: inputs}
        net_output = self.sess.run(self.fc_32, feed_dict=in_dict)
        # BUG FIX: interpret_output returns a single list of detections (not a
        # (boxes, probs) pair), and show_results takes (img, results) — the
        # original unpacking and argument order were both wrong.
        results = self.interpret_output(net_output[0])
        img = cv2.imread('person.jpg')
        self.show_results(img, results)

    def interpret_output(self, output):
        """Decode the 1470-float network output into detections.

        Returns a list of [class_name, x, y, w, h, confidence] entries with
        box centers/sizes in original-image pixels, after confidence
        thresholding and greedy IoU non-max suppression.
        """
        probs = np.zeros((7, 7, 2, 20))
        # Output layout: 980 class probs (7x7x20), 98 scales (7x7x2), 392 box coords.
        class_probs = np.reshape(output[0:980], (7, 7, 20))
        scales = np.reshape(output[980:1078], (7, 7, 2))
        boxes = np.reshape(output[1078:], (7, 7, 2, 4))
        offset = np.transpose(np.reshape(np.array([np.arange(7)] * 14), (2, 7, 7)), (1, 2, 0))

        # x/y are cell-relative: add per-cell grid offsets, then normalize.
        boxes[:, :, :, 0] += offset
        boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
        boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / 7.0
        # w/h are squared here (the network emits their square roots).
        boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2])
        boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3])

        # Scale normalized coordinates to original-image pixels.
        boxes[:, :, :, 0] *= self.w_img
        boxes[:, :, :, 1] *= self.h_img
        boxes[:, :, :, 2] *= self.w_img
        boxes[:, :, :, 3] *= self.h_img

        # Per-box class confidence = class probability * box scale.
        for i in range(2):
            for j in range(20):
                probs[:, :, i, j] = np.multiply(class_probs[:, :, j], scales[:, :, i])

        filter_mat_probs = np.array(probs >= self.threshold, dtype='bool')
        filter_mat_boxes = np.nonzero(filter_mat_probs)
        boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]
        probs_filtered = probs[filter_mat_probs]
        classes_num_filtered = np.argmax(filter_mat_probs, axis=3)[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]

        # Highest confidence first, then greedy non-max suppression.
        argsort = np.array(np.argsort(probs_filtered))[::-1]
        boxes_filtered = boxes_filtered[argsort]
        probs_filtered = probs_filtered[argsort]
        classes_num_filtered = classes_num_filtered[argsort]

        for i in range(len(boxes_filtered)):
            if probs_filtered[i] == 0:
                continue
            for j in range(i + 1, len(boxes_filtered)):
                if self.iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold:
                    probs_filtered[j] = 0.0

        filter_iou = np.array(probs_filtered > 0.0, dtype='bool')
        boxes_filtered = boxes_filtered[filter_iou]
        probs_filtered = probs_filtered[filter_iou]
        classes_num_filtered = classes_num_filtered[filter_iou]

        result = []
        for i in range(len(boxes_filtered)):
            result.append([self.classes[classes_num_filtered[i]], boxes_filtered[i][0], boxes_filtered[i][1], boxes_filtered[i][2], boxes_filtered[i][3], probs_filtered[i]])

        return result

    def show_results(self, img, results):
        """Print, draw, and/or save detections per the output flags.

        results entries are [class_name, x, y, w, h, confidence] with box
        centers and full sizes in pixels.
        """
        img_cp = img.copy()
        if self.filewrite_txt:
            ftxt = open(self.tofile_txt, 'w')
        for i in range(len(results)):
            x = int(results[i][1])
            y = int(results[i][2])
            w = int(results[i][3]) // 2  # half extents: boxes are center-anchored
            h = int(results[i][4]) // 2
            if self.disp_console:
                print(' class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4])) + '], Confidence = ' + str(results[i][5]))
            if self.filewrite_img or self.imshow:
                cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
                cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h), (125, 125, 125), -1)
                cv2.putText(img_cp, results[i][0] + ' : %.2f' % results[i][5], (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            if self.filewrite_txt:
                ftxt.write(results[i][0] + ',' + str(x) + ',' + str(y) + ',' + str(w) + ',' + str(h) + ',' + str(results[i][5]) + '\n')
        if self.filewrite_img:
            if self.disp_console:
                print(' image file writed : ' + self.tofile_img)
            cv2.imwrite(self.tofile_img, img_cp)
        if self.imshow:
            cv2.imshow('YOLO_small detection', img_cp)
            cv2.waitKey()
        if self.filewrite_txt:
            if self.disp_console:
                print(' txt file writed : ' + self.tofile_txt)
            ftxt.close()

    def iou(self, box1, box2):
        """Intersection-over-union of two center-format boxes [x, y, w, h]."""
        tb = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
        lr = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
        if tb < 0 or lr < 0:
            intersection = 0
        else:
            intersection = tb * lr
        return intersection / (box1[2] * box1[3] + box2[2] * box2[3] - intersection)

    def training(self):  # TODO add training function!
        return None




def main(argvs):
    """Command-line entry point.

    Constructing YOLO_TF triggers the whole pipeline (argv parsing, graph
    build, detection on the chosen file); the waitKey keeps any OpenCV
    window alive briefly afterwards.
    """
    detector = YOLO_TF(argvs)
    cv2.waitKey(1000)


if __name__ == '__main__':
    main(sys.argv)
@@ -0,0 +1,241 @@
from __future__ import print_function

import numpy as np
import tensorflow as tf
import cv2
import time
import sys



class YOLO_TF:
    """YOLO_tiny object detector on TensorFlow 1.x.

    Builds the tiny-YOLO graph at construction and restores weights from a
    checkpoint; detection on the 20 PASCAL-VOC classes is driven by the
    caller via detect_from_file/detect_from_cvmat.
    """

    # Defaults; overridable via "-flag value" pairs (see argv_parser).
    fromfile = "test/2008_000090.jpg"
    tofile_img = 'test/output.jpg'
    tofile_txt = 'test/output.txt'
    imshow = True
    filewrite_img = False
    filewrite_txt = False
    disp_console = True
    weights_file = '/home/ubuntu/workspace/fastcnn/model/yolo_model/YOLO_tiny.ckpt'
    alpha = 0.1            # leaky-ReLU negative slope
    threshold = 0.2        # minimum confidence to keep a box
    iou_threshold = 0.5    # overlap above which NMS suppresses a box
    num_class = 20
    num_box = 2
    grid_size = 7
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

    # Original image size; overwritten per image in detect_from_cvmat.
    w_img = 640
    h_img = 480

    def __init__(self, argvs=None):
        """Build and restore the network only.

        CLI parsing and automatic detection are intentionally disabled in
        this variant; call detect_from_file/detect_from_cvmat explicitly.
        """
        self.build_networks()

    def argv_parser(self, argvs):
        """Consume "-flag value" pairs from an argv-style list."""
        for i in range(1, len(argvs), 2):
            if argvs[i] == '-fromfile':
                self.fromfile = argvs[i + 1]
            if argvs[i] == '-tofile_img':
                self.tofile_img = argvs[i + 1]
                self.filewrite_img = True
            if argvs[i] == '-tofile_txt':
                self.tofile_txt = argvs[i + 1]
                self.filewrite_txt = True
            if argvs[i] == '-imshow':
                self.imshow = argvs[i + 1] == '1'
            if argvs[i] == '-disp_console':
                self.disp_console = argvs[i + 1] == '1'

    def build_networks(self):
        """Assemble the tiny-YOLO graph (9 convs with interleaved pools +
        3 FC layers producing a 1470-float output) and restore weights."""
        if self.disp_console:
            print("Building YOLO_tiny graph...")
        self.x = tf.placeholder('float32', [None, 448, 448, 3])
        self.conv_1 = self.conv_layer(1, self.x, 16, 3, 1)
        self.pool_2 = self.pooling_layer(2, self.conv_1, 2, 2)
        self.conv_3 = self.conv_layer(3, self.pool_2, 32, 3, 1)
        self.pool_4 = self.pooling_layer(4, self.conv_3, 2, 2)
        self.conv_5 = self.conv_layer(5, self.pool_4, 64, 3, 1)
        self.pool_6 = self.pooling_layer(6, self.conv_5, 2, 2)
        self.conv_7 = self.conv_layer(7, self.pool_6, 128, 3, 1)
        self.pool_8 = self.pooling_layer(8, self.conv_7, 2, 2)
        self.conv_9 = self.conv_layer(9, self.pool_8, 256, 3, 1)
        self.pool_10 = self.pooling_layer(10, self.conv_9, 2, 2)
        self.conv_11 = self.conv_layer(11, self.pool_10, 512, 3, 1)
        self.pool_12 = self.pooling_layer(12, self.conv_11, 2, 2)
        self.conv_13 = self.conv_layer(13, self.pool_12, 1024, 3, 1)
        self.conv_14 = self.conv_layer(14, self.conv_13, 1024, 3, 1)
        self.conv_15 = self.conv_layer(15, self.conv_14, 1024, 3, 1)
        self.fc_16 = self.fc_layer(16, self.conv_15, 256, flat=True, linear=False)
        self.fc_17 = self.fc_layer(17, self.fc_16, 4096, flat=False, linear=False)
        # skip dropout_18 (inference only)
        self.fc_19 = self.fc_layer(19, self.fc_17, 1470, flat=False, linear=True)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.weights_file)
        if self.disp_console:
            print("Loading complete!" + '\n')

    def conv_layer(self, idx, inputs, filters, size, stride):
        """Convolution (explicit zero-padding + VALID), bias, leaky ReLU."""
        channels = inputs.get_shape()[3]
        weight = tf.Variable(tf.truncated_normal([size, size, int(channels), filters], stddev=0.1))
        biases = tf.Variable(tf.constant(0.1, shape=[filters]))

        # Symmetric zero-padding of size//2 on the two spatial dimensions.
        pad_size = size // 2
        pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
        inputs_pad = tf.pad(inputs, pad_mat)

        conv = tf.nn.conv2d(inputs_pad, weight, strides=[1, stride, stride, 1], padding='VALID', name=str(idx) + '_conv')
        conv_biased = tf.add(conv, biases, name=str(idx) + '_conv_biased')
        if self.disp_console:
            print(' Layer %d : Type = Conv, Size = %d * %d, Stride = %d, Filters = %d, Input channels = %d' % (idx, size, size, stride, filters, int(channels)))
        # Leaky ReLU: max(alpha * x, x).
        return tf.maximum(self.alpha * conv_biased, conv_biased, name=str(idx) + '_leaky_relu')

    def pooling_layer(self, idx, inputs, size, stride):
        """Max pooling with SAME padding."""
        if self.disp_console:
            print(' Layer %d : Type = Pool, Size = %d * %d, Stride = %d' % (idx, size, size, stride))
        return tf.nn.max_pool(inputs, ksize=[1, size, size, 1], strides=[1, stride, stride, 1], padding='SAME', name=str(idx) + '_pool')

    def fc_layer(self, idx, inputs, hiddens, flat=False, linear=False):
        """Fully connected layer; flattens NHWC input in (N, C, H, W) order
        when flat=True and applies leaky ReLU unless linear=True."""
        input_shape = inputs.get_shape().as_list()
        if flat:
            dim = input_shape[1] * input_shape[2] * input_shape[3]
            inputs_transposed = tf.transpose(inputs, (0, 3, 1, 2))
            inputs_processed = tf.reshape(inputs_transposed, [-1, dim])
        else:
            dim = input_shape[1]
            inputs_processed = inputs
        weight = tf.Variable(tf.truncated_normal([dim, hiddens], stddev=0.1))
        biases = tf.Variable(tf.constant(0.1, shape=[hiddens]))
        if self.disp_console:
            print(' Layer %d : Type = Full, Hidden = %d, Input dimension = %d, Flat = %d, Activation = %d' % (idx, hiddens, int(dim), int(flat), 1 - int(linear)))
        if linear:
            return tf.add(tf.matmul(inputs_processed, weight), biases, name=str(idx) + '_fc')
        ip = tf.add(tf.matmul(inputs_processed, weight), biases)
        return tf.maximum(self.alpha * ip, ip, name=str(idx) + '_fc')

    def detect_from_cvmat(self, img):
        """Run detection on a BGR OpenCV image; stores detections in
        self.result and routes them through show_results."""
        s = time.time()
        self.h_img, self.w_img, _ = img.shape
        img_resized = cv2.resize(img, (448, 448))
        img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        img_resized_np = np.asarray(img_RGB)
        inputs = np.zeros((1, 448, 448, 3), dtype='float32')
        inputs[0] = (img_resized_np / 255.0) * 2.0 - 1.0  # scale pixels to [-1, 1]
        in_dict = {self.x: inputs}
        net_output = self.sess.run(self.fc_19, feed_dict=in_dict)
        self.result = self.interpret_output(net_output[0])
        self.show_results(img, self.result)
        strtime = str(time.time() - s)
        if self.disp_console:
            print('Elapsed time : ' + strtime + ' secs' + '\n')

    def detect_from_file(self, filename):
        """Read an image file with OpenCV and run detection on it."""
        if self.disp_console:
            print('Detect from ' + filename)
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising;
            # fail loudly instead of crashing later in detect_from_cvmat.
            raise IOError('Could not read image: ' + filename)
        self.detect_from_cvmat(img)

    def detect_from_crop_sample(self):
        """Debug helper: run the net on a raw float dump of a crop."""
        self.w_img = 640
        self.h_img = 420
        # Close the file deterministically (it was previously leaked).
        with open('person_crop.txt', 'r') as fh:
            f = np.array(fh.readlines(), dtype='float32')
        # Values are stored channel-major (c, y, x); this reshape/transpose is
        # the vectorized equivalent of the original triple pixel loop.
        inputs = np.zeros((1, 448, 448, 3), dtype='float32')
        inputs[0] = f.reshape(3, 448, 448).transpose(1, 2, 0)
        in_dict = {self.x: inputs}
        net_output = self.sess.run(self.fc_19, feed_dict=in_dict)
        # BUG FIX: interpret_output returns a single list of detections (not a
        # (boxes, probs) pair), and show_results takes (img, results) — the
        # original unpacking and argument order were both wrong.
        results = self.interpret_output(net_output[0])
        img = cv2.imread('person.jpg')
        self.show_results(img, results)

    def interpret_output(self, output):
        """Decode the 1470-float network output into detections.

        Returns a list of [class_name, x, y, w, h, confidence] entries with
        box centers/sizes in original-image pixels, after confidence
        thresholding and greedy IoU non-max suppression.
        """
        probs = np.zeros((7, 7, 2, 20))
        # Output layout: 980 class probs (7x7x20), 98 scales (7x7x2), 392 box coords.
        class_probs = np.reshape(output[0:980], (7, 7, 20))
        scales = np.reshape(output[980:1078], (7, 7, 2))
        boxes = np.reshape(output[1078:], (7, 7, 2, 4))
        offset = np.transpose(np.reshape(np.array([np.arange(7)] * 14), (2, 7, 7)), (1, 2, 0))

        # x/y are cell-relative: add per-cell grid offsets, then normalize.
        boxes[:, :, :, 0] += offset
        boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
        boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / 7.0
        # w/h are squared here (the network emits their square roots).
        boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2])
        boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3])

        # Scale normalized coordinates to original-image pixels.
        boxes[:, :, :, 0] *= self.w_img
        boxes[:, :, :, 1] *= self.h_img
        boxes[:, :, :, 2] *= self.w_img
        boxes[:, :, :, 3] *= self.h_img

        # Per-box class confidence = class probability * box scale.
        for i in range(2):
            for j in range(20):
                probs[:, :, i, j] = np.multiply(class_probs[:, :, j], scales[:, :, i])

        filter_mat_probs = np.array(probs >= self.threshold, dtype='bool')
        filter_mat_boxes = np.nonzero(filter_mat_probs)
        boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]
        probs_filtered = probs[filter_mat_probs]
        classes_num_filtered = np.argmax(filter_mat_probs, axis=3)[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]

        # Highest confidence first, then greedy non-max suppression.
        argsort = np.array(np.argsort(probs_filtered))[::-1]
        boxes_filtered = boxes_filtered[argsort]
        probs_filtered = probs_filtered[argsort]
        classes_num_filtered = classes_num_filtered[argsort]

        for i in range(len(boxes_filtered)):
            if probs_filtered[i] == 0:
                continue
            for j in range(i + 1, len(boxes_filtered)):
                if self.iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold:
                    probs_filtered[j] = 0.0

        filter_iou = np.array(probs_filtered > 0.0, dtype='bool')
        boxes_filtered = boxes_filtered[filter_iou]
        probs_filtered = probs_filtered[filter_iou]
        classes_num_filtered = classes_num_filtered[filter_iou]

        result = []
        for i in range(len(boxes_filtered)):
            result.append([self.classes[classes_num_filtered[i]], boxes_filtered[i][0], boxes_filtered[i][1], boxes_filtered[i][2], boxes_filtered[i][3], probs_filtered[i]])

        return result

    def show_results(self, img, results):
        """Print, draw, and/or save detections per the output flags.

        results entries are [class_name, x, y, w, h, confidence] with box
        centers and full sizes in pixels.
        """
        img_cp = img.copy()
        if self.filewrite_txt:
            ftxt = open(self.tofile_txt, 'w')
        for i in range(len(results)):
            x = int(results[i][1])
            y = int(results[i][2])
            w = int(results[i][3]) // 2  # half extents: boxes are center-anchored
            h = int(results[i][4]) // 2
            if self.disp_console:
                print(' class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4])) + '], Confidence = ' + str(results[i][5]))
            if self.filewrite_img or self.imshow:
                cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
                cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h), (125, 125, 125), -1)
                cv2.putText(img_cp, results[i][0] + ' : %.2f' % results[i][5], (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            if self.filewrite_txt:
                ftxt.write(results[i][0] + ',' + str(x) + ',' + str(y) + ',' + str(w) + ',' + str(h) + ',' + str(results[i][5]) + '\n')
        if self.filewrite_img:
            if self.disp_console:
                print(' image file writed : ' + self.tofile_img)
            cv2.imwrite(self.tofile_img, img_cp)
        if self.imshow:
            cv2.imshow('YOLO_tiny detection', img_cp)
            cv2.waitKey()
        if self.filewrite_txt:
            if self.disp_console:
                print(' txt file writed : ' + self.tofile_txt)
            ftxt.close()

    def iou(self, box1, box2):
        """Intersection-over-union of two center-format boxes [x, y, w, h]."""
        tb = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
        lr = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
        if tb < 0 or lr < 0:
            intersection = 0
        else:
            intersection = tb * lr
        return intersection / (box1[2] * box1[3] + box2[2] * box2[3] - intersection)

    def training(self):  # TODO add training function!
        return None




#def main(argvs):
# yolo = YOLO_TF(argvs)
# cv2.waitKey(1000)
#
#
#if __name__=='__main__':
# main(sys.argv)
Binary file not shown.
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 5 15:59:54 2017
@author: ubuntu
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
import cv2


import YOLO_tiny_tf

# Input image for this smoke test.
fromfile = "test/2008_000090.jpg"

# Constructing YOLO_TF builds the tiny-YOLO graph and restores its weights
# into the detector's own session (yolo.sess).
yolo = YOLO_tiny_tf.YOLO_TF()

img = cv2.imread(fromfile)

# Preprocess the same way YOLO_TF.detect_from_cvmat does: resize to 448x448,
# convert BGR -> RGB, then scale pixel values into [-1, 1].
img_resized = cv2.resize(img, (448, 448))
img_RGB = cv2.cvtColor(img_resized,cv2.COLOR_BGR2RGB)
img_resized_np = np.asarray( img_RGB )
inputs = np.zeros((1,448,448,3),dtype='float32')
inputs[0] = (img_resized_np/255.0)*2.0-1.0

with tf.Session() as sess:
    # NOTE(review): this runs the variable initializer in a *new* session,
    # while the forward pass below goes through yolo.sess (which already
    # holds the restored checkpoint weights). The init here looks redundant
    # at best — confirm before relying on it.
    sess.run(tf.global_variables_initializer())
    in_dict = {yolo.x: inputs}
    net_output = yolo.sess.run(yolo.fc_19,feed_dict=in_dict)



# End-to-end check: read, detect, and display/save via the class itself.
yolo.detect_from_file(fromfile)

Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN +111 KB yolo/test/person.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -0,0 +1,180 @@
import tensorflow as tf
import numpy as np
from scipy.misc import imread
from scipy.misc import imresize
from scipy import misc
#import net_factory
import time

alpha = 0.1  # leaky-ReLU negative slope used by conv()/fc_layer() below
# Dataset globs and model/checkpoint paths (machine-specific absolute paths).
batch_file = "/media/ubuntu/65db2e03-ffde-4f3d-8f33-55d73836211a/dataset/VOC_train/*.jpg"
test_file = "/media/ubuntu/65db2e03-ffde-4f3d-8f33-55d73836211a/dataset/VOC_val/*.jpg"
filename = "../model/voc2012_reg/fcann_v1.ckpt"
logfile = '../log/voc2012_reg'
graph_model = '../model/voc2012_reg/fcann_v1.ckpt-489000.meta'
checkpoint_dir = '../model/voc2012_reg'

def randombatch(batchfile):
    """Build a TF1 queue-based input pipeline for *batchfile* (a glob).

    Matches files, shuffle-reads and decodes JPEGs, resizes each image to
    448x448x3, and returns a shuffled-batch tensor (batch_size = 1).
    Requires tf.train.start_queue_runners() before evaluation.
    """
    filenames = tf.train.match_filenames_once(batchfile)
    file_queue = tf.train.string_input_producer( filenames, shuffle=True)
    image_reader = tf.WholeFileReader()
    _, image_file = image_reader.read(file_queue)
    image_orig = tf.image.decode_jpeg(image_file)
    image = tf.image.resize_images(image_orig, [448, 448])
    image.set_shape((448, 448, 3))
    # Queue sizing knobs for shuffle_batch.
    num_preprocess_threads = 5
    min_queue_examples = 500
    batch_size = 1

    images = tf.train.shuffle_batch(
        [image],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + (num_preprocess_threads +50)*batch_size,
        min_after_dequeue=min_queue_examples)

    return images


# Randomly initialized parameters for the "ds_yolo" network assembled below:
# nine 3x3 conv layers followed by three fully connected layers ending in the
# 1470-dim YOLO output. fc10 flattens 7*7*1024 = 50176 features.
ds_yolo = {
    'conv1w':tf.Variable(tf.truncated_normal([3,3,3,16], stddev=0.1)),
    'conv1b':tf.Variable(tf.truncated_normal([16], stddev=0.1)),
    'conv2w':tf.Variable(tf.truncated_normal([3,3,16,32], stddev=0.1)),
    'conv2b':tf.Variable(tf.truncated_normal([32], stddev=0.1)),
    'conv3w':tf.Variable(tf.truncated_normal([3,3,32,64], stddev=0.1)),
    'conv3b':tf.Variable(tf.truncated_normal([64], stddev=0.1)),
    'conv4w':tf.Variable(tf.truncated_normal([3,3,64,128], stddev=0.1)),
    'conv4b':tf.Variable(tf.truncated_normal([128], stddev=0.1)),
    'conv5w':tf.Variable(tf.truncated_normal([3,3,128,256], stddev=0.1)),
    'conv5b':tf.Variable(tf.truncated_normal([256], stddev=0.1)),
    'conv6w':tf.Variable(tf.truncated_normal([3,3,256,512], stddev=0.1)),
    'conv6b':tf.Variable(tf.truncated_normal([512], stddev=0.1)),
    'conv7w':tf.Variable(tf.truncated_normal([3,3,512,1024], stddev=0.1)),
    'conv7b':tf.Variable(tf.truncated_normal([1024], stddev=0.1)),
    'conv8w':tf.Variable(tf.truncated_normal([3,3,1024,1024], stddev=0.1)),
    'conv8b':tf.Variable(tf.truncated_normal([1024], stddev=0.1)),
    'conv9w':tf.Variable(tf.truncated_normal([3,3,1024,1024], stddev=0.1)),
    'conv9b':tf.Variable(tf.truncated_normal([1024], stddev=0.1)),
    'fc10w':tf.Variable(tf.truncated_normal([50176,256], stddev=0.1)),
    'fc10b':tf.Variable(tf.truncated_normal([256], stddev=0.1)),
    'fc11w':tf.Variable(tf.truncated_normal([256,4096], stddev=0.1)),
    'fc11b':tf.Variable(tf.truncated_normal([4096], stddev=0.1)),
    'fc12w':tf.Variable(tf.truncated_normal([4096,1470], stddev=0.1)),
    'fc12b':tf.Variable(tf.truncated_normal([1470], stddev=0.1))
}

# Network input placeholder and the (shuffled) training-image batch tensor.
x = tf.placeholder(tf.float32,(None,448,448,3))
sample_batch = randombatch(batch_file)

def conv(input_src, weight, bias, step, padding='SAME'):
    """2-D convolution with bias, followed by a leaky ReLU (slope = alpha)."""
    out = tf.nn.conv2d(input_src, weight, strides=[1, step, step, 1], padding=padding)
    out = tf.add(out, bias)
    return tf.maximum(alpha * out, out)

def fc_layer(input_src, weight, bias, flat=False, linear=False):
    """Fully connected layer.

    When flat=True the NHWC input is transposed to (N, C, H, W) and
    flattened before the matmul. A leaky ReLU (slope = alpha) is applied
    unless linear=True.
    """
    shape = input_src.get_shape().as_list()
    if flat:
        dim = shape[1] * shape[2] * shape[3]
        # Flatten in channel-major (N, C, H, W) order.
        x_in = tf.reshape(tf.transpose(input_src, (0, 3, 1, 2)), [-1, dim])
    else:
        x_in = input_src

    pre_activation = tf.add(tf.matmul(x_in, weight), bias)
    if linear:
        return pre_activation
    return tf.maximum(alpha * pre_activation, pre_activation)



with tf.name_scope("ds_yolo"):
    # Assemble the forward graph: conv1-conv6 downsample with stride 2,
    # conv7-conv9 keep resolution, then fc10-fc12 produce the 1470-dim
    # YOLO-style output vector.

    with tf.name_scope("conv1"):
        conv1 = conv(x, ds_yolo['conv1w'], ds_yolo['conv1b'],2)

    with tf.name_scope("conv2"):
        conv2 = conv(conv1, ds_yolo['conv2w'], ds_yolo['conv2b'],2)

    with tf.name_scope("conv3"):
        conv3 = conv(conv2, ds_yolo['conv3w'], ds_yolo['conv3b'],2)

    with tf.name_scope("conv4"):
        conv4 = conv(conv3, ds_yolo['conv4w'], ds_yolo['conv4b'],2)

    with tf.name_scope("conv5"):
        conv5 = conv(conv4, ds_yolo['conv5w'], ds_yolo['conv5b'],2)

    with tf.name_scope("conv6"):
        conv6 = conv(conv5, ds_yolo['conv6w'], ds_yolo['conv6b'],2)

    with tf.name_scope("conv7"):
        conv7 = conv(conv6, ds_yolo['conv7w'], ds_yolo['conv7b'],1)

    with tf.name_scope("conv8"):
        conv8 = conv(conv7, ds_yolo['conv8w'], ds_yolo['conv8b'],1)

    with tf.name_scope("conv9"):
        conv9 = conv(conv8, ds_yolo['conv9w'], ds_yolo['conv9b'],1)

    with tf.name_scope("fc10"):
        fc10 = fc_layer(conv9, ds_yolo['fc10w'], ds_yolo['fc10b'],flat=True,linear=False)

    with tf.name_scope("fc11"):
        fc11 = fc_layer(fc10, ds_yolo['fc11w'], ds_yolo['fc11b'],flat=False,linear=False)


    with tf.name_scope("fc12"):
        # Final linear layer: no activation on the output vector.
        fc12 = fc_layer(fc11, ds_yolo['fc12w'], ds_yolo['fc12b'],flat=False,linear=True)

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    # Start the queue runners that feed randombatch()'s shuffle_batch queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:

        # Pull one image batch and run a single forward pass through fc12.
        # NOTE(review): no optimizer/training op is run here despite the
        # "Done training" message below — this only exercises the graph.
        img = sess.run(sample_batch)
        c1 = sess.run(fc12, feed_dict={x:img})

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # When done, ask the threads to stop.
        coord.request_stop()
        coord.join(threads)