This file was deleted.

This file was deleted.

This file was deleted.

@@ -0,0 +1,228 @@
import cv2
import copy
import numpy as np
from keras.utils import Sequence
from utils.bbox import BoundBox, bbox_iou
from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes, random_flip2, correct_bounding_boxes2

ANC_VALS = [[116,90], [156,198], [373,326], [30,61], [62,45], [59,119], [10,13], [16,30], [33,23]]

class BatchGenerator(Sequence):
    """Keras ``Sequence`` that yields augmented YOLOv3 training batches.

    Each item is ``(x_batch, [yolo_1, yolo_2, yolo_3])`` where ``x_batch``
    holds the (optionally normalised) input images and each ``yolo_i`` is the
    desired network output at one detection scale, shaped
    ``(batch, grid_h, grid_w, 3, 4 + 1 + objects)`` with grids at 1x, 2x and
    4x the base ``net_size // downsample`` resolution.
    """

    def __init__(self, instances,
                       labels,
                       objects=1,
                       downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
                       max_box_per_image=30,
                       batch_size=1,
                       min_net_size=320,
                       max_net_size=608,
                       net_h=864,
                       net_w=864,
                       shuffle=True,
                       jitter=True,
                       norm=None):
        """Store configuration and (optionally) shuffle the training instances.

        instances : list of dicts with 'filename' and 'object' annotations.
        labels    : list of class names; position in this list is the class id.
        objects   : number of classes encoded in the output tensors.
        jitter    : scale of the random crop/aspect augmentation (0 disables).
        norm      : optional callable applied to each image before batching.
        """
        self.instances = instances
        self.batch_size = batch_size
        self.labels = labels
        self.objects = objects
        self.downsample = downsample
        self.max_box_per_image = max_box_per_image
        # clamp the multi-scale training range to multiples of the downsample ratio
        self.min_net_size = (min_net_size // self.downsample) * self.downsample
        self.max_net_size = (max_net_size // self.downsample) * self.downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        # anchor priors (width, height) built from the module-level ANC_VALS;
        # previously left unset, which made get_anchors() raise AttributeError
        self.anchors = [BoundBox(0, 0, w, h) for w, h in ANC_VALS]
        self.net_h = net_h
        self.net_w = net_w

        if shuffle:
            np.random.shuffle(self.instances)

    def __len__(self):
        """Number of batches per epoch (last batch may be partial)."""
        return int(np.ceil(float(len(self.instances)) / self.batch_size))

    def __getitem__(self, idx):
        """Build one batch of images and the three-scale ground-truth tensors."""
        # get image input size, changes every 10 batches (multi-scale training)
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size

        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3))  # input images

        # desired network outputs at the three scales (1x, 2x, 4x base grid)
        yolo_1 = np.zeros((r_bound - l_bound, 1 * base_grid_h, 1 * base_grid_w, 3, 4 + 1 + self.objects))
        yolo_2 = np.zeros((r_bound - l_bound, 2 * base_grid_h, 2 * base_grid_w, 3, 4 + 1 + self.objects))
        yolo_3 = np.zeros((r_bound - l_bound, 4 * base_grid_h, 4 * base_grid_w, 3, 4 + 1 + self.objects))
        yolos = [yolo_1, yolo_2, yolo_3]

        instance_count = 0

        # fill in the inputs and the outputs
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # find the best-matching anchor for this (origin-shifted) box
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin'])

                for i in range(len(ANC_VALS)):
                    anchor = BoundBox(0, 0, ANC_VALS[i][0], ANC_VALS[i][1])
                    iou = bbox_iou(shifted_box, anchor)
                    if max_iou < iou:
                        max_index = i
                        max_iou = iou

                # anchors come in groups of 3, one group per output scale
                yolo = yolos[max_index // 3]
                grid_h, grid_w = yolo.shape[1:3]

                # position of the bounding box centre on this grid
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                g_center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                g_center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # sizes of the bounding box (in input-image pixels)
                w = obj['xmax'] - obj['xmin']
                h = obj['ymax'] - obj['ymin']

                box = [center_x, center_y, w, h]

                # index of the label
                obj_indx = self.labels.index(obj['name'])

                # cell responsible for this object
                grid_x = int(np.floor(g_center_x))
                grid_y = int(np.floor(g_center_y))

                # assign ground truth x, y, w, h, confidence and class probs
                yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
                yolo[instance_count, grid_y, grid_x, max_index % 3, 5 + obj_indx] = 1

            # assign (optionally normalised) input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                x_batch[instance_count] = img

            instance_count += 1

        return x_batch, [yolo_1, yolo_2, yolo_3]

    def _get_net_size(self, idx):
        """Return the current (net_h, net_w), re-drawing a random multiple of
        ``downsample`` in [min_net_size, max_net_size] every 10 batches."""
        if idx % 10 == 0:
            net_size = self.downsample * np.random.randint(self.min_net_size // self.downsample,
                                                           self.max_net_size // self.downsample + 1)
            self.net_h, self.net_w = net_size, net_size
        return self.net_h, self.net_w

    def _aug_image(self, instance, net_h, net_w):
        """Load, randomly scale/crop/flip one image and remap its boxes.

        Returns ``(augmented_image, corrected_boxes)``.
        Raises FileNotFoundError when the image cannot be read.
        """
        image_name = instance['filename']
        image_name = image_name.replace('../', '')  # hack for changed folder structure
        image = cv2.imread(image_name)

        if image is None:
            # fail loudly instead of printing and crashing on the next line
            raise FileNotFoundError('Cannot find ' + image_name)
        image = image[:, :, ::-1]  # BGR (OpenCV) -> RGB

        image_h, image_w, _ = image.shape

        # amount of scaling and cropping jitter
        dw = self.jitter * image_w
        dh = self.jitter * image_h

        new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
        scale = np.random.uniform(0.95, 1.05)

        if new_ar < 1:
            new_h = int(scale * net_h)
            new_w = int(net_h * new_ar)
        else:
            new_w = int(scale * net_w)
            new_h = int(net_w / new_ar)

        dx = int(np.random.uniform(0, net_w - new_w))
        dy = int(np.random.uniform(0, net_h - new_h))

        # apply scaling and cropping
        im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)

        # randomly flip horizontally
        flip = np.random.randint(2)
        im_sized = random_flip(im_sized, flip)

        # correct the size and position of the bounding boxes
        all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h,
                                          dx, dy, flip, image_w, image_h)

        return im_sized, all_objs

    def on_epoch_end(self):
        """Reshuffle the training order between epochs when enabled."""
        if self.shuffle:
            np.random.shuffle(self.instances)

    def num_classes(self):
        """Number of object classes."""
        return len(self.labels)

    def size(self):
        """Number of training instances."""
        return len(self.instances)

    def get_anchors(self):
        """Return the anchor priors as a flat [w0, h0, w1, h1, ...] list."""
        anchors = []
        for anchor in self.anchors:
            anchors += [anchor.xmax, anchor.ymax]
        return anchors

    def load_annotation(self, i):
        """Ground-truth boxes of instance i as an (N, 5) array of
        [xmin, ymin, xmax, ymax, class_id]; shape (1, 0) when empty."""
        annots = []
        for obj in self.instances[i]['object']:
            annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])]
            annots += [annot]
        if len(annots) == 0:
            annots = [[]]
        return np.array(annots)

    def load_image(self, i):
        """Raw (un-augmented, BGR) image of instance i."""
        return cv2.imread(self.instances[i]['filename'])
@@ -0,0 +1,305 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os,sys\n",
"import cv2\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# initialize the list of points for the rectangle bbox,\n",
"# the temporaray endpoint of the drawing rectangle\n",
"# the list of all bounding boxes of selected rois\n",
"# and boolean indicating wether drawing of mouse\n",
"# is performed or not\n",
"rect_endpoint_tmp = []\n",
"rect_bbox = []\n",
"\n",
"drawing = False\n",
"\n",
"def check_boxes(img_clean,bbox_list):\n",
" def draw_all_boxes():\n",
" img = img_clean.copy()\n",
" \n",
" for b in bbox_list:\n",
" cv2.rectangle(img, (b[0],b[1]),(b[2],b[3]), color=(0, 255, 0),thickness=1)\n",
" cv2.imshow('image', img)\n",
" \n",
" \n",
" # mouse callback function\n",
" def draw_rect_roi(event, x, y, flags, param):\n",
" # grab references to the global variables\n",
" global rect_bbox, rect_endpoint_tmp, drawing\n",
" \n",
" # if the left mouse button was clicked, record the starting\n",
" # (x, y) coordinates and indicate that drawing is being\n",
" # performed. set rect_endpoint_tmp empty list.\n",
" if event == cv2.EVENT_LBUTTONDOWN:\n",
" rect_endpoint_tmp = []\n",
" rect_bbox = [(x, y)]\n",
" drawing = True\n",
" \n",
" # check to see if the left mouse button was released\n",
" elif event == cv2.EVENT_LBUTTONUP:\n",
" # record the ending (x, y) coordinates and indicate that\n",
" # drawing operation is finished\n",
" rect_bbox.append((x, y))\n",
" drawing = False\n",
" \n",
" # draw a rectangle around the region of interest\n",
" p_1, p_2 = rect_bbox\n",
" \n",
" # for bbox find upper left and bottom right points\n",
" p_1x, p_1y = p_1\n",
" p_2x, p_2y = p_2\n",
" \n",
" lx = min(p_1x, p_2x)\n",
" ty = min(p_1y, p_2y)\n",
" rx = max(p_1x, p_2x)\n",
" by = max(p_1y, p_2y)\n",
" \n",
" # add bbox to list if both points are different\n",
" if (lx, ty) != (rx, by):\n",
" if abs(lx-rx)>5:\n",
" if abs(ty-by)>5:\n",
" bbox = [lx, ty, rx, by]\n",
" bbox_list.append(bbox)\n",
" \n",
" # if mouse is drawing set tmp rectangle endpoint to (x,y)\n",
" elif event == cv2.EVENT_MOUSEMOVE and drawing:\n",
" rect_endpoint_tmp = [(x, y)]\n",
" elif event == cv2.EVENT_LBUTTONDBLCLK:\n",
" npbx=np.asarray(bbox_list)\n",
" selected_box = ((x>npbx[:,0]) & (y>npbx[:,1]) & (x<npbx[:,2]) & (y<npbx[:,3]))\n",
" if np.sum(selected_box)==1:\n",
" bbox_list.remove(npbx[selected_box].tolist()[0])\n",
" if np.sum(selected_box)>1:\n",
" potentials = npbx[selected_box]\n",
" areas = (potentials[:,2]-potentials[:,0])*(potentials[:,3]-potentials[:,1])\n",
" bbox_list.remove(potentials[np.argmin(areas)].tolist())\n",
" draw_all_boxes()\n",
"\n",
" cv2.namedWindow('image',cv2.WINDOW_GUI_NORMAL )\n",
" cv2.resizeWindow('image', 900,900)\n",
" cv2.setMouseCallback('image', draw_rect_roi)\n",
" draw_all_boxes()\n",
" # keep looping until the 'c' key is pressed\n",
" stop = False\n",
" while True:\n",
" # display the image and wait for a keypress\n",
" if not drawing:\n",
" draw_all_boxes()\n",
" #cv2.imshow('image', img)\n",
" elif drawing and rect_endpoint_tmp:\n",
" rect_cpy = img_clean.copy()\n",
" start_point = rect_bbox[0]\n",
" end_point_tmp = rect_endpoint_tmp[0]\n",
" cv2.rectangle(rect_cpy, start_point, end_point_tmp,(0,255,0),1)\n",
" cv2.imshow('image', rect_cpy)\n",
" \n",
" key = cv2.waitKey(1) #& 0xFF\n",
" # if the 'c' key is pressed, break from the loop\n",
" if key == ord('c'):\n",
" break\n",
" if key == ord('q'):\n",
" stop=True\n",
" break\n",
" # close all open windows\n",
" cv2.destroyAllWindows()\n",
" #cv2.waitKey(1)\n",
" return stop"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/staff1/ctorney/workspace/deepWildCount/yolo_v3/train/train_images/SWC1058-8.JPG, 19888 of 20000\n"
]
}
],
"source": [
"with open ('../train_images/annotations-checked.pickle', 'rb') as fp:\n",
" all_imgs = pickle.load(fp)\n",
"\n",
"from_scratch=False\n",
"if from_scratch:\n",
" new_imgs = []\n",
"else:\n",
" with open ('../train_images/annotations-checked-2.pickle', 'rb') as fp:\n",
" new_imgs = pickle.load(fp)\n",
"\n",
"for i in range(len(all_imgs)):\n",
" if not from_scratch:\n",
" if any(d['filename'] == all_imgs[i]['filename'] for d in new_imgs):\n",
" continue\n",
" img_data = {'object':[]}\n",
" img_data['filename'] = all_imgs[i]['filename']\n",
" img_data['width'] = all_imgs[i]['width']\n",
" img_data['height'] = all_imgs[i]['height']\n",
" if len(all_imgs[i]['object'])>0:\n",
" print(img_data['filename'] + \", \" + str(i) + ' of ' + str(len(all_imgs)))\n",
" boxes=[]\n",
" for obj in all_imgs[i]['object']:\n",
" boxes.append([obj['xmin'],obj['ymin'],obj['xmax'],obj['ymax']])\n",
" \n",
" #do box processing\n",
" img = cv2.imread(img_data['filename'])\n",
" if check_boxes(img,boxes):\n",
" break\n",
" for b in boxes:\n",
" obj = {}\n",
" if ((b[2]-b[0])*(b[3]-b[1]))<10:\n",
" continue\n",
" obj['name'] = 'blackbuck'\n",
" obj['xmin'] = int(b[0])\n",
" obj['ymin'] = int(b[1])\n",
" obj['xmax'] = int(b[2])\n",
" obj['ymax'] = int(b[3])\n",
" img_data['object'] += [obj]\n",
"\n",
" new_imgs += [img_data]\n",
"\n",
"#print(all_imgs)\n",
"with open('../train_images/annotations-checked-2.pickle', 'wb') as handle:\n",
" pickle.dump(new_imgs, handle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,101 @@
import numpy as np
import pandas as pd
import os, sys, glob
import cv2
import pickle
sys.path.append("../..")
sys.path.append("..")
from models.yolo_models import get_yolo_model
from utils.decoder import decode

# Cut large stills into im_size tiles, run a COCO-pretrained YOLOv3 over each
# tile, and store the tiles plus their size-filtered detections as pickled
# annotations for later training.

image_dir = '/home/ctorney/data/horses/still_images/'
train_dir = '../horse_images/'

train_images = glob.glob(image_dir + "*.png")

max_l = 100  # maximum accepted box side length (pixels)
min_l = 10   # minimum accepted box side length (pixels)

# NOTE(review): tiling assumes every still is 1920x1080 — confirm inputs
width = 1920
height = 1080

im_size = 864  # size of training images for yolo

# make sure the output folders exist before any cv2.imwrite
# (imwrite fails silently when the target directory is missing)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(os.path.join(train_dir, 'bbox'), exist_ok=True)

##################################################
yolov3 = get_yolo_model(im_size, im_size, trainable=False)
yolov3.load_weights('../../weights/yolo-v3-coco.h5', by_name=True)

########################################
im_num = 1
all_imgs = []
for imagename in train_images:
    im = cv2.imread(imagename)
    print('processing image ' + imagename + ', ' + str(im_num) + ' of ' + str(len(train_images)) + '...')
    im_num += 1

    n_count = 0
    for x in np.arange(0, width - im_size, im_size):
        for y in np.arange(0, height - im_size, im_size):
            img_data = {'object': []}  # per-tile annotation record
            head, tail = os.path.split(imagename)
            noext, ext = os.path.splitext(tail)
            save_name = train_dir + '/TR_' + noext + '-' + str(n_count) + '.png'
            box_name = train_dir + '/bbox/' + noext + '-' + str(n_count) + '.png'
            img = im[y:y + im_size, x:x + im_size, :]
            cv2.imwrite(save_name, img)
            img_data['filename'] = save_name
            img_data['width'] = im_size
            img_data['height'] = im_size
            n_count += 1

            # preprocess the tile: BGR -> RGB, scale to [0, 1], add batch axis
            new_image = img[:, :, ::-1] / 255.
            new_image = np.expand_dims(new_image, 0)

            # run the COCO-pretrained detector on the tile
            yolos = yolov3.predict(new_image)

            boxes = decode(yolos, obj_thresh=0.005, nms_thresh=0.5)
            for b in boxes:
                xmin = int(b[0])
                xmax = int(b[2])
                ymin = int(b[1])
                ymax = int(b[3])

                # reject boxes outside the tile or with implausible sizes
                if xmin < 0 or ymin < 0:
                    continue
                if xmax > im_size or ymax > im_size:
                    continue
                if not (min_l <= (xmax - xmin) <= max_l):
                    continue
                if not (min_l <= (ymax - ymin) <= max_l):
                    continue

                obj = {'name': 'aoi', 'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
                img_data['object'] += [obj]
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # debug image with the accepted boxes drawn on it
            cv2.imwrite(box_name, img)
            all_imgs += [img_data]

with open(train_dir + '/annotations.pickle', 'wb') as handle:
    pickle.dump(all_imgs, handle)


This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Large diffs are not rendered by default.

This file was deleted.

@@ -0,0 +1,89 @@
import numpy as np
import os
import cv2
from .colors import get_color

class BoundBox:
    """Axis-aligned bounding box with optional objectness and class scores."""

    def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.c = c
        self.classes = classes
        self.label = -1  # cached argmax of classes; -1 until first queried
        self.score = -1  # cached score of that label; -1 until first queried

    def get_label(self):
        """Return (and cache) the index of the highest-scoring class."""
        if self.label == -1:
            self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return (and cache) the probability of the best class."""
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        return self.score

def _interval_overlap(interval_a, interval_b):
x1, x2 = interval_a
x3, x4 = interval_b

if x3 < x1:
if x4 < x1:
return 0
else:
return min(x2,x4) - x1
else:
if x2 < x3:
return 0
else:
return min(x2,x4) - x3

def bbox_iou(box1, box2):
intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])

intersect = intersect_w * intersect_h

w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin

union = w1*h1 + w2*h2 - intersect

return float(intersect) / union

def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
    """Draw every box whose class scores clear ``obj_thresh`` onto ``image``.

    The image is modified in place and also returned.  The caption lists all
    classes over the threshold with the box's best-class score.
    """
    for box in boxes:
        caption = ''
        best = -1

        # collect every class above the threshold into the caption text
        for idx, name in enumerate(labels):
            if box.classes[idx] > obj_thresh:
                if caption != '':
                    caption += ', '
                caption += (name + ' ' + str(round(box.get_score() * 100, 2)) + '%')
                best = idx
        if not quiet:
            print(caption)

        if best >= 0:
            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5)
            width, height = text_size[0][0], text_size[0][1]
            # filled banner above the box for the caption text
            region = np.array([[box.xmin - 3, box.ymin],
                               [box.xmin - 3, box.ymin - height - 26],
                               [box.xmin + width + 13, box.ymin - height - 26],
                               [box.xmin + width + 13, box.ymin]], dtype='int32')

            cv2.rectangle(img=image, pt1=(box.xmin, box.ymin), pt2=(box.xmax, box.ymax), color=get_color(best), thickness=5)
            cv2.fillPoly(img=image, pts=[region], color=get_color(best))
            cv2.putText(img=image,
                        text=caption,
                        org=(box.xmin + 13, box.ymin - 13),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1e-3 * image.shape[0],
                        color=(0, 0, 0),
                        thickness=2)

    return image
@@ -0,0 +1,96 @@
def get_color(label):
    """ Return a color from a set of predefined colors. Contains 80 colors in total.
    code originally from https://github.com/fizyr/keras-retinanet/
    Args
        label: The label to get the color for.
    Returns
        A list of three values representing a RGB color.
    """
    if label < len(colors):
        return colors[label]
    print('Label {} has no color, returning default.'.format(label))
    return (0, 255, 0)

colors = [
    [31, 0, 255], [0, 159, 255], [255, 95, 0], [255, 19, 0], [255, 0, 0],
    [255, 38, 0], [0, 255, 25], [255, 0, 133], [255, 172, 0], [108, 0, 255],
    [0, 82, 255], [0, 255, 6], [255, 0, 152], [223, 0, 255], [12, 0, 255],
    [0, 255, 178], [108, 255, 0], [184, 0, 255], [255, 0, 76], [146, 255, 0],
    [51, 0, 255], [0, 197, 255], [255, 248, 0], [255, 0, 19], [255, 0, 38],
    [89, 255, 0], [127, 255, 0], [255, 153, 0], [0, 255, 255], [0, 255, 216],
    [0, 255, 121], [255, 0, 248], [70, 0, 255], [0, 255, 159], [0, 216, 255],
    [0, 6, 255], [0, 63, 255], [31, 255, 0], [255, 57, 0], [255, 0, 210],
    [0, 255, 102], [242, 255, 0], [255, 191, 0], [0, 255, 63], [255, 0, 95],
    [146, 0, 255], [184, 255, 0], [255, 114, 0], [0, 255, 235], [255, 229, 0],
    [0, 178, 255], [255, 0, 114], [255, 0, 57], [0, 140, 255], [0, 121, 255],
    [12, 255, 0], [255, 210, 0], [0, 255, 44], [165, 255, 0], [0, 25, 255],
    [0, 255, 140], [0, 101, 255], [0, 255, 82], [223, 255, 0], [242, 0, 255],
    [89, 0, 255], [165, 0, 255], [70, 255, 0], [255, 0, 172], [255, 76, 0],
    [203, 255, 0], [204, 0, 255], [255, 0, 229], [255, 133, 0], [127, 0, 255],
    [0, 235, 255], [0, 255, 197], [255, 0, 191], [0, 44, 255], [50, 255, 0],
]
@@ -0,0 +1,91 @@

import numpy as np
import pandas as pd
import os,sys
import cv2

def _interval_overlap(interval_a, interval_b):
x1, x2 = interval_a
x3, x4 = interval_b

if x3 < x1:
if x4 < x1:
return 0
else:
return min(x2,x4) - x1
else:
if x2 < x3:
return 0
else:
return min(x2,x4) - x3

def bbox_iou(box1, box2):

intersect_w = _interval_overlap([box1[0], box1[2]], [box2[0], box2[2]])
intersect_h = _interval_overlap([box1[1], box1[3]], [box2[1], box2[3]])

intersect = intersect_w * intersect_h

w1, h1 = box1[2]-box1[0], box1[3]-box1[1]
w2, h2 = box2[2]-box2[0], box2[3]-box2[1]

union = w1*h1 + w2*h2 - intersect
return float(intersect) / union



def decode_netout(netout, obj_thresh):

xpos = netout[...,0]
ypos = netout[...,1]
wpos = netout[...,2]
hpos = netout[...,3]

objectness = netout[...,4]

# select only objects above threshold
indexes = objectness > obj_thresh

new_boxes = np.column_stack((xpos[indexes]-wpos[indexes]/2, \
ypos[indexes]-hpos[indexes]/2, \
xpos[indexes]+wpos[indexes]/2, \
ypos[indexes]+hpos[indexes]/2, \
objectness[indexes])).tolist()

return new_boxes

def do_nms(boxes, nms_thresh):
if len(boxes) == 0:
return

sorted_indices = np.argsort([-box[4] for box in boxes])

for i in range(len(sorted_indices)):
index_i = sorted_indices[i]

if boxes[index_i][4] == 0: continue

for j in range(i+1, len(sorted_indices)):
index_j = sorted_indices[j]

if bbox_iou(boxes[index_i][0:4], boxes[index_j][0:4]) >= nms_thresh:
boxes[index_j][4] = 0
return

def decode(yolos, obj_thresh=0.9, nms_thresh=0.5):
boxes = []

for i in range(len(yolos)):
# decode the output of the network
boxes += decode_netout(yolos[i][0], obj_thresh)

# suppress non-maximal boxes
do_nms(boxes, nms_thresh)

return_boxes = []

for b in boxes:
if b[4]>0:
return_boxes.append([b[0],b[1],b[2],b[3]])

return return_boxes
@@ -0,0 +1,121 @@
import cv2
import numpy as np
import copy

def _rand_scale(scale):
scale = np.random.uniform(1, scale)
return scale if (np.random.randint(2) == 0) else 1./scale;

def _constrain(min_v, max_v, value):
if value < min_v: return min_v
if value > max_v: return max_v
return value

def random_flip(image, flip):
if flip == 1: return cv2.flip(image, 1)
return image
def random_flip2(image, flip):
if flip == 1: return cv2.flip(image, 0)
return image

def correct_bounding_boxes2(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, flip2, image_w, image_h):
boxes = copy.deepcopy(boxes)

# randomize boxes' order
np.random.shuffle(boxes)

# correct sizes and positions
sx, sy = float(new_w)/image_w, float(new_h)/image_h
zero_boxes = []

for i in range(len(boxes)):
boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx))
boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx))
boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy))
boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy))

if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']:
zero_boxes += [i]
continue

if flip == 1:
swap = boxes[i]['xmin'];
boxes[i]['xmin'] = net_w - boxes[i]['xmax']
boxes[i]['xmax'] = net_w - swap
if flip2 == 1:
swap = boxes[i]['ymin'];
boxes[i]['ymin'] = net_h - boxes[i]['ymax']
boxes[i]['ymax'] = net_h - swap

boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes]

return boxes
def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
boxes = copy.deepcopy(boxes)

# randomize boxes' order
np.random.shuffle(boxes)

# correct sizes and positions
sx, sy = float(new_w)/image_w, float(new_h)/image_h
zero_boxes = []

for i in range(len(boxes)):
boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx))
boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx))
boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy))
boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy))

if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']:
zero_boxes += [i]
continue

if flip == 1:
swap = boxes[i]['xmin'];
boxes[i]['xmin'] = net_w - boxes[i]['xmax']
boxes[i]['xmax'] = net_w - swap

boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes]

return boxes

def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
    """Randomly jitter the hue, saturation and exposure of an RGB image."""
    # determine the random distortion factors
    dhue = np.random.uniform(-hue, hue)
    dsat = _rand_scale(saturation)
    dexp = _rand_scale(exposure)

    # work in HSV where the three distortions act on separate channels
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')

    # scale saturation and exposure
    hsv[:, :, 1] *= dsat
    hsv[:, :, 2] *= dexp

    # shift hue and wrap it back into OpenCV's [0, 180] range
    hsv[:, :, 0] += dhue
    hsv[:, :, 0] -= (hsv[:, :, 0] > 180) * 180
    hsv[:, :, 0] += (hsv[:, :, 0] < 0) * 180

    return cv2.cvtColor(hsv.astype('uint8'), cv2.COLOR_HSV2RGB)

def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
    """Resize ``image`` to (new_w, new_h), shift it by (dx, dy) inside a
    (net_w, net_h) canvas, and pad any uncovered area with grey (127)."""
    sized = cv2.resize(image, (new_w, new_h))

    # horizontal placement: pad on the left for dx > 0, crop otherwise
    if dx > 0:
        sized = np.pad(sized, ((0, 0), (dx, 0), (0, 0)), mode='constant', constant_values=127)
    else:
        sized = sized[:, -dx:, :]
    if (new_w + dx) < net_w:
        sized = np.pad(sized, ((0, 0), (0, net_w - (new_w + dx)), (0, 0)), mode='constant', constant_values=127)

    # vertical placement: same idea for dy
    if dy > 0:
        sized = np.pad(sized, ((dy, 0), (0, 0), (0, 0)), mode='constant', constant_values=127)
    else:
        sized = sized[-dy:, :, :]
    if (new_h + dy) < net_h:
        sized = np.pad(sized, ((0, net_h - (new_h + dy)), (0, 0), (0, 0)), mode='constant', constant_values=127)

    # crop anything that still sticks out of the canvas
    return sized[:net_h, :net_w, :]
@@ -0,0 +1,317 @@
import cv2
import numpy as np
import os
from .bbox import BoundBox, bbox_iou
from scipy.special import expit

def _sigmoid(x):
return expit(x)

def makedirs(path):
    """Create ``path`` (and parents); ignore the error if it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # re-raise unless the directory is actually there
        if not os.path.isdir(path):
            raise

def evaluate(model,
             generator,
             iou_threshold=0.5,
             obj_thresh=0.5,
             nms_thresh=0.45,
             net_h=416,
             net_w=416,
             save_path=None):
    """ Evaluate a given dataset using a given model.
    code originally from https://github.com/fizyr/keras-retinanet
    # Arguments
        model           : The model to evaluate.
        generator       : The generator that represents the dataset to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        obj_thresh      : The threshold used to distinguish between object and non-object
        nms_thresh      : The threshold used to determine whether two detections are duplicates
        net_h           : The height of the input image to the model, higher value results in better accuracy
        net_w           : The width of the input image to the model
        save_path       : The path to save images with visualized detections to.
    # Returns
        A dict mapping class names to mAP scores.
    """
    # NOTE(review): save_path is accepted but never used in this implementation.

    # gather all detections and annotations
    all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    for i in range(generator.size()):
        raw_image = [generator.load_image(i)]

        # make the boxes and the labels
        pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]

        score = np.array([box.get_score() for box in pred_boxes])
        pred_labels = np.array([box.label for box in pred_boxes])

        if len(pred_boxes) > 0:
            pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes])
        else:
            pred_boxes = np.array([[]])

        # sort the boxes and the labels according to scores
        score_sort = np.argsort(-score)
        pred_labels = pred_labels[score_sort]
        pred_boxes = pred_boxes[score_sort]

        # copy detections to all_detections
        for label in range(generator.num_classes()):
            all_detections[i][label] = pred_boxes[pred_labels == label, :]

        annotations = generator.load_annotation(i)

        # copy ground truth to all_annotations (first 4 columns are the box)
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

    # compute mAP by comparing all detections and all annotations
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(generator.size()):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                # no ground truth in this image: every detection is a false positive
                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                # match the detection to its best-overlapping annotation
                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                # true positive only if the match is good enough and not already claimed
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = compute_ap(recall, precision)
        average_precisions[label] = average_precision

    return average_precisions

def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """Map boxes from normalised, letter-boxed network coordinates back to
    pixel coordinates of the original (image_h, image_w) image, in place."""
    # recover the size the image occupied inside the network letter box
    if (float(net_w) / image_w) < (float(net_h) / image_h):
        new_w = net_w
        new_h = (image_h * net_w) / image_w
    else:
        # NOTE(review): uses net_w here (as in the original); only
        # equivalent to net_h when the network input is square — confirm.
        new_h = net_w
        new_w = (image_w * net_h) / image_h

    # the offsets/scales are the same for every box, so compute them once
    x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
    y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)

def do_nms(boxes, nms_thresh):
    """Per-class non-max suppression on BoundBox objects, in place.

    For each class, zero the class score of any box whose IoU with a
    higher-scoring box of the same class reaches ``nms_thresh``.
    """
    if not boxes:
        return
    nb_class = len(boxes[0].classes)

    for c in range(nb_class):
        # indices sorted by descending score for this class
        order = np.argsort([-box.classes[c] for box in boxes])

        for pos, idx_i in enumerate(order):
            if boxes[idx_i].classes[c] == 0:
                continue
            for idx_j in order[pos + 1:]:
                if bbox_iou(boxes[idx_i], boxes[idx_j]) >= nms_thresh:
                    boxes[idx_j].classes[c] = 0

def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Turn one raw YOLO head output into a list of BoundBox candidates.

    ``netout`` is reshaped to (grid_h, grid_w, 3, 4 + 1 + nb_class); x/y and
    the objectness/class outputs are squashed with a sigmoid, class scores
    are gated by objectness and thresholded, and w/h are decoded through the
    anchor priors.  Coordinates are returned normalised to the image size.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))

    boxes = []

    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4:] = _sigmoid(netout[..., 4:])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # 4th element is the objectness score
                objectness = netout[row, col, b, 4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                x, y, w, h = netout[row, col, b, :4]
                x = (col + x) / grid_w                    # centre x, unit: image width
                y = (row + y) / grid_h                    # centre y, unit: image height
                w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # remaining elements are the class probabilities
                classes = netout[row, col, b, 5:]

                boxes.append(BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes))

    return boxes

def preprocess_input(image, net_h, net_w):
    """Letter-box a BGR image into a (1, net_h, net_w, 3) RGB float batch.

    The image is scaled to fit inside (net_h, net_w) preserving aspect
    ratio, centred on a grey (0.5) canvas, and given a leading batch axis.
    """
    image_h, image_w, _ = image.shape

    # size the image will occupy inside the network input
    if (float(net_w) / image_w) < (float(net_h) / image_h):
        new_h = (image_h * net_w) // image_w
        new_w = net_w
    else:
        new_w = (image_w * net_h) // image_h
        new_h = net_h

    # BGR -> RGB, scale to [0, 1], resize to the letter-box size
    resized = cv2.resize(image[:, :, ::-1] / 255., (new_w, new_h))

    # paste centred onto the grey canvas and add the batch dimension
    canvas = np.ones((net_h, net_w, 3)) * 0.5
    canvas[(net_h - new_h) // 2:(net_h + new_h) // 2, (net_w - new_w) // 2:(net_w + new_w) // 2, :] = resized
    return np.expand_dims(canvas, 0)

def normalize(image):
    """Scale pixel values from [0, 255] to [0, 1] floats."""
    return image / 255.

def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
    """Run the model on a batch of images and return one BoundBox list per image.

    NOTE(review): all images are assumed to share the shape of images[0] —
    correct_yolo_boxes uses that single (image_h, image_w) for every image;
    confirm callers only pass same-sized batches.
    """
    image_h, image_w, _ = images[0].shape
    nb_images = len(images)
    batch_input = np.zeros((nb_images, net_h, net_w, 3))

    # preprocess the input (letter-box each image into the network size)
    for i in range(nb_images):
        batch_input[i] = preprocess_input(images[i], net_h, net_w)

    # run the prediction
    batch_output = model.predict_on_batch(batch_input)
    batch_boxes = [None]*nb_images

    for i in range(nb_images):
        # the three output heads for this image
        yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
        boxes = []

        # decode the output of the network; each head uses its own 3 anchors
        for j in range(len(yolos)):
            yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors']
            boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)

        # correct the sizes of the bounding boxes
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

        # suppress non-maximal boxes
        do_nms(boxes, nms_thresh)

        batch_boxes[i] = boxes

    return batch_boxes

def compute_overlap(a, b):
    """
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # pairwise intersection widths and heights, clipped at zero
    iw = np.maximum(np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) -
                    np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]), 0)
    ih = np.maximum(np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) -
                    np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]), 0)

    intersection = iw * ih

    # union, floored at eps to avoid division by zero for degenerate boxes
    area_a = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1)
    union = np.maximum(area_a + area_b - intersection, np.finfo(float).eps)

    return intersection / union

def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # sentinel values so the envelope and the diff are well defined at the ends
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # precision envelope: running maximum taken from the right
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # indices where recall changes value — the step-curve rectangle edges
    changed = np.where(mrec[1:] != mrec[:-1])[0]

    # area under the PR step curve: sum of (delta recall) * precision
    return np.sum((mrec[changed + 1] - mrec[changed]) * mpre[changed + 1])