| @@ -0,0 +1,228 @@ | ||
| import cv2 | ||
| import copy | ||
| import numpy as np | ||
| from keras.utils import Sequence | ||
| from utils.bbox import BoundBox, bbox_iou | ||
| from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes, random_flip2, correct_bounding_boxes2 | ||
|
|
||
| ANC_VALS = [[116,90], [156,198], [373,326], [30,61], [62,45], [59,119], [10,13], [16,30], [33,23]] | ||
|
|
||
class BatchGenerator(Sequence):
    """Keras Sequence producing YOLOv3 training batches.

    Each item is ``(x_batch, [yolo_1, yolo_2, yolo_3])`` where ``x_batch``
    holds the (optionally normalized) images and the three ``yolo_*`` arrays
    are the desired network outputs at strides ``downsample``,
    ``downsample/2`` and ``downsample/4``, using the fixed anchors in
    ``ANC_VALS`` (three anchors per scale).

    Fixes vs. previous revision:
      * removed the unreachable second ``return`` in ``__getitem__`` that
        referenced the undefined name ``dummy_yolo_1`` (and the ``t_batch`` /
        ``true_box_index`` state that only existed to feed it);
      * ``get_anchors`` no longer reads the never-defined ``self.anchors``
        attribute (which raised AttributeError) — it is built from ANC_VALS;
      * a missing image file now raises instead of printing and then crashing
        with an opaque TypeError on the next line.
    """

    def __init__(self,
                 instances,               # list of annotation dicts: {'filename', 'object': [...]}
                 labels,                  # list of class names; list index == class id
                 objects=1,               # number of classes encoded in the output tensors
                 downsample=32,           # ratio between network input size and output size, 32 for YOLOv3
                 max_box_per_image=30,    # kept for interface compatibility (no longer used)
                 batch_size=1,
                 min_net_size=320,        # lower bound for multi-scale training sizes
                 max_net_size=608,        # upper bound for multi-scale training sizes
                 net_h=864,
                 net_w=864,
                 shuffle=True,
                 jitter=True,             # scale factor for random aspect-ratio jitter in _aug_image
                 norm=None):              # optional image-normalization callable
        self.instances = instances
        self.batch_size = batch_size
        self.labels = labels
        self.objects = objects
        self.downsample = downsample
        self.max_box_per_image = max_box_per_image
        # Round the multi-scale bounds down to multiples of the network stride.
        self.min_net_size = (min_net_size // self.downsample) * self.downsample
        self.max_net_size = (max_net_size // self.downsample) * self.downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        self.net_h = net_h
        self.net_w = net_w

        if shuffle:
            np.random.shuffle(self.instances)

    def __len__(self):
        """Number of batches per epoch (the last batch is re-filled from the end)."""
        return int(np.ceil(float(len(self.instances)) / self.batch_size))

    def __getitem__(self, idx):
        """Build one training batch: images plus the three ground-truth tensors."""
        # Input size changes every 10 batches (multi-scale training).
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        # First/last instance indices of this batch; the final batch is clamped
        # so it stays full by re-using instances from the end of the list.
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size
        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        batch_len = r_bound - l_bound
        x_batch = np.zeros((batch_len, net_h, net_w, 3))  # input images

        # Desired network outputs at the three YOLO scales (grid doubles per scale).
        yolos = [np.zeros((batch_len, s * base_grid_h, s * base_grid_w, 3,
                           4 + 1 + self.objects)) for s in (1, 2, 4)]

        for instance_count, train_instance in enumerate(self.instances[l_bound:r_bound]):
            # Augment the image and remap its boxes into network coordinates.
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # Pick the anchor with best IoU against the origin-shifted box.
                shifted_box = BoundBox(0, 0,
                                       obj['xmax'] - obj['xmin'],
                                       obj['ymax'] - obj['ymin'])
                max_index, max_iou = -1, -1
                for i, (anc_w, anc_h) in enumerate(ANC_VALS):
                    iou = bbox_iou(shifted_box, BoundBox(0, 0, anc_w, anc_h))
                    if iou > max_iou:
                        max_index, max_iou = i, iou

                # Anchors come three per scale: anchor i belongs to yolos[i // 3].
                yolo = yolos[max_index // 3]
                grid_h, grid_w = yolo.shape[1:3]

                # Box center in pixels, and in grid units for this scale.
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                g_center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                g_center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # Box size in pixels and class index.
                w = obj['xmax'] - obj['xmin']
                h = obj['ymax'] - obj['ymin']
                obj_indx = self.labels.index(obj['name'])

                # Cell responsible for this object.
                grid_x = int(np.floor(g_center_x))
                grid_y = int(np.floor(g_center_y))

                # Ground truth: box in pixels, objectness, one-hot class.
                cell = yolo[instance_count, grid_y, grid_x, max_index % 3]
                cell[0:4] = [center_x, center_y, w, h]
                cell[4] = 1.
                cell[5 + obj_indx] = 1

            # Store the (normalized) image.
            x_batch[instance_count] = self.norm(img) if self.norm is not None else img

        return x_batch, yolos

    def _get_net_size(self, idx):
        """Return (net_h, net_w), re-sampling a random stride multiple every 10 batches."""
        if idx % 10 == 0:
            net_size = self.downsample * np.random.randint(
                self.min_net_size // self.downsample,
                self.max_net_size // self.downsample + 1)
            self.net_h, self.net_w = net_size, net_size
        return self.net_h, self.net_w

    def _aug_image(self, instance, net_h, net_w):
        """Load one image, apply random scale/crop/flip, return it with remapped boxes."""
        image_name = instance['filename'].replace('../', '')  # hack for changed folder structure
        image = cv2.imread(image_name)  # OpenCV loads BGR
        if image is None:
            # Fail loudly instead of crashing with an opaque TypeError below.
            raise FileNotFoundError('Cannot find ' + image_name)
        image = image[:, :, ::-1]  # BGR -> RGB
        image_h, image_w, _ = image.shape

        # Random aspect-ratio jitter and overall scale.
        dw = self.jitter * image_w
        dh = self.jitter * image_h
        new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
        scale = np.random.uniform(0.95, 1.05)

        if new_ar < 1:
            new_h = int(scale * net_h)
            new_w = int(net_h * new_ar)
        else:
            new_w = int(scale * net_w)
            new_h = int(net_w / new_ar)

        # Random placement of the resized image inside the network canvas.
        dx = int(np.random.uniform(0, net_w - new_w))
        dy = int(np.random.uniform(0, net_h - new_h))

        # Apply scaling and cropping.
        im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)

        # Random horizontal flip.
        flip = np.random.randint(2)
        im_sized = random_flip(im_sized, flip)

        # Correct the size and position of the bounding boxes.
        all_objs = correct_bounding_boxes(instance['object'], new_w, new_h,
                                          net_w, net_h, dx, dy, flip,
                                          image_w, image_h)
        return im_sized, all_objs

    def on_epoch_end(self):
        """Reshuffle the training instances between epochs when enabled."""
        if self.shuffle:
            np.random.shuffle(self.instances)

    def num_classes(self):
        return len(self.labels)

    def size(self):
        return len(self.instances)

    def get_anchors(self):
        """Return the anchor sizes as a flat [w0, h0, w1, h1, ...] list."""
        anchors = []
        for anc_w, anc_h in ANC_VALS:
            anchors += [anc_w, anc_h]
        return anchors

    def load_annotation(self, i):
        """Boxes of instance i as an array of [xmin, ymin, xmax, ymax, class]."""
        annots = [[obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'],
                   self.labels.index(obj['name'])]
                  for obj in self.instances[i]['object']]
        if len(annots) == 0:
            annots = [[]]
        return np.array(annots)

    def load_image(self, i):
        """Raw image of instance i (BGR, as read by OpenCV)."""
        return cv2.imread(self.instances[i]['filename'])
| @@ -0,0 +1,305 @@ | ||
| { | ||
| "cells": [ | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 1, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [ | ||
| "import numpy as np\n", | ||
| "import pandas as pd\n", | ||
| "import os,sys\n", | ||
| "import cv2\n", | ||
| "import pickle" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 2, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [ | ||
| "# initialize the list of points for the rectangle bbox,\n", | ||
| "# the temporaray endpoint of the drawing rectangle\n", | ||
| "# the list of all bounding boxes of selected rois\n", | ||
| "# and boolean indicating wether drawing of mouse\n", | ||
| "# is performed or not\n", | ||
| "rect_endpoint_tmp = []\n", | ||
| "rect_bbox = []\n", | ||
| "\n", | ||
| "drawing = False\n", | ||
| "\n", | ||
| "def check_boxes(img_clean,bbox_list):\n", | ||
| " def draw_all_boxes():\n", | ||
| " img = img_clean.copy()\n", | ||
| " \n", | ||
| " for b in bbox_list:\n", | ||
| " cv2.rectangle(img, (b[0],b[1]),(b[2],b[3]), color=(0, 255, 0),thickness=1)\n", | ||
| " cv2.imshow('image', img)\n", | ||
| " \n", | ||
| " \n", | ||
| " # mouse callback function\n", | ||
| " def draw_rect_roi(event, x, y, flags, param):\n", | ||
| " # grab references to the global variables\n", | ||
| " global rect_bbox, rect_endpoint_tmp, drawing\n", | ||
| " \n", | ||
| " # if the left mouse button was clicked, record the starting\n", | ||
| " # (x, y) coordinates and indicate that drawing is being\n", | ||
| " # performed. set rect_endpoint_tmp empty list.\n", | ||
| " if event == cv2.EVENT_LBUTTONDOWN:\n", | ||
| " rect_endpoint_tmp = []\n", | ||
| " rect_bbox = [(x, y)]\n", | ||
| " drawing = True\n", | ||
| " \n", | ||
| " # check to see if the left mouse button was released\n", | ||
| " elif event == cv2.EVENT_LBUTTONUP:\n", | ||
| " # record the ending (x, y) coordinates and indicate that\n", | ||
| " # drawing operation is finished\n", | ||
| " rect_bbox.append((x, y))\n", | ||
| " drawing = False\n", | ||
| " \n", | ||
| " # draw a rectangle around the region of interest\n", | ||
| " p_1, p_2 = rect_bbox\n", | ||
| " \n", | ||
| " # for bbox find upper left and bottom right points\n", | ||
| " p_1x, p_1y = p_1\n", | ||
| " p_2x, p_2y = p_2\n", | ||
| " \n", | ||
| " lx = min(p_1x, p_2x)\n", | ||
| " ty = min(p_1y, p_2y)\n", | ||
| " rx = max(p_1x, p_2x)\n", | ||
| " by = max(p_1y, p_2y)\n", | ||
| " \n", | ||
| " # add bbox to list if both points are different\n", | ||
| " if (lx, ty) != (rx, by):\n", | ||
| " if abs(lx-rx)>5:\n", | ||
| " if abs(ty-by)>5:\n", | ||
| " bbox = [lx, ty, rx, by]\n", | ||
| " bbox_list.append(bbox)\n", | ||
| " \n", | ||
| " # if mouse is drawing set tmp rectangle endpoint to (x,y)\n", | ||
| " elif event == cv2.EVENT_MOUSEMOVE and drawing:\n", | ||
| " rect_endpoint_tmp = [(x, y)]\n", | ||
| " elif event == cv2.EVENT_LBUTTONDBLCLK:\n", | ||
| " npbx=np.asarray(bbox_list)\n", | ||
| " selected_box = ((x>npbx[:,0]) & (y>npbx[:,1]) & (x<npbx[:,2]) & (y<npbx[:,3]))\n", | ||
| " if np.sum(selected_box)==1:\n", | ||
| " bbox_list.remove(npbx[selected_box].tolist()[0])\n", | ||
| " if np.sum(selected_box)>1:\n", | ||
| " potentials = npbx[selected_box]\n", | ||
| " areas = (potentials[:,2]-potentials[:,0])*(potentials[:,3]-potentials[:,1])\n", | ||
| " bbox_list.remove(potentials[np.argmin(areas)].tolist())\n", | ||
| " draw_all_boxes()\n", | ||
| "\n", | ||
| " cv2.namedWindow('image',cv2.WINDOW_GUI_NORMAL )\n", | ||
| " cv2.resizeWindow('image', 900,900)\n", | ||
| " cv2.setMouseCallback('image', draw_rect_roi)\n", | ||
| " draw_all_boxes()\n", | ||
| " # keep looping until the 'c' key is pressed\n", | ||
| " stop = False\n", | ||
| " while True:\n", | ||
| " # display the image and wait for a keypress\n", | ||
| " if not drawing:\n", | ||
| " draw_all_boxes()\n", | ||
| " #cv2.imshow('image', img)\n", | ||
| " elif drawing and rect_endpoint_tmp:\n", | ||
| " rect_cpy = img_clean.copy()\n", | ||
| " start_point = rect_bbox[0]\n", | ||
| " end_point_tmp = rect_endpoint_tmp[0]\n", | ||
| " cv2.rectangle(rect_cpy, start_point, end_point_tmp,(0,255,0),1)\n", | ||
| " cv2.imshow('image', rect_cpy)\n", | ||
| " \n", | ||
| " key = cv2.waitKey(1) #& 0xFF\n", | ||
| " # if the 'c' key is pressed, break from the loop\n", | ||
| " if key == ord('c'):\n", | ||
| " break\n", | ||
| " if key == ord('q'):\n", | ||
| " stop=True\n", | ||
| " break\n", | ||
| " # close all open windows\n", | ||
| " cv2.destroyAllWindows()\n", | ||
| " #cv2.waitKey(1)\n", | ||
| " return stop" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 19, | ||
| "metadata": { | ||
| "collapsed": false | ||
| }, | ||
| "outputs": [ | ||
| { | ||
| "name": "stdout", | ||
| "output_type": "stream", | ||
| "text": [ | ||
| "/home/staff1/ctorney/workspace/deepWildCount/yolo_v3/train/train_images/SWC1058-8.JPG, 19888 of 20000\n" | ||
| ] | ||
| } | ||
| ], | ||
| "source": [ | ||
| "with open ('../train_images/annotations-checked.pickle', 'rb') as fp:\n", | ||
| " all_imgs = pickle.load(fp)\n", | ||
| "\n", | ||
| "from_scratch=False\n", | ||
| "if from_scratch:\n", | ||
| " new_imgs = []\n", | ||
| "else:\n", | ||
| " with open ('../train_images/annotations-checked-2.pickle', 'rb') as fp:\n", | ||
| " new_imgs = pickle.load(fp)\n", | ||
| "\n", | ||
| "for i in range(len(all_imgs)):\n", | ||
| " if not from_scratch:\n", | ||
| " if any(d['filename'] == all_imgs[i]['filename'] for d in new_imgs):\n", | ||
| " continue\n", | ||
| " img_data = {'object':[]}\n", | ||
| " img_data['filename'] = all_imgs[i]['filename']\n", | ||
| " img_data['width'] = all_imgs[i]['width']\n", | ||
| " img_data['height'] = all_imgs[i]['height']\n", | ||
| " if len(all_imgs[i]['object'])>0:\n", | ||
| " print(img_data['filename'] + \", \" + str(i) + ' of ' + str(len(all_imgs)))\n", | ||
| " boxes=[]\n", | ||
| " for obj in all_imgs[i]['object']:\n", | ||
| " boxes.append([obj['xmin'],obj['ymin'],obj['xmax'],obj['ymax']])\n", | ||
| " \n", | ||
| " #do box processing\n", | ||
| " img = cv2.imread(img_data['filename'])\n", | ||
| " if check_boxes(img,boxes):\n", | ||
| " break\n", | ||
| " for b in boxes:\n", | ||
| " obj = {}\n", | ||
| " if ((b[2]-b[0])*(b[3]-b[1]))<10:\n", | ||
| " continue\n", | ||
| " obj['name'] = 'blackbuck'\n", | ||
| " obj['xmin'] = int(b[0])\n", | ||
| " obj['ymin'] = int(b[1])\n", | ||
| " obj['xmax'] = int(b[2])\n", | ||
| " obj['ymax'] = int(b[3])\n", | ||
| " img_data['object'] += [obj]\n", | ||
| "\n", | ||
| " new_imgs += [img_data]\n", | ||
| "\n", | ||
| "#print(all_imgs)\n", | ||
| "with open('../train_images/annotations-checked-2.pickle', 'wb') as handle:\n", | ||
| " pickle.dump(new_imgs, handle)" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": false | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": false | ||
| }, | ||
| "outputs": [], | ||
| "source": [ | ||
| "\n" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": false | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 18, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| } | ||
| ], | ||
| "metadata": { | ||
| "kernelspec": { | ||
| "display_name": "Python 3", | ||
| "language": "python", | ||
| "name": "python3" | ||
| }, | ||
| "language_info": { | ||
| "codemirror_mode": { | ||
| "name": "ipython", | ||
| "version": 3 | ||
| }, | ||
| "file_extension": ".py", | ||
| "mimetype": "text/x-python", | ||
| "name": "python", | ||
| "nbconvert_exporter": "python", | ||
| "pygments_lexer": "ipython3", | ||
| "version": "3.5.2" | ||
| } | ||
| }, | ||
| "nbformat": 4, | ||
| "nbformat_minor": 2 | ||
| } |
| @@ -0,0 +1,101 @@ | ||
| import numpy as np | ||
| import pandas as pd | ||
| import os,sys,glob | ||
| import cv2 | ||
| import pickle | ||
| sys.path.append("../..") | ||
| sys.path.append("..") | ||
| from models.yolo_models import get_yolo_model | ||
| from utils.decoder import decode | ||
|
|
||
# Tile each still image into im_size x im_size crops, run the COCO-trained
# YOLOv3 model over every crop, and keep plausibly-sized detections as
# candidate 'aoi' annotations for later manual checking.

image_dir = '/home/ctorney/data/horses/still_images/'
train_dir = '../horse_images/'

train_images = glob.glob(image_dir + "*.png")

# Accept only boxes whose side lengths lie in [min_l, max_l] pixels.
max_l = 100
min_l = 10

width = 1920   # source frame dimensions
height = 1080

im_size = 864  # size of training images for yolo

##################################################
yolov3 = get_yolo_model(im_size, im_size, trainable=False)
yolov3.load_weights('../../weights/yolo-v3-coco.h5', by_name=True)

########################################
im_num = 1
all_imgs = []
for imagename in train_images:
    im = cv2.imread(imagename)
    print('processing image ' + imagename + ', ' + str(im_num) + ' of ' + str(len(train_images)) + '...')
    im_num += 1

    n_count = 0
    # Tile the frame with non-overlapping im_size crops; any remainder beyond
    # the last full tile on the right/bottom edge is discarded.
    for x in np.arange(0, width - im_size, im_size):
        for y in np.arange(0, height - im_size, im_size):
            img_data = {'object': []}  # annotation record for this crop
            head, tail = os.path.split(imagename)
            noext, ext = os.path.splitext(tail)
            save_name = train_dir + '/TR_' + noext + '-' + str(n_count) + '.png'
            box_name = train_dir + '/bbox/' + noext + '-' + str(n_count) + '.png'
            img = im[y:y + im_size, x:x + im_size, :]
            cv2.imwrite(save_name, img)
            img_data['filename'] = save_name
            img_data['width'] = im_size
            img_data['height'] = im_size
            n_count += 1

            # Preprocess: BGR -> RGB, scale to [0, 1], add batch dimension.
            new_image = img[:, :, ::-1] / 255.
            new_image = np.expand_dims(new_image, 0)

            # Predict with a deliberately low objectness threshold; the size
            # filters below remove most of the resulting false positives.
            yolos = yolov3.predict(new_image)
            boxes = decode(yolos, obj_thresh=0.005, nms_thresh=0.5)

            for b in boxes:
                xmin, ymin = int(b[0]), int(b[1])
                xmax, ymax = int(b[2]), int(b[3])

                # Discard boxes outside the crop or with implausible sizes.
                if xmin < 0 or ymin < 0 or xmax > im_size or ymax > im_size:
                    continue
                if (xmax - xmin) < min_l or (xmax - xmin) > max_l:
                    continue
                if (ymax - ymin) < min_l or (ymax - ymin) > max_l:
                    continue

                obj = {'name': 'aoi',
                       'xmin': xmin, 'ymin': ymin,
                       'xmax': xmax, 'ymax': ymax}
                img_data['object'] += [obj]
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            cv2.imwrite(box_name, img)  # crop with detections drawn, for eyeballing
            all_imgs += [img_data]

with open(train_dir + '/annotations.pickle', 'wb') as handle:
    pickle.dump(all_imgs, handle)
|
|
||
|
|
| @@ -0,0 +1,89 @@ | ||
| import numpy as np | ||
| import os | ||
| import cv2 | ||
| from .colors import get_color | ||
|
|
||
class BoundBox:
    """Axis-aligned bounding box with optional objectness and per-class scores."""

    def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None):
        # Corner coordinates.
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

        # Objectness confidence and per-class score vector (may be None).
        self.c = c
        self.classes = classes

        # Lazily computed and cached by get_label()/get_score(); -1 = not yet set.
        self.label = -1
        self.score = -1

    def get_label(self):
        """Return (and cache) the index of the highest-scoring class."""
        if self.label == -1:
            self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return (and cache) the score of the best class."""
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        return self.score
|
|
||
| def _interval_overlap(interval_a, interval_b): | ||
| x1, x2 = interval_a | ||
| x3, x4 = interval_b | ||
|
|
||
| if x3 < x1: | ||
| if x4 < x1: | ||
| return 0 | ||
| else: | ||
| return min(x2,x4) - x1 | ||
| else: | ||
| if x2 < x3: | ||
| return 0 | ||
| else: | ||
| return min(x2,x4) - x3 | ||
|
|
||
def bbox_iou(box1, box2):
    """Intersection-over-union of two BoundBox-like objects.

    Args:
        box1, box2: objects exposing xmin/ymin/xmax/ymax attributes.

    Returns:
        IoU in [0, 1]. A degenerate union (both boxes empty) now yields 0.0
        instead of raising ZeroDivisionError.
    """
    # Overlap along each axis, clamped at 0 for disjoint boxes.
    intersect_w = max(0, min(box1.xmax, box2.xmax) - max(box1.xmin, box2.xmin))
    intersect_h = max(0, min(box1.ymax, box2.ymax) - max(box1.ymin, box2.ymin))
    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
    w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
    union = w1 * h1 + w2 * h2 - intersect

    if union <= 0:
        return 0.0  # both boxes empty: define IoU as 0 rather than divide by zero
    return float(intersect) / union
|
|
||
def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
    """Draw each detection whose class score beats obj_thresh onto image.

    The caption lists every class above threshold with the box's best score;
    boxes with no class above threshold are skipped.  Returns the image
    (modified in place).
    """
    for box in boxes:
        label_str = ''
        label = -1

        # Build "name score%, name score%, ..." from all classes above threshold.
        for idx in range(len(labels)):
            if box.classes[idx] > obj_thresh:
                if label_str != '':
                    label_str += ', '
                label_str += (labels[idx] + ' ' + str(round(box.get_score() * 100, 2)) + '%')
                label = idx
        if not quiet:
            print(label_str)

        if label >= 0:
            # Size a filled banner above the box to fit the caption text.
            text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX,
                                        1.1e-3 * image.shape[0], 5)
            width, height = text_size[0][0], text_size[0][1]
            region = np.array([[box.xmin - 3, box.ymin],
                               [box.xmin - 3, box.ymin - height - 26],
                               [box.xmin + width + 13, box.ymin - height - 26],
                               [box.xmin + width + 13, box.ymin]], dtype='int32')

            cv2.rectangle(img=image, pt1=(box.xmin, box.ymin), pt2=(box.xmax, box.ymax),
                          color=get_color(label), thickness=5)
            cv2.fillPoly(img=image, pts=[region], color=get_color(label))
            cv2.putText(img=image,
                        text=label_str,
                        org=(box.xmin + 13, box.ymin - 13),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1e-3 * image.shape[0],
                        color=(0, 0, 0),
                        thickness=2)

    return image
| @@ -0,0 +1,96 @@ | ||
# 80-entry color table, one RGB triple per COCO class.
colors = [
    [31, 0, 255], [0, 159, 255], [255, 95, 0], [255, 19, 0], [255, 0, 0],
    [255, 38, 0], [0, 255, 25], [255, 0, 133], [255, 172, 0], [108, 0, 255],
    [0, 82, 255], [0, 255, 6], [255, 0, 152], [223, 0, 255], [12, 0, 255],
    [0, 255, 178], [108, 255, 0], [184, 0, 255], [255, 0, 76], [146, 255, 0],
    [51, 0, 255], [0, 197, 255], [255, 248, 0], [255, 0, 19], [255, 0, 38],
    [89, 255, 0], [127, 255, 0], [255, 153, 0], [0, 255, 255], [0, 255, 216],
    [0, 255, 121], [255, 0, 248], [70, 0, 255], [0, 255, 159], [0, 216, 255],
    [0, 6, 255], [0, 63, 255], [31, 255, 0], [255, 57, 0], [255, 0, 210],
    [0, 255, 102], [242, 255, 0], [255, 191, 0], [0, 255, 63], [255, 0, 95],
    [146, 0, 255], [184, 255, 0], [255, 114, 0], [0, 255, 235], [255, 229, 0],
    [0, 178, 255], [255, 0, 114], [255, 0, 57], [0, 140, 255], [0, 121, 255],
    [12, 255, 0], [255, 210, 0], [0, 255, 44], [165, 255, 0], [0, 25, 255],
    [0, 255, 140], [0, 101, 255], [0, 255, 82], [223, 255, 0], [242, 0, 255],
    [89, 0, 255], [165, 0, 255], [70, 255, 0], [255, 0, 172], [255, 76, 0],
    [203, 255, 0], [204, 0, 255], [255, 0, 229], [255, 133, 0], [127, 0, 255],
    [0, 235, 255], [0, 255, 197], [255, 0, 191], [0, 44, 255], [50, 255, 0],
]


def get_color(label):
    """Return a color from a set of predefined colors. Contains 80 colors in total.

    code originally from https://github.com/fizyr/keras-retinanet/
    Args
        label: The label to get the color for.
    Returns
        A list of three values representing a RGB color (or a default tuple
        when the label has no entry).
    """
    if label >= len(colors):
        # Out-of-range label: warn and fall back to green.
        print('Label {} has no color, returning default.'.format(label))
        return (0, 255, 0)
    return colors[label]
| @@ -0,0 +1,91 @@ | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import os,sys | ||
| import cv2 | ||
|
|
||
| def _interval_overlap(interval_a, interval_b): | ||
| x1, x2 = interval_a | ||
| x3, x4 = interval_b | ||
|
|
||
| if x3 < x1: | ||
| if x4 < x1: | ||
| return 0 | ||
| else: | ||
| return min(x2,x4) - x1 | ||
| else: | ||
| if x2 < x3: | ||
| return 0 | ||
| else: | ||
| return min(x2,x4) - x3 | ||
|
|
||
def bbox_iou(box1, box2):
    """IoU of two boxes given as [xmin, ymin, xmax, ymax, ...] sequences.

    Returns:
        IoU in [0, 1]. A degenerate union (both boxes empty) now yields 0.0
        instead of raising ZeroDivisionError.
    """
    # Overlap along each axis, clamped at 0 for disjoint boxes.
    intersect_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    intersect_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersect = intersect_w * intersect_h

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersect

    return float(intersect) / union if union > 0 else 0.0
|
|
||
|
|
||
|
|
||
def decode_netout(netout, obj_thresh):
    """Convert raw output rows (..., [x, y, w, h, objectness]) to corner boxes.

    Keeps only the entries whose objectness exceeds obj_thresh and returns a
    list of [xmin, ymin, xmax, ymax, objectness] rows.
    """
    # Boolean mask of detections above the objectness threshold.
    keep = netout[..., 4] > obj_thresh

    cx = netout[..., 0][keep]
    cy = netout[..., 1][keep]
    half_w = netout[..., 2][keep] / 2
    half_h = netout[..., 3][keep] / 2
    conf = netout[..., 4][keep]

    # Center/size -> corner format, confidence in the last column.
    return np.column_stack((cx - half_w,
                            cy - half_h,
                            cx + half_w,
                            cy + half_h,
                            conf)).tolist()
|
|
||
def do_nms(boxes, nms_thresh):
    """In-place non-max suppression.

    Boxes are [xmin, ymin, xmax, ymax, objectness] lists; any box overlapping
    a stronger surviving box by >= nms_thresh gets its objectness zeroed.
    """
    if len(boxes) == 0:
        return

    # Visit boxes from highest to lowest objectness.
    order = np.argsort([-b[4] for b in boxes])

    for pos in range(len(order)):
        index_i = order[pos]
        if boxes[index_i][4] == 0:
            continue  # already suppressed by a stronger box

        for index_j in order[pos + 1:]:
            if bbox_iou(boxes[index_i][0:4], boxes[index_j][0:4]) >= nms_thresh:
                boxes[index_j][4] = 0
    return
|
|
||
def decode(yolos, obj_thresh=0.9, nms_thresh=0.5):
    """Decode a list of network outputs into final [xmin, ymin, xmax, ymax] boxes.

    Each element of yolos is indexed at [0] (first batch element only), its
    detections are thresholded, then non-max suppression removes overlaps.
    """
    boxes = []
    for netout in yolos:
        # Decode the output of the network for batch element 0.
        boxes += decode_netout(netout[0], obj_thresh)

    # Suppress non-maximal boxes (zeroes their objectness in place).
    do_nms(boxes, nms_thresh)

    # Keep only surviving boxes, dropping the objectness column.
    return [[b[0], b[1], b[2], b[3]] for b in boxes if b[4] > 0]
| @@ -0,0 +1,121 @@ | ||
| import cv2 | ||
| import numpy as np | ||
| import copy | ||
|
|
||
| def _rand_scale(scale): | ||
| scale = np.random.uniform(1, scale) | ||
| return scale if (np.random.randint(2) == 0) else 1./scale; | ||
|
|
||
| def _constrain(min_v, max_v, value): | ||
| if value < min_v: return min_v | ||
| if value > max_v: return max_v | ||
| return value | ||
|
|
||
def random_flip(image, flip):
    """Horizontally mirror the image when flip == 1; otherwise return it unchanged."""
    return cv2.flip(image, 1) if flip == 1 else image
def random_flip2(image, flip):
    """Vertically mirror the image when flip == 1; otherwise return it unchanged."""
    return cv2.flip(image, 0) if flip == 1 else image
|
|
||
def correct_bounding_boxes2(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, flip2, image_w, image_h):
    """Map box dicts from original-image coordinates to the augmented frame.

    Applies the resize (new_w/new_h), the crop/pad offset (dx/dy) and optional
    horizontal (flip) and vertical (flip2) mirroring.  Boxes that collapse to
    zero area after clipping to the net_w x net_h canvas are dropped.  The
    input list is not modified.
    """
    boxes = copy.deepcopy(boxes)

    # Randomize boxes' order.
    np.random.shuffle(boxes)

    # Scale factors from original image to the resized image.
    sx, sy = float(new_w) / image_w, float(new_h) / image_h

    kept = []
    for box in boxes:
        # Rescale, shift and clip each corner to the network canvas.
        box['xmin'] = int(_constrain(0, net_w, box['xmin'] * sx + dx))
        box['xmax'] = int(_constrain(0, net_w, box['xmax'] * sx + dx))
        box['ymin'] = int(_constrain(0, net_h, box['ymin'] * sy + dy))
        box['ymax'] = int(_constrain(0, net_h, box['ymax'] * sy + dy))

        if box['xmax'] <= box['xmin'] or box['ymax'] <= box['ymin']:
            continue  # degenerate after clipping: drop

        if flip == 1:
            box['xmin'], box['xmax'] = net_w - box['xmax'], net_w - box['xmin']
        if flip2 == 1:
            box['ymin'], box['ymax'] = net_h - box['ymax'], net_h - box['ymin']
        kept.append(box)

    return kept
def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
    """Map box dicts from original-image coordinates to the augmented frame.

    Applies the resize (new_w/new_h), the crop/pad offset (dx/dy) and optional
    horizontal mirroring (flip).  Boxes that collapse to zero area after
    clipping to the net_w x net_h canvas are dropped.  The input list is not
    modified.
    """
    boxes = copy.deepcopy(boxes)

    # Randomize boxes' order.
    np.random.shuffle(boxes)

    # Scale factors from original image to the resized image.
    sx, sy = float(new_w) / image_w, float(new_h) / image_h

    kept = []
    for box in boxes:
        # Rescale, shift and clip each corner to the network canvas.
        box['xmin'] = int(_constrain(0, net_w, box['xmin'] * sx + dx))
        box['xmax'] = int(_constrain(0, net_w, box['xmax'] * sx + dx))
        box['ymin'] = int(_constrain(0, net_h, box['ymin'] * sy + dy))
        box['ymax'] = int(_constrain(0, net_h, box['ymax'] * sy + dy))

        if box['xmax'] <= box['xmin'] or box['ymax'] <= box['ymin']:
            continue  # degenerate after clipping: drop

        if flip == 1:
            box['xmin'], box['xmax'] = net_w - box['xmax'], net_w - box['xmin']
        kept.append(box)

    return kept
|
|
||
def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
    """Randomly jitter hue, saturation and exposure of an RGB image."""
    # Sample distortion factors: hue is an additive shift, the others multiplicative.
    dhue = np.random.uniform(-hue, hue)
    dsat = _rand_scale(saturation)
    dexp = _rand_scale(exposure)

    # Work in HSV so each property is a single channel.
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')

    # Scale saturation and exposure.
    hsv[:, :, 1] *= dsat
    hsv[:, :, 2] *= dexp

    # Shift hue and wrap it back into OpenCV's [0, 180] hue range.
    hsv[:, :, 0] += dhue
    hsv[:, :, 0] -= (hsv[:, :, 0] > 180) * 180
    hsv[:, :, 0] += (hsv[:, :, 0] < 0) * 180

    # Convert back to RGB.
    return cv2.cvtColor(hsv.astype('uint8'), cv2.COLOR_HSV2RGB)
|
|
||
def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
    """Resize an image and place it on a net_w x net_h canvas.

    The image is resized to (new_w, new_h) and shifted by (dx, dy):
    positive offsets pad with mid-grey (127), negative offsets crop.
    Any remaining gap on the right/bottom is grey-padded, and the
    result is finally cut to exactly (net_h, net_w).
    """
    sized = cv2.resize(image, (new_w, new_h))

    # horizontal placement: pad on the left for dx > 0, crop otherwise
    if dx > 0:
        sized = np.pad(sized, ((0, 0), (dx, 0), (0, 0)), mode='constant', constant_values=127)
    else:
        sized = sized[:, -dx:, :]
    # pad on the right if the shifted image does not reach net_w
    if (new_w + dx) < net_w:
        sized = np.pad(sized, ((0, 0), (0, net_w - (new_w + dx)), (0, 0)), mode='constant', constant_values=127)

    # vertical placement: pad on top for dy > 0, crop otherwise
    if dy > 0:
        sized = np.pad(sized, ((dy, 0), (0, 0), (0, 0)), mode='constant', constant_values=127)
    else:
        sized = sized[-dy:, :, :]
    # pad on the bottom if the shifted image does not reach net_h
    if (new_h + dy) < net_h:
        sized = np.pad(sized, ((0, net_h - (new_h + dy)), (0, 0), (0, 0)), mode='constant', constant_values=127)

    return sized[:net_h, :net_w, :]
| @@ -0,0 +1,317 @@ | ||
| import cv2 | ||
| import numpy as np | ||
| import os | ||
| from .bbox import BoundBox, bbox_iou | ||
| from scipy.special import expit | ||
|
|
||
| def _sigmoid(x): | ||
| return expit(x) | ||
|
|
||
def makedirs(path):
    """Create directory *path* (including parents) if it does not already exist.

    An already-existing directory is not an error; any other failure —
    e.g. the path exists as a regular file, or a permission problem —
    still raises OSError, matching the original try/except behavior.
    """
    # exist_ok=True is the idiomatic Python 3 form of the old
    # "try: makedirs / except OSError: re-raise unless isdir" pattern.
    os.makedirs(path, exist_ok=True)
|
|
||
def evaluate(model,
             generator,
             iou_threshold=0.5,
             obj_thresh=0.5,
             nms_thresh=0.45,
             net_h=416,
             net_w=416,
             save_path=None):
    """ Evaluate a given dataset using a given model.
    code originally from https://github.com/fizyr/keras-retinanet
    # Arguments
        model         : The model to evaluate.
        generator     : The generator that represents the dataset to evaluate.
        iou_threshold : The threshold used to consider when a detection is positive or negative.
        obj_thresh    : The threshold used to distinguish between object and non-object
        nms_thresh    : The threshold used to determine whether two detections are duplicates
        net_h         : The height of the input image to the model, higher value results in better accuracy
        net_w         : The width of the input image to the model
        save_path     : The path to save images with visualized detections to.
    # Returns
        A dict mapping integer class labels to average-precision scores.
    """
    # gather all detections and annotations
    # all_detections[image][class]  -> (num_dets, 5) array: [xmin, ymin, xmax, ymax, score]
    # all_annotations[image][class] -> (num_gt, 4) array:   [xmin, ymin, xmax, ymax]
    all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    # NOTE(review): save_path is accepted but never used in this body —
    # detection-visualisation saving appears unimplemented; confirm upstream.
    for i in range(generator.size()):
        raw_image = [generator.load_image(i)]

        # make the boxes and the labels
        pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]

        score = np.array([box.get_score() for box in pred_boxes])
        pred_labels = np.array([box.label for box in pred_boxes])

        if len(pred_boxes) > 0:
            pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes])
        else:
            # empty (1, 0) array; the fancy indexing below still works on it
            pred_boxes = np.array([[]])

        # sort the boxes and the labels according to scores (descending)
        score_sort = np.argsort(-score)
        pred_labels = pred_labels[score_sort]
        pred_boxes = pred_boxes[score_sort]

        # copy detections to all_detections
        for label in range(generator.num_classes()):
            all_detections[i][label] = pred_boxes[pred_labels == label, :]

        annotations = generator.load_annotation(i)

        # copy ground truth to all_annotations (column 4 holds the class label)
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

    # compute mAP by comparing all detections and all annotations
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(generator.size()):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            # ground-truth boxes already matched to a detection in this image
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                # no ground truth in this image: every detection is a false positive
                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                # greedily match the detection to its highest-IoU annotation
                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                # a true positive needs sufficient IoU and a not-yet-matched annotation
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0
            continue

        # sort by score so the cumulative sums sweep the PR curve
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision (eps guards the 0/0 case)
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = compute_ap(recall, precision)
        average_precisions[label] = average_precision

    return average_precisions
|
|
||
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """Map box coordinates from letterboxed network space back to the image.

    Boxes arrive with coordinates relative to the net_w x net_h network
    input, inside which the image was embedded with its aspect ratio
    preserved. This undoes that embedding in place, leaving integer
    pixel coordinates in the original image frame.
    """
    # size the image occupied inside the network input (aspect preserved)
    if (float(net_w) / image_w) < (float(net_h) / image_h):
        new_w = net_w
        new_h = (image_h * net_w) / image_w
    else:
        # BUGFIX: was `new_h = net_w`, which is wrong whenever the network
        # input is non-square (hidden before because net_h == net_w).
        new_h = net_h
        new_w = (image_w * net_h) / image_h

    # offsets/scales are the same for every box — compute them once
    x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
    y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
|
|
||
def do_nms(boxes, nms_thresh):
    """Per-class non-maximum suppression, performed in place.

    For every class, boxes are visited in descending class-confidence
    order; any later box whose IoU with an already-kept box reaches
    nms_thresh has its confidence for that class zeroed out.
    """
    if not boxes:
        return
    nb_class = len(boxes[0].classes)

    for c in range(nb_class):
        # indices of boxes, strongest confidence for class c first
        order = np.argsort([-box.classes[c] for box in boxes])

        for pos, idx in enumerate(order):
            # already suppressed for this class
            if boxes[idx].classes[c] == 0:
                continue

            # zero out every weaker box that overlaps too much
            for later in order[pos + 1:]:
                if bbox_iou(boxes[idx], boxes[later]) >= nms_thresh:
                    boxes[later].classes[c] = 0
|
|
||
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Decode one raw YOLO output grid into a list of BoundBox candidates.

    Coordinates are returned normalised to the image (0..1). NOTE: the
    activations are applied in place, so the caller's array is usually
    modified (reshape normally returns a view).
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3  # anchors per grid cell
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))

    # squash xy offsets plus objectness/class scores, then gate the class
    # probabilities by objectness and the detection threshold
    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4:] = _sigmoid(netout[..., 4:])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    boxes = []
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # 4th element is the objectness score
                objectness = netout[row, col, b, 4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, h
                x, y, w, h = netout[row, col, b, :4]
                x = (col + x) / grid_w          # center x, unit: image width
                y = (row + y) / grid_h          # center y, unit: image height
                w = anchors[2 * b] * np.exp(w) / net_w      # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # remaining elements are the per-class probabilities
                classes = netout[row, col, b, 5:]

                boxes.append(BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes))

    return boxes
|
|
||
def preprocess_input(image, net_h, net_w):
    """Letterbox an image into a normalised (1, net_h, net_w, 3) batch.

    The image is scaled (aspect preserved) to fit the network size,
    channel-reversed (BGR->RGB for OpenCV-loaded images — confirm with
    the loader), mapped to [0, 1], and centred on a grey (0.5) canvas.
    """
    img_h, img_w, _ = image.shape

    # scale so the tighter-fitting side matches the network size
    if (float(net_w) / img_w) < (float(net_h) / img_h):
        new_h = (img_h * net_w) // img_w
        new_w = net_w
    else:
        new_w = (img_w * net_h) // img_h
        new_h = net_h

    # reverse channels and normalise before resizing
    resized = cv2.resize(image[:, :, ::-1] / 255., (new_w, new_h))

    # embed the image into the standard grey letter box
    canvas = np.ones((net_h, net_w, 3)) * 0.5
    top = (net_h - new_h) // 2
    left = (net_w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized

    # add the leading batch dimension
    return np.expand_dims(canvas, 0)
|
|
||
def normalize(image):
    """Scale pixel values from the [0, 255] range down to [0, 1]."""
    return image / 255.
|
|
||
def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
    """Run the model on a batch of images and return one box list per image.

    Every image is letterboxed into a single network-sized batch; the
    three output scales are decoded, mapped back to image coordinates,
    and de-duplicated with NMS. All images are rescaled using the shape
    of images[0].
    """
    image_h, image_w, _ = images[0].shape
    nb_images = len(images)

    # build one network-sized input batch
    batch_input = np.zeros((nb_images, net_h, net_w, 3))
    for idx, img in enumerate(images):
        batch_input[idx] = preprocess_input(img, net_h, net_w)

    # forward pass: three output tensors, one per detection scale
    batch_output = model.predict_on_batch(batch_input)

    batch_boxes = [None] * nb_images
    for idx in range(nb_images):
        yolos = [batch_output[scale][idx] for scale in range(3)]

        # decode each scale with its own anchor triplet
        boxes = []
        for j, yolo in enumerate(yolos):
            yolo_anchors = anchors[(2 - j) * 6:(3 - j) * 6]  # config['model']['anchors']
            boxes += decode_netout(yolo, yolo_anchors, obj_thresh, net_h, net_w)

        # undo the letterboxing, then suppress duplicate detections
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
        do_nms(boxes, nms_thresh)

        batch_boxes[idx] = boxes

    return batch_boxes
|
|
||
def compute_overlap(a, b):
    """Pairwise intersection-over-union between two box sets.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray of IoU between every box in a and every box in b
    """
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # intersection extents, clamped at zero for disjoint boxes
    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    intersection = iw * ih

    # union area, guarded against division by zero
    union = np.expand_dims(area_a, axis=1) + area_b - intersection
    union = np.maximum(union, np.finfo(float).eps)

    return intersection / union
|
|
||
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision (area under the interpolated PR curve).
    """
    # sentinel points so the curve spans recall 0..1
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # precision envelope: make mpre monotonically non-increasing,
    # sweeping from right to left
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # indices where the recall value actually changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]

    # area under the curve: sum of (delta recall) * interpolated precision
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])