# Auto detection to main + 4 cropped images
**Pipeline:**

1. Load cropped image csv file
2. Apply prediction
3. Save prediction result back to csv file
* pred_value
* pred_cat
* pred_bbox

In [1]:
# Import libraries
%matplotlib inline
from pycocotools.coco import COCO
from keras.models import load_model
# from utils.utils import *
# from utils.bbox import *
# from utils.image import load_image_pixels
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import numpy as np
import pandas as pd
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import torchvision.transforms.functional as TF
import PIL
import os
import json 
from urllib.request import urlretrieve
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

Using TensorFlow backend.


In [2]:
# Define image directory
projectDir=os.getcwd()
dataDir='.'
dataType='val2017'
imageDir='{}/images/'.format(dataDir)
annFile='{}/images/{}_selected/annotations/instances_{}.json'.format(dataDir,dataType,dataType)

## Utilities

In [3]:
class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)

        return self.label

    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]

        return self.score

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5
    boxes = []
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        row = i // grid_w
        col = i % grid_w
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]
            if(objectness.all() <= obj_thresh): continue
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]
            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)
    return boxes

def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    new_w, new_h = net_w, net_h
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h
    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    union = w1*h1 + w2*h2 - intersect
    return float(intersect) / union

def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0: continue
            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

# load and prepare an image
def load_image_pixels(filename, shape):
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, target_size=shape)
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    # add a dimension so that we have one sample
    image = np.expand_dims(image, 0)
    return image, width, height

# get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
            # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores

# draw all results
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    # load the image
    data = plt.imread(filename)
    # plot the image
    plt.imshow(data)
    # get the context for drawing boxes
    ax = plt.gca()
    # plot each box
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # calculate width and height of the box
        width, height = x2 - x1, y2 - y1
        # create the shape
        rect = plt.Rectangle((x1, y1), width, height, fill=False, color='white')
        # draw the box
        ax.add_patch(rect)
        # draw text and score in top left corner
        label = "%s (%.3f)" % (v_labels[i], v_scores[i])
        plt.text(x1, y1, label, color='white')
    # show the plot
    plt.show()

## Load model

In [4]:
# load yolov3 model
model = load_model('yolov3_model.h5')
# define the expected input shape for the model
input_w, input_h = 416, 416
# define the anchors
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
# define the probability threshold for detected objects
class_threshold = 0.6
# define the labels
labels = ["person", "bicycle", "car", "motorbike", "airplane", "bus", "train", "truck",
          "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
          "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
          "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
          "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
          "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
          "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
          "chair", "couch", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
          "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
          "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


## Gather & concatenate all csv files

In [18]:
all_files = []
cat = 'book'
for subdir, dirs, files in os.walk(os.path.join(imageDir,cat)):
    for filename in files:
        filepath = subdir + os.sep + filename
        if filepath.endswith(".csv"):
            all_files.append(filepath)
            print(filepath)

./images/book/529148/1139919.csv
./images/book/344621/1139063.csv
./images/book/112798/1985721.csv
./images/book/385719/1139451.csv
./images/book/389315/1652379.csv
./images/book/368684/1145116.csv
./images/book/506933/1147645.csv
./images/book/166478/1138070.csv
./images/book/206579/1986194.csv
./images/book/96001/1137334.csv
./images/book/167159/1137818.csv
./images/book/551439/1140019.csv
./images/book/16958/1144744.csv
./images/book/458255/1141588.csv
./images/book/172617/1140850.csv
./images/book/334399/1648816.csv
./images/book/14038/2197005.csv
./images/book/250901/1986136.csv
./images/book/25603/1144688.csv
./images/book/172595/1139612.csv
./images/book/421923/1137283.csv
./images/book/222299/1648151.csv
./images/book/413247/1139762.csv
./images/book/398377/1138534.csv
./images/book/509699/1141405.csv
./images/book/415741/1648882.csv
./images/book/472678/1648320.csv
./images/book/575187/1145126.csv
./images/book/55528/1647877.csv
./images/book/467176/908400467176.csv
./images/b

In [19]:
li = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)
df_images = pd.concat(li, axis=0, ignore_index=True)
df_images.head()

Unnamed: 0,bbox,category,filename,height,path,width
0,"[271, 117, 36, 23]",book,1139919_0.jpg,140,./images/book/529148,307
1,"[271, 0, 36, 23]",book,1139919_1.jpg,309,./images/book/529148,307
2,"[0, 117, 36, 23]",book,1139919_2.jpg,140,./images/book/529148,369
3,"[0, 0, 36, 23]",book,1139919_3.jpg,309,./images/book/529148,369
4,"[270, 116, 35, 22]",book,1139919.jpg,426,./images/book/529148,640


## Apply prediction to multiple images

In [20]:
df_pred = pd.DataFrame(columns=['pred','pred_cat','pred_bbox'])
iou_threshold = 0.5
for idx, item in df_images.iterrows():
    file_path = os.path.join(item['path'], item['filename'])
    image, image_w, image_h = load_image_pixels(file_path, (input_w, input_h))
    yhat = model.predict(image)
    boxes = list()
    for i in range(len(yhat)):
        # decode the output of the network
        boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
    # suppress non-maximal boxes
    do_nms(boxes, 0.5)
    # get the details of the detected objects
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
    
    ##########
    # summarize what we found
    # for i in range(len(v_boxes)):
    #     print(v_labels[i], v_scores[i])
    # draw what we found
    # draw_boxes(file_path, v_boxes, v_labels, v_scores)

    ##########
    boxes = item['bbox'].lstrip("[")
    boxes = boxes.rstrip("]")
    boxes = boxes.strip()
    x, y, w, h = list(map(int,boxes.split(",")))
    _box = BoundBox(x, y, x+w, y+h)
    is_detected = False
    for i, box in enumerate(v_boxes): # y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # print(bbox_iou(box, _box))
        # print(bbox_iou(_box, box))
        iou = bbox_iou(box, _box)
        if iou > iou_threshold:
            df_pred = df_pred.append({
                'pred': v_scores[i],
                'pred_cat': v_labels[i],
                'pred_bbox': [box.xmin, box.ymin, box.xmax-box.xmin, box.ymax-box.ymin]
            }, ignore_index=True)
            is_detected=True
            break
    if not is_detected:
        df_pred = df_pred.append({
            'pred': np.nan,
            'pred_cat': np.nan,
            'pred_bbox': np.nan
        }, ignore_index=True)

In [21]:
df = pd.concat([df_images, df_pred], axis=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
bbox         500 non-null object
category     500 non-null object
filename     500 non-null object
height       500 non-null int64
path         500 non-null object
width        500 non-null int64
pred         56 non-null float64
pred_cat     56 non-null object
pred_bbox    56 non-null object
dtypes: float64(1), int64(2), object(6)
memory usage: 35.3+ KB


In [22]:
df.head()

Unnamed: 0,bbox,category,filename,height,path,width,pred,pred_cat,pred_bbox
0,"[271, 117, 36, 23]",book,1139919_0.jpg,140,./images/book/529148,307,,,
1,"[271, 0, 36, 23]",book,1139919_1.jpg,309,./images/book/529148,307,,,
2,"[0, 117, 36, 23]",book,1139919_2.jpg,140,./images/book/529148,369,,,
3,"[0, 0, 36, 23]",book,1139919_3.jpg,309,./images/book/529148,369,,,
4,"[270, 116, 35, 22]",book,1139919.jpg,426,./images/book/529148,640,,,


In [23]:
df.to_csv(imageDir+cat+"/prediction_results.csv", index=False)