In [6]:
import numpy as np
from numpy import expand_dims
from keras.models import load_model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from matplotlib import pyplot
from matplotlib.patches import Rectangle

class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = 0
        self.score = 0
 
        self.label = np.argmax(self.classes)
        self.score = self.classes[self.label]
        

 
def decode_netout(netout, anchors, obj_thresh, input_h, input_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5

    boxes = []

    netout[..., :2]  = 1. / (1. + np.exp(-(netout[..., :2])))
    netout[..., 4:]  = 1. / (1. + np.exp(-(netout[..., 4:])))
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w
        
        for b in range(nb_box):
           
            objectness = netout[int(row)][int(col)][b][4]
           
            
            if(objectness.all() <= obj_thresh): continue
            
            
            x, y, w, h = netout[int(row)][int(col)][b][:4]

            x = (col + x) / grid_w 
            y = (row + y) / grid_h 
            w = anchors[2 * b + 0] * np.exp(w) / input_w 
            h = anchors[2 * b + 1] * np.exp(h) / input_h  
            
           
            classes = netout[int(row)][col][b][5:]
            
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            

            boxes.append(box)

    return boxes
 


def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    for box in boxes:
        for i in range(len(labels)):
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
    
    return v_boxes, v_labels, v_scores
 

 
model = load_model('model.h5')

input_w, input_h = 416, 416
photo_filename = 'African_Bush_Elephant.jpg'
image = load_img(photo_filename)
image_w, image_h = image.size
image = img_to_array(load_img(photo_filename, target_size=(input_w, input_h))).astype('float32')
image /= 255.0
image = expand_dims(image, 0)

yhat = model.predict(image)
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
class_threshold = 0.6
boxes = list()

for i in range(len(yhat)):
    boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)

#do_nms(boxes, 0.5)

new_w, new_h = input_w, input_h
for i in range(len(boxes)):
    x_offset, x_scale = (input_w - new_w)/2./input_w, float(new_w)/input_w
    y_offset, y_scale = (input_h - new_h)/2./input_h, float(new_h)/input_h
    boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
    boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
    boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
    boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)


labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
	"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
	"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
	"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
	"sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
	"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
	"apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
	"chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
	"remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
	"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)

for i in range(len(v_boxes)):
    print(v_labels[i],":", v_scores[i])


print(v_labels)
print(v_scores)
print(len(v_boxes))



for i in range(len(v_boxes)):
    box = v_boxes[i]
    y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
    width, height = x2 - x1, y2 - y1
    rect = Rectangle((x1, y1), width, height, fill=False, color='white')
    print(rect)




elephant : 94.29503679275513
elephant : 93.70639324188232
elephant : 99.60509538650513
elephant : 98.21500778198242
elephant : 86.31215691566467
['elephant', 'elephant', 'elephant', 'elephant', 'elephant']
[94.29503679275513, 93.70639324188232, 99.60509538650513, 98.21500778198242, 86.31215691566467]
5
Rectangle(xy=(76, 224), width=492, height=620, angle=0)
Rectangle(xy=(35, 254), width=511, height=676, angle=0)
Rectangle(xy=(74, 241), width=495, height=713, angle=0)
Rectangle(xy=(117, 256), width=468, height=700, angle=0)
Rectangle(xy=(74, 295), width=497, height=665, angle=0)
