In [1]:
import cv2
import numpy as np
import time

# Side length (pixels) of the square blob fed to the network.
input_square_size = 416
# Cells per side of the coarsest detection grid; the other two grids have
# 2x and 4x as many cells per side (see the summary printed below).
grid_per_width = input_square_size // 32
model_name = 'yolov4'
path_weights = 'net/{}.weights'.format(model_name)
path_cfg = 'net/{}.cfg'.format(model_name)
net = cv2.dnn.readNet(path_weights, path_cfg)
# Alternative OpenCL backend, left disabled:
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCL)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL_FP16)

# getCudaEnabledDeviceCount() returns -1 when the CUDA driver is missing or
# incompatible, so a plain truthiness test would wrongly select the CUDA
# backend in that case; require a strictly positive device count instead.
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    print('GPU is enabled.')
else:
    print('GPU is not enabled. OpenCV-{} will use CPU instead.'.format(cv2.__version__))
print('Detection grid sizes: ({0}:{0}), ({1}:{1}), ({2}:{2}).'.format(grid_per_width,
                                                2*grid_per_width, 4*grid_per_width))

# One class label per line; indexed by the class id picked for each detection.
with open('net/coco.txt', 'r') as f:
    classes = f.read().splitlines()
font = cv2.FONT_HERSHEY_PLAIN

# Score and overlap thresholds passed to cv2.dnn.NMSBoxes; tunable at runtime
# through the hotkeys handled in the detection cells.
MIN_confidence = 0.5
IOU_threshold = 0.4
# Last measured frame rate, updated by draw_img() while not paused.
FPS = 0

# Drawing colours, cycled over the detections kept after NMS.
colors = ((255,0,0), (0,255,0), (0,0,255), (255,255,0), (0,255,255), (255,0,255), (128,0,0))
# Flattened (width, height) anchor pairs, three pairs per detection output.
# Row 0 is paired with the coarsest grid and row 2 with the finest one; the
# values are scaled by display_size / input_square_size when drawn.
# NOTE(review): these look like the YOLOv3 anchor set while model_name is
# 'yolov4' -- confirm against the anchors listed in net/yolov4.cfg.
anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]
# Overlay toggles ('b' and 'g' hotkeys).
anchor_box_show = True
grid_show = True

GPU is enabled.
Detection grid sizes: (13:13), (26:26), (52:52).


In [4]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a requested width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (rows, columns).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the resized image returned by ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; the height follows proportionally.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Height drives the scale; the width follows proportionally.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    return cv2.resize(image, target_size, interpolation = inter)

def draw_img():
    """Draw the NMS-filtered detections on a copy of the frame and show it.

    Works on notebook-level state instead of arguments: it reads
    ``img_original``, ``width`` / ``height``, the per-detection lists
    (``boxes``, ``confidences``, ``class_ids``, ``grid_cells``,
    ``anchor_boxes``, ``anchor_centers``, ``detection_outputs``), both
    thresholds and the ``grid_show`` / ``anchor_box_show`` toggles.
    Unless ``is_pause`` is set, the global ``FPS`` is recomputed from
    ``start_time``. The result is displayed in the window named 'image'.
    """
    global FPS

    canvas = img_original.copy()
    cv2.putText(canvas, "IOU:  {0:.2f}".format(IOU_threshold), (20, 40), font, 3, (0, 0, 255), 2)
    cv2.putText(canvas, "CONF: {0:.2f}".format(MIN_confidence), (20, 80), font, 3, (255, 0, 0), 2)

    kept = cv2.dnn.NMSBoxes(boxes, confidences, MIN_confidence, IOU_threshold)
    if len(kept) > 0:
        for rank, det in enumerate(kept.flatten()):
            left, top, box_w, box_h = boxes[det]
            name = str(classes[class_ids[det]])
            percent = str(round(100*confidences[det]))
            tint = colors[rank % len(colors)]
            scale = detection_outputs[det]
            cell_col, cell_row = grid_cells[det][0], grid_cells[det][1]
            anchor = anchor_boxes[det]
            # Overlay lines get thinner for the finer detection grids.
            thin = int(4 / 2 ** scale)
            legend_at = (20, 160 + 30 * rank)

            # Predicted box, tagged with its rank among the kept detections.
            cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), tint,
                          int(8 / 2 ** scale))
            cv2.putText(canvas, '{}'.format(rank), (left+2, top-5),
                        font, 2, tint, 3)

            # Grid cell that produced the detection, in display pixels.
            cells_per_side = grid_per_width * 2 ** scale
            cell_left = int(cell_col * width / cells_per_side)
            cell_top = int(cell_row * height / cells_per_side)
            cell_w = int(width / cells_per_side)
            cell_h = int(height / cells_per_side)
            if grid_show:
                cv2.rectangle(canvas, (cell_left, cell_top),
                              (cell_left + cell_w, cell_top + cell_h), tint, thin)
                cv2.circle(canvas, (anchor_centers[det]), 3, tint, thin)

            # Anchor box centred on the grid cell, rescaled from network
            # input size to display size.
            mid_x = int(cell_col * width / cells_per_side + cell_w * 0.5)
            mid_y = int(cell_row * height / cells_per_side + cell_h * 0.5)
            half_w = 0.5 * anchor[0] * width / input_square_size
            half_h = 0.5 * anchor[1] * height / input_square_size
            if not anchor_box_show:
                cv2.putText(canvas, '{}:{} {}%'.format(rank, name, percent), legend_at, font, 2, tint, 3)
            else:
                corner_a = (int(mid_x - half_w), int(mid_y - half_h))
                corner_b = (int(mid_x + half_w), int(mid_y + half_h))
                # Coloured outline first, then a 1px white line on top of it.
                cv2.rectangle(canvas, corner_a, corner_b, tint, thin)
                cv2.rectangle(canvas, corner_a, corner_b, (255,255,255), 1)
                cv2.putText(canvas, '{}:{} {}% {}({})'.format(rank, name, percent, anchor,
                                    scale), legend_at, font, 2, tint, 3)

    # While paused the previously measured frame rate stays on screen.
    if not is_pause:
        FPS = 1/(time.time() - start_time)
    cv2.putText(canvas, 'FPS: {:.2f}'.format(FPS), (20, height-20),
            font, 2, (0,255,0), 6)
    cv2.imshow('image', canvas)

# Video detection

In [5]:
#video = cv2.VideoCapture(r'data/NY.wmv')
video = cv2.VideoCapture(0)


def _apply_hotkey(key):
    """Apply one threshold/overlay hotkey to the notebook-level display settings.

    ``w``/``s`` and ``W``/``S`` change ``IOU_threshold`` by 0.01 / 0.1,
    ``d``/``a`` and ``D``/``A`` change ``MIN_confidence`` by 0.01 / 0.1
    (both kept within [0.01, 0.99]), ``g`` toggles ``grid_show`` and ``b``
    toggles ``anchor_box_show``.

    Args:
        key: Key code as returned by ``cv2.waitKey``.

    Returns:
        True when ``key`` was one of the hotkeys above (the caller should
        redraw), False otherwise.
    """
    global IOU_threshold, MIN_confidence, grid_show, anchor_box_show
    iou_steps = {ord('w'): 0.01, ord('s'): -0.01, ord('W'): 0.1, ord('S'): -0.1}
    conf_steps = {ord('d'): 0.01, ord('a'): -0.01, ord('D'): 0.1, ord('A'): -0.1}
    if key in iou_steps:
        IOU_threshold = min(max(IOU_threshold + iou_steps[key], 0.01), 0.99)
    elif key in conf_steps:
        MIN_confidence = min(max(MIN_confidence + conf_steps[key], 0.01), 0.99)
    elif key == ord('g'):
        grid_show = not grid_show
    elif key == ord('b'):
        anchor_box_show = not anchor_box_show
    else:
        return False
    return True


# The output layer names do not change between frames, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()
quit_requested = False

try:
    while not quit_requested:
        is_pause = False
        start_time = time.time()

        check, frame = video.read()
        if not check or frame is None:
            # End of the video file or the camera could not deliver a frame:
            # stop instead of re-running the network on stale data.
            print('No frame received (end of stream or camera unavailable); stopping.')
            break
        blob = cv2.dnn.blobFromImage(frame, 1 / 255, (input_square_size, input_square_size),
                                     (0, 0, 0), swapRB=True, crop=False)
        img_original = image_resize(frame, height = 720)
        height, width = img_original.shape[:2]

        net.setInput(blob)
        layerOutputs = net.forward(output_layers_names)

        # Per-detection state consumed by draw_img(); all lists share one index.
        boxes = []
        confidences = []
        class_ids = []
        grid_cells = []
        anchor_boxes = []
        anchor_centers = []
        detection_outputs = []

        for i, output in enumerate(layerOutputs):
            # For yolov4 the first and third outputs are swapped so that the
            # scale index lines up with the rows of `anchors` and the grid sizes.
            scale = {0: 2, 2: 0}.get(i, i) if model_name == 'yolov4' else i
            cells_per_side = grid_per_width * 2 ** scale

            # Score every row at once and only visit the few rows that pass the
            # loose 0.01 pre-filter (the real threshold is applied in draw_img()).
            class_scores = output[:, 5:]
            best_class = np.argmax(class_scores, axis=1)
            best_conf = output[:, 4] * class_scores[np.arange(len(output)), best_class]

            for j in np.flatnonzero(best_conf > 0.01):
                j = int(j)
                detection = output[j]
                # Rows are visited as three consecutive anchors per grid cell,
                # cells running along a grid row first.
                anchor_box = anchors[scale][2 * (j % 3):2 * (j % 3) + 2]
                grid_cell = [(j // 3) % cells_per_side, j // (3 * cells_per_side)]
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(best_conf[j]))
                class_ids.append(best_class[j])
                grid_cells.append(grid_cell)
                anchor_boxes.append(anchor_box)
                anchor_centers.append((center_x, center_y))
                detection_outputs.append(scale)

        draw_img()

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        if key == 32:
            # Space pauses on the current frame; hotkeys keep working and
            # redraw it. Space resumes, 'q' quits the whole loop.
            is_pause = True
            while True:
                key = cv2.waitKey(0)
                if key == 32:
                    break
                if key == ord('q'):
                    quit_requested = True
                    break
                if _apply_hotkey(key):
                    draw_img()
            is_pause = False
        elif _apply_hotkey(key):
            draw_img()
finally:
    # Always free the capture device and close the window, even on errors.
    video.release()
    cv2.destroyAllWindows()

# Image detection

In [11]:
start_time = time.time()
is_pause = False

# Forward slashes keep the path portable (a backslash is only a path
# separator on Windows) and match the other paths used in this notebook.
image_path = 'data/giraffe.jpg'
source_image = cv2.imread(image_path)
if source_image is None:
    # cv2.imread reports failure by returning None; stop here rather than
    # running the rest of the cell on leftovers from a previous cell.
    raise FileNotFoundError('Image not found: {}'.format(image_path))

blob = cv2.dnn.blobFromImage(source_image, 1 / 255, (input_square_size, input_square_size),
                             (0, 0, 0), swapRB=True, crop=False)
img_original = image_resize(source_image, height = 800)
height, width = img_original.shape[:2]

net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)

# Per-detection state consumed by draw_img(); all lists share one index.
boxes = []
confidences = []
class_ids = []
grid_cells = []
anchor_boxes = []
anchor_centers = []
detection_outputs = []

for i, output in enumerate(layerOutputs):
    # For yolov4 the first and third outputs are swapped so that the scale
    # index lines up with the rows of `anchors` and the grid sizes.
    scale = {0: 2, 2: 0}.get(i, i) if model_name == 'yolov4' else i
    cells_per_side = grid_per_width * 2 ** scale

    # Score every row at once and only visit the rows that pass the loose
    # 0.01 pre-filter (the real threshold is applied in draw_img()).
    class_scores = output[:, 5:]
    best_class = np.argmax(class_scores, axis=1)
    best_conf = output[:, 4] * class_scores[np.arange(len(output)), best_class]

    for j in np.flatnonzero(best_conf > 0.01):
        j = int(j)
        detection = output[j]
        # Rows are visited as three consecutive anchors per grid cell, cells
        # running along a grid row first.
        anchor_box = anchors[scale][2 * (j % 3):2 * (j % 3) + 2]
        grid_cell = [(j // 3) % cells_per_side, j // (3 * cells_per_side)]
        center_x = int(detection[0] * width)
        center_y = int(detection[1] * height)
        w = int(detection[2] * width)
        h = int(detection[3] * height)
        x = int(center_x - w / 2)
        y = int(center_y - h / 2)

        boxes.append([x, y, w, h])
        confidences.append(float(best_conf[j]))
        class_ids.append(best_class[j])
        grid_cells.append(grid_cell)
        anchor_boxes.append(anchor_box)
        anchor_centers.append((center_x, center_y))
        detection_outputs.append(scale)

# Hotkeys: w/s and W/S step the IOU threshold by 0.01 / 0.1, d/a and D/A do
# the same for the confidence threshold, g / b toggle the grid and anchor
# overlays, space or q closes the window.
iou_steps = {ord('w'): 0.01, ord('s'): -0.01, ord('W'): 0.1, ord('S'): -0.1}
conf_steps = {ord('d'): 0.01, ord('a'): -0.01, ord('D'): 0.1, ord('A'): -0.1}

try:
    draw_img()

    while True:
        # A still image is always "paused": the FPS value is not refreshed.
        is_pause = True
        key = cv2.waitKey(0)
        if key == 32:
            is_pause = False
            break
        if key == ord('q'):
            break

        if key in iou_steps:
            IOU_threshold = min(max(IOU_threshold + iou_steps[key], 0.01), 0.99)
        elif key in conf_steps:
            MIN_confidence = min(max(MIN_confidence + conf_steps[key], 0.01), 0.99)
        elif key == ord('g'):
            grid_show = not grid_show
        elif key == ord('b'):
            anchor_box_show = not anchor_box_show
        else:
            # Unrecognised key: nothing changed, so no redraw is needed.
            continue
        draw_img()
finally:
    # Close the window even if drawing fails.
    cv2.destroyAllWindows()