In [2]:
import cv2
import numpy as np
import time

In [7]:
class Detection:
    """YOLOv3 detector built on ``cv2.dnn`` with an explanatory visualisation.

    ``detect`` runs the network on one frame and stores every candidate box
    together with the grid cell, anchor box and output layer it came from.
    ``draw_img`` applies non-maximum suppression with the current
    ``MIN_confidence`` / ``IOU_threshold`` and shows the surviving boxes, their
    grid cells and their anchor boxes in an OpenCV window named ``'image'``.

    Attributes that callers are expected to tune at runtime:
        MIN_confidence, IOU_threshold: thresholds passed to ``cv2.dnn.NMSBoxes``.
        grid_show, anchor_box_show: toggle the grid-cell / anchor-box overlays.
        is_pause: while True, ``detect`` leaves ``FPS`` untouched.
        is_scale_output, output_height: when ``is_scale_output`` is True the
            displayed frame is resized to ``output_height`` pixels high.
    """

    # Candidates whose objectness * class score is not above this value are
    # discarded in detect(); MIN_confidence is applied later, during NMS.
    PREFILTER_CONFIDENCE = 0.01
    # Down-sampling factor assumed for the first output layer: 416 / 32 = 13
    # cells per side, and every following layer doubles that number.
    # NOTE(review): matches the 13/26/52 layout this code was written for —
    # confirm before using a different network configuration.
    COARSEST_STRIDE = 32

    def __init__(self, path_yolo_weights='net/yolov3.weights', path_yolo_cfg='net/yolov3.cfg', 
                 path_yolo_classes='net/coco.txt'):
        """Load the network and class names and set the default UI state.

        Args:
            path_yolo_weights: path to the network weights file.
            path_yolo_cfg: path to the network configuration file.
            path_yolo_classes: text file with one class name per line.
        """
        self.net = cv2.dnn.readNet(path_yolo_weights, path_yolo_cfg)
        # Side length (pixels) of the square blob fed to the network.
        self.input_square_size = 416
        if cv2.cuda.getCudaEnabledDeviceCount():
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            print('GPU is enabled.')
        else:
            print('GPU is not enabled. CV2 will use CPU instead.')

        with open(path_yolo_classes, 'r') as f:
            self.classes = f.read().splitlines()

        # Display scaling applied at the end of draw_img().
        self.is_scale_output = True
        self.output_height = 800

        # Overlay toggles and pause flag (changed by the caller's key handling).
        self.anchor_box_show = True
        self.grid_show = True
        self.is_pause = False

        self.MIN_confidence = 0.5
        self.IOU_threshold = 0.4
        self.FPS = 0

        self.font = cv2.FONT_HERSHEY_PLAIN
        self.colors = ((255,0,0), (0,255,0), (0,0,255), (255,255,0), (0,255,255), (255,0,255), (128,0,0))
        # One row per output layer; each row holds three (width, height) pairs
        # flattened into six numbers.
        self.anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]

        # Per-frame state, filled in by detect().
        self.img = None
        self.height, self.width = None, None
        self.layerOutputs = None
        self.boxes = None
        self.confidences = None
        self.class_ids = None
        self.grid_cells = None
        self.anchor_boxes = None
        self.anchor_centers = None
        self.detection_outputs = None

    def _cells_per_side(self, output_index):
        """Return the number of grid cells per image side for an output layer."""
        return (self.input_square_size // self.COARSEST_STRIDE) * 2 ** output_index

    def detect(self, img):
        """Run the network on ``img`` and store all candidate detections.

        Fills ``boxes`` ([x, y, w, h] in pixels of ``img``), ``confidences``,
        ``class_ids``, ``grid_cells`` ([column, row]), ``anchor_boxes``,
        ``anchor_centers`` and ``detection_outputs`` (index of the output layer)
        as parallel lists. Updates ``FPS`` unless ``is_pause`` is set.

        Args:
            img: image array with three dimensions (height, width, channels).
        """
        start_time = time.time()
        self.img = img
        self.height, self.width, _ = img.shape
        blob = cv2.dnn.blobFromImage(img, 1 / 255, (self.input_square_size, self.input_square_size),
                                     (0, 0, 0), swapRB=True, crop=False)

        self.net.setInput(blob)
        output_layers_names = self.net.getUnconnectedOutLayersNames()
        self.layerOutputs = self.net.forward(output_layers_names)

        boxes = []
        confidences = []
        class_ids = []
        grid_cells = []
        anchor_boxes = []
        anchor_centers = []
        detection_outputs = []

        for i, output in enumerate(self.layerOutputs):
            cells = self._cells_per_side(i)

            # Score every row at once instead of looping over all rows in Python:
            # columns 0-3 are the box, column 4 the objectness, 5+ the class scores.
            scores = output[:, 5:]
            best_class = np.argmax(scores, axis=1)
            confidence = output[:, 4] * scores[np.arange(output.shape[0]), best_class]

            for j in np.flatnonzero(confidence > self.PREFILTER_CONFIDENCE):
                j = int(j)
                detection = output[j]

                # Rows are treated as three consecutive anchors per grid cell,
                # with cells laid out row by row.
                anchor_slot = j % 3
                anchor_box = self.anchors[i][2 * anchor_slot:2 * anchor_slot + 2]
                grid_cell = [(j // 3) % cells, j // (3 * cells)]

                # Box values are fractions of the image size; convert to pixels.
                center_x = int(detection[0] * self.width)
                center_y = int(detection[1] * self.height)
                w = int(detection[2] * self.width)
                h = int(detection[3] * self.height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence[j]))
                class_ids.append(best_class[j])
                grid_cells.append(grid_cell)
                anchor_boxes.append(anchor_box)
                anchor_centers.append((center_x, center_y))
                detection_outputs.append(i)

        self.boxes = boxes
        self.confidences = confidences
        self.class_ids = class_ids
        self.grid_cells = grid_cells
        self.anchor_boxes = anchor_boxes
        self.anchor_centers = anchor_centers
        self.detection_outputs = detection_outputs

        if not self.is_pause:
            elapsed = time.time() - start_time
            # A zero reading is possible on coarse clocks; keep the previous FPS then.
            if elapsed > 0:
                self.FPS = 1 / elapsed

    @staticmethod
    def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
        """Resize ``image`` while keeping its aspect ratio.

        Args:
            image: array whose first two dimensions are (height, width).
            width: target width in pixels; takes precedence over ``height``.
            height: target height in pixels, used only when ``width`` is None.
            inter: interpolation flag forwarded to ``cv2.resize``.

        Returns:
            ``image`` itself when neither ``width`` nor ``height`` is given,
            otherwise the resized image.
        """
        (h, w) = image.shape[:2]
        if width is None and height is None:
            return image
        if width is None:
            r = height / float(h)
            dim = (int(w * r), height)
        else:
            r = width / float(w)
            dim = (width, int(h * r))
        return cv2.resize(image, dim, interpolation = inter)

    def draw_img(self):
        """Draw the current detections on a copy of the last frame and show it.

        Non-maximum suppression is re-run on every call, so changing
        ``MIN_confidence`` / ``IOU_threshold`` and calling this again updates
        the picture without another forward pass.

        Raises:
            RuntimeError: if ``detect`` has not been called yet.
        """
        if self.img is None:
            raise RuntimeError('detect() must be called before draw_img().')

        img = self.img.copy()
        cv2.putText(img, "IOU:  {0:.2f}".format(self.IOU_threshold), (20, 40), self.font, 3, (0, 0, 255), 2)
        cv2.putText(img, "CONF: {0:.2f}".format(self.MIN_confidence), (20, 80), self.font, 3, (255, 0, 0), 2)

        indexes = cv2.dnn.NMSBoxes(self.boxes, self.confidences, self.MIN_confidence, self.IOU_threshold)
        # Flatten so that an empty result and a column-shaped result are both iterable.
        kept = np.array(indexes, dtype=int).reshape(-1)

        for c, i in enumerate(kept):
            x, y, w, h = self.boxes[i]
            label = str(self.classes[self.class_ids[i]])
            confidence = str(round(self.confidences[i], 2))
            color = self.colors[c%len(self.colors)]
            level = self.detection_outputs[i]
            # Thinner lines for later output layers: 8/4/2 for boxes, 4/2/1 for overlays.
            box_thickness = int(8 / 2 ** level)
            overlay_thickness = int(4 / 2 ** level)

            cv2.rectangle(img, (x, y), (x + w, y + h), color, box_thickness)
            cv2.putText(img, '{}'.format(c), (x+2, y-5),
                        self.font, 2, color, 3)

            # Pixel rectangle of the grid cell that produced this detection.
            num_of_grids = self._cells_per_side(level)
            grid_x = int(self.grid_cells[i][0] * self.width / num_of_grids)
            grid_y = int(self.grid_cells[i][1] * self.height / num_of_grids)
            grid_w = int(self.width / num_of_grids)
            grid_h = int(self.height / num_of_grids)
            if self.grid_show:
                cv2.rectangle(img, (grid_x, grid_y), (grid_x + grid_w, grid_y + grid_h), color,
                              overlay_thickness)
                cv2.circle(img, (self.anchor_centers[i]), 3, color, overlay_thickness)

            # Anchor box: centred on the grid cell, size scaled from network
            # input pixels to image pixels.
            ab_center_x = int(self.grid_cells[i][0] * self.width / num_of_grids + grid_w * 0.5)
            ab_center_y = int(self.grid_cells[i][1] * self.height / num_of_grids + grid_h * 0.5)
            half_box_x = 0.5 * self.anchor_boxes[i][0] * self.width / self.input_square_size
            half_box_y = 0.5 * self.anchor_boxes[i][1] * self.height / self.input_square_size
            if self.anchor_box_show:
                top_left = (int(ab_center_x - half_box_x), int(ab_center_y - half_box_y))
                bottom_right = (int(ab_center_x + half_box_x), int(ab_center_y + half_box_y))
                cv2.rectangle(img, top_left, bottom_right, color, overlay_thickness)
                # White hairline on top keeps the anchor box visible on any background.
                cv2.rectangle(img, top_left, bottom_right, (255,255,255), 1)

            cv2.putText(img, '{}:{} {} {}-{}'.format(c, label, confidence, self.anchor_boxes[i], 
                                    level), (20, 160 + 30 * c), self.font, 2, color, 3)

        cv2.putText(img, 'FPS: {:.2f}'.format(self.FPS), (20, self.height-20),
                self.font, 2, (0,255,0), 6)

        # Scale last so that all overlay coordinates above stay in source pixels.
        if self.is_scale_output:
            img = self.image_resize(img, height=self.output_height)
        cv2.imshow('image', img)

In [8]:
# Build the detector with its default weight, config and class-name paths
# ('net/yolov3.weights', 'net/yolov3.cfg', 'net/coco.txt'); prints which backend is used.
Det = Detection()

GPU is enabled.


In [9]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Return ``image`` resized to a target width or height, keeping aspect ratio.

    ``width`` wins when both are given; ``height`` is used only when ``width``
    is None. With neither given the input is returned unchanged. ``inter`` is
    passed to ``cv2.resize`` as the interpolation flag.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # Nothing requested: hand back the very same object.
            return image
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    else:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))

    return cv2.resize(image, target, interpolation = inter)

def draw_img(y=None):
    """Draw the current global detections on a copy of ``img_original`` and show it.

    Reads the notebook globals ``img_original``, ``boxes``, ``confidences``,
    ``class_ids``, ``classes``, ``colors``, ``font``, ``grid_cells``,
    ``anchor_boxes``, ``anchor_centers``, ``detection_outputs``, ``width``,
    ``height``, ``input_square_size``, ``IOU_threshold``, ``MIN_confidence``,
    ``grid_show``, ``anchor_box_show``, ``is_pause`` and ``start_time``;
    updates the global ``FPS`` unless ``is_pause`` is true.

    Args:
        y: unused; kept (now optional) so that both ``draw_img()`` and
            ``draw_img(value)`` are valid calls.
    """
    global FPS

    img = img_original.copy()
    cv2.putText(img, "IOU:  {0:.2f}".format(IOU_threshold), (20, 40), font, 3, (0, 0, 255), 2)
    cv2.putText(img, "CONF: {0:.2f}".format(MIN_confidence), (20, 80), font, 3, (255, 0, 0), 2)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, MIN_confidence, IOU_threshold)
    # Flatten so that an empty result and a column-shaped result are both iterable.
    kept = np.array(indexes, dtype=int).reshape(-1)

    for c, i in enumerate(kept):
        box_x, box_y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))
        color = colors[c%len(colors)]
        level = detection_outputs[i]
        # Thinner lines for later output layers: 8/4/2 for boxes, 4/2/1 for overlays.
        box_thickness = int(8 / 2 ** level)
        overlay_thickness = int(4 / 2 ** level)

        cv2.rectangle(img, (box_x, box_y), (box_x + w, box_y + h), color, box_thickness)
        cv2.putText(img, '{}'.format(c), (box_x+2, box_y-5),
                    font, 2, color, 3)

        # Pixel rectangle of the grid cell that produced this detection.
        num_of_grids = (13 * 2 ** level)
        grid_x = int(grid_cells[i][0] * width / num_of_grids)
        grid_y = int(grid_cells[i][1] * height / num_of_grids)
        grid_w = int(width / num_of_grids)
        grid_h = int(height / num_of_grids)
        if grid_show:
            cv2.rectangle(img, (grid_x, grid_y), (grid_x + grid_w, grid_y + grid_h), color,
                          overlay_thickness)
            cv2.circle(img, (anchor_centers[i]), 3, color, overlay_thickness)

        # Anchor box: centred on the grid cell, size scaled from network input
        # pixels to image pixels.
        ab_center_x = int(grid_cells[i][0] * width / num_of_grids + grid_w * 0.5)
        ab_center_y = int(grid_cells[i][1] * height / num_of_grids + grid_h * 0.5)
        half_box_x = 0.5 * anchor_boxes[i][0] * width / input_square_size
        half_box_y = 0.5 * anchor_boxes[i][1] * height / input_square_size
        if anchor_box_show:
            top_left = (int(ab_center_x - half_box_x), int(ab_center_y - half_box_y))
            bottom_right = (int(ab_center_x + half_box_x), int(ab_center_y + half_box_y))
            cv2.rectangle(img, top_left, bottom_right, color, overlay_thickness)
            # White hairline on top keeps the anchor box visible on any background.
            cv2.rectangle(img, top_left, bottom_right, (255,255,255), 1)

        cv2.putText(img, '{}:{} {} {}-{}'.format(c, label, confidence, anchor_boxes[i], level), (20, 160 + 30 * c),
                    font, 2, color, 3)

    if not is_pause:
        FPS = 1/(time.time() - start_time)
    cv2.putText(img, 'FPS: {:.2f}'.format(FPS), (20, height-20),
            font, 2, (0,255,0), 6)
    img = cv2.resize(img, (1920, 1080)) 
    cv2.imshow('image', img)

In [12]:
# Interactive detection on a video.
#   w / s : IOU threshold +/- 0.01        W / S : +/- 0.1
#   d / a : confidence threshold +/- 0.01  D / A : +/- 0.1
#   g     : toggle grid-cell overlay       b     : toggle anchor-box overlay
#   space : pause / resume                 q     : quit
video = cv2.VideoCapture(r'data\NY.wmv')
#video = cv2.VideoCapture(0)

KEY_SPACE = 32
THRESHOLD_MIN, THRESHOLD_MAX = 0.01, 0.99

# key code -> (Detection attribute, signed step)
THRESHOLD_KEYS = {
    ord('w'): ('IOU_threshold', 0.01),
    ord('s'): ('IOU_threshold', -0.01),
    ord('W'): ('IOU_threshold', 0.1),
    ord('S'): ('IOU_threshold', -0.1),
    ord('d'): ('MIN_confidence', 0.01),
    ord('a'): ('MIN_confidence', -0.01),
    ord('D'): ('MIN_confidence', 0.1),
    ord('A'): ('MIN_confidence', -0.1),
}
# key code -> boolean Detection attribute to flip
TOGGLE_KEYS = {ord('g'): 'grid_show', ord('b'): 'anchor_box_show'}


def handle_key(det, key):
    """Apply a threshold or overlay key to ``det`` and redraw.

    Returns True when ``key`` was one of the tuning/toggle keys, False otherwise
    (in which case nothing is changed or drawn).
    """
    if key in THRESHOLD_KEYS:
        attr, step = THRESHOLD_KEYS[key]
        value = getattr(det, attr) + step
        setattr(det, attr, min(max(value, THRESHOLD_MIN), THRESHOLD_MAX))
    elif key in TOGGLE_KEYS:
        attr = TOGGLE_KEYS[key]
        setattr(det, attr, not getattr(det, attr))
    else:
        return False
    det.draw_img()
    return True


if not video.isOpened():
    print('Video not found.')

try:
    quit_requested = False
    while not quit_requested:
        check, img = video.read()
        # read() reports failure through `check`; the frame is None in that
        # case (end of stream, or the capture never opened).
        if not check or img is None:
            break

        Det.detect(img)
        Det.draw_img()

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        if key == KEY_SPACE:
            # Paused: block on the keyboard and keep re-drawing the same frame.
            Det.is_pause = True
            while True:
                key = cv2.waitKey(0)
                if key == KEY_SPACE:
                    break
                if key == ord('q'):
                    # Quit the whole loop, not just the pause.
                    quit_requested = True
                    break
                handle_key(Det, key)
            Det.is_pause = False
        else:
            handle_key(Det, key)
finally:
    # Always free the capture and the window, even if a frame fails.
    video.release()
    cv2.destroyAllWindows()

In [4]:
# Single-image variant of the demo, driven by notebook globals instead of the
# Detection class. This cell reads `net`, `anchors`, `IOU_threshold`,
# `MIN_confidence`, `grid_show` and `anchor_box_show` without defining them,
# and draw_img() additionally needs `classes`, `colors`, `font`,
# `input_square_size` (and `FPS` once paused).
# NOTE(review): those names must come from an earlier cell that is not part of
# this notebook as shown — confirm before running on a fresh kernel.
start_time = time.time()

img_original = cv2.imread(r'data\giraffe.jpg')
if img_original is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError(r"Could not read image 'data\giraffe.jpg'")

# The blob is built from the full-size image; the resized copy is for display only.
blob = cv2.dnn.blobFromImage(img_original, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
img_original = image_resize(img_original, height = 1200)
height, width, _ = img_original.shape

net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)

boxes = []
confidences = []
class_ids = []
grid_cells = []
anchor_boxes = []
anchor_centers = []
detection_outputs = []

for i, output in enumerate(layerOutputs):
    for j, detection in enumerate(output):
        # Columns 0-3: box, column 4: objectness, columns 5+: class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = detection[4] * scores[class_id]

        if confidence > 0.01:
            # Rows are treated as three consecutive anchors per grid cell,
            # with cells laid out row by row.
            anchor_box = anchors[i][2 * (j % 3):2 * (j % 3) + 2]
            grid_cell = [int(j / 3) % (13 * 2 ** i), int(j / (39 * 2 ** i))]
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append((float(confidence)))
            class_ids.append(class_id)
            grid_cells.append(grid_cell)
            anchor_boxes.append(anchor_box)
            anchor_centers.append((center_x, center_y))
            detection_outputs.append(i)

# First draw measures the time since `start_time`, so it must not count as paused.
is_pause = False
# draw_img's positional argument is never read by the function; pass a placeholder.
draw_img(None)

# From here on only thresholds/overlays change, so freeze the FPS read-out.
is_pause = True
while True:
    key = cv2.waitKey(0)
    if key == 32:
        is_pause = False
        break
    if key == ord('q'):
        break

    if key == ord('w'):
        IOU_threshold = min(IOU_threshold + 0.01, 0.99)
    elif key == ord('s'):
        IOU_threshold = max(IOU_threshold - 0.01, 0.01)
    elif key == ord('d'):
        MIN_confidence = min(MIN_confidence + 0.01, 0.99)
    elif key == ord('a'):
        MIN_confidence = max(MIN_confidence - 0.01, 0.01)
    elif key == ord('W'):
        IOU_threshold = min(IOU_threshold + 0.1, 0.99)
    elif key == ord('S'):
        IOU_threshold = max(IOU_threshold - 0.1, 0.01)
    elif key == ord('D'):
        MIN_confidence = min(MIN_confidence + 0.1, 0.99)
    elif key == ord('A'):
        MIN_confidence = max(MIN_confidence - 0.1, 0.01)
    elif key == ord('g'):
        grid_show = not grid_show
    elif key == ord('b'):
        anchor_box_show = not anchor_box_show
    else:
        # Unmapped key: nothing changed, nothing to redraw.
        continue
    draw_img(None)
cv2.destroyAllWindows()

In [9]:
# Manually release the global `video` capture (e.g. after an interrupted run).
video.release()

In [9]:
# Run the external script video_bb.py in a separate Python process (IPython shell escape).
# NOTE(review): the script itself is not part of this notebook; the numbers it prints
# look like per-frame timings in seconds — confirm against the script.
!python video_bb.py

OpenCV(ocl4dnn): consider to specify kernel configuration cache directory 
                 via OPENCV_OCL4DNN_CONFIG_PATH parameter.
OpenCL program build log: dnn/dummy
Status -11: CL_BUILD_PROGRAM_FAILURE
-cl-no-subgroup-ifp
Error in processing command line: Don't understand command line argument "-cl-no-subgroup-ifp"!
1.092155933380127
0.2592344284057617
0.26123785972595215
0.262237548828125
0.26218748092651367
0.2602505683898926
0.2584221363067627
0.2622377872467041
0.25623250007629395
0.25823473930358887
0.2552316188812256
0.2582361698150635
0.2542307376861572
0.25823473930358887
0.2652413845062256
0.261089563369751
0.25623202323913574
0.25923633575439453
0.25623250007629395
0.259235143661499
1.1185383796691895
0.2592275142669678
0.2572333812713623
0.25823545455932617
0.2592346668243408
0.26323938369750977
0.25823426246643066
0.2602357864379883
0.26200413703918457
0.25823378562927246
0.26323962211608887
0.2602355480194092
0.2602367401123047
0.25623273849487305
0.25623273849487305


[ WARN:0] global C:\projects\opencv-python\opencv\modules\videoio\src\cap_msmf.cpp (674) SourceReaderCB::~SourceReaderCB terminating async callback


In [1]:
# Show which OpenCV build the kernel is using.
import cv2
print(cv2.__version__)

4.5.1
