In [24]:
import time
import urllib
import urllib.request

import cv2
import numpy as np

In [96]:
class Detection:
    """Run a Darknet YOLO model through ``cv2.dnn`` and draw an annotated preview.

    Typical use is ``detect(img)`` followed by ``draw_img()``.  ``detect`` stores
    every candidate whose confidence exceeds 0.01; ``draw_img`` applies
    ``MIN_confidence`` / ``IOU_threshold`` via non-maximum suppression each time
    it is called, so the thresholds can be changed and the same frame redrawn
    without running the network again.
    """

    def __init__(self, model_name = 'yolov3', input_square_size=416,
                 path_yolo_classes='net/coco.txt', output_height=800):
        """Load the network and class names and initialise display state.

        Args:
            model_name: Base name of the files ``net/<model_name>.weights`` and
                ``net/<model_name>.cfg``.
            input_square_size: Requested side length of the square network
                input.  It is rounded to a multiple of 32 (never below 32).
            path_yolo_classes: Text file with one class label per line.
            output_height: Height in pixels of the displayed image while
                ``is_scale_output`` is True.
        """
        self.model_name = model_name
        path_weights = 'net/{}.weights'.format(self.model_name)
        path_cfg = 'net/{}.cfg'.format(self.model_name)
        self.net = cv2.dnn.readNet(path_weights, path_cfg)
        # At least one grid cell, otherwise the network input size would be 0.
        self.grid_per_width = max(1, int(round(input_square_size/32)))
        self.input_square_size = self.grid_per_width * 32
        if input_square_size%32:
            print('''Value of input_square_size={} is indivisible by 32, 
input_square_size={} will be used instead. 
Choose input size that is integer multiple of 32(eg.320,416,608,...).'''.format(input_square_size, 
                                                                                self.input_square_size))
        
        self.is_scale_output = True
        self.output_height = output_height
        
        if cv2.cuda.getCudaEnabledDeviceCount():    
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            print('GPU is enabled.')
        else:
            print('GPU is NOT enabled. OpenCV-{} will use CPU instead.'.format(cv2.__version__))
        
        print('Detection grid sizes: ({0}x{0}), ({1}x{1}), ({2}x{2}).'.format(self.grid_per_width, 
                                                2*self.grid_per_width, 4*self.grid_per_width))
            
        with open(path_yolo_classes, 'r') as f:
            self.classes = f.read().splitlines()    
        
        # Display toggles (flipped by the keyboard handler).
        self.anchor_box_show = False
        self.grid_show = False   
        self.is_pause = False
        
        # Thresholds handed to cv2.dnn.NMSBoxes in draw_img().
        self.MIN_confidence = 0.5
        self.IOU_threshold = 0.4
        self.FPS = None
        self.start_time = None

        self.font = cv2.FONT_HERSHEY_PLAIN
        self.colors = ((255,0,0), (0,255,0), (0,0,255), (255,255,0), (0,255,255), (255,0,255), (128,0,0))        
        # One flat [w, h, w, h, w, h] list per output scale; draw_img() rescales
        # them from network-input pixels to the displayed image.
        # NOTE(review): these look like the stock yolov3 anchors; the yolov4 cfg
        # may define different ones - confirm if the anchor overlay matters.
        self.anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]
        self.img = None
        self.height, self.width = None, None
        self.layerOutputs = None
        self.boxes = None
        self.confidences = None
        self.class_ids = None
        self.grid_cells = None
        self.anchor_boxes = None
        self.anchor_centers = None
        self.detection_outputs = None
        
    def detect(self, img):
        """Run the network on ``img`` and store all candidate detections.

        Fills ``boxes`` ([x, y, w, h] in displayed-image pixels),
        ``confidences``, ``class_ids``, ``grid_cells``, ``anchor_boxes``,
        ``anchor_centers`` and ``detection_outputs`` (scale index per
        candidate) as parallel lists, and stores the (optionally resized)
        image in ``self.img``.

        Raises:
            ValueError: If ``img`` is None or empty, e.g. a failed frame grab.
        """
        if img is None or getattr(img, 'size', 0) == 0:
            raise ValueError('detect() received an empty image '
                             '(failed frame grab or unreadable file).')

        self.start_time = time.time()
        blob = cv2.dnn.blobFromImage(img, 1 / 255, (self.input_square_size, self.input_square_size),
                                     (0, 0, 0), swapRB=True, crop=False)
        
        self.net.setInput(blob)
        output_layers_names = self.net.getUnconnectedOutLayersNames()
        self.layerOutputs = self.net.forward(output_layers_names)
        if self.is_scale_output:
            img = image_resize(img, height = self.output_height)
        self.img = img
        # shape[:2] also works for images without a channel axis.
        self.height, self.width = self.img.shape[:2]
        
        boxes = []
        confidences = []
        class_ids = []
        grid_cells = []
        anchor_boxes = []
        anchor_centers = []
        detection_outputs = []

        for i, output in enumerate(self.layerOutputs):
            # For yolov4 the first and last output indices are swapped so that
            # index i still selects anchors[i] and a grid of grid_per_width * 2**i.
            # NOTE(review): presumably yolov4 emits its outputs in the opposite
            # scale order to yolov3 - confirm against the cfg.
            if self.model_name == 'yolov4':
                if i==0:
                    i=2
                elif i==2:
                    i=0

            output = np.asarray(output)
            # Vectorised candidate selection: best class per row and its
            # confidence (objectness * class score) for the whole output at once.
            scores = output[:, 5:]
            best_class = scores.argmax(axis=1)
            confidence_all = output[:, 4] * scores[np.arange(output.shape[0]), best_class]
            cells_per_side = self.grid_per_width * 2 ** i

            # Only rows above the fixed 0.01 floor are visited in Python.
            for j in np.flatnonzero(confidence_all > 0.01):
                j = int(j)
                detection = output[j]
                # Row index j is decoded as (cell_y * cells_per_side + cell_x) * 3 + anchor.
                anchor_box = self.anchors[i][2 * (j % 3):2 * (j % 3) + 2]
                grid_cell = [(j // 3) % cells_per_side,
                             j // (cells_per_side * 3)]
                # detection[0:4] are scaled by the displayed image size here.
                center_x = int(round(detection[0] * self.width))
                center_y = int(round(detection[1] * self.height))
                w = int(round(detection[2] * self.width))
                h = int(round(detection[3] * self.height))
                x = int(round(center_x - w / 2))
                y = int(round(center_y - h / 2))

                boxes.append([x, y, w, h])
                confidences.append(float(confidence_all[j]))
                class_ids.append(best_class[j])
                grid_cells.append(grid_cell)
                anchor_boxes.append(anchor_box)
                anchor_centers.append((center_x, center_y))
                detection_outputs.append(i)
            
        self.boxes = boxes
        self.confidences = confidences
        self.class_ids = class_ids
        self.grid_cells = grid_cells
        self.anchor_boxes = anchor_boxes
        self.anchor_centers = anchor_centers
        self.detection_outputs = detection_outputs
    
    def draw_img(self):
        """Draw the detections kept by NMS on a copy of the last image and show it.

        Also overlays the current thresholds and the FPS value.  FPS is only
        refreshed while ``is_pause`` is False, so redraws during a pause keep
        the last measured value.

        Raises:
            RuntimeError: If called before ``detect()`` has stored an image.
        """
        if self.img is None:
            raise RuntimeError('draw_img() called before detect().')

        img = self.img.copy()
        cv2.putText(img, "IOU:  {0:.2f}".format(self.IOU_threshold), (20, 40), self.font, 3, (0, 0, 255), 3)
        cv2.putText(img, "CONF: {0:.2f}".format(self.MIN_confidence), (20, 80), self.font, 3, (255, 0, 0), 3)

        indexes = cv2.dnn.NMSBoxes(self.boxes, self.confidences, self.MIN_confidence, self.IOU_threshold)
        if len(indexes) > 0:
            # np.asarray copes with ndarray, list or tuple return values.
            for c, i in enumerate(np.asarray(indexes).flatten()):
                x, y, w, h = self.boxes[i]
                label = str(self.classes[self.class_ids[i]])
                confidence = self.confidences[i]
                color = self.colors[c%len(self.colors)]
                scale = self.detection_outputs[i]
                # Line thickness halves with every finer output scale.
                thin = int(4 / 2 ** scale)
                cv2.rectangle(img, (x, y), (x + w, y + h), color, int(8 / 2 ** scale))
                cv2.putText(img, '{}'.format(c+1), (x+2, y-5),
                            self.font, 2, color, 3)

                # Grid cell that produced this detection, in displayed pixels.
                num_of_grids = self.grid_per_width * 2 ** scale
                grid_x = int(self.grid_cells[i][0] * self.width / num_of_grids)
                grid_y = int(self.grid_cells[i][1] * self.height / num_of_grids)
                grid_w = int(self.width / num_of_grids)
                grid_h = int(self.height / num_of_grids)
                if self.grid_show:
                    cv2.rectangle(img, (grid_x, grid_y), (grid_x + grid_w, grid_y + grid_h), color, thin)
                    cv2.circle(img, (self.anchor_centers[i]), 3, color, thin)

                # Anchor box centred on that grid cell, rescaled from
                # network-input pixels to the displayed image.
                ab_center_x = int(self.grid_cells[i][0] * self.width / num_of_grids + grid_w * 0.5)
                ab_center_y = int(self.grid_cells[i][1] * self.height / num_of_grids + grid_h * 0.5)
                half_box_x = 0.5 * self.anchor_boxes[i][0] * self.width / self.input_square_size
                half_box_y = 0.5 * self.anchor_boxes[i][1] * self.height / self.input_square_size
                if self.anchor_box_show:
                    top_left = (int(ab_center_x - half_box_x), int(ab_center_y - half_box_y))
                    bottom_right = (int(ab_center_x + half_box_x), int(ab_center_y + half_box_y))
                    cv2.rectangle(img, top_left, bottom_right, color, thin)
                    # Thin white outline on top keeps the anchor box visible.
                    cv2.rectangle(img, top_left, bottom_right, (255,255,255), 1)
                    cv2.putText(img, '{}:{} {:.0%} {}({})'.format(c+1, label, confidence, self.anchor_boxes[i], 
                                    scale), (20, 160 + 30 * c),self.font, 2, color, 3)
                else:
                    cv2.putText(img, '{}:{} {:.0%}'.format(c+1, label, confidence),
                                (20, 160 + 30 * c),self.font, 2, color, 3)

        if not self.is_pause and self.start_time is not None:
            elapsed = time.time() - self.start_time
            # A coarse clock can report zero elapsed time; keep the previous value then.
            if elapsed > 0:
                self.FPS = 1/elapsed

        fps_text = 'FPS: {:.2f}'.format(self.FPS) if self.FPS is not None else 'FPS: n/a'
        cv2.putText(img, fps_text, (20, self.height-20),
                self.font, 2, (0,255,0), 3)
        print(self.FPS)
        cv2.imshow('image', img)
        
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two shape entries are (height, width).
        width: Target width in pixels; takes precedence when both are given.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag passed to ``cv2.resize``.

    Returns:
        The input image itself when neither dimension is given, otherwise the
        resized image.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # Nothing requested: hand the input back untouched.
            return image
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)
    else:
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, target_size, interpolation = inter)

def keyboard(key, Det):
    """Apply one key press to the detector and report loop-level actions.

    Key bindings:
        q            -> returns 'quit'
        space (32)   -> returns 'pause-unpause'
        w / s        -> IOU_threshold  +/- 0.01   (W / S: +/- 0.1)
        d / a        -> MIN_confidence +/- 0.01   (D / A: +/- 0.1)
        g            -> toggle grid_show
        b            -> toggle anchor_box_show

    Thresholds are clamped to [0.01, 0.99].  Every threshold change or toggle
    is followed by ``Det.draw_img()``.

    Args:
        key: Integer key code as returned by ``cv2.waitKey`` (-1 for no key).
        Det: Object exposing ``IOU_threshold``, ``MIN_confidence``,
            ``grid_show``, ``anchor_box_show`` and ``draw_img()``.

    Returns:
        'quit', 'pause-unpause', or None for every other key.
    """
    # Some builds report modifier bits above the low byte; keep only the
    # low byte so the comparisons below still match.  -1 becomes 255, which
    # matches no binding.
    key = key & 0xFF

    if key == ord('q'):
        return 'quit'
    if key == 32:
        return 'pause-unpause'

    # key -> (attribute, signed step)
    threshold_steps = {
        ord('w'): ('IOU_threshold', 0.01),
        ord('s'): ('IOU_threshold', -0.01),
        ord('W'): ('IOU_threshold', 0.1),
        ord('S'): ('IOU_threshold', -0.1),
        ord('d'): ('MIN_confidence', 0.01),
        ord('a'): ('MIN_confidence', -0.01),
        ord('D'): ('MIN_confidence', 0.1),
        ord('A'): ('MIN_confidence', -0.1),
    }
    toggles = {
        ord('g'): 'grid_show',
        ord('b'): 'anchor_box_show',
    }

    if key in threshold_steps:
        attr, step = threshold_steps[key]
        value = getattr(Det, attr) + step
        # Increases are capped at 0.99, decreases floored at 0.01.
        value = min(value, 0.99) if step > 0 else max(value, 0.01)
        setattr(Det, attr, value)
        Det.draw_img()
    elif key in toggles:
        attr = toggles[key]
        setattr(Det, attr, not getattr(Det, attr))
        Det.draw_img()
    return None

In [97]:
# Live detection on a video source.  Keys are handled by keyboard():
# q quits, space pauses/resumes, w/s/a/d (and upper-case) tune the thresholds.
Det = Detection(model_name='yolov4', input_square_size=416, output_height=800)
#video = cv2.VideoCapture('data/NY.wmv')
video = cv2.VideoCapture(0)

try:
    if not video.isOpened():
        raise RuntimeError('Could not open the video source.')

    run_detection = True
    while run_detection:
        Det.is_pause = False
        check, img = video.read()
        if not check:
            # End of file or camera failure: stop cleanly instead of passing
            # an empty frame to the detector.
            print('No frame received from the video source - stopping.')
            break

        Det.detect(img)
        Det.draw_img()

        action = keyboard(cv2.waitKey(1), Det)
        if action == 'pause-unpause':
            # Paused: block on key presses; threshold keys still redraw the
            # frozen frame, and FPS is not updated while is_pause is True.
            Det.is_pause = True
            while True:
                action = keyboard(cv2.waitKey(0), Det)
                if action in ('quit', 'pause-unpause'):
                    break
        if action == 'quit':
            run_detection = False
except Exception as e:
    print(e)
finally:
    # Single cleanup path, whether the loop ended normally or with an error.
    video.release()
    cv2.destroyAllWindows()

GPU is enabled.
Detection grid sizes: (13x13), (26x26), (52x52).
2.7451609736303006
13.686212319961365
12.488511342760077
14.071445825169757
14.07168187018355
12.83120871993955
13.686301638060431
14.479644007470595
11.839587197010115
13.893760517284784
13.876201756069158
14.479693994504053
13.785217296991728
14.374491068858212
13.921106173101752
13.911456053067994
14.071634660547257
13.876201756069158
13.710101723281294
14.479044189988297
13.713508494304435
13.893622448134566
14.27260260454417
12.334555720566396
14.272699740360501
14.071634660547257
14.272748308764479
13.876109942170524
13.876155848967965
14.07168187018355
14.479494048440996
13.50126826755939
12.334483174384717
13.876293571182806
14.07168187018355
13.14589823793793
14.272651172287066
14.071587451227735
13.145857035846035
14.071634660547257
13.87643129613149
13.686212319961365
14.272699740360501
13.8767067542307
14.272748308764479
13.686212319961365
13.876523114282785


In [95]:
# Detection on a single image file.  The frame stays on screen until 'q' is
# pressed; threshold and overlay keys redraw it through keyboard().
Det = Detection(model_name='yolov4', input_square_size=416, output_height=800)
try:
    # Forward slash works on Windows as well as on Linux/macOS.
    image_path = 'data/giraffe.jpg'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError('Could not read image: {}'.format(image_path))

    Det.detect(img)
    Det.draw_img()

    # Still image: keep the FPS value measured by the first draw.
    Det.is_pause = True
    while keyboard(cv2.waitKey(0), Det) != 'quit':
        pass
except Exception as e:
    print(e)
finally:
    cv2.destroyAllWindows()

GPU is enabled.
Detection grid sizes: (13x13), (26x26), (52x52).


In [17]:
# Detection on an image downloaded from a URL.  The frame stays on screen
# until 'q' is pressed; threshold and overlay keys redraw it through keyboard().
Det = Detection(model_name='yolov4', input_square_size=416, output_height=800)
path_to_img = '''
https://waukeshadogparks.org/wp-content/uploads/2020/05/12.jpg

'''

try:
    # The triple-quoted literal carries surrounding newlines; strip them
    # before use.  The response is closed as soon as the bytes are read.
    with urllib.request.urlopen(path_to_img.strip()) as req:
        arr = np.frombuffer(req.read(), dtype=np.uint8)

    # IMREAD_COLOR always yields a 3-channel image, which is what the
    # detector's swapRB blob conversion expects.
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError('Downloaded data could not be decoded as an image.')

    Det.detect(img)
    Det.draw_img()

    # Still image: keep the FPS value measured by the first draw.
    Det.is_pause = True
    while keyboard(cv2.waitKey(0), Det) != 'quit':
        pass
except Exception as e:
    print(e)
finally:
    cv2.destroyAllWindows()

GPU is enabled.
Detection grid sizes: (13x13), (26x26), (52x52).
