In [30]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
# Class names the detector should report; they are handed to YOLOV3 below and
# looked up among the entries of coco.names.
target_names = [
    'person',
    'laptop',
    'chair',
    'tvmonitor',
    'diningtable',
]

In [38]:
class YOLOV3:
    """YOLOv3 object detector (OpenCV DNN) restricted to a chosen set of classes.

    The network is read from ``yolov3.weights`` / ``yolov3.cfg`` and the class
    names from ``coco.names``; all three paths are relative to the current
    working directory.

    Attributes set during use:
        net, output_layers, classes, targets: values returned by ``load_yolo``.
        img: the frame currently being processed (set by ``yolo_frame``).
        outs: raw network outputs for ``img`` (set by ``detect_objects``).
        boxes, confidences, class_ids, indexes: detections kept by
            ``find_indexes`` and the NMS result selecting among them.
    """

    def __init__(self, targets):
        """Load the network and remember which class names to report.

        Args:
            targets: sequence of class names, spelled as in ``coco.names``.
        """
        self.class_ids = []
        self.confidences = []
        self.boxes = []
        self.target_names = targets
        self.net, self.output_layers, self.classes, self.targets = self.load_yolo(self.target_names)

    @staticmethod
    def _target_indices(classes, targets):
        """Return the index in ``classes`` of every name in ``targets``, in order.

        Raises:
            ValueError: if any requested name is not present in ``classes``.
        """
        lookup = {}
        for idx, name in enumerate(classes):
            lookup.setdefault(name, idx)  # keep the first occurrence of a name
        missing = [name for name in targets if name not in lookup]
        if missing:
            raise ValueError("unknown target class name(s): %s" % ", ".join(map(str, missing)))
        return np.array([lookup[name] for name in targets], dtype=np.int64)

    @staticmethod
    def _output_layer_names(layer_names, unconnected):
        """Map 1-based layer ids to layer names.

        ``unconnected`` may be a flat sequence of ids or an Nx1 array of ids
        (both shapes are returned by different OpenCV releases), so it is
        flattened before indexing.
        """
        return [layer_names[int(i) - 1] for i in np.array(unconnected).flatten()]

    def load_yolo(self, targets):
        """Read the network and class list and resolve the wanted class ids.

        Args:
            targets: sequence of class names to detect.

        Returns:
            Tuple ``(net, output_layers, classes, targets)`` where
            ``output_layers`` is the list of output layer names, ``classes``
            is the list of lines of ``coco.names`` (stripped) and ``targets``
            is an integer array of the indices of the requested names.

        Raises:
            ValueError: if a requested name is not listed in ``coco.names``.
        """
        # Load Yolo
        net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        with open("coco.names", "r") as f:
            # Keep every line (even blank ones) so indices stay aligned with
            # the network's class ids.
            classes = [line.strip() for line in f]
        # Use the argument, not a notebook-level global, to find the indices
        # of the wanted targets.
        target_ids = self._target_indices(classes, targets)
        output_layers = self._output_layer_names(net.getLayerNames(), net.getUnconnectedOutLayers())

        return net, output_layers, classes, target_ids

    def detect_objects(self, size=(320,320), swapRB=True, crop=False, scalefactor=0.00392):
        """Run the network on ``self.img`` and store the raw outputs in ``self.outs``.

        Args:
            size: spatial size of the input blob.
            swapRB: whether to swap the first and last channels.
            crop: whether to crop after resizing.
            scalefactor: multiplier applied to pixel values (default is roughly 1/255).
        """
        blob = cv2.dnn.blobFromImage(self.img, scalefactor, size, (0, 0, 0), swapRB, crop=crop)
        self.net.setInput(blob)
        self.outs = self.net.forward(self.output_layers)

    def find_indexes(self, conf=0.9, score_threshold=0.5, nms_threshold=0.4):
        """Collect confident detections of target classes and run NMS on them.

        Each detection row is read as: centre x, centre y, width, height
        (as fractions of the image size) in entries 0-3 and per-class scores
        from entry 5 onward.

        Args:
            conf: minimum best-class score for a detection to be kept.
            score_threshold: score threshold passed to ``cv2.dnn.NMSBoxes``.
            nms_threshold: overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Populates ``self.boxes`` (``[x, y, w, h]`` in pixels, top-left origin),
        ``self.confidences``, ``self.class_ids`` and ``self.indexes``.
        """
        self.class_ids = []
        self.confidences = []
        self.boxes = []
        img_h, img_w = self.img.shape[:2]
        for out in self.outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > conf and class_id in self.targets:
                    # Object detected: scale the relative box to pixels.
                    center_x = int(detection[0] * img_w)
                    center_y = int(detection[1] * img_h)
                    w = int(detection[2] * img_w)
                    h = int(detection[3] * img_h)

                    # Rectangle coordinates (top-left corner)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    self.boxes.append([x, y, w, h])
                    self.confidences.append(float(confidence))
                    self.class_ids.append(int(class_id))

        # non max suppression
        self.indexes = cv2.dnn.NMSBoxes(self.boxes, self.confidences, score_threshold, nms_threshold)

    def draw_boxes_labels(self, colors=(0,255,0)):
        """Draw the NMS-selected boxes and their class labels onto ``self.img``.

        Args:
            colors: colour tuple used for both rectangles and text.
        """
        font = cv2.FONT_HERSHEY_PLAIN
        height = self.img.shape[0]
        # Line and font sizes scale with the frame height; thickness is at least 1.
        rect_thicc = max(1, int(height/70))
        font_size = int(height/300)+1
        font_width = int(height/200)+1
        # Flatten so both flat and Nx1 NMS results (and an empty result) work.
        kept = {int(i) for i in np.array(self.indexes).flatten()}
        for i, (x, y, w, h) in enumerate(self.boxes):
            if i not in kept:
                continue
            label = str(self.classes[self.class_ids[i]])
            cv2.rectangle(self.img, (x, y), (x + w, y + h), colors, rect_thicc)
            cv2.putText(self.img, label, (x, y - font_size), font, font_size, colors, font_width)

    def yolo_frame(self, img):
        """Detect and annotate target objects in one frame.

        The frame is scaled to 40% of its size before detection.

        Args:
            img: image array as produced by ``cv2.VideoCapture.read``.

        Returns:
            The resized frame with boxes and labels drawn on it.
        """
        img = cv2.resize(img, None, fx=0.4, fy=0.4) # resize image
        self.img = img
        self.detect_objects() # find all objects
        self.find_indexes(.7) # box object if object in targets
        self.draw_boxes_labels() # update frame
        return img

    def yolo_video(self, filename, min_interval=.6):
        """Play a video, running detection on a frame at most every ``min_interval`` seconds.

        Prints the stream's frame rate and the time spent on each processed
        frame, and shows processed frames in a window named ``'img'``. Stops
        at the end of the stream or when ``q`` is pressed.

        Args:
            filename: path (or other source) accepted by ``cv2.VideoCapture``.
            min_interval: minimum wall-clock seconds between processed frames.
        """
        cap = cv2.VideoCapture(filename)
        try:
            if not cap.isOpened():
                print("Error opening video stream or file")
                return
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("fps:", fps)
            last_run = time.time()
            while True:
                ret, img = cap.read()
                if not ret:
                    # End of stream (or read failure): stop instead of
                    # feeding ``None`` into the pipeline.
                    break
                if time.time() - last_run > min_interval:
                    started = time.time()
                    img = self.yolo_frame(img)
                    print(time.time() - started)
                    # open final window
                    cv2.imshow('img', img)
                    last_run = time.time()
                # press q to quit the window
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        except KeyboardInterrupt:
            # Interrupting the kernel stops playback quietly; any other error
            # is allowed to propagate rather than being hidden.
            pass
        finally:
            cap.release()
            cv2.destroyAllWindows()

In [39]:
# Build the detector for the chosen classes, then play the clip with the
# detections drawn on the processed frames.
YOLO = YOLOV3(targets=target_names)
YOLO.yolo_video('pedestrians.mp4')

fps: 25
0.8673686981201172
0.5714612007141113
0.49153709411621094
0.5105190277099609
0.575458288192749
