In [2]:
import time
import cv2
import numpy as np
from skimage.feature import hog
from skimage.color import rgb2gray
from skimage.transform import resize
from scipy.spatial import distance

In [3]:
yolov = 'yolov4'
model_path = 'C:/Users/10087940/Documents/GitHub/darknet/'

path = 'C:/Temp/videos/'
files = ['rec_2023_06_25_12_14_29.mp4', 'rec_2023_06_25_12_14_48.mp4', 'rec_2023_06_25_12_15_20.mp4']

In [None]:
!git clone https://github.com/AlexeyAB/darknet.git

In [None]:
%cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!make

In [9]:
#!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights

import requests

url = f'https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/{yolov}.weights'
r = requests.get(url, allow_redirects=True)

open(f'{model_path}{yolov}.weights', 'wb').write(r.content)


257717640

In [1]:
class ObjectFeatures:
    def __init__(self, id, name, class_id, features):
        self.id = id
        self.name = name
        self.class_id = class_id
        self.features = features

class ObjectDetector:

    def __init__(self):
        self.objects = []
        self.net, self.output_layers = self.load_yolo()
        self.classes = self.load_classes()
        self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def load_yolo(self):
        net = cv2.dnn.readNet(f'{model_path}{yolov}.weights', f'{model_path}cfg/{yolov}.cfg')
        layer_names = net.getLayerNames()
        output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
        return net, output_layers
    
    def load_classes(self):
        classes = []
        with open(f'{model_path}/cfg/coco.names', 'r') as f:
            classes = [line.strip() for line in f.readlines()]
        return classes
    
    def detect_objects(self, img):
    
        outs, height, width = None, None, None

        if img is not None:
            height, width, channels = img.shape
            blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            self.net.setInput(blob)
            outs = self.net.forward(self.output_layers)

        return outs, height, width
    
    def get_box_dimensions(self, outs, height, width):
    
        if outs is None or height is None or width is None:
            return None, None, None
        
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        return boxes, confidences, class_ids

    def draw_labels(self, boxes, confidences, colors, class_ids, img):

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(self.classes[class_ids[i]])
                color = colors[i]
                cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
                cv2.putText(img, label, (x, y - 5), font, 1, color, 1)

        return img
    
    def print_info(self, frame_id, frame, starting_time):
        elapsed_time = time.time() - starting_time
        fps = frame_id / elapsed_time
        cv2.putText(frame, 'FPS: ' + str(round(fps, 2)), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 0, 150), 2)
        cv2.imshow('Image', frame)

    def compute_objects(self, boxes, frame, confidences, class_ids):
        temp_objects = []
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(self.classes[class_ids[i]])
                roi = frame[y:y+h, x:x+w]
                if roi.size > 0:
                    features = self.create_features(roi, label)
                    temp_objects.append(ObjectFeatures(len(self.objects)+1, label, class_ids[i], features))

        # Caso nao tenha detectado nada, inicializa a lista de objetos detectados
        if len(self.objects) == 0:
            self.objects = temp_objects
        #verifica se algum dos objetos detectados já foi detectado anteriormente utilizando 
        #a distancia do cosseno entre as features
        else:
            for obj in temp_objects:
                measure = -2
                for i in range(len(self.objects)):
                    # Compara apenas objetos do mesmo tipo...
                    #print(f'Comparando {obj.name} com {self.objects[i].name}')
                    if(self.objects[i].class_id == obj.class_id):
                        measure = distance.cosine(self.objects[i].features, obj.features)
                        print(f'Comparing {obj.name} - {obj.id} and {self.objects[i].name} - {self.objects[i].id}')
                        print(f'value: {measure}\n')
                        
                        # Se a distancia do cosseno for menor que 0.3, considera que é o mesmo objeto
                        if measure > 0.3:
                            self.objects.append(obj)
                            break

                # Adiciona na lista caso nao tenha encontrado nenhum objeto parecido
                if measure == -2:
                    self.objects.append(obj)
    
    def create_features(self, roi, class_name):
        # Se a imagem não for em escala de cinza, converte para escala de cinza
        if len(roi.shape) > 2:
            roi = rgb2gray(roi)

        # Redimensiona a imagem para um tamanho fixo
        if class_name == 'person':
            #print('person', roi.shape)
            roi = resize(roi, (80, 20))
        else:
            #print('car', roi.shape)
            roi = resize(roi, (40, 80))

        cv2.imshow('object', roi)

        # Calcula o descritor HOG para a imagem
        fd = hog(roi, orientations=8, pixels_per_cell=(8, 8), cells_per_block=(1, 1), visualize=False)

        return fd
    
    def object_detection_video(self, video_path):
        
        capture = cv2.VideoCapture(video_path)
        ret, frame = capture.read()

        # Initialization
        frame_id = 0
        frame_counter = 0
        starting_time = time.time()
        fps = capture.get(cv2.CAP_PROP_FPS)
        frame_delay = int(1000 / fps)

        while True:
            ret, frame = capture.read()
            frame_id += 1

            # Verifica se deve reiniciar o video
            if not ret:
                print('Restarting...')
                print(f'Detected objects: {len(self.objects)}')
                capture.set(cv2.CAP_PROP_POS_FRAMES, 0)
                frame_id = 0
                starting_time = time.time()
                self.objects = []
                continue

            if frame is not None:
                # A cada 5 frames realiza a deteccao de objetos
                if frame_counter % 5 == 0:
                    # Detecta objetos
                    outs, height, width = self.detect_objects(frame)
                    boxes, confidences, class_ids = self.get_box_dimensions(outs, height, width)

                    # Desenha as caixas verifica os objetos detectados
                    if boxes is not None and confidences is not None and class_ids is not None:
                        colors = np.random.uniform(0, 255, size=(len(boxes), 2))
                        frame = self.draw_labels(boxes, confidences, colors, class_ids, frame)
                        self.compute_objects(boxes, frame, confidences, class_ids)

                # Mostra informações na tela
                self.print_info(frame_id, frame, starting_time)

            frame_counter += 1
            
            if cv2.waitKey(frame_delay) & 0xFF == ord('q'): 
                print('Quitting...')
                print(f'Detected objects: {len(self.objects)}')
                break

        capture.release()
        cv2.destroyAllWindows()

In [4]:
objDetector = ObjectDetector()

objDetector.object_detection_video(path + files[0])

Comparing car - 2 and car - 1
value: 0.2543071365459212

Comparing car - 2 and car - 1
value: 0.2846361658408467

Comparing car - 2 and car - 1
value: 0.24686742788713079

Comparing car - 2 and car - 1
value: 0.2521135116315322

Comparing car - 3 and car - 1
value: 0.23132594413323548

Comparing car - 3 and car - 1
value: 0.19632993797558473

Comparing person - 3 and person - 2
value: 0.16292208742495184

Comparing car - 3 and car - 1
value: 0.19008903052780424

Comparing car - 3 and car - 1
value: 0.18161526741115952

Comparing car - 3 and car - 1
value: 0.1957490919736803

Comparing car - 3 and car - 1
value: 0.19945741141586282

Comparing car - 3 and car - 1
value: 0.21203315579945814

Comparing car - 3 and car - 1
value: 0.16755894387247372

Comparing car - 3 and car - 1
value: 0.17379597932954138

Comparing car - 3 and car - 1
value: 0.24956951571042574

Comparing car - 3 and car - 1
value: 0.16536470485705113

Comparing person - 3 and person - 2
value: 0.1100700804946858

Compari