In [19]:
# Leitura de bibliotecas
import cv2 as cv2 # opencv
import numpy as np
import random

In [20]:
# Neural network parameters
confidence_threshold = 0.5
non_maximal_supression_threshold = 0.1

# Image dimensions (the image is resized to this size before inference)

image_width = 1024
image_height = 1024

# Image path (forward slash: portable across operating systems and avoids
# the invalid "\d" escape sequence that a backslash would introduce)
image_path = "images/dog2.jpg"

In [21]:
# Read the image; cv2.imread signals failure by returning None instead of raising
frame = cv2.imread(image_path)
if frame is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Resize the image
frame = cv2.resize(frame, (image_width, image_height))

# Read the class names (one per line)
with open("data/coco.names", 'rt') as f:
    classes = f.read().splitlines()

# Build the 4D blob from the image (pixel values scaled by 1/255, no mean subtraction)
blob = cv2.dnn.blobFromImage(frame, 1/255, (image_width, image_height), [0,0,0], 1, crop=False)

In [22]:
# Load the YOLOv3 neural network
net = cv2.dnn.readNetFromDarknet("cfg/yolov3.cfg", "weights/yolov3.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Set the network input
net.setInput(blob)

# Names of all layers in the network
layersNames = net.getLayerNames()

# Names of the network's output layers. Asking OpenCV for the names directly
# avoids indexing layersNames with the result of getUnconnectedOutLayers(),
# whose array shape differs between OpenCV releases.
outputLayers = list(net.getUnconnectedOutLayersNames())
# Run the forward pass to obtain the outputs
outputs = net.forward(outputLayers)

In [23]:
initial_boxes = []

for output in outputs:
    for detection in output:
        # detection = [x, y, width, height, objectness, p1, p2, ..., p80]
        # Index 4 (the objectness score in the YOLOv3 output layout) is skipped;
        # p1..p80 are the per-class scores.
        scores = detection[5:] # class scores for classes 1 to 80
        classId = int(np.argmax(scores))
        confidence = float(scores[classId])

        # The first four values are treated as fractions of the image size:
        # horizontal quantities scale with the width, vertical ones with the height.
        center_x = int(detection[0] * image_width)
        center_y = int(detection[1] * image_height)
        width = int(detection[2] * image_width)
        height = int(detection[3] * image_height)
        left = int(center_x - width / 2)
        top = int(center_y - height / 2)
        
        initial_boxes.append({"confidence": confidence, "classId": classId, "left": left, "top": top, "width": width, "height": height})

In [24]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1, box2: mappings with the keys "left", "top", "width" and
            "height" describing each box by its top-left corner and size.

    Returns:
        The intersection area divided by the union area. Returns 0.0 when
        the union area is not positive (e.g. both boxes have zero area),
        instead of dividing by zero.
    """
    # Extract the bounding box coordinates
    x1, y1, w1, h1 = box1["left"], box1["top"], box1["width"], box1["height"]
    x2, y2, w2, h2 = box2["left"], box2["top"], box2["width"], box2["height"]
    
    # Corners of the intersection rectangle
    x_left = max(x1, x2)
    y_top = max(y1, y2)
    x_right = min(x1 + w1, x2 + w2)
    y_bottom = min(y1 + h1, y2 + h2)
    
    # Intersection area (zero when the boxes do not overlap)
    intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
    
    # Union area
    box1_area = w1 * h1
    box2_area = w2 * h2
    union_area = box1_area + box2_area - intersection_area
    
    # Degenerate boxes: nothing to overlap with, and the division would fail
    if union_area <= 0:
        return 0.0
    
    return intersection_area / union_area

# Keep only the detections at or above the confidence threshold, ordered from
# the most to the least confident (the sort is stable, so ties keep their order)
initial_boxes = sorted(
    (candidate for candidate in initial_boxes if candidate["confidence"] >= confidence_threshold),
    key=lambda candidate: candidate["confidence"],
    reverse=True,
)

# Greedy non-maximum suppression: accept the best remaining box, then discard
# every other box whose IoU with it reaches the suppression threshold
boxes = []
while initial_boxes:
    current_box, *remaining = initial_boxes
    boxes.append(current_box)
    initial_boxes = [
        other for other in remaining
        if calculate_iou(current_box, other) < non_maximal_supression_threshold
    ]

In [50]:
classColors = {}
for bounding_box in boxes:
    # Extract the box geometry and detection info
    left, top, width, height = bounding_box["left"], bounding_box["top"], bounding_box["width"], bounding_box["height"]

    object_class = classes[bounding_box["classId"]]
    object_confidence = bounding_box["confidence"]

    # Pick a random colour the first time a class appears and reuse it afterwards
    if object_class not in classColors:
        classColors[object_class] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    color = classColors[object_class]

    # Draw the bounding box itself
    cv2.rectangle(frame, (left, top), (left+width, top+height), color, 3)

    # Draw the class name and the confidence

    label = f"{object_class} {int(object_confidence * 100)}%"
    # Measure the text with the same thickness it is drawn with
    font, font_scale, font_thickness = cv2.FONT_HERSHEY_SIMPLEX, 1, 2
    
    (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, font_thickness)

    # Place the label just above the box; clamp so its background rectangle
    # stays inside the image when the box touches the top or left edge
    x = max(left, 3)
    y = max(top - text_height - 5, 5)

    cv2.rectangle(frame, (x - 3, y - 5), (x + text_width + 5, y + text_height + 5), color, -1)
    cv2.putText(frame, label, (x, y + text_height), font, font_scale, (0, 0, 0), font_thickness)
    
# Write the frame with the detection boxes
cv2.imwrite("YOLOv3 output.jpg", frame.astype(np.uint8))

True

In [63]:
# General implementation for multiple images

import os
import shutil

# Input folder path
input_folder = "images/"

# Neural network parameters
confidence_threshold = 0.4
non_maximal_supression_threshold = 0.2

# One output folder per parameter combination, so runs do not overwrite each other
output_folder = f"output/confidence{confidence_threshold}_nms{non_maximal_supression_threshold}"

# Image dimensions (every image is resized to this size before inference)

image_width = 512
image_height = 512

# Create the output folder if it does not exist yet
# (exist_ok avoids the check-then-create race of a separate os.path.exists test)
os.makedirs(output_folder, exist_ok=True)

# Load the YOLOv3 neural network
net = cv2.dnn.readNetFromDarknet("cfg/yolov3.cfg", "weights/yolov3.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


# Names of all layers in the network
layersNames = net.getLayerNames()

# Names of the network's output layers. Asking OpenCV for the names directly
# avoids indexing layersNames with the result of getUnconnectedOutLayers(),
# whose array shape differs between OpenCV releases.
outputLayers = list(net.getUnconnectedOutLayersNames())

# Colour assigned to each class, shared across all images
classColors = {}

# Percorre todos os arquivos da pasta de entrada
# Read the class names once; the file is the same for every image
with open("data/coco.names", 'rt') as f:
    classes = f.read().splitlines()

# Walk every entry of the input folder (sorted for a deterministic order)
for image_path in sorted(os.listdir(input_folder)):
    # Read the image. cv2.imread returns None for anything it cannot decode
    # (non-image files, sub-folders), so those entries are skipped.
    frame = cv2.imread(os.path.join(input_folder, image_path))
    if frame is None:
        print(f"Skipping {image_path}: not a readable image")
        continue

    # Resize the image
    frame = cv2.resize(frame, (image_width, image_height))

    # Build the 4D blob from the image
    blob = cv2.dnn.blobFromImage(frame, 1/255, (image_width, image_height), [0,0,0], 1, crop=False)
    
    # Set the network input
    net.setInput(blob)
    # Run the forward pass to obtain the outputs
    outputs = net.forward(outputLayers)

    initial_boxes = []

    for output in outputs:
        for detection in output:
            # detection = [x, y, width, height, objectness, p1, p2, ..., p80]
            # Index 4 (the objectness score in the YOLOv3 output layout) is skipped;
            # p1..p80 are the per-class scores.
            scores = detection[5:] # class scores for classes 1 to 80
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])

            # Horizontal quantities scale with the width, vertical ones with the height
            center_x = int(detection[0] * image_width)
            center_y = int(detection[1] * image_height)
            width = int(detection[2] * image_width)
            height = int(detection[3] * image_height)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            
            initial_boxes.append({"confidence": confidence, "classId": classId, "left": left, "top": top, "width": width, "height": height})

    # Most confident detections first
    initial_boxes.sort(key=lambda x: x["confidence"], reverse=True)

    # Drop detections below the confidence threshold
    initial_boxes = [box for box in initial_boxes if box["confidence"] >= confidence_threshold]

    # Greedy non-maximum suppression: keep the best remaining box and discard
    # the boxes whose IoU with it reaches the suppression threshold
    boxes = []
    while len(initial_boxes):
        current_box = initial_boxes.pop(0)
        boxes.append(current_box)
        initial_boxes = [box for box in initial_boxes if calculate_iou(current_box, box) < non_maximal_supression_threshold]

    for bounding_box in boxes:
        # Extract the box geometry and detection info
        left, top, width, height = bounding_box["left"], bounding_box["top"], bounding_box["width"], bounding_box["height"]

        object_class = classes[bounding_box["classId"]]
        object_confidence = bounding_box["confidence"]

        # Pick a random colour the first time a class appears and reuse it afterwards
        if object_class not in classColors:
            classColors[object_class] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        color = classColors[object_class]

        # Draw the bounding box itself
        cv2.rectangle(frame, (left, top), (left+width, top+height), color, 3)

        # Draw the class name and the confidence

        label = f"{object_class} {int(object_confidence * 100)}%"
        # Measure the text with the same thickness it is drawn with
        font, font_scale, font_thickness = cv2.FONT_HERSHEY_SIMPLEX, 1, 2
        
        (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, font_thickness)

        # Place the label just above the box; clamp so its background rectangle
        # stays inside the image when the box touches the top or left edge
        x = max(left, 3)
        y = max(top - text_height - 5, 5)

        cv2.rectangle(frame, (x - 3, y - 5), (x + text_width + 5, y + text_height + 5), color, -1)
        cv2.putText(frame, label, (x, y + text_height), font, font_scale, (0, 0, 0), font_thickness)
        
    # Write the frame with the detection boxes
    print(os.path.join(output_folder, image_path))
    cv2.imwrite(os.path.join(output_folder, image_path), frame.astype(np.uint8))

output/confidence0.4_nms0.2\cat.jpg
output/confidence0.4_nms0.2\city_scene.jpg
output/confidence0.4_nms0.2\dog.jpg
output/confidence0.4_nms0.2\dog2.jpg
output/confidence0.4_nms0.2\eagle.jpg
output/confidence0.4_nms0.2\food.jpg
output/confidence0.4_nms0.2\giraffe.jpg
output/confidence0.4_nms0.2\horses.jpg
output/confidence0.4_nms0.2\motorbike.jpg
output/confidence0.4_nms0.2\person.jpg
output/confidence0.4_nms0.2\surf.jpg
output/confidence0.4_nms0.2\wine.jpg
