# Notebook para testes feitos com predições para 3 classes [BN, BNMN, MN]
Neste notebook estão os códigos utilizados para rodar os testes com diferentes configurações do YOLO (tanto para o YOLOv11, quanto para o YOLOv12) utilizando as 3 classes do dataset como as classes a serem treinadas e detectadas.

### 1. Definição do caminho do diretório raíz do kaggle e instalação das bibliotecas necessárias

In [None]:
ROOT_DIR = '/kaggle/input/detectionbnmn-3classes'

In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.158-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.158-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.158 ultralytics-thop-2.0.14


In [None]:
!pip install opencv-python

### 2. Importação de bibliotecas

In [4]:
import os
import numpy as np
import cv2
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


### 3. Treinamento YOLO

In [None]:
# mod = "/kaggle/input/yolo11/pytorch/default/1/yolo12s.pt"

#carregar o modelo
models = YOLO("yolo12l.pt") #cria um novo modelo

#Usar o modelo
results = models.train(data=os.path.join(ROOT_DIR, 'teste_file.yaml'), epochs=150, imgsz=1080, batch=2) #treinar o modelo

In [None]:
model_path = '/kaggle/working/runs/detect/train/weights/best.pt'
model = YOLO(model_path)

metrics = model.val()
metrics.box.map
metrics.box.map50
metrics.box.map75

### 4. Funções de auxílio para a visualização dos resultados

In [5]:
from scipy.optimize import linear_sum_assignment

#Calcula o IOU dos resultados
def get_iou(ground_truth, pred):
    
    ix1 = np.maximum(ground_truth[0], pred[0])
    iy1 = np.maximum(ground_truth[1], pred[1])
    ix2 = np.minimum(ground_truth[2], pred[2])
    iy2 = np.minimum(ground_truth[3], pred[3])
     
    i_height = np.maximum(iy2 - iy1 + 1, np.array(0.))
    i_width = np.maximum(ix2 - ix1 + 1, np.array(0.))
     
    area_of_intersection = i_height * i_width
     
    gt_height = ground_truth[3] - ground_truth[1] + 1
    gt_width = ground_truth[2] - ground_truth[0] + 1
     
    pd_height = pred[3] - pred[1] + 1
    pd_width = pred[2] - pred[0] + 1
     
    area_of_union = gt_height * gt_width + pd_height * pd_width - area_of_intersection
     
    iou = area_of_intersection / area_of_union
     
    return iou

#Desenha as bboxes preditas
def draw_bounding_boxes(image, gt_box, pred_box, iou, threshold=0.5, class_correct=True, false_positive=True, pred_class_name=""):
    color = (0, 0, 255)
    label = ""

    if false_positive:
        color = (0, 0, 255)  # Vermelho
        label = "Detecção incorreta"
    elif class_correct and iou >= threshold:
        color = (0, 255, 0)  # Verde
        label = pred_class_name
    elif class_correct and iou == 0:
        color = (255, 0, 0)  # Azul
    elif class_correct and iou < threshold:
        color = (0, 255, 255)  # Amarelo
    elif class_correct is False:
        color = (0, 0, 255)  # Vermelho
        label = "Classe incorreta"

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, thickness)

    if gt_box is not None:
        x1, y1, x2, y2 = map(int, gt_box)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

        if label:
            y_text = y1 - 10 if y1 - 10 > 10 else y1 + text_height + 10
            cv2.rectangle(image,
                          (x1, y_text - text_height - baseline),
                          (x1 + text_width, y_text + baseline),
                          color, cv2.FILLED)
            cv2.putText(image, label, (x1, y_text), font, font_scale, (255, 255, 255), thickness)

    if gt_box is None and pred_box is not None:
        x1, y1, x2, y2 = map(int, pred_box)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

        if label:
            y_text = y1 - 10 if y1 - 10 > 10 else y1 + text_height + 10
            cv2.rectangle(image,
                          (x1, y_text - text_height - baseline),
                          (x1 + text_width, y_text + baseline),
                          color, cv2.FILLED)
            cv2.putText(image, label, (x1, y_text), font, font_scale, (255, 255, 255), thickness)

    return image

#Carrega o ground truth para comparação
def load_ground_truth(label_path, image):
    boxes = []
    classes = []
    height, width, _ = image.shape
    with open(label_path, "r") as f:
        for line in f:
            class_id, center_x, center_y, bbox_width, bbox_height = map(float, line.strip().split())
            x1 = int((center_x - bbox_width / 2) * width)
            y1 = int((center_y - bbox_height / 2) * height)
            x2 = int((center_x + bbox_width / 2) * width)
            y2 = int((center_y + bbox_height / 2) * height)
            boxes.append([x1, y1, x2, y2])
            classes.append(int(class_id))
            
    return boxes, classes

#Faz a comparação do ground truth com as predições
def match_predictions_with_gt(gt_boxes, pred_boxes):
    num_gt = len(gt_boxes)
    num_pred = len(pred_boxes)
    
    iou_matrix = np.zeros((num_gt, num_pred))

    for i, gt in enumerate(gt_boxes):
        for j, pred in enumerate(pred_boxes):
            iou_matrix[i, j] = get_iou(gt, pred)
    
    # Resolver o problema de associação com Hungarian Algorithm
    gt_indices, pred_indices = linear_sum_assignment(-iou_matrix)  # Negativo porque queremos maximizar IoU
    
    # Identificar correspondências válidas
    matches = []
    unmatched_gt = set(range(num_gt))  # Ground truths sem correspondência
    unmatched_pred = set(range(num_pred))  # Predições sem correspondência

    for gt_idx, pred_idx in zip(gt_indices, pred_indices):
        iou = iou_matrix[gt_idx, pred_idx]
        if iou > 0:  # Apenas associações válidas
            matches.append((gt_idx, pred_idx, iou))
            unmatched_gt.discard(gt_idx)
            unmatched_pred.discard(pred_idx)

    # Adicionar GTs sem correspondência com IoU = 0
    for gt_idx in unmatched_gt:
        matches.append((gt_idx, None, 0.0))

    # Adicionar previsões sem correspondência como falsos positivos
    for pred_idx in unmatched_pred:
        matches.append((None, pred_idx, 0.0))

    return matches

In [6]:
image_dir = "/kaggle/input/detectionbnmn-3classes/images/test"
label_dir = "/kaggle/input/detectionbnmn-3classes/labels/test"
output_dir = "/kaggle/working/results_yolo11m"

model = YOLO("/kaggle/input/models/best_11m.pt")

image_paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir)]
os.makedirs(output_dir, exist_ok=True)

class_names = ["BN", "BNMN", "MN"]

for img in image_paths:
    
    image = cv2.imread(img)
    image_name = os.path.basename(img)
    
    # Caminho do label correspondente
    label_path = os.path.join(label_dir, os.path.splitext(image_name)[0] + ".txt")
    
    # Carregar caixas Ground Truth
    if not os.path.exists(label_path):
        print(f"Label não encontrado para {image_name}. Pulando...")
        continue
    gt_boxes, gt_classes = load_ground_truth(label_path, image)
    
    # Fazer predições
    results = model.predict(img, save=False, show_conf=False, show_labels=False)
    pred_boxes = results[0].boxes.xyxy.cpu().numpy()
    pred_classes = results[0].boxes.cls.cpu().numpy().astype(int)

    matches = match_predictions_with_gt(gt_boxes, pred_boxes)

    for gt_idx, pred_idx, iou in matches:
        if gt_idx is not None:
            gt_box = gt_boxes[gt_idx]
            gt_class = gt_classes[gt_idx]
        else:
            gt_box, gt_class = None, None  # Não foi detectado
    
        if pred_idx is not None:
            pred_box = pred_boxes[pred_idx]
            pred_class = pred_classes[pred_idx]
        else:
            pred_box, pred_class = None, None  # Previsão sem correspondência
    
        # Desenhar caixas na imagem
        if gt_box is not None and pred_box is not None:
            class_correct = gt_class == pred_class
            # Caso normal: GT e predição correspondem
            if class_correct:
                print(f"imagem:{image_name}; iou: {iou}; classe correta")
            else:
                print(f"imagem: {image_name}; IoU: {iou:.2f}; classe incorreta (GT: {gt_class}, Pred: {pred_class}).")
                
            image = draw_bounding_boxes(image, gt_box, pred_box, iou, class_correct=class_correct, false_positive=False, pred_class_name=class_names[pred_class] if pred_class is not None else "")
                
        elif gt_box is not None:
            # Caso: GT sem correspondência (IoU = 0)
            print(f"imagem:{image_name}; objeto não detectado. IoU: 0")
            image = draw_bounding_boxes(image, gt_box, [0, 0, 0, 0], 0, false_positive=False, pred_class_name=class_names[pred_class] if pred_class is not None else "")
        
        elif pred_box is not None:
            # Caso: Previsão sem correspondência
            print(f"imagem:{image_name}; falso positivo detectado. IoU: 0")
            image = draw_bounding_boxes(image, [0, 0, 0, 0], pred_box, 0, false_positive=False, pred_class_name=class_names[pred_class] if pred_class is not None else "")
    save_path = os.path.join(output_dir, image_name)
    cv2.imwrite(save_path, image)


image 1/1 /kaggle/input/detectionbnmn-3classes/images/test/BNMN172.png: 832x1088 1 BNMN, 2 MNs, 47.5ms
Speed: 14.6ms preprocess, 47.5ms inference, 263.0ms postprocess per image at shape (1, 3, 832, 1088)
imagem:BNMN172.png; iou: 0.9148962514277833; classe correta
imagem:BNMN172.png; iou: 0.730622789852844; classe correta
imagem:BNMN172.png; iou: 0.9107079109298977; classe correta

image 1/1 /kaggle/input/detectionbnmn-3classes/images/test/BN379.png: 832x1088 1 BN, 30.7ms
Speed: 6.2ms preprocess, 30.7ms inference, 1.8ms postprocess per image at shape (1, 3, 832, 1088)
imagem:BN379.png; iou: 0.9856069432724882; classe correta

image 1/1 /kaggle/input/detectionbnmn-3classes/images/test/BN53.png: 832x1088 1 BN, 30.7ms
Speed: 6.0ms preprocess, 30.7ms inference, 1.5ms postprocess per image at shape (1, 3, 832, 1088)
imagem:BN53.png; iou: 0.9669991630094559; classe correta

image 1/1 /kaggle/input/detectionbnmn-3classes/images/test/BN561.png: 832x1088 2 BNs, 30.7ms
Speed: 6.0ms preprocess, 3

### 5. Compactação dos resultados das predições das imagens para download

In [7]:
import os
import subprocess
from IPython.display import FileLink, display

def download_file(path, download_file_name):
    os.chdir('/kaggle/working/')
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    command = f"zip {zip_name} {path} -r"
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))


download_file('/kaggle/working/results_yolo11m', 'results_yolo11m')

### 6. Salva o resultado das predições em um CSV

In [5]:
from ultralytics import YOLO
import os

model = YOLO("/kaggle/input/models/yolo11s_3classes1080.pt")
source_dir = "/kaggle/input/detectionbnmn-3classes/images/test"
csv_path = "/kaggle/working/predicoes_11s.csv"

results = model.predict(source=source_dir)

with open(csv_path, "w", newline="") as f:
    import csv
    writer = csv.writer(f)
    writer.writerow(["file", "class_id", "class_name", "confidence", "x1", "y1", "x2", "y2"])

    for r in results:
        for box, conf, cls in zip(r.boxes.xyxy, r.boxes.conf, r.boxes.cls):
            x1, y1, x2, y2 = map(float, box.tolist())
            class_id = int(cls.item())
            class_name = r.names[class_id]
            writer.writerow([os.path.basename(r.path), class_id, class_name, float(conf.item()), x1, y1, x2, y2])

print(f"CSV salvo em {csv_path}")


image 1/268 /kaggle/input/detectionbnmn-3classes/images/test/BN112.png: 832x1088 1 BN, 13.8ms
image 2/268 /kaggle/input/detectionbnmn-3classes/images/test/BN113.png: 832x1088 1 BN, 13.2ms
image 3/268 /kaggle/input/detectionbnmn-3classes/images/test/BN115.png: 832x1088 1 BN, 13.1ms
image 4/268 /kaggle/input/detectionbnmn-3classes/images/test/BN116.png: 832x1088 1 BN, 13.1ms
image 5/268 /kaggle/input/detectionbnmn-3classes/images/test/BN117.png: 832x1088 2 BNs, 13.1ms
image 6/268 /kaggle/input/detectionbnmn-3classes/images/test/BN136.png: 832x1088 1 BN, 13.2ms
image 7/268 /kaggle/input/detectionbnmn-3classes/images/test/BN137.png: 832x1088 1 BN, 13.1ms
image 8/268 /kaggle/input/detectionbnmn-3classes/images/test/BN143.png: 832x1088 1 BN, 13.1ms
image 9/268 /kaggle/input/detectionbnmn-3classes/images/test/BN145.png: 832x1088 1 BN, 13.1ms
image 10/268 /kaggle/input/detectionbnmn-3classes/images/test/BN152.png: 832x1088 1 BN, 13.1ms
image 11/268 /kaggle/input/detectionbnmn-3classes/images/

## Extra: Remoção das classes nas anotações para treinamento de 2 ou 1 classe
O código abaixo teve a função de fazer a remoção nos labels originais (com 3 classes) das classes desejadas, para que futuramente fosse possível treinar os modelos com detecção apenas para 2 classes (BN e BNMN) ou 1 classe (MN), testes estes que foram realizados na intenção de estudar e aplicar a possibilidade da técnica de ensemble.

In [None]:
import os

# Caminho para o diretório com os arquivos de anotação originais
labels_orig = '/kaggle/input/detectionbnmn-3classes/labels/train'
labels_out = '/kaggle/working/train_2classe'

os.makedirs(labels_out, exist_ok=True)

for filename in os.listdir(labels_orig):
    if filename.endswith('.txt'):
        input_path = os.path.join(labels_orig, filename)
        output_path = os.path.join(labels_out, filename)

        # Ler o arquivo e filtrar as linhas
        with open(input_path, 'r') as file:
            lines = file.readlines()

        # Manter apenas as linhas que não têm o class_id que queremos remover
        new_lines = [line for line in lines if not line.startswith('2')]
        # new_lines = [line for line in lines if line.startswith('2')]

        # Escrever o novo arquivo no diretório de saída
        with open(output_path, 'w') as file:
            file.writelines(new_lines)

print("Anotações da classe MN removidas e salvas no novo diretório com sucesso.")
