# **Cuarto conjunto de tareas a realizar**

## Paquetes necesarios e inicializaciones

La siguiente práctica consta de dos partes principales: la primera está basada en YOLO y en la detección de matrículas, personas y vehículos, y la segunda en OCR (Optical Character Recognition).

Instalar labelme en el sistema operativo del anfitrión (NO EN EL ENVIRONMENT)

Ejecutar lo siguiente desde el CMD del sistema para convertir las anotaciones en formato JSON obtenidas con labelme a formato YOLO.

Si se tiene una tarjeta gráfica de NVIDIA, se puede utilizar la GPU haciendo uso de CUDA. Para instalar CUDA v11.6, hacer uso del siguiente script.

In [1]:
import cv2
import math
import yaml
import csv
import labelme
from collections import defaultdict
import numpy as np
from ultralytics import YOLO

In [2]:
# === MODEL TRAINING ===
print("Iniciando entrenamiento del modelo")

# Start from the pretrained base checkpoint.
model = YOLO("yolo11s.pt")

# Training settings, gathered in one place so they are easy to tweak.
train_options = {
    "data": "data.yaml",             # dataset definition file
    "imgsz": 640,                    # training image size
    "epochs": 5,
    "project": "runs/train_custom",  # output directory for the run
    "name": "exp2",                  # run sub-folder inside `project`
    "exist_ok": True,
    "plots": True,
}

# `results` is kept at module level: the next cell reads its confusion matrix.
results = model.train(**train_options)

print("Entrenamiento completado.")

Iniciando entrenamiento del modelo
Ultralytics 8.3.223  Python-3.9.23 torch-2.8.0+cpu CPU (12th Gen Intel Core i7-12700H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=exp2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspe

In [3]:
print("CÁLCULO MANUAL DESDE MATRIZ DE CONFUSIÓN:")

# Confusion matrix produced by the training run (a NumPy array).
matrix = results.confusion_matrix.matrix

if matrix is None or matrix.size == 0:
    print("No se generó matriz de confusión (quizás no es un modelo de detección/clasificación).")
else:
    # Diagonal cells are the predictions that agree with the ground truth.
    aciertos_correctos = np.diag(matrix).sum()

    # Sum of every cell in the matrix.
    total_predicciones = matrix.sum()

    if not total_predicciones > 0:
        # Nothing was counted, so a ratio cannot be computed.
        print("La matriz de confusión está vacía.")
    else:
        # Accuracy = diagonal sum / sum of all cells.
        accuracy_manual = aciertos_correctos / total_predicciones

        print(f"Matriz de Confusión: \n{matrix}")
        print(f"Aciertos (Diagonal): {aciertos_correctos}")
        print(f"Total de Muestras: {total_predicciones}")
        print(f"Porcentaje de Acierto (Accuracy Manual): {accuracy_manual * 100:.2f}%")

CÁLCULO MANUAL DESDE MATRIZ DE CONFUSIÓN:
Matriz de Confusión: 
[[          8           1]
 [         12           0]]
Aciertos (Diagonal): 8.0
Total de Muestras: 21.0
Porcentaje de Acierto (Accuracy Manual): 38.10%


In [13]:
from ultralytics import YOLO
import cv2
import csv
import numpy as np

# ============================
# Models
# ============================
# Two detectors are loaded: the custom one from the weights saved by the
# training run, and the stock checkpoint used for people and vehicles.
custom_model = YOLO("runs/train_custom/exp2/weights/best.pt")  # plates
coco_model   = YOLO("yolo11s.pt")                               # pretrained COCO

# Only these COCO classes are of interest
vehicle_class_indices = [2, 3, 5, 7]  # car, motorcycle, bus, truck
person_class_index = 0
coco_class_indices = [person_class_index] + vehicle_class_indices

# ============================
# Video
# ============================
video_path = "./Resources/video.mp4"
cap = cv2.VideoCapture(video_path)

# Resizable preview window, initially 960x540.
cv2.namedWindow("Detections", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Detections", 960, 540)

# ============================
# CSV
# ============================
csv_file = "detections.csv"
# Column order of every row written to the CSV: eight object columns followed
# by seven plate columns.
csv_headers = [
    "fotograma", "tipo_objeto", "confianza", "id_tracking", 
    "x1", "y1", "x2", "y2",
    "matricula_en_su_caso", "conf_matricula", "mx1","my1","mx2","my2", "texto_matricula"
]

# NOTE(review): the file handle stays open at module level and is closed by
# the frame-processing code further down.
f = open(csv_file, mode='w', newline='', encoding='utf-8')
writer = csv.writer(f)
writer.writerow(csv_headers)

# ============================
# Simple tracker
# ============================
# Last ID handed out; incremented by assign_id when a new object appears.
object_id_counter = 0
# Maps tracking ID -> last known (cx, cy) centroid.
prev_centroids = {}

def get_centroid(box):
    """Return the integer (cx, cy) centre of an ``(x1, y1, x2, y2)`` box.

    Each coordinate is converted with ``int`` first; the midpoints are then
    truncated toward zero.
    """
    left, top, right, bottom = (int(value) for value in box)
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    return center_x, center_y

def assign_id(cx, cy, prev_centroids, threshold=50):
    """Return the tracking ID for a detection centred at ``(cx, cy)``.

    The detection is matched to the *nearest* stored centroid whose Euclidean
    distance is strictly below ``threshold``. Taking the first centroid found
    within range would depend on dict order and would hand the oldest ID to
    any detection that merely passes near it, even when a much closer track
    exists.

    Args:
        cx, cy: Centroid of the current detection.
        prev_centroids: Dict mapping tracking ID -> last ``(cx, cy)``. It is
            updated in place with the new position of the matched (or newly
            created) ID.
        threshold: Maximum distance for a detection to be considered the
            same object as a stored centroid.

    Returns:
        The matched ID, or a freshly allocated one (the global
        ``object_id_counter`` is incremented) when nothing is close enough.
    """
    global object_id_counter

    best_oid = None
    best_dist = threshold
    for oid, (pcx, pcy) in prev_centroids.items():
        dist = np.sqrt((cx - pcx) ** 2 + (cy - pcy) ** 2)
        # Strict '<' keeps the original "within threshold" rule and resolves
        # exact ties in favour of the earliest stored ID.
        if dist < best_dist:
            best_oid, best_dist = oid, dist

    if best_oid is not None:
        prev_centroids[best_oid] = (cx, cy)
        return best_oid

    # No stored centroid is close enough: register a new object.
    object_id_counter += 1
    prev_centroids[object_id_counter] = (cx, cy)
    return object_id_counter

# ============================
# Dibujar bounding boxes
# ============================
def draw_boxes(results, frame, color=(0,255,0), filter_classes=None):
    """Draw labelled rectangles for the detections in ``results`` on ``frame``.

    Args:
        results: Iterable of result objects exposing ``boxes`` (with ``xyxy``,
            ``cls`` and ``conf``) and a ``names`` lookup.
        frame: Image drawn on in place.
        color: Colour used for both the rectangle and the label text.
        filter_classes: Optional collection of class indices to draw; when it
            is falsy every detection is drawn.

    Returns:
        The same ``frame`` object that was passed in.
    """
    for result in results:
        detections = result.boxes
        for coords, class_id, score in zip(detections.xyxy, detections.cls, detections.conf):
            class_id = int(class_id)
            wanted = not filter_classes or class_id in filter_classes
            if wanted:
                left, top, right, bottom = (int(value) for value in coords)
                caption = f"{result.names[class_id]} {score:.2f}"
                cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                # Label is placed just above the top-left corner of the box.
                cv2.putText(frame, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return frame

# ============================
# Función para verificar si la matrícula está dentro de un vehículo
# ============================
def plate_inside_vehicle(plate_box, vehicle_boxes):
    """Return True when ``plate_box`` lies fully inside any vehicle box.

    Boxes are ``(x1, y1, x2, y2)`` tuples; touching edges count as inside.
    An empty ``vehicle_boxes`` yields False.
    """
    plate_left, plate_top, plate_right, plate_bottom = plate_box
    return any(
        plate_left >= veh_left
        and plate_top >= veh_top
        and plate_right <= veh_right
        and plate_bottom <= veh_bottom
        for veh_left, veh_top, veh_right, veh_bottom in vehicle_boxes
    )

# ============================
# Procesar frames
# ============================
# Values for the seven plate columns of a row that describes a person or a
# vehicle: six empty fields plus "null" for texto_matricula. Together with
# the eight object columns this gives one value per entry in csv_headers.
no_plate_fields = ["", "", "", "", "", "", "null"]

# Without this check a missing or unreadable video yields a CSV that contains
# only the header, with no hint about why.
if not cap.isOpened():
    print(f"No se pudo abrir el vídeo: {video_path}")

frame_num = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_num += 1

        # ============================
        # COCO detections (people and vehicles)
        # ============================
        coco_results = coco_model.predict(frame, imgsz=640, conf=0.25)
        vehicle_boxes_current = []

        for r in coco_results:
            boxes = r.boxes
            for box, cls, conf in zip(boxes.xyxy, boxes.cls, boxes.conf):
                cls = int(cls)
                if cls not in coco_class_indices:
                    continue
                x1, y1, x2, y2 = map(int, box)
                cx, cy = get_centroid((x1, y1, x2, y2))
                oid = assign_id(cx, cy, prev_centroids)
                tipo_obj = r.names[cls]
                writer.writerow(
                    [frame_num, tipo_obj, float(conf), oid, x1, y1, x2, y2]
                    + no_plate_fields
                )

                # Remember vehicle boxes so plates can be validated against them.
                if cls in vehicle_class_indices:
                    vehicle_boxes_current.append((x1, y1, x2, y2))

        # ============================
        # PLATE detections
        # ============================
        custom_results = custom_model.predict(frame, imgsz=640, conf=0.25)
        for r in custom_results:
            boxes = r.boxes
            for box, cls, conf in zip(boxes.xyxy, boxes.cls, boxes.conf):
                x1, y1, x2, y2 = map(int, box)
                # Only keep a plate that lies inside a vehicle seen in this frame.
                if plate_inside_vehicle((x1, y1, x2, y2), vehicle_boxes_current):
                    cx, cy = get_centroid((x1, y1, x2, y2))
                    oid = assign_id(cx, cy, prev_centroids)
                    writer.writerow([frame_num, r.names[int(cls)], float(conf), oid, x1, y1, x2, y2,
                                     "plate", float(conf), x1, y1, x2, y2, "null"])

        # ============================
        # Draw bounding boxes
        # ============================
        frame = draw_boxes(coco_results, frame, color=(0,255,0), filter_classes=coco_class_indices)
        frame = draw_boxes(custom_results, frame, color=(0,0,255))  # plates in red

        display_frame = cv2.resize(frame, (960, 540))
        cv2.imshow("Detections", display_frame)
        # Pressing 'q' stops processing early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, flush/close the CSV and close the window,
    # even if inference or drawing raised part-way through the video.
    cap.release()
    f.close()
    cv2.destroyAllWindows()

print(f"CSV guardado en {csv_file}")



0: 384x640 2 persons, 5 cars, 1 bus, 1 bench, 104.3ms
Speed: 1.6ms preprocess, 104.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 90.8ms
Speed: 1.4ms preprocess, 90.8ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 bus, 1 bench, 96.4ms
Speed: 1.4ms preprocess, 96.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 91.7ms
Speed: 1.5ms preprocess, 91.7ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 bus, 1 bench, 1 potted plant, 99.0ms
Speed: 1.8ms preprocess, 99.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 86.7ms
Speed: 1.4ms preprocess, 86.7ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 bus, 1 bench, 1 potted plant, 86.5ms
Speed: 1.6ms preprocess, 86.5ms inference, 0.7ms postprocess pe