In [1]:
import csv
import cv2
import easyocr
import os
import math
import matplotlib
import numpy as np
from paddleocr import PaddleOCR
from tqdm import tqdm
from ultralytics import YOLO

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'easyocr'

Cargar modelos

In [3]:
# Detector loaded from the project's own license-plate weights file.
model_plates = YOLO("matriculas_model/version_1/weights/best.pt")
# General-purpose detector; its class indices are filtered later through
# CLASSES_INTERES.
model_coco = YOLO("yolo11l.pt")
# Two OCR engines are instantiated so their readings of the same plate crop
# can be compared side by side.
easy_ocr = easyocr.Reader(['en'])
paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='en')

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gcprado/.paddlex/official_models/PP-LCNet_x1_0_doc_ori`.[0m
[32mCreating model: ('UVDoc', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gcprado/.paddlex/official_models/UVDoc`.[0m
[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gcprado/.paddlex/official_models/PP-LCNet_x1_0_textline_ori`.[0m
[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gcprado/.paddlex/official_models/PP-OCRv5_server_det`.[0m
[32m

Definir constantes

In [4]:
# Output directory; created up front so later writes cannot fail on a
# missing folder.
OUT_DIR = "out"
os.makedirs(OUT_DIR, exist_ok=True)

# Input video and the two artifacts produced by the processing cell.
VIDEO_PATH = "./assets/C0142.MP4"
OUTPUT_VIDEO = os.path.join(OUT_DIR, "detecciones_y_ocr.mp4")
CSV_PATH = os.path.join(OUT_DIR, "reporte_ocr.csv")

# Class indices of model_coco that are kept; every other class is ignored.
IDX_PERSON = 0
IDX_VEHICLES = [2, 3, 5, 7]  # car, motorcycle, bus, truck
CLASSES_INTERES = [IDX_PERSON] + IDX_VEHICLES

Definir funciones de utilidad

In [5]:
def centroide(box):
    """Return the integer center ``(cx, cy)`` of a bounding box.

    ``box`` is an iterable of four values ``(x1, y1, x2, y2)``. Each value is
    truncated with ``int()`` first, then the midpoints are computed with
    floor division.
    """
    izquierda, arriba, derecha, abajo = (int(valor) for valor in box)
    centro_x = (izquierda + derecha) // 2
    centro_y = (arriba + abajo) // 2
    return centro_x, centro_y

def texto_placa_easyocr(img, reader=None):
    """Read the text of a license-plate crop with EasyOCR.

    ``readtext(..., detail=1)`` returns a list with one
    ``(bbox, text, confidence)`` tuple per detected text fragment. The
    previous implementation unpacked that list into three names, which
    raised for almost every input and was hidden by the blanket ``except``,
    so the function effectively never returned a reading.

    Args:
        img: Image (plate crop) to pass to the OCR reader.
        reader: Optional object exposing ``readtext(img, detail=1)``.
            Defaults to the module-level ``easy_ocr`` reader.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is the upper-cased
        concatenation of the alphanumeric characters of every fragment, or
        ``"NON_DETECTION"`` when nothing usable was read. ``confidence`` is
        the mean confidence of the fragments, or ``0.0`` when there are none
        or the OCR call fails.
    """
    if reader is None:
        reader = easy_ocr
    try:
        detecciones = reader.readtext(img, detail=1)
        if not detecciones:
            return "NON_DETECTION", 0.0
        texto = "".join(
            ch
            for _, fragmento, _ in detecciones
            for ch in fragmento
            if ch.isalnum()
        ).upper()
        # The text is built from all fragments, so report their mean
        # confidence rather than the first fragment's alone.
        confianza = sum(float(c) for _, _, c in detecciones) / len(detecciones)
        return (texto if texto else "NON_DETECTION"), confianza
    except Exception:
        # Best effort per frame: one unreadable crop must not abort the
        # whole video run.
        return "NON_DETECTION", 0.0

def texto_placa_paddle(img):
    """Read the text of a license-plate crop with PaddleOCR.

    Only the first recognized text line (and its score) of the first
    prediction result is used. The text is reduced to its alphanumeric
    characters and upper-cased.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is ``"NON_DETECTION"`` when
        the cleaned text is empty. Any exception (including an empty result
        list) yields ``("NON_DETECTION", 0.0)``.
    """
    sin_lectura = ("NON_DETECTION", 0.0)
    try:
        salida = paddle_ocr.predict(img)[0]
        primer_texto = salida['rec_texts'][0]
        primer_score = salida['rec_scores'][0]
        limpio = "".join(filter(str.isalnum, primer_texto)).upper()
        if primer_score is None:
            confianza = 0.0
        else:
            confianza = float(primer_score)
        if not limpio:
            limpio = "NON_DETECTION"
        return limpio, confianza
    except Exception:
        return sin_lectura

def pintar_resultados(det, frame, color=(0, 255, 0), clases=None):
    """Draw detection boxes and their labels onto ``frame``.

    Args:
        det: Iterable of detection results; each exposes ``boxes.xyxy``,
            ``boxes.cls``, ``boxes.conf`` and a ``names`` mapping.
        frame: Image that is drawn on in place.
        color: Color of the rectangles.
        clases: Optional collection of class indices to draw; when empty or
            ``None`` every detection is drawn.

    Returns:
        The same ``frame`` object that was passed in.
    """
    color_texto = (255, 255, 255)
    for resultado in det:
        cajas = resultado.boxes
        for coords, clase, score in zip(cajas.xyxy, cajas.cls, cajas.conf):
            id_clase = int(clase)
            if clases and id_clase not in clases:
                continue

            izq, arriba, der, abajo = (int(v) for v in coords)
            etiqueta = f"{resultado.names[id_clase]} {score:.2f}"

            cv2.rectangle(frame, (izq, arriba), (der, abajo), color, 3)
            # The label sits just above the top-left corner of the box.
            cv2.putText(
                frame,
                etiqueta,
                (izq, arriba - 8),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.7,
                color_texto,
                2,
            )
    return frame

class SimpleTracker:
    """Minimal centroid tracker based on Euclidean distance.

    Each known ID stores the last centroid it was seen at. A new centroid is
    matched to the nearest stored centroid that lies strictly closer than
    ``distancia_max``; otherwise a fresh ID is created. IDs start at 1.
    """

    def __init__(self, distancia_max=50):
        # Last issued ID; incremented before use, so the first ID is 1.
        self.next_id = 0
        # Maps ID -> (x, y) of the last centroid assigned to that ID.
        self.registros = {}
        # Maximum distance at which a centroid can still match a known ID.
        self.distancia_max = distancia_max

    def asignar_id(self, cx, cy):
        """Return the ID of the nearest known centroid, or create a new ID.

        The previous implementation returned the first stored centroid within
        range (in insertion order), which could hand a detection the ID of a
        farther track even though a much closer one existed. The closest
        track within range is now chosen; ties keep the earliest track.
        """
        mejor_id = None
        mejor_dist = self.distancia_max
        for ident, (px, py) in self.registros.items():
            dist = np.hypot(cx - px, cy - py)
            if dist < mejor_dist:
                mejor_id, mejor_dist = ident, dist

        if mejor_id is not None:
            self.registros[mejor_id] = (cx, cy)
            return mejor_id

        self.next_id += 1
        self.registros[self.next_id] = (cx, cy)
        return self.next_id


Procesamiento de video

In [6]:
# ===========================================
# Video and CSV setup
# ===========================================
cap = cv2.VideoCapture(VIDEO_PATH)
video_out = None

# The run is long, so the capture and the writer are released in a `finally`
# block: if anything fails midway the partial output video is still
# finalized and the input handle is not leaked.
try:
    if not cap.isOpened():
        # Fail fast instead of silently producing an empty CSV and video.
        raise RuntimeError(f"No se pudo abrir el video: {VIDEO_PATH}")

    ancho = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    alto = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    video_out = cv2.VideoWriter(OUTPUT_VIDEO, codec, fps, (ancho, alto))

    with open(CSV_PATH, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            "frame", "tipo_objeto", "confianza", "id_tracking",
            "x1", "y1", "x2", "y2",
            "matricula", "conf_matricula", "mx1", "my1", "mx2", "my2",
            "OCR_Paddle", "Conf_Paddle",
            "OCR_Easy", "Conf_Easy"
        ])

        print("Procesando video...\n")
        # NOTE(review): one tracker instance serves both the COCO objects and
        # the plates, so a plate whose centroid is near an object's centroid
        # can receive that object's ID - confirm this is intended.
        tracker = SimpleTracker()

        # Progress bar
        with tqdm(total=total_frames, desc="Procesando", ncols=80) as pbar:
            frame_index = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_index += 1

                # General detection (people and vehicles)
                detecciones_coco = model_coco.predict(frame, imgsz=640, conf=0.25, verbose=False)
                for r in detecciones_coco:
                    for box, cls, conf in zip(r.boxes.xyxy, r.boxes.cls, r.boxes.conf):
                        if int(cls) not in CLASSES_INTERES:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        cx, cy = centroide(box)
                        tid = tracker.asignar_id(cx, cy)
                        # The plate and OCR columns stay empty for these rows.
                        writer.writerow([
                            frame_index, r.names[int(cls)], float(conf), tid,
                            x1, y1, x2, y2, "", "", "", "", "", "",
                            "", "", "", ""
                        ])

                # License-plate detection
                resultados_placas = model_plates.predict(frame, imgsz=640, conf=0.25, verbose=False)
                for r in resultados_placas:
                    for box, cls, conf in zip(r.boxes.xyxy, r.boxes.cls, r.boxes.conf):
                        x1, y1, x2, y2 = map(int, box)
                        cx, cy = centroide(box)
                        pid = tracker.asignar_id(cx, cy)
                        # The crop is taken before anything is drawn for
                        # this plate.
                        crop = frame[y1:y2, x1:x2]

                        # --- OCR engines ---
                        texto_paddle, conf_paddle = texto_placa_paddle(crop)
                        texto_easy, conf_easy = texto_placa_easyocr(crop)

                        writer.writerow([
                            frame_index, r.names[int(cls)], float(conf), pid,
                            x1, y1, x2, y2, "plate", float(conf),
                            x1, y1, x2, y2,
                            texto_paddle, conf_paddle,
                            texto_easy, conf_easy
                        ])

                        etiqueta = (
                            f"{r.names[int(cls)]} {conf:.2f} | "
                            f"Paddle: {texto_paddle} ({conf_paddle:.2f}) | "
                            f"Easy: {texto_easy}"
                        )

                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                        cv2.putText(frame, etiqueta, (x1, y1 - 5),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

                # Draw the COCO detections only after all plate crops and OCR
                # calls, so the green boxes never end up inside a crop.
                frame = pintar_resultados(detecciones_coco, frame, color=(0, 255, 0), clases=CLASSES_INTERES)
                video_out.write(frame)

                # Advance the progress bar
                pbar.update(1)
finally:
    cap.release()
    if video_out is not None:
        video_out.release()

print("\nProcesamiento finalizado.")
print(f"CSV guardado en: {CSV_PATH}")
print(f"Video guardado en: {OUTPUT_VIDEO}")


Procesando video...



Procesando: 100%|███████████████████████████| 2832/2832 [43:31<00:00,  1.08it/s]


Procesamiento finalizado.
CSV guardado en: out/reporte_ocr.csv
Video guardado en: out/detecciones_y_ocr.mp4





añadir readme
añadir explicaciones en el notebook
mejorar el modelo
intentar mejorar el código de los OCR para el etiquetado de video

pip install paddlepaddle
pip install paddleocr

In [None]:
# NOTE(review): these three statements repeat the model-loading cell above
# verbatim; they only matter when this cell is run on its own.
model_plates = YOLO("matriculas_model/version_1/weights/best.pt")
easy_ocr = easyocr.Reader(['en'])
paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='en')

# Dataset root. The default is the original absolute path; set the
# PLATES_DATASET_DIR environment variable to run on another machine without
# editing the notebook.
IMG_DIR = os.environ.get(
    "PLATES_DATASET_DIR",
    "/home/gcprado/code/datasets/car-plates-ocr-comparison",
)

Recorrer las imágenes en el directorio
# .
# ├── test
# │   ├── images
# │   └── labels
# ├── train
# │   ├── images
# │   └── labels
# └── val
#     ├── images
#     └── labels

# Walk every split of the dataset, detect the plates in each image and keep
# the raw output of both OCR engines for every detected plate.
#
# The earlier draft of this cell did not parse (inconsistent indentation)
# and used names that were never defined (`img_name`, `preds_yolo`). It also
# reused `frame`/`tracker` from the video loop and called
# `paddle_ocr.ocr(crop, cls=True)` instead of the `predict` API used in
# `texto_placa_paddle`.
labels = []
for split in ("train", "val", "test"):
    images_dir = os.path.join(IMG_DIR, split, "images")
    if not os.path.isdir(images_dir):
        continue

    for file_name in sorted(os.listdir(images_dir)):
        img_name = os.path.join(images_dir, file_name)
        imagen = cv2.imread(img_name)
        if imagen is None:
            # Not a readable image; skip it.
            continue

        placas_detectadas = model_plates.predict(imagen, imgsz=640, conf=0.25, verbose=False)

        # One entry per detected plate, in the same order in all three lists.
        preds_yolo = []
        preds_easyocr = []
        preds_paddleocr = []
        for r in placas_detectadas:
            for box, cls, conf in zip(r.boxes.xyxy, r.boxes.cls, r.boxes.conf):
                x1, y1, x2, y2 = map(int, box)
                crop = imagen[y1:y2, x1:x2]
                if crop.size == 0:
                    # Degenerate box: nothing to read.
                    continue

                preds_yolo.append({
                    "class": r.names[int(cls)],
                    "conf": float(conf),
                    "box": [x1, y1, x2, y2],
                })
                preds_easyocr.append(easy_ocr.readtext(crop))
                preds_paddleocr.append(paddle_ocr.predict(crop))

        # Build a label holding the predictions for this image.
        label = {
            "image": img_name,
            "predictions": {
                "yolo": preds_yolo,
                "easyocr": preds_easyocr,
                "paddleocr": preds_paddleocr
            }
        }
        labels.append(label)
