In [9]:
import cv2
import re
import matplotlib.pyplot as plt
import paddle
import torch
from PIL import Image
import gradio as gr
import numpy as np
from ultralytics import YOLO
from paddleocr import PaddleOCR



In [10]:
#gpu_available = paddle.device.is_compiled_with_cuda()
#print("GPU available:", gpu_available)

In [None]:


# Configuration
# Default minimum detection confidence; used as the default of inference()'s
# conf_threshold parameter.
CONF_THRESHOLD = 0.25
# Default number of pixels added around a detection box by crop_with_padding().
PADDING = 15
# Class names looked up by YOLO class index in inference().
# NOTE(review): the order is assumed to match the class order the weights were
# trained with - confirm against the model's own `names` mapping.
CLASSES = ["cn-11s", "cn-4", "cn-7", "iso-type"]
# run_ocr() applies this pattern one character at a time to filter the OCR text.
WHITELIST_PATTERN = re.compile(r'^[A-Z0-9]+$')
# Intended device selector: -1 for CPU, 0 for the first GPU, etc.
# NOTE(review): this is a plain Python variable; nothing in the visible code
# exports it to the environment or passes it to either model - confirm it has
# any effect.
CUDA_VISIBLE_DEVICES = 0  # Change to -1 for CPU, 0 for the first GPU, etc.

# Load models
# -------------------
# NOTE(review): absolute, machine-specific path to the trained weights.
yolo_model = YOLO("/home/gnz/GitHub/yolo11_container/runs/detect/train3/weights/best.pt")

# PaddleOCR instance (adjust `lang` if you need Chinese, English, etc.)
ocr_model = PaddleOCR(use_textline_orientation=True, lang="en")


[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gnz/.paddlex/official_models/PP-LCNet_x1_0_doc_ori`.[0m
[32mCreating model: ('UVDoc', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gnz/.paddlex/official_models/UVDoc`.[0m
[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gnz/.paddlex/official_models/PP-LCNet_x1_0_textline_ori`.[0m
[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/gnz/.paddlex/official_models/PP-OCRv5_server_det`.[0m
[32mCreating model: ('en_PP-OCRv5_mobile_rec', None)[0m
[32mModel files already exist. Using cached files. To redownloa

In [12]:
# Environment diagnostic: prints the Ultralytics / Python / torch versions and
# the hardware summary shown in this cell's output.
# NOTE(review): this import lives outside the notebook's main import cell.
import ultralytics
ultralytics.checks()

Ultralytics 8.3.193 🚀 Python-3.12.3 torch-2.8.0+cu128 CPU (13th Gen Intel Core(TM) i7-13620H)
Setup complete ✅ (16 CPUs, 8.2 GB RAM, 112.8/1006.9 GB disk)


In [14]:
def crop_with_padding(image, xyxy, padding=PADDING):
    """Return the region of `image` covered by `xyxy`, grown by `padding` pixels.

    Args:
        image: Array whose first two dimensions are (height, width).
        xyxy: Four values (x1, y1, x2, y2); each is converted with int().
        padding: Pixels added on every side before clamping to the image bounds.

    Returns:
        The slice image[y1:y2, x1:x2] computed from the padded, clamped box.
    """
    left, top, right, bottom = (int(coord) for coord in xyxy)
    img_h, img_w = image.shape[:2]

    # Grow the box, but never past the image borders.
    col_start = max(left - padding, 0)
    row_start = max(top - padding, 0)
    col_stop = min(right + padding, img_w)
    row_stop = min(bottom + padding, img_h)

    return image[row_start:row_stop, col_start:col_stop]

def run_ocr(ocr, container_image):
    """Run OCR on a cropped region and return the whitelisted text (modern PaddleOCR API).

    Args:
        ocr: Object exposing `predict(image)`; the first element of its result
            is indexed with the "rec_texts" key.
        container_image: BGR image array of the cropped region. `None` or an
            array with zero elements is treated as "nothing to read".

    Returns:
        All recognised text fragments concatenated in order, keeping only the
        characters that individually match WHITELIST_PATTERN. An empty string
        when the crop is empty or nothing was recognised.
    """
    # cv2.cvtColor raises on an empty array, so bail out before touching OpenCV.
    if container_image is None or container_image.size == 0:
        return ""

    # NOTE(review): the crop is converted to RGB before prediction - confirm
    # this is the channel order the OCR pipeline expects for array inputs.
    result_ocr = ocr.predict(cv2.cvtColor(container_image, cv2.COLOR_BGR2RGB))
    if not result_ocr or not result_ocr[0]["rec_texts"]:
        return ""

    text_concat = ''.join(result_ocr[0]["rec_texts"])

    # Whitelist filter, applied one character at a time.
    return ''.join(ch for ch in text_concat if WHITELIST_PATTERN.fullmatch(ch))


def draw_results(image, xyxy, class_name):
    """Draw the detection rectangle and its class label on `image` in place.

    Args:
        image: BGR image array; it is modified directly.
        xyxy: Four values (x1, y1, x2, y2); each is converted with int().
        class_name: Text drawn next to the box.

    The label normally sits just above the box. When the box is so close to
    the top of the image that the text would fall outside the canvas, the
    label is drawn just inside the top edge of the box instead.
    """
    x1, y1, x2, y2 = map(int, xyxy)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    thickness = 2

    # putText's origin is the bottom-left corner of the text, so the text
    # occupies roughly [label_y - text_height, label_y] vertically.
    (_, text_height), _ = cv2.getTextSize(class_name, font, font_scale, thickness)
    label_y = y1 - 5
    if label_y - text_height < 0:
        label_y = y1 + text_height + 5

    cv2.putText(
        image,
        class_name,
        (x1, label_y),
        font,
        font_scale,
        (0, 0, 0),
        thickness
    )


# -------------------
# Inference principal
# -------------------
def inference(img, conf_threshold=CONF_THRESHOLD):
    """
    Run YOLO detection followed by OCR on a single image.

    Args:
        img: Input image in RGB order - a PIL image (as delivered by the
            Gradio input component) or anything `np.array` can convert.
            `None` is accepted and produces an empty result.
        conf_threshold: Minimum detection confidence. It is forwarded to the
            YOLO call and also enforced on each returned box.

    Returns:
        A tuple `(annotated_image, detections)`:
        - annotated_image: PIL image with boxes and class labels drawn, or
          `None` when `img` is `None`.
        - detections: list of dicts with keys "class", "text" and
          "confidence", grouped by class in CLASSES order.
    """
    # Gradio calls the function with None when no image was submitted.
    if img is None:
        return None, []

    # Normalise grayscale / palette / alpha PIL inputs so the RGB->BGR
    # conversion below always receives a three-channel image.
    if isinstance(img, Image.Image):
        img = img.convert("RGB")

    # Convert to BGR for OpenCV
    image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    # Annotations go on a separate copy: drawing on `image` itself would put
    # earlier rectangles and labels into the crops that later detections
    # send to OCR.
    annotated = image.copy()
    detections_info = []

    # YOLO step. Passing `conf` makes thresholds below the predictor's own
    # default effective; with the default argument the behaviour is unchanged.
    results = yolo_model(image, conf=conf_threshold)

    for result in results:
        # NOTE(review): class names are taken from CLASSES by index; boxes whose
        # class index is outside CLASSES are ignored. Confirm CLASSES matches
        # the model's own class list.
        for cls_idx, cls_name in enumerate(CLASSES):
            indices = (result.boxes.cls == cls_idx).nonzero(as_tuple=True)[0]

            for idx in indices:
                conf = result.boxes.conf[idx].item()
                if conf < conf_threshold:
                    continue

                xyxy = result.boxes.xyxy[idx].squeeze().tolist()
                plate_image = crop_with_padding(image, xyxy)
                output_text = run_ocr(ocr_model, plate_image)

                detections_info.append({
                    "class": cls_name,
                    "text": output_text,
                    "confidence": conf
                })

                draw_results(annotated, xyxy, cls_name)

    # Convert back to PIL for Gradio
    image_rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    im_show = Image.fromarray(image_rgb)

    return im_show, detections_info

# -------------------
# Gradio UI
# -------------------
# Text shown at the top of the demo page.
title = "YOLO + OCR Contenedor"
description = """
- Demo de detección de códigos de contenedores con YOLO + PaddleOCR.
- Clases soportadas: **cn-11s, cn-4, cn-7, iso-type**.
- Se devuelve la imagen anotada y un diccionario con clase, OCR y confianza.
"""

# Stylesheet override handed to Gradio for the image panels.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# One PIL image goes in; the annotated PIL image and the JSON results come out.
input_image = gr.Image(type="pil", label="Input")
output_widgets = [
    gr.Image(type="pil", label="Output"),
    gr.JSON(label="Resultados"),
]

demo = gr.Interface(
    fn=inference,
    inputs=input_image,
    outputs=output_widgets,
    title=title,
    description=description,
    css=css,
    cache_examples=False,  # no examples are configured for this demo
)

demo.launch(debug=False)

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.





0: 384x640 1 cn-11, 1 iso-type, 406.2ms
Speed: 36.3ms preprocess, 406.2ms inference, 37.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cn-11s, 1 iso-type, 342.6ms
Speed: 8.3ms preprocess, 342.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cn-11s, 1 iso-type, 337.8ms
Speed: 5.0ms preprocess, 337.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cn-11, 1 iso-type, 371.4ms
Speed: 4.8ms preprocess, 371.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
