In [2]:
import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np

# Cargar modelos
# ids_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
ids_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
# char_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune/weights/best.pt")
char_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune/weights/best.pt")

def predict(image):
    detecciones = {}
    crops_con_labels = []
    texto_reconstruido_imgs = []

    # 1. Detección con primer modelo (IDs)
    results_id = ids_model.predict(image, conf=0.5)
    img_with_boxes = results_id[0].plot()
    img_with_boxes_pil = Image.fromarray(img_with_boxes)

    # 2. Extraer crops de cada detección
    for box in results_id[0].boxes:
        cls_id = int(box.cls[0].item())
        cls_name = ids_model.names[cls_id]

        # Coordenadas del ID detectado
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        crop = image.crop((x1, y1, x2, y2))

        # 3. Pasar crop al modelo OCR
        results_char = char_model.predict(crop, conf=0.25)
        chars_detected = []

        # 4. Procesar caracteres uno a uno
        for cbox in results_char[0].boxes:
            c_cls_id = int(cbox.cls[0].item())
            c_cls_name = char_model.names[c_cls_id]
            cx1, cy1, cx2, cy2 = cbox.xyxy[0].tolist()
            char_crop = crop.crop((cx1, cy1, cx2, cy2))

            # ⚠️ No rotamos automáticamente → mantenemos orientación original
            chars_detected.append((cy1, c_cls_name, char_crop))

        # 5. Ordenar caracteres por coordenada Y (de arriba hacia abajo)
        chars_detected = sorted(chars_detected, key=lambda x: x[0])
        text_pred = "".join([c[1] for c in chars_detected])

        # Guardar en diccionario
        detecciones[cls_name] = text_pred

        # Guardar crop anotado
        crop_with_boxes = results_char[0].plot()
        crops_con_labels.append(Image.fromarray(crop_with_boxes))

        # 6. Reconstruir en un renglón horizontal
        if chars_detected:
            widths, heights = zip(*(c[2].size for c in chars_detected))
            total_width = sum(widths)
            max_height = max(heights)

            new_img = Image.new("RGB", (total_width, max_height), color=(0, 0, 0))
            x_offset = 0
            for _, _, char_crop in chars_detected:
                new_img.paste(char_crop, (x_offset, 0))
                x_offset += char_crop.width

            texto_reconstruido_imgs.append(new_img)

    return img_with_boxes_pil, crops_con_labels, texto_reconstruido_imgs, detecciones


# Interfaz de Gradio
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Detección IDs"),
        gr.Gallery(label="Crops con OCR", columns=2, height="auto"),
        gr.Gallery(label="Texto reconstruido en renglón", columns=1, height="auto"),
        gr.JSON(label="Resultados OCR")
    ],
    title="Container OCR Detector",
    description="El primer modelo detecta IDs de contenedores, el segundo modelo reconoce los caracteres y los recompone en un renglón legible."
)

if __name__ == "__main__":
    demo.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.



0: 640x640 1 cn-11, 1 iso-type, 7.4ms
Speed: 8.9ms preprocess, 7.4ms inference, 171.1ms postprocess per image at shape (1, 3, 640, 640)

0: 192x640 1 0, 3 3s, 1 6, 1 8, 1 9, 1 B, 1 L, 1 T, 58.3ms
Speed: 0.9ms preprocess, 58.3ms inference, 10.4ms postprocess per image at shape (1, 3, 192, 640)

0: 576x640 (no detections), 60.0ms
Speed: 2.4ms preprocess, 60.0ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 640)


In [None]:
import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np

# Cargar modelos
# ids_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
ids_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
# char_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune/weights/best.pt")
char_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune/weights/best.pt")

def predict(image):
    detecciones = {}
    crops_con_labels = []
    texto_reconstruido_imgs = []

    cn11_code = None
    cn4_code = None
    cn7_code = None

    # 1. Detección con primer modelo (IDs)
    results_id = ids_model.predict(image, conf=0.25)
    img_with_boxes = results_id[0].plot()
    img_with_boxes_pil = Image.fromarray(img_with_boxes)

    # 2. Procesar cada clase detectada
    for box in results_id[0].boxes:
        cls_id = int(box.cls[0].item())
        cls_name = ids_model.names[cls_id]

        # Coordenadas del bounding box
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        crop = image.crop((x1, y1, x2, y2))

        # 3. Pasar crop al modelo OCR
        results_char = char_model.predict(crop, conf=0.25)
        chars_detected = []

        for cbox in results_char[0].boxes:
            c_cls_id = int(cbox.cls[0].item())
            c_cls_name = char_model.names[c_cls_id]
            cx1, cy1, cx2, cy2 = cbox.xyxy[0].tolist()
            char_crop = crop.crop((cx1, cy1, cx2, cy2))
            chars_detected.append((cx1, cy1, c_cls_name, char_crop))

        # Decidir cómo ordenar según clase
        if cls_name == "cn-11" or cls_name == "iso-type":
            # Si es más alto que ancho -> vertical, ordenar por Y
            if crop.height > crop.width * 1.5:
                chars_detected = sorted(chars_detected, key=lambda x: x[1])  # Y
            else:
                chars_detected = sorted(chars_detected, key=lambda x: x[0])  # X
            cn11_code = "".join([c[2] for c in chars_detected])
            detecciones[cls_name] = cn11_code

            # Reconstrucción visual
            widths, heights = zip(*(c[3].size for c in chars_detected))
            total_width = sum(widths)
            max_height = max(heights)
            new_img = Image.new("RGB", (total_width, max_height), color=(0,0,0))
            x_offset = 0
            for _, _, _, char_crop in chars_detected:
                new_img.paste(char_crop, (x_offset, 0))
                x_offset += char_crop.width
            texto_reconstruido_imgs.append(new_img)

        elif cls_name == "cn-4":
            cn4_code = "".join([c[2] for c in sorted(chars_detected, key=lambda x: x[0])])
            detecciones[cls_name] = cn4_code

        elif cls_name == "cn-7":
            cn7_code = "".join([c[2] for c in sorted(chars_detected, key=lambda x: x[0])])
            detecciones[cls_name] = cn7_code

        # Guardar crop anotado para debugging
        crop_with_boxes = results_char[0].plot()
        crops_con_labels.append(Image.fromarray(crop_with_boxes))

    # 4. Generar code-container
    if cn11_code:
        detecciones["code-container"] = cn11_code
    elif cn4_code or cn7_code:
        detecciones["code-container"] = f"{cn4_code or ''}{cn7_code or ''}"

    return img_with_boxes_pil, crops_con_labels, texto_reconstruido_imgs, detecciones


# Interfaz de Gradio
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Detección IDs"),
        gr.Gallery(label="Crops con OCR", columns=2, height="auto"),
        gr.Gallery(label="Texto reconstruido en renglón", columns=1, height="auto"),
        gr.JSON(label="Resultados OCR")
    ],
    title="Container OCR Detector",
    description="Detecta IDs de contenedores. Si hay clase cn-11 se usa como code-container; si no, se genera con cn-4 + cn-7."
)

if __name__ == "__main__":
    demo.launch()


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.



0: 384x640 1 cn-11, 1 iso-type, 58.3ms
Speed: 2.5ms preprocess, 58.3ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 640x96 2 2s, 1 3, 1 4, 2 5s, 1 7, 1 A, 1 F, 1 M, 1 U, 50.7ms
Speed: 0.5ms preprocess, 50.7ms inference, 12.4ms postprocess per image at shape (1, 3, 640, 96)

0: 640x160 1 1, 2 2s, 1 G, 63.9ms
Speed: 1.0ms preprocess, 63.9ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 160)

0: 384x640 1 cn-11, 1 iso-type, 57.8ms
Speed: 4.0ms preprocess, 57.8ms inference, 9.5ms postprocess per image at shape (1, 3, 384, 640)

0: 640x64 2 2s, 2 4s, 3 6s, 1 M, 1 U, 77.0ms
Speed: 1.5ms preprocess, 77.0ms inference, 10.2ms postprocess per image at shape (1, 3, 640, 64)

0: 640x128 1 1, 1 2, 1 4, 1 G, 75.0ms
Speed: 1.2ms preprocess, 75.0ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 128)

0: 384x640 3 cn-11s, 1 iso-type, 60.9ms
Speed: 3.7ms preprocess, 60.9ms inference, 6.7ms postprocess per image at shape (1, 3, 384, 640)

0: 128x640 1 

Traceback (most recent call last):
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/queueing.py", line 667, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2274, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 1781, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo


0: 384x640 3 cn-11s, 1 iso-type, 17.4ms
Speed: 2.7ms preprocess, 17.4ms inference, 8.3ms postprocess per image at shape (1, 3, 384, 640)

0: 128x640 1 0, 1 3, 2 4s, 1 5, 1 6, 1 9, 1 H, 1 R, 1 T, 1 U, 13.4ms
Speed: 1.2ms preprocess, 13.4ms inference, 25.4ms postprocess per image at shape (1, 3, 128, 640)

0: 96x640 1 0, 1 3, 1 4, 1 5, 1 6, 1 U, 17.6ms
Speed: 0.8ms preprocess, 17.6ms inference, 11.6ms postprocess per image at shape (1, 3, 96, 640)

0: 352x640 (no detections), 9.0ms
Speed: 2.2ms preprocess, 9.0ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)


Traceback (most recent call last):
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/queueing.py", line 667, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2274, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 1781, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo


0: 384x640 3 cn-11s, 1 iso-type, 118.4ms
Speed: 2.3ms preprocess, 118.4ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 9, 17.9ms
Speed: 1.4ms preprocess, 17.9ms inference, 4.9ms postprocess per image at shape (1, 3, 384, 640)

0: 416x640 (no detections), 67.0ms
Speed: 1.6ms preprocess, 67.0ms inference, 0.7ms postprocess per image at shape (1, 3, 416, 640)


Traceback (most recent call last):
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/queueing.py", line 667, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2274, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 1781, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo


0: 384x640 1 cn-11, 1 iso-type, 193.5ms
Speed: 3.2ms preprocess, 193.5ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 640x96 1 0, 1 5, 2 6s, 1 7, 1 8, 2 9s, 1 R, 1 U, 1 Y, 62.7ms
Speed: 0.7ms preprocess, 62.7ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 96)

0: 640x128 1 1, 1 5, 1 G, 73.4ms
Speed: 0.9ms preprocess, 73.4ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 128)

0: 384x640 1 cn-11, 1 iso-type, 53.6ms
Speed: 3.0ms preprocess, 53.6ms inference, 4.4ms postprocess per image at shape (1, 3, 384, 640)

0: 448x640 (no detections), 69.8ms
Speed: 1.8ms preprocess, 69.8ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)


Traceback (most recent call last):
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/queueing.py", line 667, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2274, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo11_container/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 1781, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gonzadzz/GitHub/yolo