In [None]:
import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np
import easyocr

# Weights of the fine-tuned YOLO character detector.
# NOTE(review): absolute, machine-specific path — the cell only runs where this file exists.
CHAR_MODEL_WEIGHTS = "/home/gnz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune_large/weights/best.pt"

# Languages handed to the EasyOCR reader.
OCR_LANGUAGES = ['en', 'es']

# Load the detector and the OCR reader once, at cell execution time,
# so every call to predict() reuses the same instances.
char_model = YOLO(CHAR_MODEL_WEIGHTS)
reader = easyocr.Reader(OCR_LANGUAGES)

def predict(image):
    """Detect characters with YOLO and cross-check them with EasyOCR.

    Args:
        image: PIL image delivered by the Gradio input component, or ``None``
            when the user submits without providing an image.

    Returns:
        A 3-tuple ``(annotated, crops, report)``:

        * ``annotated`` -- PIL image (RGB) with the detection boxes drawn,
          or ``None`` when no input image was given.
        * ``crops`` -- list of crops of ``image``, one per detection, ordered
          left to right by the box's left edge.
        * ``report`` -- dict with the keys ``"detailed_results"`` (per-detection
          class / confidence / OCR text), ``"output_yolo_char"`` (YOLO class
          names concatenated), ``"ocr_concat_char_yolo"`` (per-crop OCR text
          concatenated) and ``"output_ocr_id"`` (OCR text of the full image
          concatenated).
    """
    # Nothing uploaded: return an empty but well-formed result instead of
    # crashing on image.crop() further down.
    if image is None:
        return None, [], {
            "detailed_results": [],
            "output_yolo_char": "",
            "ocr_concat_char_yolo": "",
            "output_ocr_id": "",
        }

    # 1. Detection (single image in, so take the first result)
    results = char_model.predict(image, conf=0.25)[0]

    # 2. Annotated image. Results.plot() returns a BGR array, while
    #    Image.fromarray() interprets a 3-channel array as RGB, so the channel
    #    axis is reversed first; otherwise red and blue come out swapped.
    annotated_bgr = results.plot()
    img_with_boxes_pil = Image.fromarray(np.ascontiguousarray(annotated_bgr[..., ::-1]))

    # 3. Collect detections as plain Python values
    detections = []
    for box, cls, conf in zip(results.boxes.xyxy, results.boxes.cls, results.boxes.conf):
        x1, y1, x2, y2 = map(int, box)
        detections.append({
            "coords": (x1, y1, x2, y2),
            "class_idx": int(cls),
            "confidence": float(conf)
        })

    # 4. Order by left edge so the characters read left to right
    detections.sort(key=lambda d: d["coords"][0])

    crops = []
    ocr_results = []
    yolo_char_concat = ""
    ocr_concat_char_yolo = ""

    # 5. Walk the ordered detections: crop, name the class, OCR the crop
    for det in detections:
        crop = image.crop(det["coords"])
        crops.append(crop)

        # Character according to YOLO
        yolo_char = results.names[det["class_idx"]]
        yolo_char_concat += yolo_char

        # Character according to OCR on the crop; only the first OCR hit is used
        ocr_out = reader.readtext(np.array(crop))
        text = ocr_out[0][1] if ocr_out else ""
        ocr_concat_char_yolo += text

        # Per-detection record (coordinates intentionally left out)
        ocr_results.append({
            "class": yolo_char,
            "confidence": det["confidence"],
            "ocr": text
        })

    # OCR over the whole image; join() of an empty sequence is already ""
    ocr_full_out = reader.readtext(np.array(image))
    output_ocr_id = "".join(hit[1] for hit in ocr_full_out)

    # Final JSON payload
    final_output = {
        "detailed_results": ocr_results,
        "output_yolo_char": yolo_char_concat,
        "ocr_concat_char_yolo": ocr_concat_char_yolo,
        "output_ocr_id": output_ocr_id
    }

    return img_with_boxes_pil, crops, final_output


# Gradio UI: one PIL image in; annotated image, ordered crops and a JSON report out.
# The three outputs line up, in order, with the tuple returned by predict().
output_components = [
    gr.Image(type="pil", label="Detección IDs"),
    gr.Gallery(label="Crops ordenados", columns=5, height="auto"),
    gr.JSON(label="Resultados detallados y concatenación"),
]

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=output_components,
    title="Container Character Classification",
    description="Detecta caracteres de contenedores y los concatena en orden.",
)

# Start the local web server when the cell/script is executed directly.
if __name__ == "__main__":
    demo.launch()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.



0: 128x640 2 0s, 1 1, 2 4s, 1 6, 1 8, 1 L, 1 T, 243.1ms
Speed: 2.1ms preprocess, 243.1ms inference, 1.7ms postprocess per image at shape (1, 3, 128, 640)





0: 128x640 1 1, 3 2s, 1 4, 2 8s, 1 S, 1 U, 323.3ms
Speed: 7.2ms preprocess, 323.3ms inference, 2.8ms postprocess per image at shape (1, 3, 128, 640)





0: 384x640 1 1, 1 5, 675.6ms
Speed: 78.1ms preprocess, 675.6ms inference, 15.5ms postprocess per image at shape (1, 3, 384, 640)





0: 384x640 1 1, 1 5, 313.8ms
Speed: 12.4ms preprocess, 313.8ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)





0: 160x640 1 0, 1 1, 1 2, 1 4, 2 5s, 1 7, 1 9, 1 J, 1 L, 1 R, 1 T, 1 U, 609.0ms
Speed: 132.7ms preprocess, 609.0ms inference, 8.0ms postprocess per image at shape (1, 3, 160, 640)




In [None]:
import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np
import easyocr

# Weights of the fine-tuned YOLO character detector.
# NOTE(review): absolute, machine-specific path — the cell only runs where this file exists.
CHAR_MODEL_WEIGHTS = "/home/gnz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune_large/weights/best.pt"

# Languages handed to the EasyOCR reader.
OCR_LANGUAGES = ['en', 'es']

# Load the detector and the OCR reader once, at cell execution time,
# so every call to predict() reuses the same instances.
char_model = YOLO(CHAR_MODEL_WEIGHTS)
reader = easyocr.Reader(OCR_LANGUAGES)


def _summarize_detections(classes_detected, ocr_full):
    """Build the Markdown summary from YOLO class names and full-image OCR strings."""
    resumen = "📌 **Resumen detecciones**\n\n"
    resumen += f"- Clases detectadas por YOLO: {', '.join(classes_detected) if classes_detected else 'Ninguna'}\n"
    resumen += f"- OCR en imagen completa: {', '.join(ocr_full) if ocr_full else 'No se detectó texto'}\n"
    return resumen


def predict(image):
    """Compare YOLO character detection with EasyOCR on the full image and on crops.

    Args:
        image: PIL image delivered by the Gradio input component, or ``None``
            when the user submits without providing an image.

    Returns:
        A 5-tuple ``(annotated, crops, crop_records, ocr_full, summary)``:

        * ``annotated`` -- PIL image (RGB) with the detection boxes drawn,
          or ``None`` when no input image was given.
        * ``crops`` -- list of crops of ``image``, one per detection, in the
          order the detector returned them.
        * ``crop_records`` -- list of dicts with keys ``"class"``,
          ``"confidence"`` and ``"ocr_text"`` (list of strings, or ``None``
          when OCR found nothing in the crop).
        * ``ocr_full`` -- list of strings read by OCR on the whole image.
        * ``summary`` -- Markdown text listing detected classes and full-image OCR.
    """
    # Nothing uploaded: return an empty but well-formed result instead of
    # crashing on image.crop() further down.
    if image is None:
        return None, [], [], [], _summarize_detections([], [])

    # ---------------------------
    # 1. OCR over the whole image
    # ---------------------------
    ocr_full = reader.readtext(np.array(image), detail=0)  # list of strings

    # ---------------------------
    # 2. YOLO detection
    # ---------------------------
    results = char_model.predict(image, conf=0.25)[0]  # single image in, take the first result

    # Results.plot() returns a BGR array, while Image.fromarray() interprets a
    # 3-channel array as RGB, so the channel axis is reversed first; otherwise
    # red and blue come out swapped in the displayed image.
    annotated_bgr = results.plot()
    img_with_boxes_pil = Image.fromarray(np.ascontiguousarray(annotated_bgr[..., ::-1]))

    crops = []
    ocr_results_crops = []
    classes_detected = []

    # ---------------------------
    # 3. Crop every detection and run OCR on the crop
    # ---------------------------
    for box, cls, conf in zip(results.boxes.xyxy, results.boxes.cls, results.boxes.conf):
        x1, y1, x2, y2 = map(int, box)
        crop = image.crop((x1, y1, x2, y2))
        crops.append(crop)

        # OCR on the crop (detail=0 -> plain strings)
        ocr_text_crop = reader.readtext(np.array(crop), detail=0)

        class_name = results.names[int(cls)]
        classes_detected.append(class_name)

        ocr_results_crops.append({
            "class": class_name,
            "confidence": float(conf),
            # An empty OCR result is reported as None rather than []
            "ocr_text": ocr_text_crop if ocr_text_crop else None
        })

    # ---------------------------
    # 4. Human-readable summary
    # ---------------------------
    resumen = _summarize_detections(classes_detected, ocr_full)

    return (
        img_with_boxes_pil,   # annotated image
        crops,                # individual crops
        ocr_results_crops,    # per-crop class + OCR records
        ocr_full,             # full-image OCR (list of strings)
        resumen               # Markdown summary
    )


# ---------------------------
# 5. Gradio interface
# ---------------------------
# The five outputs line up, in order, with the tuple returned by predict().
output_components = [
    gr.Image(type="pil", label="Detección IDs"),
    gr.Gallery(label="Crops con OCR", columns=2, height="auto"),
    gr.JSON(label="Resultados OCR en crops"),
    gr.Textbox(label="OCR en imagen completa"),
    gr.Markdown(label="Resumen final"),
]

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=output_components,
    title="Container Character Classification",
    description="Comparativa de detección YOLO y OCR (EasyOCR) en imagen completa y crops.",
)

# Start the local web server when the cell/script is executed directly.
if __name__ == "__main__":
    demo.launch()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.



0: 128x640 2 0s, 1 1, 2 4s, 1 6, 1 8, 1 L, 1 T, 51.7ms
Speed: 0.8ms preprocess, 51.7ms inference, 0.8ms postprocess per image at shape (1, 3, 128, 640)









0: 128x640 2 0s, 1 1, 2 4s, 1 6, 1 8, 1 L, 1 T, 201.2ms
Speed: 0.8ms preprocess, 201.2ms inference, 1.3ms postprocess per image at shape (1, 3, 128, 640)

0: 96x640 2 0s, 1 1, 2 2s, 3 8s, 1 M, 1 U, 63.8ms
Speed: 1.1ms preprocess, 63.8ms inference, 1.1ms postprocess per image at shape (1, 3, 96, 640)





0: 128x640 2 0s, 3 4s, 1 6, 1 8, 1 C, 61.0ms
Speed: 1.9ms preprocess, 61.0ms inference, 0.6ms postprocess per image at shape (1, 3, 128, 640)





0: 320x640 1 1, 132.2ms
Speed: 1.5ms preprocess, 132.2ms inference, 1.3ms postprocess per image at shape (1, 3, 320, 640)









0: 640x640 1 2, 1 3, 1 5, 1 6, 3 7s, 1 B, 1 M, 1 O, 1 U, 201.3ms
Speed: 2.8ms preprocess, 201.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)

0: 192x640 1 0, 1 2, 2 3s, 2 5s, 1 7, 1 E, 1 M, 2 Ts, 1 U, 918.8ms
Speed: 215.0ms preprocess, 918.8ms inference, 27.7ms postprocess per image at shape (1, 3, 192, 640)
