In [None]:
from ultralytics import YOLO
from PIL import Image
import gradio as gr
import numpy as np
import easyocr
import re


# Load models
# YOLO detector for the ID regions of the input image; predict() reads its
# class names (e.g. "cn-11", "cn-4", "cn-7", "iso-type") from ids_model.names.
ids_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
# Same weights under a different home directory (presumably another machine):
# ids_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_IDs/ID_YOLO_container/weights/best.pt")
# YOLO detector that predict() runs on every cropped region; its class names
# are concatenated as the recognised characters.
char_model = YOLO("/home/gnz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune_large/weights/best.pt")
# Same weights under a different home directory (presumably another machine):
# char_model = YOLO("/home/gonzadzz/GitHub/yolo11_container/YOLO_Characters/Character_YOLO_container_finetune_large/weights/best.pt")
# Initialize EasyOCR with the 'en' and 'es' language lists
ocr_model = easyocr.Reader(['en','es']) 

# Regex rules used for validation.
# Each key is a detection class name (or the derived "code-container" entry).
# "attribute" is the key under which parse_detecciones() reports the result and
# "regex" is the pattern the recognised text is matched against.
rules = {
    "code-container": {"attribute": "code-container", "regex": r"^[A-Z]{4}\d{7}$"},
    "cn-11": {"attribute": "cn-11", "regex": r"^[A-Z]{4}\d{7}$"},
    "cn-4": {"attribute": "cn-4", "regex": r"^[A-Z]{4}$"},
    "cn-7": {"attribute": "cn-7", "regex": r"^\d{7}$"},
    "iso-type": {"attribute": "iso-type", "regex": r"^.{2}[A-Z0-9]{2}$"}  # adjusted to the ISO type code: any two characters, then two of A-Z/0-9
}

# Validate detections against the regex rules
def parse_detecciones(detecciones, rules):
    """Check every detected text against the regex rule registered for its key.

    Args:
        detecciones: Mapping of detection key -> recognised text.
        rules: Mapping of detection key -> ``{"attribute": ..., "regex": ...}``.

    Returns:
        A dict keyed by each rule's ``"attribute"`` holding
        ``{"value": <text>, "valid": <check mark or cross>}``. Keys of
        ``detecciones`` that have no rule are left out.
    """
    resultado = {}
    for clase, texto in detecciones.items():
        # Detections without a rule are not reported at all
        if clase not in rules:
            continue

        regla = rules[clase]
        atributo = regla["attribute"]
        patron = regla["regex"]

        # re.match anchors at the start of the text only; the pattern itself
        # decides whether the end is anchored too
        if re.match(patron, texto) is not None:
            marca = "✔️"
        else:
            marca = "❌"

        # Structured entry, ready to be shown in the Gradio JSON component
        resultado[atributo] = {"value": texto, "valid": marca}
    return resultado

def _bgr_to_pil(bgr_array):
    """Turn the BGR array returned by ``Results.plot()`` into an RGB PIL image."""
    # Reversing the last axis swaps BGR -> RGB so the colours are displayed correctly
    return Image.fromarray(bgr_array[..., ::-1])


def _easyocr_text(img):
    """Run EasyOCR on a PIL image; return the joined text, or None if nothing was read."""
    fragments = ocr_model.readtext(np.array(img), detail=0)
    return "".join(fragments) if fragments else None


def _stitch_characters(chars_detected):
    """Paste the character crops side by side on a black canvas, in list order."""
    tiles = [entry[3] for entry in chars_detected]
    canvas = Image.new(
        "RGB",
        (sum(tile.width for tile in tiles), max(tile.height for tile in tiles)),
        color=(0, 0, 0),
    )
    x_offset = 0
    for tile in tiles:
        canvas.paste(tile, (x_offset, 0))
        x_offset += tile.width
    return canvas


def _add_container_code(detecciones):
    """Add the "code-container" entry in place: cn-11 if present, else cn-4 + cn-7."""
    cn11 = detecciones.get("cn-11")
    cn4 = detecciones.get("cn-4")
    cn7 = detecciones.get("cn-7")
    if cn11:
        detecciones["code-container"] = cn11
    elif cn4 and cn7:
        detecciones["code-container"] = cn4 + cn7


def predict(image):
    """Detect the ID regions of a container image and read their text.

    Every region found by ``ids_model`` is cropped and read twice: with
    ``char_model`` (one detection per character, joined in reading order) and
    with EasyOCR. Both readings are validated against ``rules``.

    Args:
        image: PIL image supplied by the Gradio input, or None when the form
            is submitted without an image.

    Returns:
        A 4-tuple in the order of the Gradio outputs:
        the input image with the ID boxes drawn (PIL image, or None when no
        image was given), a list of annotated crops, a list of images with the
        character crops stitched in a row, and a dict with the raw texts
        ("output_yolo_char", "output_easy_ocr") and their "validation".
    """
    # Without an image there is nothing to predict; return empty outputs
    # instead of letting the model call fail
    if image is None:
        return None, [], [], {}

    detecciones_yolo = {}
    detecciones_easy = {}
    crops_con_labels = []
    texto_reconstruido_imgs = []

    # 1. Detection with the first model (IDs)
    results_id = ids_model.predict(image, conf=0.25)
    img_with_boxes_pil = _bgr_to_pil(results_id[0].plot())  # image with bounding boxes

    # 2. Process every detection of the first model
    for box in results_id[0].boxes:
        cls_id = int(box.cls[0].item())
        cls_name = ids_model.names[cls_id]

        # Crop coordinates
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        crop = image.crop((x1, y1, x2, y2))

        # 3. Run the character model (YOLO chars) on the crop
        results_char = char_model.predict(crop, conf=0.25)
        chars_detected = []
        for cbox in results_char[0].boxes:
            c_cls_name = char_model.names[int(cbox.cls[0].item())]
            cx1, cy1, cx2, cy2 = cbox.xyxy[0].tolist()
            char_crop = crop.crop((cx1, cy1, cx2, cy2))
            chars_detected.append((cx1, cy1, c_cls_name, char_crop))

        # 4. Order the characters according to the class
        sort_axis = None  # index into the tuple: 0 = x (left to right), 1 = y (top to bottom)
        if cls_name in ("cn-11", "iso-type"):
            # These classes may be printed vertically; a crop clearly taller
            # than wide is read top to bottom
            sort_axis = 1 if crop.height > crop.width * 1.5 else 0
        elif cls_name in ("cn-4", "cn-7"):
            sort_axis = 0  # always horizontal

        text_pred = ""
        if sort_axis is not None:
            chars_detected.sort(key=lambda entry: entry[sort_axis])
            text_pred = "".join(entry[2] for entry in chars_detected)

        # Store the YOLO reading; a later detection of the same class replaces it
        detecciones_yolo[cls_name] = text_pred

        # 5. Keep the annotated crop
        crops_con_labels.append(_bgr_to_pil(results_char[0].plot()))

        # 6. Keep the reconstruction only if characters were found
        if chars_detected:
            stitched = _stitch_characters(chars_detected)
            texto_reconstruido_imgs.append(stitched)

            ocr_text = _easyocr_text(stitched)
            if ocr_text is not None:
                detecciones_easy[cls_name] = ocr_text

        # 7. A horizontal crop is also read directly by EasyOCR; when that
        #    yields text it takes precedence over the reading from step 6
        if crop.width > crop.height:
            ocr_text = _easyocr_text(crop)
            if ocr_text is not None:
                detecciones_easy[cls_name] = ocr_text

    # 8. Build code-container for the YOLO reading
    _add_container_code(detecciones_yolo)

    # 9. Build code-container for the EasyOCR reading
    _add_container_code(detecciones_easy)

    # 10. Validate both
    parsed_yolo = parse_detecciones(detecciones_yolo, rules)
    parsed_easy = parse_detecciones(detecciones_easy, rules) if detecciones_easy else {}

    # 11. Assemble the final output
    salida_json = {
        "output_yolo_char": detecciones_yolo,
        "output_easy_ocr": detecciones_easy,
        "validation": {
            "yolo_char": parsed_yolo,
            "easy_ocr": parsed_easy
        }
    }

    return img_with_boxes_pil, crops_con_labels, texto_reconstruido_imgs, salida_json





# Gradio interface
# The input is handed to predict() as a PIL image, and the four outputs are
# listed in the same order as the tuple predict() returns:
# annotated image, annotated crops, stitched character rows, JSON results.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Detección IDs"),
        gr.Gallery(label="Crops con OCR", columns=2, height="auto"),
        gr.Gallery(label="Texto reconstruido en renglón", columns=1, height="auto"),
        gr.JSON(label="Resultados OCR")
    ],
    title="Container OCR Detector",
    description="Detecta IDs de contenedores. Si hay clase cn-11 se usa como code-container; si no, se genera con cn-4 + cn-7."
)

# Start the web UI only when this code runs as the main module
if __name__ == "__main__":
    demo.launch()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.



0: 640x640 1 cn-11, 327.3ms
Speed: 7.1ms preprocess, 327.3ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x64 1 2, 1 3, 1 4, 1 5, 1 6, 3 7s, 1 8, 1 U, 147.6ms
Speed: 1.7ms preprocess, 147.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 64)





0: 384x640 1 cn-11, 1 iso-type, 220.2ms
Speed: 7.6ms preprocess, 220.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 640x64 1 0, 1 1, 3 2s, 1 5, 1 8, 1 D, 1 E, 1 M, 1 U, 92.3ms
Speed: 1.5ms preprocess, 92.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 64)





0: 640x128 1 1, 2 2s, 1 G, 198.5ms
Speed: 2.0ms preprocess, 198.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 128)

0: 384x640 1 cn-11, 1 iso-type, 301.5ms
Speed: 16.6ms preprocess, 301.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 640x128 1 1, 2 2s, 1 G, 167.2ms
Speed: 1.1ms preprocess, 167.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 128)





0: 640x96 3 0s, 2 1s, 1 2, 2 3s, 1 R, 1 U, 1 Y, 176.2ms
Speed: 1.2ms preprocess, 176.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 96)
