In [3]:
import gradio as gr
from ultralytics import YOLO
import cv2
import tempfile
import os
import easyocr

# Load the trained detection model.
# The weights location can be overridden with the YOLO_WEIGHTS environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original local training output is used.
MODEL_PATH = os.environ.get(
    "YOLO_WEIGHTS",
    "/home/gnz/GitHub/yolo11_container/runs/detect/train/weights/best.pt",
)
model = YOLO(MODEL_PATH)

# Initialise EasyOCR with English and Spanish recognition enabled.
reader = easyocr.Reader(['en', 'es'])

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [None]:
# --- image processing ---
def process_image(image):
    """Detect objects in an image and run OCR on every detected region.

    Args:
        image: Image as a NumPy array in RGB channel order (the format the
            ``gr.Image(type="numpy")`` input in this file delivers), or
            ``None`` when the input component has been cleared.

    Returns:
        A tuple ``(annotated, crops, texts)``:
            annotated: RGB image with the detections drawn on it, or ``None``
                when ``image`` is ``None``.
            crops: list of RGB crops, one per non-empty detection box.
            texts: list of strings, the OCR text for each crop in the same
                order (``"(sin texto)"`` when OCR finds nothing).
    """
    # The Gradio ``change`` event also fires when the image is cleared.
    if image is None:
        return None, [], []

    # Ultralytics treats NumPy input as BGR and ``plot()`` returns BGR, so
    # convert explicitly on the way in and on the way out. The original code
    # labelled this conversion "to RGB" and then displayed BGR data, which
    # swapped red and blue in the UI.
    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    results = model.predict(img_bgr, conf=0.25, verbose=False)

    annotated = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)
    crops, texts = [], []
    height, width = image.shape[:2]

    # Extract crops + OCR
    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())

        # Clamp to the image so a negative coordinate cannot wrap around when
        # slicing, and skip boxes that collapse to zero area.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            continue

        # RGB crop for display in the gallery.
        crops.append(image[y1:y2, x1:x2])

        # OCR keeps reading the BGR crop, which is the data the original code
        # effectively passed to the reader.
        ocr_result = reader.readtext(img_bgr[y1:y2, x1:x2])
        text = " ".join(res[1] for res in ocr_result) if ocr_result else "(sin texto)"
        texts.append(text)

    return annotated, crops, texts


# --- video processing ---
def process_video(video_path):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path to the input video file, or ``None``/empty when the
            input component has been cleared.

    Returns:
        Path to a temporary ``.mp4`` file containing the annotated frames, or
        ``None`` when no input path was given. The file is created with
        ``delete=False``, so removing it is left to the caller.

    Raises:
        ValueError: If the video cannot be opened.
    """
    # The Gradio ``change`` event also fires when the video is cleared.
    if not video_path:
        return None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"No se pudo abrir el video: {video_path}")

    out = None
    try:
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the duration of the output. Fall back to 30 when the container does
        # not report a usable rate (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Reserve a temporary output path and close our handle right away so
        # the descriptor is not leaked and the writer can open the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_out:
            out_path = temp_out.name

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Per-frame prediction; frames read by OpenCV go straight to the
            # model and the annotated result straight to the writer.
            results = model.predict(frame, conf=0.25, verbose=False)
            out.write(results[0].plot())
    finally:
        # Release capture and writer even if prediction fails mid-video.
        cap.release()
        if out is not None:
            out.release()

    return out_path


# --- Gradio interface ---
def _on_image_change(image):
    """Adapt ``process_image`` to the UI outputs.

    Returns empty outputs when the image input is cleared, and joins the list
    of OCR strings into one line per detection so the textbox does not show a
    Python list repr.
    """
    if image is None:
        return None, [], ""
    annotated, crops, texts = process_image(image)
    return annotated, crops, "\n".join(texts)


def _on_video_change(video_path):
    """Adapt ``process_video`` to the UI; clears the output when the input is cleared."""
    if not video_path:
        return None
    return process_video(video_path)


with gr.Blocks() as demo:
    gr.Markdown("## 🚢 Detección de IDs y OCR en Contenedores")

    with gr.Tab("Imagen"):
        with gr.Row():
            inp_img = gr.Image(type="numpy", label="Sube una imagen")
            out_img = gr.Image(type="numpy", label="Detecciones")
        gallery = gr.Gallery(label="Crops detectados", columns=3, height="auto")
        out_texts = gr.Textbox(label="Resultados OCR", lines=6)

        # ``change`` fires on upload and on clear; the handler covers both.
        inp_img.change(_on_image_change, inputs=inp_img, outputs=[out_img, gallery, out_texts])

    with gr.Tab("Video"):
        inp_vid = gr.Video(label="Sube un video")
        out_vid = gr.Video(label="Video con detecciones")

        inp_vid.change(_on_video_change, inputs=inp_vid, outputs=out_vid)

demo.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




