In [1]:
import os
import csv
import cv2
import pickle
import imageio as io
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import precision_score, recall_score, f1_score
from ultralytics import YOLO

2025-01-30 17:45:34.213955: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-30 17:45:34.234128: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-30 17:45:34.234145: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-30 17:45:34.234712: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-30 17:45:34.238372: I tensorflow/core/platform/cpu_feature_guar

In [2]:

pkl_file_path = '/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation/patient70/patient70_segmentation.pkl'

with open(pkl_file_path, 'rb') as f:
    data = pickle.load(f)

print("Claves del archivo:", data.keys())

for key in data.keys():
    print(f"\nClave: {key}")
    if isinstance(data[key], (list, dict)):
        print(f"Tipo de dato: {type(data[key])}, Tamaño: {len(data[key])}")
    elif isinstance(data[key], np.ndarray):
        print(f"Tipo de dato: {type(data[key])}, Forma: {data[key].shape}")
    else:
        print(f"Tipo de dato: {type(data[key])}, Contenido: {data[key]}")

if 'mask' in data:
    masks = data['mask']
    print(f"\nEjemplo de máscara: {type(masks[0])}, Forma: {masks[0].shape}")
    print(f"Contenido de la primera máscara:\n{masks[0]}")


Claves del archivo: dict_keys(['original', 'mask'])

Clave: original
Tipo de dato: <class 'numpy.ndarray'>, Forma: (256, 256, 3, 20)

Clave: mask
Tipo de dato: <class 'numpy.ndarray'>, Forma: (256, 256, 20)

Ejemplo de máscara: <class 'numpy.ndarray'>, Forma: (256, 20)
Contenido de la primera máscara:
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:
def imprimir_frames(original, masks, num_frames):
    for i in range(num_frames):
        frame_original = original[:, :, :, i].astype(np.uint8)
        frame_mask = (masks[:, :, i] * 255).astype(np.uint8)

        plt.figure(figsize=(10, 5))

        plt.subplot(1, 2, 1)
        plt.imshow(frame_original)
        plt.title(f"Cuadro Original {i+1}")
        plt.axis("off")

        plt.subplot(1, 2, 2)
        plt.imshow(frame_mask, cmap="gray")
        plt.title(f"Máscara {i+1}")
        plt.axis("off")

        plt.show()

imprimir_frames(original, masks, num_frames)


In [None]:
pkl_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation/patient70/patient70_segmentation.pkl"  # Cambia esta ruta

#
if not os.path.exists(pkl_path):
    print(f"No se encontró el archivo {pkl_path}")
    exit()

with open(pkl_path, 'rb') as f:
    data = pickle.load(f)

masks = data['mask']  

print(f"Forma de las máscaras: {masks.shape}")
print(f"Tipo de datos: {masks.dtype}")

valores_unicos = np.unique(masks)
print(f"Valores únicos en las máscaras: {valores_unicos}")

valor_minimo = masks.min()
valor_maximo = masks.max()
print(f"Rango de valores: {valor_minimo} a {valor_maximo}")

num_frames = masks.shape[-1]
for i in range(min(num_frames, 5)):
    plt.figure(figsize=(5, 5))
    plt.imshow(masks[:, :, i], cmap='gray')
    plt.title(f"Frame {i + 1}")
    plt.axis("off")
    plt.show()


In [None]:
base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"

for folder in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder)

    if not folder.startswith("patient") or not os.path.isdir(folder_path):
        continue

    pkl_path = os.path.join(folder_path, f"{folder}_segmentation.pkl")

    if not os.path.exists(pkl_path):
        print(f"No se encontró {pkl_path}, saltando...")
        continue

    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    masks = data['mask'] 
    height, width, num_frames = masks.shape

    mask_images_dir = os.path.join(folder_path, "masks")
    os.makedirs(mask_images_dir, exist_ok=True)

    for i in range(num_frames):
        frame_mask = masks[:, :, i].astype(np.uint8)

        unique_values = np.unique(frame_mask)
        print(f"Frame {i} de {folder}: Valores únicos: {unique_values}")

        mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{i:03d}.png")
        cv2.imwrite(mask_image_path, frame_mask)

    print(f"Imágenes de máscaras guardadas para {folder} en {mask_images_dir}")


# Metricas

In [2]:
def calculate_dice(ground_truth_mask, predicted_mask):
    if np.sum(ground_truth_mask) == 0 and np.sum(predicted_mask) == 0:
        return 1.0
    intersection = np.logical_and(ground_truth_mask, predicted_mask)
    dice = 2 * np.sum(intersection) / (np.sum(ground_truth_mask) + np.sum(predicted_mask))
    return dice

def calculate_iou(mask1, mask2):
    if np.sum(mask1) == 0 and np.sum(mask2) == 0:
        return 1.0
    elif np.sum(mask1) == 0 or np.sum(mask2) == 0:
        return 0.0
    intersection = np.logical_and(mask1, mask2)
    union = np.logical_or(mask1, mask2)
    iou = np.sum(intersection) / np.sum(union)
    return iou

## ROI con YOLO y segmentación con UNet


In [22]:
def evaluate_videos_by_patient(base_path, yolo_model, unet_model, unet_input_size=(128, 256), margin=15):
    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)

        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(original_video_path) or not os.path.exists(mask_images_dir):
            print(f"Saltando {folder} (falta uno de los datos)")
            continue

        roi = get_max_yolo_roi(original_video_path, yolo_model, margin)
        if roi is None:
            continue

        x1, y1, x2, y2 = roi

        cap_original = cv2.VideoCapture(original_video_path)
        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        frame_count = 0
        while cap_original.isOpened():
            ret_original, frame_original = cap_original.read()
            mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")

            if not ret_original or not os.path.exists(mask_image_path):
                break

            frame_mask = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            cropped_original = frame_original[y1:y2, x1:x2]
            cropped_ground_truth = ground_truth_mask[y1:y2, x1:x2]

            resized_original = cv2.resize(cropped_original, unet_input_size)
            resized_ground_truth = cv2.resize(cropped_ground_truth, unet_input_size, interpolation=cv2.INTER_NEAREST)

            normalized_image = resized_original / 255.0
            input_batch = np.expand_dims(normalized_image, axis=0)

            seg_pred = unet_model.predict(input_batch, verbose=0)
            predicted_mask = (np.squeeze(seg_pred) > 0.5).astype(np.uint8)

            iou_scores.append(calculate_iou(resized_ground_truth, predicted_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, predicted_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))

            frame_count += 1

        cap_original.release()

        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores) if iou_scores else 0,
            "Dice": np.mean(dice_scores) if dice_scores else 0,
            "Precision": np.mean(precision_scores) if precision_scores else 0,
            "Recall": np.mean(recall_scores) if recall_scores else 0,
            "F1": np.mean(f1_scores) if f1_scores else 0,
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "IoU": np.mean(all_iou_scores) if all_iou_scores else 0,
        "Dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "Precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "Recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "F1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    return metrics_by_patient, overall_metrics

base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
yolo_model = YOLO('/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt')
unet_model = load_model('/home/voicelab/Desktop/segmentation_glottis/models/UNets/ROI_UNet_YOLO/model/model_best.h5', compile=False)
results, overall_metrics = evaluate_videos_by_patient(base_path, yolo_model, unet_model)

import csv
output_csv = "evaluation_roi_yolo_unet_metrics.csv"
with open(output_csv, "w", newline="") as csvfile:
    fieldnames = ["Patient", "IoU", "Dice", "Precision", "Recall", "F1"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for patient, metrics in results.items():
        row = {"Patient": patient, **metrics}
        writer.writerow(row)

    writer.writerow({"Patient": "Overall", **overall_metrics})

print(f"\nMétricas guardadas en: {output_csv}")



Métricas guardadas en: evaluation_roi_yolo_unet_metrics.csv


## Evaluación UNet

In [17]:
def evaluate_unet(base_path, unet_model, unet_input_size=(128, 256)):
    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)

        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(original_video_path) or not os.path.exists(mask_images_dir):
            continue

        cap_original = cv2.VideoCapture(original_video_path)
        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        frame_count = 0
        while cap_original.isOpened():
            ret_original, frame_original = cap_original.read()
            mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")

            if not ret_original or not os.path.exists(mask_image_path):
                break

            frame_mask = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            resized_original = cv2.resize(frame_original, unet_input_size)
            resized_ground_truth = cv2.resize(ground_truth_mask, unet_input_size, interpolation=cv2.INTER_NEAREST)

            normalized_image = resized_original / 255.0
            input_batch = np.expand_dims(normalized_image, axis=0)

            seg_pred = unet_model.predict(input_batch, verbose=0)
            predicted_mask = (np.squeeze(seg_pred) > 0.5).astype(np.uint8)

            iou_scores.append(calculate_iou(resized_ground_truth, predicted_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, predicted_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))

            frame_count += 1

        cap_original.release()
        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores) if iou_scores else 0,
            "Dice": np.mean(dice_scores) if dice_scores else 0,
            "Precision": np.mean(precision_scores) if precision_scores else 0,
            "Recall": np.mean(recall_scores) if recall_scores else 0,
            "F1": np.mean(f1_scores) if f1_scores else 0,
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "IoU": np.mean(all_iou_scores) if all_iou_scores else 0,
        "Dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "Precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "Recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "F1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    return metrics_by_patient, overall_metrics

base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
unet_model = load_model('/home/voicelab/Desktop/segmentation_glottis/models/UNets/new_UNet/model/model_best.h5', compile=False)
results, overall_metrics = evaluate_unet(base_path, unet_model)

output_csv = "evaluation_unet_metrics.csv"
with open(output_csv, "w", newline="") as csvfile:
    fieldnames = ["Patient", "IoU", "Dice", "Precision", "Recall", "F1"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for patient, metrics in results.items():
        row = {"Patient": patient, **metrics}
        writer.writerow(row)

    writer.writerow({"Patient": "Overall", **overall_metrics})

print("\nResultados Finales por Paciente:")
for patient, metrics in results.items():
    print(f"{patient}: {metrics}")

print("\nMétricas Promedio Globales:")
for metric, value in overall_metrics.items():
    print(f"{metric}: {value:.4f}")

print(f"\nMétricas guardadas en: {output_csv}")



Resultados Finales por Paciente:
patient61: {'IoU': 0.7308786538442361, 'Dice': 0.8410078125321021, 'Precision': 0.8399103520827185, 'Recall': 0.8468027508091872, 'F1': 0.8410078125321021}
patient14: {'IoU': 0.2806359060744593, 'Dice': 0.41109588806512304, 'Precision': 0.29474640381876366, 'Recall': 0.8877272907538059, 'F1': 0.41109588806512304}
patient17: {'IoU': 0.6600084978100117, 'Dice': 0.7150858656333472, 'Precision': 0.979765875157466, 'Recall': 0.6733587141761639, 'F1': 0.7150858656333472}
patient23: {'IoU': 0.6102145803026172, 'Dice': 0.7204946503000376, 'Precision': 0.934645922413897, 'Recall': 0.6417123746831246, 'F1': 0.7204946503000376}
patient68: {'IoU': 0.5751055483695626, 'Dice': 0.6688193430911188, 'Precision': 0.8843450739753684, 'Recall': 0.6357809732812987, 'F1': 0.6688193430911188}
patient63: {'IoU': 0.5969610649073152, 'Dice': 0.7135358711717963, 'Precision': 0.8590886503442838, 'Recall': 0.6522505544952288, 'F1': 0.7135358711717963}
patient32A2: {'IoU': 0.416580

## Evalaución S3AR-UNet


In [19]:
def evaluate_new_unet(base_path, unet_model):
    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    input_shape = unet_model.input_shape
    img_height, img_width = input_shape[1], input_shape[2]

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)

        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(original_video_path) or not os.path.exists(mask_images_dir):
            continue

        cap_original = cv2.VideoCapture(original_video_path)
        ims = io.mimread(original_video_path, memtest=False)
        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        for frame_count, img_orig in enumerate(ims):
            mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")

            if not os.path.exists(mask_image_path):
                break

            frame_mask = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            img = cv2.resize(img_orig, (img_width, img_height))
            img = img / 255.0
            img = img[np.newaxis, ...]

            seg_pred = unet_model.predict(img, verbose=0)
            mask = np.squeeze(seg_pred)

            if mask.ndim == 3 and mask.shape[2] > 1:
                mask = mask[..., 0]

            predicted_mask = np.round(mask).astype(np.uint8)
            resized_ground_truth = cv2.resize(ground_truth_mask, (img_width, img_height), interpolation=cv2.INTER_NEAREST)

            iou_scores.append(calculate_iou(resized_ground_truth, predicted_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, predicted_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))

        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores) if iou_scores else 0,
            "Dice": np.mean(dice_scores) if dice_scores else 0,
            "Precision": np.mean(precision_scores) if precision_scores else 0,
            "Recall": np.mean(recall_scores) if recall_scores else 0,
            "F1": np.mean(f1_scores) if f1_scores else 0,
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "IoU": np.mean(all_iou_scores) if all_iou_scores else 0,a
        "Dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "Precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "Recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "F1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    return metrics_by_patient, overall_metrics

base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
unet_model = load_model('/home/voicelab/Desktop/segmentation_glottis/models/UNets/S3AR-UNet/s3ar_unet/SeARUNet-1.h5', compile=False, custom_objects={})
results, overall_metrics = evaluate_new_unet(base_path, unet_model)

output_csv = "evaluation_s3ar_unet_metrics2.csv"
with open(output_csv, "w", newline="") as csvfile:
    fieldnames = ["Patient", "IoU", "Dice", "Precision", "Recall", "F1"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for patient, metrics in results.items():
        row = {"Patient": patient, **metrics}
        writer.writerow(row)

    writer.writerow({"Patient": "Overall", **overall_metrics})

print("\nResultados Finales por Paciente:")
for patient, metrics in results.items():
    print(f"{patient}: {metrics}")

print("\nMétricas Promedio Globales:")
for metric, value in overall_metrics.items():
    print(f"{metric}: {value:.4f}")

print(f"\nMétricas guardadas en: {output_csv}")


  function = cls._parse_function_from_config(



Resultados Finales por Paciente:
patient61: {'IoU': 0.2771145339397794, 'Dice': 0.4214034512435302, 'Precision': 0.9824470046082949, 'Recall': 0.2795833883446234, 'F1': 0.4214034512435302}
patient14: {'IoU': 0.47912736823056434, 'Dice': 0.626064434791037, 'Precision': 0.8705477855477854, 'Recall': 0.5274729619127598, 'F1': 0.626064434791037}
patient17: {'IoU': 0.45, 'Dice': 0.45, 'Precision': 1.0, 'Recall': 0.45, 'F1': 0.45}
patient23: {'IoU': 0.2, 'Dice': 0.2, 'Precision': 1.0, 'Recall': 0.2, 'F1': 0.2}
patient68: {'IoU': 0.2111725679067567, 'Dice': 0.2975221061829852, 'Precision': 0.9846266233766233, 'Recall': 0.21430499110390913, 'F1': 0.2975221061829852}
patient63: {'IoU': 0.0, 'Dice': 0.0, 'Precision': 1.0, 'Recall': 0.0, 'F1': 0.0}
patient32A2: {'IoU': 0.27345359755411164, 'Dice': 0.3702252395156579, 'Precision': 0.9447245016822474, 'Recall': 0.29131448693070827, 'F1': 0.3702252395156579}
patient1: {'IoU': 0.26386639513735793, 'Dice': 0.3311377456242831, 'Precision': 0.780568015

In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from tensorflow.keras.models import load_model
from sklearn.metrics import precision_score, recall_score, f1_score

def get_unet_mask(image, unet_model):
    img_orig = image.astype(np.float32)
    img_rgb = cv2.cvtColor(img_orig, cv2.COLOR_BGR2RGB)  
    img = cv2.resize(img_rgb, (128, 256))  

    img = img / 255.0

    img = img[None, ...]

    seg_pred = unet_model.predict(img, verbose=0)

    mask = np.squeeze(seg_pred)  
    mask = np.round(mask).astype(np.uint8)  

    mask = cv2.resize(mask, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)

    return mask


def get_max_yolo_roi(video_path, yolo_model, margin=15):
    """
    Obtiene la región de interés (ROI) que cubre todas las detecciones de YOLO en un video completo,
    imprime todas las cajas detectadas y muestra la imagen con la ROI máxima que cubre todas las detecciones.

    Parámetros:
    video_path (str): Ruta del video.
    yolo_model (YOLO): Modelo YOLO preentrenado.
    margin (int): Tamaño del margen a agregar alrededor de la ROI.

    Retorna:
    tuple: Coordenadas (x1, y1, x2, y2) de la ROI máxima detectada con márgenes aplicados.
    """
    cap = cv2.VideoCapture(video_path)
    min_x1, min_y1 = float('inf'), float('inf')
    max_x2, max_y2 = 0, 0
    max_img = None

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo_model(frame)
        if results[0].boxes is not None and len(results[0].boxes) > 0:
            for box in results[0].boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().int().numpy()

                x1 = max(0, min(x1, frame.shape[1] - 1))
                y1 = max(0, min(y1, frame.shape[0] - 1))
                x2 = max(0, min(x2, frame.shape[1] - 1))
                y2 = max(0, min(y2, frame.shape[0] - 1))

                min_x1 = min(min_x1, x1)
                min_y1 = min(min_y1, y1)
                max_x2 = max(max_x2, x2)
                max_y2 = max(max_y2, y2)

                max_img = frame.copy()

    cap.release()

    if max_img is not None:

        min_x1 = max(0, min_x1 - margin)
        min_y1 = max(0, min_y1 - margin)
        max_x2 = min(max_img.shape[1] - 1, max_x2 + margin)
        max_y2 = min(max_img.shape[0] - 1, max_y2 + margin)

        max_roi_with_margin = (min_x1, min_y1, max_x2, max_y2)

    else:
        print("No se detectó ninguna ROI en las imágenes.")

    return max_roi_with_margin

def filter_unet_mask_with_yolo(unet_mask, roi):
    x1, y1, x2, y2 = roi
    filtered_mask = np.zeros_like(unet_mask)
    filtered_mask[y1:y2, x1:x2] = unet_mask[y1:y2, x1:x2]
    return filtered_mask

def evaluate_yolo_unet_filtered(base_path, yolo_model, unet_model, margin=10):
    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)

        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(original_video_path) or not os.path.exists(mask_images_dir):
            continue

        roi = get_max_yolo_roi(original_video_path, yolo_model, margin)
        if roi is None:
            continue

        cap_original = cv2.VideoCapture(original_video_path)
        ims = []

        while cap_original.isOpened():
            ret, frame = cap_original.read()
            if not ret:
                break
            ims.append(frame)

        cap_original.release()

        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        for frame_count, img_orig in enumerate(ims):
            mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")

            if not os.path.exists(mask_image_path):
                break

            frame_mask = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            unet_mask = get_unet_mask(img_orig, unet_model)
            filtered_mask = filter_unet_mask_with_yolo(unet_mask, roi)

            resized_ground_truth = cv2.resize(ground_truth_mask, unet_mask.shape[::-1], interpolation=cv2.INTER_NEAREST)

            iou_scores.append(calculate_iou(resized_ground_truth, filtered_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, filtered_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), filtered_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), filtered_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), filtered_mask.flatten(), zero_division=1))

        metrics_by_patient[folder] = {
            "iou": np.mean(iou_scores) if iou_scores else 0,
            "dice": np.mean(dice_scores) if dice_scores else 0,
            "precision": np.mean(precision_scores) if precision_scores else 0,
            "recall": np.mean(recall_scores) if recall_scores else 0,
            "f1": np.mean(f1_scores) if f1_scores else 0,
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "iou": np.mean(all_iou_scores) if all_iou_scores else 0,
        "dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "f1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    return metrics_by_patient, overall_metrics

base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
yolo_model = YOLO('/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt')
unet_model = load_model('/home/voicelab/Desktop/segmentation_glottis/models/UNets/new_UNet/model/model_best.h5', compile=False)
results, overall_metrics = evaluate_yolo_unet_filtered(base_path, yolo_model, unet_model)

print("\nResultados Finales por Paciente:")
for patient, metrics in results.items():
    print(f"{patient}: {metrics}")

print("\nMétricas Promedio Globales:")
for metric, value in overall_metrics.items():
    print(f"{metric}: {value:.4f}")


In [None]:
base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
yolo_model = YOLO('/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt')
unet_model = load_model('/home/voicelab/Desktop/segmentation_glottis/models/UNets/new_UNet/model/model_best.h5', compile=False)
results, overall_metrics = evaluate_yolo_unet_filtered(base_path, yolo_model, unet_model)

## Evaluacion UNet con GIRAFE

In [10]:
def evaluate_unet(base_path, unet_model, target_size=(256, 256)):
    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)

        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(video_path) or not os.path.exists(mask_dir):
            print(f"Saltando {folder}: faltan video o máscaras.")
            continue

        cap_video = cv2.VideoCapture(video_path)
        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        frame_count = 0
        while cap_video.isOpened():
            ret, frame = cap_video.read()
            mask_path = os.path.join(mask_dir, f"{folder}_mask_{frame_count:04d}.png")

            if not ret or not os.path.exists(mask_path):
                break

            ground_truth_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (ground_truth_mask > 0).astype(np.uint8)

            resized_frame = cv2.resize(frame, target_size)
            resized_ground_truth = cv2.resize(ground_truth_mask, target_size, interpolation=cv2.INTER_NEAREST)

            normalized_frame = resized_frame.astype(np.float32) / 255.0
            input_batch = np.expand_dims(normalized_frame, axis=0)

            predicted_mask = unet_model.predict(input_batch, verbose=0)
            predicted_mask = (np.squeeze(predicted_mask) > 0.5).astype(np.uint8)

            iou_scores.append(calculate_iou(resized_ground_truth, predicted_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, predicted_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))

            frame_count += 1

        cap_video.release()

        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores) if iou_scores else 0,
            "Dice": np.mean(dice_scores) if dice_scores else 0,
            "Precision": np.mean(precision_scores) if precision_scores else 0,
            "Recall": np.mean(recall_scores) if recall_scores else 0,
            "F1": np.mean(f1_scores) if f1_scores else 0,
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "IoU": np.mean(all_iou_scores) if all_iou_scores else 0,
        "Dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "Precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "Recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "F1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    return metrics_by_patient, overall_metrics

# Rutas y configuración
base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
unet_model_path = "/home/voicelab/Desktop/segmentation_glottis/models/UNets/new_UNet/model/model_best.h5"
unet_model = load_model(unet_model_path, compile=False)

# Evaluar
results, overall_metrics = evaluate_unet(base_path, unet_model)

# Guardar resultados en un archivo CSV
output_csv = "evaluation_results.csv"
with open(output_csv, "w", newline="") as csvfile:
    fieldnames = ["Patient", "IoU", "Dice", "Precision", "Recall", "F1"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for patient, metrics in results.items():
        row = {"Patient": patient, **metrics}
        writer.writerow(row)

    writer.writerow({"Patient": "Overall", **overall_metrics})

print("\nResultados Finales por Paciente:")
for patient, metrics in results.items():
    print(f"{patient}: {metrics}")

print("\nMétricas Promedio Globales:")
for metric, value in overall_metrics.items():
    print(f"{metric}: {value:.4f}")

print(f"\nMétricas guardadas en: {output_csv}")



Resultados Finales por Paciente:
patient61: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient14: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient17: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient23: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient68: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient63: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient32A2: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient1: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient25: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient13: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient22A2: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient2A1: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient18A1: {'IoU': 0, 'Dice': 0, 'Precision': 0, 'Recall': 0, 'F1': 0}
patient11: {'IoU': 0, 'Dice': 0, 'Pre

## S3AR-UNet + FILTRAR CON ROI YOLO


In [3]:
import os
import cv2
import numpy as np
import imageio as io
import pandas as pd
from ultralytics import YOLO
from tensorflow.keras.models import load_model
from sklearn.metrics import precision_score, recall_score, f1_score
from tqdm import tqdm

###############################################
#   1) FUNCIÓN YOLO ROI                      #
###############################################
def get_max_yolo_roi(video_path, yolo_model, margin=15):
    """
    Obtiene la región de interés (ROI) que cubre todas las detecciones de YOLO en un video completo,
    agrega márgenes y retorna las coordenadas de la ROI.

    Parámetros:
    video_path (str): Ruta del video.
    yolo_model (YOLO): Modelo YOLO preentrenado.
    margin (int): Tamaño del margen a agregar alrededor de la ROI.

    Retorna:
    tuple: Coordenadas (x1, y1, x2, y2) de la ROI máxima detectada con márgenes aplicados.
    """
    cap = cv2.VideoCapture(video_path)
    min_x1, min_y1 = float('inf'), float('inf')
    max_x2, max_y2 = 0, 0
    max_img = None

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo_model(frame)
        if results and results[0].boxes and len(results[0].boxes) > 0:
            for box in results[0].boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().int().numpy()

                # Asegurarse de que las coordenadas están dentro del tamaño de la imagen
                x1 = max(0, min(x1, frame.shape[1] - 1))
                y1 = max(0, min(y1, frame.shape[0] - 1))
                x2 = max(0, min(x2, frame.shape[1] - 1))
                y2 = max(0, min(y2, frame.shape[0] - 1))

                # Actualizar los valores mínimos y máximos para obtener una ROI que englobe todas las detecciones
                min_x1 = min(min_x1, x1)
                min_y1 = min(min_y1, y1)
                max_x2 = max(max_x2, x2)
                max_y2 = max(max_y2, y2)

                max_img = frame.copy()

    cap.release()

    if max_img is not None:
        # Agregar márgenes a la ROI que engloba todas las detecciones
        min_x1 = max(0, min_x1 - margin)
        min_y1 = max(0, min_y1 - margin)
        max_x2 = min(max_img.shape[1] - 1, max_x2 + margin)
        max_y2 = min(max_img.shape[0] - 1, max_y2 + margin)

        max_roi_with_margin = (min_x1, min_y1, max_x2, max_y2)
    else:
        print("No se detectó ninguna ROI en las imágenes.")
        max_roi_with_margin = None

    return max_roi_with_margin


###############################################
#   3) FUNCIÓN PARA FILTRAR UNA MÁSCARA      #
#      CON LA ROI DE YOLO                    #
###############################################
def filter_unet_mask_with_yolo(unet_mask, roi):
    """
    Filtra la máscara de UNet para mantener solo las áreas dentro de la ROI proporcionada por YOLO.

    Parámetros:
    unet_mask (numpy.ndarray): Máscara binaria de UNet.
    roi (tuple): Coordenadas de la ROI (x1, y1, x2, y2).

    Retorna:
    numpy.ndarray: Máscara filtrada que mantiene solo las áreas dentro de la ROI.
    """
    x1, y1, x2, y2 = roi
    filtered_mask = np.zeros_like(unet_mask)
    filtered_mask[y1:y2, x1:x2] = unet_mask[y1:y2, x1:x2]
    return filtered_mask


###############################################
#   4) FUNCIÓN PRINCIPAL DE EVALUACIÓN       #
###############################################
def evaluate_new_unet_with_yolo(
    base_path,
    unet_model,
    yolo_model,
    margin=15,
    output_csv=None
):
    """
    Recorre todo el dataset (carpetas 'patient...'),
    obtiene la ROI global con YOLO para cada video,
    luego hace la inferencia UNet en cada frame
    y filtra la máscara predicha usando dicha ROI.
    Finalmente, calcula y devuelve métricas de IoU, Dice, Precision, Recall, F1.
    Si se indica output_csv, guarda los resultados en ese archivo.

    Parámetros:
    base_path (str): Ruta base del dataset.
    unet_model (Model): Modelo UNet cargado.
    yolo_model (YOLO): Modelo YOLO cargado.
    margin (int): Tamaño del margen a agregar alrededor de la ROI.
    output_csv (str, opcional): Ruta para guardar las métricas en formato CSV.

    Retorna:
    tuple: (metrics_by_patient, overall_metrics)
    """
    metrics_by_patient = {}
    all_iou_scores = []
    all_dice_scores = []
    all_precision_scores = []
    all_recall_scores = []
    all_f1_scores = []

    # Obtener la forma esperada por la UNet
    input_shape = unet_model.input_shape  # (None, H, W, C)
    _, img_height, img_width, n_channels = input_shape

    # Recorremos las carpetas de pacientes
    patient_folders = [f for f in os.listdir(base_path)
                       if f.startswith("patient") and os.path.isdir(os.path.join(base_path, f))]

    for folder in tqdm(patient_folders, desc="Evaluando pacientes"):
        folder_path = os.path.join(base_path, folder)
        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not (os.path.exists(original_video_path) and os.path.exists(mask_images_dir)):
            print(f"Saltando {folder}, faltan datos.")
            continue

        # 1) Obtiene la ROI general de YOLO para todo el video
        roi = get_max_yolo_roi(original_video_path, yolo_model, margin=margin)

        if roi is None:
            print(f"Sin ROI para {folder}, saltando.")
            continue

        # 2) Leer todos los frames del video
        frames = io.mimread(original_video_path, memtest=False)

        if not frames:
            print(f"No se pudieron leer los frames de {folder}, saltando.")
            continue

        iou_scores = []
        dice_scores = []
        precision_scores = []
        recall_scores = []
        f1_scores = []

        # 3) Iterar sobre frames
        for frame_count, img_orig in enumerate(frames):
            mask_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")
            if not os.path.exists(mask_path):
                print(f"Falta la máscara para el frame {frame_count} de {folder}, saltando frame.")
                continue

            frame_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            # Redimensionamos el frame para UNet
            resized_frame = cv2.resize(img_orig, (img_width, img_height))
            # Normalizamos (asumiendo 0-1)
            resized_frame = resized_frame.astype(np.float32) / 255.0

            # Expandimos para batch
            input_batch = np.expand_dims(resized_frame, axis=0)

            # Predicción
            seg_pred = unet_model.predict(input_batch, verbose=0)
            seg_pred = np.squeeze(seg_pred)

            # Si hay más de 1 canal de salida, seleccionar el primer canal
            if seg_pred.ndim == 3 and seg_pred.shape[-1] > 1:
                seg_pred = seg_pred[..., 0]

            # Binarizamos
            predicted_mask_small = np.round(seg_pred).astype(np.uint8)  # (H, W) = (img_height, img_width)

            # Volver a tamaño original
            predicted_mask = cv2.resize(
                predicted_mask_small,
                (img_orig.shape[1], img_orig.shape[0]),  # (ancho, alto)
                interpolation=cv2.INTER_NEAREST
            )

            # 4) Filtrar la máscara predicha con la ROI
            predicted_mask = filter_unet_mask_with_yolo(predicted_mask, roi)

            # 5) Cálculo de métricas
            iou_val = calculate_iou(ground_truth_mask, predicted_mask)
            dice_val = calculate_dice(ground_truth_mask, predicted_mask)

            precision_val = precision_score(
                ground_truth_mask.flatten(),
                predicted_mask.flatten(),
                zero_division=1
            )
            recall_val = recall_score(
                ground_truth_mask.flatten(),
                predicted_mask.flatten(),
                zero_division=1
            )
            f1_val = f1_score(
                ground_truth_mask.flatten(),
                predicted_mask.flatten(),
                zero_division=1
            )

            iou_scores.append(iou_val)
            dice_scores.append(dice_val)
            precision_scores.append(precision_val)
            recall_scores.append(recall_val)
            f1_scores.append(f1_val)

        # Guardar promedio de este paciente
        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores) if iou_scores else 0,
            "Dice": np.mean(dice_scores) if dice_scores else 0,
            "Precision": np.mean(precision_scores) if precision_scores else 0,
            "Recall": np.mean(recall_scores) if recall_scores else 0,
            "F1": np.mean(f1_scores) if f1_scores else 0,
        }

        # Agregar a las métricas globales
        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    # Métricas globales
    overall_metrics = {
        "IoU": np.mean(all_iou_scores) if all_iou_scores else 0,
        "Dice": np.mean(all_dice_scores) if all_dice_scores else 0,
        "Precision": np.mean(all_precision_scores) if all_precision_scores else 0,
        "Recall": np.mean(all_recall_scores) if all_recall_scores else 0,
        "F1": np.mean(all_f1_scores) if all_f1_scores else 0,
    }

    # Guardar a CSV si se solicita
    if output_csv:
        df = pd.DataFrame.from_dict(metrics_by_patient, orient="index")
        df.loc["Overall"] = overall_metrics
        df.to_csv(output_csv)
        print(f"Métricas guardadas en: {output_csv}")

    return metrics_by_patient, overall_metrics


###############################################
#   5) FUNCIONES AUXILIARES                  #
###############################################
# (Si tienes funciones adicionales, agrégalas aquí)
# Por ejemplo, si tienes una función para visualizar las máscaras filtradas, podrías incluirla.

###############################################
#   6) EJECUCIÓN DEL CÓDIGO                  #
###############################################
if __name__ == "__main__":
    base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
    yolo_model_path = '/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt'
    unet_model_path = '/home/voicelab/Desktop/segmentation_glottis/models/UNets/S3AR-UNet/s3ar_unet/model/SeARUNet-1/SeARUNet-1.h5'
    output_csv_path = "s3ar_unet_metrics_FILTER.csv"

    # Cargar modelos
    yolo_model = YOLO(yolo_model_path)
    unet_model = load_model(unet_model_path, compile=False)

    # Ejecutar la evaluación
    results, overall_metrics = evaluate_new_unet_with_yolo(base_path, unet_model, yolo_model, margin=10, output_csv=output_csv_path)

    # Imprimir resultados
    print("\nResultados Finales por Paciente:")
    for patient, metrics in results.items():
        print(f"{patient}: {metrics}")

    print("\nMétricas Promedio Globales:")
    for metric, value in overall_metrics.items():
        print(f"{metric}: {value:.4f}")


  function = cls._parse_function_from_config(
2025-01-30 17:46:00.646868: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-30 17:46:00.648855: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-30 17:46:00.650119: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0


0: 640x640 1 glottis, 2.2ms
Speed: 3.0ms preprocess, 2.2ms inference, 50.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.3ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 1.0ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

2025-01-30 17:46:03.959096: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2025-01-30 17:46:03.970318: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2025-01-30 17:46:04.075311: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
Evaluando pacientes:   0%|                                                                                                           | 0/38 [00:03<?, ?it/s]


TypeError: unsupported operand type(s) for +: 'NoneType' and 'NoneType'

In [39]:


base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"
unet_model = load_model("/home/voicelab/Desktop/segmentation_glottis/models/UNets/S3AR-UNet/s3ar_unet/model/SeARUNet-1/SeARUNet-1.h5", compile=False)
yolo_model = YOLO("/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt")  # O como cargues tu YOLO

results, overall = evaluate_new_unet_with_yolo(
    base_path=base_path,
    unet_model=unet_model,
    yolo_model=yolo_model,
    margin=10,
    output_csv="evaluation_s3ar_unet_yolo_FILTER_ROI.csv"
)

print("Resultados por paciente:")
for patient, vals in results.items():
    print(patient, vals)

print("\nMétricas globales:", overall)


  function = cls._parse_function_from_config(
Evaluando pacientes:   0%|                                                                                                           | 0/38 [00:00<?, ?it/s]


0: 640x640 1 glottis, 2.1ms
Speed: 1.9ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:   3%|██▌                                                                                                | 1/38 [00:02<01:32,  2.50s/it]


0: 640x640 1 glottis, 2.8ms
Speed: 1.4ms preprocess, 2.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:   5%|█████▏                                                                                             | 2/38 [00:03<01:04,  1.79s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.6ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at

Evaluando pacientes:   8%|███████▊                                                                                           | 3/38 [00:05<00:53,  1.54s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.4ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.7ms
Speed: 0.9ms preprocess, 2.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.8ms
Speed: 0.9ms preprocess, 2.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 64

Evaluando pacientes:  11%|██████████▍                                                                                        | 4/38 [00:06<00:48,  1.43s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.6ms preprocess, 3.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3

Evaluando pacientes:  13%|█████████████                                                                                      | 5/38 [00:07<00:45,  1.38s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.6ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.5ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3

Evaluando pacientes:  16%|███████████████▋                                                                                   | 6/38 [00:08<00:43,  1.35s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.6ms preprocess, 3.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.6ms
Speed: 0.9ms preprocess, 2.6ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  18%|██████████████████▏                                                                                | 7/38 [00:10<00:41,  1.33s/it]


0: 640x640 1 glottis, 3.4ms
Speed: 1.4ms preprocess, 3.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.7ms
Speed: 0.9ms preprocess, 2.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 1.1ms preprocess, 2.4ms inference, 0.2ms postprocess per image at

Evaluando pacientes:  21%|████████████████████▊                                                                              | 8/38 [00:11<00:39,  1.31s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.6ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.8ms preprocess, 2.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.8ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640,

Evaluando pacientes:  24%|███████████████████████▍                                                                           | 9/38 [00:12<00:37,  1.30s/it]


0: 640x640 1 glottis, 4.9ms
Speed: 2.2ms preprocess, 4.9ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.8ms
Speed: 0.9ms preprocess, 2.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.7ms
Speed: 1.1ms preprocess, 2.7ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640,

Evaluando pacientes:  26%|█████████████████████████▊                                                                        | 10/38 [00:13<00:36,  1.29s/it]


0: 640x640 1 glottis, 3.4ms
Speed: 1.3ms preprocess, 3.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 1.0ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 5.7ms
Speed: 1.6ms preprocess, 5.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 5.5ms
Speed: 1.5ms preprocess, 5.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 5.0ms
Speed: 1.5ms preprocess, 5.0ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  29%|████████████████████████████▎                                                                     | 11/38 [00:15<00:34,  1.30s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.7ms
Speed: 0.8ms preprocess, 2.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 1.1ms preprocess, 2.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640,

Evaluando pacientes:  32%|██████████████████████████████▉                                                                   | 12/38 [00:16<00:34,  1.32s/it]


0: 640x640 1 glottis, 3.4ms
Speed: 1.5ms preprocess, 3.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 64

Evaluando pacientes:  34%|█████████████████████████████████▌                                                                | 13/38 [00:17<00:32,  1.31s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.4ms preprocess, 3.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.6ms
Speed: 1.1ms preprocess, 2.6ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 1.1ms preprocess, 2.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.7ms
Speed: 1.6ms preprocess, 5.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at

Evaluando pacientes:  37%|████████████████████████████████████                                                              | 14/38 [00:19<00:31,  1.30s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.3ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess 

Evaluando pacientes:  39%|██████████████████████████████████████▋                                                           | 15/38 [00:20<00:29,  1.28s/it]


0: 640x640 (no detections), 3.2ms
Speed: 1.5ms preprocess, 3.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640,

Evaluando pacientes:  42%|█████████████████████████████████████████▎                                                        | 16/38 [00:21<00:28,  1.28s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.6ms preprocess, 3.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.8ms
Speed: 1.0ms preprocess, 2.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  45%|███████████████████████████████████████████▊                                                      | 17/38 [00:22<00:26,  1.27s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.3ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 1.1ms preprocess, 2.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess 

Evaluando pacientes:  47%|██████████████████████████████████████████████▍                                                   | 18/38 [00:24<00:25,  1.27s/it]


0: 640x640 (no detections), 3.2ms
Speed: 1.5ms preprocess, 3.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.6ms
Speed: 0.8ms preprocess, 2.6ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at

Evaluando pacientes:  50%|█████████████████████████████████████████████████                                                 | 19/38 [00:25<00:24,  1.27s/it]


0: 640x640 (no detections), 3.2ms
Speed: 1.5ms preprocess, 3.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.5ms
Speed: 1.0ms preprocess, 2.5ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 1.0ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image a

Evaluando pacientes:  53%|███████████████████████████████████████████████████▌                                              | 20/38 [00:26<00:22,  1.27s/it]


0: 640x640 (no detections), 3.4ms
Speed: 1.5ms preprocess, 3.4ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.8ms postpr

Evaluando pacientes:  55%|██████████████████████████████████████████████████████▏                                           | 21/38 [00:28<00:21,  1.27s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.5ms preprocess, 3.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  58%|████████████████████████████████████████████████████████▋                                         | 22/38 [00:29<00:20,  1.26s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.6ms preprocess, 3.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 1.0ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  61%|███████████████████████████████████████████████████████████▎                                      | 23/38 [00:30<00:19,  1.28s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.9ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 6.4ms
Speed: 0.8ms preprocess, 6.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.8ms
Speed: 0.8ms preprocess, 2.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 5.0ms
Speed: 1.6ms preprocess, 5.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  63%|█████████████████████████████████████████████████████████████▉                                    | 24/38 [00:31<00:18,  1.30s/it]


0: 640x640 2 glottiss, 3.2ms
Speed: 1.3ms preprocess, 3.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.6ms
Speed: 0.9ms preprocess, 2.6ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 6.1ms
Speed: 1.6ms preprocess, 6.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 

Evaluando pacientes:  66%|████████████████████████████████████████████████████████████████▍                                 | 25/38 [00:33<00:17,  1.31s/it]


0: 640x640 1 glottis, 5.2ms
Speed: 1.9ms preprocess, 5.2ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 3.3ms
Speed: 1.5ms preprocess, 3.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  68%|███████████████████████████████████████████████████████████████████                               | 26/38 [00:34<00:15,  1.30s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.4ms
Speed: 0.8ms preprocess, 2.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.2ms postprocess 

Evaluando pacientes:  71%|█████████████████████████████████████████████████████████████████████▋                            | 27/38 [00:35<00:14,  1.28s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.3ms preprocess, 3.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  74%|████████████████████████████████████████████████████████████████████████▏                         | 28/38 [00:37<00:12,  1.27s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1,

Evaluando pacientes:  76%|██████████████████████████████████████████████████████████████████████████▊                       | 29/38 [00:38<00:11,  1.27s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.5ms postprocess per image at shape (1, 3

Evaluando pacientes:  79%|█████████████████████████████████████████████████████████████████████████████▎                    | 30/38 [00:39<00:10,  1.26s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 1.9ms
Speed: 0.8ms preprocess, 1.9ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per im

Evaluando pacientes:  82%|███████████████████████████████████████████████████████████████████████████████▉                  | 31/38 [00:40<00:08,  1.25s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.3ms preprocess, 3.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Evaluando pacientes:  84%|██████████████████████████████████████████████████████████████████████████████████▌               | 32/38 [00:42<00:07,  1.26s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.4ms preprocess, 3.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape (1, 3

Evaluando pacientes:  87%|█████████████████████████████████████████████████████████████████████████████████████             | 33/38 [00:43<00:06,  1.26s/it]


0: 640x640 1 glottis, 3.2ms
Speed: 1.5ms preprocess, 3.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.1ms postprocess per image at shape

Evaluando pacientes:  89%|███████████████████████████████████████████████████████████████████████████████████████▋          | 34/38 [00:44<00:05,  1.26s/it]


0: 640x640 2 glottiss, 3.1ms
Speed: 1.3ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 0.9ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.9ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

Evaluando pacientes:  92%|██████████████████████████████████████████████████████████████████████████████████████████▎       | 35/38 [00:45<00:03,  1.29s/it]


0: 640x640 2 glottiss, 3.1ms
Speed: 1.4ms preprocess, 3.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 6

Evaluando pacientes:  95%|████████████████████████████████████████████████████████████████████████████████████████████▊     | 36/38 [00:47<00:02,  1.29s/it]


0: 640x640 1 glottis, 3.1ms
Speed: 1.4ms preprocess, 3.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 3.3ms
Speed: 1.0ms preprocess, 3.3ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.8ms
Speed: 1.2ms preprocess, 2.8ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 1.9ms
Speed: 0.7ms preprocess, 1.9ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess 

Evaluando pacientes:  97%|███████████████████████████████████████████████████████████████████████████████████████████████▍  | 37/38 [00:48<00:01,  1.29s/it]


0: 640x640 (no detections), 3.1ms
Speed: 1.5ms preprocess, 3.1ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 glottiss, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 glottiss, 2.1ms
Speed: 0.9ms preprocess, 2.1ms inference, 0.6ms postprocess per image at sha

Evaluando pacientes: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 38/38 [00:49<00:00,  1.31s/it]

Métricas guardadas en: evaluation_s3ar_unet_yolo_FILTER_ROI.csv
Resultados por paciente:
patient61 {'IoU': 0.28141287858398967, 'Dice': 0.42849447767216997, 'Precision': 0.9893499423963134, 'Recall': 0.2829012312238126, 'F1': 0.42849447767216997}
patient14 {'IoU': 0.47244235243008914, 'Dice': 0.6226822337462695, 'Precision': 0.8950784892191141, 'Recall': 0.5139499716610667, 'F1': 0.6226822337462695}
patient17 {'IoU': 0.45, 'Dice': 0.0, 'Precision': 1.0, 'Recall': 0.45, 'F1': 0.45}
patient23 {'IoU': 0.2, 'Dice': 0.0, 'Precision': 1.0, 'Recall': 0.2, 'F1': 0.2}
patient68 {'IoU': 0.20780774489656495, 'Dice': 0.29384138721381686, 'Precision': 0.982835509348807, 'Recall': 0.2110818748944851, 'F1': 0.29384138721381686}
patient63 {'IoU': 0.0, 'Dice': 0.0, 'Precision': 1.0, 'Recall': 0.0, 'F1': 0.0}
patient32A2 {'IoU': 0.28121985282172396, 'Dice': 0.3777792191345755, 'Precision': 0.946670347506785, 'Recall': 0.2988299884932905, 'F1': 0.3777792191345755}
patient1 {'IoU': 0.27836095578950737, 'D




In [40]:
import imageio
print(imageio.__version__)


2.34.2


In [16]:
import tensorflow_addons as tfa

def evaluate_model_with_roi(base_path, yolo_model, model, model_type="unet", output_csv="evaluation_metrics.csv"):
    if model_type == "unet":
        input_size = (128, 256, 3)  
    elif model_type == "s3arunet":
        input_size = (128, 128, 3)  

    metrics_by_patient = {}
    all_iou_scores, all_dice_scores, all_precision_scores, all_recall_scores, all_f1_scores = [], [], [], [], []

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)
        if not folder.startswith("patient") or not os.path.isdir(folder_path):
            continue

        original_video_path = os.path.join(folder_path, f"{folder}_original.avi")
        mask_images_dir = os.path.join(folder_path, "masks")

        if not os.path.exists(original_video_path) or not os.path.exists(mask_images_dir):
            print(f"Saltando {folder} (falta uno de los datos)")
            continue

        roi = get_max_yolo_roi(original_video_path, yolo_model)
        if roi is None:
            continue

        x1, y1, x2, y2 = roi

        cap_original = cv2.VideoCapture(original_video_path)
        iou_scores, dice_scores, precision_scores, recall_scores, f1_scores = [], [], [], [], []

        frame_count = 0
        while cap_original.isOpened():
            ret_original, frame_original = cap_original.read()
            mask_image_path = os.path.join(mask_images_dir, f"{folder}_mask_{frame_count:03d}.png")

            if not ret_original or not os.path.exists(mask_image_path):
                break

            frame_mask = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            ground_truth_mask = (frame_mask > 0).astype(np.uint8)

            cropped_original = frame_original[y1:y2, x1:x2]
            cropped_ground_truth = ground_truth_mask[y1:y2, x1:x2]

            resized_original = cv2.resize(cropped_original, input_size[:2]) / 255.0
            input_batch = np.expand_dims(resized_original, axis=0)

            seg_pred = model.predict(input_batch, verbose=0)
            predicted_mask = (np.squeeze(seg_pred) > 0.5).astype(np.uint8)

            resized_ground_truth = cv2.resize(cropped_ground_truth, predicted_mask.shape[::-1], interpolation=cv2.INTER_NEAREST)

            iou_scores.append(calculate_iou(resized_ground_truth, predicted_mask))
            dice_scores.append(calculate_dice(resized_ground_truth, predicted_mask))
            precision_scores.append(precision_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            recall_scores.append(recall_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))
            f1_scores.append(f1_score(resized_ground_truth.flatten(), predicted_mask.flatten(), zero_division=1))

            frame_count += 1

        cap_original.release()

        metrics_by_patient[folder] = {
            "IoU": np.mean(iou_scores),
            "Dice": np.mean(dice_scores),
            "Precision": np.mean(precision_scores),
            "Recall": np.mean(recall_scores),
            "F1": np.mean(f1_scores),
        }

        all_iou_scores.extend(iou_scores)
        all_dice_scores.extend(dice_scores)
        all_precision_scores.extend(precision_scores)
        all_recall_scores.extend(recall_scores)
        all_f1_scores.extend(f1_scores)

    overall_metrics = {
        "IoU": np.mean(all_iou_scores),
        "Dice": np.mean(all_dice_scores),
        "Precision": np.mean(all_precision_scores),
        "Recall": np.mean(all_recall_scores),
        "F1": np.mean(all_f1_scores),
    }

    df = pd.DataFrame.from_dict(metrics_by_patient, orient="index")
    df.loc["Overall"] = overall_metrics
    df.to_csv(output_csv)

    print(f"Métricas guardadas en: {output_csv}")

    return metrics_by_patient, overall_metrics


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [17]:
base_path = "/home/voicelab/Desktop/segmentation_glottis/datasets/GIRAFE/Seg_FP-Results/Manual_segmentation"

yolo_model = YOLO("/home/voicelab/Desktop/segmentation_glottis/models/YOLO/YOLOV8/best_yolov8n-seg-1cls.pt")

unet_model = load_model(
    "/home/voicelab/Desktop/segmentation_glottis/models/UNets/ROI_UNet_YOLO/model/model_best.h5",
    compile=False, custom_objects={'InstanceNormalization': tfa.layers.InstanceNormalization}
)

metrics_by_patient, overall_metrics = evaluate_model_with_roi(
    base_path=base_path,
    yolo_model=yolo_model,
    model=unet_model,
    model_type="unet",
    output_csv="evaluation_unet_roi_cropped_metrics.csv"
)



0: 640x640 1 glottis, 2.3ms
Speed: 1.7ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.1ms
Speed: 0.8ms preprocess, 2.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.0ms
Speed: 0.8ms preprocess, 2.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.6ms
Speed: 0.9ms preprocess, 2.6ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.8ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.2ms
Speed: 0.8ms preprocess, 2.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)


In [18]:
s3arunet_model = load_model(
    "/home/voicelab/Desktop/segmentation_glottis/models/UNets/S3AR-UNet/s3ar_unet/model/SeARUNet-1/SeARUNet-1.h5",
    compile=False
)

metrics_by_patient, overall_metrics = evaluate_model_with_roi(
    base_path=base_path,
    yolo_model=yolo_model,
    model=s3arunet_model,
    model_type="s3arunet",
    output_csv="evaluation_s3arunet_roi_cropped_metrics.csv"
)


  function = cls._parse_function_from_config(



0: 640x640 1 glottis, 2.5ms
Speed: 1.9ms preprocess, 2.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.5ms
Speed: 0.9ms preprocess, 2.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.9ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 glottis, 2.3ms
Speed: 0.8ms preprocess, 2.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
