In [3]:
import numpy as np
import cv2
import os
import gc
import tensorflow as tf
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from ultralytics import YOLO
from tensorflow.keras.models import load_model
from utils.masks import get_unet_mask, get_max_yolo_roi, filter_unet_mask_with_yolo
from utils.metrics import calculate_iou, calculate_recall, calculate_precision, calculate_dice, calculate_f1, calculate_map50, calculate_mcc, calculate_tnr

2024-09-26 13:03:40.841660: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-26 13:03:40.862112: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-26 13:03:40.862134: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-26 13:03:40.862693: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-26 13:03:40.866396: I tensorflow/core/platform/cpu_feature_guar

In [None]:
def evaluate_unet_model(test_dir, unet_model, num_images=3500):
    """Evaluate the stand-alone U-Net segmentation on an indexed test set.

    For each index ``i`` in ``range(num_images)`` the image ``{i}.png`` and the
    reference mask ``{i}_seg.png`` are read from ``test_dir``. The U-Net mask is
    resized to the reference mask's size and compared against it. No YOLO ROI
    filtering is applied here (see ``evaluate_hybrid_model`` for that variant).

    Images whose IoU is NaN are reported and left out of every metric list;
    images that raise during processing are reported and skipped.

    Args:
        test_dir: Directory holding the ``{i}.png`` / ``{i}_seg.png`` pairs.
        unet_model: Model handed to ``get_unet_mask``.
        num_images: Number of consecutive indices (starting at 0) to evaluate.

    Returns:
        Tuple of nine lists with one entry per successfully evaluated image, in
        this order: IoU, Jaccard, recall, precision, Dice, F1, mAP50, MCC, TNR.
    """
    # Local import: none of the notebook's import cells brings
    # ``jaccard_score`` into scope, so the name was previously undefined.
    from sklearn.metrics import jaccard_score

    unet_ious = []
    jaccard_scores = []
    unet_recalls = []
    unet_precisions = []
    unet_dices = []
    unet_f1_scores = []
    unet_map50s = []
    unet_mccs = []
    unet_tnrs = []
    zero_iou_count = 0
    failed_count = 0

    for idx in range(num_images):
        image_name = f"{idx}.png"
        image_path = os.path.join(test_dir, image_name)
        mask_path = os.path.join(test_dir, f"{idx}_seg.png")
        try:
            # cv2.imread signals an unreadable file by returning None, so check
            # explicitly instead of failing later with an opaque AttributeError.
            ground_truth_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if ground_truth_mask is None:
                raise FileNotFoundError(f"could not read mask {mask_path}")
            ground_truth_mask = ground_truth_mask.astype(bool)

            image = cv2.imread(image_path)
            if image is None:
                raise FileNotFoundError(f"could not read image {image_path}")

            # Raw U-Net prediction, resized to the reference mask's dimensions
            # (cv2.resize takes the target size as (width, height)).
            unet_mask = get_unet_mask(image, unet_model)
            height, width = ground_truth_mask.shape
            unet_mask_resized = cv2.resize(unet_mask.astype(np.uint8), (width, height)).astype(bool)

            iou_unet = calculate_iou(ground_truth_mask, unet_mask_resized)
            jaccard = jaccard_score(ground_truth_mask.flatten(), unet_mask_resized.flatten(), average='binary')
            recall_unet = calculate_recall(ground_truth_mask, unet_mask_resized)
            precision_unet = calculate_precision(ground_truth_mask, unet_mask_resized)
            dice_unet = calculate_dice(ground_truth_mask, unet_mask_resized)
            f1_unet = calculate_f1(ground_truth_mask, unet_mask_resized)
            map50_unet = calculate_map50(ground_truth_mask, unet_mask_resized)
            mcc_unet = calculate_mcc(ground_truth_mask, unet_mask_resized)
            tnr_unet = calculate_tnr(ground_truth_mask, unet_mask_resized)

            if np.isnan(iou_unet):
                print(f"Warning: IoU is NaN for image {image_path}")
            else:
                unet_ious.append(iou_unet)
                jaccard_scores.append(jaccard)
                unet_recalls.append(recall_unet)
                unet_precisions.append(precision_unet)
                unet_dices.append(dice_unet)
                unet_f1_scores.append(f1_unet)
                unet_map50s.append(map50_unet)
                unet_mccs.append(mcc_unet)
                unet_tnrs.append(tnr_unet)
                if iou_unet == 0.0:
                    zero_iou_count += 1
        except Exception as e:
            # Best-effort evaluation: report the failing image and move on.
            print(f"Error processing {image_name}: {e}")
            failed_count += 1
        finally:
            # Release per-image resources even when the image failed.
            gc.collect()
            tf.keras.backend.clear_session()

    def _mean_or_zero(values):
        # An empty list yields 0.0 instead of NaN plus a RuntimeWarning.
        return np.mean(values) if values else 0.0

    # Print the average metrics.
    print(f"Average UNet IoU: {_mean_or_zero(unet_ious)}")
    print(f"Average UNet Jaccard: {_mean_or_zero(jaccard_scores)}")
    print(f"Average UNet Recall: {_mean_or_zero(unet_recalls)}")
    print(f"Average UNet Precision: {_mean_or_zero(unet_precisions)}")
    print(f"Average UNet DICE: {_mean_or_zero(unet_dices)}")
    print(f"Average UNet F1: {_mean_or_zero(unet_f1_scores)}")
    print(f"Average UNet mAP50: {_mean_or_zero(unet_map50s)}")
    print(f"Average UNet MCC: {_mean_or_zero(unet_mccs)}")
    print(f"Average UNet TNR: {_mean_or_zero(unet_tnrs)}")
    print(f"IoU 0: {zero_iou_count}")
    print(f"Images evaluated: {len(unet_ious)}, failed: {failed_count}")

    return unet_ious, jaccard_scores, unet_recalls, unet_precisions, unet_dices, unet_f1_scores, unet_map50s, unet_mccs, unet_tnrs


In [4]:
def evaluate_hybrid_model(test_dir, yolo_model, unet_model, margin=15, num_images=3500):
    """Evaluate the hybrid YOLO + U-Net segmentation on an indexed test set.

    For each index ``i`` in ``range(num_images)`` the image ``{i}.png`` and the
    reference mask ``{i}_seg.png`` are read from ``test_dir``. The U-Net mask is
    filtered with the ROI returned by ``get_max_yolo_roi``, resized to the
    reference mask's size and compared against it.

    Images whose IoU is NaN are reported and left out of every metric list;
    images that raise during processing are reported and skipped.

    Args:
        test_dir: Directory holding the ``{i}.png`` / ``{i}_seg.png`` pairs.
        yolo_model: Model handed to ``get_max_yolo_roi``.
        unet_model: Model handed to ``get_unet_mask``.
        margin: Margin forwarded to ``get_max_yolo_roi``.
        num_images: Number of consecutive indices (starting at 0) to evaluate.

    Returns:
        Tuple of nine lists with one entry per successfully evaluated image, in
        this order: IoU, Jaccard, recall, precision, Dice, F1, mAP50, MCC, TNR.
    """
    # Local import: none of the notebook's import cells brings
    # ``jaccard_score`` into scope, so every image used to fail with NameError.
    from sklearn.metrics import jaccard_score

    hybrid_ious = []
    jaccard_scores = []
    hybrid_recalls = []
    hybrid_precisions = []
    hybrid_dices = []
    hybrid_f1_scores = []
    hybrid_map50s = []
    hybrid_mccs = []
    hybrid_tnrs = []
    zero_iou_count = 0
    failed_count = 0

    for idx in range(num_images):
        image_name = f"{idx}.png"
        image_path = os.path.join(test_dir, image_name)
        mask_path = os.path.join(test_dir, f"{idx}_seg.png")
        try:
            # cv2.imread signals an unreadable file by returning None, so check
            # explicitly instead of failing later with an opaque AttributeError.
            ground_truth_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if ground_truth_mask is None:
                raise FileNotFoundError(f"could not read mask {mask_path}")
            ground_truth_mask = ground_truth_mask.astype(bool)

            # Read the original image.
            image = cv2.imread(image_path)
            if image is None:
                raise FileNotFoundError(f"could not read image {image_path}")

            # Get the U-Net mask and filter it with the YOLO ROI.
            unet_mask = get_unet_mask(image, unet_model)
            roi = get_max_yolo_roi(image_path, yolo_model, margin)
            filtered_mask = filter_unet_mask_with_yolo(unet_mask, roi)

            # Resize the filtered mask to the reference mask's dimensions
            # (cv2.resize takes the target size as (width, height)).
            height, width = ground_truth_mask.shape
            filtered_mask_resized = cv2.resize(filtered_mask.astype(np.uint8), (width, height)).astype(bool)

            # Compute the metrics.
            iou_hybrid = calculate_iou(ground_truth_mask, filtered_mask_resized)
            jaccard = jaccard_score(ground_truth_mask.flatten(), filtered_mask_resized.flatten(), average='binary')
            recall_hybrid = calculate_recall(ground_truth_mask, filtered_mask_resized)
            precision_hybrid = calculate_precision(ground_truth_mask, filtered_mask_resized)
            dice_hybrid = calculate_dice(ground_truth_mask, filtered_mask_resized)
            f1_hybrid = calculate_f1(ground_truth_mask, filtered_mask_resized)
            map50_hybrid = calculate_map50(ground_truth_mask, filtered_mask_resized)
            mcc_hybrid = calculate_mcc(ground_truth_mask, filtered_mask_resized)
            tnr_hybrid = calculate_tnr(ground_truth_mask, filtered_mask_resized)

            if np.isnan(iou_hybrid):
                print(f"Warning: IoU is NaN for image {image_path}")
            else:
                hybrid_ious.append(iou_hybrid)
                jaccard_scores.append(jaccard)
                hybrid_recalls.append(recall_hybrid)
                hybrid_precisions.append(precision_hybrid)
                hybrid_dices.append(dice_hybrid)
                hybrid_f1_scores.append(f1_hybrid)
                hybrid_map50s.append(map50_hybrid)
                hybrid_mccs.append(mcc_hybrid)
                hybrid_tnrs.append(tnr_hybrid)
                if iou_hybrid == 0.0:
                    zero_iou_count += 1
        except Exception as e:
            # Best-effort evaluation: report the failing image and move on.
            print(f"Error processing {image_name}: {e}")
            failed_count += 1
        finally:
            # Release per-image resources even when the image failed.
            gc.collect()
            tf.keras.backend.clear_session()

    def _mean_or_zero(values):
        # An empty list yields 0.0 instead of NaN plus a RuntimeWarning.
        return np.mean(values) if values else 0.0

    # Print the average metrics.
    print(f"Average Hybrid IoU: {_mean_or_zero(hybrid_ious)}")
    print(f"Average Hybrid Jaccard: {_mean_or_zero(jaccard_scores)}")
    print(f"Average Hybrid Recall: {_mean_or_zero(hybrid_recalls)}")
    print(f"Average Hybrid Precision: {_mean_or_zero(hybrid_precisions)}")
    print(f"Average Hybrid DICE: {_mean_or_zero(hybrid_dices)}")
    print(f"Average Hybrid F1: {_mean_or_zero(hybrid_f1_scores)}")
    print(f"Average Hybrid mAP50: {_mean_or_zero(hybrid_map50s)}")
    print(f"Average Hybrid MCC: {_mean_or_zero(hybrid_mccs)}")
    print(f"Average Hybrid TNR: {_mean_or_zero(hybrid_tnrs)}")
    print(f"IoU 0: {zero_iou_count}")
    print(f"Images evaluated: {len(hybrid_ious)}, failed: {failed_count}")

    return hybrid_ious, jaccard_scores, hybrid_recalls, hybrid_precisions, hybrid_dices, hybrid_f1_scores, hybrid_map50s, hybrid_mccs, hybrid_tnrs


In [6]:
# Locations of the test split and of the two trained weight files.
test_dir = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/test/test"
yolo_model_path = "/home/voicelab/Desktop/segmentation_glottis/YOLOV8/best_yolov8n-seg-1cls.pt"
unet_model_path = "/home/voicelab/Desktop/segmentation_glottis/metrics/epoch070.h5"

# Load YOLO first, then the U-Net. The U-Net is loaded for inference only
# (compile=False); its InstanceNormalization layers are resolved through
# tensorflow_addons.
yolo_model = YOLO(yolo_model_path)
unet_model = load_model(
    unet_model_path,
    compile=False,
    custom_objects={'InstanceNormalization': tfa.layers.InstanceNormalization},
)

# Run the hybrid evaluation; as the last expression, the returned metric lists
# are displayed as the cell output.
evaluate_hybrid_model(test_dir, yolo_model, unet_model)

2024-09-26 13:04:26.058086: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-26 13:04:26.058696: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-26 13:04:26.058768: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-


Error processing 0.png: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED

Error processing 1.png: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED

Error processing 2.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 3.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 4.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 5.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 6.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 7.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 8.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 9.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 10.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 11.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 12.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 13.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Error processing 14.png: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

Er

([], [], [], [], [], [], [], [], [])

In [None]:
import cv2
import numpy as np
import os
from ultralytics import YOLO
from utils.metrics import calculate_iou, calculate_recall, calculate_precision, calculate_dice, calculate_f1, calculate_map50, calculate_mcc, calculate_tnr
from sklearn.metrics import precision_recall_curve
def get_yolo_mask(image_path, model):
    """
    Build a binary segmentation mask from a YOLO model's polygon output.

    Parameters:
    image_path (str): Path of the input image.
    model (YOLO): Callable model; ``model(image)[0].masks.xy`` is read as the
        list of polygons.

    Returns:
    numpy.ndarray: Boolean mask with the image's height and width, or ``None``
        when any step raises (the error is printed).
    """
    try:
        frame = cv2.imread(image_path)
        height_width = frame.shape[:2]
        detections = model(frame)

        canvas = np.zeros(height_width, dtype=np.uint8)
        predicted = detections[0].masks

        if predicted is None:
            print("No masks found in the results")
        else:
            for polygon in predicted.xy:
                # Polygons without points are skipped.
                if polygon.shape[0] != 0:
                    # Each polygon is filled on its own call so overlapping
                    # polygons are all painted.
                    cv2.fillPoly(canvas, [polygon.astype(np.int32)], 1)

        return canvas.astype(bool)
    except Exception as e:
        print(f"Error in get_yolo_mask for image {image_path}: {e}")
        return None
def evaluate_yolo_model(test_dir, yolo_model, num_images=3500):
    """Evaluate the YOLO segmentation masks on an indexed test set.

    For each index ``i`` in ``range(num_images)`` the reference mask
    ``{i}_seg.png`` is compared with the mask ``get_yolo_mask`` produces for
    ``{i}.png``. Images that cannot be processed are reported and skipped.
    The average of every metric is printed; nothing is returned.

    Args:
        test_dir: Directory holding the ``{i}.png`` / ``{i}_seg.png`` pairs.
        yolo_model: Model handed to ``get_yolo_mask``.
        num_images: Number of consecutive indices (starting at 0) to evaluate.
    """
    yolo_ious = []
    yolo_recalls = []
    yolo_precisions = []
    yolo_dices = []
    yolo_f1_scores = []
    yolo_map50s = []
    yolo_mccs = []
    yolo_tnrs = []

    for idx in range(num_images):
        image_name = f"{idx}.png"
        image_path = os.path.join(test_dir, image_name)
        mask_path = os.path.join(test_dir, f"{idx}_seg.png")
        try:
            # cv2.imread signals an unreadable file by returning None.
            ground_truth_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if ground_truth_mask is None:
                raise FileNotFoundError(f"could not read mask {mask_path}")
            ground_truth_mask = ground_truth_mask.astype(bool)

            # get_yolo_mask reads the image itself (no separate read is needed
            # here) and returns None when it fails.
            yolo_mask = get_yolo_mask(image_path, yolo_model)
            if yolo_mask is None:
                raise RuntimeError(f"no YOLO mask produced for {image_path}")

            # Compute every metric first so a failure part-way through cannot
            # leave the lists with different lengths.
            iou_yolo = calculate_iou(ground_truth_mask, yolo_mask)
            recall_yolo = calculate_recall(ground_truth_mask, yolo_mask)
            precision_yolo = calculate_precision(ground_truth_mask, yolo_mask)
            dice_yolo = calculate_dice(ground_truth_mask, yolo_mask)
            f1_yolo = calculate_f1(ground_truth_mask, yolo_mask)
            map50_yolo = calculate_map50(ground_truth_mask, yolo_mask)
            mcc_yolo = calculate_mcc(ground_truth_mask, yolo_mask)
            tnr_yolo = calculate_tnr(ground_truth_mask, yolo_mask)

            yolo_ious.append(iou_yolo)
            yolo_recalls.append(recall_yolo)
            yolo_precisions.append(precision_yolo)
            yolo_dices.append(dice_yolo)
            yolo_f1_scores.append(f1_yolo)
            yolo_map50s.append(map50_yolo)
            yolo_mccs.append(mcc_yolo)
            yolo_tnrs.append(tnr_yolo)

        except Exception as e:
            # Best-effort evaluation: report the failing image and move on.
            print(f"Error processing {image_name}: {e}")
            continue

    def _mean_or_nan(values):
        # Same value np.mean([]) would give, without its RuntimeWarning.
        return np.mean(values) if values else float("nan")

    # Print the average metrics.
    print("Average IoU YOLO: ", _mean_or_nan(yolo_ious))
    print("Average Recall YOLO: ", _mean_or_nan(yolo_recalls))
    print("Average Precision YOLO: ", _mean_or_nan(yolo_precisions))
    print("Average DICE YOLO: ", _mean_or_nan(yolo_dices))
    print("Average F1 YOLO: ", _mean_or_nan(yolo_f1_scores))
    print("Average mAP50 YOLO: ", _mean_or_nan(yolo_map50s))
    print("Average MCC YOLO: ", _mean_or_nan(yolo_mccs))
    print("Average TNR YOLO: ", _mean_or_nan(yolo_tnrs))



In [None]:
# Test split and weights used for the YOLO-only evaluation.
test_dir = "/home/voicelab/Desktop/segmentation_glottis/BAGLS_ROI/test"
yolo_model_path = "/home/voicelab/Desktop/segmentation_glottis/ROI/YOLOv8/runs/segment/train6/weights/best.pt"

# Load the weights and evaluate the first 3500 indexed images.
yolo_model = YOLO(yolo_model_path)
evaluate_yolo_model(
    test_dir,
    yolo_model,
    num_images=3500,
)

In [None]:
import numpy as np
import cv2
import os
import gc
import tensorflow as tf
import matplotlib.pyplot as plt
from utils.masks import get_unet_mask, get_max_yolo_roi, filter_unet_mask_with_yolo
from utils.metrics import calculate_iou, calculate_recall, calculate_precision

def evaluate_and_plot_hybrid_model(video_path, mask_video_path, yolo_model, unet_model, margin=15, max_frames=60):
    """Compute per-frame hybrid (YOLO ROI + U-Net) metrics for a video and plot them.

    The video and its mask video are read frame by frame in lockstep. For each
    frame the U-Net mask is filtered with a single ROI obtained once from
    ``get_max_yolo_roi``, resized to the reference mask's size, and scored
    with IoU, recall and precision. The three series are handed to
    ``plot_metrics``.

    Args:
        video_path: Path of the input video.
        mask_video_path: Path of the video holding the reference masks.
        yolo_model: Model handed to ``get_max_yolo_roi``.
        unet_model: Model handed to ``get_unet_mask``.
        margin: Margin forwarded to ``get_max_yolo_roi``.
        max_frames: Maximum number of frames to evaluate (previously a
            hard-coded 60).
    """
    hybrid_ious = []
    hybrid_recalls = []
    hybrid_precisions = []

    # Open the video and the mask video.
    video_cap = cv2.VideoCapture(video_path)
    mask_cap = cv2.VideoCapture(mask_video_path)

    # try/finally guarantees both captures are released even if a model call
    # or a metric raises part-way through the video.
    try:
        roi = get_max_yolo_roi(video_path, yolo_model, margin)

        frame_count = 0
        while video_cap.isOpened() and mask_cap.isOpened() and frame_count < max_frames:
            ret_video, frame_video = video_cap.read()
            ret_mask, frame_mask = mask_cap.read()

            # Stop as soon as either stream runs out of frames.
            if not ret_video or not ret_mask:
                break

            # Binarise the mask frame on its first channel (white is the glottis).
            ground_truth_mask = frame_mask[:, :, 0] > 127

            # Get the U-Net mask and filter it with the YOLO ROI.
            unet_mask = get_unet_mask(frame_video, unet_model)
            filtered_mask = filter_unet_mask_with_yolo(unet_mask, roi)

            # Resize the filtered mask to the reference mask's dimensions
            # (cv2.resize takes the target size as (width, height)).
            height, width = ground_truth_mask.shape
            filtered_mask_resized = cv2.resize(filtered_mask.astype(np.uint8), (width, height)).astype(bool)

            # Compute and store the metrics.
            hybrid_ious.append(calculate_iou(ground_truth_mask, filtered_mask_resized))
            hybrid_recalls.append(calculate_recall(ground_truth_mask, filtered_mask_resized))
            hybrid_precisions.append(calculate_precision(ground_truth_mask, filtered_mask_resized))

            frame_count += 1

            # Periodic cleanup every 100 frames; only reached when
            # max_frames is at least 100.
            if frame_count % 100 == 0:
                gc.collect()
                tf.keras.backend.clear_session()
    finally:
        video_cap.release()
        mask_cap.release()

    # Plot the collected metrics.
    plot_metrics(hybrid_ious, hybrid_recalls, hybrid_precisions)

def plot_metrics(iou_list, recall_list, precision_list):
    """Plot IoU, recall and precision per frame as three stacked panels.

    Frames are numbered from 1 on the x axis, with a tick every 10 frames.
    The x values are derived from ``len(iou_list)``, so the three lists are
    expected to have the same length.

    Args:
        iou_list: Per-frame IoU values.
        recall_list: Per-frame recall values.
        precision_list: Per-frame precision values.
    """
    frames = range(1, len(iou_list) + 1)  # Frame numbers starting at 1

    # Figure size chosen for a single IEEE column (3.45 in, about 8.8 cm wide).
    plt.figure(figsize=(3.45, 6))

    panels = (
        ("IoU", iou_list),
        ("Recall", recall_list),
        ("Precision", precision_list),
    )
    for row, (label, values) in enumerate(panels, start=1):
        plt.subplot(3, 1, row)
        plt.plot(frames, values, label=label, color='black', linestyle='-', linewidth=1)
        plt.xlabel("Frame", fontsize=8)
        plt.ylabel(label, fontsize=8)
        # Upper bound is len + 1 so the last frame can receive a tick; the IoU
        # panel previously stopped one short of the other two panels.
        plt.xticks(np.arange(1, len(values) + 1, 10), fontsize=8)
        plt.yticks(fontsize=8)
        plt.legend(fontsize=8, loc='best')

    plt.tight_layout()
    plt.show()



In [None]:
# Recording 547: raw video and its segmentation-mask video.
original_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/547.mp4"
mask_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/547_seg.mp4"

# Trained weights for both networks.
yolo_model_path = "/home/voicelab/Desktop/segmentation_glottis/YOLOV8/best_yolov8n-seg-1cls.pt"
unet_model_path = "/home/voicelab/Downloads/epoch025.h5"

# Load YOLO first, then the U-Net (inference only; InstanceNormalization is
# resolved through tensorflow_addons).
yolo_model = YOLO(yolo_model_path)
unet_model = load_model(
    unet_model_path,
    compile=False,
    custom_objects={'InstanceNormalization': tfa.layers.InstanceNormalization},
)

evaluate_and_plot_hybrid_model(
    original_video_path,
    mask_video_path,
    yolo_model,
    unet_model,
)

In [None]:
# Recording 155: raw video and its segmentation-mask video.
original_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/155.mp4"
mask_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/155_seg.mp4"

# Trained weights for both networks.
yolo_model_path = "/home/voicelab/Desktop/segmentation_glottis/YOLOV8/best_yolov8n-seg-1cls.pt"
unet_model_path = "/home/voicelab/Downloads/epoch025.h5"

# Load YOLO first, then the U-Net (inference only; InstanceNormalization is
# resolved through tensorflow_addons).
yolo_model = YOLO(yolo_model_path)
unet_model = load_model(
    unet_model_path,
    compile=False,
    custom_objects={'InstanceNormalization': tfa.layers.InstanceNormalization},
)

evaluate_and_plot_hybrid_model(
    original_video_path,
    mask_video_path,
    yolo_model,
    unet_model,
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
from tensorflow.keras.models import load_model
from ultralytics import YOLO
from utils.metrics import calculate_iou, calculate_recall, calculate_precision
from utils.masks import get_unet_mask, get_max_yolo_roi, filter_unet_mask_with_yolo

def calculate_metrics_per_frame(original_video_path, mask_video_path, Unet, max_frames=60):
    """Compute per-frame IoU, recall and precision of the U-Net on a video.

    The video and its mask video are read frame by frame in lockstep; reading
    stops after ``max_frames`` frames or when either stream ends.

    Args:
        original_video_path: Path of the input video.
        mask_video_path: Path of the video holding the reference masks.
        Unet: Model handed to ``get_unet_mask``.
        max_frames: Maximum number of frames to evaluate.

    Returns:
        Tuple ``(iou_list, recall_list, precision_list)`` with one entry per
        evaluated frame.
    """
    iou_list = []
    recall_list = []
    precision_list = []

    original_cap = cv2.VideoCapture(original_video_path)
    mask_cap = cv2.VideoCapture(mask_video_path)

    # try/finally guarantees both captures are released even if the model or
    # a metric raises part-way through the video.
    try:
        frame_count = 0
        while original_cap.isOpened() and mask_cap.isOpened() and frame_count < max_frames:
            ret_original, frame_original = original_cap.read()
            ret_mask, mask_frame = mask_cap.read()

            # Stop as soon as either stream runs out of frames.
            if not ret_original or not ret_mask:
                break

            # NOTE(review): unlike the hybrid evaluation, the predicted mask is
            # not resized to the reference mask's size here; this assumes
            # get_unet_mask already returns a mask of the frame's size.
            # TODO: confirm.
            predicted_mask = get_unet_mask(frame_original, Unet)
            ground_truth_mask = mask_frame[:, :, 0] > 127  # Binarise on the first channel

            iou_list.append(calculate_iou(ground_truth_mask, predicted_mask))
            recall_list.append(calculate_recall(ground_truth_mask, predicted_mask))
            precision_list.append(calculate_precision(ground_truth_mask, predicted_mask))

            frame_count += 1
    finally:
        original_cap.release()
        mask_cap.release()

    return iou_list, recall_list, precision_list


def evaluate_and_plot_hybrid_model(video_path, mask_video_path, yolo_model, unet_model, margin=15, max_frames=60):
    """Compute per-frame hybrid (YOLO ROI + U-Net) metrics for a video.

    The video and its mask video are read frame by frame in lockstep. For each
    frame the U-Net mask is filtered with a single ROI obtained once from
    ``get_max_yolo_roi``, resized to the reference mask's size, and scored
    with IoU, recall and precision. Despite its name, this definition does
    not plot; it returns the series.

    Args:
        video_path: Path of the input video.
        mask_video_path: Path of the video holding the reference masks.
        yolo_model: Model handed to ``get_max_yolo_roi``.
        unet_model: Model handed to ``get_unet_mask``.
        margin: Margin forwarded to ``get_max_yolo_roi``.
        max_frames: Maximum number of frames to evaluate (previously a
            hard-coded 60).

    Returns:
        Tuple ``(hybrid_ious, hybrid_recalls, hybrid_precisions)`` with one
        entry per evaluated frame.
    """
    hybrid_ious = []
    hybrid_recalls = []
    hybrid_precisions = []

    # Open the video and the mask video.
    video_cap = cv2.VideoCapture(video_path)
    mask_cap = cv2.VideoCapture(mask_video_path)

    # try/finally guarantees both captures are released even if a model call
    # or a metric raises part-way through the video.
    try:
        roi = get_max_yolo_roi(video_path, yolo_model, margin)

        frame_count = 0
        while video_cap.isOpened() and mask_cap.isOpened() and frame_count < max_frames:
            ret_video, frame_video = video_cap.read()
            ret_mask, frame_mask = mask_cap.read()

            # Stop as soon as either stream runs out of frames.
            if not ret_video or not ret_mask:
                break

            # Binarise the mask frame on its first channel (white is the glottis).
            ground_truth_mask = frame_mask[:, :, 0] > 127

            # Get the U-Net mask and filter it with the YOLO ROI.
            unet_mask = get_unet_mask(frame_video, unet_model)
            filtered_mask = filter_unet_mask_with_yolo(unet_mask, roi)

            # Resize the filtered mask to the reference mask's dimensions
            # (cv2.resize takes the target size as (width, height)).
            height, width = ground_truth_mask.shape
            filtered_mask_resized = cv2.resize(filtered_mask.astype(np.uint8), (width, height)).astype(bool)

            # Compute and store the metrics.
            hybrid_ious.append(calculate_iou(ground_truth_mask, filtered_mask_resized))
            hybrid_recalls.append(calculate_recall(ground_truth_mask, filtered_mask_resized))
            hybrid_precisions.append(calculate_precision(ground_truth_mask, filtered_mask_resized))

            frame_count += 1

            # Periodic cleanup every 100 frames; only reached when
            # max_frames is at least 100.
            if frame_count % 100 == 0:
                gc.collect()
                tf.keras.backend.clear_session()
    finally:
        video_cap.release()
        mask_cap.release()

    return hybrid_ious, hybrid_recalls, hybrid_precisions


def plot_combined_metrics(video_path, mask_video_path, yolo_model, unet_model, frames=60):
    """Plot per-frame U-Net and hybrid metrics of one video on shared axes.

    IoU, recall and precision are drawn as three stacked panels, each showing
    the U-Net series (solid blue) and the hybrid series (dashed red).

    Args:
        video_path: Path of the input video.
        mask_video_path: Path of the video holding the reference masks.
        yolo_model: Model forwarded to ``evaluate_and_plot_hybrid_model``.
        unet_model: Model forwarded to both evaluations.
        frames: Maximum number of frames for the U-Net evaluation.
    """
    # Metrics for the U-Net alone.
    iou_unet, recall_unet, precision_unet = calculate_metrics_per_frame(video_path, mask_video_path, unet_model, frames)

    # Metrics for the hybrid model.
    hybrid_ious, hybrid_recalls, hybrid_precisions = evaluate_and_plot_hybrid_model(video_path, mask_video_path, yolo_model, unet_model, margin=15)

    # The two evaluations can return a different number of frames (the hybrid
    # call is not given ``frames``), so plot only the frames both cover.
    n_frames = min(len(iou_unet), len(hybrid_ious))
    frame_indices = range(1, n_frames + 1)

    # The plotted hybrid series are the lists returned above; the previous
    # code referenced undefined names (iou_hybrid, recall_hybrid,
    # precision_hybrid) and raised NameError.
    panels = (
        ("IoU", iou_unet, hybrid_ious),
        ("Recall", recall_unet, hybrid_recalls),
        ("Precision", precision_unet, hybrid_precisions),
    )

    plt.figure(figsize=(6, 8))  # Overall figure size

    for row, (metric, unet_values, hybrid_values) in enumerate(panels, start=1):
        plt.subplot(3, 1, row)
        plt.plot(frame_indices, unet_values[:n_frames], label=f"{metric} UNet", color='blue', linestyle='-', linewidth=1)
        plt.plot(frame_indices, hybrid_values[:n_frames], label=f"{metric} Híbrido", color='red', linestyle='--', linewidth=1)
        plt.xlabel("Frame", fontsize=10)
        plt.ylabel(metric, fontsize=10)
        plt.xticks(np.arange(1, n_frames + 1, 10), fontsize=8)
        plt.yticks(fontsize=8)
        plt.legend(fontsize=8, loc='best')

    plt.tight_layout()
    plt.show()



In [None]:
# Recording 155: raw video and its segmentation-mask video.
original_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/155.mp4"
mask_video_path = "/home/voicelab/Desktop/segmentation_glottis/BAGLS/raw/raw/155_seg.mp4"

# Trained weights for both networks.
yolo_model_path = "/home/voicelab/Desktop/segmentation_glottis/YOLOV8/best_yolov8n-seg-1cls.pt"
unet_model_path = "/home/voicelab/Downloads/epoch025.h5"

# Load YOLO first, then the U-Net (inference only; InstanceNormalization is
# resolved through tensorflow_addons).
yolo_model = YOLO(yolo_model_path)
unet_model = load_model(
    unet_model_path,
    compile=False,
    custom_objects={'InstanceNormalization': tfa.layers.InstanceNormalization},
)

# Draw the U-Net and hybrid metrics on shared axes.
plot_combined_metrics(
    original_video_path,
    mask_video_path,
    yolo_model,
    unet_model,
)
