In [3]:
import torch
from model import TwoEE
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import os
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, classification_report
import onnxruntime as ort
import os
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import joblib

In [4]:
def dataset_creator(path_to_folder, max_by_experiment = 3):
    col, fire, steer = 0, 0, 0
    images, labels = [], []

    for folder in os.listdir(path_to_folder):

        if os.path.isdir(os.path.join(path_to_folder, folder)):

            for file in os.listdir(os.path.join(path_to_folder, folder)):

                if file.endswith('.txt'):
                    if 'labels' in file and col < max_by_experiment:
                        col += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')  # Ajustar tamaño y modo de color
                            img_array = img_to_array(img) / 128.0 -1 # Normalizar la imagen
                            images.append(img_array)
                        
                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file))

                        for label in labels_txt:
                            if label == 0:
                                label = [np.array([1, 0]), np.array([np.nan]*4), np.array([np.nan])]
                            elif label == 1:
                                label = [np.array([0, 1]), np.array([np.nan]*4), np.array([np.nan])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)


                    elif 'fire' in file and fire < max_by_experiment:
                        fire += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')  # Ajustar tamaño y modo de color
                            img_array = img_to_array(img) / 128.0 -1  # Normalizar la imagen
                            images.append(img_array)
                        
                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file), delimiter=' ')

                        for label in labels_txt:
                            label = [np.array([np.nan]*2), label, np.array([np.nan])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)

                            
                    elif 'sync' in file and steer < max_by_experiment:
                        steer += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')
                            img_array = img_to_array(img) / 128.0 - 1
                            images.append(img_array)

                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file), usecols=0, delimiter=',', skiprows=1)

                        for label in labels_txt:
                            label = [np.array([np.nan]*2), np.array([np.nan]*4), np.array([label])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)

    return np.array(images), np.array(labels)

images_test, labels_test = dataset_creator('../../../testing', 5)
indices = np.random.permutation(images_test.shape[0])
images_test = images_test[indices]
images_test = torch.tensor(images_test).permute(0, 3, 1, 2)
labels_test = labels_test[indices]
y_col_test, y_fire_test, y_steer_test = labels_test[:,0, :][:, :2], labels_test[:, 1, :], labels_test[:, 2, :][:, 0]
y_col_test, y_fire_test, y_steer_test = torch.tensor(y_col_test), torch.tensor(y_fire_test), torch.tensor(y_steer_test)

In [5]:
model = TwoEE()
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()

  model.load_state_dict(torch.load('trained_model.pth'))


TwoEE(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (ee_branch_binary): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (ee_branch_binary_bn): BatchNorm2d(128, eps=1e-05, momentum=0

### With the first strategy (only on th binary classification task)

In [35]:
def evaluar_modelo_con_confianza(modelo, entradas, labels_cls1, labels_cls2, labels_reg1, labels_reg2, umbral_range=np.arange(0.1, 1.0, 0.05)):
    modelo.eval()  # Ponemos el modelo en modo evaluación
    
    # Listas para almacenar las métricas y las confianzas
    all_pred_cls1 = []
    all_pred_cls2 = []
    all_pred_reg1 = []
    all_pred_reg2 = []
    
    all_labels_cls1 = []
    all_labels_cls2 = []
    all_labels_reg1 = []
    all_labels_reg2 = []
    
    confianza_cls1 = []
    confianza_cls2 = []

    # Recorremos las entradas una por una
    for i in tqdm(range(len(entradas))):
        input_data = entradas[i]
        label_cls1 = labels_cls1[i]
        label_cls2 = labels_cls2[i]
        label_reg1 = labels_reg1[i]
        label_reg2 = labels_reg2[i]
        
        # Hacemos la inferencia
        with torch.no_grad():
            logits_cls1, logits_reg1, logits_cls2, logits_reg2 = modelo(input_data.unsqueeze(0))  # Añadimos batch dimension
        
        
        # Clasificación binaria (Sigmoide)
        sigmoide_cls1 = torch.sigmoid(logits_cls1) if not np.isnan(label_cls1).any() else None
        pred_cls1 = (sigmoide_cls1 > 0.5).int() if not np.isnan(label_cls1).any() else None

        sigmoide_cls2 = torch.sigmoid(logits_cls2) if not np.isnan(label_cls2).any() else None
        pred_cls2 = (sigmoide_cls2 > 0.5).int() if not np.isnan(label_cls2).any() else None
        
        # Regresión (sin transformación)
        pred_reg1 = logits_reg1.item() if not np.isnan(label_reg1).any() else None
        pred_reg2 = logits_reg2.item() if not np.isnan(label_reg2).any() else None
        
        # Métricas de confianza
        if not np.isnan(label_cls1).any():
            if sigmoide_cls1.item()>0.5:
                confianza_cls1.append(sigmoide_cls1.item())
            else:
                confianza_cls1.append(1-sigmoide_cls1.item())
        
        if not np.isnan(label_cls2).any():
            if sigmoide_cls2.item()>0.5:
                confianza_cls2.append(sigmoide_cls2.item())
            else:
                confianza_cls2.append(1-sigmoide_cls2.item())

        
        # Almacenamos las predicciones y etiquetas
        if not np.isnan(label_cls1).any():
            all_pred_cls1.append(pred_cls1.item())
            all_labels_cls1.append(np.argmax(label_cls1).item())  # Aplicamos argmax a la etiqueta binaria
        if not np.isnan(label_cls2).any():
            all_pred_cls2.append(pred_cls2.item())
            all_labels_cls2.append(np.argmax(label_cls2).item())
        if not np.isnan(label_reg1).any():
            all_pred_reg1.append(pred_reg1)
            all_labels_reg1.append(label_reg1.item())
        if not np.isnan(label_reg2).any():
            all_pred_reg2.append(pred_reg2)
            all_labels_reg2.append(label_reg2.item())
    
    # Evaluamos las métricas por separado para cada tarea
    
    # Clasificación binaria
    mask_cls1 = ~np.isnan(all_labels_cls1)
    filtered_pred_cls1 = np.array(all_pred_cls1)[mask_cls1]
    filtered_labels_cls1 = np.array(all_labels_cls1)[mask_cls1]
    acc_cls1 = accuracy_score(filtered_labels_cls1, filtered_pred_cls1)

    mask_cls2 = ~np.isnan(all_labels_cls2)
    filtered_pred_cls2 = np.array(all_pred_cls2)[mask_cls2]
    filtered_labels_cls2 = np.array(all_labels_cls2)[mask_cls2]
    acc_cls2 = accuracy_score(filtered_labels_cls2, filtered_pred_cls2)
    
    # Regresión
    mask_reg1 = ~np.isnan(all_labels_reg1)
    filtered_pred_reg1 = np.array(all_pred_reg1)[mask_reg1]
    filtered_labels_reg1 = np.array(all_labels_reg1)[mask_reg1]
    mse_reg1 = mean_squared_error(filtered_labels_reg1, filtered_pred_reg1)

    mask_reg2 = ~np.isnan(all_labels_reg2)
    filtered_pred_reg2 = np.array(all_pred_reg2)[mask_reg2]
    filtered_labels_reg2 = np.array(all_labels_reg2)[mask_reg2]
    mse_reg2 = mean_squared_error(filtered_labels_reg2, filtered_pred_reg2)
    
    # Confianza promedio
    confianza_prom_cls1 = np.mean(confianza_cls1) if len(confianza_cls1) > 0 else 0
    confianza_prom_cls2 = np.mean(confianza_cls2) if len(confianza_cls2) > 0 else 0

    # Determinamos el umbral de confianza óptimo para maximizar la precisión en la tarea 1
    best_threshold = None
    best_accuracy = -float('inf')
    
    # Asegurarnos de que estamos trabajando con las confidencias correctas
    confidences = np.array(confianza_cls1)  # Usamos la confianza de la tarea 1 (puedes adaptarlo para otras tareas)
    
    for threshold in umbral_range:
        # Aplicamos el umbral sobre las confidencias para obtener las predicciones binarias (1 o 0)
        pred_cls1_thresh = (confidences > threshold).astype(int)  # Se compara la confianza con el umbral
        
        # Filtramos las etiquetas y las predicciones que tienen confianza sobre el umbral
        mask = confidences > threshold
        filtered_pred_cls1_thresh = filtered_pred_cls1[mask]
        filtered_labels_cls1_thresh = filtered_labels_cls1[mask]
        current_len = 0
        
        # Calculamos el accuracy solo para las instancias donde hay etiquetas válidas
        if len(filtered_labels_cls1_thresh) > 0:  # Asegurarse de que hay datos para calcular el accuracy
            acc = accuracy_score(filtered_labels_cls1_thresh, filtered_pred_cls1_thresh)
        
            # Si encontramos un umbral mejor, lo guardamos
            if acc > best_accuracy:
                best_accuracy = acc
                best_threshold = threshold
                current_len = len(filtered_labels_cls1_thresh)
                best_mask = mask
    
    acc_final = accuracy_score(filtered_labels_cls2[~best_mask], filtered_pred_cls2[~best_mask])

    # Mostrar resultados
    print(f'Precisión clasificación binaria ee: {acc_cls1:.4f}')
    print(f'Precisión clasificación binaria normal: {acc_cls2:.4f}')
    print(f'Error cuadrático medio (MSE) en regresión ee: {mse_reg1:.4f}')
    print(f'Error cuadrático medio (MSE) en regresión normal: {mse_reg2:.4f}')
    
    # Confianza
    print(f'Confianza promedio (cls1): {confianza_prom_cls1:.4f}')
    print(f'Confianza promedio (cls2): {confianza_prom_cls2:.4f}')
    
    # Mejor umbral para la tarea 1
    print(f'El mejor umbral de confianza para tarea 1 es: {best_threshold:.4f}')
    print(f'Con ese umbral, la precisión es: {best_accuracy:.4f}')

    return {'acc_cls1': acc_cls1, 'acc_cls2': acc_cls2, 'mse_reg1': mse_reg1, 'mse_reg2': mse_reg2, 'best_threshold': best_threshold, 'best_accuracy': best_accuracy, 'proportion where threshold is applied': current_len/len(filtered_pred_cls1), 'acc on cls2 w/ ee': acc_final}

# Llamada a la función de evaluación
metrics = evaluar_modelo_con_confianza(model, images_test, y_col_test, y_col_test, y_steer_test, y_steer_test)


  sigmoide_cls1 = torch.sigmoid(logits_cls1) if not np.isnan(label_cls1).any() else None
  pred_cls1 = (sigmoide_cls1 > 0.5).int() if not np.isnan(label_cls1).any() else None
  sigmoide_cls2 = torch.sigmoid(logits_cls2) if not np.isnan(label_cls2).any() else None
  pred_cls2 = (sigmoide_cls2 > 0.5).int() if not np.isnan(label_cls2).any() else None
  pred_reg1 = logits_reg1.item() if not np.isnan(label_reg1).any() else None
  pred_reg2 = logits_reg2.item() if not np.isnan(label_reg2).any() else None
  if not np.isnan(label_cls1).any():
  if not np.isnan(label_cls2).any():
  if not np.isnan(label_cls1).any():
  if not np.isnan(label_cls2).any():
  if not np.isnan(label_reg1).any():
  if not np.isnan(label_reg2).any():
 14%|█▍        | 745/5157 [04:19<24:51,  2.96it/s]

In [36]:
metrics

{'acc_cls1': 0.8798498122653317,
 'acc_cls2': 0.9236545682102628,
 'mse_reg1': np.float64(0.011940631447059893),
 'mse_reg2': np.float64(0.05897579385018556),
 'best_threshold': np.float64(0.9500000000000003),
 'best_accuracy': 0.9192634560906515,
 'proportion where threshold is applied': 0.8836045056320401,
 'acc on cls2 w/ ee': 0.8279569892473119}

In [37]:
# overall accuracy on collision task
acc_task = metrics['acc on cls2 w/ ee']* (1-metrics['proportion where threshold is applied']) + metrics['best_accuracy']*metrics['proportion where threshold is applied']
print(f'Overall accuracy on collision task: {acc_task:.4f}')
print(f'Maximum accuracy that could be achieved: {metrics["acc_cls2"]:.4f}')

Overall accuracy on collision task: 0.9086
Maximum accuracy that could be achieved: 0.9237


In [38]:
# meanwhile, the mse on the regression task is
print(f'Aproximate MSE on regression task: {metrics["mse_reg1"]*metrics['proportion where threshold is applied']+metrics['mse_reg2']*(1-metrics['proportion where threshold is applied']):.4f}')

Aproximate MSE on regression task: 0.0174


Precision reduction

In [39]:
summary(model, (1, 200, 200))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 200, 200]             320
       BatchNorm2d-2         [-1, 32, 200, 200]              64
            Conv2d-3         [-1, 64, 200, 200]          18,496
       BatchNorm2d-4         [-1, 64, 200, 200]             128
         MaxPool2d-5         [-1, 64, 100, 100]               0
            Conv2d-6        [-1, 128, 100, 100]          73,856
       BatchNorm2d-7        [-1, 128, 100, 100]             256
            Conv2d-8        [-1, 256, 100, 100]         295,168
       BatchNorm2d-9        [-1, 256, 100, 100]             512
        MaxPool2d-10          [-1, 256, 50, 50]               0
           Conv2d-11          [-1, 128, 50, 50]         295,040
      BatchNorm2d-12          [-1, 128, 50, 50]             256
           Linear-13                    [-1, 1]         320,001
           Conv2d-14          [-1, 128,

Half of the model is deactivated in case the early exit is taken. Concretely, from Conv2d-17 to the end.

In [40]:
num_macs_deact = 1180160+1024+2359808+1024+81920256+257+81920256+257

In [41]:
# over 100 inferences

num_discount =( 1 - metrics['proportion where threshold is applied']) * num_macs_deact * 100
print(f"Over 100 inferences, aprox {num_discount:.2f} MACs will be saved")

print(f'Which represents a {(169002436 * 100 - num_discount)/(169002436*100)*100:.2f}% discount over the full model')

Over 100 inferences, aprox 1948263192.24 MACs will be saved
Which represents a 88.47% discount over the full model


### With the second strategy, evaluating also confidence on regression

In [None]:
def evaluar_modelo_con_confianza(modelo, entradas, labels_cls1, labels_cls2, labels_reg1, labels_reg2, umbral_range=np.arange(0.1, 1.0, 0.05)):
    modelo.eval()  # Ponemos el modelo en modo evaluación
    
    # Listas para almacenar las métricas y las confianzas
    all_pred_cls1 = []
    all_pred_cls2 = []
    all_pred_reg1 = []
    all_pred_reg2 = []
    
    all_labels_cls1 = []
    all_labels_cls2 = []
    all_labels_reg1 = []
    all_labels_reg2 = []
    
    confianza_cls1 = []
    confianza_cls2 = []

    # Recorremos las entradas una por una
    for i in tqdm(range(len(entradas))):
        input_data = entradas[i]
        label_cls1 = labels_cls1[i]
        label_cls2 = labels_cls2[i]
        label_reg1 = labels_reg1[i]
        label_reg2 = labels_reg2[i]
        
        # Hacemos la inferencia
        with torch.no_grad():
            logits_cls1, logits_reg1, logits_cls2, logits_reg2 = modelo(input_data.unsqueeze(0))  # Añadimos batch dimension
        
        
        # Clasificación binaria (Sigmoide)
        sigmoide_cls1 = torch.sigmoid(logits_cls1) if not np.isnan(label_cls1).any() else None
        pred_cls1 = (sigmoide_cls1 > 0.5).int() if not np.isnan(label_cls1).any() else None

        sigmoide_cls2 = torch.sigmoid(logits_cls2) if not np.isnan(label_cls2).any() else None
        pred_cls2 = (sigmoide_cls2 > 0.5).int() if not np.isnan(label_cls2).any() else None
        
        # Regresión (sin transformación)
        pred_reg1 = logits_reg1.item() if not np.isnan(label_reg1).any() else None
        pred_reg2 = logits_reg2.item() if not np.isnan(label_reg2).any() else None
        
        # Métricas de confianza
        if not np.isnan(label_cls1).any():
            if sigmoide_cls1.item()>0.5:
                confianza_cls1.append(sigmoide_cls1.item())
            else:
                confianza_cls1.append(1-sigmoide_cls1.item())
        
        if not np.isnan(label_cls2).any():
            if sigmoide_cls2.item()>0.5:
                confianza_cls2.append(sigmoide_cls2.item())
            else:
                confianza_cls2.append(1-sigmoide_cls2.item())

        
        # Almacenamos las predicciones y etiquetas
        if not np.isnan(label_cls1).any():
            all_pred_cls1.append(pred_cls1.item())
            all_labels_cls1.append(np.argmax(label_cls1).item())  # Aplicamos argmax a la etiqueta binaria
        if not np.isnan(label_cls2).any():
            all_pred_cls2.append(pred_cls2.item())
            all_labels_cls2.append(np.argmax(label_cls2).item())
        if not np.isnan(label_reg1).any():
            all_pred_reg1.append(pred_reg1)
            all_labels_reg1.append(label_reg1.item())
        if not np.isnan(label_reg2).any():
            all_pred_reg2.append(pred_reg2)
            all_labels_reg2.append(label_reg2.item())
    
    # Evaluamos las métricas por separado para cada tarea
    
    # Clasificación binaria
    mask_cls1 = ~np.isnan(all_labels_cls1)
    filtered_pred_cls1 = np.array(all_pred_cls1)[mask_cls1]
    filtered_labels_cls1 = np.array(all_labels_cls1)[mask_cls1]
    acc_cls1 = accuracy_score(filtered_labels_cls1, filtered_pred_cls1)

    mask_cls2 = ~np.isnan(all_labels_cls2)
    filtered_pred_cls2 = np.array(all_pred_cls2)[mask_cls2]
    filtered_labels_cls2 = np.array(all_labels_cls2)[mask_cls2]
    acc_cls2 = accuracy_score(filtered_labels_cls2, filtered_pred_cls2)
    
    # Regresión
    mask_reg1 = ~np.isnan(all_labels_reg1)
    filtered_pred_reg1 = np.array(all_pred_reg1)[mask_reg1]
    filtered_labels_reg1 = np.array(all_labels_reg1)[mask_reg1]
    mse_reg1 = mean_squared_error(filtered_labels_reg1, filtered_pred_reg1)

    mask_reg2 = ~np.isnan(all_labels_reg2)
    filtered_pred_reg2 = np.array(all_pred_reg2)[mask_reg2]
    filtered_labels_reg2 = np.array(all_labels_reg2)[mask_reg2]
    mse_reg2 = mean_squared_error(filtered_labels_reg2, filtered_pred_reg2)
    
    # Confianza promedio
    confianza_prom_cls1 = np.mean(confianza_cls1) if len(confianza_cls1) > 0 else 0
    confianza_prom_cls2 = np.mean(confianza_cls2) if len(confianza_cls2) > 0 else 0

    # Determinamos el umbral de confianza óptimo para maximizar la precisión en la tarea 1
    best_threshold = None
    best_accuracy = -float('inf')
    
    # Asegurarnos de que estamos trabajando con las confidencias correctas
    confidences = np.array(confianza_cls1)  # Usamos la confianza de la tarea 1 (puedes adaptarlo para otras tareas)
    
    for threshold in umbral_range:
        # Aplicamos el umbral sobre las confidencias para obtener las predicciones binarias (1 o 0)
        pred_cls1_thresh = (confidences > threshold).astype(int)  # Se compara la confianza con el umbral
        
        # Filtramos las etiquetas y las predicciones que tienen confianza sobre el umbral
        mask = confidences > threshold
        filtered_pred_cls1_thresh = filtered_pred_cls1[mask]
        filtered_labels_cls1_thresh = filtered_labels_cls1[mask]
        current_len = 0
        
        # Calculamos el accuracy solo para las instancias donde hay etiquetas válidas
        if len(filtered_labels_cls1_thresh) > 0:  # Asegurarse de que hay datos para calcular el accuracy
            acc = accuracy_score(filtered_labels_cls1_thresh, filtered_pred_cls1_thresh)
        
            # Si encontramos un umbral mejor, lo guardamos
            if acc > best_accuracy:
                best_accuracy = acc
                best_threshold = threshold
                current_len = len(filtered_labels_cls1_thresh)
                best_mask = mask
    
    acc_final = accuracy_score(filtered_labels_cls2[~best_mask], filtered_pred_cls2[~best_mask])



    best_threshold_reg = None
    best_mse = float('inf')

    error_model = joblib.load('lasso.pkl')
    X = np.array(entradas).squeeze(1)
    X = X.view(X.size(0), -1)
    X = np.hstack([X, np.array(all_pred_reg1).reshape(-1, 1)])
    confidences_reg = error_model.predict(X)

    for threshold in umbral_range:
        pred_reg1_thresh = (confidences_reg > threshold).astype(int)

        mask = confidences_reg > threshold
        filtered_pred_reg1_thresh = filtered_pred_reg1[mask]
        filtered_labels_reg1_thresh = filtered_labels_reg1[mask]
        current_len_reg = 0

        if len(filtered_labels_reg1_thresh) > 0:
            mse = mean_squared_error(filtered_labels_reg1_thresh, filtered_pred_reg1_thresh)

            if mse < best_mse:
                best_mse = mse
                best_threshold_reg = threshold
                current_len_reg = len(filtered_labels_reg1_thresh)
                best_mask_reg = mask

    mse_final = mean_squared_error(filtered_labels_reg2[~best_mask_reg], filtered_pred_reg2[~best_mask_reg])

    

    # Mostrar resultados
    print(f'Precisión clasificación binaria ee: {acc_cls1:.4f}')
    print(f'Precisión clasificación binaria normal: {acc_cls2:.4f}')
    print(f'Error cuadrático medio (MSE) en regresión ee: {mse_reg1:.4f}')
    print(f'Error cuadrático medio (MSE) en regresión normal: {mse_reg2:.4f}')
    
    # Confianza
    print(f'Confianza promedio (cls1): {confianza_prom_cls1:.4f}')
    print(f'Confianza promedio (cls2): {confianza_prom_cls2:.4f}')
    
    # Mejor umbral para la tarea 1
    print(f'El mejor umbral de confianza para tarea 1 es: {best_threshold:.4f}')
    print(f'Con ese umbral, la precisión es: {best_accuracy:.4f}')

    return {'acc_cls1': acc_cls1, 'acc_cls2': acc_cls2, 'mse_reg1': mse_reg1, 'mse_reg2': mse_reg2, 'best_threshold': best_threshold, 'best_accuracy': best_accuracy, 'proportion where threshold is applied': current_len/len(filtered_pred_cls1),
             'acc on cls2 w/ ee': acc_final, 'best_threshold_reg': best_threshold_reg, 'best_mse': best_mse, 'proportion where threshold is applied_reg': current_len_reg/len(filtered_pred_reg1), 'mse on cls2 w/ ee': mse_final}

# Llamada a la función de evaluación
metrics = evaluar_modelo_con_confianza(model, images_test, y_col_test, y_col_test, y_steer_test, y_steer_test)
