In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import os
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, classification_report
import onnxruntime as ort
import os
from model import *
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm

In [None]:
def dataset_creator(path_to_folder, max_by_experiment = 3):
    col, fire, steer = 0, 0, 0
    images, labels = [], []

    for folder in os.listdir(path_to_folder):

        if os.path.isdir(os.path.join(path_to_folder, folder)):

            for file in os.listdir(os.path.join(path_to_folder, folder)):

                if file.endswith('.txt'):
                    if 'labels' in file and col < max_by_experiment:
                        col += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')  
                            img_array = img_to_array(img) / 128.0 -1 
                            images.append(img_array)
                        
                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file))

                        for label in labels_txt:
                            if label == 0:
                                label = [np.array([1, 0]), np.array([np.nan]*4), np.array([np.nan])]
                            elif label == 1:
                                label = [np.array([0, 1]), np.array([np.nan]*4), np.array([np.nan])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)


                    elif 'fire' in file and fire < max_by_experiment:
                        fire += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')  
                            img_array = img_to_array(img) / 128.0 -1  
                            images.append(img_array)
                        
                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file), delimiter=' ')

                        for label in labels_txt:
                            label = [np.array([np.nan]*2), label, np.array([np.nan])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)

                            
                    elif 'sync' in file and steer < max_by_experiment:
                        steer += 1
                        for pic in sorted(os.listdir(os.path.join(path_to_folder, folder, 'images'))):
                            img = load_img(os.path.join(os.path.join(path_to_folder, folder, 'images'), pic), target_size=(200, 200), color_mode='grayscale')
                            img_array = img_to_array(img) / 128.0 - 1
                            images.append(img_array)

                        labels_txt = np.loadtxt(os.path.join(path_to_folder, folder, file), usecols=0, delimiter=',', skiprows=1)

                        for label in labels_txt:
                            label = [np.array([np.nan]*2), np.array([np.nan]*4), np.array([label])]
                            label = pad_sequences(label, dtype='float32', padding='post', value=np.nan)
                            labels.append(label)

    return np.array(images), np.array(labels)

images_test, labels_test = dataset_creator('../../../testing', 5)
indices = np.random.permutation(images_test.shape[0])
images_test = images_test[indices]
images_test = torch.tensor(images_test).permute(0, 3, 1, 2)
labels_test = labels_test[indices]
y_col_test, y_fire_test, y_steer_test = labels_test[:,0, :][:, :2], labels_test[:, 1, :], labels_test[:, 2, :][:, 0]
y_col_test, y_fire_test, y_steer_test = torch.tensor(y_col_test), torch.tensor(y_fire_test), torch.tensor(y_steer_test)

In [6]:
model = OneEE()
state = torch.load('trained_model.pth')
model.load_state_dict(state)

model.eval()

  state = torch.load('trained_model.pth')


OneEE(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (obj_detect_conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (obj_detect_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, aff

In [None]:
def evaluar_modelo_con_confianza(modelo, entradas, labels_cls1, labels_cls2, labels_bin, labels_reg, umbral_range=np.arange(0.1, 1.0, 0.05)):
    modelo.eval()  
    
    all_pred_cls1 = []
    all_pred_cls2 = []
    all_pred_bin = []
    all_pred_reg = []
    
    all_labels_cls1 = []
    all_labels_cls2 = []
    all_labels_bin = []
    all_labels_reg = []
    
    confianza_cls1 = []
    confianza_cls2 = []
    confianza_bin = []
    confianza_reg = []
    
    for i in tqdm(range(len(entradas))):
        input_data = entradas[i]
        label_cls1 = labels_cls1[i]
        label_cls2 = labels_cls2[i]
        label_bin = labels_bin[i]
        label_reg = labels_reg[i]
        
        with torch.no_grad():
            logits_cls1, logits_cls2, logits_bin, logits_reg = modelo(input_data.unsqueeze(0))  
        
        softmax_cls1 = F.softmax(logits_cls1, dim=1) if not np.isnan(label_cls1).any() else None
        softmax_cls2 = F.softmax(logits_cls2, dim=1) if not np.isnan(label_cls2).any() else None
        
        pred_cls1 = torch.argmax(softmax_cls1, dim=1) if not np.isnan(label_cls1).any() else None
        pred_cls2 = torch.argmax(softmax_cls2, dim=1) if not np.isnan(label_cls2).any() else None
        
        sigmoide_bin = torch.sigmoid(logits_bin) if not np.isnan(label_bin).any() else None
        pred_bin = (sigmoide_bin > 0.5).int() if not np.isnan(label_bin).any() else None
        
        pred_reg = logits_reg.item() if not np.isnan(label_reg).any() else None
        
        if not np.isnan(label_cls1).any():
            confianza_cls1.append(torch.max(softmax_cls1, dim=1).values.item())  # max prob as confidence value
        if not np.isnan(label_cls2).any():
            confianza_cls2.append(torch.max(softmax_cls2, dim=1).values.item())  
        if not np.isnan(label_bin).any():
            confianza_bin.append(sigmoide_bin.item())  # sigmoid output as confidence value
        if not np.isnan(label_reg).any():
            confianza_reg.append(abs(pred_reg - label_reg.item()))  # absolute difference as confidence value
        
        if not np.isnan(label_cls1).any():
            all_pred_cls1.append(pred_cls1.item())
            all_labels_cls1.append(np.argmax(label_cls1).item())  # argmax
        if not np.isnan(label_cls2).any():
            all_pred_cls2.append(pred_cls2.item())
            all_labels_cls2.append(np.argmax(label_cls2).item())  
        if not np.isnan(label_bin).any():
            all_pred_bin.append(pred_bin.item())
            all_labels_bin.append(np.argmax(label_bin).item())  
        if not np.isnan(label_reg).any():
            all_pred_reg.append(pred_reg)
            all_labels_reg.append(label_reg.item())
    
    # classifier multiclass 1
    mask_cls1 = ~np.isnan(all_labels_cls1)
    filtered_pred_cls1 = np.array(all_pred_cls1)[mask_cls1]
    filtered_labels_cls1 = np.array(all_labels_cls1)[mask_cls1]
    acc_cls1 = accuracy_score(filtered_labels_cls1, filtered_pred_cls1)
    
    # classifier multiclass 2
    mask_cls2 = ~np.isnan(all_labels_cls2)
    filtered_pred_cls2 = np.array(all_pred_cls2)[mask_cls2]
    filtered_labels_cls2 = np.array(all_labels_cls2)[mask_cls2]
    acc_cls2 = accuracy_score(filtered_labels_cls2, filtered_pred_cls2)
    
    # binary classifier
    mask_bin = ~np.isnan(all_labels_bin)
    filtered_pred_bin = np.array(all_pred_bin)[mask_bin]
    filtered_labels_bin = np.array(all_labels_bin)[mask_bin]
    acc_bin = accuracy_score(filtered_labels_bin, filtered_pred_bin)
    
    # regression 
    mask_reg = ~np.isnan(all_labels_reg)
    filtered_pred_reg = np.array(all_pred_reg)[mask_reg]
    filtered_labels_reg = np.array(all_labels_reg)[mask_reg]
    mse_reg = mean_squared_error(filtered_labels_reg, filtered_pred_reg)
    
    # average confidence
    confianza_prom_cls1 = np.mean(confianza_cls1) if len(confianza_cls1) > 0 else 0
    confianza_prom_cls2 = np.mean(confianza_cls2) if len(confianza_cls2) > 0 else 0
    confianza_prom_bin = np.mean(confianza_bin) if len(confianza_bin) > 0 else 0
    confianza_prom_reg = np.mean(confianza_reg) if len(confianza_reg) > 0 else 0

    best_threshold = None
    best_accuracy = -float('inf')
    
    confidences = np.array(confianza_cls1)  
    
    for threshold in umbral_range:
        pred_cls1_thresh = (confidences > threshold).astype(int)  
        
        # filtering by the threshold considered
        mask = confidences > threshold
        filtered_pred_cls1_thresh = filtered_pred_cls1[mask]
        filtered_labels_cls1_thresh = filtered_labels_cls1[mask]
        current_len = 0
        
        # accuracy calculation over the filtered data
        if len(filtered_labels_cls1_thresh) > 0:  
            acc = accuracy_score(filtered_labels_cls1_thresh, filtered_pred_cls1_thresh)
        
            # save the best threshold
            if acc > best_accuracy:
                best_accuracy = acc
                best_threshold = threshold
                current_len = len(filtered_labels_cls1_thresh)
                best_mask = mask
    
    acc_final = accuracy_score(filtered_labels_cls2[~best_mask], filtered_pred_cls2[~best_mask])

    print(f'Precisión clasificación multiclase 1: {acc_cls1:.4f}')
    print(f'Precisión clasificación multiclase 2: {acc_cls2:.4f}')
    print(f'Precisión clasificación binaria: {acc_bin:.4f}')
    print(f'Error cuadrático medio (MSE) en regresión: {mse_reg:.4f}')
    
    print(f'Confianza promedio (cls1): {confianza_prom_cls1:.4f}')
    print(f'Confianza promedio (cls2): {confianza_prom_cls2:.4f}')
    print(f'Confianza promedio (binaria): {confianza_prom_bin:.4f}')
    print(f'Confianza promedio (regresión): {confianza_prom_reg:.4f}')
    
    print(f'El mejor umbral de confianza para tarea 1 es: {best_threshold:.4f}')
    print(f'Con ese umbral, la precisión es: {best_accuracy:.4f}')

    return {'acc_cls1': acc_cls1, 'acc_cls2': acc_cls2, 'acc_bin': acc_bin, 'mse_reg': mse_reg, 'best_threshold': best_threshold, 'best_accuracy': best_accuracy, 'proportion where threshold is applied': current_len/len(filtered_pred_cls1), 'acc on cls2 w/ ee': acc_final}


metrics = evaluar_modelo_con_confianza(model, images_test, y_fire_test, y_fire_test, y_col_test, y_steer_test)


  softmax_cls1 = F.softmax(logits_cls1, dim=1) if not np.isnan(label_cls1).any() else None
  softmax_cls2 = F.softmax(logits_cls2, dim=1) if not np.isnan(label_cls2).any() else None
  pred_cls1 = torch.argmax(softmax_cls1, dim=1) if not np.isnan(label_cls1).any() else None
  pred_cls2 = torch.argmax(softmax_cls2, dim=1) if not np.isnan(label_cls2).any() else None
  sigmoide_bin = torch.sigmoid(logits_bin) if not np.isnan(label_bin).any() else None
  pred_bin = (sigmoide_bin > 0.5).int() if not np.isnan(label_bin).any() else None
  pred_reg = logits_reg.item() if not np.isnan(label_reg).any() else None
  if not np.isnan(label_cls1).any():
  if not np.isnan(label_cls2).any():
  if not np.isnan(label_bin).any():
  if not np.isnan(label_reg).any():
  if not np.isnan(label_cls1).any():
  if not np.isnan(label_cls2).any():
  if not np.isnan(label_bin).any():
  if not np.isnan(label_reg).any():
100%|██████████| 5157/5157 [29:46<00:00,  2.89it/s]

Precisión clasificación multiclase 1: 0.7819
Precisión clasificación multiclase 2: 0.8083
Precisión clasificación binaria: 0.9499
Error cuadrático medio (MSE) en regresión: 0.0018
Confianza promedio (cls1): 0.9434
Confianza promedio (cls2): 0.9485
Confianza promedio (binaria): 0.2174
Confianza promedio (regresión): 0.0282
El mejor umbral de confianza para tarea 1 es: 0.9500
Con ese umbral, la precisión es: 0.8608





In [None]:
with open('metrics_EE.pkl', 'wb') as f:
    pkl.dump(metrics, f)

In [9]:
metrics

{'acc_cls1': 0.7818791946308725,
 'acc_cls2': 0.8083053691275168,
 'acc_bin': 0.9499374217772215,
 'mse_reg': np.float64(0.0017734443385888697),
 'best_threshold': np.float64(0.9500000000000003),
 'best_accuracy': 0.86084142394822,
 'proportion where threshold is applied': 0.7776845637583892,
 'acc on cls2 w/ ee': 0.6}

In [12]:
# overall accuracy on fire task
acc_task = metrics['acc on cls2 w/ ee']* (1-metrics['proportion where threshold is applied']) + metrics['best_accuracy']*metrics['proportion where threshold is applied']
print(f'Overall accuracy on fire task: {acc_task:.4f}')
print(f'Maximum accuracy that could be achieved: {metrics["acc_cls2"]:.4f}')

Overall accuracy on fire task: 0.8029
Maximum accuracy that could be achieved: 0.8083
