In [1]:
import pandas as pd
import os
import openpyxl
from natsort import natsorted, index_natsorted
from tqdm import tqdm
import numpy as np
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, auc
import matplotlib.pyplot as plt

In [2]:
def auc_res(u_path, d_path, seconds_to_anticipate, fps=15):
    """Score how well an uncertainty signal anticipates crashes in one simulation.

    Every frame receives a binary label. A window opens at each frame ``i``
    whose ``'crashed'`` value equals 0 while the value at frame
    ``i + frames`` equals 1, where ``frames = fps * seconds_to_anticipate``;
    all frames from ``i`` through ``i + frames`` (inclusive) are labelled 1.
    The uncertainty values are then evaluated as a classifier score against
    those labels.

    Parameters
    ----------
    u_path : str or path-like
        CSV file whose column ``'0'`` holds the per-frame uncertainty values.
    d_path : str or path-like
        CSV driving log containing a per-frame ``'crashed'`` column.
    seconds_to_anticipate : int or float
        Length of the anticipation window, in seconds.
    fps : int, optional
        Frames per second used to convert seconds to frames (default 15).

    Returns
    -------
    tuple
        ``(fpr, tpr, roc_thresholds, precision, recall, prc_thresholds,
        roc_auc, prc_auc)`` where the first three come from ``roc_curve``,
        the next three from ``precision_recall_curve``, and the two areas are
        computed with ``sklearn.metrics.auc``.
    """
    uncertainty = pd.read_csv(u_path)
    driving_log = pd.read_csv(d_path)

    # Window length in frames; rounding to int lets fractional seconds work
    # (the labelling below needs an integer offset).
    frames_to_anticipate = int(round(fps * seconds_to_anticipate))

    # Align both tables row by row; rows missing from the shorter file become NaN.
    data = pd.concat([uncertainty, driving_log], axis=1)

    # Label the anticipation windows with array operations instead of a
    # per-row .iloc lookup, which is very slow on long logs.
    crashed = data['crashed'].to_numpy()
    true_labels = np.zeros(len(data), dtype=np.int64)
    if 0 < frames_to_anticipate < len(data):
        opens_window = (
            (crashed[:-frames_to_anticipate] == 0)
            & (crashed[frames_to_anticipate:] == 1)
        )
        for start in np.flatnonzero(opens_window):
            # +1 keeps the end frame inside the window (inclusive bounds).
            true_labels[start:start + frames_to_anticipate + 1] = 1

    uncertainty_values = data['0'].values

    # ROC curve and its area
    fpr, tpr, roc_thresholds = roc_curve(true_labels, uncertainty_values)
    roc_auc = auc(fpr, tpr)

    # Precision-recall curve and its area
    precision, recall, prc_thresholds = precision_recall_curve(true_labels, uncertainty_values)
    prc_auc = auc(recall, precision)

    return fpr, tpr, roc_thresholds, precision, recall, prc_thresholds, roc_auc, prc_auc

In [3]:
# Condition names; each one is used as a sub-directory name when building file paths.
icse20 = [
    f'DAVE2-Track1-{variant}'
    for variant in (
        'DayNight', 'DayNightFog', 'DayNightRain', 'DayNightSnow',
        'Fog', 'Rain', 'Snow',
    )
]

# Note the gaps: there is no fog-30 and no snow-80 entry.
ase22 = (
    [f'xai-track1-fog-{level}' for level in (10, 20, 40, 50, 60, 70, 80, 90, 100)]
    + [f'xai-track1-rain-{level}' for level in (10, 20, 30, 40, 50, 60, 70, 80, 90, 100)]
    + [f'xai-track1-snow-{level}' for level in (10, 20, 30, 40, 50, 60, 70, 90, 100)]
)

# Names follow 'udacity_<operator>_mutated0_MP_<variant>', grouped here by operator.
mutants = [
    f'udacity_{operator}_mutated0_MP_{variant}'
    for operator, variants in (
        ('add_weights_regularisation', ('l1_3_1', 'l1_l2_3_2', 'l2_3_0')),
        ('change_activation_function', (
            'exponential_4_0', 'hard_sigmoid_4_0', 'relu_4_2', 'selu_4_0',
            'sigmoid_4_3', 'softmax_4_4', 'softsign_4_5', 'tanh_4_2',
        )),
        ('change_dropout_rate', (
            '0.25_0.25_6_7', '0.75_0.75_6_0', '0.125_0.125_6_2', '1.0_1.0_6_1',
        )),
        ('change_label', ('12.5_4', '25.0_1')),
        ('change_loss_function', ('mean_absolute_error_2',)),
    )
    for variant in variants
]

In [4]:
# Every condition to evaluate, in the order icse20, ase22, mutants.
conditions = [*icse20, *ase22, *mutants]

In [22]:
import os
import pandas as pd
from sklearn.metrics import roc_curve, auc, precision_recall_curve

def auc_res_2(conditions, models, model_type, fps=15, anticipation_seconds=(1, 2, 3)):
    """Compute crash-anticipation AUC metrics for every condition/horizon/model combination.

    For each condition the driving log is read from
    ``simulations/<condition>/driving_log.csv`` and, for each anticipation
    horizon, frames are labelled 1 when they fall inside an anticipation
    window: a window opens at each frame ``i`` whose ``'crashed'`` value equals
    0 while the value at frame ``i + fps * seconds`` equals 1, and spans frames
    ``i`` through ``i + fps * seconds`` inclusive. Each model's uncertainty
    values (column ``'0'`` of
    ``plots/uncertainty/<model_type>/<condition>/<model>``) are then scored
    against those labels.

    Parameters
    ----------
    conditions : iterable of str
        Condition (directory) names to evaluate.
    models : sequence of str
        Uncertainty CSV file names; iterated once per condition and horizon.
    model_type : str
        Sub-directory of ``plots/uncertainty`` and suffix of the output file.
    fps : int, optional
        Frames per second used to convert seconds to frames (default 15).
    anticipation_seconds : iterable of int or float, optional
        Anticipation horizons in seconds (default ``(1, 2, 3)``).

    Returns
    -------
    None
        One row per (model, condition, seconds) is written to
        ``results/auc/auc_results_<model_type>.csv``. The FPR/TPR/threshold/
        precision/recall columns hold the mean of the corresponding curve array.
    """
    results = []

    for condition in tqdm(conditions):
        d_path = os.path.join('simulations', condition, 'driving_log.csv')
        driving_log = pd.read_csv(d_path)
        crashed = driving_log['crashed'].to_numpy()
        n_frames = len(crashed)

        # The uncertainty files do not depend on the horizon, so read each one
        # once per condition instead of once per horizon.
        uncertainty_by_model = {}
        for model in models:
            u_path = os.path.join('plots', 'uncertainty', model_type, condition, model)
            uncertainty_by_model[model] = pd.read_csv(u_path)['0'].values

        for seconds in anticipation_seconds:
            # Window length in frames; rounding to int lets fractional seconds work.
            frames_to_anticipate = int(round(fps * seconds))

            # Label the anticipation windows with array operations instead of
            # a per-row .iloc lookup, which dominated the run time.
            true_labels = np.zeros(n_frames, dtype=np.int64)
            if 0 < frames_to_anticipate < n_frames:
                opens_window = (
                    (crashed[:-frames_to_anticipate] == 0)
                    & (crashed[frames_to_anticipate:] == 1)
                )
                for start in np.flatnonzero(opens_window):
                    # +1 keeps the end frame inside the window (inclusive bounds).
                    true_labels[start:start + frames_to_anticipate + 1] = 1

            for model in models:
                uncertainty_values = uncertainty_by_model[model]

                fpr, tpr, roc_thresholds = roc_curve(true_labels, uncertainty_values)
                roc_auc = auc(fpr, tpr)

                precision, recall, prc_thresholds = precision_recall_curve(true_labels, uncertainty_values)
                prc_auc = auc(recall, precision)

                results.append([model, condition, seconds, roc_auc, prc_auc, fpr.mean(), tpr.mean(), roc_thresholds.mean(), prc_thresholds.mean(), precision.mean(), recall.mean()])

    result_df = pd.DataFrame(results, columns=['Model', 'Condition', 'Seconds', 'AUC-ROC', 'AUC-PRC', 'FPR', 'TPR', 'ROC Thresholds','PRC Thresholds', 'Precision', 'Recall'])

    # Make sure the output directory exists so a long run is not lost at the final write.
    os.makedirs('results/auc', exist_ok=True)
    result_df.to_csv(f'results/auc/auc_results_{model_type}.csv', index=False)

In [6]:
# File names of the 'mcd' uncertainty CSVs: one per combination of the two
# numeric settings encoded in the name (presumably dropout rate and sample
# count -- TODO confirm). The first setting varies slowest.
models_mc = [
    f'dave2-p10-track1-mcd_{first}_S{second}.csv'
    for first in (5, 10, 15, 20, 25, 30, 35)
    for second in (2, 5, 10, 64, 128)
]

In [23]:
# File names of the 'DE' uncertainty CSVs, one per numeric setting encoded in
# the name (presumably the ensemble size -- TODO confirm).
models_de = [f'dave2-track1-DE_{setting}.csv' for setting in (3, 5, 10, 50, 120)]

In [8]:
# Score every 'DE' uncertainty file on every condition; results go to results/auc/auc_results_DE.csv
auc_res_2(conditions, models_de, model_type='DE')

100%|██████████| 53/53 [06:53<00:00,  7.80s/it]


In [9]:
# Score every 'MC' uncertainty file on every condition; results go to results/auc/auc_results_MC.csv
auc_res_2(conditions, models_mc, model_type='MC')

100%|██████████| 53/53 [03:48<00:00,  4.31s/it]


In [10]:
# Reload the combined 'DE' results table (the path auc_res_2 writes for model_type='DE')
result_df = pd.read_csv(r'results/auc/auc_results_DE.csv')

In [11]:
# Split the combined table into one CSV per model under results/auc;
# the 'Model' value is used as the file name as-is.
for model_name in result_df['Model'].unique():
    rows_for_model = result_df.loc[result_df['Model'] == model_name]
    rows_for_model.to_csv(os.path.join('results', 'auc', f'{model_name}'), index=False)

In [12]:
# Rebind result_df to the 'MC' results table; code that reads result_df after this line sees this data
result_df = pd.read_csv(r'results/auc/auc_results_MC.csv')

In [13]:
# Split the combined table into one CSV per model under results/auc;
# the 'Model' value is used as the file name as-is.
for model_name in result_df['Model'].unique():
    rows_for_model = result_df.loc[result_df['Model'] == model_name]
    rows_for_model.to_csv(os.path.join('results', 'auc', f'{model_name}'), index=False)

In [14]:
# Mean of every numeric metric per (Model, Seconds) pair, i.e. averaged over the
# remaining rows of each group. numeric_only=True excludes the text 'Condition'
# column explicitly: pandas >= 2.0 raises a TypeError on it instead of silently
# dropping it.
averages_df = result_df.groupby(['Model', 'Seconds']).mean(numeric_only=True).reset_index()
averages_df

Unnamed: 0,Model,Seconds,AUC-ROC,AUC-PRC,FPR,TPR,ROC Thresholds,PRC Thresholds,Precision,Recall
0,dave2-p10-track1-mcd_10_S10.csv,1,0.742835,0.154343,0.269872,0.524864,0.013112,0.003574,0.078165,0.670936
1,dave2-p10-track1-mcd_10_S10.csv,2,0.752058,0.216744,0.264355,0.534349,0.009154,0.003302,0.133823,0.689384
2,dave2-p10-track1-mcd_10_S10.csv,3,0.755628,0.268211,0.264412,0.541271,0.007721,0.003236,0.182066,0.687828
3,dave2-p10-track1-mcd_10_S128.csv,1,0.759501,0.185876,0.256134,0.536220,0.013779,0.004809,0.101583,0.621324
4,dave2-p10-track1-mcd_10_S128.csv,2,0.771381,0.255108,0.245858,0.543006,0.009921,0.004412,0.166513,0.648897
...,...,...,...,...,...,...,...,...,...,...
100,dave2-p10-track1-mcd_5_S5.csv,2,0.758270,0.183009,0.260704,0.526542,0.006769,0.001569,0.117415,0.733748
101,dave2-p10-track1-mcd_5_S5.csv,3,0.754096,0.243322,0.272149,0.537735,0.005556,0.001559,0.164047,0.723701
102,dave2-p10-track1-mcd_5_S64.csv,1,0.836451,0.192802,0.181519,0.531866,0.011094,0.002916,0.108331,0.727128
103,dave2-p10-track1-mcd_5_S64.csv,2,0.837064,0.293682,0.188001,0.551907,0.007933,0.002601,0.175773,0.748766


In [17]:
# Metric used to rank the models (e.g., 'AUC-ROC' or 'AUC-PRC')
metric = 'AUC-ROC'

# Number of best-scoring models to keep for each 'Seconds' value
top_n = 3

# For each 'Seconds' value (in order of first appearance), keep the top_n rows
# with the highest value of the chosen metric.
top_per_second = [
    averages_df[averages_df['Seconds'] == second]
    .sort_values(by=metric, ascending=False)
    .head(top_n)
    for second in averages_df['Seconds'].unique()
]

# DataFrame.append was removed in pandas 2.0 and re-copies the frame on every
# call; concatenate the pieces once instead. ignore_index=True gives the same
# fresh 0..n-1 index the old reset_index(drop=True) produced.
if top_per_second:
    best_models_df = pd.concat(top_per_second, ignore_index=True)
else:
    # pd.concat rejects an empty list; fall back to an empty frame with the same columns.
    best_models_df = averages_df.head(0)

# Display the best models DataFrame
print(best_models_df)

                              Model  Seconds   AUC-ROC   AUC-PRC       FPR  \
0   dave2-p10-track1-mcd_5_S128.csv        1  0.842931  0.213250  0.176944   
1    dave2-p10-track1-mcd_5_S64.csv        1  0.836451  0.192802  0.181519   
2    dave2-p10-track1-mcd_5_S10.csv        1  0.798922  0.135146  0.214564   
3   dave2-p10-track1-mcd_5_S128.csv        2  0.842795  0.313007  0.184409   
4    dave2-p10-track1-mcd_5_S64.csv        2  0.837064  0.293682  0.188001   
5  dave2-p10-track1-mcd_30_S128.csv        2  0.803447  0.287982  0.220181   
6   dave2-p10-track1-mcd_5_S128.csv        3  0.841672  0.388343  0.193292   
7    dave2-p10-track1-mcd_5_S64.csv        3  0.836629  0.370633  0.195909   
8  dave2-p10-track1-mcd_30_S128.csv        3  0.803047  0.332602  0.222749   

        TPR  ROC Thresholds  PRC Thresholds  Precision    Recall  
0  0.536343        0.011216        0.003011   0.112958  0.725215  
1  0.531866        0.011094        0.002916   0.108331  0.727128  
2  0.518173       

In [18]:
best_models_df

Unnamed: 0,Model,Seconds,AUC-ROC,AUC-PRC,FPR,TPR,ROC Thresholds,PRC Thresholds,Precision,Recall
0,dave2-p10-track1-mcd_5_S128.csv,1,0.842931,0.21325,0.176944,0.536343,0.011216,0.003011,0.112958,0.725215
1,dave2-p10-track1-mcd_5_S64.csv,1,0.836451,0.192802,0.181519,0.531866,0.011094,0.002916,0.108331,0.727128
2,dave2-p10-track1-mcd_5_S10.csv,1,0.798922,0.135146,0.214564,0.518173,0.010473,0.002164,0.077843,0.740006
3,dave2-p10-track1-mcd_5_S128.csv,2,0.842795,0.313007,0.184409,0.555471,0.008007,0.002723,0.185376,0.742967
4,dave2-p10-track1-mcd_5_S64.csv,2,0.837064,0.293682,0.188001,0.551907,0.007933,0.002601,0.175773,0.748766
5,dave2-p10-track1-mcd_30_S128.csv,2,0.803447,0.287982,0.220181,0.558737,0.01258,0.006181,0.16644,0.692802
6,dave2-p10-track1-mcd_5_S128.csv,3,0.841672,0.388343,0.193292,0.574479,0.006766,0.002585,0.242401,0.742382
7,dave2-p10-track1-mcd_5_S64.csv,3,0.836629,0.370633,0.195909,0.569133,0.006676,0.002491,0.233256,0.746523
8,dave2-p10-track1-mcd_30_S128.csv,3,0.803047,0.332602,0.222749,0.561461,0.010673,0.005758,0.213322,0.698708


In [None]:
# Load the combined 'DE' results table (one row per model/condition/seconds)
results_df = pd.read_csv('results/auc/auc_results_DE.csv')


def _natural_rank(column):
    """Return, for each entry of *column*, the rank of its value in natural sort order.

    Equal values share the same rank, so the ranks can serve as one key of a
    multi-key sort.
    """
    as_text = column.astype(str)
    rank_of = {value: rank for rank, value in enumerate(natsorted(as_text.unique()))}
    return as_text.map(rank_of).to_numpy()


# Reorder whole rows by Model, then Condition, then Seconds in natural order.
# Assigning natsorted(column) back to a column would sort that column on its
# own and detach the labels from the metric values in the same row.
# np.lexsort treats the LAST key as the primary one, hence the reversed listing.
row_order = np.lexsort([
    _natural_rank(results_df['Seconds']),
    _natural_rank(results_df['Condition']),
    _natural_rank(results_df['Model']),
])
results_df = results_df.iloc[row_order].reset_index(drop=True)

# Save the sorted DataFrame to an Excel workbook ('Seconds' stays numeric)
results_df.to_excel('results/auc/sorted_results_de.xlsx', index=False)