In [128]:
# Own files
from Functions2 import *
from DataDefined2 import *
from ood_models import OCSVM, LocalOutlierFactorModel, IsolationForestModel

from itertools import permutations
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, roc_auc_score


In [129]:
# Dataset preparation: read the CSV, prune unused columns, add the derived
# ride-number / delta columns and encode the 'Class' labels as integers.
# NOTE(review): pd, LabelEncoder, one_val_col, irrelevant_col, delta_col and the
# two helpers are not imported explicitly here - presumably they arrive through
# the star imports at the top of the notebook; confirm.
columns_to_drop = one_val_col + irrelevant_col
df = pd.read_csv('Driving Data(KIA SOUL)_(150728-160714)_(10 Drivers_A-J).csv')
df = df.drop(columns=columns_to_drop)
df = add_delta(addRideNumbers(df), delta_col)

# Keep the fitted encoder around so the integer ids can be mapped back later.
encoder = LabelEncoder()
df['Class'] = encoder.fit_transform(df['Class'])

In [130]:
# Integer class ids 0..9 (despite its name, this is the list of ids, not a count).
number_of_classes = [class_id for class_id in range(10)]

# Every ordered (ID class, OOD class) pair with two distinct classes; (a, b) and
# (b, a) are both present because the roles of the two classes differ.
class_comparisons = [
    (id_class, ood_class)
    for id_class in number_of_classes
    for ood_class in number_of_classes
    if id_class != ood_class
]

In [131]:
# Sanity check: 10 classes give 10 * 9 = 90 ordered (ID, OOD) pairs.
len(class_comparisons)

90

In [132]:
# One-class OOD benchmark.
# For every ordered (ID, OOD) class pair, each detector is trained on windows of
# the ID class only and then handed the held-out ID windows together with all
# windows of the OOD class. AUROC / FPR95 and the raw scores are collected in
# `results` and finally turned into `results_df`.

# Prototype detectors keyed by the name stored in the results table. Only the
# *type* of each entry is used below: a fresh instance is created for every
# class pair so nothing fitted on one pair can carry over to the next.
models = {
    'IsolationForest': IsolationForestModel(),
    'OneClassSVM': OCSVM(),
    'LOF': LocalOutlierFactorModel()
}

# Windowing and split settings (previously inline magic numbers).
WINDOW_SIZE = 60
WINDOW_OVERLAP = 20
TRAIN_FRACTION = 0.7
VAL_FRACTION = 0.1  # validation share; it is folded into the training set below

# Columns removed before windowing (loop-invariant, so built once).
columns_to_remove = ["Time(s)", "Class", "PathOrder"]

results = []

for CLASS_ID, CLASS_OOD in class_comparisons:
    print(f"Evaluating ID: {CLASS_ID} vs OOD: {CLASS_OOD}")

    # Extract ID and OOD data and strip the non-feature columns
    id_data = df[df['Class'] == CLASS_ID].drop(columns=columns_to_remove)
    ood_data = df[df['Class'] == CLASS_OOD].drop(columns=columns_to_remove)

    # Cut both recordings into sliding windows
    id_windows = create_sliding_windows(id_data, window_size=WINDOW_SIZE, overlap=WINDOW_OVERLAP)
    ood_windows = create_sliding_windows(ood_data, window_size=WINDOW_SIZE, overlap=WINDOW_OVERLAP)

    n_id_samples = len(id_windows)
    train_idx = int(n_id_samples * TRAIN_FRACTION)
    val_idx = train_idx + int(n_id_samples * VAL_FRACTION)

    # The train and validation ranges are contiguous and were merged anyway, so
    # take them with a single slice. Unlike the former `train_data += val_data`
    # this does not depend on the windows being a plain Python list (on an
    # ndarray `+=` would add element-wise instead of concatenating).
    train_data = id_windows[:val_idx]
    test_id_data = id_windows[val_idx:]

    # One feature vector per window
    train_data = np.array([extract_features_from_window(window) for window in train_data])
    test_id_data = np.array([extract_features_from_window(window) for window in test_id_data])
    test_ood_data = np.array([extract_features_from_window(window) for window in ood_windows])

    for model_name, model in models.items():
        print(f"Evaluating with {model_name}...")

        # Fresh, unfitted detector of the same class as the prototype. This
        # replaces three copy-pasted if/elif branches that differed only in the
        # class being instantiated (and that silently reused stale scores if a
        # new key was ever added to `models` without a matching branch).
        detector = type(model)()
        detector.train_model(train_data)
        ood_scores, ood_labels, all_scores = detector.test_model(test_id_data, test_ood_data)

        auroc, fpr95 = evaluate_ood_performance(ood_scores, ood_labels)
        print(auroc)

        results.append({
            'CLASS_ID': CLASS_ID,
            'CLASS_OOD': CLASS_OOD,
            'Model': model_name,
            'AUROC': auroc,
            'FPR95': fpr95,
            'AllScores': all_scores,
            'OOD labels': ood_labels,
        })

# One row per (ID class, OOD class, model) combination
results_df = pd.DataFrame(results)
# results_df.to_csv('comparison3.csv')

Evaluating ID: 0 vs OOD: 1
Evaluating with IsolationForest...
0.7108697482529257
Evaluating with OneClassSVM...
0.928517302349078
Evaluating with LOF...
0.9152984760461396
Evaluating ID: 0 vs OOD: 2
Evaluating with IsolationForest...
0.6565977742448331
Evaluating with OneClassSVM...
0.8738256973551092
Evaluating with LOF...
0.8520017343546755
Evaluating ID: 0 vs OOD: 3
Evaluating with IsolationForest...
0.6863226863226863
Evaluating with OneClassSVM...
0.9262899262899262
Evaluating with LOF...
0.9218673218673219
Evaluating ID: 0 vs OOD: 4
Evaluating with IsolationForest...
0.7507078507078507
Evaluating with OneClassSVM...
0.9682110682110683
Evaluating with LOF...
0.9706563706563707
Evaluating ID: 0 vs OOD: 5
Evaluating with IsolationForest...
0.6553560860130202
Evaluating with OneClassSVM...
0.8260998224501873
Evaluating with LOF...
0.8085421187610968
Evaluating ID: 0 vs OOD: 6
Evaluating with IsolationForest...
0.7128741644870678
Evaluating with OneClassSVM...
0.8188026736413833
Evalu

In [138]:
def _print_auroc_summary(label, rows, blank_line_after=True):
    """Print mean, std, min and max of the 'AUROC' column of `rows`.

    label            -- text placed in front of each printed statistic
    rows             -- DataFrame slice holding an 'AUROC' column
    blank_line_after -- when True, a '\n' is appended to the last print so the
                        next summary is visually separated
    """
    scores = rows['AUROC']
    print(f'{label} AUROC mean:', scores.mean())
    print(f'{label} AUROC std:', scores.std())
    print(f'{label} AUROC min:', scores.min())
    if blank_line_after:
        print(f'{label} AUROC max:', scores.max(), '\n')
    else:
        print(f'{label} AUROC max:', scores.max())


# Per-model AUROC summary over all rows of `comparison` for that model.
# NOTE(review): `comparison` is not defined in the cells visible here -
# presumably it is loaded in an earlier cell; confirm before Run All.
iforest_stats = comparison.loc[comparison['Model'] == 'IsolationForest']
_print_auroc_summary('Isolation forest', iforest_stats)

svm_stats = comparison.loc[comparison['Model'] == 'OneClassSVM']
_print_auroc_summary('OneClassSVM', svm_stats)

lof_stats = comparison.loc[comparison['Model'] == 'LOF']
_print_auroc_summary('LocalOutlierFactor', lof_stats, blank_line_after=False)

Isolation forest AUROC mean: 0.6547037874932246
Isolation forest AUROC std: 0.11546846090722697
Isolation forest AUROC min: 0.4262773722627737
Isolation forest AUROC max: 0.9237976782752902 

OneClassSVM AUROC mean: 0.7395639172559685
OneClassSVM AUROC std: 0.13662540195216016
OneClassSVM AUROC min: 0.4373983739837399
OneClassSVM AUROC max: 0.9782142857142856 

LocalOutlierFactor AUROC mean: 0.725365657610308
LocalOutlierFactor AUROC std: 0.13275137451224098
LocalOutlierFactor AUROC min: 0.4510944340212633
LocalOutlierFactor AUROC max: 0.9819047619047618
