In [57]:
# Own files
from Functions2 import *
from DataDefined2 import *
from ood_models import OCSVM, LocalOutlierFactorModel, IsolationForestModel

from itertools import permutations
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, roc_auc_score


In [58]:
# Load and prepare dataset
df = pd.read_csv('Driving Data(KIA SOUL)_(150728-160714)_(10 Drivers_A-J).csv')
columns_to_drop = one_val_col + irrelevant_col
df = df.drop(columns_to_drop, axis=1)
df = addRideNumbers(df)
df = add_delta(df, delta_col)
encoder = LabelEncoder()
df['Class'] = encoder.fit_transform(df['Class'])

In [59]:
number_of_classes = list(range(10))
# All possible ordered pairs of class comparisons, where one is ID and the other OOD
class_comparisons = list(permutations(number_of_classes, 2))

In [60]:
models = {
    'IsolationForest': IsolationForestModel(),
    'OneClassSVM': OCSVM(),
    'LOF': LocalOutlierFactorModel()
}

results = []

for CLASS_ID, CLASS_OOD in class_comparisons:
    print(f"Evaluating ID: {CLASS_ID} vs OOD: {CLASS_OOD}")
    
    # Extract ID and OOD data
    id_data = df[df['Class'] == CLASS_ID]
    ood_data = df[df['Class'] == CLASS_OOD]

    # Apply feature selection and sliding window
    id_windows = create_sliding_windows(id_data, window_size=60, overlap=20)
    ood_windows = create_sliding_windows(ood_data, window_size=60, overlap=20)

    n_id_samples = len(id_windows)
    train_idx = int(n_id_samples * 0.7)
    val_idx = train_idx + int(n_id_samples * 0.1)

    train_data = id_windows[:train_idx]
    val_data = id_windows[train_idx:val_idx]
    test_id_data = id_windows[val_idx:]
    train_data += val_data

    train_data = np.array([extract_features_from_window(window) for window in train_data])
    test_id_data = np.array([extract_features_from_window(window) for window in test_id_data])
    test_ood_data = np.array([extract_features_from_window(window) for window in ood_windows])

    for model_name, model in models.items():
        print(f"Evaluating with {model_name}...")

        if model_name == 'LOF': 
            local_outlier_factor = LocalOutlierFactorModel()
            local_outlier_factor.train_model(train_data)
            ood_scores, ood_labels = local_outlier_factor.test_model(test_id_data, test_ood_data)    

        elif model_name == 'OneClassSVM':
            one_class_svm = OCSVM()
            one_class_svm.train_model(train_data)
            ood_scores, ood_labels = one_class_svm.test_model(test_id_data, test_ood_data)

        elif model_name == 'IsolationForest':
            isolation_forest = IsolationForestModel()
            isolation_forest.train_model(train_data)
            ood_scores, ood_labels = isolation_forest.test_model(test_id_data, test_ood_data)    
            
        auroc, fpr95 = evaluate_ood_performance(ood_scores, ood_labels)

        results.append({
            'CLASS_ID': CLASS_ID,
            'CLASS_OOD': CLASS_OOD,
            'Model': model_name,
            'AUROC': auroc,
            'FPR95': fpr95
        })

results_df = pd.DataFrame(results)
results_df.to_csv('comparison.csv')

Evaluating ID: 0 vs OOD: 1
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 2
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 3
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 4
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 5
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 6
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 7
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...
Evaluating with LOF...
Evaluating ID: 0 vs OOD: 8
180
125
143
Evaluating with IsolationForest...
Evaluating with OneClassSVM...

In [63]:
comparison = pd.read_csv('comparison.csv')
comparison

Unnamed: 0.1,Unnamed: 0,CLASS_ID,CLASS_OOD,Model,AUROC,FPR95
0,0,0,1,IsolationForest,0.654458,0.702703
1,1,0,1,OneClassSVM,0.859055,0.405405
2,2,0,1,LOF,0.582386,0.783784
3,3,0,2,IsolationForest,0.609192,0.729730
4,4,0,2,OneClassSVM,0.782772,0.702703
...,...,...,...,...,...,...
265,265,9,7,OneClassSVM,0.628049,0.911111
266,266,9,7,LOF,0.615808,0.822222
267,267,9,8,IsolationForest,0.359908,0.933333
268,268,9,8,OneClassSVM,0.638660,0.955556


In [73]:
iforest_stats = comparison[comparison['Model'] == 'IsolationForest']
print('Isolation forest AUROC mean:', iforest_stats['AUROC'].mean())
print('Isolation forest AUROC min:', iforest_stats['AUROC'].min())
print('Isolation forest AUROC max:', iforest_stats['AUROC'].max(), '\n')

svm_stats = comparison[comparison['Model'] == 'OneClassSVM']
print('OneClassSVM AUROC mean:', svm_stats['AUROC'].mean())
print('OneClassSVM AUROC min:', svm_stats['AUROC'].min())
print('OneClassSVM AUROC max:', svm_stats['AUROC'].max(), '\n')

lof_stats = comparison[comparison['Model'] == 'LOF']
print('LocalOutlierFactor AUROC mean:', lof_stats['AUROC'].mean())
print('LocalOutlierFactor AUROC min:', lof_stats['AUROC'].min())
print('LocalOutlierFactor AUROC max:', lof_stats['AUROC'].max())


Isolation forest AUROC mean: 0.5193385648658873
Isolation forest AUROC min: 0.2288978494623656
Isolation forest AUROC max: 0.880101322825781 

OneClassSVM AUROC mean: 0.7248994129413119
OneClassSVM AUROC min: 0.415749578888265
OneClassSVM AUROC max: 0.9339285714285714 

LocalOutlierFactor AUROC mean: 0.4344619709146791
LocalOutlierFactor AUROC min: 0.0261313868613138
LocalOutlierFactor AUROC max: 0.9445773524720892


In [55]:
results_df['AUROC'].mean()

np.float64(0.48205434910566436)

In [56]:
results_df

Unnamed: 0,CLASS_ID,CLASS_OOD,AUROC,FPR95
0,0,1,0.510819,0.864865
1,0,2,0.463362,0.864865
2,0,3,0.560033,0.756757
3,0,4,0.547362,0.891892
4,0,5,0.531466,0.783784
...,...,...,...,...
85,9,4,0.458730,0.888889
86,9,5,0.413950,0.888889
87,9,6,0.343489,0.888889
88,9,7,0.391509,0.888889


In [8]:
isolation_forest = IsolationForestModel()
isolation_forest.train_model(train_data)
ood_scores, ood_labels = isolation_forest.test_model(test_id_data, test_ood_data)    
evaluate_ood_performance(ood_scores, ood_labels)

AUROC: 0.5743
FPR95: 0.8955


In [9]:
local_outlier_factor = LocalOutlierFactorModel()
local_outlier_factor.train_model(train_data)
ood_scores, ood_labels = local_outlier_factor.test_model(test_id_data, test_ood_data)    
evaluate_ood_performance(ood_scores, ood_labels)

AUROC: 0.6457
FPR95: 0.8657


In [11]:
one_class_svm = OCSVM()
one_class_svm.train_model(train_data)
ood_scores, ood_labels = one_class_svm.test_model(test_id_data, test_ood_data)    
evaluate_ood_performance(ood_scores, ood_labels)

AUROC: 0.7505
FPR95: 0.8209
