In [1]:
# Own files
from Functions2 import *
from DataDefined2 import *
from ood_models import OCSVM, LocalOutlierFactorModel, IsolationForestModel

from itertools import permutations
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, roc_auc_score


In [2]:
# Read the raw CSV, remove the columns listed in one_val_col and
# irrelevant_col, then pass the frame through addRideNumbers and add_delta.
columns_to_drop = one_val_col + irrelevant_col
df = (
    pd.read_csv('Driving Data(KIA SOUL)_(150728-160714)_(10 Drivers_A-J).csv')
    .drop(columns=columns_to_drop)
    .pipe(addRideNumbers)
    .pipe(add_delta, delta_col)
)

# Replace the original 'Class' labels with the integer codes assigned by the
# fitted encoder; `encoder` is kept so the codes can be mapped back later.
encoder = LabelEncoder()
df['Class'] = encoder.fit_transform(df['Class'])

In [3]:
# Integer class labels 0..9.
number_of_classes = [*range(10)]
# Every ordered pairing of two distinct labels: the first element of each
# tuple plays the ID role, the second the OOD role.
class_comparisons = [
    (id_label, ood_label)
    for id_label in number_of_classes
    for ood_label in number_of_classes
    if id_label != ood_label
]

In [4]:
# Sanity check: number of ordered (ID, OOD) pairs; 10 labels taken 2 at a
# time with order mattering gives 10 * 9 = 90.
len(class_comparisons)

90

In [23]:
# Detector templates, keyed by the name reported in the results table.
# The instances stored here are never fitted: every (ID, OOD) pair is
# evaluated with a brand-new instance of the same class (see the inner loop),
# so no fitted state carries over from one pair to the next.
models = {
    'IsolationForest': IsolationForestModel(),
    'OneClassSVM': OCSVM(),
    'LOF': LocalOutlierFactorModel()
}

# Settings forwarded to create_sliding_windows.
WINDOW_SIZE = 60
WINDOW_OVERLAP = 20
# Leading fractions of the ID windows used for fitting. The two parts are
# truncated separately (int(n * 0.7) + int(n * 0.1)) to keep the exact split
# point of the original train/validation slicing.
TRAIN_FRACTION = 0.7
VAL_FRACTION = 0.1

# One dict per (ID class, OOD class, model) combination.
results = []

for CLASS_ID, CLASS_OOD in class_comparisons:
    print(f"Evaluating ID: {CLASS_ID} vs OOD: {CLASS_OOD}")

    # Rows belonging to the in-distribution and out-of-distribution class.
    id_data = df[df['Class'] == CLASS_ID]
    ood_data = df[df['Class'] == CLASS_OOD]

    # Window both subsets with the same sliding-window settings.
    id_windows = create_sliding_windows(id_data, window_size=WINDOW_SIZE, overlap=WINDOW_OVERLAP)
    ood_windows = create_sliding_windows(ood_data, window_size=WINDOW_SIZE, overlap=WINDOW_OVERLAP)

    n_id_samples = len(id_windows)
    train_idx = int(n_id_samples * TRAIN_FRACTION)
    val_idx = train_idx + int(n_id_samples * VAL_FRACTION)

    # The validation slice is folded into the training set, so fit on the
    # first `val_idx` windows directly. This selects the same windows as
    # slicing train/val separately and concatenating them, without relying on
    # an in-place `+=` (which would add element-wise if the windows were a
    # NumPy array rather than a list).
    train_data = id_windows[:val_idx]
    test_id_data = id_windows[val_idx:]

    # One feature vector per window.
    train_data = np.array([extract_features_from_window(window) for window in train_data])
    test_id_data = np.array([extract_features_from_window(window) for window in test_id_data])
    test_ood_data = np.array([extract_features_from_window(window) for window in ood_windows])

    for model_name, model_template in models.items():
        print(f"Evaluating with {model_name}...")

        # Fresh, default-constructed detector of the template's class; this
        # replaces the per-name if/elif chain that rebuilt each model by hand.
        model = type(model_template)()
        model.train_model(train_data)
        ood_scores, ood_labels = model.test_model(test_id_data, test_ood_data)

        auroc, fpr95 = evaluate_ood_performance(ood_scores, ood_labels)

        results.append({
            'CLASS_ID': CLASS_ID,
            'CLASS_OOD': CLASS_OOD,
            'Model': model_name,
            'AUROC': auroc,
            'FPR95': fpr95,
        })

results_df = pd.DataFrame(results)
# Uncomment to persist the results. index=False keeps the row index from being
# written as an extra unnamed column that reappears on the next read_csv.
# results_df.to_csv('comparison3.csv', index=False)

In [19]:
# Shortcut: load previously saved evaluation results instead of spending
# 20+ minutes re-running the evaluation cell above.
# NOTE(review): this reads 'comparison2.csv', while the commented-out save in
# the evaluation cell targets 'comparison3.csv' -- confirm which file is current.
# NOTE(review): the loaded frame carries 'Unnamed: 0.1' / 'Unnamed: 0' columns,
# presumably row indices written by earlier saves -- confirm before using them.
comparison = pd.read_csv('comparison2.csv')
comparison

Unnamed: 0.1,Unnamed: 0,CLASS_ID,CLASS_OOD,Model,AUROC,FPR95
0,0,0,1,IsolationForest,0.654458,0.702703
1,1,0,1,OneClassSVM,0.859055,0.405405
2,2,0,1,LOF,0.417614,0.783784
3,3,0,2,IsolationForest,0.609192,0.729730
4,4,0,2,OneClassSVM,0.782772,0.702703
...,...,...,...,...,...,...
265,265,9,7,OneClassSVM,0.628049,0.911111
266,266,9,7,LOF,0.384192,0.933333
267,267,9,8,IsolationForest,0.359908,0.933333
268,268,9,8,OneClassSVM,0.638660,0.955556


In [20]:
# Split the results table into one frame per model, selected by the value of
# the 'Model' column. iforest_stats must filter on 'IsolationForest'; filtering
# it on 'OneClassSVM' would make it a duplicate of svm_stats.
svm_stats = comparison[comparison['Model'] == 'OneClassSVM']
iforest_stats = comparison[comparison['Model'] == 'IsolationForest']
lof_stats = comparison[comparison['Model'] == 'LOF']

In [35]:
# Display the rows of `comparison` whose 'Model' is 'OneClassSVM'.
svm_stats

Unnamed: 0.1,Unnamed: 0,CLASS_ID,CLASS_OOD,Model,AUROC,FPR95
1,1,0,1,OneClassSVM,0.859055,0.405405
4,4,0,2,OneClassSVM,0.782772,0.702703
7,7,0,3,OneClassSVM,0.809337,0.540541
10,10,0,4,OneClassSVM,0.862677,0.405405
13,13,0,5,OneClassSVM,0.806421,0.405405
...,...,...,...,...,...,...
256,256,9,4,OneClassSVM,0.751270,0.733333
259,259,9,5,OneClassSVM,0.626399,0.933333
262,262,9,6,OneClassSVM,0.608781,0.933333
265,265,9,7,OneClassSVM,0.628049,0.911111


In [24]:
# Re-derive the per-model slices of `comparison`, then print the mean, std,
# min and max of each slice's AUROC column.
model_column = comparison['Model']
iforest_stats = comparison.loc[model_column.eq('IsolationForest')]
svm_stats = comparison.loc[model_column.eq('OneClassSVM')]
lof_stats = comparison.loc[model_column.eq('LOF')]

iforest_auroc = iforest_stats['AUROC']
svm_auroc = svm_stats['AUROC']
lof_auroc = lof_stats['AUROC']

# Each tuple holds the positional arguments of one print call. The trailing
# '\n' on the first two "max" lines leaves a blank line between model groups.
summary_lines = [
    ('Isolation forest AUROC mean:', iforest_auroc.mean()),
    ('Isolation forest AUROC std:', iforest_auroc.std()),
    ('Isolation forest AUROC min:', iforest_auroc.min()),
    ('Isolation forest AUROC max:', iforest_auroc.max(), '\n'),
    ('OneClassSVM AUROC mean:', svm_auroc.mean()),
    ('OneClassSVM AUROC std:', svm_auroc.std()),
    ('OneClassSVM AUROC min:', svm_auroc.min()),
    ('OneClassSVM AUROC max:', svm_auroc.max(), '\n'),
    ('LocalOutlierFactor AUROC mean:', lof_auroc.mean()),
    ('LocalOutlierFactor AUROC std:', lof_auroc.std()),
    ('LocalOutlierFactor AUROC min:', lof_auroc.min()),
    ('LocalOutlierFactor AUROC max:', lof_auroc.max()),
]
for line_parts in summary_lines:
    print(*line_parts)


Isolation forest AUROC mean: 0.5193385648658873
Isolation forest AUROC std: 0.15021536830971155
Isolation forest AUROC min: 0.2288978494623656
Isolation forest AUROC max: 0.880101322825781 

OneClassSVM AUROC mean: 0.7248994129413119
OneClassSVM AUROC std: 0.11043264351673886
OneClassSVM AUROC min: 0.415749578888265
OneClassSVM AUROC max: 0.9339285714285714 

LocalOutlierFactor AUROC mean: 0.5655380290853209
LocalOutlierFactor AUROC std: 0.2738104674039578
LocalOutlierFactor AUROC min: 0.0554226475279107
LocalOutlierFactor AUROC max: 0.973868613138686
