In [1]:
import sys
sys.path.append('../src')
import evaluation_utils, data_utils

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor


In [2]:
# Load the dataset from the cardio .npz archive via the project helper.
# X is the input every detector is fitted on; y is handed to
# evaluation_utils.run_evaluation together with the predictions.
# presumably X is a 2-D feature matrix and y holds 0 = normal / 1 = anomaly
# labels (predictions are remapped to 0/1 before evaluation) — TODO confirm
# against data_utils.get_data.
X, y = data_utils.get_data('../data/6_cardio.npz')
# Optional plot of the loaded data, intentionally left disabled.
# data_utils.plot_dataset(X, y)

In [3]:
# Compare three unsupervised outlier detectors on X and score each against y.
#
# Tunables are named once so that all detectors are configured consistently
# and a reader can change them in a single place.
CONTAMINATION = 0.1  # outlier fraction handed to every detector (as `nu` for the SVM)
SEED = 42            # pins IsolationForest's randomness so re-runs give the same metrics

# Define the anomaly detection methods
methods = {
    "Isolation Forest": IsolationForest(contamination=CONTAMINATION, random_state=SEED),
    "One-Class SVM": OneClassSVM(nu=CONTAMINATION),
    "Local Outlier Factor": LocalOutlierFactor(n_neighbors=20, contamination=CONTAMINATION),
}

# Apply each method
for name, method in methods.items():
    # All three estimators expose fit_predict; for IsolationForest and
    # OneClassSVM it is fit(X) followed by predict(X), so no per-name
    # special case is needed for LocalOutlierFactor.
    raw_pred = method.fit_predict(X)

    # scikit-learn labels inliers as +1 and outliers as -1; remap to
    # 0 = normal, 1 = anomaly in one step, keeping the integer dtype and
    # leaving the estimator's own output array untouched.
    y_pred = (raw_pred == -1).astype(raw_pred.dtype)

    # NOTE(review): run_evaluation receives hard 0/1 labels, not continuous
    # anomaly scores — confirm that its AUCROC/AUCPR figures are meant to be
    # computed from labels.
    # NOTE(review): do_point_adjustment=True is kept from the original;
    # confirm that point adjustment is appropriate for this dataset.
    print(f"{name} Results:")
    print(evaluation_utils.run_evaluation(y, y_pred, do_point_adjustment=True))

Isolation Forest Results:
{'AUCROC': 0.733871189233727, 'AUCPR': 0.3086684231360168, 'F1': 0.5125298208912222, 'Precision': 0.5027322404371585, 'Recall': 0.5227272727272727, 'Adjusted AUCROC': 0.9725075528700906, 'Adjusted AUCPR': 0.6591760299625468, 'Adjusted F1': 0.7945776037873541, 'Adjusted Precision': 0.6591760299625468, 'Adjusted Recall': 1.0}
One-Class SVM Results:
{'AUCROC': 0.666961343037627, 'AUCPR': 0.2113352044163913, 'F1': 0.3922601972493124, 'Precision': 0.3817204301075269, 'Recall': 0.4034090909090909, 'Adjusted AUCROC': 0.9652567975830816, 'Adjusted AUCPR': 0.6048109965635738, 'Adjusted F1': 0.7537426265716416, 'Adjusted Precision': 0.6048109965635738, 'Adjusted Recall': 1.0}
Local Outlier Factor Results:
{'AUCROC': 0.5452897555616589, 'AUCPR': 0.11043889214914632, 'F1': 0.17826798254249498, 'Precision': 0.17486338797814208, 'Recall': 0.18181818181818182, 'Adjusted AUCROC': 0.954380664652568, 'Adjusted AUCPR': 0.5382262996941896, 'Adjusted F1': 0.6997966434691368, 'Adju