In [18]:
import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat
from time import time

# Import Pyod and the methods

In [2]:
pip install pyod

Note: you may need to restart the kernel to use updated packages.


In [8]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

# Import Metrics Package

In [9]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [20]:
from glob import glob, iglob
mat_file_list=[ i for i in glob('.\mat files\*.mat')]


In [21]:
mat_file_list

['.\\mat files\\arrhythmia.mat',
 '.\\mat files\\cardio.mat',
 '.\\mat files\\glass.mat',
 '.\\mat files\\ionosphere.mat',
 '.\\mat files\\letter.mat',
 '.\\mat files\\lympho.mat',
 '.\\mat files\\mnist.mat',
 '.\\mat files\\musk.mat',
 '.\\mat files\\optdigits.mat',
 '.\\mat files\\pendigits.mat',
 '.\\mat files\\pima.mat',
 '.\\mat files\\satellite.mat',
 '.\\mat files\\satimage-2.mat',
 '.\\mat files\\shuttle.mat',
 '.\\mat files\\vertebral.mat',
 '.\\mat files\\vowels.mat',
 '.\\mat files\\wbc.mat']

In [22]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc','PCA','MCD','OCSVM','LOF','CBLOF','KNN','HBOS','ABOD','IFOREST','FEATUREBAGGING']

# Precision Time and Roc evolution tables creation

In [17]:
roc_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
time_df=pd.DataFrame(columns=df_columns)
print(roc_df,prn_df,time_df)

Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: []


# Exploring Mat files

In [23]:
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
   print("\n... Processing", mat_file, '...')
   mat = loadmat(mat_file)

   X = mat['X']
   y = mat['y'].ravel()
   outliers_fraction = np.count_nonzero(y) / len(y)
   outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
   roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
   X_train_norm, X_test_norm = standardizer(X_train, X_test)

   classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

   for clf_name, clf in classifiers.items():
       t0 = time()
       clf.fit(X_train_norm)
       test_scores = clf.decision_function(X_test_norm)
       t1 = time()
       duration = round(t1 - t0, ndigits=4)
       time_list.append(duration)

       roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
       prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

       print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
             'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

       roc_list.append(roc)
       prn_list.append(prn)

   temp_df = pd.DataFrame(time_list).transpose()
   temp_df.columns = df_columns
   time_df = pd.concat([time_df, temp_df], axis=0)

   temp_df = pd.DataFrame(roc_list).transpose()
   temp_df.columns = df_columns
   roc_df = pd.concat([roc_df, temp_df], axis=0)

   temp_df = pd.DataFrame(prn_list).transpose()
   temp_df.columns = df_columns
   prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing .\mat files\arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 5.5707s
Cluster-based Local Outlier Factor ROC:0.7789, precision @ rank n:0.4643, execution time: 2.9711s
Feature Bagging ROC:0.7796, precision @ rank n:0.4643, execution time: 0.6336s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 2.2511s




Isolation Forest ROC:0.8639, precision @ rank n:0.6071, execution time: 0.5852s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0955s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0625s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 2.4661s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.064s
Principal Component Analysis (PCA) ROC:0.8, precision @ rank n:0.5, execution time: 0.2642s

... Processing .\mat files\cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5892, precision @ rank n:0.1918, execution time: 0.9057s
Cluster-based Local Outlier Factor ROC:0.8845, precision @ rank n:0.4932, execution time: 0.2287s
Feature Bagging ROC:0.6385, precision @ rank n:0.1781, execution time: 1.1761s
Histogram-base Outlier Detection (HBOS) ROC:0.8373, precision @ rank n:0.4521, execution time: 0.0313s




Isolation Forest ROC:0.9502, precision @ rank n:0.6027, execution time: 0.4352s
K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n:0.3562, execution time: 0.213s
Local Outlier Factor (LOF) ROC:0.588, precision @ rank n:0.1507, execution time: 0.136s




Minimum Covariance Determinant (MCD) ROC:0.8534, precision @ rank n:0.411, execution time: 0.9497s
One-class SVM (OCSVM) ROC:0.9478, precision @ rank n:0.5342, execution time: 0.1102s
Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n:0.6849, execution time: 0.1719s

... Processing .\mat files\glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6951, precision @ rank n:0.25, execution time: 0.0706s
Cluster-based Local Outlier Factor ROC:0.811, precision @ rank n:0.25, execution time: 0.0469s
Feature Bagging ROC:0.7073, precision @ rank n:0.25, execution time: 0.0514s
Histogram-base Outlier Detection (HBOS) ROC:0.7073, precision @ rank n:0.0, execution time: 0.0s




Isolation Forest ROC:0.7134, precision @ rank n:0.25, execution time: 0.2261s
K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n:0.25, execution time: 0.016s
Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n:0.25, execution time: 0.0s
Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n:0.0, execution time: 0.1239s
One-class SVM (OCSVM) ROC:0.6585, precision @ rank n:0.25, execution time: 0.0s
Principal Component Analysis (PCA) ROC:0.686, precision @ rank n:0.25, execution time: 0.0s

... Processing .\mat files\ionosphere.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9181, precision @ rank n:0.8431, execution time: 0.1182s
Cluster-based Local Outlier Factor ROC:0.9176, precision @ rank n:0.8039, execution time: 0.0779s
Feature Bagging ROC:0.9303, precision @ rank n:0.8039, execution time: 0.0713s
Histogram-base Outlier Detection (HBOS) ROC:0.6052, precision @ rank n:0.3922, execution time: 0.0156s




Isolation Forest ROC:0.8486, precision @ rank n:0.5882, execution time: 0.2466s
K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n:0.8824, execution time: 0.0237s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.7843, execution time: 0.0156s
Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n:0.8627, execution time: 0.0781s
One-class SVM (OCSVM) ROC:0.8257, precision @ rank n:0.6863, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n:0.5686, execution time: 0.0638s

... Processing .\mat files\letter.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.8783, precision @ rank n:0.4375, execution time: 0.6697s
Cluster-based Local Outlier Factor ROC:0.7783, precision @ rank n:0.1875, execution time: 0.1719s
Feature Bagging ROC:0.8947, precision @ rank n:0.4062, execution time: 0.9119s
Histogram-base Outlier Detection (HBOS) ROC:0.6063, precision @ rank n:0.0938, execution time: 0.0156s




Isolation Forest ROC:0.6201, precision @ rank n:0.0625, execution time: 0.3584s
K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n:0.3125, execution time: 0.1875s
Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n:0.3438, execution time: 0.1122s
Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n:0.1875, execution time: 1.4296s
One-class SVM (OCSVM) ROC:0.5927, precision @ rank n:0.125, execution time: 0.0943s
Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n:0.125, execution time: 0.0156s

... Processing .\mat files\lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9831, precision @ rank n:0.0, execution time: 0.067s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.0477s
Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.0469s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.0s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.2221s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.008s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.008s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0553s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.0156s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.0s

... Processing .\mat files\mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7628, precision @ rank n:0.3367, execution time: 9.5397s
Cluster-based Local Outlier Factor ROC:0.8389, precision @ rank n:0.3912, execution time: 1.6007s
Feature Bagging ROC:0.7157, precision @ rank n:0.3741, execution time: 54.7242s
Histogram-base Outlier Detection (HBOS) ROC:0.5766, precision @ rank n:0.1361, execution time: 0.0625s




Isolation Forest ROC:0.7939, precision @ rank n:0.2721, execution time: 2.4839s
K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n:0.432, execution time: 7.5206s
Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n:0.3673, execution time: 7.134s




Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n:0.2653, execution time: 4.0368s
One-class SVM (OCSVM) ROC:0.854, precision @ rank n:0.3946, execution time: 5.1351s
Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n:0.3878, execution time: 0.2808s

... Processing .\mat files\musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2161, precision @ rank n:0.1, execution time: 2.7815s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.43s
Feature Bagging ROC:0.473, precision @ rank n:0.125, execution time: 13.773s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.975, execution time: 0.0781s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 1.4611s
K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n:0.175, execution time: 2.0154s
Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n:0.125, execution time: 1.8573s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 15.6036s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.288s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.2543s

... Processing .\mat files\optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4894, precision @ rank n:0.0152, execution time: 3.5457s
Cluster-based Local Outlier Factor ROC:0.7901, precision @ rank n:0.0, execution time: 0.592s
Feature Bagging ROC:0.5062, precision @ rank n:0.0303, execution time: 14.234s
Histogram-base Outlier Detection (HBOS) ROC:0.8774, precision @ rank n:0.2121, execution time: 0.0312s




Isolation Forest ROC:0.6735, precision @ rank n:0.0303, execution time: 1.0045s
K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n:0.0, execution time: 2.0835s
Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n:0.0303, execution time: 1.882s




Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n:0.0, execution time: 1.7966s
One-class SVM (OCSVM) ROC:0.5171, precision @ rank n:0.0, execution time: 1.6398s
Principal Component Analysis (PCA) ROC:0.526, precision @ rank n:0.0, execution time: 0.1094s

... Processing .\mat files\pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.667, precision @ rank n:0.0526, execution time: 2.6174s
Cluster-based Local Outlier Factor ROC:0.8082, precision @ rank n:0.1579, execution time: 0.3025s
Feature Bagging ROC:0.4889, precision @ rank n:0.0526, execution time: 4.7196s
Histogram-base Outlier Detection (HBOS) ROC:0.9348, precision @ rank n:0.2632, execution time: 0.0156s




Isolation Forest ROC:0.9376, precision @ rank n:0.3333, execution time: 0.7416s
K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n:0.0702, execution time: 0.8334s
Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n:0.0702, execution time: 0.778s
Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n:0.0877, execution time: 2.496s
One-class SVM (OCSVM) ROC:0.9235, precision @ rank n:0.3158, execution time: 1.3507s
Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n:0.3158, execution time: 0.0156s

... Processing .\mat files\pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7163, precision @ rank n:0.5253, execution time: 0.2699s
Cluster-based Local Outlier Factor ROC:0.67, precision @ rank n:0.4949, execution time: 0.125s
Feature Bagging ROC:0.6448, precision @ rank n:0.4444, execution time: 0.1549s
Histogram-base Outlier Detection (HBOS) ROC:0.711, precision @ rank n:0.5354, execution time: 0.0s




Isolation Forest ROC:0.6818, precision @ rank n:0.5152, execution time: 0.2899s
K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n:0.5859, execution time: 0.0469s
Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n:0.4646, execution time: 0.0156s
Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n:0.5152, execution time: 0.0625s
One-class SVM (OCSVM) ROC:0.6561, precision @ rank n:0.5051, execution time: 0.0156s
Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n:0.5354, execution time: 0.0s

... Processing .\mat files\satellite.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.5653, precision @ rank n:0.3962, execution time: 3.0095s
Cluster-based Local Outlier Factor ROC:0.7241, precision @ rank n:0.5412, execution time: 0.5234s
Feature Bagging ROC:0.572, precision @ rank n:0.4, execution time: 10.0254s
Histogram-base Outlier Detection (HBOS) ROC:0.7486, precision @ rank n:0.57, execution time: 0.0312s




Isolation Forest ROC:0.6825, precision @ rank n:0.5825, execution time: 0.9208s
K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n:0.4988, execution time: 1.3487s
Local Outlier Factor (LOF) ROC:0.572, precision @ rank n:0.395, execution time: 1.2565s
Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n:0.6762, execution time: 3.0648s
One-class SVM (OCSVM) ROC:0.6478, precision @ rank n:0.5225, execution time: 1.7008s
Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n:0.465, execution time: 0.0313s

... Processing .\mat files\satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.2333, execution time: 2.6397s
Cluster-based Local Outlier Factor ROC:0.9998, precision @ rank n:0.9333, execution time: 0.3896s
Feature Bagging ROC:0.5235, precision @ rank n:0.1667, execution time: 7.5007s
Histogram-base Outlier Detection (HBOS) ROC:0.9784, precision @ rank n:0.6, execution time: 0.0156s




Isolation Forest ROC:0.9952, precision @ rank n:0.8667, execution time: 0.8199s
K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n:0.4333, execution time: 1.1241s
Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n:0.1667, execution time: 0.9578s
Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n:0.6667, execution time: 2.68s
One-class SVM (OCSVM) ROC:0.9997, precision @ rank n:0.9, execution time: 1.4662s
Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n:0.7333, execution time: 0.0241s

... Processing .\mat files\shuttle.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.6171, precision @ rank n:0.2003, execution time: 24.4806s
Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n:0.2025, execution time: 1.1569s
Feature Bagging ROC:0.4725, precision @ rank n:0.0257, execution time: 92.9439s
Histogram-base Outlier Detection (HBOS) ROC:0.9871, precision @ rank n:0.9985, execution time: 0.0313s




Isolation Forest ROC:0.9976, precision @ rank n:0.9596, execution time: 4.3391s
K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n:0.212, execution time: 11.2567s
Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n:0.1548, execution time: 14.5307s






Minimum Covariance Determinant (MCD) ROC:0.9899, precision @ rank n:0.7395, execution time: 13.5788s
One-class SVM (OCSVM) ROC:0.9934, precision @ rank n:0.956, execution time: 71.7615s
Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n:0.9516, execution time: 0.0469s

... Processing .\mat files\vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5366, precision @ rank n:0.2143, execution time: 0.1014s
Cluster-based Local Outlier Factor ROC:0.439, precision @ rank n:0.0714, execution time: 0.0552s
Feature Bagging ROC:0.5279, precision @ rank n:0.1429, execution time: 0.0312s
Histogram-base Outlier Detection (HBOS) ROC:0.3506, precision @ rank n:0.0, execution time: 0.0156s




Isolation Forest ROC:0.3772, precision @ rank n:0.0, execution time: 0.2229s
K Nearest Neighbors (KNN) ROC:0.4573, precision @ rank n:0.0714, execution time: 0.016s
Local Outlier Factor (LOF) ROC:0.4983, precision @ rank n:0.1429, execution time: 0.0s
Minimum Covariance Determinant (MCD) ROC:0.4103, precision @ rank n:0.0714, execution time: 0.1412s
One-class SVM (OCSVM) ROC:0.4686, precision @ rank n:0.0714, execution time: 0.0s
Principal Component Analysis (PCA) ROC:0.4085, precision @ rank n:0.0, execution time: 0.0s

... Processing .\mat files\vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9616, precision @ rank n:0.6316, execution time: 0.5121s
Cluster-based Local Outlier Factor ROC:0.8963, precision @ rank n:0.3158, execution time: 0.125s
Feature Bagging ROC:0.9365, precision @ rank n:0.3684, execution time: 0.3886s
Histogram-base Outlier Detection (HBOS) ROC:0.6876, precision @ rank n:0.1579, execution time: 0.0s




Isolation Forest ROC:0.8209, precision @ rank n:0.1579, execution time: 0.3349s
K Nearest Neighbors (KNN) ROC:0.9734, precision @ rank n:0.4737, execution time: 0.1094s
Local Outlier Factor (LOF) ROC:0.9398, precision @ rank n:0.3684, execution time: 0.0469s
Minimum Covariance Determinant (MCD) ROC:0.7243, precision @ rank n:0.1053, execution time: 1.0064s
One-class SVM (OCSVM) ROC:0.8163, precision @ rank n:0.2632, execution time: 0.0469s
Principal Component Analysis (PCA) ROC:0.6297, precision @ rank n:0.1579, execution time: 0.0s

... Processing .\mat files\wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.921, precision @ rank n:0.375, execution time: 0.1544s
Cluster-based Local Outlier Factor ROC:0.9149, precision @ rank n:0.375, execution time: 0.08s
Feature Bagging ROC:0.9271, precision @ rank n:0.375, execution time: 0.096s
Histogram-base Outlier Detection (HBOS) ROC:0.9479, precision @ rank n:0.5, execution time: 0.0119s




Isolation Forest ROC:0.9436, precision @ rank n:0.5, execution time: 0.2766s
K Nearest Neighbors (KNN) ROC:0.9444, precision @ rank n:0.5, execution time: 0.0313s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.375, execution time: 0.0156s
Minimum Covariance Determinant (MCD) ROC:0.9288, precision @ rank n:0.5, execution time: 0.0781s
One-class SVM (OCSVM) ROC:0.9358, precision @ rank n:0.375, execution time: 0.0156s
Principal Component Analysis (PCA) ROC:0.9262, precision @ rank n:0.375, execution time: 0.0s


In [24]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,.\mat files\arrhythmia,452,274,14.6018,0.7687,0.7789,0.7796,0.8511,0.8639,0.782,0.7787,0.8228,0.7986,0.8
0,.\mat files\cardio,1831,21,9.6122,0.5892,0.8845,0.6385,0.8373,0.9502,0.734,0.588,0.8534,0.9478,0.9616
0,.\mat files\glass,214,9,4.2056,0.6951,0.811,0.7073,0.7073,0.7134,0.8384,0.7043,0.8293,0.6585,0.686
0,.\mat files\ionosphere,351,33,35.8974,0.9181,0.9176,0.9303,0.6052,0.8486,0.932,0.9227,0.9669,0.8257,0.7941
0,.\mat files\letter,1600,32,6.25,0.8783,0.7783,0.8947,0.6063,0.6201,0.8573,0.8765,0.8061,0.5927,0.5216
0,.\mat files\lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,.\mat files\mnist,7603,100,9.2069,0.7628,0.8389,0.7157,0.5766,0.7939,0.8498,0.7195,0.8713,0.854,0.8534
0,.\mat files\musk,3062,166,3.1679,0.2161,1.0,0.473,0.9999,1.0,0.8009,0.4629,1.0,1.0,1.0
0,.\mat files\optdigits,5216,64,2.8758,0.4894,0.7901,0.5062,0.8774,0.6735,0.406,0.5277,0.3822,0.5171,0.526
0,.\mat files\pendigits,6870,16,2.2707,0.667,0.8082,0.4889,0.9348,0.9376,0.7371,0.4965,0.8204,0.9235,0.9309


In [28]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,.\mat files\arrhythmia,452,274,14.6018,5.5707,2.9711,0.6336,2.2511,0.5852,0.0955,0.0625,2.4661,0.064,0.2642
0,.\mat files\cardio,1831,21,9.6122,0.9057,0.2287,1.1761,0.0313,0.4352,0.213,0.136,0.9497,0.1102,0.1719
0,.\mat files\glass,214,9,4.2056,0.0706,0.0469,0.0514,0.0,0.2261,0.016,0.0,0.1239,0.0,0.0
0,.\mat files\ionosphere,351,33,35.8974,0.1182,0.0779,0.0713,0.0156,0.2466,0.0237,0.0156,0.0781,0.008,0.0638
0,.\mat files\letter,1600,32,6.25,0.6697,0.1719,0.9119,0.0156,0.3584,0.1875,0.1122,1.4296,0.0943,0.0156
0,.\mat files\lympho,148,18,4.0541,0.067,0.0477,0.0469,0.0,0.2221,0.008,0.008,0.0553,0.0156,0.0
0,.\mat files\mnist,7603,100,9.2069,9.5397,1.6007,54.7242,0.0625,2.4839,7.5206,7.134,4.0368,5.1351,0.2808
0,.\mat files\musk,3062,166,3.1679,2.7815,0.43,13.773,0.0781,1.4611,2.0154,1.8573,15.6036,1.288,0.2543
0,.\mat files\optdigits,5216,64,2.8758,3.5457,0.592,14.234,0.0312,1.0045,2.0835,1.882,1.7966,1.6398,0.1094
0,.\mat files\pendigits,6870,16,2.2707,2.6174,0.3025,4.7196,0.0156,0.7416,0.8334,0.778,2.496,1.3507,0.0156


In [27]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,.\mat files\arrhythmia,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,.\mat files\cardio,1831,21,9.6122,0.1918,0.4932,0.1781,0.4521,0.6027,0.3562,0.1507,0.411,0.5342,0.6849
0,.\mat files\glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,.\mat files\ionosphere,351,33,35.8974,0.8431,0.8039,0.8039,0.3922,0.5882,0.8824,0.7843,0.8627,0.6863,0.5686
0,.\mat files\letter,1600,32,6.25,0.4375,0.1875,0.4062,0.0938,0.0625,0.3125,0.3438,0.1875,0.125,0.125
0,.\mat files\lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,.\mat files\mnist,7603,100,9.2069,0.3367,0.3912,0.3741,0.1361,0.2721,0.432,0.3673,0.2653,0.3946,0.3878
0,.\mat files\musk,3062,166,3.1679,0.1,1.0,0.125,0.975,1.0,0.175,0.125,1.0,1.0,1.0
0,.\mat files\optdigits,5216,64,2.8758,0.0152,0.0,0.0303,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,.\mat files\pendigits,6870,16,2.2707,0.0526,0.1579,0.0526,0.2632,0.3333,0.0702,0.0702,0.0877,0.3158,0.3158
