In [2]:
import os
import sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat # for read mat extension file

import pyod Packages & method

In [3]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF# local outlier factor 

In [4]:
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

# import metrics Packages

In [5]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

# define data file and read X and y

In [6]:
mat_file_list = ['arrhythmia.mat',
                'cardio.mat',
                'glass.mat',
                'ionosphere.mat',
                'letter.mat',
                'lympho.mat',
                'mnist.mat',
                'musk.mat',
                 'optdigits.mat',
                 'pendigits.mat',
                 'pima.mat',
                 'satellite.mat',
                 'satimage-2.mat',
                 'shuttle.mat',
                 'vertebral.mat',
                 'vowels.mat',
                 'wbc.mat'
                ]

#  how to load mat file

In [7]:
data = loadmat("D:letsupgrade/Anamoly_detec_data/cardio.mat")

In [8]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

# input (independent ) feature shape in mat file

In [9]:
type(data["X"]),data["y"].shape

(numpy.ndarray, (1831, 1))

# dependent target

In [10]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [11]:
time_df = pd.DataFrame()
roc_df = pd.DataFrame()
prn_df = pd.DataFrame()
df_columns =["Data","#Sample","#Dimensiens", "Outliers Pers","ABOD","CBOLF","FB","HBOS","IForest","KNN","LOF","MCD","OCSVM","PCA"]
from time import time
random_state = np.random.RandomState(42)
for mat_file in mat_file_list:
    print("\n.... Processing",mat_file,".....")
    mat = loadmat(os.path.join("D:/letsupgrade/Anamoly_detec_data/", mat_file))
    X= mat["X"]
    y=mat["y"].ravel() # convert 2d to 1d
    outliers_fraction = np.count_nonzero(y)/len(y)
    outliers_percentage = round(outliers_fraction*100,ndigits=4)
    # construct containers for saving results
    
    roc_list = [mat_file[:-4],X.shape[0],X.shape[1],outliers_percentage]
    prn_list =  [mat_file[:-4],X.shape[0],X.shape[1],outliers_percentage]
    time_list =  [mat_file[:-4],X.shape[0],X.shape[1],outliers_percentage]
    
    # 60% data for training & rest for testing
    x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.4,random_state=random_state)
    
    # standardizing data for processing
    x_train_norm,x_testst_norm = standardizer(x_train,x_test)
    classifiers = {"Angle-based Outlier Detector (ABOD) ": ABOD(contamination=outliers_fraction),
                  'Cluster-based Local Outlier Factor':CBLOF(contamination=outliers_fraction,check_estimator=False,random_state=random_state),
                  'Feature Bagging':FeatureBagging(contamination=outliers_fraction,random_state=random_state),
                  'Histogram-base Outlier Detection (HBOS) ':HBOS(contamination=outliers_fraction),
                  "Isolation Forest" : IForest(contamination=outliers_fraction,random_state=random_state),
                  'K Nearest Neighbors (KNN)':KNN(contamination=outliers_fraction),
                  'Local Outlier Factor (LOF)':LOF(contamination=outliers_fraction),
                  'Minimum Covariance Determinant (MCD)':MCD(contamination=outliers_fraction,random_state=random_state),
                  'One-class SVM (OCSVM)':OCSVM(contamination=outliers_fraction),
                  'Principal Component Analysis (PCA)':PCA(contamination=outliers_fraction,random_state=random_state)
                  }
    for clf_name,clf in classifiers.items():
        t0=time()
        clf.fit(x_train_norm)
        test_scores = clf.decision_function(x_testst_norm)
        t1 =time()
        duration = round(t1-t0,ndigits=4)
        time_list.append(duration)
        
        roc = round(roc_auc_score(y_test,test_scores),ndigits=4)
        prn = round(precision_n_scores(y_test,test_scores),ndigits=4)
        print("{} ROC:{}, precision @ rank n :{},execution time : {}s".format(clf_name,roc,prn,duration))
        roc_list.append(roc)
        prn_list.append(prn)
    temp_df = pd.DataFrame(time_list).T
    temp_df.columns = df_columns
    time_df = pd.concat([time_df,temp_df],axis=0)
    
    temp_df = pd.DataFrame(roc_list).T
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df,temp_df],axis=0)
    
    temp_df = pd.DataFrame(prn_list).T
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df,temp_df],axis=0)


.... Processing arrhythmia.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.7687, precision @ rank n :0.3571,execution time : 5.7618s
Cluster-based Local Outlier Factor ROC:0.7789, precision @ rank n :0.4643,execution time : 5.0024s
Feature Bagging ROC:0.7796, precision @ rank n :0.4643,execution time : 1.2623s
Histogram-base Outlier Detection (HBOS)  ROC:0.8511, precision @ rank n :0.5714,execution time : 4.2785s




Isolation Forest ROC:0.8637, precision @ rank n :0.6071,execution time : 3.3171s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n :0.5,execution time : 0.1609s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n :0.4643,execution time : 0.1549s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n :0.4286,execution time : 6.9494s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n :0.5,execution time : 0.0969s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n :0.5,execution time : 0.3318s

.... Processing cardio.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.5892, precision @ rank n :0.1918,execution time : 0.8055s
Cluster-based Local Outlier Factor ROC:0.8845, precision @ rank n :0.4932,execution time : 0.2688s
Feature Bagging ROC:0.6385, precision @ rank n :0.1781,execution time : 2.1118s
Histogram-base Outlier Detection (HBOS)  ROC:0.8373, precision @ rank n :0.4521,execution time : 0.013s




Isolation Forest ROC:0.951, precision @ rank n :0.6027,execution time : 0.9475s
K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n :0.3562,execution time : 0.4447s
Local Outlier Factor (LOF) ROC:0.588, precision @ rank n :0.1507,execution time : 0.2818s




Minimum Covariance Determinant (MCD) ROC:0.8534, precision @ rank n :0.411,execution time : 1.5101s
One-class SVM (OCSVM) ROC:0.9478, precision @ rank n :0.5342,execution time : 0.2059s
Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n :0.6849,execution time : 0.0805s

.... Processing glass.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.6951, precision @ rank n :0.25,execution time : 0.1019s
Cluster-based Local Outlier Factor ROC:0.811, precision @ rank n :0.25,execution time : 0.07s
Feature Bagging ROC:0.7073, precision @ rank n :0.25,execution time : 0.072s
Histogram-base Outlier Detection (HBOS)  ROC:0.7073, precision @ rank n :0.0,execution time : 0.005s




Isolation Forest ROC:0.7134, precision @ rank n :0.25,execution time : 0.6646s
K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n :0.25,execution time : 0.024s
Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n :0.25,execution time : 0.006s
Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n :0.0,execution time : 0.103s
One-class SVM (OCSVM) ROC:0.6585, precision @ rank n :0.25,execution time : 0.003s
Principal Component Analysis (PCA) ROC:0.686, precision @ rank n :0.25,execution time : 0.003s

.... Processing ionosphere.mat .....




Angle-based Outlier Detector (ABOD)  ROC:0.9181, precision @ rank n :0.8431,execution time : 0.1499s
Cluster-based Local Outlier Factor ROC:0.9176, precision @ rank n :0.8039,execution time : 0.0994s
Feature Bagging ROC:0.9303, precision @ rank n :0.8039,execution time : 0.1569s
Histogram-base Outlier Detection (HBOS)  ROC:0.6052, precision @ rank n :0.3922,execution time : 0.018s




Isolation Forest ROC:0.8516, precision @ rank n :0.6078,execution time : 0.7106s
K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n :0.8824,execution time : 0.039s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n :0.7843,execution time : 0.013s
Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n :0.8627,execution time : 0.1499s
One-class SVM (OCSVM) ROC:0.8257, precision @ rank n :0.6863,execution time : 0.013s
Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n :0.5686,execution time : 0.055s

.... Processing letter.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.8783, precision @ rank n :0.4375,execution time : 0.7326s
Cluster-based Local Outlier Factor ROC:0.7783, precision @ rank n :0.1875,execution time : 0.4008s
Feature Bagging ROC:0.8947, precision @ rank n :0.4062,execution time : 1.759s
Histogram-base Outlier Detection (HBOS)  ROC:0.6063, precision @ rank n :0.0938,execution time : 0.019s




Isolation Forest ROC:0.6279, precision @ rank n :0.0625,execution time : 0.8595s
K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n :0.3125,execution time : 0.3458s
Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n :0.3438,execution time : 0.1889s
Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n :0.1875,execution time : 2.4176s
One-class SVM (OCSVM) ROC:0.5927, precision @ rank n :0.125,execution time : 0.1889s
Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n :0.125,execution time : 0.015s

.... Processing lympho.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.9831, precision @ rank n :0.0,execution time : 0.057s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n :1.0,execution time : 0.1169s
Feature Bagging ROC:1.0, precision @ rank n :1.0,execution time : 0.067s
Histogram-base Outlier Detection (HBOS)  ROC:1.0, precision @ rank n :1.0,execution time : 0.01s




Isolation Forest ROC:1.0, precision @ rank n :1.0,execution time : 0.6696s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n :1.0,execution time : 0.018s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n :1.0,execution time : 0.009s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n :1.0,execution time : 0.1259s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n :1.0,execution time : 0.004s




Principal Component Analysis (PCA) ROC:1.0, precision @ rank n :1.0,execution time : 0.005s

.... Processing mnist.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.7628, precision @ rank n :0.3367,execution time : 14.4377s
Cluster-based Local Outlier Factor ROC:0.8389, precision @ rank n :0.3912,execution time : 2.7664s
Feature Bagging ROC:0.7157, precision @ rank n :0.3741,execution time : 96.8706s
Histogram-base Outlier Detection (HBOS)  ROC:0.5766, precision @ rank n :0.1361,execution time : 0.1109s




Isolation Forest ROC:0.7915, precision @ rank n :0.2687,execution time : 4.9372s
K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n :0.432,execution time : 13.1824s
Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n :0.3673,execution time : 12.5538s




Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n :0.2653,execution time : 8.3355s
One-class SVM (OCSVM) ROC:0.854, precision @ rank n :0.3946,execution time : 7.9626s
Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n :0.3878,execution time : 0.3468s

.... Processing musk.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.2161, precision @ rank n :0.1,execution time : 3.9338s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n :1.0,execution time : 0.9494s
Feature Bagging ROC:0.473, precision @ rank n :0.125,execution time : 23.1118s
Histogram-base Outlier Detection (HBOS)  ROC:0.9999, precision @ rank n :0.975,execution time : 0.1269s




Isolation Forest ROC:1.0, precision @ rank n :1.0,execution time : 3.458s
K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n :0.175,execution time : 3.2621s
Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n :0.125,execution time : 3.1672s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n :1.0,execution time : 32.3923s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n :1.0,execution time : 1.9759s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n :1.0,execution time : 0.3508s

.... Processing optdigits.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.4894, precision @ rank n :0.0152,execution time : 4.9983s
Cluster-based Local Outlier Factor ROC:0.7901, precision @ rank n :0.0,execution time : 1.4662s
Feature Bagging ROC:0.5062, precision @ rank n :0.0303,execution time : 25.8122s
Histogram-base Outlier Detection (HBOS)  ROC:0.8774, precision @ rank n :0.2121,execution time : 0.0979s




Isolation Forest ROC:0.686, precision @ rank n :0.0303,execution time : 2.8843s
K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n :0.0,execution time : 3.9228s
Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n :0.0303,execution time : 3.3681s




Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n :0.0,execution time : 3.7398s
One-class SVM (OCSVM) ROC:0.5171, precision @ rank n :0.0,execution time : 2.5475s
Principal Component Analysis (PCA) ROC:0.526, precision @ rank n :0.0,execution time : 0.0979s

.... Processing pendigits.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.667, precision @ rank n :0.0526,execution time : 2.9803s
Cluster-based Local Outlier Factor ROC:0.8082, precision @ rank n :0.1579,execution time : 0.6846s
Feature Bagging ROC:0.4889, precision @ rank n :0.0526,execution time : 8.2093s
Histogram-base Outlier Detection (HBOS)  ROC:0.9348, precision @ rank n :0.2632,execution time : 0.029s




Isolation Forest ROC:0.939, precision @ rank n :0.3333,execution time : 1.4002s
K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n :0.0702,execution time : 1.3272s
Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n :0.0702,execution time : 1.3212s
Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n :0.0877,execution time : 4.7363s
One-class SVM (OCSVM) ROC:0.9235, precision @ rank n :0.3158,execution time : 2.0568s
Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n :0.3158,execution time : 0.027s

.... Processing pima.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.7163, precision @ rank n :0.5253,execution time : 0.2789s
Cluster-based Local Outlier Factor ROC:0.67, precision @ rank n :0.4949,execution time : 0.1659s
Feature Bagging ROC:0.6448, precision @ rank n :0.4444,execution time : 0.2439s
Histogram-base Outlier Detection (HBOS)  ROC:0.711, precision @ rank n :0.5354,execution time : 0.006s




Isolation Forest ROC:0.6829, precision @ rank n :0.5253,execution time : 0.6023s
K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n :0.5859,execution time : 0.072s
Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n :0.4646,execution time : 0.025s
Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n :0.5152,execution time : 0.1409s
One-class SVM (OCSVM) ROC:0.6561, precision @ rank n :0.5051,execution time : 0.02s
Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n :0.5354,execution time : 0.003s

.... Processing satellite.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.5653, precision @ rank n :0.3962,execution time : 3.7749s
Cluster-based Local Outlier Factor ROC:0.7241, precision @ rank n :0.5412,execution time : 1.0962s
Feature Bagging ROC:0.572, precision @ rank n :0.4,execution time : 15.9828s
Histogram-base Outlier Detection (HBOS)  ROC:0.7486, precision @ rank n :0.57,execution time : 0.038s




Isolation Forest ROC:0.6838, precision @ rank n :0.5812,execution time : 2.0938s
K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n :0.4988,execution time : 2.2957s
Local Outlier Factor (LOF) ROC:0.572, precision @ rank n :0.395,execution time : 2.5056s
Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n :0.6762,execution time : 5.8057s
One-class SVM (OCSVM) ROC:0.6478, precision @ rank n :0.5225,execution time : 2.6735s
Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n :0.465,execution time : 0.0599s

.... Processing satimage-2.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.8432, precision @ rank n :0.2333,execution time : 3.48s
Cluster-based Local Outlier Factor ROC:0.9998, precision @ rank n :0.9333,execution time : 0.8645s
Feature Bagging ROC:0.5235, precision @ rank n :0.1667,execution time : 13.2354s
Histogram-base Outlier Detection (HBOS)  ROC:0.9784, precision @ rank n :0.6,execution time : 0.035s




Isolation Forest ROC:0.9955, precision @ rank n :0.8667,execution time : 1.6121s
K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n :0.4333,execution time : 1.8969s
Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n :0.1667,execution time : 1.7s
Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n :0.6667,execution time : 4.9162s
One-class SVM (OCSVM) ROC:0.9997, precision @ rank n :0.9,execution time : 2.1128s
Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n :0.7333,execution time : 0.057s

.... Processing shuttle.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.6171, precision @ rank n :0.2003,execution time : 41.0955s
Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n :0.2025,execution time : 3.3042s
Feature Bagging ROC:0.4725, precision @ rank n :0.0257,execution time : 245.6561s
Histogram-base Outlier Detection (HBOS)  ROC:0.9871, precision @ rank n :0.9985,execution time : 0.051s




Isolation Forest ROC:0.9976, precision @ rank n :0.9501,execution time : 9.9413s
K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n :0.212,execution time : 25.465s
Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n :0.1548,execution time : 35.1241s






Minimum Covariance Determinant (MCD) ROC:0.9899, precision @ rank n :0.7395,execution time : 31.3962s
One-class SVM (OCSVM) ROC:0.9934, precision @ rank n :0.956,execution time : 133.8917s
Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n :0.9516,execution time : 0.086s

.... Processing vertebral.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.5366, precision @ rank n :0.2143,execution time : 0.083s
Cluster-based Local Outlier Factor ROC:0.439, precision @ rank n :0.0714,execution time : 0.082s
Feature Bagging ROC:0.5279, precision @ rank n :0.1429,execution time : 0.078s
Histogram-base Outlier Detection (HBOS)  ROC:0.3506, precision @ rank n :0.0,execution time : 0.005s




Isolation Forest ROC:0.3789, precision @ rank n :0.0,execution time : 0.6936s
K Nearest Neighbors (KNN) ROC:0.4573, precision @ rank n :0.0714,execution time : 0.037s
Local Outlier Factor (LOF) ROC:0.4983, precision @ rank n :0.1429,execution time : 0.009s
Minimum Covariance Determinant (MCD) ROC:0.4085, precision @ rank n :0.0714,execution time : 0.0949s
One-class SVM (OCSVM) ROC:0.4686, precision @ rank n :0.0714,execution time : 0.004s
Principal Component Analysis (PCA) ROC:0.4085, precision @ rank n :0.0,execution time : 0.005s

.... Processing



 vowels.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.9616, precision @ rank n :0.6316,execution time : 0.5407s
Cluster-based Local Outlier Factor ROC:0.8963, precision @ rank n :0.3158,execution time : 0.1989s
Feature Bagging ROC:0.9365, precision @ rank n :0.3684,execution time : 0.6986s
Histogram-base Outlier Detection (HBOS)  ROC:0.6876, precision @ rank n :0.1579,execution time : 0.008s




Isolation Forest ROC:0.8214, precision @ rank n :0.1579,execution time : 0.6976s
K Nearest Neighbors (KNN) ROC:0.9734, precision @ rank n :0.4737,execution time : 0.1589s
Local Outlier Factor (LOF) ROC:0.9398, precision @ rank n :0.3684,execution time : 0.079s
Minimum Covariance Determinant (MCD) ROC:0.7243, precision @ rank n :0.1053,execution time : 1.8889s
One-class SVM (OCSVM) ROC:0.8163, precision @ rank n :0.2632,execution time : 0.088s
Principal Component Analysis (PCA) ROC:0.6297, precision @ rank n :0.1579,execution time : 0.006s

.... Processing wbc.mat .....
Angle-based Outlier Detector (ABOD)  ROC:0.921, precision @ rank n :0.375,execution time : 0.1719s
Cluster-based Local Outlier Factor ROC:0.9149, precision @ rank n :0.375,execution time : 0.114s
Feature Bagging ROC:0.9271, precision @ rank n :0.375,execution time : 0.1969s
Histogram-base Outlier Detection (HBOS)  ROC:0.9479, precision @ rank n :0.5,execution time : 0.015s




Isolation Forest ROC:0.9418, precision @ rank n :0.625,execution time : 0.7096s
K Nearest Neighbors (KNN) ROC:0.9444, precision @ rank n :0.5,execution time : 0.047s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n :0.375,execution time : 0.016s
Minimum Covariance Determinant (MCD) ROC:0.9288, precision @ rank n :0.5,execution time : 0.1789s
One-class SVM (OCSVM) ROC:0.9358, precision @ rank n :0.375,execution time : 0.011s
Principal Component Analysis (PCA) ROC:0.9262, precision @ rank n :0.375,execution time : 0.004s


In [12]:
time_df

Unnamed: 0,Data,#Sample,#Dimensiens,Outliers Pers,ABOD,CBOLF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,5.7618,5.0024,1.2623,4.2785,3.3171,0.1609,0.1549,6.9494,0.0969,0.3318
0,cardio,1831,21,9.6122,0.8055,0.2688,2.1118,0.013,0.9475,0.4447,0.2818,1.5101,0.2059,0.0805
0,glass,214,9,4.2056,0.1019,0.07,0.072,0.005,0.6646,0.024,0.006,0.103,0.003,0.003
0,ionosphere,351,33,35.8974,0.1499,0.0994,0.1569,0.018,0.7106,0.039,0.013,0.1499,0.013,0.055
0,letter,1600,32,6.25,0.7326,0.4008,1.759,0.019,0.8595,0.3458,0.1889,2.4176,0.1889,0.015
0,lympho,148,18,4.0541,0.057,0.1169,0.067,0.01,0.6696,0.018,0.009,0.1259,0.004,0.005
0,mnist,7603,100,9.2069,14.4377,2.7664,96.8706,0.1109,4.9372,13.1824,12.5538,8.3355,7.9626,0.3468
0,musk,3062,166,3.1679,3.9338,0.9494,23.1118,0.1269,3.458,3.2621,3.1672,32.3923,1.9759,0.3508
0,optdigits,5216,64,2.8758,4.9983,1.4662,25.8122,0.0979,2.8843,3.9228,3.3681,3.7398,2.5475,0.0979
0,pendigits,6870,16,2.2707,2.9803,0.6846,8.2093,0.029,1.4002,1.3272,1.3212,4.7363,2.0568,0.027


In [13]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensiens,Outliers Pers,ABOD,CBOLF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7789,0.7796,0.8511,0.8637,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5892,0.8845,0.6385,0.8373,0.951,0.734,0.588,0.8534,0.9478,0.9616
0,glass,214,9,4.2056,0.6951,0.811,0.7073,0.7073,0.7134,0.8384,0.7043,0.8293,0.6585,0.686
0,ionosphere,351,33,35.8974,0.9181,0.9176,0.9303,0.6052,0.8516,0.932,0.9227,0.9669,0.8257,0.7941
0,letter,1600,32,6.25,0.8783,0.7783,0.8947,0.6063,0.6279,0.8573,0.8765,0.8061,0.5927,0.5216
0,lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.7628,0.8389,0.7157,0.5766,0.7915,0.8498,0.7195,0.8713,0.854,0.8534
0,musk,3062,166,3.1679,0.2161,1.0,0.473,0.9999,1.0,0.8009,0.4629,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4894,0.7901,0.5062,0.8774,0.686,0.406,0.5277,0.3822,0.5171,0.526
0,pendigits,6870,16,2.2707,0.667,0.8082,0.4889,0.9348,0.939,0.7371,0.4965,0.8204,0.9235,0.9309


In [14]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensiens,Outliers Pers,ABOD,CBOLF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1918,0.4932,0.1781,0.4521,0.6027,0.3562,0.1507,0.411,0.5342,0.6849
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8431,0.8039,0.8039,0.3922,0.6078,0.8824,0.7843,0.8627,0.6863,0.5686
0,letter,1600,32,6.25,0.4375,0.1875,0.4062,0.0938,0.0625,0.3125,0.3438,0.1875,0.125,0.125
0,lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3367,0.3912,0.3741,0.1361,0.2687,0.432,0.3673,0.2653,0.3946,0.3878
0,musk,3062,166,3.1679,0.1,1.0,0.125,0.975,1.0,0.175,0.125,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0152,0.0,0.0303,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0526,0.1579,0.0526,0.2632,0.3333,0.0702,0.0702,0.0877,0.3158,0.3158
