In [1]:
import os
import sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat


### Import Pyod packages and methods

In [7]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM

# proximity based outlier
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS

# Probabilistic model based outlier
from pyod.models.abod import ABOD

# Outlier Ensembles and combination frameworks
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging


### Import Metrics Packages

In [8]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores

from sklearn.metrics import roc_auc_score

### Define Data file and read X and Y

In [4]:
mat_file_list =['arrhythmia.mat',
               'cardio.mat',
               'glass.mat',
               'ionosphere.mat',
               'letter.mat',
               'lympho.mat',
               'mnist.mat',
               'musk.mat',
               'optdigits.mat',
               'pendigits.mat',
               'pima.mat',
               'satellite.mat',
               'satimage-2.mat',
               'shuttle.mat',
               'vertebral.mat',
               'vowels.mat',
               'wbc.mat']

### Define nine outlier detection tools to be compared

In [5]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc',
            'PCA','MCD','OCSVM','LOF','CBLOF','KNN','HBOS','ABOD',
            'IForest','FeatureBagging']

# ROC performance evolution table
roc_df = pd.DataFrame(columns=df_columns)

# Precision_n_scores - Performance evolution table
prn_df = pd.DataFrame(columns=df_columns)

# Time dataframe
time_df = pd.DataFrame(columns=df_columns)

# Exploring all Mat files

In [9]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
    print("\n... Processing : ", mat_file, '...')
    mat = loadmat(os.path.join('Anamoly_detec_data', mat_file))
    
    X = mat['X']
    y = mat['y'].ravel()
    
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)
    
    # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    
    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=random_state)
    
    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)
    
    classifiers = {
        'Angle-based Outlier Detector (ABOD)': ABOD(contamination=outliers_fraction),
        'Cluster-based Local Outlier Factor': CBLOF(contamination=outliers_fraction, check_estimator=False,random_state=random_state),
        'Feature Bagging': FeatureBagging(contamination=outliers_fraction,random_state=random_state),
        'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction),
        'Isolation Forest': IForest(contamination=outliers_fraction,random_state=random_state),
        'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
        'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction),
        'Minimum Covariance Determinant (MCD)': MCD(contamination=outliers_fraction, random_state=random_state),
        'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
        'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction, random_state=random_state),
    }
    
    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)
        
        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
        
        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))
        
        roc_list.append(roc)
        prn_list.append(prn)
        
    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)
    
    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)
    
    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)
    
    print("=======================================================\n\n")
    


... Processing :  arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.304s
Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 0.157s




Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 0.738s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.133s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 0.766s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.117s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.089s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 1.527s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.047s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.08s



... Processing :  cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 0.83s




Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 0.2519s
Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 1.133s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.013s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 0.715s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.3139s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.135s




Minimum Covariance Determinant (MCD) ROC:0.8781, precision @ rank n:0.3906, execution time: 0.906s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.119s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.006s



... Processing :  glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.105s
Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.075s
Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.056s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.005s




Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 0.541s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.022s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.062s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.003s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.002s



... Processing :  ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.162s
Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.079s
Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.102s




Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.02s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 0.583s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.035s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.01s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.111s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.006s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.005s



... Processing :  letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 0.756s
Cluster-based Local Outlier Factor ROC:0.7423, precision @ rank n:0.175, execution time: 0.176s




Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 1.011s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.018s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 0.713s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.2439s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.118s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 1.8909s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.11s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.008s



... Processing :  lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.067s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, execution time: 0.091s
Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.049s
His



Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 0.53s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.015s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.063s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.003s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.004s



... Processing :  mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 12.0329s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 1.3669s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 81.1019s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.127s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 5.3159s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 16.5178s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 16.2788s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 8.4629s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 8.0189s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 0.294s



... Processing :  musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 4.259s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.792s
Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 21.1367s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.134s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 3.6999s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 4.0629s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 2.475s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 27.8916s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.283s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.196s



... Processing :  optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 4.022s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 0.6049s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 17.0898s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.057s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 1.787s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 2.407s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 1.968s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 2.4099s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 1.7s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.069s



... Processing :  pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 3.182s




Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 0.4139s
Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 6.9579s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.017s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 1.193s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 1.1029s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 0.85s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 3.5799s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 1.471s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.016s



... Processing :  pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 0.33s
Cluster-based Local Outlier Factor ROC:0.684, precision 



Feature Bagging ROC:0.6446, precision @ rank n:0.4468, execution time: 0.17s
Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.006s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.579s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.067s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.016s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.086s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.015s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.005s



... Processing :  satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 3.687s




Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 0.475s
Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 10.7969s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.034s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 1.559s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 1.742s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 1.44s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 3.9879s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 1.87s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.038s



... Processing :  satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 3.251s




Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 0.424s
Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 9.0239s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.029s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 1.385s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 1.431s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 1.097s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 3.7269s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 1.479s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.03s



... Processing :  shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 31.3796s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 1.1459s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 89.3208s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.043s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 6.4579s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 19.4917s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 26.7226s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 28.4786s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 91.7838s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 0.061s



... Processing :  vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2797, precision @ rank n:0.0, execution time: 0.11s
Cluster-based Local Outlier Factor ROC:0.3908, precision @ rank n:0.0, execution time: 0.081s
Feature Bagging ROC:0.3027, precision @ rank n:0.0, execution time: 0.055s
Histogram-base Outlier Detection (HBOS) ROC:0.2695, precision @ rank n:0.0, execution time: 0.004s




Isolation Forest ROC:0.3576, precision @ rank n:0.0, execution time: 0.509s
K Nearest Neighbors (KNN) ROC:0.318, precision @ rank n:0.0, execution time: 0.021s
Local Outlier Factor (LOF) ROC:0.318, precision @ rank n:0.0, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.3321, precision @ rank n:0.0, execution time: 0.077s
One-class SVM (OCSVM) ROC:0.4087, precision @ rank n:0.0, execution time: 0.003s
Principal Component Analysis (PCA) ROC:0.3397, precision @ rank n:0.0, execution time: 0.004s



... Processing :  vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9521, precision @ rank n:0.4706, execution time: 0.598s
Cluster-based Local Outlier Factor ROC:0.9278, precision @ rank n:0.4118, execution time: 0.137s




Feature Bagging ROC:0.9385, precision @ rank n:0.3529, execution time: 0.427s
Histogram-base Outlier Detection (HBOS) ROC:0.6758, precision @ rank n:0.1765, execution time: 0.008s
Isolation Forest ROC:0.7469, precision @ rank n:0.1176, execution time: 0.633s
K Nearest Neighbors (KNN) ROC:0.9568, precision @ rank n:0.5294, execution time: 0.145s
Local Outlier Factor (LOF) ROC:0.9345, precision @ rank n:0.4118, execution time: 0.051s
Minimum Covariance Determinant (MCD) ROC:0.6779, precision @ rank n:0.0, execution time: 1.2119s
One-class SVM (OCSVM) ROC:0.7415, precision @ rank n:0.2941, execution time: 0.061s
Principal Component Analysis (PCA) ROC:0.5787, precision @ rank n:0.1176, execution time: 0.004s



... Processing :  wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9232, precision @ rank n:0.3, execution time: 0.176s
Cluster-based Local Outlier Factor ROC:0.9063, precision @ rank n:0.6, execution time: 0.094s




Feature Bagging ROC:0.9415, precision @ rank n:0.5, execution time: 0.109s
Histogram-base Outlier Detection (HBOS) ROC:0.9592, precision @ rank n:0.7, execution time: 0.015s
Isolation Forest ROC:0.9451, precision @ rank n:0.5, execution time: 0.534s
K Nearest Neighbors (KNN) ROC:0.9437, precision @ rank n:0.5, execution time: 0.036s
Local Outlier Factor (LOF) ROC:0.9352, precision @ rank n:0.4, execution time: 0.01s
Minimum Covariance Determinant (MCD) ROC:0.8986, precision @ rank n:0.4, execution time: 0.114s
One-class SVM (OCSVM) ROC:0.9408, precision @ rank n:0.5, execution time: 0.01s
Principal Component Analysis (PCA) ROC:0.9324, precision @ rank n:0.6, execution time: 0.005s




In [10]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8781,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4428,0.7852,0.4641,0.8822,0.5442,0.3824,0.4584,0.3486,0.4972,0.504
0,pendigits,6870,16,2.2707,0.7008,0.9609,0.4687,0.9294,0.9482,0.7602,0.481,0.8271,0.93,0.9332


In [12]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,optdigits,5216,64,2.8758,0.0161,0.0,0.0484,0.2581,0.0161,0.0,0.0484,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0308,0.3077,0.0462,0.2615,0.2615,0.0462,0.0462,0.0615,0.2923,0.3385


In [11]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,2.583,2.6269,0.721,3.6039,0.867,0.168,0.103,1.549,0.047,0.086
0,cardio,1831,21,9.6122,0.827,0.261,1.139,0.013,0.721,0.265,0.135,0.88,0.118,0.008
0,glass,214,9,4.2056,0.097,0.084,0.056,0.005,0.522,0.018,0.005,0.067,0.003,0.004
0,ionosphere,351,33,35.8974,0.172,0.083,0.106,0.02,0.547,0.036,0.011,0.124,0.008,0.007
0,letter,1600,32,6.25,0.767,0.175,1.041,0.019,0.798,0.304,0.118,1.9129,0.127,0.016
0,lympho,148,18,4.0541,0.102,0.074,0.05,0.01,0.552,0.014,0.004,0.07,0.002,0.003
0,mnist,7603,100,9.2069,12.9019,1.3759,79.0779,0.097,4.1559,10.9559,9.5769,6.3999,7.4823,0.273
0,musk,3062,166,3.1679,4.213,0.761,19.3614,0.111,2.5989,2.5509,2.009,26.5396,1.264,0.2489
0,optdigits,5216,64,2.8758,3.976,0.5959,18.9287,0.06,3.239,3.384,2.6189,4.0099,2.052,0.173
0,pendigits,6870,16,2.2707,3.183,0.5429,6.979,0.016,1.175,1.085,0.845,3.5999,1.472,0.014
