In [1]:
import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

# Import Pyod and the methods

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging



# Import Metrics Package

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [4]:
mat_file_list=['arrhythmia.mat','cardio.mat','glass.mat','ionosphere.mat','letter.mat','lympho.mat','mnist.mat','musk.mat','optdigits.mat','pendigits.mat','pima.mat','satellite.mat','satimage-2.mat','shuttle.mat','shuttle.mat','vertebral.mat','vowels.mat','wbc.mat']

In [5]:
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

# Loading mat file

In [6]:
data=loadmat("cardio.mat")


In [7]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [8]:
len(data)

5

In [9]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc','PCA','MCD','OCSVM','LOF','CBLOF','KNN','HBOS','ABOD','IFOREST','FEATUREBAGGING']

# Precision Time and Roc evolution tables creation 

In [10]:
roc_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
time_df=pd.DataFrame(columns=df_columns)
print(roc_df,prn_df,time_df)

Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: []


# Exploring Mat files

In [13]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('C:/Users/udais/Downloads/Day-29_PROJECT1/Anamoly_detec_data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)
 
        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
             'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)




... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 2.8322s




Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 2.0942s
Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 0.7226s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 3.101s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 0.7001s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.1094s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0912s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 2.0434s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0625s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0938s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 0.629s




Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 0.2254s
Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 1.0553s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.0156s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 0.6906s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.2603s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.1482s




Minimum Covariance Determinant (MCD) ROC:0.8781, precision @ rank n:0.3906, execution time: 1.0642s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.1101s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.008s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.0726s
Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.0599s
Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.0625s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.0s




Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 0.5925s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.032s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.016s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.1383s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.0s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.1451s




Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.0927s
Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.12s
Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.016s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 0.7102s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.032s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.0058s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.2086s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.008s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 0.5289s
Cluster-based Local Outlier Factor ROC:0.7423, precision @ 



Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 0.9722s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.016s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 0.6827s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.2147s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.1049s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 2.6364s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.0985s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.016s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.0469s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, execution time: 0.0672s
Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.0626s




Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 0.4831s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0156s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0156s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.0781s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.0s

... Processing mnist.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 9.4146s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 1.0692s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 64.0632s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.0883s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 3.7571s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 10.4468s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 9.136s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 10.0391s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 5.5553s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 0.1785s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 3.9915s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.4316s
Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 15.8044s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.0938s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 2.2745s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 2.3039s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 2.0226s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 39.4933s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.2552s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.2713s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 3.3432s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 0.4381s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 17.0437s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.0469s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 1.5366s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 2.1945s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 2.0536s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 3.6379s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 1.4826s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.0695s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 2.4274s




Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 0.3237s
Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 5.8534s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.0156s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 1.1137s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 0.8464s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 0.7399s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 3.7283s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 1.2405s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.0s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 0.22s
Cluster-based Local Outlier Factor ROC:0.684, precision @ 



Feature Bagging ROC:0.6446, precision @ rank n:0.4468, execution time: 0.1385s
Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.0156s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.5783s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.0625s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.0156s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.1302s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.024s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.008s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 2.7394s




Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 0.3757s
Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 9.2998s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.0312s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 1.3994s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 1.4054s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 1.2856s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 4.758s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 1.4973s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.0312s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 2.442s




Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 0.3146s
Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 7.5562s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.0313s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 1.1908s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 1.0821s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 0.9402s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 4.6891s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 1.1903s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.0469s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 22.8839s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 0.9407s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 69.6819s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.0469s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 5.3621s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 12.7784s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 16.2119s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 18.7412s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 62.2066s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 0.08s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6227, precision @ rank n:0.1938, execution time: 22.8625s




Cluster-based Local Outlier Factor ROC:0.6578, precision @ rank n:0.2262, execution time: 0.9755s
Feature Bagging ROC:0.5023, precision @ rank n:0.0669, execution time: 70.5941s
Histogram-base Outlier Detection (HBOS) ROC:0.9868, precision @ rank n:0.9386, execution time: 0.0156s
Isolation Forest ROC:0.9975, precision @ rank n:0.9563, execution time: 5.2117s
K Nearest Neighbors (KNN) ROC:0.6549, precision @ rank n:0.2213, execution time: 11.961s
Local Outlier Factor (LOF) ROC:0.5336, precision @ rank n:0.155, execution time: 14.6184s








Minimum Covariance Determinant (MCD) ROC:0.9897, precision @ rank n:0.7477, execution time: 16.7306s
One-class SVM (OCSVM) ROC:0.9923, precision @ rank n:0.9605, execution time: 58.6894s
Principal Component Analysis (PCA) ROC:0.9907, precision @ rank n:0.9577, execution time: 0.0781s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.3688, precision @ rank n:0.0, execution time: 0.0881s
Cluster-based Local Outlier Factor ROC:0.3891, precision @ rank n:0.0, execution time: 0.0975s
Feature Bagging ROC:0.3688, precision @ rank n:0.0, execution time: 0.0738s
Histogram-base Outlier Detection (HBOS) ROC:0.3617, precision @ rank n:0.0, execution time: 0.0156s




Isolation Forest ROC:0.3539, precision @ rank n:0.0, execution time: 0.627s
K Nearest Neighbors (KNN) ROC:0.3562, precision @ rank n:0.0, execution time: 0.0206s
Local Outlier Factor (LOF) ROC:0.3562, precision @ rank n:0.0, execution time: 0.0s
Minimum Covariance Determinant (MCD) ROC:0.4203, precision @ rank n:0.0, execution time: 0.0762s
One-class SVM (OCSVM) ROC:0.4078, precision @ rank n:0.0, execution time: 0.0s
Principal Component Analysis (PCA) ROC:0.3633, precision @ rank n:0.0625, execution time: 0.0066s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8982, precision @ rank n:0.2857, execution time: 0.5498s
Cluster-based Local Outlier Factor ROC:0.8608, precision @ rank n:0.2143, execution time: 0.1296s




Feature Bagging ROC:0.9124, precision @ rank n:0.2143, execution time: 0.3318s
Histogram-base Outlier Detection (HBOS) ROC:0.7567, precision @ rank n:0.2143, execution time: 0.012s
Isolation Forest ROC:0.7758, precision @ rank n:0.2143, execution time: 0.8882s
K Nearest Neighbors (KNN) ROC:0.9746, precision @ rank n:0.3571, execution time: 0.1298s
Local Outlier Factor (LOF) ROC:0.9192, precision @ rank n:0.2857, execution time: 0.0702s
Minimum Covariance Determinant (MCD) ROC:0.6966, precision @ rank n:0.0714, execution time: 1.2496s
One-class SVM (OCSVM) ROC:0.8426, precision @ rank n:0.2857, execution time: 0.0452s
Principal Component Analysis (PCA) ROC:0.6907, precision @ rank n:0.2143, execution time: 0.0s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9573, precision @ rank n:0.5556, execution time: 0.0956s




Cluster-based Local Outlier Factor ROC:0.9751, precision @ rank n:0.6667, execution time: 0.0741s
Feature Bagging ROC:0.979, precision @ rank n:0.7778, execution time: 0.1s
Histogram-base Outlier Detection (HBOS) ROC:0.9876, precision @ rank n:0.6667, execution time: 0.0148s
Isolation Forest ROC:0.9674, precision @ rank n:0.5556, execution time: 0.5799s
K Nearest Neighbors (KNN) ROC:0.9713, precision @ rank n:0.4444, execution time: 0.0171s
Local Outlier Factor (LOF) ROC:0.979, precision @ rank n:0.6667, execution time: 0.0s
Minimum Covariance Determinant (MCD) ROC:0.9658, precision @ rank n:0.5556, execution time: 0.1472s
One-class SVM (OCSVM) ROC:0.9782, precision @ rank n:0.6667, execution time: 0.0129s
Principal Component Analysis (PCA) ROC:0.9775, precision @ rank n:0.6667, execution time: 0.0s


In [14]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8781,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4428,0.7852,0.4641,0.8822,0.5442,0.3824,0.4584,0.3486,0.4972,0.504
0,pendigits,6870,16,2.2707,0.7008,0.9609,0.4687,0.9294,0.9482,0.7602,0.481,0.8271,0.93,0.9332


In [15]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,optdigits,5216,64,2.8758,0.0161,0.0,0.0484,0.2581,0.0161,0.0,0.0484,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0308,0.3077,0.0462,0.2615,0.2615,0.0462,0.0462,0.0615,0.2923,0.3385


In [16]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,2.8322,2.0942,0.7226,3.101,0.7001,0.1094,0.0912,2.0434,0.0625,0.0938
0,cardio,1831,21,9.6122,0.629,0.2254,1.0553,0.0156,0.6906,0.2603,0.1482,1.0642,0.1101,0.008
0,glass,214,9,4.2056,0.0726,0.0599,0.0625,0.0,0.5925,0.032,0.016,0.1383,0.008,0.0
0,ionosphere,351,33,35.8974,0.1451,0.0927,0.12,0.016,0.7102,0.032,0.0058,0.2086,0.008,0.008
0,letter,1600,32,6.25,0.5289,0.1606,0.9722,0.016,0.6827,0.2147,0.1049,2.6364,0.0985,0.016
0,lympho,148,18,4.0541,0.0469,0.0672,0.0626,0.0158,0.4831,0.0156,0.0156,0.0781,0.0,0.0
0,mnist,7603,100,9.2069,9.4146,1.0692,64.0632,0.0883,3.7571,10.4468,9.136,10.0391,5.5553,0.1785
0,musk,3062,166,3.1679,3.9915,0.4316,15.8044,0.0938,2.2745,2.3039,2.0226,39.4933,1.2552,0.2713
0,optdigits,5216,64,2.8758,3.3432,0.4381,17.0437,0.0469,1.5366,2.1945,2.0536,3.6379,1.4826,0.0695
0,pendigits,6870,16,2.2707,2.4274,0.3237,5.8534,0.0156,1.1137,0.8464,0.7399,3.7283,1.2405,0.0
