# PROJECT DAY 1
___Sarthak Kumar Das___
sarthakkdas.official@gmail.com

# Anomaly Detection

#### Importing essential modules¶ 

In [18]:

import os
import sys
import numpy as np
import pandas as pd
from time import time 
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

#### Importing pyod methods


In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

#### Importing metrics

In [3]:

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

#### Assigning all the datasets to a list

In [5]:


mat_file_list = ['arrhythmia.mat','cardio.mat',
                 'glass.mat','ionosphere.mat',
                 'letter.mat','lympho.mat',
                 'mnist.mat','musk.mat',
                 'optdigits.mat','pendigits.mat',
                 'pima.mat','satellite.mat',
                 'satimage-2.mat','shuttle.mat',
                 'vertebral.mat','vowels.mat','wbc.mat']

In [6]:

mat_file_list


['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

In [7]:
len(mat_file_list)

17

#### Making a list of the column name to record the result

In [8]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc','PCA','MCD','OCSVM','LOF','CBLOF','KNN','HBOS','ABOD','IFOREST','FEATUREBAGGING']

Creating empty dataframes

ROC Dataframe to record all Roc values performed on each dataset

Precison Dataframe to record all Precison values performed on each dataset

Execution Time Dataframe to record the time taken to perform algorithm on each dataset, 
So as to find the algorithm which takes minimum amount of time and gives best accuracy


#### 1.) ROC Dataframe

In [10]:
roc_df=pd.DataFrame(columns=df_columns)
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING


#### 2.) Precison Dataframe

In [11]:

prn_df=pd.DataFrame(columns=df_columns)
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING


#### 3.) Execution Time Dataframe

In [12]:
time_df=pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING


#### Load one Dataframe to check values of X and y

In [14]:

data_1 = loadmat("vowels.mat") 
data_1

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-26 08:42:13 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.58046914, -0.90253404,  0.61789919, ...,  1.60463715,
         -0.6230598 , -0.38312549],
        [ 0.78437493, -1.07736635,  0.6157809 , ...,  1.26023551,
         -0.42333934, -0.2877912 ],
        [ 0.79129238, -1.08624216,  0.66977272, ...,  1.08179729,
         -0.26720104, -0.17220348],
        ...,
        [ 0.9470763 ,  0.35810832,  0.27472497, ..., -1.08832841,
          0.3271257 ,  1.69283401],
        [ 1.58485142,  0.69359118, -0.37568588, ..., -3.07682047,
         -0.24109405,  1.94433536],
        [ 2.32735022,  0.38281412,  0.77590669, ..., -0.48257003,
         -0.59043614, -0.72199018]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}


Inference:

Mat files are in form of Dictionary 

In the file, header , version , globals are predefined clases

X and y are the variables we are going to use, Like in ML we don't need to define the X & y seperately.

X & y are 2D numpy arrays

#### Exploring Mat files and finding best Algorithm to detect Anomaly

In [19]:
# Creating random state
random_state = np.random.RandomState(42)

# Processing mat files one by one : 
for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()  #ravel() function converts 2D to 1D
    
    # Counting Outlier :
    
    # Counts the number of non-zero values in the array y and divide by length of y : It gives outlier in fraction
    outliers_fraction = np.count_nonzero(y) / len(y)
    
    # Calculating Outlier percentage
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # Construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

    # Spliting Data into : 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=random_state)
    
    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # Applying all the algorithms and storing thier result in a dictionary format:
    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(contamination=outliers_fraction),
                   
                   'Cluster-based Local Outlier Factor': CBLOF(contamination=outliers_fraction, check_estimator=False,
                                                               random_state=random_state),
                   
                   'Feature Bagging': FeatureBagging(contamination=outliers_fraction, random_state=random_state),
                   
                   'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction),
                   
                   'Isolation Forest': IForest(contamination=outliers_fraction, random_state=random_state),
                   
                   'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
                   
                   'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction),
                   
                   'Minimum Covariance Determinant (MCD)': MCD(contamination=outliers_fraction, random_state=random_state),
                   
                   'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
                   
                   'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction, random_state=random_state),
   }


    # Calculating Time taken for each algorithm
    for clf_name, clf in classifiers.items():
        # Initialize the start time 
        t0 = time() 
        
        # Fit( Train )the data
        clf.fit(X_train_norm) 
        
        # Predicting Value on Xtest
        test_scores = clf.decision_function(X_test_norm)  
        
        # Final Time
        t1 = time()   
        
        # Total time duration : t1 - t0
        duration = round(t1 - t0, ndigits=4) 
        
        # Append duration in time list
        time_list.append(duration)

        #Calculating roc and precision value of the algorithm
        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        # Print the roc , precision and executing time 
        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        # Append roc and precision value to their respective list
        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)
    
    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 5.2825s
Cluster-based Local Outlier Factor ROC:0.7789, precision @ rank n:0.4643, execution time: 2.3158s
Feature Bagging ROC:0.7796, precision @ rank n:0.4643, execution time: 0.5949s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 1.4915s




Isolation Forest ROC:0.8637, precision @ rank n:0.6071, execution time: 0.4461s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0852s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0729s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 1.4137s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0692s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.1253s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5892, precision @ rank n:0.1918, execution time: 0.4131s
Cluster-based Local Outlier Factor ROC:0.8845, precision @ rank n:0.4932, execution time: 0.1464s
Feature Bagging ROC:0.6385, precision @ rank n:0.1781, execution time: 0.8785s
Histogram-base Outlier Detection (HBOS) ROC:0.8373, precision @ rank n:0.4521, execution time: 0.007s




Isolation Forest ROC:0.951, precision @ rank n:0.6027, execution time: 0.327s
K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n:0.3562, execution time: 0.1555s
Local Outlier Factor (LOF) ROC:0.588, precision @ rank n:0.1507, execution time: 0.1053s




Minimum Covariance Determinant (MCD) ROC:0.8534, precision @ rank n:0.411, execution time: 0.6187s
One-class SVM (OCSVM) ROC:0.9478, precision @ rank n:0.5342, execution time: 0.0948s
Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n:0.6849, execution time: 0.0326s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6951, precision @ rank n:0.25, execution time: 0.0541s
Cluster-based Local Outlier Factor ROC:0.811, precision @ rank n:0.25, execution time: 0.0532s
Feature Bagging ROC:0.7073, precision @ rank n:0.25, execution time: 0.0331s
Histogram-base Outlier Detection (HBOS) ROC:0.7073, precision @ rank n:0.0, execution time: 0.002s




Isolation Forest ROC:0.7134, precision @ rank n:0.25, execution time: 0.2563s
K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n:0.25, execution time: 0.012s
Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n:0.25, execution time: 0.002s
Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n:0.0, execution time: 0.0913s
One-class SVM (OCSVM) ROC:0.6585, precision @ rank n:0.25, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.686, precision @ rank n:0.25, execution time: 0.001s

... Processing ionosphere.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9181, precision @ rank n:0.8431, execution time: 0.0867s
Cluster-based Local Outlier Factor ROC:0.9176, precision @ rank n:0.8039, execution time: 0.0431s
Feature Bagging ROC:0.9303, precision @ rank n:0.8039, execution time: 0.0782s
Histogram-base Outlier Detection (HBOS) ROC:0.6052, precision @ rank n:0.3922, execution time: 0.007s




Isolation Forest ROC:0.8516, precision @ rank n:0.6078, execution time: 0.2954s
K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n:0.8824, execution time: 0.0191s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.7843, execution time: 0.007s
Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n:0.8627, execution time: 0.0732s
One-class SVM (OCSVM) ROC:0.8257, precision @ rank n:0.6863, execution time: 0.006s
Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n:0.5686, execution time: 0.003s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8783, precision @ rank n:0.4375, execution time: 0.4457s
Cluster-based Local Outlier Factor ROC:0.7783, precision @ rank n:0.1875, execution time: 0.1584s
Feature Bagging ROC:0.8947, precision @ rank n:0.4062, execution time: 0.905s
Histogram-base Outlier Detection (HBOS) ROC:0.6063, precision @ rank n:0.0938, execution time: 0.0165s




Isolation Forest ROC:0.6279, precision @ rank n:0.0625, execution time: 0.5215s
K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n:0.3125, execution time: 0.1599s
Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n:0.3438, execution time: 0.1279s
Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n:0.1875, execution time: 1.9704s
One-class SVM (OCSVM) ROC:0.5927, precision @ rank n:0.125, execution time: 0.1038s
Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n:0.125, execution time: 0.0075s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9831, precision @ rank n:0.0, execution time: 0.0722s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.0612s
Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.0331s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.0045s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.2457s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.007s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.003s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0381s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.001s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.0025s

... Processing mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7628, precision @ rank n:0.3367, execution time: 8.1541s
Cluster-based Local Outlier Factor ROC:0.8389, precision @ rank n:0.3912, execution time: 1.3296s
Feature Bagging ROC:0.7157, precision @ rank n:0.3741, execution time: 53.0252s
Histogram-base Outlier Detection (HBOS) ROC:0.5766, precision @ rank n:0.1361, execution time: 0.0602s




Isolation Forest ROC:0.7915, precision @ rank n:0.2687, execution time: 1.9721s
K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n:0.432, execution time: 7.0008s
Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n:0.3673, execution time: 6.8617s




Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n:0.2653, execution time: 2.9773s
One-class SVM (OCSVM) ROC:0.854, precision @ rank n:0.3946, execution time: 4.9914s
Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n:0.3878, execution time: 0.2637s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2161, precision @ rank n:0.1, execution time: 2.2692s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.3921s
Feature Bagging ROC:0.473, precision @ rank n:0.125, execution time: 14.2376s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.975, execution time: 0.0562s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 1.3254s
K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n:0.175, execution time: 2.0194s
Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n:0.125, execution time: 1.9317s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 12.1717s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.2563s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.1584s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4894, precision @ rank n:0.0152, execution time: 2.8138s
Cluster-based Local Outlier Factor ROC:0.7901, precision @ rank n:0.0, execution time: 0.6578s
Feature Bagging ROC:0.5062, precision @ rank n:0.0303, execution time: 13.8118s
Histogram-base Outlier Detection (HBOS) ROC:0.8774, precision @ rank n:0.2121, execution time: 0.1167s




Isolation Forest ROC:0.686, precision @ rank n:0.0303, execution time: 1.5619s
K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n:0.0, execution time: 2.0445s
Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n:0.0303, execution time: 1.8574s




Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n:0.0, execution time: 1.4216s
One-class SVM (OCSVM) ROC:0.5171, precision @ rank n:0.0, execution time: 1.7071s
Principal Component Analysis (PCA) ROC:0.526, precision @ rank n:0.0, execution time: 0.0551s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.667, precision @ rank n:0.0526, execution time: 1.5629s
Cluster-based Local Outlier Factor ROC:0.8082, precision @ rank n:0.1579, execution time: 0.3028s
Feature Bagging ROC:0.4889, precision @ rank n:0.0526, execution time: 4.3418s
Histogram-base Outlier Detection (HBOS) ROC:0.9348, precision @ rank n:0.2632, execution time: 0.017s




Isolation Forest ROC:0.939, precision @ rank n:0.3333, execution time: 0.8918s
K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n:0.0702, execution time: 0.8408s
Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n:0.0702, execution time: 1.0361s
Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n:0.0877, execution time: 2.5153s
One-class SVM (OCSVM) ROC:0.9235, precision @ rank n:0.3158, execution time: 1.052s
Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n:0.3158, execution time: 0.008s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7163, precision @ rank n:0.5253, execution time: 0.1601s
Cluster-based Local Outlier Factor ROC:0.67, precision @ rank n:0.4949, execution time: 0.0908s
Feature Bagging ROC:0.6448, precision @ rank n:0.4444, execution time: 0.1183s
Histogram-base Outlier Detection (HBOS) ROC:0.711, precision @ rank n:0.5354, execution time: 0.002s




Isolation Forest ROC:0.6829, precision @ rank n:0.5253, execution time: 0.2848s
K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n:0.5859, execution time: 0.0351s
Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n:0.4646, execution time: 0.0111s
Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n:0.5152, execution time: 0.0611s
One-class SVM (OCSVM) ROC:0.6561, precision @ rank n:0.5051, execution time: 0.013s
Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n:0.5354, execution time: 0.002s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5653, precision @ rank n:0.3962, execution time: 1.9326s
Cluster-based Local Outlier Factor ROC:0.7241, precision @ rank n:0.5412, execution time: 0.7818s
Feature Bagging ROC:0.572, precision @ rank n:0.4, execution time: 9.7684s
Histogram-base Outlier Detection (HBOS) ROC:0.7486, precision @ rank n:0.57, execution time: 0.0211s




Isolation Forest ROC:0.6838, precision @ rank n:0.5812, execution time: 0.9121s
K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n:0.4988, execution time: 1.1572s
Local Outlier Factor (LOF) ROC:0.572, precision @ rank n:0.395, execution time: 1.0886s
Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n:0.6762, execution time: 2.6263s
One-class SVM (OCSVM) ROC:0.6478, precision @ rank n:0.5225, execution time: 1.4531s
Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n:0.465, execution time: 0.0251s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.2333, execution time: 1.757s
Cluster-based Local Outlier Factor ROC:0.9998, precision @ rank n:0.9333, execution time: 0.35s
Feature Bagging ROC:0.5235, precision @ rank n:0.1667, execution time: 7.5331s
Histogram-base Outlier Detection (HBOS) ROC:0.9784, precision @ rank n:0.6, execution time: 0.0171s




Isolation Forest ROC:0.9955, precision @ rank n:0.8667, execution time: 0.7301s
K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n:0.4333, execution time: 0.9855s
Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n:0.1667, execution time: 0.8617s
Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n:0.6667, execution time: 2.4086s
One-class SVM (OCSVM) ROC:0.9997, precision @ rank n:0.9, execution time: 1.2883s
Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n:0.7333, execution time: 0.0291s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6171, precision @ rank n:0.2003, execution time: 18.5001s
Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n:0.2025, execution time: 1.1959s
Feature Bagging ROC:0.4725, precision @ rank n:0.0257, execution time: 101.2403s
Histogram-base Outlier Detection (HBOS) ROC:0.9871, precision @ rank n:0.9985, execution time: 0.0256s




Isolation Forest ROC:0.9976, precision @ rank n:0.9501, execution time: 4.1845s
K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n:0.212, execution time: 16.0182s
Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n:0.1548, execution time: 16.5976s




Minimum Covariance Determinant (MCD) ROC:0.9899, precision @ rank n:0.7395, execution time: 12.6409s
One-class SVM (OCSVM) ROC:0.9934, precision @ rank n:0.956, execution time: 63.5801s
Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n:0.9516, execution time: 0.0698s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5366, precision @ rank n:0.2143, execution time: 0.0565s
Cluster-based Local Outlier Factor ROC:0.439, precision @ rank n:0.0714, execution time: 0.049s
Feature Bagging ROC:0.5279, precision @ rank n:0.1429, execution time: 0.0402s
Histogram-base Outlier Detection (HBOS) ROC:0.3506, precision @ rank n:0.0, execution time: 0.0037s




Isolation Forest ROC:0.3789, precision @ rank n:0.0, execution time: 0.2554s
K Nearest Neighbors (KNN) ROC:0.4573, precision @ rank n:0.0714, execution time: 0.011s
Local Outlier Factor (LOF) ROC:0.4983, precision @ rank n:0.1429, execution time: 0.0035s
Minimum Covariance Determinant (MCD) ROC:0.4085, precision @ rank n:0.0714, execution time: 0.1104s
One-class SVM (OCSVM) ROC:0.4686, precision @ rank n:0.0714, execution time: 0.0011s
Principal Component Analysis (PCA) ROC:0.4085, precision @ rank n:0.0, execution time: 0.001s

... Processing vowels.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9616, precision @ rank n:0.6316, execution time: 0.2718s
Cluster-based Local Outlier Factor ROC:0.8963, precision @ rank n:0.3158, execution time: 0.1309s
Feature Bagging ROC:0.9365, precision @ rank n:0.3684, execution time: 0.3204s
Histogram-base Outlier Detection (HBOS) ROC:0.6876, precision @ rank n:0.1579, execution time: 0.004s




Isolation Forest ROC:0.8214, precision @ rank n:0.1579, execution time: 0.3421s
K Nearest Neighbors (KNN) ROC:0.9734, precision @ rank n:0.4737, execution time: 0.0778s
Local Outlier Factor (LOF) ROC:0.9398, precision @ rank n:0.3684, execution time: 0.0342s
Minimum Covariance Determinant (MCD) ROC:0.7243, precision @ rank n:0.1053, execution time: 1.4815s
One-class SVM (OCSVM) ROC:0.8163, precision @ rank n:0.2632, execution time: 0.0436s
Principal Component Analysis (PCA) ROC:0.6297, precision @ rank n:0.1579, execution time: 0.0025s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.921, precision @ rank n:0.375, execution time: 0.0832s
Cluster-based Local Outlier Factor ROC:0.9149, precision @ rank n:0.375, execution time: 0.0622s
Feature Bagging ROC:0.9271, precision @ rank n:0.375, execution time: 0.0767s
Histogram-base Outlier Detection (HBOS) ROC:0.9479, precision @ rank n:0.5, execution time: 0.007s




Isolation Forest ROC:0.9418, precision @ rank n:0.625, execution time: 0.2492s
K Nearest Neighbors (KNN) ROC:0.9444, precision @ rank n:0.5, execution time: 0.0186s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.375, execution time: 0.0085s
Minimum Covariance Determinant (MCD) ROC:0.9288, precision @ rank n:0.5, execution time: 0.0975s
One-class SVM (OCSVM) ROC:0.9358, precision @ rank n:0.375, execution time: 0.005s
Principal Component Analysis (PCA) ROC:0.9262, precision @ rank n:0.375, execution time: 0.0035s


### ROC Dataframe

In [20]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.7687,0.7789,0.7796,0.8511,0.8637,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5892,0.8845,0.6385,0.8373,0.951,0.734,0.588,0.8534,0.9478,0.9616
0,glass,214,9,4.2056,0.6951,0.811,0.7073,0.7073,0.7134,0.8384,0.7043,0.8293,0.6585,0.686
0,ionosphere,351,33,35.8974,0.9181,0.9176,0.9303,0.6052,0.8516,0.932,0.9227,0.9669,0.8257,0.7941
0,letter,1600,32,6.25,0.8783,0.7783,0.8947,0.6063,0.6279,0.8573,0.8765,0.8061,0.5927,0.5216
0,lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.7628,0.8389,0.7157,0.5766,0.7915,0.8498,0.7195,0.8713,0.854,0.8534
0,musk,3062,166,3.1679,0.2161,1.0,0.473,0.9999,1.0,0.8009,0.4629,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4894,0.7901,0.5062,0.8774,0.686,0.406,0.5277,0.3822,0.5171,0.526
0,pendigits,6870,16,2.2707,0.667,0.8082,0.4889,0.9348,0.939,0.7371,0.4965,0.8204,0.9235,0.9309


### Precision Dataframe

In [21]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1918,0.4932,0.1781,0.4521,0.6027,0.3562,0.1507,0.411,0.5342,0.6849
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8431,0.8039,0.8039,0.3922,0.6078,0.8824,0.7843,0.8627,0.6863,0.5686
0,letter,1600,32,6.25,0.4375,0.1875,0.4062,0.0938,0.0625,0.3125,0.3438,0.1875,0.125,0.125
0,lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3367,0.3912,0.3741,0.1361,0.2687,0.432,0.3673,0.2653,0.3946,0.3878
0,musk,3062,166,3.1679,0.1,1.0,0.125,0.975,1.0,0.175,0.125,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0152,0.0,0.0303,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0526,0.1579,0.0526,0.2632,0.3333,0.0702,0.0702,0.0877,0.3158,0.3158


### Execution Time Dataframe

In [22]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,5.2825,2.3158,0.5949,1.4915,0.4461,0.0852,0.0729,1.4137,0.0692,0.1253
0,cardio,1831,21,9.6122,0.4131,0.1464,0.8785,0.007,0.327,0.1555,0.1053,0.6187,0.0948,0.0326
0,glass,214,9,4.2056,0.0541,0.0532,0.0331,0.002,0.2563,0.012,0.002,0.0913,0.001,0.001
0,ionosphere,351,33,35.8974,0.0867,0.0431,0.0782,0.007,0.2954,0.0191,0.007,0.0732,0.006,0.003
0,letter,1600,32,6.25,0.4457,0.1584,0.905,0.0165,0.5215,0.1599,0.1279,1.9704,0.1038,0.0075
0,lympho,148,18,4.0541,0.0722,0.0612,0.0331,0.0045,0.2457,0.007,0.003,0.0381,0.001,0.0025
0,mnist,7603,100,9.2069,8.1541,1.3296,53.0252,0.0602,1.9721,7.0008,6.8617,2.9773,4.9914,0.2637
0,musk,3062,166,3.1679,2.2692,0.3921,14.2376,0.0562,1.3254,2.0194,1.9317,12.1717,1.2563,0.1584
0,optdigits,5216,64,2.8758,2.8138,0.6578,13.8118,0.1167,1.5619,2.0445,1.8574,1.4216,1.7071,0.0551
0,pendigits,6870,16,2.2707,1.5629,0.3028,4.3418,0.017,0.8918,0.8408,1.0361,2.5153,1.052,0.008
