In [2]:
import os
import sys
import numpy as np
import pandas as pd
    
from sklearn.model_selection import train_test_split
from scipy.io import loadmat
    
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging
from time import time            
    
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
    
mat_file_list = ['arrhythmia.mat',
                    'cardio.mat',
                    'glass.mat',
                    'ionosphere.mat',
                    'letter.mat',
                    'lympho.mat',
                    'mnist.mat',
                    'musk.mat',
                    'optdigits.mat',
                    'pendigits.mat',
                    'pima.mat',
                    'satellite.mat',
                    'satimage-2.mat',
                    'shuttle.mat',
                    'vertebral.mat',
                    'vowels.mat',
                    'wbc.mat']
    
    
df_columns = ['Data','#Samples','#Dimensions','Outlier Perc','ABOD','CBLOF','FB','HBOS','IForest','KNN','LOF','MCD','OCSVM','PCA']
    
roc_df = pd.DataFrame(columns=df_columns)
print(roc_df)
    
time_df = pd.DataFrame(columns=df_columns)
print(time_df)
    
prn_df = pd.DataFrame(columns=df_columns)
print(prn_df)
    
print(os)
random_state = np.random.RandomState(42)
    
for i in mat_file_list:
        mat = loadmat(os.path.join('data', i))
        print("\n...processing", i, '...')
            
            
        X = mat['X']
        y = mat['y'].ravel()
        outliers_fraction = np.count_nonzero(y) / len(y)
        outliers_percentage = round(outliers_fraction * 100, ndigits=4)
            
        roc_list = [i[:-4], X.shape[0], X.shape[1], outliers_percentage]
        prn_list = [i[:-4], X.shape[0], X.shape[1], outliers_percentage]
        time_list = [i[:-4], X.shape[0], X.shape[1], outliers_percentage]
            
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = None)
        X_train_norm, X_test_norm = standardizer(X_train, X_test)
            
        classifiers = {'Angle-based outlier detector (ABOD)': ABOD(contamination = outliers_fraction),
                           'Cluster-based Local outlier factor': CBLOF(contamination= outliers_fraction,check_estimator=False,random_state=random_state, alpha=0.75,beta=3),
                           'feature bagging': FeatureBagging(contamination = outliers_fraction, random_state=None),
                           'HIstogram-base outlier detection (HBOS)': HBOS(contamination=outliers_fraction),
                           'Isolation forest': IForest(contamination=outliers_fraction, random_state=None),
                           'K Nearest neibghours (KNN)': KNN(contamination=outliers_fraction),
                           'Local outlier factor': LOF(contamination=outliers_fraction),
                           'Minimum Covariance determination (MCD)': MCD(contamination=outliers_fraction, random_state=None),
                           'One-class SVM (OCSVM)': OCSVM(contamination = outliers_fraction),
                           'Principal component analysis (PCA)': PCA(contamination = outliers_fraction, random_state=None)}
        for clf_name, clf in classifiers.items():
            t0 = time()
            clf.fit(X_train_norm)
            test_scores = clf.decision_function(X_test_norm)
            t1 = time()
            duration = round(t1 - t0, ndigits=4)
            time_list.append(duration)
            
            roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
            prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
            
            print('{clf_name} ROC:{roc}, precision @ rank n: {prn}, execution time: {duration}s'.format(clf_name=clf_name,
                                                                                                        roc=roc, prn=prn, duration=duration))
            roc_list.append(roc)
            prn_list.append(prn)
        
        temp_df = pd.DataFrame(time_list).transpose()
        temp_df.columns = df_columns
        time_df = pd.concat([time_df, temp_df], axis=0)
    
        temp_df = pd.DataFrame(roc_list).transpose()
        temp_df.columns = df_columns
        roc_df = pd.concat([roc_df, temp_df], axis=0)
    
        temp_df = pd.DataFrame(prn_list).transpose()
        temp_df.columns = df_columns
        prn_df = pd.concat([prn_df, temp_df], axis=0)
    
print("\n the prn_df")
print(prn_df)
print("\n the time_df")
print(time_df)
print("\n the roc_df")
print(roc_df)

Empty DataFrame
Columns: [Data, #Samples, #Dimensions, Outlier Perc, ABOD, CBLOF, FB, HBOS, IForest, KNN, LOF, MCD, OCSVM, PCA]
Index: []
Empty DataFrame
Columns: [Data, #Samples, #Dimensions, Outlier Perc, ABOD, CBLOF, FB, HBOS, IForest, KNN, LOF, MCD, OCSVM, PCA]
Index: []
Empty DataFrame
Columns: [Data, #Samples, #Dimensions, Outlier Perc, ABOD, CBLOF, FB, HBOS, IForest, KNN, LOF, MCD, OCSVM, PCA]
Index: []
<module 'os' from 'C:\\Users\\User\\anaconda3\\lib\\os.py'>

...processing arrhythmia.mat ...
Angle-based outlier detector (ABOD) ROC:0.7098, precision @ rank n: 0.2308, execution time: 1.923s




Cluster-based Local outlier factor ROC:0.7311, precision @ rank n: 0.3077, execution time: 1.852s
feature bagging ROC:0.7392, precision @ rank n: 0.2308, execution time: 0.6816s
HIstogram-base outlier detection (HBOS) ROC:0.8011, precision @ rank n: 0.3846, execution time: 2.4298s
Isolation forest ROC:0.748, precision @ rank n: 0.3077, execution time: 0.5367s
K Nearest neibghours (KNN) ROC:0.7248, precision @ rank n: 0.3077, execution time: 0.0979s
Local outlier factor ROC:0.733, precision @ rank n: 0.3077, execution time: 0.0819s




Minimum Covariance determination (MCD) ROC:0.6961, precision @ rank n: 0.1538, execution time: 0.9804s
One-class SVM (OCSVM) ROC:0.7548, precision @ rank n: 0.3077, execution time: 0.053s
Principal component analysis (PCA) ROC:0.7486, precision @ rank n: 0.3077, execution time: 0.067s

...processing cardio.mat ...
Angle-based outlier detector (ABOD) ROC:0.5752, precision @ rank n: 0.2321, execution time: 0.7405s
Cluster-based Local outlier factor ROC:0.7141, precision @ rank n: 0.2679, execution time: 0.1489s




feature bagging ROC:0.5453, precision @ rank n: 0.1429, execution time: 1.0653s
HIstogram-base outlier detection (HBOS) ROC:0.8077, precision @ rank n: 0.4286, execution time: 0.012s
Isolation forest ROC:0.8992, precision @ rank n: 0.4643, execution time: 0.5936s
K Nearest neibghours (KNN) ROC:0.7254, precision @ rank n: 0.3214, execution time: 0.1859s
Local outlier factor ROC:0.5462, precision @ rank n: 0.1964, execution time: 0.1309s




Minimum Covariance determination (MCD) ROC:0.8175, precision @ rank n: 0.4643, execution time: 0.8335s
One-class SVM (OCSVM) ROC:0.9302, precision @ rank n: 0.5179, execution time: 0.1489s
Principal component analysis (PCA) ROC:0.9443, precision @ rank n: 0.5536, execution time: 0.009s

...processing glass.mat ...
Angle-based outlier detector (ABOD) ROC:0.9032, precision @ rank n: 0.0, execution time: 0.0809s
Cluster-based Local outlier factor ROC:0.957, precision @ rank n: 0.3333, execution time: 0.06s




feature bagging ROC:0.914, precision @ rank n: 0.3333, execution time: 0.053s
HIstogram-base outlier detection (HBOS) ROC:0.8602, precision @ rank n: 0.0, execution time: 0.005s
Isolation forest ROC:0.8817, precision @ rank n: 0.0, execution time: 0.3928s
K Nearest neibghours (KNN) ROC:0.957, precision @ rank n: 0.3333, execution time: 0.012s
Local outlier factor ROC:0.9462, precision @ rank n: 0.3333, execution time: 0.005s
Minimum Covariance determination (MCD) ROC:0.8441, precision @ rank n: 0.0, execution time: 0.057s
One-class SVM (OCSVM) ROC:0.8925, precision @ rank n: 0.3333, execution time: 0.003s
Principal component analysis (PCA) ROC:0.828, precision @ rank n: 0.0, execution time: 0.003s

...processing ionosphere.mat ...




Angle-based outlier detector (ABOD) ROC:0.9246, precision @ rank n: 0.8333, execution time: 0.1319s
Cluster-based Local outlier factor ROC:0.8385, precision @ rank n: 0.6111, execution time: 0.082s
feature bagging ROC:0.8706, precision @ rank n: 0.7222, execution time: 0.1059s




HIstogram-base outlier detection (HBOS) ROC:0.5246, precision @ rank n: 0.25, execution time: 0.015s
Isolation forest ROC:0.8536, precision @ rank n: 0.6667, execution time: 0.3978s
K Nearest neibghours (KNN) ROC:0.9429, precision @ rank n: 0.9167, execution time: 0.023s
Local outlier factor ROC:0.8667, precision @ rank n: 0.6944, execution time: 0.012s
Minimum Covariance determination (MCD) ROC:0.9476, precision @ rank n: 0.8889, execution time: 0.0839s
One-class SVM (OCSVM) ROC:0.8468, precision @ rank n: 0.7222, execution time: 0.008s
Principal component analysis (PCA) ROC:0.7802, precision @ rank n: 0.5556, execution time: 0.005s

...processing letter.mat ...
Angle-based outlier detector (ABOD) ROC:0.8594, precision @ rank n: 0.2812, execution time: 0.6636s
Cluster-based Local outlier factor ROC:0.7277, precision @ rank n: 0.125, execution time: 0.1589s




feature bagging ROC:0.8624, precision @ rank n: 0.4062, execution time: 1.1493s
HIstogram-base outlier detection (HBOS) ROC:0.5177, precision @ rank n: 0.0312, execution time: 0.015s
Isolation forest ROC:0.5827, precision @ rank n: 0.0625, execution time: 0.5377s
K Nearest neibghours (KNN) ROC:0.8537, precision @ rank n: 0.3125, execution time: 0.1959s
Local outlier factor ROC:0.8495, precision @ rank n: 0.4062, execution time: 0.1659s
Minimum Covariance determination (MCD) ROC:0.7907, precision @ rank n: 0.125, execution time: 1.631s
One-class SVM (OCSVM) ROC:0.5529, precision @ rank n: 0.0938, execution time: 0.1209s
Principal component analysis (PCA) ROC:0.4863, precision @ rank n: 0.0625, execution time: 0.007s

...processing lympho.mat ...
Angle-based outlier detector (ABOD) ROC:0.9302, precision @ rank n: 0.5, execution time: 0.066s
Cluster-based Local outlier factor ROC:1.0, precision @ rank n: 1.0, execution time: 0.067s
feature bagging ROC:1.0, precision @ rank n: 1.0, executi



Isolation forest ROC:1.0, precision @ rank n: 1.0, execution time: 0.3668s
K Nearest neibghours (KNN) ROC:0.9767, precision @ rank n: 0.5, execution time: 0.008s
Local outlier factor ROC:1.0, precision @ rank n: 1.0, execution time: 0.004s
Minimum Covariance determination (MCD) ROC:0.9419, precision @ rank n: 0.5, execution time: 0.075s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n: 1.0, execution time: 0.003s
Principal component analysis (PCA) ROC:1.0, precision @ rank n: 1.0, execution time: 0.004s

...processing mnist.mat ...




Angle-based outlier detector (ABOD) ROC:0.7648, precision @ rank n: 0.3682, execution time: 10.4945s




Cluster-based Local outlier factor ROC:0.8305, precision @ rank n: 0.4328, execution time: 1.2702s
feature bagging ROC:0.7171, precision @ rank n: 0.3383, execution time: 52.2327s
HIstogram-base outlier detection (HBOS) ROC:0.5667, precision @ rank n: 0.0995, execution time: 0.072s
Isolation forest ROC:0.7894, precision @ rank n: 0.2886, execution time: 2.7413s
K Nearest neibghours (KNN) ROC:0.8493, precision @ rank n: 0.4378, execution time: 8.6806s
Local outlier factor ROC:0.7056, precision @ rank n: 0.3234, execution time: 7.9271s




Minimum Covariance determination (MCD) ROC:0.8745, precision @ rank n: 0.3831, execution time: 4.3203s
One-class SVM (OCSVM) ROC:0.8441, precision @ rank n: 0.398, execution time: 6.3191s
Principal component analysis (PCA) ROC:0.8467, precision @ rank n: 0.3781, execution time: 0.1653s

...processing musk.mat ...
Angle-based outlier detector (ABOD) ROC:0.1883, precision @ rank n: 0.0, execution time: 3.4549s




Cluster-based Local outlier factor ROC:1.0, precision @ rank n: 1.0, execution time: 0.4487s
feature bagging ROC:0.5142, precision @ rank n: 0.2581, execution time: 19.386s
HIstogram-base outlier detection (HBOS) ROC:0.9999, precision @ rank n: 0.9667, execution time: 0.0869s
Isolation forest ROC:0.9993, precision @ rank n: 0.9032, execution time: 2.0537s
K Nearest neibghours (KNN) ROC:0.8243, precision @ rank n: 0.2581, execution time: 3.172s
Local outlier factor ROC:0.5052, precision @ rank n: 0.1935, execution time: 3.209s
Minimum Covariance determination (MCD) ROC:0.9999, precision @ rank n: 0.9677, execution time: 20.7152s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n: 1.0, execution time: 1.582s
Principal component analysis (PCA) ROC:1.0, precision @ rank n: 1.0, execution time: 0.1887s

...processing optdigits.mat ...
Angle-based outlier detector (ABOD) ROC:0.4216, precision @ rank n: 0.0, execution time: 3.8856s




Cluster-based Local outlier factor ROC:0.8176, precision @ rank n: 0.0, execution time: 0.6546s
feature bagging ROC:0.4668, precision @ rank n: 0.025, execution time: 18.2477s
HIstogram-base outlier detection (HBOS) ROC:0.8991, precision @ rank n: 0.2, execution time: 0.057s
Isolation forest ROC:0.776, precision @ rank n: 0.025, execution time: 1.8918s
K Nearest neibghours (KNN) ROC:0.3386, precision @ rank n: 0.0, execution time: 2.5434s
Local outlier factor ROC:0.4613, precision @ rank n: 0.025, execution time: 2.3945s




Minimum Covariance determination (MCD) ROC:0.3744, precision @ rank n: 0.0, execution time: 2.0277s
One-class SVM (OCSVM) ROC:0.5272, precision @ rank n: 0.0, execution time: 2.0217s
Principal component analysis (PCA) ROC:0.5331, precision @ rank n: 0.0, execution time: 0.053s

...processing pendigits.mat ...
Angle-based outlier detector (ABOD) ROC:0.7327, precision @ rank n: 0.1, execution time: 2.6604s




Cluster-based Local outlier factor ROC:0.9167, precision @ rank n: 0.125, execution time: 0.3758s
feature bagging ROC:0.4631, precision @ rank n: 0.05, execution time: 5.2078s
HIstogram-base outlier detection (HBOS) ROC:0.9267, precision @ rank n: 0.25, execution time: 0.013s
Isolation forest ROC:0.9421, precision @ rank n: 0.275, execution time: 0.9354s
K Nearest neibghours (KNN) ROC:0.7487, precision @ rank n: 0.1, execution time: 0.7206s
Local outlier factor ROC:0.4814, precision @ rank n: 0.075, execution time: 0.7455s
Minimum Covariance determination (MCD) ROC:0.8504, precision @ rank n: 0.1, execution time: 3.3849s
One-class SVM (OCSVM) ROC:0.9365, precision @ rank n: 0.225, execution time: 1.7169s
Principal component analysis (PCA) ROC:0.9426, precision @ rank n: 0.2, execution time: 0.011s

...processing pima.mat ...
Angle-based outlier detector (ABOD) ROC:0.6987, precision @ rank n: 0.5488, execution time: 0.3308s
Cluster-based Local outlier factor ROC:0.6791, precision @ rank



feature bagging ROC:0.6199, precision @ rank n: 0.439, execution time: 0.1639s
HIstogram-base outlier detection (HBOS) ROC:0.6806, precision @ rank n: 0.561, execution time: 0.005s
Isolation forest ROC:0.6803, precision @ rank n: 0.5366, execution time: 0.4167s
K Nearest neibghours (KNN) ROC:0.7165, precision @ rank n: 0.561, execution time: 0.042s
Local outlier factor ROC:0.6294, precision @ rank n: 0.4634, execution time: 0.017s
Minimum Covariance determination (MCD) ROC:0.6964, precision @ rank n: 0.561, execution time: 0.9034s
One-class SVM (OCSVM) ROC:0.6395, precision @ rank n: 0.5122, execution time: 0.017s
Principal component analysis (PCA) ROC:0.6553, precision @ rank n: 0.4878, execution time: 0.004s

...processing satellite.mat ...
Angle-based outlier detector (ABOD) ROC:0.5716, precision @ rank n: 0.3771, execution time: 3.1211s




Cluster-based Local outlier factor ROC:0.7758, precision @ rank n: 0.6109, execution time: 0.3568s
feature bagging ROC:0.5421, precision @ rank n: 0.3532, execution time: 8.2009s
HIstogram-base outlier detection (HBOS) ROC:0.7732, precision @ rank n: 0.5802, execution time: 0.025s
Isolation forest ROC:0.7035, precision @ rank n: 0.6041, execution time: 1.0853s
K Nearest neibghours (KNN) ROC:0.6716, precision @ rank n: 0.471, execution time: 1.2672s
Local outlier factor ROC:0.5407, precision @ rank n: 0.3481, execution time: 1.1943s
Minimum Covariance determination (MCD) ROC:0.8007, precision @ rank n: 0.6809, execution time: 3.3599s
One-class SVM (OCSVM) ROC:0.662, precision @ rank n: 0.5324, execution time: 2.1277s
Principal component analysis (PCA) ROC:0.5977, precision @ rank n: 0.4659, execution time: 0.03s

...processing satimage-2.mat ...
Angle-based outlier detector (ABOD) ROC:0.8291, precision @ rank n: 0.15, execution time: 2.8952s




Cluster-based Local outlier factor ROC:0.9996, precision @ rank n: 0.9, execution time: 0.4047s
feature bagging ROC:0.4826, precision @ rank n: 0.0, execution time: 8.5717s
HIstogram-base outlier detection (HBOS) ROC:0.987, precision @ rank n: 0.7, execution time: 0.024s
Isolation forest ROC:0.996, precision @ rank n: 0.9, execution time: 1.0843s
K Nearest neibghours (KNN) ROC:0.9571, precision @ rank n: 0.3, execution time: 1.3332s
Local outlier factor ROC:0.4902, precision @ rank n: 0.0, execution time: 1.2202s
Minimum Covariance determination (MCD) ROC:0.9926, precision @ rank n: 0.45, execution time: 3.0811s
One-class SVM (OCSVM) ROC:0.9994, precision @ rank n: 0.9, execution time: 1.8079s
Principal component analysis (PCA) ROC:0.9847, precision @ rank n: 0.85, execution time: 0.04s

...processing shuttle.mat ...
Angle-based outlier detector (ABOD) ROC:0.6097, precision @ rank n: 0.1762, execution time: 27.7468s




Cluster-based Local outlier factor ROC:0.6445, precision @ rank n: 0.2238, execution time: 0.7855s
feature bagging ROC:0.4893, precision @ rank n: 0.1158, execution time: 104.4294s
HIstogram-base outlier detection (HBOS) ROC:0.9793, precision @ rank n: 0.9475, execution time: 0.026s
Isolation forest ROC:0.9971, precision @ rank n: 0.9574, execution time: 4.2923s
K Nearest neibghours (KNN) ROC:0.6364, precision @ rank n: 0.2034, execution time: 11.377s
Local outlier factor ROC:0.5329, precision @ rank n: 0.1248, execution time: 20.4583s




Minimum Covariance determination (MCD) ROC:0.9897, precision @ rank n: 0.7327, execution time: 19.0752s
One-class SVM (OCSVM) ROC:0.99, precision @ rank n: 0.9525, execution time: 92.1939s
Principal component analysis (PCA) ROC:0.988, precision @ rank n: 0.9465, execution time: 0.062s

...processing vertebral.mat ...
Angle-based outlier detector (ABOD) ROC:0.5097, precision @ rank n: 0.1111, execution time: 0.1059s
Cluster-based Local outlier factor ROC:0.3986, precision @ rank n: 0.1111, execution time: 0.068s
feature bagging ROC:0.478, precision @ rank n: 0.1111, execution time: 0.053s
HIstogram-base outlier detection (HBOS) ROC:0.2998, precision @ rank n: 0.0, execution time: 0.003s




Isolation forest ROC:0.4444, precision @ rank n: 0.1111, execution time: 0.4127s
K Nearest neibghours (KNN) ROC:0.425, precision @ rank n: 0.1111, execution time: 0.013s
Local outlier factor ROC:0.4603, precision @ rank n: 0.1111, execution time: 0.004s
Minimum Covariance determination (MCD) ROC:0.4638, precision @ rank n: 0.0, execution time: 0.07s
One-class SVM (OCSVM) ROC:0.5414, precision @ rank n: 0.1111, execution time: 0.003s
Principal component analysis (PCA) ROC:0.4533, precision @ rank n: 0.0, execution time: 0.003s

...processing vowels.mat ...




Angle-based outlier detector (ABOD) ROC:0.9531, precision @ rank n: 0.4286, execution time: 0.6296s
Cluster-based Local outlier factor ROC:0.8261, precision @ rank n: 0.1429, execution time: 0.1799s




feature bagging ROC:0.9377, precision @ rank n: 0.2143, execution time: 0.5577s
HIstogram-base outlier detection (HBOS) ROC:0.6707, precision @ rank n: 0.2143, execution time: 0.006s
Isolation forest ROC:0.7835, precision @ rank n: 0.1429, execution time: 0.5037s
K Nearest neibghours (KNN) ROC:0.9681, precision @ rank n: 0.5, execution time: 0.1019s
Local outlier factor ROC:0.9296, precision @ rank n: 0.2143, execution time: 0.065s
Minimum Covariance determination (MCD) ROC:0.6589, precision @ rank n: 0.0, execution time: 1.5291s
One-class SVM (OCSVM) ROC:0.7984, precision @ rank n: 0.2857, execution time: 0.0859s
Principal component analysis (PCA) ROC:0.6292, precision @ rank n: 0.0714, execution time: 0.005s

...processing wbc.mat ...
Angle-based outlier detector (ABOD) ROC:0.8899, precision @ rank n: 0.0, execution time: 0.2069s
Cluster-based Local outlier factor ROC:0.8991, precision @ rank n: 0.4, execution time: 0.0879s




feature bagging ROC:0.9321, precision @ rank n: 0.4, execution time: 0.1309s
HIstogram-base outlier detection (HBOS) ROC:0.9083, precision @ rank n: 0.4, execution time: 0.013s
Isolation forest ROC:0.9174, precision @ rank n: 0.4, execution time: 0.4387s
K Nearest neibghours (KNN) ROC:0.9156, precision @ rank n: 0.2, execution time: 0.026s
Local outlier factor ROC:0.9321, precision @ rank n: 0.4, execution time: 0.012s
Minimum Covariance determination (MCD) ROC:0.8624, precision @ rank n: 0.4, execution time: 0.1109s
One-class SVM (OCSVM) ROC:0.9339, precision @ rank n: 0.4, execution time: 0.008s
Principal component analysis (PCA) ROC:0.9193, precision @ rank n: 0.4, execution time: 0.003s

 the prn_df
         Data #Samples #Dimensions Outlier Perc    ABOD   CBLOF      FB  \
0  arrhythmia      452         274      14.6018  0.2308  0.3077  0.2308   
0      cardio     1831          21       9.6122  0.2321  0.2679  0.1429   
0       glass      214           9       4.2056       0  0.333

In [None]:
jupyter_notebook_config