In [1]:
import os
import sys
import pandas as pd
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import train_test_split

In [4]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

In [6]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [9]:
from time import time
# Legacy NumPy generator with a fixed seed (23) so random draws are repeatable.
random_state=np.random.RandomState(seed=23)

In [10]:
# File names of the benchmark datasets, in processing order.
# Each entry is a dataset stem plus the MATLAB '.mat' extension.
mat_file_list=[
    stem+'.mat'
    for stem in (
        'arrhythmia',
        'cardio',
        'glass',
        'ionosphere',
        'letter',
        'lympho',
        'mnist',
        'musk',
        'optdigits',
        'pendigits',
        'pima',
        'satellite',
        'satimage-2',
        'shuttle',
        'vertebral',
        'vowels',
        'wbc',
    )
]

In [25]:
# Benchmark every detector on every dataset and collect three result tables:
#   time_df - fit + scoring wall-clock time in seconds
#   roc_df  - ROC AUC on the held-out split
#   prn_df  - precision @ rank n on the held-out split
# Each table has one row per dataset: dataset name, sample count, feature
# count, outlier percentage, then one value per detector in dict order.
#
# Rows are accumulated in plain lists that live OUTSIDE the loop and are
# turned into DataFrames once at the end. Re-creating the frames inside the
# loop (as before) discarded every dataset except the last one.
time_rows=[]
roc_rows=[]
prn_rows=[]

for mat_file in mat_file_list:
    print('Processing',mat_file,'..')
    mat=loadmat(os.path.join('data',mat_file))
    X=mat['X']
    y=mat['y'].ravel()
    # Non-zero labels are counted as outliers; the fraction is passed to
    # every detector as its contamination parameter.
    outlier_frac=np.count_nonzero(y)/len(y)
    outlier_per=round(outlier_frac*100, ndigits=4)

    # Descriptive leading columns shared by all three result rows.
    dataset_info=[os.path.splitext(mat_file)[0],X.shape[0],X.shape[1],outlier_per]
    roc_list=list(dataset_info)
    prn_list=list(dataset_info)
    time_list=list(dataset_info)

    # 60/40 split with a fixed seed, then standardise both parts together
    # via pyod's standardizer helper.
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=0)
    X_train_norm,X_test_norm=standardizer(X_train,X_test)

    # Detectors are rebuilt per dataset because contamination differs.
    classifiers={'Angle based outlier detection(ABOD)': ABOD(contamination=outlier_frac),
                'Cluster based local outlier factor(CBLOF)': CBLOF(contamination=outlier_frac,random_state=0),
                'Feature bagging': FeatureBagging(contamination=outlier_frac,random_state=0),
                'Histogram based outlier detection(HBOS)': HBOS(contamination=outlier_frac),
                'Isolation Forest': IForest(contamination=outlier_frac,random_state=0),
                'KNN': KNN(contamination=outlier_frac),
                'Local outlier factor': LOF(contamination=outlier_frac),
                'Minimum covariance determinant(MCD)': MCD(contamination=outlier_frac,random_state=0),
                'One class SVM(OCSVM)': OCSVM(contamination=outlier_frac),
                'Principal component analysis(PCA)': PCA(contamination=outlier_frac,random_state=0)}

    for clf_name, clf in classifiers.items():
        # Timed section covers fitting on the training part and scoring
        # the test part.
        t0=time()
        clf.fit(X_train_norm)
        test_score=clf.decision_function(X_test_norm)
        t1=time()
        duration=round(t1-t0, ndigits=4)
        time_list.append(duration)

        roc=round(roc_auc_score(y_test,test_score), ndigits=4)
        prn=round(precision_n_scores(y_test,test_score), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, execution time: {duration}s'.format(clf_name=clf_name, roc=roc,
                                                                                                  prn=prn, duration=duration))
        roc_list.append(roc)
        prn_list.append(prn)

    time_rows.append(time_list)
    roc_rows.append(roc_list)
    prn_rows.append(prn_list)

# Build each table once from the accumulated rows. Columns keep pandas'
# default integer labels; no column-name list is defined in the visible cells.
time_df=pd.DataFrame(time_rows)
roc_df=pd.DataFrame(roc_rows)
prn_df=pd.DataFrame(prn_rows)
        
        

Processing arrhythmia.mat ..
Angle based outlier detection(ABOD) ROC:0.8035, precision @ rank n:0.3462, execution time: 0.2622s
Cluster based local outlier factor(CBLOF) ROC:0.8102, precision @ rank n:0.5385, execution time: 0.3068s
Feature bagging ROC:0.804, precision @ rank n:0.5, execution time: 1.0216s
Histogram based outlier detection(HBOS) ROC:0.8608, precision @ rank n:0.5385, execution time: 0.1203s
Isolation Forest ROC:0.8092, precision @ rank n:0.5385, execution time: 0.5676s
KNN ROC:0.8176, precision @ rank n:0.5, execution time: 0.2117s
Local outlier factor ROC:0.8082, precision @ rank n:0.5, execution time: 0.1899s




Minimum covariance determinant(MCD) ROC:0.8464, precision @ rank n:0.5, execution time: 1.4259s
One class SVM(OCSVM) ROC:0.8102, precision @ rank n:0.5, execution time: 0.1179s
Principal component analysis(PCA) ROC:0.8164, precision @ rank n:0.5, execution time: 0.1337s
Processing cardio.mat ..
Angle based outlier detection(ABOD) ROC:0.5783, precision @ rank n:0.2388, execution time: 1.0709s
Cluster based local outlier factor(CBLOF) ROC:0.7488, precision @ rank n:0.2836, execution time: 0.3162s
Feature bagging ROC:0.6352, precision @ rank n:0.1791, execution time: 1.6513s
Histogram based outlier detection(HBOS) ROC:0.8435, precision @ rank n:0.4627, execution time: 0.016s
Isolation Forest ROC:0.9292, precision @ rank n:0.4925, execution time: 0.6086s
KNN ROC:0.7309, precision @ rank n:0.2836, execution time: 0.3831s
Local outlier factor ROC:0.6078, precision @ rank n:0.194, execution time: 0.208s




Minimum covariance determinant(MCD) ROC:0.7765, precision @ rank n:0.403, execution time: 1.3542s
One class SVM(OCSVM) ROC:0.928, precision @ rank n:0.4179, execution time: 0.1786s
Principal component analysis(PCA) ROC:0.9454, precision @ rank n:0.5522, execution time: 0.009s
Processing glass.mat ..
Angle based outlier detection(ABOD) ROC:0.6546, precision @ rank n:0.0, execution time: 0.0945s
Cluster based local outlier factor(CBLOF) ROC:0.8594, precision @ rank n:0.0, execution time: 0.0741s
Feature bagging ROC:0.8996, precision @ rank n:0.3333, execution time: 0.0918s
Histogram based outlier detection(HBOS) ROC:0.6707, precision @ rank n:0.0, execution time: 0.007s
Isolation Forest ROC:0.6466, precision @ rank n:0.0, execution time: 0.4069s
KNN ROC:0.8072, precision @ rank n:0.0, execution time: 0.014s
Local outlier factor ROC:0.9116, precision @ rank n:0.3333, execution time: 0.004s
Minimum covariance determinant(MCD) ROC:0.759, precision @ rank n:0.0, execution time: 0.1098s
One c



Principal component analysis(PCA) ROC:0.4578, precision @ rank n:0.0, execution time: 0.0492s
Processing ionosphere.mat ..
Angle based outlier detection(ABOD) ROC:0.9532, precision @ rank n:0.8929, execution time: 0.1874s
Cluster based local outlier factor(CBLOF) ROC:0.8954, precision @ rank n:0.7193, execution time: 0.1023s
Feature bagging ROC:0.8985, precision @ rank n:0.7193, execution time: 0.1846s
Histogram based outlier detection(HBOS) ROC:0.5792, precision @ rank n:0.4211, execution time: 0.0189s
Isolation Forest ROC:0.8296, precision @ rank n:0.614, execution time: 0.5309s
KNN ROC:0.9365, precision @ rank n:0.8596, execution time: 0.036s
Local outlier factor ROC:0.9046, precision @ rank n:0.7544, execution time: 0.015s
Minimum covariance determinant(MCD) ROC:0.9714, precision @ rank n:0.9123, execution time: 0.164s
One class SVM(OCSVM) ROC:0.8584, precision @ rank n:0.7193, execution time: 0.007s
Principal component analysis(PCA) ROC:0.7786, precision @ rank n:0.5965, execution



Angle based outlier detection(ABOD) ROC:0.852, precision @ rank n:0.3902, execution time: 0.9383s
Cluster based local outlier factor(CBLOF) ROC:0.7475, precision @ rank n:0.2195, execution time: 0.285s
Feature bagging ROC:0.8677, precision @ rank n:0.3902, execution time: 1.4484s
Histogram based outlier detection(HBOS) ROC:0.5857, precision @ rank n:0.0244, execution time: 0.0199s
Isolation Forest ROC:0.6144, precision @ rank n:0.1463, execution time: 0.6401s
KNN ROC:0.8564, precision @ rank n:0.2927, execution time: 0.3307s
Local outlier factor ROC:0.8523, precision @ rank n:0.3902, execution time: 0.1912s
Minimum covariance determinant(MCD) ROC:0.7883, precision @ rank n:0.122, execution time: 2.6386s
One class SVM(OCSVM) ROC:0.5859, precision @ rank n:0.122, execution time: 0.2321s
Principal component analysis(PCA) ROC:0.5073, precision @ rank n:0.0732, execution time: 0.01s
Processing lympho.mat ..
Angle based outlier detection(ABOD) ROC:0.9831, precision @ rank n:0.0, execution ti



Angle based outlier detection(ABOD) ROC:0.7936, precision @ rank n:0.3548, execution time: 15.8009s
Cluster based local outlier factor(CBLOF) ROC:0.8612, precision @ rank n:0.4516, execution time: 2.9692s
Feature bagging ROC:0.7408, precision @ rank n:0.3548, execution time: 110.4145s
Histogram based outlier detection(HBOS) ROC:0.5828, precision @ rank n:0.1613, execution time: 0.1293s
Isolation Forest ROC:0.8129, precision @ rank n:0.3154, execution time: 4.0257s
KNN ROC:0.8592, precision @ rank n:0.4409, execution time: 13.7532s
Local outlier factor ROC:0.732, precision @ rank n:0.3477, execution time: 13.6127s




Minimum covariance determinant(MCD) ROC:0.8666, precision @ rank n:0.172, execution time: 6.3769s
One class SVM(OCSVM) ROC:0.8728, precision @ rank n:0.4409, execution time: 10.0449s
Principal component analysis(PCA) ROC:0.8713, precision @ rank n:0.4301, execution time: 0.3824s
Processing musk.mat ..
Angle based outlier detection(ABOD) ROC:0.1127, precision @ rank n:0.0, execution time: 4.7074s
Cluster based local outlier factor(CBLOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.8411s
Feature bagging ROC:0.3838, precision @ rank n:0.1667, execution time: 23.0968s
Histogram based outlier detection(HBOS) ROC:1.0, precision @ rank n:0.9722, execution time: 0.1058s
Isolation Forest ROC:0.998, precision @ rank n:0.8571, execution time: 2.1608s
KNN ROC:0.7583, precision @ rank n:0.2222, execution time: 3.867s
Local outlier factor ROC:0.3941, precision @ rank n:0.0833, execution time: 3.5882s
Minimum covariance determinant(MCD) ROC:1.0, precision @ rank n:1.0, execution time: 25.4862s



Minimum covariance determinant(MCD) ROC:0.3972, precision @ rank n:0.0, execution time: 2.593s
One class SVM(OCSVM) ROC:0.5132, precision @ rank n:0.0, execution time: 3.0957s
Principal component analysis(PCA) ROC:0.5126, precision @ rank n:0.0, execution time: 0.1481s
Processing pendigits.mat ..
Angle based outlier detection(ABOD) ROC:0.7344, precision @ rank n:0.0364, execution time: 3.3249s
Cluster based local outlier factor(CBLOF) ROC:0.8279, precision @ rank n:0.2182, execution time: 0.4757s
Feature bagging ROC:0.5345, precision @ rank n:0.0545, execution time: 8.0581s
Histogram based outlier detection(HBOS) ROC:0.9335, precision @ rank n:0.3455, execution time: 0.0249s
Isolation Forest ROC:0.9531, precision @ rank n:0.3273, execution time: 1.2979s
KNN ROC:0.7819, precision @ rank n:0.0909, execution time: 1.3259s
Local outlier factor ROC:0.5243, precision @ rank n:0.0727, execution time: 1.2817s
Minimum covariance determinant(MCD) ROC:0.8389, precision @ rank n:0.0909, execution 



Angle based outlier detection(ABOD) ROC:0.5781, precision @ rank n:0.399, execution time: 4.182s
Cluster based local outlier factor(CBLOF) ROC:0.7182, precision @ rank n:0.5459, execution time: 0.86s
Feature bagging ROC:0.5608, precision @ rank n:0.388, execution time: 15.748s
Histogram based outlier detection(HBOS) ROC:0.7461, precision @ rank n:0.5483, execution time: 0.0518s
Isolation Forest ROC:0.6613, precision @ rank n:0.5251, execution time: 1.5647s
KNN ROC:0.686, precision @ rank n:0.4896, execution time: 2.4318s
Local outlier factor ROC:0.5644, precision @ rank n:0.3929, execution time: 2.2785s
Minimum covariance determinant(MCD) ROC:0.7988, precision @ rank n:0.6671, execution time: 5.0363s
One class SVM(OCSVM) ROC:0.6405, precision @ rank n:0.5177, execution time: 2.8268s
Principal component analysis(PCA) ROC:0.5777, precision @ rank n:0.4676, execution time: 0.0654s
Processing satimage-2.mat ..
Angle based outlier detection(ABOD) ROC:0.8346, precision @ rank n:0.3125, execu



Angle based outlier detection(ABOD) ROC:0.6326, precision @ rank n:0.2085, execution time: 33.7649s
Cluster based local outlier factor(CBLOF) ROC:0.601, precision @ rank n:0.2734, execution time: 2.1913s
Feature bagging ROC:0.4416, precision @ rank n:0.0195, execution time: 119.4036s
Histogram based outlier detection(HBOS) ROC:0.9882, precision @ rank n:0.9348, execution time: 0.0382s
Isolation Forest ROC:0.9959, precision @ rank n:0.9603, execution time: 6.6179s
KNN ROC:0.6617, precision @ rank n:0.2233, execution time: 17.0952s
Local outlier factor ROC:0.5249, precision @ rank n:0.1374, execution time: 22.4345s






Minimum covariance determinant(MCD) ROC:0.9908, precision @ rank n:0.7615, execution time: 23.5402s
One class SVM(OCSVM) ROC:0.9911, precision @ rank n:0.9519, execution time: 97.5249s
Principal component analysis(PCA) ROC:0.989, precision @ rank n:0.9498, execution time: 0.0958s
Processing vertebral.mat ..
Angle based outlier detection(ABOD) ROC:0.4272, precision @ rank n:0.0, execution time: 0.1476s
Cluster based local outlier factor(CBLOF) ROC:0.442, precision @ rank n:0.0, execution time: 0.1167s
Feature bagging ROC:0.5012, precision @ rank n:0.0, execution time: 0.0955s
Histogram based outlier detection(HBOS) ROC:0.3778, precision @ rank n:0.0, execution time: 0.0052s
Isolation Forest ROC:0.4337, precision @ rank n:0.0, execution time: 0.4037s
KNN ROC:0.437, precision @ rank n:0.0, execution time: 0.017s
Local outlier factor ROC:0.4807, precision @ rank n:0.0, execution time: 0.0061s
Minimum covariance determinant(MCD) ROC:0.4461, precision @ rank n:0.0, execution time: 0.1455s
On




Principal component analysis(PCA) ROC:0.4272, precision @ rank n:0.0, execution time: 0.006s
Processing vowels.mat ..
Angle based outlier detection(ABOD) ROC:0.9538, precision @ rank n:0.5556, execution time: 0.6092s
Cluster based local outlier factor(CBLOF) ROC:0.882, precision @ rank n:0.2778, execution time: 0.1836s
Feature bagging ROC:0.9213, precision @ rank n:0.2778, execution time: 0.6196s
Histogram based outlier detection(HBOS) ROC:0.6272, precision @ rank n:0.1667, execution time: 0.007s
Isolation Forest ROC:0.7735, precision @ rank n:0.3333, execution time: 0.5599s
KNN ROC:0.9751, precision @ rank n:0.5556, execution time: 0.1719s
Local outlier factor ROC:0.9443, precision @ rank n:0.3333, execution time: 0.072s
Minimum covariance determinant(MCD) ROC:0.6951, precision @ rank n:0.0, execution time: 1.8715s
One class SVM(OCSVM) ROC:0.7615, precision @ rank n:0.3333, execution time: 0.0968s
Principal component analysis(PCA) ROC:0.5799, precision @ rank n:0.1111, execution time