# Performance Profiling of Contextual Detector

In [1]:
from __future__ import division
from __future__ import print_function

import cProfile
import os
import sys
from time import time

# Temporary workaround for relative imports when pyod is not installed
# (not needed once pyod is installed). dirname of the literal "__file__"
# is '', so the appended entry is the parent of the working directory.
_parent_dir = os.path.join(os.path.dirname("__file__"), '..')
sys.path.append(os.path.abspath(_parent_dir))

import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Import all models
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.lscp import LSCP
from pyod.models.contextual import ContextualDetector
from pyod.utils import generate_contextual_data

In [2]:
# Fraction handed to the detectors below as their `contamination` setting.
outliers_fraction = 0.1

# Base detectors for LSCP: ten LOF models with n_neighbors = 5, 10, ..., 50.
detector_list = [LOF(n_neighbors=k) for k in range(5, 51, 5)]

In [3]:
# A single seeded RandomState object is passed to every detector that takes one.
random_state = np.random.RandomState(42)

# Twelve base outlier detectors, keyed by display name.
classifiers = {
    'ABOD': ABOD(contamination=outliers_fraction),
    'CBLOF': CBLOF(
        contamination=outliers_fraction,
        check_estimator=False,
        random_state=random_state,
    ),
    'Feature Bagging': FeatureBagging(
        LOF(n_neighbors=35),
        contamination=outliers_fraction,
        random_state=random_state,
    ),
    'HBOS': HBOS(contamination=outliers_fraction),
    'Isolation Forest': IForest(
        contamination=outliers_fraction,
        behaviour="new",
        random_state=random_state,
    ),
    'KNN': KNN(contamination=outliers_fraction),
    'Average KNN': KNN(method='mean', contamination=outliers_fraction),
    'LOF': LOF(n_neighbors=35, contamination=outliers_fraction),
    'MCD': MCD(contamination=outliers_fraction, random_state=random_state),
    'OCSVM': OCSVM(contamination=outliers_fraction),
    'PCA': PCA(contamination=outliers_fraction, random_state=random_state),
    'LSCP': LSCP(
        detector_list,
        contamination=outliers_fraction,
        random_state=random_state,
    ),
}

# Wrap every base detector in a ContextualDetector under a "Contextual "-prefixed
# key, then add the wrapped versions after the plain ones in the same mapping.
# NOTE(review): each wrapper receives the very same detector object as the plain
# entry; whether ContextualDetector copies it is not visible here - confirm
# before fitting both variants of one detector.
contextual_classifiers = {
    "Contextual " + name: ContextualDetector(base_detector=detector)
    for name, detector in classifiers.items()
}
classifiers.update(contextual_classifiers)

In [4]:
# Build the data used for profiling, with a fixed seed so reruns are repeatable.
X_train, X_test, y_train, y_test = generate_contextual_data(
    n_train=10000,
    n_test=10000,
    random_state=42,
)

In [5]:
def profile(classifier, train_data=None, test_data=None):
    """Fit a detector and score data with it, so both steps show up in a profile.

    Parameters
    ----------
    classifier : object
        Detector exposing ``fit(X)`` and ``decision_function(X)``.
    train_data : optional
        Data passed to ``fit``. When omitted, the notebook-level ``X_train``
        is used.
    test_data : optional
        Data passed to ``decision_function``. When omitted, the notebook-level
        ``X_test`` is used.

    Returns
    -------
    None
        The fitted model's scores are discarded; only the work is of interest.
    """
    # Resolve the defaults lazily so the globals are only needed when the
    # caller does not supply data explicitly.
    if train_data is None:
        train_data = X_train
    if test_data is None:
        test_data = X_test

    # profile fit
    classifier.fit(train_data)

    # profile predict
    classifier.decision_function(test_data)

In [6]:
# Profile one fit + decision_function pass of the "Contextual LOF" detector and
# print the statistics ordered by internal time (the `tottime` column).
# cProfile is imported with the other modules in the notebook's first cell.
cProfile.run('profile(classifiers["Contextual LOF"])', sort="tottime")

         42933 function calls (42887 primitive calls) in 0.216 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        4    0.140    0.035    0.142    0.036 {method 'query' of 'sklearn.neighbors.kd_tree.BinaryTree' objects}
        2    0.018    0.009    0.029    0.014 contextual.py:23(_contextualize)
        4    0.012    0.003    0.013    0.003 lof.py:478(_local_reachability_density)
       86    0.008    0.000    0.008    0.000 {built-in method numpy.array}
       20    0.007    0.000    0.007    0.000 {built-in method numpy.concatenate}
        4    0.005    0.001    0.156    0.039 base.py:333(kneighbors)
        2    0.005    0.003    0.092    0.046 lof.py:437(_score_samples)
        2    0.004    0.002    0.091    0.045 lof.py:216(fit)
    40014    0.003    0.000    0.003    0.000 {method 'append' of 'list' objects}
        2    0.002    0.001    0.003    0.001 base.py:164(_fit)
        4    0.002    0.001    0.002   