# Performance Profiling of Contextual Detector

In [1]:
from __future__ import division
from __future__ import print_function

import cProfile
import os
import sys
from time import time

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Import all models
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.lscp import LSCP
from pyod.models.contextual import ContextualDetector
from pyod.utils import generate_contextual_data

In [2]:
# Value handed to the ``contamination`` argument of the detectors built in
# the next cell.
outliers_fraction = 0.1

# Pool of LOF base detectors for LSCP: one detector per neighbourhood size
# 5, 10, ..., 50.
detector_list = [LOF(n_neighbors=k) for k in range(5, 55, 5)]

In [3]:
# One RandomState instance is shared by every detector that accepts a seed.
random_state = np.random.RandomState(42)

# Twelve baseline outlier detectors, keyed by display name.
classifiers = {
    'ABOD': ABOD(contamination=outliers_fraction),
    'CBLOF': CBLOF(
        contamination=outliers_fraction,
        check_estimator=False,
        random_state=random_state,
    ),
    'Feature Bagging': FeatureBagging(
        LOF(n_neighbors=35),
        contamination=outliers_fraction,
        random_state=random_state,
    ),
    'HBOS': HBOS(contamination=outliers_fraction),
    'Isolation Forest': IForest(
        contamination=outliers_fraction,
        behaviour="new",
        random_state=random_state,
    ),
    'KNN': KNN(contamination=outliers_fraction),
    'Average KNN': KNN(method='mean', contamination=outliers_fraction),
    'LOF': LOF(n_neighbors=35, contamination=outliers_fraction),
    'MCD': MCD(contamination=outliers_fraction, random_state=random_state),
    'OCSVM': OCSVM(contamination=outliers_fraction),
    'PCA': PCA(contamination=outliers_fraction, random_state=random_state),
    'LSCP': LSCP(
        detector_list,
        contamination=outliers_fraction,
        random_state=random_state,
    ),
}

# Wrap each baseline detector in a ContextualDetector under the key
# "Contextual <name>". The wrapper is handed the very same detector object
# that is stored under the plain name; no copy is made in this cell.
# NOTE(review): whether ContextualDetector copies it internally is not
# visible from here -- confirm before comparing a baseline with its wrapper.
contextual_classifiers = {
    "Contextual " + name: ContextualDetector(base_detector=detector)
    for name, detector in classifiers.items()
}

# Final lookup table: baseline entries first, then their contextual wrappers.
classifiers = {**classifiers, **contextual_classifiers}

In [17]:
# Synthetic contextual data set used by the profiling run below:
# 10000 training samples and 10000 test samples were requested.
# NOTE(review): no seed is passed here, so the data may differ between
# kernel runs -- check whether generate_contextual_data accepts one.
X_train, X_test, y_train, y_test = generate_contextual_data(
    n_train=10000,
    n_test=10000,
)

In [25]:
def profile(classifier, X_fit=None, X_predict=None):
    """Run one ``fit`` and one ``predict`` call as a target for the profiler.

    Parameters
    ----------
    classifier : object
        Detector exposing ``fit(X)`` and ``predict(X)``.
    X_fit : optional
        Data passed to ``fit``. When omitted, the notebook-level ``X_train``
        is used.
    X_predict : optional
        Data passed to ``predict``. When omitted, the notebook-level
        ``X_test`` is used.

    Returns
    -------
    None
        The predictions are discarded; only the executed work is of interest.
    """
    # Resolve the defaults at call time so that re-running the data cell is
    # picked up without redefining this function.
    if X_fit is None:
        X_fit = X_train
    if X_predict is None:
        X_predict = X_test

    # profile fit
    classifier.fit(X_fit)

    # profile predict
    classifier.predict(X_predict)

In [26]:
# Profile one fit + predict pass of the "Contextual LOF" detector.
# sort="tottime" orders the report by the time spent inside each function
# itself, so the hottest functions are listed first.
cProfile.run('profile(classifiers["Contextual LOF"])', sort="tottime")

         4292946 function calls (4282733 primitive calls) in 4.865 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10002    0.580    0.000    1.052    0.000 {method 'query' of 'sklearn.neighbors.kd_tree.BinaryTree' objects}
    30006    0.270    0.000    1.286    0.000 validation.py:362(check_array)
    50025    0.206    0.000    0.206    0.000 {method 'reduce' of 'numpy.ufunc' objects}
    20007    0.148    0.000    0.257    0.000 _methods.py:58(_mean)
    10002    0.145    0.000    3.313    0.000 base.py:333(kneighbors)
    10002    0.135    0.000    2.040    0.000 parallel.py:866(__call__)
    10002    0.134    0.000    0.310    0.000 lof.py:478(_local_reachability_density)
    30006    0.119    0.000    0.333    0.000 validation.py:40(_assert_all_finite)
   420078    0.112    0.000    0.112    0.000 {built-in method builtins.hasattr}
    10000    0.110    0.000    4.254    0.000 lof.py:437(_score_samples)
   260112 