In [1]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

In [2]:
# Print the docstring of the current (interactive) module namespace.
print(__doc__)


Automatically created module for IPython interactive environment


In [3]:
# Seeded generator object; handed to estimators as their `random_state`
# so that their internal randomness is repeatable across runs.
rng = np.random.RandomState(seed=42)

In [4]:
# Bare expression: only displays the generator's repr as the cell output.
rng

<mtrand.RandomState at 0x9204d38>

In [5]:
# Example settings
# Offsets applied (as -offset / +offset) to the two inlier clusters; one
# data set is generated per entry.
clusters_separation = [0, 1, 2]
# Share of the samples that are generated as outliers; also passed to the
# detectors as their expected contamination.
outliers_fraction = 0.25
# Total number of generated points (inliers + outliers).
n_samples = 200

In [6]:
# Outlier detection tools to be compared, keyed by display name.
# Each detector's outlier budget is derived from `outliers_fraction`.
classifiers = {}
classifiers["One-Class SVM"] = svm.OneClassSVM(
    nu=0.95 * outliers_fraction + 0.05, kernel="rbf", gamma=0.1)
classifiers["Robust covariance"] = EllipticEnvelope(
    contamination=outliers_fraction)
classifiers["Isolation Forest"] = IsolationForest(
    max_samples=n_samples, contamination=outliers_fraction, random_state=rng)
classifiers["Local Outlier Factor"] = LocalOutlierFactor(
    n_neighbors=35, contamination=outliers_fraction)

In [7]:
# Compare given classifiers under given settings
# 100 x 100 evaluation grid covering [-7, 7] on both axes.
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
# Derive the inlier count from the outlier count instead of truncating two
# separate float products: e.g. int((1. - 0.9) * 10) == 0, which would make
# the two counts sum to less than n_samples.
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers
# Labels: +1 for inliers, -1 for the trailing n_outliers samples.
ground_truth = np.ones(n_samples, dtype=int)
# Use an explicit start index; `[-n_outliers:]` would select the whole
# array (and label every sample an outlier) when n_outliers == 0.
ground_truth[n_samples - n_outliers:] = -1

In [8]:
# Fit the problem with varying cluster separation
for i, offset in enumerate(clusters_separation):
    # Re-seed the global NumPy generator on every pass so each separation
    # is built from the same random draws.
    np.random.seed(42)
    # Data generation: two 2-D Gaussian blobs (scale 0.3) shifted by
    # -offset and +offset respectively.
    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    # The second blob takes the remainder so that an odd n_inliers does not
    # leave X one row short of n_inliers + n_outliers.
    X2 = 0.3 * np.random.randn(n_inliers - n_inliers // 2, 2) + offset
    X = np.r_[X1, X2]
    # Add outliers: uniform noise over [-6, 6) in both coordinates, appended
    # after the inliers.
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

array([[-7.        , -7.        , -7.        , ..., -7.        ,
        -7.        , -7.        ],
       [-6.85858586, -6.85858586, -6.85858586, ..., -6.85858586,
        -6.85858586, -6.85858586],
       [-6.71717172, -6.71717172, -6.71717172, ..., -6.71717172,
        -6.71717172, -6.71717172],
       ..., 
       [ 6.71717172,  6.71717172,  6.71717172, ...,  6.71717172,
         6.71717172,  6.71717172],
       [ 6.85858586,  6.85858586,  6.85858586, ...,  6.85858586,
         6.85858586,  6.85858586],
       [ 7.        ,  7.        ,  7.        , ...,  7.        ,
         7.        ,  7.        ]])