Load Models and Parameters (Get & Set)

In [26]:
%load_ext autoreload
%autoreload 2

# temporary solution for relative imports in case sadl is not installed
# if sadl is installed, no need to use the line
import sys
import os
import inspect
from inspect import signature
import numpy as np
# Make the parent of the current working directory importable so that `SADL`
# resolves without being installed. `__file__` is not defined inside a
# notebook, so the location is derived from the working directory instead.
# The membership check keeps repeated runs of this cell from appending the
# same entry to sys.path over and over.
_parent_dir = os.path.abspath(os.pardir)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)
from SADL.static_data.algorithms import sklearn
from sklearn.covariance import EllipticEnvelope



# Example settings: sample count and the share of it treated as outliers
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(n_samples * outliers_fraction)
n_inliers = n_samples - n_outliers

# Plain scikit-learn estimator, built directly for reference
model = EllipticEnvelope(random_state=42, contamination=outliers_fraction)

# SADL wrapper selecting the "elliptic" algorithm, with `label_parser` enabled.
# NOTE(review): this instance is discarded by the next assignment to `model1`.
kwargs = dict(algorithm_="elliptic", contamination=0.15, label_parser=True)
model1 = sklearn.SkLearnAnomalyDetection(**kwargs)

# Same algorithm with a higher contamination and no explicit `label_parser`;
# this is the `model1` that remains bound after the cell has run.
kwargs = dict(algorithm_="elliptic", contamination=0.4)
model1 = sklearn.SkLearnAnomalyDetection(**kwargs)

# SADL wrapper selecting the "sgdocsvm" algorithm with explicit hyper-parameters.
# get_params() is available on every model above; only this one is printed.
kwargs = dict(
    algorithm_="sgdocsvm",
    nu=0.15,
    shuffle=True,
    fit_intercept=True,
    random_state=42,
    tol=1e-6,
)
model2 = sklearn.SkLearnAnomalyDetection(**kwargs)
print(model2.get_params())



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'label_parser': None, 'algorithm_': 'SGDOneClassSVM', 'average': False, 'eta0': 0.0, 'fit_intercept': True, 'learning_rate': 'optimal', 'max_iter': 1000, 'nu': 0.15, 'power_t': 0.5, 'random_state': 42, 'shuffle': True, 'tol': 1e-06, 'verbose': 0, 'warm_start': False}


Fit Models

In [35]:
# Build a synthetic 2-D data set: Gaussian inliers first, uniformly
# scattered outliers last, plus a 0/1 ground-truth vector in the same order.
n_samples = 200
outliers_fraction = 0.25

# Seeded generator so that Restart & Run All reproduces the same data
rng = np.random.default_rng(42)

# Evaluation grid: 100 x 100 points spanning [-7, 7] on both axes
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))

# Derive the inlier count from the outlier count so that the two always add
# up to n_samples (truncating both independently can lose a sample).
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

# Outliers occupy the tail of the array. Slicing from n_inliers (rather than
# from -n_outliers) stays correct when n_outliers is 0.
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[n_inliers:] = 1

# Show the statistics of the data
print('Number of inliers: %i' % n_inliers)
print('Number of outliers: %i' % n_outliers)
print('Ground truth shape is {shape}. Outlier are 1 and inliers are 0.\n'.format(shape=ground_truth.shape))
print(ground_truth)

# Two Gaussian inlier batches (std 0.3); the second takes the remainder so an
# odd n_inliers does not silently drop a row.
X1 = 0.3 * rng.standard_normal((n_inliers // 2, 2))
X2 = 0.3 * rng.standard_normal((n_inliers - n_inliers // 2, 2))

# Outliers drawn uniformly from [-6, 6) on both axes
X_outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))

# Final design matrix, rows ordered exactly like ground_truth
X = np.r_[X1, X2, X_outliers]

# Fit the SADL wrapper on the synthetic data, then display the per-sample
# scores reported by the object it exposes as `.model`
# (presumably the wrapped scikit-learn estimator; confirm against SADL).
# The plain `model` from the first cell can be exercised the same way:
# model.fit(X) followed by model.score_samples(X).
model1.fit(X)
sample_scores = model1.model.score_samples(X)
sample_scores

Number of inliers: 150
Number of outliers: 50
Ground truth shape is (200,). Outlier are 1 and inliers are 0.

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


array([-8.89634983e-01, -1.69750635e+00, -1.47999686e+00, -1.67982403e+00,
       -2.25100078e+00, -2.80526692e+00, -1.96124726e+00, -8.58590968e-01,
       -4.93819228e+00, -1.04724917e+00, -9.08951578e-01, -6.48300589e-01,
       -1.77221212e+00, -1.48883540e+00, -1.22435123e+00, -4.12103173e-01,
       -2.18110428e+00, -1.00678139e+00, -1.33650552e+00, -7.07209963e+00,
       -2.30026631e+00, -4.97440389e+00, -2.25184494e+00, -5.68123044e-01,
       -1.36532715e+00, -4.46276669e-01, -2.32723087e+00, -6.33910284e-01,
       -6.44008396e+00, -6.12794250e+00, -3.97147059e+00, -2.58377225e-01,
       -1.38842653e+00, -1.23759552e-01, -2.05290413e+00, -1.81660238e-01,
       -3.92242560e+00, -3.04534526e+00, -3.50279599e+00, -4.65418975e-01,
       -2.28613480e+00, -1.52905835e-01, -1.96789247e+00, -1.90833504e+00,
       -1.37264070e+00, -1.58213196e+00, -2.23475675e-01, -2.90914753e+00,
       -8.34009795e-02, -4.44765874e-01, -2.74618596e-01, -2.04432286e+00,
       -2.17454639e-01, -

Decision Function & Predict

In [31]:
# Fit the "sgdocsvm" wrapper on X and display the decision-function value of
# every sample, using whatever object fit() returns.
fitted_model2 = model2.fit(X)
fitted_model2.decision_function(X)

array([ 0.00910062,  0.00844819,  0.00792779,  0.01053171,  0.0081789 ,
        0.00961207,  0.00535795,  0.00936529,  0.00685113,  0.00923498,
        0.00875967,  0.00807777,  0.00917998,  0.01016415,  0.00870156,
        0.00862809,  0.00875269,  0.01001466,  0.00957266,  0.01216242,
        0.00677529,  0.00936614,  0.00792772,  0.00780005,  0.00834745,
        0.01113742,  0.00779147,  0.0080967 ,  0.00847872,  0.00935997,
        0.0090977 ,  0.00692173,  0.00801638,  0.01074882,  0.00925003,
        0.00935875,  0.01088476,  0.009174  ,  0.00759843,  0.00871302,
        0.00666384,  0.01100839,  0.01126712,  0.00619302,  0.00816511,
        0.00965291,  0.00870533,  0.00880395,  0.0069613 ,  0.00869981,
        0.00782067,  0.01127395,  0.0076995 ,  0.01060094,  0.00944729,
        0.01042164,  0.00806997,  0.00521373,  0.01082998,  0.01043398,
        0.00730858,  0.01082579,  0.00738305,  0.01077221,  0.00625984,
        0.00756777,  0.00971612,  0.00806257,  0.01114065,  0.00