In [1]:
import logging
import numpy as np
import os
import numpy.random as rnd

In [7]:
from ad_examples.common.utils import read_csv, dataframe_to_matrix
from ad_examples.common.gen_samples import get_synthetic_samples
from ad_examples.aad.aad_support import AadOpts, get_aad_command_args, configure_logger
from ad_examples.aad.forest_description import CompactDescriber, MinimumVolumeCoverDescriber, \
    BayesianRulesetsDescriber, get_region_memberships

from ad_examples.aad.demo_aad import get_debug_args, detect_anomalies_and_describe

In [3]:
from ad_examples.loda.loda import Loda

In [4]:
logger = logging.getLogger(__name__)

# AadOpts is built from parsed command-line args, so produce the parsed args
# first, using the debug argument list supplied by get_debug_args().
debug_args = get_debug_args()
args = get_aad_command_args(debug=True, debug_args=debug_args)

In [4]:
# Build the AAD options object from the parsed args and log its settings
# at debug level.
opts = AadOpts(args)
logger.debug(opts.str_opts())
# Seed NumPy's global RNG from the options before the samples are generated.
np.random.seed(opts.randseed)
# load synthetic (toy 2) dataset
# NOTE(review): x and y are presumably the feature matrix and labels -- confirm.
x, y = get_synthetic_samples(stype=2)

In [5]:
# Run the interactive anomaly detection loop on x, y with the configured opts.
(
    model,
    x_transformed,
    queried,
    ridxs_counts,
    region_extents,
) = detect_anomalies_and_describe(x, y, opts)

baseline found:
[0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9]
AAD found:
[0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 13, 14, 15, 16]
UNK  4
UNK  4


## Testing simple anomaly detection

Load the data and run the LODA algorithm on it.

In [5]:
# Fix the numpy.random seed and record the detector / sample-type settings.
rnd.seed(42)
sample_type = "donut_"
ad_type = "loda"

In [8]:
# Alternative data sources, currently disabled:
# x, y = get_demo_samples(sample_type)
# x, y = get_synthetic_samples(stype=2)
# Read the CSV from a relative path (first row treated as a header) and
# convert the frame; this rebinds x and y.
data_df = read_csv("./data/simple.type123.csv", header=True)
x, y = dataframe_to_matrix(data_df)

In [11]:
# Size of the first axis of x, and the outlier-fraction setting.
n = x.shape[0]
outliers_fraction = 0.01

# Placeholders, all initialised to None.
xx = None
yy = None
x_grid = None
Z = None
scores = None

In [None]:
# Fit a Loda detector on x, then score the same samples.
ad = Loda(mink=100, maxk=200)
ad.fit(x)
# The decision function is negated; the following cell ranks samples by
# descending `scores`, so larger values here are treated as more anomalous.
scores = -ad.decision_function(x)
# Disabled: scoring of a grid (x_grid is initialised to None above).
# Z = -ad.decision_function(x_grid)

In [17]:
# Dump every score, then keep the indices of the 10 highest-scoring samples.
print("scores:\n%s" % str(list(scores)))
# Slice instead of indexing with np.arange(10): the slice yields the same
# indices when there are at least 10 scores, and does not raise IndexError
# when there are fewer.
top_anoms = np.argsort(-scores)[:10]

scores:
[260.82764009235564, 351.10089221055273, 279.5733327798263, 348.6140006271513, 304.40439538694534, 304.85407544184477, 307.43563313929343, 431.9268572109388, 346.70923244712344, 371.6409310139784, 302.68254199546743, 293.0808912873477, 303.6073869347618, 374.6301417645071, 315.93967014274614, 425.35717567525404, 281.66446472522244, 272.9541582177369, 309.3661550505635, 386.86708663347747, 271.87347210143326, 277.7318698489165, 414.19469219255257, 320.0340924334245, 299.6400023327422, 264.0017234818908, 321.8417442566404, 312.5873024702465, 321.3432528273985, 297.9492964212742, 303.611940276548, 384.4968751234461, 345.8862124416995, 305.29745540024624, 260.99927165675956, 341.2251801211022, 342.91809148101925, 632.3137110031523, 274.82203293799296, 322.6412604812475, 292.24971387390775, 300.7584268354177, 321.16258328918786, 280.31294606634594, 307.55752685447465, 299.84506896162895, 320.2038498516457, 334.02323432074627, 270.4141388793223, 277.02913384670455, 410.57725014364485

In [18]:
# Show the indices of the top-ranked samples (highest scores first).
print(top_anoms)

[131 239 466  37 440 381 327 411 307 210]
