<a href="https://colab.research.google.com/github/andrewm4894/pysad_colabs/blob/main/pysad_example_full_usage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# uncomment below to install pysad
#!pip install mmh3==2.5.1 rrcf==0.4.3 PyNomaly==0.3.3 pysad==0.1.1

In [2]:
# uncomment below to get data for the examples
#!wget https://github.com/selimfirat/pysad/blob/master/examples/data/arrhythmia.mat?raw=true
#!mkdir data
#!mv arrhythmia.mat\?raw\=true data/arrhythmia.mat

In [3]:
# Import modules.
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream, RobustRandomCutForest, KNNCAD, ExactStorm, HalfSpaceTrees, IForestASD, KitNet, LODA, LocalOutlierProbability, RSHash
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np

# This example demonstrates the usage of the most modules in PySAD framework.
if __name__ == "__main__":
    np.random.seed(61)  # Fix random seed.

    n_initial = 100

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    #X_all, y_all = shuffle(X_all, y_all)
    X_initial, y_initial = X_all[:n_initial], y_all[:n_initial]
    X_stream, y_stream = X_all[n_initial:], y_all[n_initial:]

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

    #model = RSHash(feature_mins=[1], feature_maxes=[5], sampling_points=100, decay=0.015, num_components=100, num_hash_fns=1)
    model = LocalOutlierProbability(initial_X=X_initial, num_neighbors=10, extent=3)
    #model = LODA(num_bins=10, num_random_cuts=100)
    #model = KitNet(max_size_ae=10, grace_feature_mapping=100, grace_anomaly_detector=100, learning_rate=0.1, hidden_ratio=0.75)
    #model = IForestASD(initial_window_X=X_initial, window_size=100)
    #model = HalfSpaceTrees(feature_mins=[1],feature_maxes=[10])
    #model = ExactStorm(window_size=1000, max_radius=0.1)
    #model = KNNCAD(probationary_period=50)
    #model = RobustRandomCutForest(num_trees=10, shingle_size=4, tree_size=256)
    #model = xStream(num_components=20, n_chains=20, depth=10, window_size=25)  # Init xStream anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under receiver-operating- characteristics curve metric.

    for X, y in tqdm(iterator.iter(X_stream, y_stream)):  # Stream data.
        X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        if np.isnan(score):
            continue
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
        #print(score)

        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROCS metric.
    print("\nAUROC: ", auroc.get())

352it [00:00, 1087.02it/s]


AUROC:  0.6781112938596491



