In [2]:
# Optional one-time setup: uncomment the lines below to install the packages
# this notebook relies on. `%pip` (rather than `!pip`) installs into the
# environment of the running kernel.
# %pip install -U pysad
# %pip install tqdm
# %pip install mmh3

# Paper
PySAD: A Streaming Anomaly Detection Framework in Python — https://arxiv.org/pdf/2009.02572

In [12]:
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream, LODA
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.transform.ensemble import AverageScoreEnsembler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np

# Load data

Download `arrhythmia.mat` from the ODDS repository and place it in the `data/` directory (relative to the notebook's working directory):
https://odds.cs.stonybrook.edu/arrhythmia-dataset/

In [9]:
# Load the arrhythmia dataset from the local "data" directory and shuffle the
# instances once, up front, to fix the order in which they will be streamed.
# The shuffle is seeded so that order is identical on every
# Restart & Run All; without a seed the downstream score changes per run.
SHUFFLE_SEED = 42

data = Data("data")
X_all, y_all = data.get_data("arrhythmia.mat")
# X and y are shuffled together, so each label stays aligned with its row.
X_all, y_all = shuffle(X_all, y_all, random_state=SHUFFLE_SEED)


In [10]:
# Sanity check: display (feature-matrix shape, label-vector shape) as the cell output.
(X_all.shape, y_all.shape)

((452, 274), (452,))

# Models
PySAD API reference: https://pysad.readthedocs.io/en/latest/api.html#module-pysad.core

In [13]:

# Streaming evaluation of an xStream + LODA ensemble.
#
# Each instance is fed to every model (fit, then score), the per-model scores
# are averaged by the ensembler, and the running AUROC is updated with the
# ensembled score. The final AUROC is printed once the stream is exhausted.

# Seed NumPy's global RNG *before* the models are constructed so the reported
# AUROC is repeatable across runs (PySAD's own examples fix the seed this way).
# NOTE(review): assumes both models draw only from NumPy's global RNG — confirm.
MODEL_SEED = 61
np.random.seed(MODEL_SEED)

# shuffle=False: instances are replayed in the order they appear in X_all.
iterator = ArrayStreamer(shuffle=False)

# NOTE(review): the pre- and post-processor are constructed but never applied
# in the loop below. They are kept because other cells may reference them;
# wire them in or remove them once the intent is confirmed.
preprocessor = InstanceUnitNormScaler()  # Per-instance unit-norm scaler.
postprocessor = RunningAveragePostprocessor(window_size=5)  # Running average over a window of 5.
auroc = AUROCMetric()  # Area under the receiver operating characteristic curve.

models = [  # Models to be ensembled.
    xStream(),
    LODA(),
]
ensembler = AverageScoreEnsembler()  # Combines the per-model scores into one.

# total= gives tqdm the stream length so it renders a progress bar with an ETA
# instead of a bare iteration counter.
for X, y in tqdm(iterator.iter(X_all, y_all), total=len(X_all)):
    model_scores = np.empty(len(models), dtype=np.float64)

    # Fit each model on the current instance, then score that same instance.
    for i, model in enumerate(models):
        model.fit_partial(X)
        model_scores[i] = model.score_partial(X)

    # Update the ensembler with this round's scores and get the combined score.
    score = ensembler.fit_transform_partial(model_scores)

    auroc.update(y, score)  # Accumulate (label, score) into the running AUROC.

# Output score.
print("AUROC: ", auroc.get())

452it [00:57,  7.84it/s]

AUROC:  0.69983513895431



