In [2]:
# !pip install -U pysad
# ! pip install tqdm
# ! pip install mmh3

# Streaming usecase

Requirements:
- access one sample at at time
- limited memory and processing time

Paper: https://arxiv.org/pdf/2009.02572

In [5]:
from pysad.evaluation import AUROCMetric
from pysad.models import xStream, LODA
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.transform.ensemble import AverageScoreEnsembler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np
import evaluation_utils, data_utils

In [6]:
X_all, y_all = data_utils.get_data('data/6_cardio.npz')

# Models
https://pysad.readthedocs.io/en/latest/api.html#module-pysad.core

In [7]:

iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

preprocessor = InstanceUnitNormScaler()  # Init normalizer.
postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.

models = [  # Models to be ensembled.
    xStream(),
    LODA()
]
ensembler = AverageScoreEnsembler()  # Ensembler module.

y_pred = []
for X, y in tqdm(iterator.iter(X_all, y_all)):  # Iterate over examples.
    model_scores = np.empty(len(models), dtype=np.float64)

    # Fit & Score via for each model.
    for i, model in enumerate(models):
        model.fit_partial(X)
        model_scores[i] = model.score_partial(X)

    score = ensembler.fit_transform_partial(model_scores)  # fit to ensembler model and get ensembled score.
    y_pred.append(score)

1831it [02:26, 12.50it/s]

AUROC:  0.5910464158198298





# Evaluation

In [16]:
print(evaluation_utils.run_evaluation(
    y_all, np.array(y_pred).reshape(-1),
    do_point_adjustment=True))

{'AUCROC': 0.5910464158198298, 'AUCPR': 0.20462602994407797, 'F1': 0.26756889987567156, 'Precision': 0.22264150943396227, 'Recall': 0.3352272727272727, 'Adjusted AUCROC': 1.0, 'Adjusted AUCPR': 1.0, 'Adjusted F1': 0.9999950000249999, 'Adjusted Precision': 1.0, 'Adjusted Recall': 1.0}
