In [17]:
# Import modules.
from pysad.transform.probability_calibration import ConformalProbabilityCalibrator
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np

In [19]:
# Probability-calibration demo: stream the arrhythmia data through xStream,
# calibrate every raw score, and print an alert for high calibrated scores.
np.random.seed(61)  # Seed NumPy's global RNG before any object is created.

model = xStream()  # Anomaly detection model.
calibrator = ConformalProbabilityCalibrator(
    windowed=True,
    window_size=300,
)  # Probability calibrator for the raw scores.
streaming_data = Data("data").get_iterator("arrhythmia.mat")  # Yields (x, y_true) pairs.

for i, (x, y_true) in enumerate(streaming_data):
    # Fit the model to the current instance and score it in one step.
    anomaly_score = model.fit_score_partial(x)

    # Feed the raw score to the calibrator and get its calibrated value.
    calibrated_score = calibrator.fit_transform(anomaly_score)

    # Only report instances whose calibrated score exceeds 0.95, i.e. whose
    # probability of being normal is less than 5%.
    if not calibrated_score > 0.95:
        continue

    print(f"Alert: {i}th data point is anomalous.")

Alert: 21th data point is anomalous.
Alert: 23th data point is anomalous.
Alert: 49th data point is anomalous.
Alert: 50th data point is anomalous.
Alert: 53th data point is anomalous.
Alert: 62th data point is anomalous.
Alert: 64th data point is anomalous.
Alert: 71th data point is anomalous.
Alert: 80th data point is anomalous.
Alert: 83th data point is anomalous.
Alert: 87th data point is anomalous.
Alert: 96th data point is anomalous.
Alert: 100th data point is anomalous.
Alert: 109th data point is anomalous.
Alert: 132th data point is anomalous.
Alert: 141th data point is anomalous.
Alert: 261th data point is anomalous.
Alert: 265th data point is anomalous.
Alert: 274th data point is anomalous.
Alert: 275th data point is anomalous.
Alert: 278th data point is anomalous.
Alert: 281th data point is anomalous.
Alert: 286th data point is anomalous.
Alert: 289th data point is anomalous.
Alert: 297th data point is anomalous.
Alert: 298th data point is anomalous.
Alert: 342th data point 

In [None]:
# Streaming evaluation demo: normalize each instance, score it with xStream,
# smooth the score with a running average, and accumulate the AUROC.

# Seed NumPy's global RNG inside this cell so that its result does not depend
# on which cells were executed before it (or how often). shuffle() below is
# called without a random_state, so it draws from this global state.
# NOTE(review): the model presumably draws from the same global RNG - confirm.
np.random.seed(61)

# Get data to stream.
data = Data("data")
X_all, y_all = data.get_data("arrhythmia.mat")
X_all, y_all = shuffle(X_all, y_all)  # Shuffle instances and labels consistently.

# Init streamer to simulate streaming data; it does not shuffle again
# because the arrays were already shuffled above.
iterator = ArrayStreamer(shuffle=False)

model = xStream()  # Init xStream anomaly detection model.
preprocessor = InstanceUnitNormScaler()  # Init normalizer.
postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
auroc = AUROCMetric()  # Init area under the receiver operating characteristic curve metric.

# NOTE(review): the first 100 shuffled instances are not streamed; the reason
# for skipping them is not stated in this notebook - confirm it is intended.
for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):  # Stream data.
    X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

    score = model.fit_score_partial(X)  # Fit model to and score the instance.
    score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

    auroc.update(y, score)  # Update AUROC metric.

# Output the resulting AUROC metric.
print("AUROC: ", auroc.get())