In [None]:
import pandas as pd
from river import (
    preprocessing,
    cluster,
    evaluate,
    stream,
    metrics
)
from sklearn import metrics as skmetrics
import matplotlib.pyplot as plt

import sys
sys.path.append('../')
from vibrodiagnostics import mafaulda

In [None]:
def run_simulation(X, y):
    """Cluster a feature stream with DenStream and track silhouette over time.

    Every observation is first assigned a cluster label by the current model
    (``predict_one``) and only afterwards used to update it (``learn_one``).
    Every ``len(X) // 8`` observations, the silhouette score of all labels
    collected so far is computed against the corresponding leading rows of
    ``X``.

    Args:
        X: pandas DataFrame of features, one row per observation.
        y: Targets aligned with ``X``. They are only handed to
            ``stream.iter_pandas``; the model is never given a target.

    Returns:
        A list of ``{'step': int, 'score': float}`` dicts, one per checkpoint,
        where ``step`` is the zero-based index of the last observation
        included. ``score`` is NaN at checkpoints where the silhouette is
        undefined (a single distinct label, or as many labels as samples).
    """
    denstream = (
        preprocessing.StandardScaler() |
        cluster.DenStream(
            decaying_factor=0.01, beta=0.5, mu=2.5, epsilon=0.5, n_samples_init=10
        )
    )
    # Clamp to 1: with fewer than 8 rows the integer division yields 0 and
    # the `i % step` check below would raise ZeroDivisionError.
    step = max(1, len(X) // 8)

    labels = []
    scores = []

    # The per-row target is deliberately discarded (and no longer shadows the
    # `y` parameter).
    for i, (x, _) in enumerate(stream.iter_pandas(X, y)):
        # Predict before learning so the label comes from a model that has
        # not yet been updated with this observation.
        labels.append(denstream.predict_one(x))

        if i > 0 and i % step == 0:
            # silhouette_score raises ValueError unless
            # 2 <= n_labels <= n_samples - 1; record NaN instead of aborting
            # the whole simulation at such a checkpoint.
            if 1 < len(set(labels)) < len(labels):
                # NOTE(review): the score is computed on the raw rows of X,
                # whereas the clusterer receives the StandardScaler output;
                # confirm this is the intended feature space.
                s = skmetrics.silhouette_score(
                    X.iloc[:i + 1], labels, metric='euclidean'
                )
            else:
                s = float('nan')
            scores.append({'step': i, 'score': s})

        denstream.learn_one(x)

    return scores

In [None]:
# Run the clustering simulation on the 'TD' source for placement 'A'.
# NOTE(review): 'TD' presumably selects time-domain features; confirm against
# mafaulda.load_source.
X, _, y, _ = mafaulda.load_source(
    'TD',
    {'placement': 'A', 'online': True},
)
scores = run_simulation(X, y)

# One row per checkpoint, indexed by the observation number it was taken at.
success = pd.DataFrame.from_records(scores).set_index('step')
success.plot(
    grid=True,
    figsize=(20, 5),
    marker='o',
    ylabel='Silhouette score',
    xlabel='Observation',
)
plt.show()

In [None]:
# Run the clustering simulation on the 'FD' source for placement 'A'.
# NOTE(review): 'FD' presumably selects frequency-domain features; confirm
# against mafaulda.load_source.
X, _, y, _ = mafaulda.load_source(
    'FD',
    {'placement': 'A', 'online': True},
)
scores = run_simulation(X, y)

# One row per checkpoint, indexed by the observation number it was taken at.
success = pd.DataFrame.from_records(scores).set_index('step')
success.plot(
    grid=True,
    figsize=(20, 5),
    marker='o',
    ylabel='Silhouette score',
    xlabel='Observation',
)
plt.show()