# scikit-multiflow demo

In [None]:
# Required to see dynamic plots in Jupyter notebooks
%matplotlib notebook

In [None]:
from skmultiflow.data import FileStream
from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTree
from skmultiflow.trees import HAT
from skmultiflow.drift_detection import ADWIN

from sklearn.linear_model import SGDClassifier

import numpy as np
import matplotlib.pyplot as plt

---
## 1. Running a classification task

We will use the `SEA` stream generator

In [None]:
# SEA generator configured with classification function 2; random_state is
# pinned so that repeated runs draw the same sequence of samples.
stream = SEAGenerator(
    classification_function=2,
    random_state=1,
)

Prepare the stream for use

In [None]:
# Prepare the SEA stream before it is handed to the evaluator further down.
stream.prepare_for_use()

Set up a classifier, in this case a `Linear SVM` with `SGD` training

In [None]:
# All hyper-parameters are left at the library defaults (no arguments passed).
# NOTE(review): no random_state is set here, unlike the stream above - confirm
# whether run-to-run reproducibility of the classifier matters for this demo.
classifier = SGDClassifier()

Set up the evaluator; we will use prequential evaluation

In [None]:
# Prequential evaluator: live plot enabled, capped at 20000 samples, tracking
# the four listed metrics.
# NOTE(review): the name `eval` shadows Python's builtin eval() for the rest of
# the kernel session; it is kept because the following cell calls
# eval.evaluate(...).
eval = EvaluatePrequential(
    show_plot=True,
    max_samples=20000,
    metrics=['accuracy', 'kappa', 'running_time', 'model_size'],
)

Run the evaluation

In [None]:
# Run the evaluation of the SGD classifier on the SEA stream; the model is
# labelled 'SVM-SGD'. The trailing ';' suppresses the cell's return-value repr.
eval.evaluate(stream=stream, model=classifier, model_names=['SVM-SGD']);

---
## 2. Concept Drift detection

Load a data stream with concept drift

In [None]:
# Relative path: 'drift_stream.npy' is looked up in the current working
# directory of the kernel.
drift_stream = np.load('drift_stream.npy')

In [None]:
# Index at which the second segment of the stream starts (the accompanying
# text describes the change as beginning at index 999).
drift_start = 999

# One x position per sample in the stream.
x = list(range(len(drift_stream)))

fig, ax = plt.subplots(1, 2, figsize=(9, 4))

# Left panel: raw values over time, one colour per segment. Slicing with
# [:drift_start] and [drift_start:] partitions the stream exactly, so no
# sample is left out of the figure.
ax[0].scatter(x[:drift_start], drift_stream[:drift_start], label='before drift')
ax[0].scatter(x[drift_start:], drift_stream[drift_start:], label='after drift')
ax[0].set(title='Stream values', xlabel='index', ylabel='value')
ax[0].legend()

# Right panel: value histograms of the same two segments.
ax[1].hist(drift_stream[:drift_start], bins=2, label='before drift')
ax[1].hist(drift_stream[drift_start:], bins=8, label='after drift')
ax[1].set(title='Value distribution', xlabel='value', ylabel='count')
ax[1].legend();

The first half of the data stream contains a sequence corresponding to a normal distribution of integers from 0 to 1. From index 999 to 1999 the sequence is a normal distribution of integers from 0 to 7.

Next, we instantiate the drift detector, in this case `ADWIN`

In [None]:
# Detector created with no arguments, i.e. with the library's default settings.
adwin = ADWIN()

Run the detection test

In [None]:
# Feed the stream to ADWIN one sample at a time and report every position at
# which the detector signals a change.
# (assumes drift_stream is one-dimensional, as the plotting cell implies)
for i, sample in enumerate(drift_stream):
    adwin.add_element(sample)
    if not adwin.detected_change():
        continue
    print('Change detected at index {}'.format(i))

---
## 3. Comparing classifiers

Load stream data from a file

In [None]:
# Rebinds `stream` (previously the SEA generator) to a file-backed stream.
# The path is relative, so the CSV is looked up in the kernel's working directory.
stream = FileStream("agr_a_20k.arff.csv")

Prepare the stream for use

In [None]:
# Prepare the file-backed stream before it is handed to the evaluator below.
stream.prepare_for_use()

Create a list of classifiers to compare, in this case `Hoeffding Tree` and `Hoeffding Adaptive Tree`

In [None]:
# Both models use default settings. Keep this order in step with
# model_names=['HT', 'HAT'] in the evaluate() call further down
# (presumably names are matched to models by position - confirm).
cfiers = [HoeffdingTree(), HAT()]

Set up the evaluator; we will use prequential evaluation

In [None]:
# Fresh prequential evaluator for the comparison run: live plot enabled,
# three metrics tracked, n_wait set to 100. No max_samples is given here,
# so the library default applies.
# NOTE(review): `eval` shadows Python's builtin eval(); the name is kept
# because the following cell calls eval.evaluate(...).
eval = EvaluatePrequential(
    show_plot=True,
    metrics=['accuracy', 'kappa', 'model_size'],
    n_wait=100,
)

Run the evaluation

In [None]:
# Evaluate both classifiers on the file-backed stream in a single run, labelled
# 'HT' and 'HAT'. The trailing ';' suppresses the cell's return-value repr.
eval.evaluate(stream=stream, model=cfiers, model_names=['HT', 'HAT']);