In [2]:
!pip install scikit-multiflow

Collecting scikit-multiflow
  Downloading scikit-multiflow-0.5.3.tar.gz (450 kB)
     ------------------------------------ 450.6/450.6 kB 741.3 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting sortedcontainers>=1.5.7
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)
Collecting scipy>=1.0.0
  Using cached scipy-1.9.0-cp310-cp310-win_amd64.whl (38.6 MB)
Collecting scikit-learn>=0.20
  Using cached scikit_learn-1.1.2-cp310-cp310-win_amd64.whl (7.4 MB)
Collecting pandas>=0.25.3
  Using cached pandas-1.4.3-cp310-cp310-win_amd64.whl (10.5 MB)
Collecting pytz>=2020.1
  Using cached pytz-2022.2.1-py2.py3-none-any.whl (500 kB)
Collecting joblib>=1.0.0
  Using cached joblib-1.1.0-py2.py3-none-any.whl (306 kB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Building wheels for collected packages: scikit-multiflow
  Building wheel for scikit-multiflow (setup.

In [1]:
# Required to correctly display interactive (dynamic) plots in Jupyter notebooks.
# The `widget` backend is provided by the ipympl package, which must be installed.
# This code cell must be run before any other code cell.
%matplotlib widget

# Imports
from skmultiflow.data import FileStream
from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.bayes import NaiveBayes
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.drift_detection import ADWIN

from sklearn.linear_model import SGDClassifier

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec

# scikit-multiflow demo
In this notebook, we show how to easily set up and run experiments in scikit-multiflow.

The demo is divided into the following parts:

Running a classification task

    1. Prequential evaluation implementation
        A. The EvaluatePrequential class
        B. Concept drift detection

# 1. Running a classification task
In this example we will use the SEA stream generator. A data generator does not store any data, but generates it on demand.

Next, we will set up a learning method (model, estimator, algorithm), in this case the Naive Bayes classifier:

In [None]:
# Data source: the SEA generator, created with a fixed random_state.
# Model: an incremental Naive Bayes classifier.
stream, classifier = SEAGenerator(random_state=1), NaiveBayes()

## Prequential evaluation
The prequential evaluation is easily implemented as a loop:

In [None]:
# Variables to control the evaluation loop and track performance
max_samples = 2000   # upper bound on the number of samples to process
n_samples = 0        # samples processed so far
correct_cnt = 0      # predictions that matched the true label

# Prequential evaluation loop: each sample is first used to test the model
# (predict) and only afterwards to train it (partial_fit).
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()      # Get one sample from the stream
    y_pred = classifier.predict(X)   # Predict class for new data
    if y[0] == y_pred[0]:
        correct_cnt += 1
    classifier.partial_fit(X, y)     # Incrementally train the model with the new data
    n_samples += 1

print('{} samples analyzed.'.format(n_samples))
# Guard the ratio: if the loop never ran (stream already exhausted or
# max_samples == 0) the accuracy is undefined and the division would raise.
if n_samples > 0:
    print('NaiveBayes classifier accuracy: {}'.format(correct_cnt / n_samples))
else:
    print('NaiveBayes classifier accuracy: n/a (no samples were processed)')

## EvaluatePrequential class
Implements the prequential evaluation method and provides extra functionalities.

Let's run the same experiment on the SEA data but this time we will compare two classifiers:

    1. NaiveBayes
    2. SGDClassifier: Linear SVM with SGD training.
We choose the SGDClassifier in order to demonstrate the compatibility with incremental methods from scikit-learn.

**Note:** scikit-learn focuses on the batch learning setting and only a limited number of its methods are capable of learning incrementally.

In [None]:
# Setup stream and estimators.
# The stochastic components get a fixed seed so that reruns are comparable.
stream = SEAGenerator(random_state=1)
nb = NaiveBayes()
svm = SGDClassifier(random_state=1)

# Setup evaluator.
# NOTE: the name `eval` shadows Python's built-in eval(); it is kept because
# the evaluation cell that follows refers to it.
eval = EvaluatePrequential(show_plot=True,
                           max_samples=20000,
                           metrics=['accuracy', 'kappa', 'running_time', 'model_size'])

In [None]:
# Evaluate both models on the stream; the trailing semicolon suppresses the
# display of the call's return value in the notebook.
eval.evaluate(
    stream=stream,
    model=[nb, svm],
    model_names=['NB', 'SVM'],
);

In [None]:
import numpy as np
import matplotlib.pyplot as plt

N_FRAMES = 50          # number of times the figure is redrawn
N_POINTS = 10          # random values plotted per frame
FRAME_PAUSE = 0.0001   # pause length passed to plt.pause for each frame

plt.ion()  # switch pyplot to interactive mode

# Each pass draws a fresh batch of random values, shows it briefly and then
# clears the figure for the next frame.
frame = 0
while frame < N_FRAMES:
    y = np.random.random((N_POINTS, 1))
    plt.plot(y)
    plt.draw()
    plt.pause(FRAME_PAUSE)
    plt.clf()
    frame += 1

In [None]:
# Sample 100 points on [0, 6*pi] and evaluate the initial sine curve.
x = np.linspace(0, 6*np.pi, 100)
y = np.sin(x)

# You probably won't need this if you're embedding things in a tkinter plot...
plt.ion()

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
(line1,) = ax.plot(x, y, 'r-')  # plot() returns a sequence of lines; unpack the only one

# Animate by shifting the phase and updating the existing line in place
# instead of re-plotting it.
phases = np.linspace(0, 10*np.pi, 500)
for phase in phases:
    shifted = np.sin(x + phase)
    line1.set_ydata(shifted)
    fig.canvas.draw()
    fig.canvas.flush_events()