In [None]:
In diesem Notebook werden zwei Streaming-Klassifizierungen mit 10.000 Instanzen durchgeführt:

1. Klassifizierung der t_sea_a-Daten mit verspätetem Eintreffen der Labels
2. Klassifizierung der sea_a-Daten

In [1]:
import numpy as np
import pandas as pd

from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import AccuracyWeightedEnsembleClassifier
from skmultiflow.lazy import KNNADWINClassifier

from skmultiflow.data import TemporalDataStream
from skmultiflow.data.file_stream import FileStream
from skmultiflow.data.data_stream import DataStream

from skmultiflow.evaluation import EvaluatePrequentialDelayed

In [2]:
import warnings

# Suppress all warnings from here on (this filter is process-wide).
warnings.filterwarnings('ignore')

# Column names used to slice the CSV: features, label and timestamp.
attribute = ['att_num_0', 'att_num_1', 'att_num_2']
target = 'target_0'
timestamp = 'datetime'

# Load the data set and turn the timestamp column into datetime values.
t_sea_a = pd.read_csv("data/t_sea_a.csv")
t_sea_a[timestamp] = pd.to_datetime(t_sea_a[timestamp])

# Pull features, labels and timestamps out of the frame as plain arrays.
X, y, time = (
    t_sea_a[columns].values
    for columns in (attribute, target, timestamp)
)

# Label delay handed to the stream as `sample_delay`.
# NOTE(review): an int is presumably interpreted as a delay in number of
# samples rather than in time units - confirm against TemporalDataStream.
delay_time = 500

# Stream over the arrays; ordered=False tells the stream the input is not
# declared as already sorted.
t_sea_a_stream = TemporalDataStream(
    X,
    y,
    time,
    sample_delay=delay_time,
    ordered=False,
)

# Classifiers under comparison.
ht = HoeffdingTreeClassifier(leaf_prediction='nb')
hat = HoeffdingAdaptiveTreeClassifier()
efdt = ExtremelyFastDecisionTreeClassifier()
arf = AdaptiveRandomForestClassifier()
snb = NaiveBayes()
awe = AccuracyWeightedEnsembleClassifier(
    n_estimators=15,
    base_estimator=NaiveBayes(),
)
knn_adwin = KNNADWINClassifier(n_neighbors=10)

# Display name -> classifier; insertion order fixes the order in which the
# evaluator receives (and reports) the models.
models_by_name = {
    'Hoeffding Tree': ht,
    'Hoeffding Adaptive Tree': hat,
    'Extremely Fast Decision Tree': efdt,
    'Adaptive Random Forest': arf,
    'Naive Bayes': snb,
    'Accuracy Weighted Ensembler': awe,
    'KNN ADWIN': knn_adwin,
}

# Delayed prequential evaluator: covers every row of X, pre-trains on the
# first 100 samples and writes its results to a CSV file.
evaluator = EvaluatePrequentialDelayed(
    max_samples=len(X),
    n_wait=10,
    pretrain_size=100,
    output_file='results_t_sea_a.csv',
    metrics=[
        'accuracy',
        'kappa',
        'precision',
        'recall',
        'f1',
        'running_time',
        'model_size',
    ],
)

# Run the evaluation; kept as the last expression so the returned models
# are still shown as the cell output.
evaluator.evaluate(
    stream=t_sea_a_stream,
    model=list(models_by_name.values()),
    model_names=list(models_by_name.keys()),
)

Prequential Evaluation Delayed
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 ###################- [95%] [251.69s]Processed samples: 10000
Mean performance:
Hoeffding Tree - Accuracy     : 0.9134
Hoeffding Tree - Kappa        : 0.8155
Hoeffding Tree - Precision: 0.9273
Hoeffding Tree - Recall: 0.9337
Hoeffding Tree - F1 score: 0.9305
Hoeffding Tree - Training time (s)  : 0.71
Hoeffding Tree - Testing time  (s)  : 0.81
Hoeffding Tree - Total time    (s)  : 1.53
Hoeffding Tree - Size (kB)          : 44.6641
Hoeffding Adaptive Tree - Accuracy     : 0.9080
Hoeffding Adaptive Tree - Kappa        : 0.8037
Hoeffding Adaptive Tree - Precision: 0.9203
Hoeffding Adaptive Tree - Recall: 0.9326
Hoeffding Adaptive Tree - F1 score: 0.9264
Hoeffding Adaptive Tree - Training time (s)  : 3.01
Hoeffding Adaptive Tree - Testing time  (s)  : 0.77
Hoeffding Adaptive Tree - Total time    (s)  : 3.78
Hoeffding Adaptive Tree - Size (kB)          : 161.2051
Extremely Fast Decision Tree -

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False, nominal_attributes=None,
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False, nominal_attributes=None,
                                 random_state=None, remove_poor_atts=False,
                                 split_confidence=1e-07,
                        