In diesem Notebook werden zwei Streaming Klassifizierungen mit 10.000 Instanzen durchgeführt:

1. Klassifizierung der agr_g_t Daten mit verspäteten Eintreffen der Label
2. Klassifizierung der agr_g Daten

In [1]:
import numpy as np
import pandas as pd

from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import AccuracyWeightedEnsembleClassifier
from skmultiflow.lazy import KNNADWINClassifier

from skmultiflow.data import TemporalDataStream
from skmultiflow.data.file_stream import FileStream
from skmultiflow.data.data_stream import DataStream

from skmultiflow.evaluation import EvaluatePrequentialDelayed
from skmultiflow.evaluation import EvaluatePrequential

In [6]:
import warnings
warnings.filterwarnings('ignore')

#set columns
attribute = ['salary', 'comission', 'age', 'elevel',
                 'car', 'zipcode', 'hvalue', 'hyears', 'loan']
target = 'class'
timestamp = 'datetime'

#read csv 
t_agr_a = pd.read_csv("data/t_agr_a.csv", )
t_agr_a = t_agr_a.loc[:, ~t_agr_a.columns.str.contains('^Unnamed')]

#convert time to datetime
t_agr_a[timestamp] = pd.to_datetime(t_agr_a[timestamp])

#get X, y and time
X = t_agr_a[attribute].values
y = t_agr_a[target].values
time = t_agr_a[timestamp].values

#set a delay of 1 day
delay_time = np.timedelta64(1, "D")

#create stream
t_agr_a_stream = TemporalDataStream(X, y, time, sample_delay=delay_time, ordered=False)
nominal=['elevel', 'car', 'zipcode']

#set classifier
ht = HoeffdingTreeClassifier(leaf_prediction='nb', nominal_attributes=nominal)
hat = HoeffdingAdaptiveTreeClassifier(nominal_attributes=nominal)
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal)
arf = AdaptiveRandomForestClassifier(nominal_attributes=nominal)
snb = NaiveBayes(nominal_attributes=nominal)
awe = AccuracyWeightedEnsembleClassifier(n_estimators=15, base_estimator=NaiveBayes(nominal_attributes=nominal))
knn_adwin = KNNADWINClassifier(n_neighbors=10)

#set evaluator
evaluator = EvaluatePrequentialDelayed(
                                n_wait=10,
                                pretrain_size=100,
                                max_samples=X.shape[0],
                                output_file='results_t_agr_a.csv',
                                metrics=['accuracy', 'kappa', 
                                         'precision', 'recall', 
                                         'f1', 'running_time', 
                                         'model_size'])

#evaluate
evaluator.evaluate(stream=t_agr_a_stream, 
                   model=[ht, hat, efdt, arf, 
                          snb, awe, knn_adwin], 
                   model_names=['Hoeffding Tree', 'Hoeffding Adaptive Tree', 
                                'Extremely Fast Decision Tree', 
                                'Adaptive Random Forest', 
                                'Naive Bayes', 'Accuracy Weighted Ensembler', 'KNN ADWIN'])

Prequential Evaluation Delayed
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 ###################- [95%] [491.44s]Processed samples: 10000
Mean performance:
Hoeffding Tree - Accuracy     : 0.5526
Hoeffding Tree - Kappa        : 0.0164
Hoeffding Tree - Precision: 0.4610
Hoeffding Tree - Recall: 0.1563
Hoeffding Tree - F1 score: 0.2335
Hoeffding Tree - Training time (s)  : 1.03
Hoeffding Tree - Testing time  (s)  : 1.36
Hoeffding Tree - Total time    (s)  : 2.40
Hoeffding Tree - Size (kB)          : 63.1201
Hoeffding Adaptive Tree - Accuracy     : 0.5523
Hoeffding Adaptive Tree - Kappa        : 0.0206
Hoeffding Adaptive Tree - Precision: 0.4643
Hoeffding Adaptive Tree - Recall: 0.1765
Hoeffding Adaptive Tree - F1 score: 0.2558
Hoeffding Adaptive Tree - Training time (s)  : 3.37
Hoeffding Adaptive Tree - Testing time  (s)  : 1.06
Hoeffding Adaptive Tree - Total time    (s)  : 4.43
Hoeffding Adaptive Tree - Size (kB)          : 91.5752
Extremely Fast Decision Tree - 

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False,
                         nominal_attributes=['elevel', 'car', 'zipcode'],
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False,
                                 nominal_attributes=['elevel', 'car', 'zipcode'],
                                 random_state=None,

In [7]:
#drop timestamp
t_agr_a = pd.read_csv('data/t_agr_a.csv')
agr_a = t_agr_a.drop(['datetime'], axis=1)

#create stream
agr_a_stream = DataStream(agr_a)

#set classifier
ht = HoeffdingTreeClassifier(leaf_prediction='nb', nominal_attributes=nominal)
hat = HoeffdingAdaptiveTreeClassifier(nominal_attributes=nominal)
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal)
arf = AdaptiveRandomForestClassifier(nominal_attributes=nominal)
snb = NaiveBayes(nominal_attributes=nominal)
awe = AccuracyWeightedEnsembleClassifier(n_estimators=15, base_estimator=NaiveBayes(nominal_attributes=nominal))
knn_adwin = KNNADWINClassifier(n_neighbors=10)

#set evaluator
evaluator = EvaluatePrequential(max_samples=10000,
                                n_wait=10,
                                pretrain_size=100,
                                metrics=['accuracy', 'kappa', 
                                         'precision', 'recall', 
                                         'f1', 'running_time', 
                                         'model_size'])

#evaluate
evaluator.evaluate(stream=agr_a_stream, 
                   model=[ht, hat, efdt, arf, 
                          snb, awe, knn_adwin], 
                   model_names=['Hoeffding Tree', 'Hoeffding Adaptive Tree', 
                                'Extremely Fast Decision Tree', 
                                'Adaptive Random Forest', 
                                'Naive Bayes', 'Accuracy Weighted Ensembler', 'KNN ADWIN' ])


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 #################### [100%] [227.24s]
Processed samples: 10000
Mean performance:
Hoeffding Tree - Accuracy     : 0.7021
Hoeffding Tree - Kappa        : 0.4036
Hoeffding Tree - Precision: 0.6394
Hoeffding Tree - Recall: 0.7270
Hoeffding Tree - F1 score: 0.6804
Hoeffding Tree - Training time (s)  : 0.92
Hoeffding Tree - Testing time  (s)  : 1.26
Hoeffding Tree - Total time    (s)  : 2.19
Hoeffding Tree - Size (kB)          : 41.3818
Hoeffding Adaptive Tree - Accuracy     : 0.7642
Hoeffding Adaptive Tree - Kappa        : 0.5226
Hoeffding Adaptive Tree - Precision: 0.7217
Hoeffding Adaptive Tree - Recall: 0.7478
Hoeffding Adaptive Tree - F1 score: 0.7345
Hoeffding Adaptive Tree - Training time (s)  : 4.81
Hoeffding Adaptive Tree - Testing time  (s)  : 1.16
Hoeffding Adaptive Tree - Total time    (s)  : 5.97
Hoeffding Adaptive Tree - Size (kB)          : 58.9502
Extremely Fast Decision Tree - Accura

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False,
                         nominal_attributes=['elevel', 'car', 'zipcode'],
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False,
                                 nominal_attributes=['elevel', 'car', 'zipcode'],
                                 random_state=None,