Streaming Klassifizierung der agr_g2 Daten

In [1]:
import numpy as np
import pandas as pd

from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import AccuracyWeightedEnsembleClassifier
from skmultiflow.lazy import KNNADWINClassifier

from skmultiflow.data.file_stream import FileStream

from skmultiflow.evaluation import EvaluatePrequential

In [2]:
#create stream
agr_g2_stream = FileStream('data/agr_g2.csv') 
nominal=['elevel', 'car', 'zipcode']

#set classifier
ht = HoeffdingTreeClassifier(leaf_prediction='nb', nominal_attributes=nominal)
hat = HoeffdingAdaptiveTreeClassifier(nominal_attributes=nominal)
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal)
arf = AdaptiveRandomForestClassifier(nominal_attributes=nominal)
snb = NaiveBayes(nominal_attributes=nominal)
awe = AccuracyWeightedEnsembleClassifier(n_estimators=15, base_estimator=NaiveBayes(nominal_attributes=nominal))
knn_adwin = KNNADWINClassifier(n_neighbors=10)

#set evaluator
evaluator = EvaluatePrequential(max_samples=100000,
                                n_wait=100,
                                pretrain_size=100,
                                output_file='results_agr_g2_stream.csv',
                                metrics=['accuracy', 'kappa', 
                                         'precision', 'recall', 
                                         'f1', 'running_time', 
                                         'model_size'])

#evaluate
evaluator.evaluate(stream=agr_g2_stream, 
                   model=[ht, hat, efdt, arf, 
                          snb, awe, knn_adwin], 
                   model_names=['Hoeffding Tree', 'Hoeffding Adaptive Tree', 
                                'Extremely Fast Decision Tree', 
                                'Adaptive Random Forest', 
                                'Naive Bayes', 'Accuracy Weighted Ensembler', 'KNN ADWIN'])

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 #################### [100%] [3226.73s]
Processed samples: 100000
Mean performance:
Hoeffding Tree - Accuracy     : 0.7760
Hoeffding Tree - Kappa        : 0.4806
Hoeffding Tree - Precision: 0.7818
Hoeffding Tree - Recall: 0.9031
Hoeffding Tree - F1 score: 0.8381
Hoeffding Tree - Training time (s)  : 8.79
Hoeffding Tree - Testing time  (s)  : 12.22
Hoeffding Tree - Total time    (s)  : 21.01
Hoeffding Tree - Size (kB)          : 742.7256
Hoeffding Adaptive Tree - Accuracy     : 0.8026
Hoeffding Adaptive Tree - Kappa        : 0.5442
Hoeffding Adaptive Tree - Precision: 0.8023
Hoeffding Adaptive Tree - Recall: 0.9189
Hoeffding Adaptive Tree - F1 score: 0.8567
Hoeffding Adaptive Tree - Training time (s)  : 46.39
Hoeffding Adaptive Tree - Testing time  (s)  : 7.58
Hoeffding Adaptive Tree - Total time    (s)  : 53.97
Hoeffding Adaptive Tree - Size (kB)          : 381.0010
Extremely Fast Decision Tree 

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False,
                         nominal_attributes=['elevel', 'car', 'zipcode'],
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False,
                                 nominal_attributes=['elevel', 'car', 'zipcode'],
                                 random_state=None,