In [1]:
import numpy as np
import pandas as pd

from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import AccuracyWeightedEnsembleClassifier
from skmultiflow.lazy import KNNADWINClassifier

from skmultiflow.data.file_stream import FileStream

from skmultiflow.evaluation import EvaluatePrequential


In [4]:
#create stream
air_stream = FileStream('https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/master/airlines.csv')
nominal=['Airline', 'Flight', 'AirportFrom', 'AirportTo', 'DayOfWeek']

#set classifier
ht = HoeffdingTreeClassifier(leaf_prediction='nb', nominal_attributes=nominal)
hat = HoeffdingAdaptiveTreeClassifier(nominal_attributes=nominal)
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal)
arf = AdaptiveRandomForestClassifier(nominal_attributes=nominal)
snb = NaiveBayes(nominal_attributes=nominal)
awe = AccuracyWeightedEnsembleClassifier(n_estimators=15, base_estimator=NaiveBayes(nominal_attributes=nominal))
knn_adwin = KNNADWINClassifier(n_neighbors=10)

#set evaluation
evaluator = EvaluatePrequential(max_samples=100000,
                                n_wait=100,
                                pretrain_size=100,
                                output_file='results_air.csv',
                                metrics=['accuracy', 'kappa', 
                                         'precision', 'recall', 
                                         'f1', 'running_time', 
                                         'model_size'])
#run classification + ev
evaluator.evaluate(stream=air_stream, 
                   model=[ht, hat, efdt, arf, 
                          snb, awe, knn_adwin], 
                   model_names=['Hoeffding Tree', 'Hoeffding Adaptive Tree', 
                                'Extremely Fast Decision Tree', 
                                'Adaptive Random Forest', 
                                'Naive Bayes', 'Accuracy Weighted Ensembler', 'KNN ADWIN'])

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 #################### [100%] [2077.75s]
Processed samples: 100000
Mean performance:
Hoeffding Tree - Accuracy     : 0.6530
Hoeffding Tree - Kappa        : 0.1599
Hoeffding Tree - Precision: 0.5310
Hoeffding Tree - Recall: 0.2839
Hoeffding Tree - F1 score: 0.3700
Hoeffding Tree - Training time (s)  : 13.19
Hoeffding Tree - Testing time  (s)  : 14.85
Hoeffding Tree - Total time    (s)  : 28.04
Hoeffding Tree - Size (kB)          : 351.0850
Hoeffding Adaptive Tree - Accuracy     : 0.6651
Hoeffding Adaptive Tree - Kappa        : 0.1582
Hoeffding Adaptive Tree - Precision: 0.5866
Hoeffding Adaptive Tree - Recall: 0.2258
Hoeffding Adaptive Tree - F1 score: 0.3261
Hoeffding Adaptive Tree - Training time (s)  : 67.21
Hoeffding Adaptive Tree - Testing time  (s)  : 9.15
Hoeffding Adaptive Tree - Total time    (s)  : 76.36
Hoeffding Adaptive Tree - Size (kB)          : 237.7627
Extremely Fast Decision Tree

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False,
                         nominal_attributes=['Airline', 'Flight', 'AirportFrom',
                                             'AirportTo', 'DayOfWeek'],
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False,
                                 nominal_attributes=['