In diesem Notebook wird eine Delayed Streaming Klassifizierung für t_cov mit V = 7000 durchgeführt

In [1]:
import numpy as np
import pandas as pd

from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import AccuracyWeightedEnsembleClassifier
from skmultiflow.lazy import KNNADWINClassifier

from skmultiflow.data import TemporalDataStream

from skmultiflow.evaluation import EvaluatePrequentialDelayed

In [3]:
import warnings
warnings.filterwarnings('ignore')

#set columns
attribute = ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area1',
       'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4',
       'Soil_Type1', 'Soil_Type2', 'Soil_Type3', 'Soil_Type4', 'Soil_Type5',
       'Soil_Type6', 'Soil_Type7', 'Soil_Type8', 'Soil_Type9', 'Soil_Type10',
       'Soil_Type11', 'Soil_Type12', 'Soil_Type13', 'Soil_Type14',
       'Soil_Type15', 'Soil_Type16', 'Soil_Type17', 'Soil_Type18',
       'Soil_Type19', 'Soil_Type20', 'Soil_Type21', 'Soil_Type22',
       'Soil_Type23', 'Soil_Type24', 'Soil_Type25', 'Soil_Type26',
       'Soil_Type27', 'Soil_Type28', 'Soil_Type29', 'Soil_Type30',
       'Soil_Type31', 'Soil_Type32', 'Soil_Type33', 'Soil_Type34',
       'Soil_Type35', 'Soil_Type36', 'Soil_Type37', 'Soil_Type38',
       'Soil_Type39', 'Soil_Type40']
target = "Cover_Type"
timestamp = "datetime"

#read csv
t_cov = pd.read_csv("data/t_cov.csv")

#convert time to datetime
t_cov[timestamp] = pd.to_datetime(t_cov[timestamp])

#get X, y and time
X = t_cov[attribute].values
y = t_cov[target].values
time = t_cov[timestamp].values

delay_time = 7000

#create stream
t_cov_stream = TemporalDataStream(X, y, time, sample_delay=delay_time, ordered=False)

#set classifier
ht = HoeffdingTreeClassifier(leaf_prediction='nb')
hat = HoeffdingAdaptiveTreeClassifier()
efdt = ExtremelyFastDecisionTreeClassifier()
arf = AdaptiveRandomForestClassifier()
snb = NaiveBayes()
awe = AccuracyWeightedEnsembleClassifier(n_estimators=15, base_estimator=NaiveBayes())
knn_adwin = KNNADWINClassifier(n_neighbors=10)

# Set evaluator
evaluator = EvaluatePrequentialDelayed(n_wait=10,
                                pretrain_size=100,
                                max_samples=X.shape[0],
                                output_file='results_cov_t.csv',
                                metrics=['accuracy', 'kappa', 
                                         'precision', 'recall', 
                                         'f1', 'running_time', 
                                         'model_size'])

#evaluate
evaluator.evaluate(stream=t_cov_stream, 
                   model=[ht, hat, efdt, arf, 
                          snb, awe, knn_adwin], 
                   model_names=['Hoeffding Tree', 'Hoeffding Adaptive Tree', 
                                'Extremely Fast Decision Tree', 
                                'Adaptive Random Forest', 
                                'Naive Bayes', 'Accuracy Weighted Ensembler', 'KNN ADWIN' ])

Prequential Evaluation Delayed
Evaluating 1 target(s).
Pre-training on 100 sample(s).
Evaluating...
 ###################- [95%] [1303.47s]Processed samples: 10000
Mean performance:
Hoeffding Tree - Accuracy     : 0.0857
Hoeffding Tree - Kappa        : 0.0467
Hoeffding Tree - Precision: 0.2083
Hoeffding Tree - Recall: 0.0751
Hoeffding Tree - F1 score: 0.0688
Hoeffding Tree - Training time (s)  : 6.73
Hoeffding Tree - Testing time  (s)  : 9.17
Hoeffding Tree - Total time    (s)  : 15.90
Hoeffding Tree - Size (kB)          : 488.9688
Hoeffding Adaptive Tree - Accuracy     : 0.0867
Hoeffding Adaptive Tree - Kappa        : 0.0477
Hoeffding Adaptive Tree - Precision: 0.2116
Hoeffding Adaptive Tree - Recall: 0.0759
Hoeffding Adaptive Tree - F1 score: 0.0687
Hoeffding Adaptive Tree - Training time (s)  : 38.29
Hoeffding Adaptive Tree - Testing time  (s)  : 8.99
Hoeffding Adaptive Tree - Total time    (s)  : 47.28
Hoeffding Adaptive Tree - Size (kB)          : 263.3379
Extremely Fast Decision T

[HoeffdingTreeClassifier(binary_split=False, grace_period=200,
                         leaf_prediction='nb', max_byte_size=33554432,
                         memory_estimate_period=1000000, nb_threshold=0,
                         no_preprune=False, nominal_attributes=None,
                         remove_poor_atts=False, split_confidence=1e-07,
                         split_criterion='info_gain', stop_mem_management=False,
                         tie_threshold=0.05),
 HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,
                                 grace_period=200, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False, nominal_attributes=None,
                                 random_state=None, remove_poor_atts=False,
                                 split_confidence=1e-07,
                        