# Hoeffding Adaptive Tree

## Documentation
  * https://scikit-multiflow.github.io/scikit-multiflow/skmultiflow.classification.trees.html
  * https://scikit-multiflow.github.io/scikit-multiflow/skmultiflow.data.file_stream.html
  * https://scikit-multiflow.github.io/scikit-multiflow/skmultiflow.evaluation.evaluate_prequential.html

In [7]:
from skmultiflow.trees.hoeffding_adaptive_tree import HAT
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential
# Open covtype.csv as a file-backed stream. The target index is -1 and a
# single target is requested (spelled out as keywords instead of positionally).
stream = FileStream("../data/covtype.csv", target_idx=-1, n_targets=1)
stream.prepare_for_use()

# Hoeffding Adaptive Tree built with the library's default hyper-parameters.
classifier = HAT()

# Prequential evaluator: pre-train on 200 samples, process at most 50000
# samples, report accuracy only, no plot and no summary file.
evaluator = EvaluatePrequential(
    metrics=['accuracy'],
    show_plot=False,
    output_file=None,
    pretrain_size=200,
    max_samples=50000,
    batch_size=50,
    n_wait=200,
    max_time=1000,
)

# Last expression of the cell, so its return value is echoed as the cell output.
evaluator.evaluate(model=classifier, stream=stream)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [174.27s]
Processed samples: 50000
Mean performance:
M0 - Accuracy     : 0.7487


[HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05)]

# Transaction data

In [6]:
from skmultiflow.trees.hoeffding_adaptive_tree import HAT
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential
# Open the exported transaction data as a file-backed stream. The target
# index is -1 and a single target is requested.
stream = FileStream(
    "../data_output/export_dataframe_0v3.csv",
    target_idx=-1,
    n_targets=1,
)
stream.prepare_for_use()

# Hoeffding Adaptive Tree built with the library's default hyper-parameters.
classifier = HAT()

# Prequential evaluator: pre-train on 10000 samples, process at most 1000000
# samples, and track accuracy, running time and model size. The summary is
# written to 'summary_hat.txt'; plotting is disabled.
evaluator = EvaluatePrequential(
    metrics=['accuracy', 'running_time', 'model_size'],
    show_plot=False,
    output_file='summary_hat.txt',
    pretrain_size=10000,
    max_samples=1000000,
    batch_size=10,
    n_wait=200,
    max_time=1000,
)

# Last expression of the cell, so its return value is echoed as the cell output.
evaluator.evaluate(model=classifier, stream=stream)
# Recorded result of this run: M0 - Accuracy     : 0.5884

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 10000 sample(s).
Evaluating...
 #################### [100%] [539.58s]
Processed samples: 1000000
Mean performance:
M0 - Accuracy     : 0.5884
M0 - Training time (s)  : 379.96
M0 - Testing time  (s)  : 60.31
M0 - Total time    (s)  : 440.27
M0 - Size (kB)          : 69.6475


[HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05)]

In [8]:
from skmultiflow.trees import HoeffdingTree
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential
# Open the exported transaction data as a file-backed stream. The target
# index is -1 and a single target is requested.
stream = FileStream(
    "../data_output/export_dataframe_0v3.csv",
    target_idx=-1,
    n_targets=1,
)
stream.prepare_for_use()

# Plain Hoeffding Tree built with the library's default hyper-parameters.
# Alternative configuration (left disabled): HoeffdingTree(no_preprune=True)
classifier = HoeffdingTree()

# Prequential evaluator: pre-train on 10000 samples, process at most 1000000
# samples, and track accuracy, running time and model size. The summary is
# written to 'summary_ht.txt'; plotting is disabled.
evaluator = EvaluatePrequential(
    metrics=['accuracy', 'running_time', 'model_size'],
    show_plot=False,
    output_file='summary_ht.txt',
    pretrain_size=10000,
    max_samples=1000000,
    batch_size=10,
    n_wait=200,
    max_time=1000,
)

# Last expression of the cell, so its return value is echoed as the cell output.
evaluator.evaluate(model=classifier, stream=stream)
# Recorded result of this run: M0 - Accuracy     : 0.6129

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 10000 sample(s).
Evaluating...
 #################### [100%] [567.57s]
Processed samples: 1000000
Mean performance:
M0 - Accuracy     : 0.6129
M0 - Training time (s)  : 188.94
M0 - Testing time  (s)  : 62.15
M0 - Total time    (s)  : 251.10
M0 - Size (kB)          : 3803.1641


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05)]