In [3]:
from scipy.io import arff
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data


In [4]:
# Parse the ARFF file; the first element of the returned tuple holds the rows.
data = arff.loadarff("elecNormNew.arff")
df = pd.DataFrame(data=data[0])

# Preview column dtypes and the first rows. The nominal columns ('day', 'class')
# show up as byte strings with dtype object in the output below.
print(df.dtypes, df.head(), sep="\n")

date         float64
day           object
period       float64
nswprice     float64
nswdemand    float64
vicprice     float64
vicdemand    float64
transfer     float64
class         object
dtype: object
   date   day    period  nswprice  nswdemand  vicprice  vicdemand  transfer  \
0   0.0  b'2'  0.000000  0.056443   0.439155  0.003467   0.422915  0.414912   
1   0.0  b'2'  0.021277  0.051699   0.415055  0.003467   0.422915  0.414912   
2   0.0  b'2'  0.042553  0.051489   0.385004  0.003467   0.422915  0.414912   
3   0.0  b'2'  0.063830  0.045485   0.314639  0.003467   0.422915  0.414912   
4   0.0  b'2'  0.085106  0.042482   0.251116  0.003467   0.422915  0.414912   

     class  
0    b'UP'  
1    b'UP'  
2    b'UP'  
3    b'UP'  
4  b'DOWN'  


In [5]:
# Convert the byte-string 'day' column to a compact integer dtype,
# leaving every other column as it was.
df = df.assign(day=df["day"].astype("int8"))

# Re-check the dtypes and preview the converted frame.
print(df.dtypes, df.head(), sep="\n")

date         float64
day             int8
period       float64
nswprice     float64
nswdemand    float64
vicprice     float64
vicdemand    float64
transfer     float64
class         object
dtype: object
   date  day    period  nswprice  nswdemand  vicprice  vicdemand  transfer  \
0   0.0    2  0.000000  0.056443   0.439155  0.003467   0.422915  0.414912   
1   0.0    2  0.021277  0.051699   0.415055  0.003467   0.422915  0.414912   
2   0.0    2  0.042553  0.051489   0.385004  0.003467   0.422915  0.414912   
3   0.0    2  0.063830  0.045485   0.314639  0.003467   0.422915  0.414912   
4   0.0    2  0.085106  0.042482   0.251116  0.003467   0.422915  0.414912   

     class  
0    b'UP'  
1    b'UP'  
2    b'UP'  
3    b'UP'  
4  b'DOWN'  


In [6]:
from skmultiflow.data import DataStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTree
from skmultiflow.trees import HAT

# Turn the byte-string class labels into integer codes.
enc = LabelEncoder()
class_num = enc.fit_transform(df["class"])

# Every column except the label is a feature; the encoded labels are passed separately.
stream = DataStream(df.drop(columns=["class"]), y=class_num)
# NOTE(review): prepare_for_use() is reportedly deprecated in newer scikit-multiflow
# releases — confirm against the installed version.
stream.prepare_for_use()


In [7]:
# Sanity-check the stream: summary line, target names, then feature names (one per line).
print(
    stream.get_data_info(),
    stream.target_names,
    stream.feature_names,
    sep="\n",
)

1 target(s), 2 classes
[0]
['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice', 'vicdemand', 'transfer']


# Set the evaluator

In [11]:
# Prequential evaluator reporting accuracy and kappa; live plotting is disabled.
evaluator = EvaluatePrequential(
    metrics=["accuracy", "kappa"],
    show_plot=False,
)

# Set the models: HoeffdingTree vs. HAT (default parameters)

In [24]:
# Two learners with default hyper-parameters: a Hoeffding tree and a HAT.
ht, hat = HoeffdingTree(), HAT()

# Run evaluation

In [25]:
# Keep each display name next to its model so the two lists cannot drift apart.
contenders = [("ht", ht), ("hat", hat)]

# Last expression of the cell: the evaluate() return value is shown as cell output.
evaluator.evaluate(
    stream=stream,
    model=[estimator for _label, estimator in contenders],
    model_names=[label for label, _estimator in contenders],
)


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [33.00s]
Processed samples: 45312
Mean performance:
ht - Accuracy     : 0.8030
ht - Kappa        : 0.5982
hat - Accuracy     : 0.8164
hat - Kappa        : 0.6158


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05)]

# Set the models: HAT leaf-prediction and split-criterion variants

In [22]:
# Baseline with library defaults (the repr below shows leaf_prediction='nba',
# split_criterion='info_gain').
hat = HAT()

# Same learner with the two alternative leaf predictors.
hat_lp_nb, hat_lp_mc = (HAT(leaf_prediction=mode) for mode in ("nb", "mc"))

# Same learner with the Gini split criterion instead of information gain.
hat_gini = HAT(split_criterion="gini")

In [23]:
# Keep each display name next to its model so the two lists cannot drift apart.
contenders = [
    ("hat", hat),
    ("hat_lp_nb", hat_lp_nb),
    ("hat_lp_mc", hat_lp_mc),
    ("hat_gini", hat_gini),
]

# Last expression of the cell: the evaluate() return value is shown as cell output.
evaluator.evaluate(
    stream=stream,
    model=[estimator for _label, estimator in contenders],
    model_names=[label for label, _estimator in contenders],
)


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [75.29s]
Processed samples: 45312
Mean performance:
hat - Accuracy     : 0.8164
hat - Kappa        : 0.6158
hat_lp_nb - Accuracy     : 0.8072
hat_lp_nb - Kappa        : 0.5987
hat_lp_mc - Accuracy     : 0.7356
hat_lp_mc - Kappa        : 0.4542
hat_gini - Accuracy     : 0.8111
hat_gini - Kappa        : 0.6075


[HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='nb',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='mc',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=Fal

# Set the models: HAT boolean-option variants (binary_split, no_preprune, stop_mem_management)

In [18]:
# Baseline with library defaults.
hat = HAT()

# One variant per boolean switch, each differing from the baseline in a single flag.
# NOTE(review): a HoeffdingTree(remove_poor_atts=True) variant was tried and left
# out because it seemed to raise an error — unconfirmed, worth re-checking.
hat_binary_split, hat_no_preprune, hat_stop_mem_management = (
    HAT(**{switch: True})
    for switch in ("binary_split", "no_preprune", "stop_mem_management")
)

In [19]:
# Keep each display name next to its model so the two lists cannot drift apart.
contenders = [
    ("hat", hat),
    ("hat_binary_split", hat_binary_split),
    ("hat_no_preprune", hat_no_preprune),
    ("hat_stop_mem_management", hat_stop_mem_management),
]

# Last expression of the cell: the evaluate() return value is shown as cell output.
evaluator.evaluate(
    stream=stream,
    model=[estimator for _label, estimator in contenders],
    model_names=[label for label, _estimator in contenders],
)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [79.51s]
Processed samples: 45312
Mean performance:
hat - Accuracy     : 0.8164
hat - Kappa        : 0.6158
hat_binary_split - Accuracy     : 0.8164
hat_binary_split - Kappa        : 0.6158
hat_no_preprune - Accuracy     : 0.8164
hat_no_preprune - Kappa        : 0.6158
hat_stop_mem_management - Accuracy     : 0.8164
hat_stop_mem_management - Kappa        : 0.6158


[HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=True, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=True, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=Fal

# Set the models: HAT memory-limit and grace-period variants

In [30]:
# Baseline with library defaults (the repr below shows max_byte_size=33554432,
# memory_estimate_period=1000000, grace_period=200).
hat = HAT()
# Memory variants: both use memory_estimate_period=10000 instead of the default.
# NOTE(review): the earlier comments said "128MB" / "4MB", which do not match the
# values; if those sizes were intended the exponents would be 27 and 22 — confirm.
hat_high_mem = HAT(memory_estimate_period =10000, 
                           max_byte_size = 2**28) # 2**28 = 268435456 bytes (256 MiB)
hat_low_mem = HAT(memory_estimate_period =10000, 
                           max_byte_size = 2**12) # 2**12 = 4096 bytes (4 KiB)
# Grace-period variants: progressively smaller than the default of 200.
hat_grace_100 = HAT(grace_period =100)
hat_grace_50 = HAT(grace_period =50)
hat_grace_10 = HAT(grace_period =10)
hat_grace_1 = HAT(grace_period =1)

In [31]:
# Keep each display name next to its model so the two lists cannot drift apart.
contenders = [
    ("hat", hat),
    ("hat_high_mem", hat_high_mem),
    ("hat_low_mem", hat_low_mem),
    ("hat_grace_100", hat_grace_100),
    ("hat_grace_50", hat_grace_50),
    ("hat_grace_10", hat_grace_10),
    ("hat_grace_1", hat_grace_1),
]

# Last expression of the cell: the evaluate() return value is shown as cell output.
evaluator.evaluate(
    stream=stream,
    model=[estimator for _label, estimator in contenders],
    model_names=[label for label, _estimator in contenders],
)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [159.67s]
Processed samples: 45312
Mean performance:
hat - Accuracy     : 0.8164
hat - Kappa        : 0.6158
hat_high_mem - Accuracy     : 0.8164
hat_high_mem - Kappa        : 0.6158
hat_low_mem - Accuracy     : 0.8164
hat_low_mem - Kappa        : 0.6158
hat_grace_100 - Accuracy     : 0.8195
hat_grace_100 - Kappa        : 0.6244
hat_grace_50 - Accuracy     : 0.8194
hat_grace_50 - Kappa        : 0.6238
hat_grace_10 - Accuracy     : 0.8137
hat_grace_10 - Kappa        : 0.6108
hat_grace_1 - Accuracy     : 0.5727
hat_grace_1 - Kappa        : 0.0073


[HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=268435456, memory_estimate_period=10000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, grace_period=200, leaf_prediction='nba',
     max_byte_size=4096, memory_estimate_period=10000, nb_threshold=0,
     no_preprune=False, nominal_attributes=None, remove_poor_atts=False,
     split_confidence=1e-07, split_criterion='info_gain',
     stop_mem_management=False, tie_threshold=0.05),
 HAT(binary_split=False, g