In [1]:
from scipy.io import arff
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data


In [2]:
# Parse the ARFF file; loadarff hands back a (records, metadata) pair and only
# the records (index 0) are turned into a DataFrame.
# Nominal columns ('day', 'class') arrive as byte strings, e.g. b'2' / b'UP'.
data = arff.loadarff("elecNormNew.arff")
records = data[0]
df = pd.DataFrame(records)

# Plain-text summary: column dtypes first, then the first five rows.
print(df.dtypes, df.head(), sep="\n")

date         float64
day           object
period       float64
nswprice     float64
nswdemand    float64
vicprice     float64
vicdemand    float64
transfer     float64
class         object
dtype: object
   date   day    period  nswprice  nswdemand  vicprice  vicdemand  transfer  \
0   0.0  b'2'  0.000000  0.056443   0.439155  0.003467   0.422915  0.414912   
1   0.0  b'2'  0.021277  0.051699   0.415055  0.003467   0.422915  0.414912   
2   0.0  b'2'  0.042553  0.051489   0.385004  0.003467   0.422915  0.414912   
3   0.0  b'2'  0.063830  0.045485   0.314639  0.003467   0.422915  0.414912   
4   0.0  b'2'  0.085106  0.042482   0.251116  0.003467   0.422915  0.414912   

     class  
0    b'UP'  
1    b'UP'  
2    b'UP'  
3    b'UP'  
4  b'DOWN'  


In [3]:
# (rows, columns) of the freshly loaded frame; displayed as the cell's output.
df.shape

(45312, 9)

In [4]:
# 'day' is loaded as byte strings (b'2', ...); store it as a small integer
# instead. Every other column keeps its dtype.
day_dtype = {'day': 'int8'}
df = df.astype(day_dtype)

# Confirm the new dtype and preview the converted column.
print(df.dtypes, df.head(), sep="\n")

date         float64
day             int8
period       float64
nswprice     float64
nswdemand    float64
vicprice     float64
vicdemand    float64
transfer     float64
class         object
dtype: object
   date  day    period  nswprice  nswdemand  vicprice  vicdemand  transfer  \
0   0.0    2  0.000000  0.056443   0.439155  0.003467   0.422915  0.414912   
1   0.0    2  0.021277  0.051699   0.415055  0.003467   0.422915  0.414912   
2   0.0    2  0.042553  0.051489   0.385004  0.003467   0.422915  0.414912   
3   0.0    2  0.063830  0.045485   0.314639  0.003467   0.422915  0.414912   
4   0.0    2  0.085106  0.042482   0.251116  0.003467   0.422915  0.414912   

     class  
0    b'UP'  
1    b'UP'  
2    b'UP'  
3    b'UP'  
4  b'DOWN'  


In [5]:
from skmultiflow.data import DataStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTree

# Encode the byte-string class labels (b'UP' / b'DOWN') as integers so they can
# be used as the stream target.
enc = LabelEncoder()
class_num = enc.fit_transform(df["class"])

# Features are every column except the label; the encoded labels go in as y.
features = df.drop(columns=["class"])
stream = DataStream(features, y=class_num)
stream.prepare_for_use()


In [6]:
# Sanity check of what the stream exposes, one item per output line:
# the summary string, the target names, and the feature names.
print(
    stream.get_data_info(),
    stream.target_names,
    stream.feature_names,
    sep="\n",
)

1 target(s), 2 classes
[0]
['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice', 'vicdemand', 'transfer']


# Set the evaluator

In [7]:
# Single prequential evaluator shared by all the evaluation cells in this
# notebook; it reports accuracy and kappa and keeps the live plot switched off.
evaluator = EvaluatePrequential(
    metrics=['accuracy', 'kappa'],
    show_plot=False,
)

# Set the model

In [8]:
# Baseline Hoeffding tree built with the library defaults (the full parameter
# set is visible in the repr printed after the evaluation).
ht = HoeffdingTree()

# Run evaluation

In [9]:
# Score the default tree on the stream. The call stays the cell's last
# expression, so the returned model list is echoed after the report.
baseline_models = [ht]
baseline_names = ["HT"]
evaluator.evaluate(stream=stream, model=baseline_models, model_names=baseline_names)


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [11.84s]
Processed samples: 45312
Mean performance:
HT - Accuracy     : 0.8030
HT - Kappa        : 0.5982


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05)]

# Set the models: leaf-prediction and split-criterion variants

In [10]:
# Fresh default tree as the reference point (leaf_prediction='nba',
# split_criterion='info_gain' according to the repr of the default model).
ht = HoeffdingTree()

# Same tree with a different leaf predictor: naive Bayes ('nb') and
# majority class ('mc').
ht_lp_nb = HoeffdingTree(leaf_prediction='nb')
ht_lp_mc = HoeffdingTree(leaf_prediction='mc')

# Same tree with a different split criterion.
ht_gini = HoeffdingTree(split_criterion='gini')
ht_hellinger = HoeffdingTree(split_criterion='hellinger')

In [11]:
# Pair every model with its report label in one mapping so the two lists
# handed to the evaluator cannot drift out of step.
leaf_and_criterion_models = {
    "ht": ht,
    "ht_lp_nb": ht_lp_nb,
    "ht_lp_mc": ht_lp_mc,
    "ht_gini": ht_gini,
    "ht_hellinger": ht_hellinger,
}
evaluator.evaluate(
    stream=stream,
    model=list(leaf_and_criterion_models.values()),
    model_names=list(leaf_and_criterion_models),
)


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [49.42s]
Processed samples: 45312
Mean performance:
ht - Accuracy     : 0.8030
ht - Kappa        : 0.5982
ht_lp_nb - Accuracy     : 0.7922
ht_lp_nb - Kappa        : 0.5764
ht_lp_mc - Accuracy     : 0.7572
ht_lp_mc - Kappa        : 0.5011
ht_gini - Accuracy     : 0.7924
ht_gini - Kappa        : 0.5746
ht_hellinger - Accuracy     : 0.8079
ht_hellinger - Kappa        : 0.6074


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nb',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='mc',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=No

# Set the models: binary-split, pre-pruning and memory-management variants

In [12]:
# Fresh default tree as the reference point.
ht = HoeffdingTree()

# One boolean option switched on per variant; everything else stays default.
ht_binary_split = HoeffdingTree(binary_split=True)
ht_no_preprune = HoeffdingTree(no_preprune=True)
ht_stop_mem_management = HoeffdingTree(stop_mem_management=True)

# TODO: remove_poor_atts=True is not evaluated here. The variant
#   ht_remove_poor_atts = HoeffdingTree(remove_poor_atts=True)
# was disabled with the note "Error?" -- confirm whether it really fails.

In [13]:
# Keep each model next to its report label so the two lists handed to the
# evaluator are always aligned.
split_option_models = {
    "ht": ht,
    "ht_binary_split": ht_binary_split,
    "ht_no_preprune": ht_no_preprune,
    "ht_stop_mem_management": ht_stop_mem_management,
}
evaluator.evaluate(
    stream=stream,
    model=list(split_option_models.values()),
    model_names=list(split_option_models),
)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [44.90s]
Processed samples: 45312
Mean performance:
ht - Accuracy     : 0.8030
ht - Kappa        : 0.5982
ht_binary_split - Accuracy     : 0.8030
ht_binary_split - Kappa        : 0.5982
ht_no_preprune - Accuracy     : 0.8030
ht_no_preprune - Kappa        : 0.5982
ht_stop_mem_management - Accuracy     : 0.8030
ht_stop_mem_management - Kappa        : 0.5982


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=True, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=True, nominal_attributes=No

# Set the models: memory-limit and grace-period variants

In [20]:
# Fresh default tree as the reference point (defaults per the model repr:
# grace_period=200, max_byte_size=33554432, memory_estimate_period=1000000).
ht = HoeffdingTree()

# Memory-budget variants. max_byte_size is a byte count, so:
#   2**28 = 268,435,456 bytes = 256 MiB
#   2**12 =       4,096 bytes =   4 KiB
# Both variants also lower memory_estimate_period from the default to 10000.
ht_high_mem = HoeffdingTree(memory_estimate_period=10000,
                            max_byte_size=2**28)  # 256 MiB
ht_low_mem = HoeffdingTree(memory_estimate_period=10000,
                           max_byte_size=2**12)  # 4 KiB

# Grace-period variants, shrinking from the default of 200 down to 1.
ht_grace_100 = HoeffdingTree(grace_period=100)
ht_grace_50 = HoeffdingTree(grace_period=50)
ht_grace_10 = HoeffdingTree(grace_period=10)
ht_grace_1 = HoeffdingTree(grace_period=1)

In [21]:
# Keep each model next to its report label so the two lists handed to the
# evaluator are always aligned.
memory_and_grace_models = {
    "ht": ht,
    "ht_high_mem": ht_high_mem,
    "ht_low_mem": ht_low_mem,
    "ht_grace_100": ht_grace_100,
    "ht_grace_50": ht_grace_50,
    "ht_grace_10": ht_grace_10,
    "ht_grace_1": ht_grace_1,
}
evaluator.evaluate(
    stream=stream,
    model=list(memory_and_grace_models.values()),
    model_names=list(memory_and_grace_models),
)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 200 sample(s).
Evaluating...
 #################### [100%] [114.81s]
Processed samples: 45312
Mean performance:
ht - Accuracy     : 0.8030
ht - Kappa        : 0.5982
ht_high_mem - Accuracy     : 0.8030
ht_high_mem - Kappa        : 0.5982
ht_low_mem - Accuracy     : 0.7594
ht_low_mem - Kappa        : 0.4848
ht_grace_100 - Accuracy     : 0.8059
ht_grace_100 - Kappa        : 0.6015
ht_grace_50 - Accuracy     : 0.8112
ht_grace_50 - Kappa        : 0.6122
ht_grace_10 - Accuracy     : 0.8167
ht_grace_10 - Kappa        : 0.6228
ht_grace_1 - Accuracy     : 0.6759
ht_grace_1 - Kappa        : 0.2997


[HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=33554432, memory_estimate_period=1000000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=268435456, memory_estimate_period=10000,
               nb_threshold=0, no_preprune=False, nominal_attributes=None,
               remove_poor_atts=False, split_confidence=1e-07,
               split_criterion='info_gain', stop_mem_management=False,
               tie_threshold=0.05),
 HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',
               max_byte_size=4096, memory_estimate_period=10000, nb_threshold=0,
               no_preprune=False, nominal_attributes=None,
 