In [1]:
import pandas as pd
import numpy as np

In [2]:
""" Read data to be used """
# The spambase (spam e-mail) dataset serves as the example here.
data = pd.read_csv('spambase.csv', sep=",")
# Not displayed: a notebook only echoes the final expression of a cell.
data.head()

# --- Basic preprocessing ---
# Total number of columns in the loaded table
m = len(data.columns)
# Features: keep only the first 48 columns
X = data.iloc[:, :48]
# Target: the last column, kept as a single-column DataFrame
y = data.iloc[:, -1:]

# One-hot encode the selected feature columns
X_onehot = pd.get_dummies(X)

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_onehot,
    y,
    test_size=0.25,
    random_state=33,
)

In [4]:
""" Import BRF classifiers """
# Classifier under evaluation, imported from the BoostedRandomForest module
# (presumably project-local; it is not shown in this notebook).
from BoostedRandomForest import BoostedRandomForest
# NOTE: the triple-quoted string below is disabled example code, not a docstring.
# Because it is the final expression of the cell, the notebook echoes it as the cell output.
"""brf = BoostedRandomForest()
rf = BoostedRandomForest(weight_update=False, boosting=False)
brf_wout_update = BoostedRandomForest(weight_update=False)"""

'brf = BoostedRandomForest()\nrf = BoostedRandomForest(weight_update=False, boosting=False)\nbrf_wout_update = BoostedRandomForest(weight_update=False)'

In [5]:
# Number of times eval_clf builds, trains and scores a fresh classifier
# (one accuracy value and one training-time sample are recorded per repetition)
iterations = 20

In [26]:
""" Function for evaluating a classifier"""
from sklearn.metrics import accuracy_score
from time import time

def eval_clf(method, CLF, params, X_train, X_test, y_train, y_test, iterations):
    """Repeatedly train a fresh classifier and score it on the test data.

    Parameters
    ----------
    method : str
        ``"Ensemble"`` predicts with ``clf.ensemble_predict``; any other
        value falls back to ``clf.RF_predict``.
    CLF : callable
        Classifier class (or factory); invoked as ``CLF(**params)`` once per
        iteration so every run starts from an untrained model.
    params : dict
        Keyword arguments forwarded to ``CLF``.
    X_train, y_train
        Training features and labels, passed unchanged to ``clf.fit``.
    X_test, y_test
        Test features used for prediction and the labels the predictions
        are compared against with ``accuracy_score``.
    iterations : int
        Number of independent train/evaluate repetitions.

    Returns
    -------
    tuple(list, list)
        ``(accs, times)``: the accuracy of each repetition and the number of
        seconds spent inside ``clf.fit`` for each repetition. Both lists are
        empty when ``iterations`` is 0.
    """
    # perf_counter is monotonic and high-resolution, so measured durations
    # cannot be distorted (or go negative) when the system clock is adjusted,
    # which can happen with time.time().
    from time import perf_counter

    use_ensemble = method == "Ensemble"

    # Accuracy and training time of each repetition
    accs = []
    times = []

    for it in range(iterations):
        print("Iteraiton: ", it)

        # Fresh, untrained classifier for this repetition
        clf = CLF(**params)

        # Time only the training step
        start = perf_counter()
        clf.fit(X_train, y_train)
        times.append(perf_counter() - start)

        # Choose the prediction path requested by the caller
        predict = clf.ensemble_predict if use_ensemble else clf.RF_predict
        pred = predict(X_test)

        accs.append(accuracy_score(y_test, pred))

    # Return results and times used
    return accs, times

In [27]:
""" Boosted Random Forest """
# Configuration under evaluation: boosting enabled, weight_update disabled
brf_params = {
    'weight_update': False,
    'boosting': True,
}
# Score through the "Ensemble" prediction path; one accuracy and one fit time per repetition
brf_accs, brf_times = eval_clf(
    "Ensemble",
    BoostedRandomForest,
    brf_params,
    X_train,
    X_test,
    y_train,
    y_test,
    iterations,
)

Iteraiton:  0
Iteraiton:  1
Iteraiton:  2
Iteraiton:  3
Iteraiton:  4
Iteraiton:  5
Iteraiton:  6
Iteraiton:  7
Iteraiton:  8
Iteraiton:  9
Iteraiton:  10
Iteraiton:  11
Iteraiton:  12
Iteraiton:  13
Iteraiton:  14
Iteraiton:  15
Iteraiton:  16
Iteraiton:  17
Iteraiton:  18
Iteraiton:  19


In [28]:
# Show the per-repetition test accuracies returned by eval_clf
print(brf_accs)

[0.9009556907037359, 0.9339704604691572, 0.9278887923544744, 0.9331016507384883, 0.9131190269331017, 0.9365768896611643, 0.9348392701998263, 0.9400521285838401, 0.8748913987836664, 0.9287576020851434, 0.9383145091225021, 0.9374456993918332, 0.9383145091225021, 0.9374456993918332, 0.945264986967854, 0.9348392701998263, 0.9304952215464813, 0.9304952215464813, 0.9374456993918332, 0.8792354474370113]


In [32]:
# Build a single classifier from an explicit keyword dictionary so its attributes can be inspected
params = {
    'weight_update': False,
    'boosting': True,
}
test_brf = BoostedRandomForest(**params)

In [33]:
# Inspect the weight_update attribute of the instance built from params
print(test_brf.weight_update)

False
