In [1]:
import pandas as pd
import numpy as np

In [2]:
""" Read data to be used """
# Load the spam dataset from a comma-separated file in the working directory.
data = pd.read_csv('spambase.csv', sep=",")
# Evaluated mid-cell, so the preview is computed but not displayed
# (a notebook only echoes the final expression of a cell).
data.head()

# --- Preprocessing ---
# Total number of columns in the raw frame.
m = len(data.columns)
# Predictors: the first 48 columns.
X = data.iloc[:, :48]
# Target: the last column, sliced (not indexed) so it stays a one-column
# DataFrame rather than a Series.
y = data.iloc[:, m - 1:]

# One-hot encode the predictors; pd.get_dummies only expands non-numeric
# columns and passes numeric ones through unchanged.
X_onehot = pd.get_dummies(X)

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_onehot,
    y,
    test_size=0.25,
    random_state=33,
)

In [4]:
# Mean of the label column in each split. NOTE(review): assuming 0/1 labels,
# this is the share of spam rows expressed as a fraction (e.g. 0.40), not a
# percentage, despite the "%" in the printed label.
train_spam_share = np.mean(y_train)
print("% of spams (train): ", train_spam_share)

test_spam_share = np.mean(y_test)
print("% of spams (test): ", test_spam_share)

% of spams (train):  Spam    0.402899
dtype: float64
% of spams (test):  Spam    0.367507
dtype: float64


In [None]:
""" Import BRF classifiers """
from BoostedRandomForest import BoostedRandomForest

# Example usage. Written as comments rather than a bare string literal: a
# string left as the last expression of a cell is echoed as the cell output.
#   brf = BoostedRandomForest()
#   rf = BoostedRandomForest(weight_update=False, boosting=False)
#   brf_wout_update = BoostedRandomForest(weight_update=False)

In [None]:
""" Default parameters """
# Baseline keyword arguments for BoostedRandomForest.
brf_params = dict(
    T=250,               # max number of trees
    depth_max=20,
    weight_update=True,
    boosting=True,
)


In [None]:
""" Without Early Stopping """
# Take a shallow copy of the defaults instead of aliasing them. A plain
# assignment would bind this name to the very same dict as brf_params, so any
# in-place edit of the defaults elsewhere in the notebook would silently
# change this configuration too.
brf_nonstop_params = dict(brf_params)
# No eps_* keys are passed, so the classifier's own defaults apply.
# NOTE(review): confirm those defaults actually disable early stopping.
brf_nonstop_clf = BoostedRandomForest(**brf_nonstop_params)

In [None]:
# Train the classifier built without early-stopping parameters on the
# training split. NOTE(review): presumably this is what populates the
# train_accs / all_eps / all_alphas attributes read afterwards — confirm.
brf_nonstop_clf.fit(X_train, y_train)

In [None]:
# Diagnostics stored on the classifier trained without early stopping
# (presumably one entry per boosting round — confirm against the class).
brf_train_accs = brf_nonstop_clf.train_accs   # training accuracies
brf_eps = brf_nonstop_clf.all_eps             # epsilons
brf_alphas = brf_nonstop_clf.all_alphas       # alphas

# Print the number of recorded entries: accuracies, epsilons, then alphas.
for _history in (brf_train_accs, brf_eps, brf_alphas):
    print(len(_history))

In [None]:
""" Early Stopping with eps=[1e-20, 0.5]"""
# Build the early-stopping configuration as a NEW dict layered on top of the
# defaults. Assigning brf_params directly and then setting keys on it would
# add the eps_* entries to brf_params itself (and to every other name bound
# to that dict), so re-running an earlier cell would pick them up too.
brf_es_params = dict(
    brf_params,
    eps_ub=0.5,            # upper end of the eps interval named in the cell title
    eps_lb=1e-20,          # lower end of the eps interval named in the cell title
    eps_exceed_limit=5,    # NOTE(review): presumably how many out-of-range
                           # rounds are tolerated before stopping — confirm
)

brf_es_clf = BoostedRandomForest(**brf_es_params)

In [None]:
# Train the early-stopping variant on the same training split as the
# non-stopping model, so the two runs are directly comparable.
brf_es_clf.fit(X_train, y_train)

In [None]:
# Diagnostics of the early-stopping classifier (brf_es_clf). They are read
# from brf_es_clf, not brf_nonstop_clf, and stored under brf_es_* names so the
# brf_train_accs / brf_eps / brf_alphas values of the non-stopping run stay
# available for comparison.

# Training accuracies
brf_es_train_accs = brf_es_clf.train_accs
print(len(brf_es_train_accs))
# Epsilons
brf_es_eps = brf_es_clf.all_eps
print(len(brf_es_eps))
# Alphas
brf_es_alphas = brf_es_clf.all_alphas
print(len(brf_es_alphas))