# Random Forest Performance Benchmarks

In [3]:
import numpy as np
import time

import matplotlib.pyplot as plt

from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from IPython.display import display, Image
from sklearn.datasets import load_breast_cancer

# Import our custom utilities
# `importlib.reload` replaces `imp.reload`: the `imp` module has been deprecated
# since Python 3.4 and was removed in Python 3.12.
from importlib import reload
from utils import irf_jupyter_utils
from utils import irf_utils
from utils import iRF_benchmarks_lib
# Re-import the utility modules so that edits made on disk are picked up
# without restarting the kernel.
# NOTE(review): iRF_benchmarks_lib is imported but not reloaded here - confirm
# whether that is intentional.
reload(irf_jupyter_utils)
reload(irf_utils)

<module 'utils.irf_utils' from '/home/runjing_liu/Documents/iRF/scikit-learn-sandbox/jupyter/utils/irf_utils.py'>

## Load data

In [4]:
# Load the scikit-learn breast cancer dataset and split it into the feature
# matrix and the response vector used by the benchmarks below.
raw_data = load_breast_cancer()
features, responses = raw_data.data, raw_data.target

## Call function to compute benchmarks

In [3]:
# Benchmark configuration
n_trials = 10            # number of times to run random forest for our benchmarks
n_estimators = 20        # number of trees in the random forest
train_split_propn = 0.8  # proportion handed to the benchmark as the training split

# Run the random forest benchmark; the three results are inspected in the cells below.
(metrics_all,
 metrics_summary,
 feature_importances) = iRF_benchmarks_lib.RF_benchmarks(
    features,
    responses,
    n_trials=n_trials,
    train_split_propn=train_split_propn,
    n_estimators=n_estimators,
    seed=2017,
)


In [4]:
# Show the names of the entries available in the benchmark summary.
metrics_summary.keys()

dict_keys(['precision_score', 'confusion_matrix', 'f1_score', 'hamming_loss', 'recall_score', 'log_loss', 'accuracy_score', 'zero_one_loss', 'time'])

## Let's look at the results

### Relevant parameters

In [5]:
# Report the dataset dimensions and the nominal train/test sizes implied by
# the split proportion (rounded products, not sizes read back from the split).
print('Dimensions of full dataset (#samples , # features): ', features.shape)
print('Number of training samples: ', np.round(features.shape[0] * train_split_propn))
print('Number of test samples: ', np.round(features.shape[0] * (1 - train_split_propn)))
print('number of trees in the random forest: ', n_estimators)

Dimensions of full dataset (#samples , # features):  (569, 30)
Number of training samples:  455.0
Number of test samples:  114.0
number of trees in the random forest:  20


### Timing and some accuracy scores across trials

In [6]:
# Each summary entry is printed as stored; the labels describe it as [mean, std].
for label, key in (
    ('time (seconds) to compute RF [mean, std]: ', 'time'),
    ('accuracy_score [mean, std]: ', 'accuracy_score'),
    ('hammming_loss [mean, std]: ', 'hamming_loss'),
):
    print(label, metrics_summary[key])

time (seconds) to compute RF [mean, std]:  [0.038167762756347659, 0.0056543663450365289]
accuracy_score [mean, std]:  [0.95350877192982464, 0.013040411181858341]
hammming_loss [mean, std]:  [0.04649122807017543, 0.013040411181858337]


### Let's look at the stability of feature importances across trials

In [7]:
print('top five feature importances across trials')

# Iterate over the trials actually stored in `feature_importances` rather than
# over the global `n_trials`: that name is rebound to a different value by the
# iRF benchmark cell further down, so re-running this cell afterwards would
# otherwise silently report only a subset of the RF trials.
# NOTE(review): assumes `feature_importances` holds exactly one entry per
# trial - confirm against iRF_benchmarks_lib.RF_benchmarks.
for i in range(len(feature_importances)):
    # argsort is ascending, so reverse it to list the most important feature first
    importances_rank = np.argsort(feature_importances[i])[::-1]
    print('trial' + str(i) + ': ', importances_rank[0:5])

top five feature importances across trials
trial0:  [20 23 27 22  0]
trial1:  [27 22  7 23 20]
trial2:  [22 20 23  7  3]
trial3:  [20 23  2 22 27]
trial4:  [27 22  7  0 20]
trial5:  [23  7 22  3 20]
trial6:  [23 27 22  0 13]
trial7:  [23 20 27  7  6]
trial8:  [27 20 23 22  3]
trial9:  [23 27 20  7  3]


## iRF benchmarks

### Call function to compute iRF benchmarks

In [8]:
n_trials = 5  # number of times to run iRF in benchmarking

# parameters for iRF
train_split_propn = 0.8
n_estimators = 20           # number of trees in the full random forest
B = 20                      # number of bootstrap samples
M = 20                      # number of trees in RIT
max_depth = 5               # max depth of RIT
n_estimators_bootstrap = 5  # number of trees in the RF bootstrap

# Run the iRF benchmark; the four results are inspected in the cells below.
(metrics_all_iRF,
 metrics_summary_iRF,
 stability_all,
 feature_importances_iRF) = iRF_benchmarks_lib.iRF_benchmarks(
    features,
    responses,
    n_trials=n_trials,
    K=5,
    train_split_propn=train_split_propn,
    n_estimators=n_estimators,
    B=B,
    propn_n_samples=.2,
    bin_class_type=1,
    M=M,
    max_depth=max_depth,
    noisy_split=False,
    num_splits=2,
    n_estimators_bootstrap=n_estimators_bootstrap,
    seed=2018,
)

## Let's look at the results

### Relevant parameters

In [9]:
# Report the dataset dimensions and the nominal train/test sizes implied by
# the split proportion (rounded products, not sizes read back from the split).
print('Dimensions of full dataset (#samples , # features): ', features.shape)
print('Number of training samples: ', np.round(features.shape[0] * train_split_propn))
print('Number of test samples: ', np.round(features.shape[0] * (1 - train_split_propn)))

print('\n')

# Echo the iRF settings chosen in the benchmark cell.
for label, value in (
    ('number of trees in full random forest: ', n_estimators),
    ('number of bootstrap samples: ', B),
    ('number of trees in RIT: ', M),
    ('max depth of RIT: ', max_depth),
    ('number of trees is RF bootstrap: ', n_estimators_bootstrap),
):
    print(label, value)


Dimensions of full dataset (#samples , # features):  (569, 30)
Number of training samples:  455.0
Number of test samples:  114.0


number of trees in full random forest:  20
number of bootstrap samples:  20
number of trees in RIT:  20
max depth of RIT:  5
number of trees is RF bootstrap:  5


### Timing and some accuracy scores across trials
These are the metrics for the random forest at iteration K.

In [10]:
# Timing first, then a blank gap, then the score entries; each value is
# printed as stored in the summary (labelled [mean, std]).
print('time (seconds) to compute iRF [mean, std]: ', metrics_summary_iRF['time'])
print('\n')
for label, key in (
    ('accuracy_score [mean, std]: ', 'accuracy_score'),
    ('hammming_loss [mean, std]: ', 'hamming_loss'),
):
    print(label, metrics_summary_iRF[key])

time (seconds) to compute iRF [mean, std]:  [2.640289306640625, 0.10714668470600612]


accuracy_score [mean, std]:  [0.93859649122807021, 0.012405382126079773]
hammming_loss [mean, std]:  [0.061403508771929814, 0.012405382126079782]


### Let's look at the stability of feature importances across trials

Again, feature importances are measured on the last forest.

In [12]:
print('top five important features across trials')

for trial in range(n_trials):
    # argsort is ascending, so reverse it to list the most important feature first
    ranked_features = np.argsort(feature_importances_iRF[trial])[::-1]
    print('trial' + str(trial) + ': ', ranked_features[:5])

top five feature importances across trials
trial0:  [22 27 20 23  3]
trial1:  [27 22  7 23 20]
trial2:  [27 22 23 20  7]
trial3:  [27  7 22 20 23]
trial4:  [22 27 20 23  7]


### Finally, let's examine the discovered interactions across trials

In [18]:
print('top five stable interactions across trials')

for i in range(n_trials):
    # Keys of this trial's stability mapping, ordered by their associated
    # value from highest to lowest.
    interactions = sorted(stability_all[i], key=stability_all[i].get, reverse=True)

    print('trial' + str(i) + ': ', interactions[0:5])

top five stable interactions across trials
trial0:  ['27', '22_27', '22', '13_27', '13_22_27']
trial1:  ['22', '22_27', '27', '7', '7_22']
trial2:  ['27', '22_27', '22', '23_27', '22_23_27']
trial3:  ['27', '7', '22', '22_27', '7_27']
trial4:  ['27', '22_27', '22', '23_27', '20_27']
