# Run Benchmarks skeleton code

In [10]:
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
import numpy as np
from functools import reduce

# Needed for the scikit-learn wrapper function
from sklearn.tree import irf_utils
from sklearn.ensemble import RandomForestClassifier
from math import ceil
from sklearn.model_selection import train_test_split

import itertools
import py_irf_benchmarks2

# Import our custom utilities.
# `reload` comes from importlib: the `imp` module that used to provide it has
# been deprecated since Python 3.4 and was removed in Python 3.12.
from importlib import reload
import sys

# Make the sibling `jupyter` directory importable so that `utils` resolves.
sys.path.insert(0, '../jupyter')

from utils import irf_jupyter_utils

# Re-import the helper module so edits made to it on disk are picked up
# without restarting the kernel.
reload(irf_jupyter_utils)

<module 'utils.irf_jupyter_utils' from '../jupyter/utils/irf_jupyter_utils.py'>

# Load data and specs

In [11]:
# Read the features and the responses from their comma-separated text files
# (paths are relative to the notebook's working directory).
features, responses = (
    np.loadtxt(csv_path, delimiter=',')
    for csv_path in ('./data/breast_cancer_features.csv',
                     './data/breast_cancer_responses.csv')
)

In [12]:
# Load the benchmark specification from YAML. As the printed output shows,
# the result maps each parameter name to a list of candidate values.
specs = py_irf_benchmarks2.yaml_to_dict(
    inp_yaml='./specs/iRF_mod01.yaml',
)
print(specs)

{'inp_dsname': ['breast_cancer'], 'n_trials': [5], 'n_iter': [5], 'train_split_propn': [0.8], 'n_estimators': [2, 4, 6], 'n_bootstraps': [20], 'propn_n_samples': [0.2], 'bin_class_type': [1], 'n_RIT': [20], 'max_depth': [5], 'noisy_split': [False], 'num_splits': [2], 'n_estimators_bootstrap': [5], 'N_obs': ['all'], 'N_features': ['all']}


# Set up loop

In [13]:
# Expand the spec into a full parameter grid: take the Cartesian product of
# the candidate-value lists (keys visited in sorted order) and turn every
# resulting tuple back into a {parameter name: value} dict.
varNames = sorted(specs)
spec_comb = list(map(
    lambda combo: dict(zip(varNames, combo)),
    itertools.product(*map(specs.get, varNames)),
))
print(spec_comb)

# Number of parameter combinations in the grid (displayed as the cell output).
len(spec_comb)

[{'N_features': 'all', 'N_obs': 'all', 'bin_class_type': 1, 'inp_dsname': 'breast_cancer', 'max_depth': 5, 'n_RIT': 20, 'n_bootstraps': 20, 'n_estimators': 2, 'n_estimators_bootstrap': 5, 'n_iter': 5, 'n_trials': 5, 'noisy_split': False, 'num_splits': 2, 'propn_n_samples': 0.2, 'train_split_propn': 0.8}, {'N_features': 'all', 'N_obs': 'all', 'bin_class_type': 1, 'inp_dsname': 'breast_cancer', 'max_depth': 5, 'n_RIT': 20, 'n_bootstraps': 20, 'n_estimators': 4, 'n_estimators_bootstrap': 5, 'n_iter': 5, 'n_trials': 5, 'noisy_split': False, 'num_splits': 2, 'propn_n_samples': 0.2, 'train_split_propn': 0.8}, {'N_features': 'all', 'N_obs': 'all', 'bin_class_type': 1, 'inp_dsname': 'breast_cancer', 'max_depth': 5, 'n_RIT': 20, 'n_bootstraps': 20, 'n_estimators': 6, 'n_estimators_bootstrap': 5, 'n_iter': 5, 'n_trials': 5, 'noisy_split': False, 'num_splits': 2, 'propn_n_samples': 0.2, 'train_split_propn': 0.8}]


3

# Parse data and run iRF trials

In [14]:
# Run the benchmark grid: for every parameter combination, split the data
# once and then fit iRF `n_trials` times with that combination's settings.
print(len(spec_comb))

# NOTE(review): the saved output of this cell ends with
# "IndexError: index 992 is out of bounds for axis 1 with size 30", raised
# after the second combination (n_estimators=4) was printed. The cause is not
# visible in this notebook -- presumably inside irf_utils.run_iRF; confirm.
for spec in spec_comb:

    print(spec)

    # Take N_obs / N_features from the spec instead of hard-coding 'all', so
    # that sub-sampling requested in the YAML is actually honoured.
    # The split seed is fixed, so every combination sees the same partition.
    X_train, X_test, y_train, y_test = py_irf_benchmarks2.parse_data(
        features, responses, spec['train_split_propn'],
        N_obs=spec['N_obs'], N_features=spec['N_features'], seed=200)

    # NOTE(review): the return value of run_iRF is discarded, and every trial
    # reuses the same split and the same random_state_classifier -- confirm
    # whether repeated trials are expected to differ.
    for trial in range(spec['n_trials']):
        irf_utils.run_iRF(
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            K=spec['n_iter'],
            n_estimators=spec['n_estimators'],
            B=spec['n_bootstraps'],
            random_state_classifier=152,
            propn_n_samples=spec['propn_n_samples'],
            bin_class_type=spec['bin_class_type'],
            M=spec['n_RIT'],
            max_depth=spec['max_depth'],
            noisy_split=spec['noisy_split'],
            num_splits=spec['num_splits'],
            n_estimators_bootstrap=spec['n_estimators_bootstrap'])

3
{'N_features': 'all', 'N_obs': 'all', 'bin_class_type': 1, 'inp_dsname': 'breast_cancer', 'max_depth': 5, 'n_RIT': 20, 'n_bootstraps': 20, 'n_estimators': 2, 'n_estimators_bootstrap': 5, 'n_iter': 5, 'n_trials': 5, 'noisy_split': False, 'num_splits': 2, 'propn_n_samples': 0.2, 'train_split_propn': 0.8}
{'N_features': 'all', 'N_obs': 'all', 'bin_class_type': 1, 'inp_dsname': 'breast_cancer', 'max_depth': 5, 'n_RIT': 20, 'n_bootstraps': 20, 'n_estimators': 4, 'n_estimators_bootstrap': 5, 'n_iter': 5, 'n_trials': 5, 'noisy_split': False, 'num_splits': 2, 'propn_n_samples': 0.2, 'train_split_propn': 0.8}


IndexError: index 992 is out of bounds for axis 1 with size 30