In [5]:
import itertools
import os
import sys

from sklearn.ensemble import RandomForestClassifier

# Repository root: two directory levels above the current working directory
# (assumes the kernel was started in the notebook's own folder -- TODO confirm).
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath('.')))

# Make the project's data_preprocessing folder importable so that
# `data_interface` can be imported. The membership check keeps this cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
_preprocessing_dir = os.path.join(PROJECT_ROOT, 'data_preprocessing')
if _preprocessing_dir not in sys.path:
    sys.path.append(_preprocessing_dir)

from data_interface import get_data_sklearn

In [6]:
# Load the 'indian liver' dataset through the project's data_interface helper.
# The two returned objects are later passed to rf.fit(X, y), i.e. X is used as
# the model inputs and y as the targets.
# NOTE(review): presumably the Indian Liver Patient dataset -- confirm in data_interface.
X, y = get_data_sklearn('indian liver')

INFO:C:\Users\simonr04\git\GRAIMatter\data_preprocessing\data_interface.py:DATASET FOLDER = C:\Users\simonr04\git\GRAIMatter\data


Hyperparameter arguments can be passed as a dictionary:

In [11]:
# Random forest settings, keyed by the constructor argument they will be
# unpacked into.
hyperpars = {
    'n_estimators': 50,
    'bootstrap': False,
    'min_samples_split': 10,
}

In [12]:
# Unpack the dictionary into keyword arguments of the constructor; any
# parameter not listed in `hyperpars` keeps its default value.
rf = RandomForestClassifier(**hyperpars)

In [15]:
# Display the estimator's full parameter set to confirm that the values from
# `hyperpars` were applied.
rf.get_params()

{'bootstrap': False,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 10,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 50,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [16]:
# Train the forest on all of X and y (no train/test split is made here).
# The cell output is the repr of the estimator that fit() returns.
rf.fit(X, y)

RandomForestClassifier(bootstrap=False, min_samples_split=10, n_estimators=50)

So, it would be easy enough to put this in a loop. For example, here is a dictionary with the range of values we might want to try.

In [40]:
# Hyperparameter values to search over: each key is a RandomForestClassifier
# constructor argument and each list holds the candidate values for it.
param_grid = {
    'n_estimators': [10, 20, 100],
    'bootstrap': [True, False],
    'min_samples_split': [2, 10]
}

# Build every combination of the candidate values (Cartesian product).
# itertools.product returns a one-shot iterator; materialising it as a list
# keeps the combinations available after the loop has run.
# `itertools` must be imported in the notebook's import cell.
all_combinations = list(itertools.product(*param_grid.values()))

for combination in all_combinations:
    # Pair each hyperparameter name with its value in this combination.
    # Key order follows param_grid, so the printed dictionaries are unchanged.
    comb_dict = dict(zip(param_grid, combination))
    print(comb_dict)  # just to see that it is correct

    # Create the RF with these hyper-params
    rf = RandomForestClassifier(**comb_dict)

    # Fit, do MI, etc
    rf.fit(X, y)

{'n_estimators': 10, 'bootstrap': True, 'min_samples_split': 2}
{'n_estimators': 10, 'bootstrap': True, 'min_samples_split': 10}
{'n_estimators': 10, 'bootstrap': False, 'min_samples_split': 2}
{'n_estimators': 10, 'bootstrap': False, 'min_samples_split': 10}
{'n_estimators': 20, 'bootstrap': True, 'min_samples_split': 2}
{'n_estimators': 20, 'bootstrap': True, 'min_samples_split': 10}
{'n_estimators': 20, 'bootstrap': False, 'min_samples_split': 2}
{'n_estimators': 20, 'bootstrap': False, 'min_samples_split': 10}
{'n_estimators': 100, 'bootstrap': True, 'min_samples_split': 2}
{'n_estimators': 100, 'bootstrap': True, 'min_samples_split': 10}
{'n_estimators': 100, 'bootstrap': False, 'min_samples_split': 2}
{'n_estimators': 100, 'bootstrap': False, 'min_samples_split': 10}
