# Develop a random forest model for the BBBP classifier using the default hyperparameters and Morgan fingerprints, and report the performance measure (ROC_AUC) for all three datasets.

In [1]:
!pip install scikit-learn
!pip install rdkit-pypi



In [2]:
# import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

# read BBBP.csv file
df = pd.read_csv('BBBP.csv')
df.head()

Unnamed: 0,num,name,p_np,smiles
0,1,Propanolol,1,[Cl].CC(C)NCC(O)COc1cccc2ccccc12
1,2,Terbutylchlorambucil,1,C(=O)(OC(C)(C)C)CCCc1ccc(cc1)N(CCCl)CCCl
2,3,40730,1,c12c3c(N4CCN(C)CC4)c(F)cc1c(c(C(O)=O)cn2C(C)CO...
3,4,24,1,C1CCN(CC1)Cc1cccc(c1)OCCCNC(=O)C
4,5,cloxacillin,1,Cc1onc(c2ccccc2Cl)c1C(=O)N[C@H]3[C@H]4SC(C)(C)...


In [3]:
# use default hyperparameters and morgan fingerprint to develop random forest model
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# split data into training and testing sets
from sklearn.model_selection import train_test_split
train, temp = train_test_split(df, test_size=0.2, random_state=42)
test, val = train_test_split(temp, test_size=0.5, random_state=42)

# verify length of each set
print("Training set length: ", len(train))
print("Validation set length: ", len(val))
print("Testing set length: ", len(test))

Training set length:  1640
Validation set length:  205
Testing set length:  205


In [4]:
# define function to calculate morgan fingerprints
Morgan_fpts = []
p_np = []
for i in range(len(df)):
    mol = Chem.MolFromSmiles(df['smiles'][i])
    if mol != None:
        Morgan_fpts.append(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024))
        p_np.append(df['p_np'][i])

[17:33:48] Explicit valence for atom # 1 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 6 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 6 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 11 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 12 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[17:33:48] Explicit valence for atom # 5 N, 4, is greater than permitted


In [5]:
# convert fingerprints to numpy array
Morgan_fpts = np.array(Morgan_fpts)

In [6]:
# convert p_np to numpy array
p_np = np.array(p_np)

In [7]:
# use default morgan fingerprints and hyperparameters to train random forest model
rf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=42)
rf.fit(Morgan_fpts, p_np)

In [8]:
# predict p_np values for test set
pred = rf.predict(Morgan_fpts)

In [9]:
# calculate AUC
auc = roc_auc_score(p_np, pred)
print("AUC: ", auc)

AUC:  0.5816665328408543


In [10]:
# list default hyperparameters
print("n_estimators: ", rf.n_estimators)
print("max_depth: ", rf.max_depth)
print("min_samples_leaf: ", rf.min_samples_leaf)
print("min_impurity_decrease: ", rf.min_impurity_decrease)
print("max_features: ", rf.max_features)

n_estimators:  100
max_depth:  2
min_samples_leaf:  1
min_impurity_decrease:  0.0
max_features:  sqrt


In [11]:
# optimize model using grid search to find best hyperparameters
from sklearn.model_selection import GridSearchCV

# define hyperparameters
param_grid = {'n_estimators': [100, 200, 300, 400, 500],
                'max_depth': [2, 4, 6, 8, 10],
                'min_samples_leaf': [1, 2, 4],
                'min_impurity_decrease': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
                'max_features': ['auto', 'sqrt', 'log2']}

# define grid search
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 2)

# fit grid search
grid_search.fit(Morgan_fpts, p_np)

# print best hyperparameters
print("Best n_estimators: ", grid_search.best_params_['n_estimators'])
print("Best max_depth: ", grid_search.best_params_['max_depth'])
print("Best min_samples_leaf: ", grid_search.best_params_['min_samples_leaf'])
print("Best min_impurity_decrease: ", grid_search.best_params_['min_impurity_decrease'])
print("Best max_features: ", grid_search.best_params_['max_features'])

Fitting 3 folds for each of 1350 candidates, totalling 4050 fits


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=300; total time=   1.1s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   1.5s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   1.5s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=500; total time=   1.9s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=300; total time=   1.1s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=300; total time=   1.1s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   1.6s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=500; total time=   1.9s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=500; total time=   1.9s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=2, n_estimators=200; total time=   0.8s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=400; total time=   1.4s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=500; total time=   1.6s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=500; total time=   1.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.6s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=400; total time=   1.3s
[CV] END max_depth=2, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.7s


  warn(


[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.6s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   0.9s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=400; total time=   1.1s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=400; total time=   1.2s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.5s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=200; total time=   0.6s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=200; total time=   0.6s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=300; total time=   0.9s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=300; total time=   0.9s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=400; total time=   1.1s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.6s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.7s
[CV] END max_depth=2, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=200; total time=   0.6s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.5s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   0.9s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   1.0s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=300; total time=   1.5s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   1.9s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   1.9s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=500; total time=   2.2s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=300; total time=   0.9s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=400; total time=   1.4s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=500; total time=   1.6s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=500; total time=   1.7s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=200; total time=   0.7s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=200; total time=   0.7s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=300; total time=   1.0s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=400; total time=   1.3s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.6s
[CV] END max_depth=4, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.7s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=300; total time=   0.8s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=400; total time=   1.1s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=400; total time=   1.1s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=500; total time=   1.6s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=200; total time=   0.6s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=100; total time=   0.3s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=200; total time=   0.6s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=300; total time=   0.8s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=300; total time=   0.8s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=400; total time=   1.1s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=500; total time=   1.4s
[CV] END max_depth=4, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=4, n_estimators=500; total time=   1.4s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=2, n_estimators=500; total time=   2.7s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=100; total time=   0.5s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=100; total time=   0.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=100; total time=   0.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=200; total time=   1.0s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=300; total time=   1.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=300; total time=   1.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=4, n_estimators=400; total time=   2.1s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=400; total time=   1.2s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=400; total time=   1.3s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=500; total time=   1.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=100; total time=   0.4s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=100; total time=   0.3s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=100; total time=   0.4s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=200; total time=   0.7s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=400; total time=   1.3s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=500; total time=   1.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=500; total time=   1.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=6, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=4, n_estimators=400; total time=   1.3s


  warn(


[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   0.9s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   0.9s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=400; total time=   1.2s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.4s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=500; total time=   1.4s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.3, min_samples_leaf=2, n_estimators=200; total time=   0.6s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=400; total time=   1.1s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.4s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.5s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=200; total time=   0.6s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=300; total time=   0.9s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=400; total time=   1.2s
[CV] END max_depth=6, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=400; total time=   1.2s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.6s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=100; total time=   0.6s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   1.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=200; total time=   1.2s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=300; total time=   1.8s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   2.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=400; total time=   2.4s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.0, min_samples_leaf=1, n_estimators=500; total time=   3.1s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=100; total time=   0.4s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=200; total time=   0.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=400; total time=   1.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=500; total time=   1.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=4, n_estimators=500; total time=   1.8s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=400; total time=   1.4s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=400; total time=   1.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=4, n_estimators=500; total time=   1.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=100; total time=   0.4s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.3, min_samples_leaf=1, n_estimators=300; total time=   1.0s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=200; total time=   0.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=300; total time=   1.1s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=300; total time=   1.0s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=400; total time=   1.3s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=4, n_estimators=500; total time=   1.6s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=8, max_features=auto, min_impurity_decrease=0.5, min_samples_leaf=1, n_estimators=300; total time=   1.0s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=500; total time=   1.7s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=100; total time=   0.5s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=300; total time=   0.8s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=300; total time=   0.9s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=400; total time=   1.2s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=200; total time=   0.6s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=300; total time=   0.8s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=300; total time=   0.9s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=400; total time=   1.2s
[CV] END max_depth=8, max_features=log2, min_impurity_decrease=0.5, min_samples_leaf=2, n_estimators=500; total time=   1.6s


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=1, n_estimators=400; total time=   1.3s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=1, n_estimators=500; total time=   1.6s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=1, n_estimators=500; total time=   1.6s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.1, min_samples_leaf=2, n_estimators=400; total time=

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=200; total time=   0.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=300; total time=   1.1s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=300; total time=   1.2s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=400; total time=   1.6s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=500; total time=   1.9s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=1, n_estimators=500; total time=   1.6s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.2, min_samples_leaf=2, n_estimators=200; total time=

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=500; total time=   1.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=400; total time=   1.4s
[CV] END max_depth=10, max_features=auto, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=400; total time=

  warn(


Best n_estimators:  100
Best max_depth:  10
Best min_samples_leaf:  2
Best min_impurity_decrease:  0.0
Best max_features:  auto


In [12]:
# use optimized hyperparameters to train random forest model
rf = RandomForestClassifier(n_estimators=grid_search.best_params_['n_estimators'], max_depth=grid_search.best_params_['max_depth'], min_samples_leaf=grid_search.best_params_['min_samples_leaf'], min_impurity_decrease=grid_search.best_params_['min_impurity_decrease'], max_features=grid_search.best_params_['max_features'], random_state=42)
rf.fit(Morgan_fpts, p_np)

# predict p_np values for test set
pred = rf.predict(Morgan_fpts)

# calculate AUC for train, test, and validation sets
auc = roc_auc_score(p_np, pred)
print("AUC: ", auc)

  warn(


AUC:  0.7490839623146512
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=400; total time=   1.2s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=500; total time=   1.4s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=1, n_estimators=500; total time=   1.6s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=200; total time=   0.5s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=200; total time=   0.6s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=300; total time=   0.9s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_estimators=400; total time=   1.2s
[CV] END max_depth=10, max_features=log2, min_impurity_decrease=0.4, min_samples_leaf=2, n_es

# Compared with the literature value (AUC~0.9+), the AUC is significantly lower. This could be due to the limitations of not having enough processors to run grid search on more hyperparameters like the literature has done. Therefore, because of the limited search area, there could be better values for hyperparameters that this model could not find.