In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas as pd
import sklearn as sklearn
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
import scipy.stats as stats

from utils.base_set import X_train, y_train, seed

### Decision Tree

In [2]:
p = X_train.shape[1]
model = DecisionTreeClassifier(random_state=seed)
k = 5
n = 50
cv = StratifiedKFold(n_splits=k, shuffle=True, random_state=seed)
params = {
    "criterion": ["gini", "entropy", "log_loss"],
    "max_depth": stats.randint(1, p),
    "max_features": stats.randint(1, p),
}
classifier_Dtree = RandomizedSearchCV(
   model, param_distributions=params, n_iter=n, cv=cv, random_state=seed, scoring='roc_auc'
)
classifier_Dtree.fit(X_train, y_train)

In [3]:
best = classifier_Dtree.best_params_
auc_roc = classifier_Dtree.best_score_
print(best)
print(auc_roc)

{'criterion': 'entropy', 'max_depth': 33, 'max_features': 99}
0.6434340914833812


In [4]:
pd.DataFrame(classifier_Dtree.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_max_features,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.031583,0.0037,0.001846,0.000402,entropy,40,124,"{'criterion': 'entropy', 'max_depth': 40, 'max...",0.603111,0.620968,0.614631,0.721877,0.569087,0.625935,0.051205,6
1,0.0022,0.000312,0.001483,0.000217,gini,2,16,"{'criterion': 'gini', 'max_depth': 2, 'max_fea...",0.452477,0.525922,0.59591,0.567552,0.544789,0.53733,0.048437,49
2,0.005323,0.000435,0.001313,8.7e-05,log_loss,113,15,"{'criterion': 'log_loss', 'max_depth': 113, 'm...",0.513825,0.593318,0.552995,0.652063,0.659836,0.594408,0.05624,23
3,0.02087,0.000722,0.001514,9.6e-05,log_loss,57,88,"{'criterion': 'log_loss', 'max_depth': 57, 'ma...",0.601382,0.586982,0.572581,0.671001,0.596604,0.60571,0.034096,11
4,0.006667,0.000594,0.001549,0.000208,log_loss,188,18,"{'criterion': 'log_loss', 'max_depth': 188, 'm...",0.549539,0.515553,0.612903,0.583946,0.603923,0.573173,0.036107,38
5,0.011508,0.000578,0.001537,0.000203,entropy,161,45,"{'criterion': 'entropy', 'max_depth': 161, 'ma...",0.552995,0.601382,0.698733,0.670153,0.475995,0.599852,0.080322,18
6,0.022124,0.00477,0.001852,0.000802,gini,161,88,"{'criterion': 'gini', 'max_depth': 161, 'max_f...",0.593318,0.634217,0.595046,0.63567,0.617389,0.615128,0.018278,8
7,0.01652,0.002547,0.002586,0.00098,entropy,25,48,"{'criterion': 'entropy', 'max_depth': 25, 'max...",0.565668,0.578917,0.569124,0.524873,0.500585,0.547834,0.02999,47
8,0.041772,0.002843,0.001725,0.000134,log_loss,99,171,"{'criterion': 'log_loss', 'max_depth': 99, 'ma...",0.541475,0.604839,0.586982,0.569248,0.559426,0.572394,0.021907,39
9,0.018616,0.002799,0.001669,0.00017,gini,36,71,"{'criterion': 'gini', 'max_depth': 36, 'max_fe...",0.562788,0.638825,0.577189,0.686546,0.51171,0.595412,0.06098,21
