In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the breast-cancer dataset shipped with scikit-learn, then hold out
# 20% of the samples as a test set. random_state is pinned for the split.
dataset = load_breast_cancer()
x, t = dataset.data, dataset.target
x_train_val, x_test, t_train_val, t_test = train_test_split(
    x,
    t,
    test_size=0.2,
    random_state=1,
)

In [3]:
from sklearn.model_selection import GridSearchCV
# estimator: the model to train
# param_grid: the range of hyperparameter values to search
# cv: the value of K for K-fold cross-validation

In [4]:
# Base model handed to GridSearchCV below; random_state is pinned to 0.
estimator = DecisionTreeClassifier(random_state=0)

In [5]:
# Coarse grid: three candidate values each for max_depth and
# min_samples_split, i.e. 9 combinations in total.
param_grid = [
    dict(
        max_depth=[3, 20, 50],
        min_samples_split=[3, 20, 50],
    ),
]

In [6]:
# Number of cross-validation folds, passed to GridSearchCV as `cv`.
cv = 5

In [7]:
# Grid search over `param_grid` for `estimator` using `cv` folds.
# Training-fold scores are not recorded (return_train_score=False).
tuned_model = GridSearchCV(
    estimator,
    param_grid,
    cv=cv,
    return_train_score=False,
)

In [8]:
# Run the search on the train/validation portion only; x_test / t_test
# are not used here.
tuned_model.fit(x_train_val, t_train_val)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=0),
             param_grid=[{'max_depth': [3, 20, 50],
                          'min_samples_split': [3, 20, 50]}])

In [9]:
# Show the search results transposed: one column per parameter
# combination, one row per recorded metric.
pd.DataFrame(tuned_model.cv_results_).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8
mean_fit_time,0.002858,0.00271,0.002689,0.003253,0.003177,0.003157,0.003263,0.003183,0.00314
std_fit_time,0.000117,0.000022,0.000015,0.000133,0.000166,0.000166,0.000156,0.000175,0.000182
mean_score_time,0.000295,0.000277,0.00026,0.000263,0.000264,0.000258,0.000295,0.000259,0.000262
std_score_time,0.000037,0.000043,0.00001,0.000016,0.000015,0.000006,0.000048,0.000006,0.000014
param_max_depth,3,3,3,20,20,20,50,50,50
param_min_samples_split,3,20,50,3,20,50,3,20,50
params,"{'max_depth': 3, 'min_samples_split': 3}","{'max_depth': 3, 'min_samples_split': 20}","{'max_depth': 3, 'min_samples_split': 50}","{'max_depth': 20, 'min_samples_split': 3}","{'max_depth': 20, 'min_samples_split': 20}","{'max_depth': 20, 'min_samples_split': 50}","{'max_depth': 50, 'min_samples_split': 3}","{'max_depth': 50, 'min_samples_split': 20}","{'max_depth': 50, 'min_samples_split': 50}"
split0_test_score,0.923077,0.912088,0.912088,0.956044,0.912088,0.912088,0.956044,0.912088,0.912088
split1_test_score,0.901099,0.901099,0.901099,0.912088,0.901099,0.901099,0.912088,0.901099,0.901099
split2_test_score,0.934066,0.934066,0.945055,0.923077,0.934066,0.945055,0.923077,0.934066,0.945055


In [10]:
# Second, finer grid. This rebinds `param_grid`, replacing the coarse
# grid defined earlier in the notebook.
param_grid = [
    dict(
        max_depth=[5, 10, 15],
        min_samples_split=[10, 12, 15],
    ),
]

In [11]:
# Rebuild the search object so it picks up the current `param_grid`.
# This rebinds `tuned_model`, discarding the previously fitted search.
tuned_model = GridSearchCV(
    estimator,
    param_grid,
    cv=cv,
    return_train_score=False,
)

In [12]:
# Fit the search on the train/validation portion; the test split stays
# untouched.
tuned_model.fit(x_train_val, t_train_val)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=0),
             param_grid=[{'max_depth': [5, 10, 15],
                          'min_samples_split': [10, 12, 15]}])

In [13]:
# Show the search results transposed: one column per parameter
# combination, one row per recorded metric.
pd.DataFrame(tuned_model.cv_results_).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8
mean_fit_time,0.003706,0.003275,0.003198,0.003244,0.00323,0.003239,0.00332,0.003236,0.003202
std_fit_time,0.000452,0.000148,0.00013,0.000149,0.000151,0.000175,0.00026,0.000115,0.000128
mean_score_time,0.000384,0.000316,0.000269,0.000291,0.000282,0.000278,0.000284,0.000296,0.000268
std_score_time,0.000043,0.000069,0.000021,0.000065,0.000025,0.000025,0.00002,0.000066,0.000016
param_max_depth,5,5,5,10,10,10,15,15,15
param_min_samples_split,10,12,15,10,12,15,10,12,15
params,"{'max_depth': 5, 'min_samples_split': 10}","{'max_depth': 5, 'min_samples_split': 12}","{'max_depth': 5, 'min_samples_split': 15}","{'max_depth': 10, 'min_samples_split': 10}","{'max_depth': 10, 'min_samples_split': 12}","{'max_depth': 10, 'min_samples_split': 15}","{'max_depth': 15, 'min_samples_split': 10}","{'max_depth': 15, 'min_samples_split': 12}","{'max_depth': 15, 'min_samples_split': 15}"
split0_test_score,0.967033,0.923077,0.912088,0.967033,0.923077,0.912088,0.967033,0.923077,0.912088
split1_test_score,0.912088,0.901099,0.901099,0.912088,0.901099,0.901099,0.912088,0.901099,0.901099
split2_test_score,0.923077,0.934066,0.934066,0.923077,0.934066,0.934066,0.923077,0.934066,0.934066


In [18]:
# Hyperparameter combination selected by the fitted search.
tuned_model.best_params_

{'max_depth': 5, 'min_samples_split': 10}

In [19]:
# Keep a handle on the estimator the search selected, for scoring below.
best_model = tuned_model.best_estimator_

In [20]:
# Report the score on the train/validation data first, then on the
# held-out test data, one value per line.
print(
    best_model.score(x_train_val, t_train_val),
    best_model.score(x_test, t_test),
    sep="\n",
)

0.9934065934065934
0.956140350877193
