In [28]:
# 科学计算模块
import numpy as np
import pandas as pd

# 绘图模块
import matplotlib as mpl
import matplotlib.pyplot as plt

# 自定义模块
from ML_basic_function import *

# Scikit-Learn相关模块
# 评估器类
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# 实用函数
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### sklearn中GridSearchCV的参数解释

In [29]:
from sklearn.model_selection import GridSearchCV

In [30]:
# IPython help syntax: display GridSearchCV's init signature and docstring
GridSearchCV?

Init signature:
GridSearchCV(
    estimator,
    param_grid,
    *,
    scoring=None,
    n_jobs=None,
    refit=True,
    cv=None,
    verbose=0,
    pre_dispatch='2*n_jobs',
    error_score=nan,
    return_train_score=False,
)
Docstring:
Exhaustive search over specified parameter values for an estimator.

### GridSearchCV评估器训练过程

#### 创建评估器

In [31]:
# Load the iris data as a (features, labels) pair and split it into
# training and test subsets. random_state=24 fixes the shuffle so the split
# is the same on every run; no test_size is given, so train_test_split's
# default split ratio applies.
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)

In [32]:
# Base estimator handed to the grid search. The iteration cap is set very
# high (one million); 'saga' is presumably chosen because the parameter
# grids in this notebook search over l1, l2 and elasticnet penalties.
# NOTE(review): random_state is not set. Per the scikit-learn docs the
# 'saga' solver shuffles the data, so fitted coefficients may differ
# slightly between runs -- consider fixing a seed.
clf = LogisticRegression(
    solver='saga',
    max_iter=1_000_000,
)

In [33]:
# Show every hyperparameter of the base estimator with its current value;
# these names are the keys a parameter grid may refer to.
clf.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 1000000,
 'multi_class': 'deprecated',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'saga',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

#### 创建参数空间

In [34]:
# Simple search space: every combination of penalty type and C value
# (2 penalties x 5 values of C = 10 candidates).
param_grid_simple = dict(
    penalty=['l1', 'l2'],
    C=[1, 0.5, 0.1, 0.05, 0.01],
)

In [35]:
# Search space written as a list of sub-grids. l1_ratio appears only in the
# elasticnet sub-grid, so it is never combined with the plain l1 / l2
# penalties of the first sub-grid.
param_grid_ra = [
    {
        'penalty': ['l1', 'l2'],
        'C': [1, 0.5, 0.1, 0.05, 0.01],
    },
    {
        'penalty': ['elasticnet'],
        'C': [1, 0.5, 0.1, 0.05, 0.01],
        'l1_ratio': [0.3, 0.6, 0.9],
    },
]

#### 实例化网格搜索评估器

In [36]:
# Wrap the base estimator in a grid search over the simple parameter space.
# Every other GridSearchCV option (scoring, cv, refit, ...) keeps its default.
search = GridSearchCV(clf, param_grid_simple)

#### 训练网格搜索评估器

In [37]:
# Run the search on the training data: each parameter combination is
# evaluated by cross-validation and, because refit defaults to True, the
# best combination is then refitted and exposed as best_estimator_.
search.fit(X_train, y_train)

0,1,2
,estimator,LogisticRegre...solver='saga')
,param_grid,"{'C': [1, 0.5, ...], 'penalty': ['l1', 'l2']}"
,scoring,
,n_jobs,
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,1
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'saga'
,max_iter,1000000


### GridSearchCV评估器结果查看

In [38]:
# The estimator refitted with the best parameter combination found
search.best_estimator_

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,1
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'saga'
,max_iter,1000000


In [39]:
# Inspect the fitted coefficients (learned weights) of the best model
search.best_estimator_.coef_

array([[ 0.        ,  0.        , -3.47333648,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.55510947, -0.34235824,  3.03230392,  4.12147388]])

In [40]:
# Score of the best model on the training set and on the test set.
# Note: score() reports accuracy for this classifier, not an error rate.
(
    search.best_estimator_.score(X_train, y_train),
    search.best_estimator_.score(X_test, y_test),
)

(0.9732142857142857, 0.9736842105263158)

In [41]:
# Inspect the hyperparameters (not the fitted coefficients) of the best model
search.best_estimator_.get_params()

{'C': 1,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 1000000,
 'multi_class': 'deprecated',
 'n_jobs': None,
 'penalty': 'l1',
 'random_state': None,
 'solver': 'saga',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [42]:
# Mean cross-validated score of the best parameter combination. This is a
# validation score computed during the search, so it need not match the
# train/test scores of the refitted model.
search.best_score_

np.float64(0.9644268774703558)

In [43]:
# Raw per-candidate results of the search (timings, parameters, scores) as a
# dict of arrays. NOTE(review): pd.DataFrame(search.cv_results_) would give
# a more readable tabular view.
search.cv_results_

{'mean_fit_time': array([0.020649  , 0.00850258, 0.01315155, 0.00741682, 0.00257301,
        0.00397997, 0.00102592, 0.00293441, 0.00027442, 0.00117688]),
 'std_fit_time': array([1.94428920e-03, 4.52797259e-04, 8.63770087e-04, 2.46167551e-04,
        9.68851674e-05, 1.31424262e-04, 4.58302280e-05, 6.81862798e-05,
        1.05736637e-04, 4.51470368e-05]),
 'mean_score_time': array([0.00032263, 0.00023646, 0.00024343, 0.00018291, 0.00013232,
        0.00013375, 0.00010934, 0.0001265 , 0.00013108, 0.00010982]),
 'std_score_time': array([5.38778303e-05, 4.67962652e-05, 4.59479058e-05, 2.68152122e-05,
        1.95636936e-05, 1.72058282e-05, 7.39881653e-06, 1.99826063e-05,
        4.07223958e-05, 2.41545203e-06]),
 'param_C': masked_array(data=[1.0, 1.0, 0.5, 0.5, 0.1, 0.1, 0.05, 0.05, 0.01, 0.01],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value=1e+20),
 'param_penalty': masked_array(data=['l1', 'l2', 'l1', 'l2

In [44]:
# Parameter combination that achieved the best cross-validated score
search.best_params_

{'C': 1, 'penalty': 'l1'}

In [45]:
# Position of the best candidate within the cv_results_ arrays
search.best_index_

np.int64(0)

In [46]:
# Equivalent to calling score() on search.best_estimator_, because no custom
# scoring was passed to GridSearchCV.
(
    search.score(X_train, y_train),
    search.score(X_test, y_test),
)

(0.9732142857142857, 0.9736842105263158)

In [47]:
# Number of cross-validation splits (folds) used for each candidate
search.n_splits_

5

In [48]:
# Time, in seconds, spent refitting the best model after the search
search.refit_time_

0.026224136352539062