In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import make_scorer
import numpy as np
import warnings

warnings.filterwarnings("ignore")

In [2]:
digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [3]:
knn_clf = KNeighborsClassifier()  # 对k近邻算法的超参数进行网格搜索

In [4]:
param_grid = [
    {
        'weights': ['uniform', 'distance'],
        'n_neighbors': [i for i in range(1, 5)],
        'p': [i for i in range(1, 3)]
    }
]  # 可以看出共有2*5*3=30种组合方式

In [5]:
title = 'learning curve for LinearRegression'


def my_custom_loss_func(y_true, y_pred):
    diff = np.abs(y_true - y_pred).max()
    return np.log1p(diff)  # 必须返回一个浮点数


score_list = [None, mean_squared_error, mean_absolute_error, r2_score,
              my_custom_loss_func]  # 自定义Score function or function function

for i in score_list:
    if i:
        print('score=', i.__name__)
        grid_search = GridSearchCV(knn_clf, param_grid, cv=5, n_jobs=-1,
                                   scoring=make_scorer(i))
        grid_search.fit(X_train, y_train)
        print(grid_search.best_estimator_)
        print(grid_search.best_score_)
        print(grid_search.best_params_, end='\n\n')
    else:
        print('score:', '训练器的score方法')
        grid_search = GridSearchCV(knn_clf,  # 训练器对象
                                   param_grid,  # 字典或字典列表(需要优化超参数的取值)
                                   cv=5,  # 使用几折交叉验证
                                   n_jobs=-1,  # 进行训练的CPU核心个数,默认n_jobs=1(n_jobs=-1:使用所有CPU核心进行训练)
                                   scoring=None)  # 默认为None,使用训练器的score方法(可使用make_score设定其他score function or loss function)
        grid_search.fit(X_train, y_train)  # 一般针对的是训练数据集
        print(grid_search.best_estimator_)  # 返回最优的训练器
        print(grid_search.best_score_)  # 输出最优训练器的score
        print(grid_search.best_params_, end='\n\n')  # 返回最优化的超参数组合

score: 训练器的score方法
KNeighborsClassifier(n_neighbors=1)
0.9860820751064653
{'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

score= mean_squared_error
KNeighborsClassifier(n_neighbors=2, p=1)
0.8376016260162601
{'n_neighbors': 2, 'p': 1, 'weights': 'uniform'}

score= mean_absolute_error
KNeighborsClassifier(n_neighbors=2, p=1)
0.14192799070847853
{'n_neighbors': 2, 'p': 1, 'weights': 'uniform'}

score= r2_score
KNeighborsClassifier(n_neighbors=1)
0.9473972839207176
{'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

score= my_custom_loss_func
KNeighborsClassifier(n_neighbors=2)
2.1029981488111127
{'n_neighbors': 2, 'p': 2, 'weights': 'uniform'}

