In [59]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score, accuracy_score
import warnings

warnings.filterwarnings("ignore")

In [60]:
digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [61]:
knn_clf = KNeighborsClassifier()  # 对k近邻算法的超参数进行网格搜索
print(knn_clf.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


In [62]:
param_grid = [
    {
        'weights': ['uniform', 'distance'],
        'n_neighbors': [i for i in range(1, 5)],
        'p': [i for i in range(1, 3)]
    }
]  # 可以看出共有2*5*3=30种组合方式

In [63]:
title = 'learning curve for LinearRegression'


def my_custon_loss_func(clf,  # 训练器对象
                        X,  # 训练数据集
                        y):  # 真实标签
    """自定义评估函数"""
    predict = clf.predict(X)
    return accuracy_score(y, predict)


score_list = [None, mean_squared_error, mean_absolute_error, r2_score,
              my_custon_loss_func]  # 自定义Score function or function function

for i in score_list:
    if i:
        print('score=', i.__name__)
        grid_search = GridSearchCV(knn_clf, param_grid, cv=5, n_jobs=-1,
                                   scoring=i)
        grid_search.fit(X_train, y_train)
        print(grid_search.best_estimator_)
        print(grid_search.best_score_)
        print(grid_search.best_params_, end='\n\n')
    else:
        print('score:', '训练器对象内置评估方法')
        grid_search = GridSearchCV(knn_clf,  # 训练器对象
                                   param_grid,  # 字典或字典列表(需要超参数搜索的取值)
                                   cv=5,  # 使用几折交叉验证
                                   n_jobs=-1,  # 进行训练的CPU核心个数,默认n_jobs=1(n_jobs=-1:使用所有CPU核心进行训练)
                                   # 交叉验证在测试数据集上的评价指标
                                   scoring=None)  # 默认scoring=None,使用训练器的score方法
        grid_search.fit(X_train, y_train)  # 训练器对象的fit函数(一般针对的是训练数据集)
        print(grid_search.best_estimator_)  # 返回最优的训练器
        print(grid_search.best_score_)  # Mean cross-validated score of the best_estimator
        print(grid_search.best_params_, end='\n\n')  # 返回最优化的超参数组合

score: 训练器对象内置评估方法
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
KNeighborsClassifier(n_neighbors=1)
0.9860820751064653
{'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

score= mean_squared_error
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
KNeighborsClassifier(n_neighbors=1, p=1)
nan
{'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}

score= mean_absolute_error
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
KNeighborsClassifier(n_neighbors=1, p=1)
nan
{'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}

score= r2_score
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
KNeighborsClassifier(