# 网格搜索

In [1]:
import numpy as np
from sklearn import datasets

In [3]:
digits = datasets.load_digits()
X = digits.data
y = digits.target

In [4]:
from sklearn.model_selection import  train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=666)

In [5]:
from sklearn.neighbors import  KNeighborsClassifier

In [6]:
sk_knn_clf = KNeighborsClassifier(n_neighbors=4,weights='uniform')
sk_knn_clf.fit(X_train,y_train)
sk_knn_clf.score(X_test,y_test)

0.9916666666666667

# 网格搜索   Grid Search

In [7]:
param_grid =[
    {
        'weights':['uniform'],
        'n_neighbors':[i for i in range(1,11)]
    },
    {
        'weights':['distance'],
        'n_neighbors':[i for i in range(1,11)],
        'p':[i for i in range(1,6)]
    }
]

In [8]:
knn_clf = KNeighborsClassifier()

In [11]:
#调用网格方法    CV代表交叉验证  cross validation
from sklearn.model_selection import GridSearchCV
#第一个参数为我们的机器学习的算法    第二个参数为我们写出的网格函数
#定义我们网格搜索的对象
grid_search = GridSearchCV(knn_clf,param_grid)

In [12]:
%%time
grid_search.fit(X_train,y_train)

Wall time: 3min 35s


GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['uniform']}, {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['distance'], 'p': [1, 2, 3, 4, 5]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [13]:
#返回我们网格搜索搜索到的最佳的分类器对应的参数  的分类器
grid_search.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=3,
           weights='distance')

In [14]:
#返回网格搜索出分类器的准确度
grid_search.best_score_

0.98538622129436326

In [16]:
#返回对应于我们的网格搜索数组搜索到的参数
grid_search.best_params_

{'n_neighbors': 3, 'p': 3, 'weights': 'distance'}

In [17]:
#获取到我们的这个分类器
knn_clf = grid_search.best_estimator_

In [18]:
knn_clf.predict(X_test)

array([8, 1, 3, 4, 4, 0, 7, 0, 8, 0, 4, 6, 1, 1, 2, 0, 1, 6, 7, 3, 3, 6, 5,
       2, 9, 4, 0, 2, 0, 3, 0, 8, 7, 2, 3, 5, 1, 3, 1, 5, 8, 6, 2, 6, 3, 1,
       3, 0, 0, 4, 9, 9, 2, 8, 7, 0, 5, 4, 0, 9, 5, 5, 8, 7, 4, 2, 8, 8, 7,
       5, 4, 3, 0, 2, 7, 2, 1, 2, 4, 0, 9, 0, 6, 6, 2, 0, 0, 5, 4, 4, 3, 1,
       3, 8, 6, 4, 4, 7, 5, 6, 8, 4, 8, 4, 6, 9, 7, 7, 0, 8, 8, 3, 9, 7, 1,
       8, 4, 2, 7, 0, 0, 4, 9, 6, 7, 3, 4, 6, 4, 8, 4, 7, 2, 6, 9, 5, 8, 7,
       2, 5, 5, 9, 7, 9, 3, 1, 9, 4, 4, 1, 5, 1, 6, 4, 4, 8, 1, 6, 2, 5, 2,
       1, 4, 4, 3, 9, 4, 0, 6, 0, 8, 3, 8, 7, 3, 0, 3, 0, 5, 9, 2, 7, 1, 8,
       1, 4, 3, 3, 7, 8, 2, 7, 2, 2, 8, 0, 5, 7, 6, 7, 3, 4, 7, 1, 7, 0, 9,
       2, 8, 9, 3, 8, 9, 1, 1, 1, 9, 8, 8, 0, 3, 7, 3, 3, 4, 8, 2, 1, 8, 6,
       0, 1, 7, 7, 5, 8, 3, 8, 7, 6, 8, 4, 2, 6, 2, 3, 7, 4, 9, 3, 5, 0, 6,
       3, 8, 3, 3, 1, 4, 5, 3, 2, 5, 6, 9, 6, 9, 5, 5, 3, 6, 5, 9, 3, 7, 7,
       0, 2, 4, 9, 9, 9, 2, 5, 6, 1, 9, 6, 9, 7, 7, 4, 5, 0, 0, 5, 3, 8, 4,
       4, 3,

In [19]:
knn_clf.score(X_test,y_test)

0.98333333333333328

In [23]:
#我们可以往gridsearchCV中传入参数，来使得我们的网格搜索速度更加的快
#n_jobs  是代表我们使用几核进行运算  
#verbose 标识输出   来让我们更好的了解相当于日志
grid_search = GridSearchCV(knn_clf,param_grid=param_grid,n_jobs=4,verbose=2)

In [21]:
%%time
grid_search.fit(X_train,y_train)

Wall time: 1min 49s


GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=3,
           weights='distance'),
       fit_params=None, iid=True, n_jobs=4,
       param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['uniform']}, {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['distance'], 'p': [1, 2, 3, 4, 5]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [24]:
%%time
grid_search.fit(X_train,y_train)

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   27.4s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  1.6min


Wall time: 1min 58s


[Parallel(n_jobs=4)]: Done 180 out of 180 | elapsed:  2.0min finished


GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=3,
           weights='distance'),
       fit_params=None, iid=True, n_jobs=4,
       param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['uniform']}, {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['distance'], 'p': [1, 2, 3, 4, 5]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=2)