# 超参数

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
digits = datasets.load_digits()
X = digits.data
y = digits.target

In [3]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=666)

In [4]:
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=6)
knn_clf.fit(X_train,y_train)
knn_clf.score(X_test,y_test)

0.9888888888888889

## 寻找最好的k

In [5]:
best_score = 0.0
best_k = -1
# 迭代寻找最佳k
for k in range(1,11):
    knn_clf = KNeighborsClassifier(n_neighbors=k)
    knn_clf.fit(X_train,y_train)
    score = knn_clf.score(X_test,y_test)
    if score > best_score:
        best_score = score
        best_k = k
        
print('best_score= ' ,best_score)
print('best_k=',best_k)

best_score=  0.9916666666666667
best_k= 4


## 考虑距离,不考虑距离

```
weights : str or callable, optional (default = ‘ uniform ’)
weight function used in prediction. Possible values:

‘uniform’ : uniform weights. All points in each neighborhood are weighted equally.
‘distance’ : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away.
[callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights.
```

**weights这个参数很重要,uniform是不考虑权重,distance是欧拉距离的倒数作为权重,在这种情况下，查询点的较近邻居将比远离的邻居具有更大的影响力。**

In [6]:
best_score = 0.0
best_k = -1
best_method = ""

for method in ['uniform','distance']:
    for k in range(1,11):
        knn_clf = KNeighborsClassifier(n_neighbors=k,weights=method)
        knn_clf.fit(X_train,y_train)
        score = knn_clf.score(X_test,y_test)
        if best_score < score:
            best_k = k
            best_method = method
            best_score = score

print('best_score : ' , best_score)
print('best_k : ' , k)
print('best_method : ' , best_method)

best_score :  0.9916666666666667
best_k :  10
best_method :  uniform


## 搜索明可夫斯基距离相应的p

p : integer, optional (default = 2)
p：整数，可选（默认= 2）

Power parameter for the Minkowski metric. 
明可夫斯基参数p

When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2.
p=1是曼哈顿参数,p=2是欧拉参数

For arbitrary p, minkowski_distance (l_p) is used.
p是任意值.

In [7]:
best_score = 0.0
best_k = 0
best_p = 0

# weights = distance
for k in range(1,11):
    for p in range(1,6):
        knn_clf = KNeighborsClassifier(n_neighbors=k,p=p,weights ='distance')
        knn_clf.fit(X_train,y_train)
        score = knn_clf.score(X_test,y_test)
        if score > best_score:
            best_score = score
            best_k = k
            best_p = p
            
print('best_score',best_score)
print('best_k',best_k)
print('best_p',best_p)

best_score 0.9888888888888889
best_k 3
best_p 2
