### 超参数

In [6]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score

In [16]:
# 加载数据
digits = datasets.load_digits()
X = digits.data
y = digits.target

In [22]:
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [20]:
knn_clf = KNeighborsClassifier(n_neighbors=6)

In [23]:
knn_clf.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')

In [24]:
knn_clf.score(X_test,y_test)

0.9866666666666667

### 超参数

参数 k 表示所依据的周围的点的个数，不同的点的个数对于结果的判定是不一样的，所以要寻找最优的那个参数就是超参数

### 寻找最好的参数 k

In [32]:
best_k=0
best_score=0.0
# 运用 for 循环判断哪个 k 是最好的
for i in range(1,5):
    knn_clf = KNeighborsClassifier(n_neighbors=i)
    knn_clf.fit(X_train,y_train)
    score = knn_clf.score(X_test,y_test)
    print(str(score))
    if score > best_score:
        best_score = score
        best_k = i
print("best_k:"+str(best_k))
print("best_score:" + str(best_score))

0.9911111111111112
0.9866666666666667
0.9888888888888889
0.9866666666666667
best_k:1
best_score:0.9911111111111112


### 第二个参数：距离参数 weight
使得距离预测点距离近的点在结果的预测中占得比重更大

In [39]:
best_method = 'uniform'
best_k=0
best_score=0.0
# 运用 for 循环判断哪个 k 是最好的
for method in ['distance','uniform']:
    for i in range(1,5):
        knn_clf = KNeighborsClassifier(n_neighbors=i,weights=method)
        knn_clf.fit(X_train,y_train)
        score = knn_clf.score(X_test,y_test)
        print(str(score))
        if score > best_score:
            best_score = score
            best_k = i
            best_method = method
print("best_k:"+str(best_k))
print("best_score:" + str(best_score))
print("best_method:" + str(best_method))

0.9911111111111112
0.9911111111111112
0.9866666666666667
0.9888888888888889
0.9911111111111112
0.9866666666666667
0.9888888888888889
0.9866666666666667
best_k:1
best_score:0.9911111111111112
best_method:distance


### 明科夫斯基距离 p
见链接 https://blog.csdn.net/xiaoduan_/article/details/79327781
在测试过程中比较费时
<br>这个参数在 weight = distance 的时候才有意义

In [41]:
best_p = 0
best_k=0
best_score=0.0
# 运用 for 循环判断哪个 k 是最好的

for p in range(1,5):
    for p in range(1,5):
        knn_clf = KNeighborsClassifier(n_neighbors=i,weights='distance',p = p)
        knn_clf.fit(X_train,y_train)
        score = knn_clf.score(X_test,y_test)
        print(str(score))
        if score > best_score:
            best_score = score
            best_k = i
            best_p = p
print("best_k:"+str(best_k))
print("best_score:" + str(best_score))
print("best_p:" + str(best_p))

0.9844444444444445
0.9888888888888889
0.9888888888888889
0.9866666666666667
0.9844444444444445
0.9888888888888889
0.9888888888888889
0.9866666666666667
0.9844444444444445
0.9888888888888889
0.9888888888888889
0.9866666666666667
0.9844444444444445
0.9888888888888889
0.9888888888888889
0.9866666666666667
best_k:4
best_score:0.9888888888888889
best_p:2
