In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

def knn_iris():
    """Classify the iris dataset with a fixed 3-neighbour KNN model.

    Loads the iris data, splits it with ``random_state=6``, standardizes the
    features with a scaler fitted on the training split only, trains a
    ``KNeighborsClassifier(n_neighbors=3)`` and prints the test-set
    predictions, their element-wise comparison with the true labels, and the
    value returned by the classifier's ``score`` method.

    Returns:
        None. All results are reported through ``print``.
    """
    # Step 1: load the data.
    dataset = load_iris()

    # Step 2: split into training and test sets (fixed seed for repeatability).
    features_train, features_test, labels_train, labels_test = train_test_split(
        dataset.data,
        dataset.target,
        random_state=6,
    )

    # Step 3: feature engineering (standardization). The scaler is fitted on
    # the training features and then reused, unchanged, on the test features.
    scaler = StandardScaler()
    features_train = scaler.fit_transform(features_train)
    features_test = scaler.transform(features_test)

    # Step 4: KNN estimator with a hard-coded neighbour count.
    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(features_train, labels_train)

    # Step 5: model evaluation on the held-out test split.
    predictions = classifier.predict(features_test)
    print("y_predict: ", predictions)

    matches = labels_test == predictions
    print("对比真实值和预测值：", matches)

    accuracy = classifier.score(features_test, labels_test)
    print("准确率为：", accuracy)

def knn_iris_grid_search():
    """Classify the iris dataset with KNN, tuning ``n_neighbors`` by grid search.

    Loads the iris data, splits it with ``random_state=6``, standardizes the
    features with a scaler fitted on the training split only, then wraps a
    ``KNeighborsClassifier(p=2)`` in a ``GridSearchCV`` with ``cv=10`` over
    ``n_neighbors`` in ``[1, 3, 5, 7, 9, 11]``. Prints the test-set
    predictions, their comparison with the true labels, the test score, and
    the search's ``best_params_``, ``best_score_``, ``best_estimator_`` and
    ``cv_results_``.

    Returns:
        None. All results are reported through ``print``.
    """
    # Step 1: load the data.
    dataset = load_iris()

    # Step 2: split into training and test sets (fixed seed for repeatability).
    features_train, features_test, labels_train, labels_test = train_test_split(
        dataset.data,
        dataset.target,
        random_state=6,
    )

    # Step 3: feature engineering (standardization).
    # NOTE(review): the scaler is fitted on the whole training split before
    # cross-validation, so every CV fold shares the same scaling statistics.
    scaler = StandardScaler()
    features_train = scaler.fit_transform(features_train)
    features_test = scaler.transform(features_test)

    # Step 4: base KNN estimator; n_neighbors is left to the grid search.
    base_classifier = KNeighborsClassifier(p=2)

    # Step 5: model selection and tuning (grid search with cross-validation).
    candidate_params = {"n_neighbors": [1, 3, 5, 7, 9, 11]}
    search = GridSearchCV(base_classifier, param_grid=candidate_params, cv=10)
    search.fit(features_train, labels_train)

    # Step 6: model evaluation on the held-out test split.
    predictions = search.predict(features_test)
    print("y_predict: ", predictions)

    matches = labels_test == predictions
    print("对比真实值和预测值：", matches)

    accuracy = search.score(features_test, labels_test)
    print("准确率为：", accuracy)

    # Report the search diagnostics in a fixed order: best parameters, best
    # cross-validation score, best estimator, full cross-validation results.
    diagnostics = (
        ("最佳参数：", search.best_params_),
        ("最佳结果：", search.best_score_),
        ("最佳预估器：", search.best_estimator_),
        ("交叉验证结果：", search.cv_results_),
    )
    for label, value in diagnostics:
        print(label, value)

# Script entry point: the demo below runs only when this code is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # The fixed n_neighbors=3 baseline is disabled; uncomment to run it.
    # knn_iris()
    # Run the grid-search variant, which prints its results to stdout.
    knn_iris_grid_search()

y_predict:  [0 2 0 0 2 1 2 0 2 1 2 1 2 2 1 1 2 1 1 0 0 2 0 0 1 1 1 2 0 1 0 1 0 0 1 2 1
 2]
对比真实值和预测值： [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True False  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True False  True
  True  True]
准确率为： 0.9473684210526315
最佳参数： {'n_neighbors': 11}
最佳结果： 0.9734848484848484
最佳预估器： KNeighborsClassifier(n_neighbors=11)
交叉验证结果： {'mean_fit_time': array([3.99422646e-04, 4.95862961e-04, 9.93967056e-05, 2.99024582e-04,
       3.99875641e-04, 3.01480293e-04]), 'std_fit_time': array([0.00048919, 0.0004959 , 0.00029819, 0.00045677, 0.00048975,
       0.00046055]), 'mean_score_time': array([0.00079858, 0.00070312, 0.00099714, 0.00089762, 0.00070035,
       0.00089529]), 'std_score_time': array([3.99313487e-04, 4.60494354e-04, 1.38200744e-05, 2.99210253e-04,
       4.58520550e-04, 2.98680943e-04]), 'param_n_neighbors': masked_array(data=[1, 3, 5, 7, 9, 11],
      