In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [9]:
def knn_iris(n_neighbors=3, random_state=22):
    """
    Train and evaluate a k-nearest-neighbours classifier on the iris dataset.

    The dataset is split into train and test sets, the features are
    standardised using statistics learned from the training split only, and a
    KNN classifier is fit on the training split. The test-set predictions,
    their element-wise comparison with the true labels, and the test accuracy
    are printed.

    :param n_neighbors: number of neighbours the classifier votes over
        (default 3, the value that used to be hard-coded here)
    :param random_state: seed passed to ``train_test_split`` so the split is
        reproducible (default 22, the value that used to be hard-coded here)
    :return: None
    """
    # 1) Load the iris dataset.
    iris = load_iris()

    # 2) Split into training and test sets (default test size of
    #    train_test_split is used).
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=random_state
    )

    # 3) Feature engineering: standardise. The scaler is fit on the training
    #    split only and then reused on the test split, so no test-set
    #    statistics leak into training.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4) Fit the KNN model.
    estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
    estimator.fit(x_train, y_train)

    # 5) Model assessment.
    # Method 1: compare predicted labels with the true test labels.
    y_predict = estimator.predict(x_test)
    print("y_predict:\n", y_predict)
    print("the pridiction target vs real test target is :\n", y_test == y_predict)

    # Method 2: accuracy on the test split.
    score = estimator.score(x_test, y_test)
    print("The accuracy is ：\n", score)

    return None

In [16]:
def knn_iris_gscv(param_grid=None, cv=10, random_state=22):
    """
    Classify iris with KNN, choosing the hyper-parameters by grid search with
    cross-validation.

    The dataset is split into train and test sets, the features are
    standardised using statistics from the training split, and a
    ``GridSearchCV`` over ``KNeighborsClassifier`` is fit on the training
    split. The test-set predictions, their comparison with the true labels,
    the test accuracy, and the grid-search results (best parameters, best
    cross-validated score, best estimator, full ``cv_results_``) are printed.

    :param param_grid: parameter grid handed to ``GridSearchCV``; when None,
        ``{"n_neighbors": [1, 3, 5, 7, 9, 11]}`` is used (the grid that used
        to be hard-coded here)
    :param cv: cross-validation setting handed to ``GridSearchCV``
        (default 10, the value that used to be hard-coded here)
    :param random_state: seed passed to ``train_test_split`` so the split is
        reproducible (default 22, the value that used to be hard-coded here)
    :return: None
    """
    # Build the default grid per call instead of using a mutable default
    # argument.
    if param_grid is None:
        param_grid = {"n_neighbors": [1, 3, 5, 7, 9, 11]}

    # 1) Load the iris dataset.
    iris = load_iris()

    # 2) Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=random_state
    )

    # 3) Feature engineering: standardise, fitting the scaler on the training
    #    split only.
    # NOTE(review): the scaler sees the whole training split before the
    # cross-validation below, so each validation fold has been scaled with
    # statistics that include that fold. Wrapping scaler + KNN in a Pipeline
    # and searching over the pipeline would avoid this, but it changes the
    # printed parameter names / estimator repr, so it is left as is here.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4) Grid search with cross-validation over the KNN hyper-parameters.
    knn = KNeighborsClassifier()
    search = GridSearchCV(knn, param_grid=param_grid, cv=cv)
    search.fit(x_train, y_train)

    # 5) Model assessment on the held-out test split.
    # Method 1: compare predicted labels with the true test labels.
    y_predict = search.predict(x_test)
    print("y_predict:\n", y_predict)
    print("the pridiction target vs real test target is :\n", y_test == y_predict)

    # Method 2: accuracy on the test split.
    score = search.score(x_test, y_test)
    print("The accuracy is ：\n", score)

    # 6) Report what the cross-validated search found.
    # best_params_
    print("The best parameter is ：\n", search.best_params_)
    # best_score_
    print("The best score is ：\n", search.best_score_)
    # best_estimator_
    print("The best estimator is :\n", search.best_estimator_)
    # cv_results_ (the full per-candidate results dict; output is long)
    print("The cross validation is :\n", search.cv_results_)

    return None

In [12]:
# Run the fixed-k KNN baseline; prints the predictions and the test accuracy.
knn_iris()

y_predict:
 [0 2 1 2 1 1 1 2 1 0 2 1 2 2 0 2 1 1 1 1 0 2 0 1 2 0 2 2 2 2 0 0 1 1 1 0 0
 0]
the pridiction target vs real test target is :
 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True False  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True]
The accuracy is ：
 0.9736842105263158


In [13]:
# Run the grid-searched KNN variant; prints the predictions, the test accuracy
# and the grid-search summary (best params/score/estimator and cv_results_).
knn_iris_gscv()

y_predict:
 [0 2 1 2 1 1 1 2 1 0 2 1 2 2 0 2 1 1 1 1 0 2 0 1 2 0 2 2 2 2 0 0 1 1 1 0 0
 0]
the pridiction target vs real test target is :
 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True False  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True]
The accuracy is ：
 0.9736842105263158
The best parameter is ：
 {'n_neighbors': 3}
The best score is ：
 0.9553030303030303
The best estimator is :
 KNeighborsClassifier(n_neighbors=3)
The cross validation is :
 {'mean_fit_time': array([0.00074921, 0.00056539, 0.00046496, 0.00047016, 0.00042117,
       0.00042408]), 'std_fit_time': array([2.95234165e-04, 1.63648651e-04, 1.23730964e-04, 1.47346463e-04,
       9.72213113e-05, 1.01880868e-04]), 'mean_score_time': array([0.00129473, 0.0010776 , 0.00101671, 0.00083075, 0.0008127 ,
       0.0010273 ]), 'std_score_time': array([0.00037004, 0.00015799, 0.00030356, 0.00011015, 0.00