In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

**Grid Search Cross Validation** ile hem KNN'deki optimum k değerini bulabilir hem de farklı k değerlerini denerken cross validation yaparız.

# 1) Read Data

In [2]:
iris = load_iris()
x = iris.data
y = iris.target

# 2) Preprocessing

In [3]:
x = (x-np.min(x)) / (np.max(x) - np.min(x))

# 3) Split Data

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

# 4) Create KNN Model

In [5]:
grid = {"n_neighbors" : np.arange(1, 50)}
knn= KNeighborsClassifier()

In [6]:
grid

{'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])}

# 5) Create Grid Search Model with Cross Validation

KNN'deki k değeri 50 olana kadar kontrol edilecek. Hangi k değeri optimumsa o seçilir. Mantık aşağıdaki gibidir:

<img src="grid_search.png"
     alt="grid"
     style="float: left; margin-top: 10px;" />

In [7]:
knn_cv = GridSearchCV(knn, grid, cv = 10)  # GridSearchCV
knn_cv.fit(x_train,y_train)

GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})

# 6) Test Data

Aşağıdaki sonuca göre en optimum k değeri 11 dir. k=11 değerine göre en iyi accuracy = 0.96 çıkmaktadır.

In [8]:
print("tuned hyperparameter K: ", knn_cv.best_params_)
print("tuned parametreye gore en iyi accuracy (best score): ", knn_cv.best_score_)

tuned hyperparameter K:  {'n_neighbors': 11}
tuned parametreye gore en iyi accuracy (best score):  0.9627272727272727
