In [144]:
import numpy as np
np.random.seed(42)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split, KFold

In [145]:
# Load the iris data set and pull out the feature matrix and label vector.
dataset = load_iris()
x, y = dataset.data, dataset.target

# Hold out 40% of the samples for evaluation. No random_state is passed, so
# the shuffle draws from NumPy's global RNG (seeded in the import cell).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
)

In [146]:
# Candidate hyper-parameters for the manual search: k = 1..14 and both
# neighbour-weighting schemes.
n_neighbors = list(range(1, 15))
weights = ['uniform', 'distance']

# Running record of the best candidate seen so far; the search loop fills
# these in.
best_score, best_hyperparameters = 0, {}

In [147]:
# Exhaustive manual search: fit one classifier per (n_neighbors, weights)
# pair, iterating weights fastest, and remember the best-scoring pair.
# NOTE(review): candidates are ranked by their accuracy on x_test, so the
# winning score is measured on the same data used for selection and is
# therefore an optimistic estimate.
for n, w in ((n_, w_) for n_ in n_neighbors for w_ in weights):
    clf = KNeighborsClassifier(n_neighbors=n, weights=w)
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)

    # Strict ">" means that on a tie the earliest candidate is kept.
    if score > best_score:
        best_score = score
        best_hyperparameters.update(n_neighbors=n, weights=w)


In [148]:
# Report the winner of the manual search. `.4` formats the percentage with
# four significant digits. best_score is the accuracy on x_test, the same
# split that was used to pick the hyper-parameters.
print(f"best score: {best_score*100:.4}%")
print(f"best hyper-parameters: {best_hyperparameters}")

best score: 98.33%
best hyper-parameters: {'n_neighbors': 2, 'weights': 'uniform'}


#### Built-in Grid Search Functionality

In [149]:
# Switch to the breast-cancer data set; this rebinds dataset, x, y and the
# train/test arrays that previously held the iris data.
dataset = load_breast_cancer()
x, y = dataset.data, dataset.target

# Hold out 30% of the samples for evaluation. No random_state is passed, so
# the shuffle draws from NumPy's global RNG.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
)
print(f"x_train shape: {x_train.shape}, x_test shape: {x_test.shape}")

x_train shape: (398, 30), x_test shape: (171, 30)


In [150]:
from sklearn.model_selection import GridSearchCV

# Search space: k = 1..14 crossed with both neighbour-weighting schemes.
params = {
    'n_neighbors': list(range(1, 15)),
    'weights': ['uniform', 'distance'],
}

# 10-fold cross-validation with shuffling. No random_state is passed, so the
# fold assignment draws from NumPy's global RNG.
kf = KFold(shuffle=True, n_splits=10)

# Cross-validated grid search over `params`, using only the training split.
clf = KNeighborsClassifier()
grid_cv = GridSearchCV(estimator=clf, param_grid=params, cv=kf)
# fit() returns the search object itself, so `fitted` refers to `grid_cv`.
fitted = grid_cv.fit(x_train, y_train)

In [151]:
# List the column names available in the cross-validation results dict.
print(f"GridSearch results:")
for k in grid_cv.cv_results_:
    print(f"k={k}")

GridSearch results:
k=mean_fit_time
k=std_fit_time
k=mean_score_time
k=std_score_time
k=param_n_neighbors
k=param_weights
k=params
k=split0_test_score
k=split1_test_score
k=split2_test_score
k=split3_test_score
k=split4_test_score
k=split5_test_score
k=split6_test_score
k=split7_test_score
k=split8_test_score
k=split9_test_score
k=mean_test_score
k=std_test_score
k=rank_test_score


In [152]:
# Print the mean and standard deviation of the cross-validated test score for
# every hyper-parameter combination that the grid search evaluated.
#
# The result columns are indexed directly (rather than with .get()) so that a
# missing key raises a clear KeyError instead of passing None on to zip().
# The candidate list is bound to its own name so that this cell does not
# overwrite the `params` search grid defined in the GridSearchCV cell, and the
# loop variable does not shadow the sequence being iterated.
cv_results = grid_cv.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
candidates = cv_results['params']

for mean, std, candidate in zip(means, stds, candidates):
    print(f"{mean:.3} (+/-{std}) for {candidate}")

0.914 (+/-0.042830107937718374) for {'n_neighbors': 2, 'weights': 'uniform'}
0.914 (+/-0.0346832193830368) for {'n_neighbors': 2, 'weights': 'distance'}
0.932 (+/-0.03562447734255631) for {'n_neighbors': 3, 'weights': 'uniform'}
0.932 (+/-0.031923398721475565) for {'n_neighbors': 3, 'weights': 'distance'}
0.932 (+/-0.03933354436141698) for {'n_neighbors': 4, 'weights': 'uniform'}
0.932 (+/-0.039072077228308584) for {'n_neighbors': 4, 'weights': 'distance'}
0.932 (+/-0.034236350740573344) for {'n_neighbors': 5, 'weights': 'uniform'}
0.924 (+/-0.03752579651838518) for {'n_neighbors': 5, 'weights': 'distance'}
0.929 (+/-0.037260080642256464) for {'n_neighbors': 6, 'weights': 'uniform'}
0.932 (+/-0.032041023999474644) for {'n_neighbors': 6, 'weights': 'distance'}
0.94 (+/-0.028321511050183726) for {'n_neighbors': 7, 'weights': 'uniform'}
0.937 (+/-0.030514167175626626) for {'n_neighbors': 7, 'weights': 'distance'}
0.94 (+/-0.028321511050183726) for {'n_neighbors': 8, 'weights': 'uniform'}


In [153]:
# Show the hyper-parameter combination that the grid search selected as best.
print(f"best params: {grid_cv.best_params_}")

best params: {'n_neighbors': 7, 'weights': 'uniform'}


#### Train a New Model with the Best Hyper-Parameters

In [154]:
# Read the winning settings out of the finished grid search.
best_params = grid_cv.best_params_
n = best_params['n_neighbors']
w = best_params['weights']

# Fit a fresh classifier with those settings on the whole training split and
# measure its accuracy on the held-out test split, which the grid search was
# never fitted on.
clf = KNeighborsClassifier(n_neighbors=n, weights=w)
clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)

print(f"accuracy: {score*100:.4}%")

accuracy: 92.98%
