#### Data preparation


In [29]:
import numpy as np


# Seed NumPy's legacy global RNG once, before any stochastic step, so that code
# drawing from the global state produces the same numbers on every fresh run.
SEED = 42
np.random.seed(SEED)

In [30]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [31]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
dataset = load_breast_cancer()
x = dataset.data
y = dataset.target

# Hold out 20% of the samples for the final evaluation.
# random_state is passed explicitly so that re-running this cell on its own
# (or out of order) always yields the same split; without it the split is
# drawn from NumPy's global RNG, whose state advances with every random call.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
print(f"x_train shape: {x_train.shape} x_test.shape: {x_test.shape}")

x_train shape: (455, 30) x_test.shape: (114, 30)


#### Grid Search


In [32]:
from sklearn.model_selection import GridSearchCV

In [33]:
# Search space for the KNN classifier: neighbourhood size and vote weighting.
parameters = {
    "n_neighbors": [3, 5, 7, 9],
    "weights": ["uniform", "distance"],
}

# Evaluate every combination in the grid with 3-fold cross-validation, using
# only the training split. The fit call is kept as the cell's last expression
# so the fitted search object is displayed.
clf = KNeighborsClassifier()
grid_cv = GridSearchCV(estimator=clf, param_grid=parameters, cv=3)
grid_cv.fit(x_train, y_train)

GridSearchCV(cv=3, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 5, 7, 9],
                         'weights': ['uniform', 'distance']})

In [34]:
# List the entries that the fitted search exposes through cv_results_,
# one per line and tab-indented under the heading.
print("GridSearch Keys:")
print("\n".join(f"\t{key}" for key in grid_cv.cv_results_))

GridSearch Keys:
	mean_fit_time
	std_fit_time
	mean_score_time
	std_score_time
	param_n_neighbors
	param_weights
	params
	split0_test_score
	split1_test_score
	split2_test_score
	mean_test_score
	std_test_score
	rank_test_score


In [35]:
# Show every parameter combination the search evaluated, one dict per line.
print("GridSearch Params:")
print(*grid_cv.cv_results_["params"], sep="\n")

GridSearch Params:
{'n_neighbors': 3, 'weights': 'uniform'}
{'n_neighbors': 3, 'weights': 'distance'}
{'n_neighbors': 5, 'weights': 'uniform'}
{'n_neighbors': 5, 'weights': 'distance'}
{'n_neighbors': 7, 'weights': 'uniform'}
{'n_neighbors': 7, 'weights': 'distance'}
{'n_neighbors': 9, 'weights': 'uniform'}
{'n_neighbors': 9, 'weights': 'distance'}


In [36]:
# Report the winning configuration first, followed by a blank line.
print(f"Best parameters set found on development set: {grid_cv.best_params_}\n")

# Read the per-candidate cross-validation statistics from the results table.
cv_results = grid_cv.cv_results_
means, stds = cv_results["mean_test_score"], cv_results["std_test_score"]

# One line per candidate: mean CV score, a +/- band of two standard deviations
# across the folds, and the parameters that produced it.
for params, mean, std in zip(cv_results["params"], means, stds):
    print(f"{mean:.3f} (+/-{2*std:.3f}) for {params}")

Best parameters set found on development set: {'n_neighbors': 5, 'weights': 'uniform'}

0.914 (+/-0.048) for {'n_neighbors': 3, 'weights': 'uniform'}
0.919 (+/-0.044) for {'n_neighbors': 3, 'weights': 'distance'}
0.927 (+/-0.033) for {'n_neighbors': 5, 'weights': 'uniform'}
0.921 (+/-0.050) for {'n_neighbors': 5, 'weights': 'distance'}
0.923 (+/-0.038) for {'n_neighbors': 7, 'weights': 'uniform'}
0.921 (+/-0.044) for {'n_neighbors': 7, 'weights': 'distance'}
0.921 (+/-0.048) for {'n_neighbors': 9, 'weights': 'uniform'}
0.921 (+/-0.048) for {'n_neighbors': 9, 'weights': 'distance'}


#### Best found model


In [37]:
# Build the final classifier from the configuration the grid search selected
# instead of re-typing the values by hand, so this cell stays correct if the
# parameter grid, the data, or the split ever change.
clf = KNeighborsClassifier(**grid_cv.best_params_)
clf.fit(x_train, y_train)

# Score on the held-out test split, which the grid search was never fitted on.
score = clf.score(x_test, y_test)
print(f"Accuracy: {score}")

Accuracy: 0.956140350877193
