#### Data preparation


In [10]:
import numpy as np


# Seed NumPy's legacy global RNG up front; scikit-learn calls made without an
# explicit random_state draw from this generator, so this keeps runs repeatable.
np.random.seed(seed=42)

In [11]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [12]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
dataset = load_breast_cancer()
x, y = dataset.data, dataset.target

# Hold out 20% of the samples for the final evaluation. No random_state is
# passed, so the shuffle uses NumPy's global RNG (seeded earlier in the notebook).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
)
print(f"x_train shape: {x_train.shape} x_test.shape: {x_test.shape}")

x_train shape: (455, 30) x_test.shape: (114, 30)


#### Random Search


In [13]:
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV

In [14]:
# Search space: n_neighbors is sampled from scipy's randint(1, 15) (upper bound
# exclusive), weights is picked from the two listed options.
param_dist = dict(
    n_neighbors=sp_randint(1, 15),
    weights=["uniform", "distance"],
)
# Number of parameter settings sampled by the search.
n_iter_search = 10

# Base estimator whose hyper-parameters are tuned.
clf = KNeighborsClassifier()

# 3-fold cross-validated random search. No random_state is given, so the
# candidates are drawn from NumPy's global RNG.
rand_cv = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
    cv=3,
)
# Keep the fit call as the last expression so the fitted search object is displayed.
rand_cv.fit(x_train, y_train)

RandomizedSearchCV(cv=3, estimator=KNeighborsClassifier(),
                   param_distributions={'n_neighbors': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002C3248B9CD0>,
                                        'weights': ['uniform', 'distance']})

In [15]:
# List every column recorded in the search results. The heading names the
# random search that was actually run (it previously said "GridSearch").
print("RandomSearch Keys:")
# Iterating the cv_results_ dict yields its keys directly.
for key in rand_cv.cv_results_:
    print(f"\t{key}")

GridSearch Keys:
	mean_fit_time
	std_fit_time
	mean_score_time
	std_score_time
	param_n_neighbors
	param_weights
	params
	split0_test_score
	split1_test_score
	split2_test_score
	mean_test_score
	std_test_score
	rank_test_score


In [16]:
# Show each parameter combination the search sampled, in evaluation order.
# The heading names the random search that was actually run (it previously
# said "GridSearch").
print("RandomSearch Params:")
for param in rand_cv.cv_results_["params"]:
    print(param)

GridSearch Params:
{'n_neighbors': 9, 'weights': 'uniform'}
{'n_neighbors': 12, 'weights': 'distance'}
{'n_neighbors': 4, 'weights': 'uniform'}
{'n_neighbors': 10, 'weights': 'uniform'}
{'n_neighbors': 5, 'weights': 'uniform'}
{'n_neighbors': 3, 'weights': 'distance'}
{'n_neighbors': 9, 'weights': 'distance'}
{'n_neighbors': 11, 'weights': 'uniform'}
{'n_neighbors': 4, 'weights': 'uniform'}
{'n_neighbors': 7, 'weights': 'distance'}


In [17]:
# Pull the per-candidate cross-validation statistics out of the fitted search.
cv_results = rand_cv.cv_results_
means = cv_results["mean_test_score"]
stds = cv_results["std_test_score"]

# Report the winning configuration first, then one line per sampled candidate:
# mean CV score with a +/- band of two standard deviations.
print(f"Best parameters set found on development set: {rand_cv.best_params_}\n")

for params, mean, std in zip(cv_results["params"], means, stds):
    print(f"{mean:.3f} (+/-{2*std:.3f}) for {params}")

Best parameters set found on development set: {'n_neighbors': 5, 'weights': 'uniform'}

0.921 (+/-0.048) for {'n_neighbors': 9, 'weights': 'uniform'}
0.919 (+/-0.056) for {'n_neighbors': 12, 'weights': 'distance'}
0.916 (+/-0.006) for {'n_neighbors': 4, 'weights': 'uniform'}
0.921 (+/-0.054) for {'n_neighbors': 10, 'weights': 'uniform'}
0.927 (+/-0.033) for {'n_neighbors': 5, 'weights': 'uniform'}
0.919 (+/-0.044) for {'n_neighbors': 3, 'weights': 'distance'}
0.921 (+/-0.048) for {'n_neighbors': 9, 'weights': 'distance'}
0.919 (+/-0.056) for {'n_neighbors': 11, 'weights': 'uniform'}
0.916 (+/-0.006) for {'n_neighbors': 4, 'weights': 'uniform'}
0.921 (+/-0.044) for {'n_neighbors': 7, 'weights': 'distance'}


#### Best Found model


In [19]:
# Build the final classifier from the hyper-parameters the search selected,
# rather than hand-copying them from an earlier output: the values stay in sync
# with rand_cv if the search space, seed or data split ever changes.
clf = KNeighborsClassifier(**rand_cv.best_params_)
# Train on the full training split, then measure accuracy on the held-out test split.
clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(f"Accuracy: {score}")

Accuracy: 0.956140350877193
