In [110]:
import numpy as np
from scipy.stats import randint as rand_generator
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(42)

In [111]:
# Load scikit-learn's bundled breast-cancer dataset and pull out the feature
# matrix and the label vector.
dataset = load_breast_cancer()
x, y = dataset.data, dataset.target

# Hold out 30% of the samples for the final evaluation. No random_state is
# passed, so the shuffle draws from NumPy's global RNG (seeded in the import
# cell).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
print(f"x_train shape: {x_train.shape} x_test shape: {x_test.shape}")

x_train shape: (398, 30) x_test shape: (171, 30)


#### Random Search

In [112]:
# Search space for the k-NN classifier.
# - scipy's randint excludes the upper bound, so n_neighbors is drawn from 1..14.
# - Because one entry is a distribution, candidates are sampled with
#   replacement: the same combination can be drawn more than once.
# Named `param_distributions` (not `params`) so it is not clobbered by the
# list of sampled candidates that a later cell binds to `params`.
param_distributions = {
    "n_neighbors": rand_generator(1, 15),
    "weights": ["uniform", "distance"],
}

# 10-fold cross-validation with shuffling. Neither KFold nor
# RandomizedSearchCV is given a random_state, so both draw from NumPy's
# global RNG; results are reproducible only under Restart & Run All.
kf = KFold(n_splits=10, shuffle=True)

# Evaluate 20 sampled candidates on the training split only; the test split
# stays untouched until the final model is scored.
clf = KNeighborsClassifier()
cv = RandomizedSearchCV(clf, param_distributions, n_iter=20, cv=kf)
cv.fit(x_train, y_train)

RandomizedSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=True),
                   estimator=KNeighborsClassifier(), n_iter=20,
                   param_distributions={'n_neighbors': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fa7064b1c40>,
                                        'weights': ['uniform', 'distance']})

In [113]:
# Show which result arrays the search recorded: first the raw keys view,
# then one key per line for readability.
result_keys = cv.cv_results_.keys()
print(f"Random Search keys: {result_keys}")
for name in result_keys:
    print(f"\t{name}")

Random Search keys: dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_n_neighbors', 'param_weights', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'split5_test_score', 'split6_test_score', 'split7_test_score', 'split8_test_score', 'split9_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])
	mean_fit_time
	std_fit_time
	mean_score_time
	std_score_time
	param_n_neighbors
	param_weights
	params
	split0_test_score
	split1_test_score
	split2_test_score
	split3_test_score
	split4_test_score
	split5_test_score
	split6_test_score
	split7_test_score
	split8_test_score
	split9_test_score
	mean_test_score
	std_test_score
	rank_test_score


In [114]:
# Every hyper-parameter combination that the search sampled, in sampling order.
sampled_candidates = cv.cv_results_.get('params')
print(f"Random Search params: {sampled_candidates}")

Random Search params: [{'n_neighbors': 9, 'weights': 'uniform'}, {'n_neighbors': 12, 'weights': 'distance'}, {'n_neighbors': 4, 'weights': 'uniform'}, {'n_neighbors': 10, 'weights': 'uniform'}, {'n_neighbors': 5, 'weights': 'uniform'}, {'n_neighbors': 3, 'weights': 'distance'}, {'n_neighbors': 9, 'weights': 'distance'}, {'n_neighbors': 11, 'weights': 'uniform'}, {'n_neighbors': 4, 'weights': 'uniform'}, {'n_neighbors': 7, 'weights': 'distance'}, {'n_neighbors': 9, 'weights': 'uniform'}, {'n_neighbors': 13, 'weights': 'uniform'}, {'n_neighbors': 10, 'weights': 'distance'}, {'n_neighbors': 7, 'weights': 'distance'}, {'n_neighbors': 10, 'weights': 'uniform'}, {'n_neighbors': 3, 'weights': 'uniform'}, {'n_neighbors': 13, 'weights': 'uniform'}, {'n_neighbors': 11, 'weights': 'distance'}, {'n_neighbors': 12, 'weights': 'uniform'}, {'n_neighbors': 10, 'weights': 'distance'}]


In [115]:
# The combination ranked first by mean cross-validated test score.
best_combination = cv.best_params_
print(f"Best params: {best_combination}")

Best params: {'n_neighbors': 12, 'weights': 'uniform'}


#### Display all randomly sampled hyper-parameter combinations and their cross-validation scores

In [116]:
# Per-candidate cross-validation summary, one entry per sampled combination.
# Index the keys directly so a missing entry raises KeyError here instead of
# handing None to zip().
mean_score = cv.cv_results_["mean_test_score"]
std_score = cv.cv_results_["std_test_score"]
params = cv.cv_results_["params"]

for mean, std, prm in zip(mean_score, std_score, params):
    # Use fixed-point formatting: a bare ".3" precision (no presentation type)
    # switches to scientific notation once the value has three integer
    # digits, so a 100% mean score was printed as "1e+02%".
    print(f"{mean * 100:.2f}% (+/-{std * 100:.2f}%) for {prm}")

91.0% (+/-4.62%) for {'n_neighbors': 9, 'weights': 'uniform'}
91.5% (+/-4.2%) for {'n_neighbors': 12, 'weights': 'distance'}
91.0% (+/-3.59%) for {'n_neighbors': 4, 'weights': 'uniform'}
91.2% (+/-3.92%) for {'n_neighbors': 10, 'weights': 'uniform'}
90.7% (+/-3.9%) for {'n_neighbors': 5, 'weights': 'uniform'}
90.2% (+/-4.43%) for {'n_neighbors': 3, 'weights': 'distance'}
91.0% (+/-4.63%) for {'n_neighbors': 9, 'weights': 'distance'}
91.0% (+/-4.5%) for {'n_neighbors': 11, 'weights': 'uniform'}
91.0% (+/-3.59%) for {'n_neighbors': 4, 'weights': 'uniform'}
91.2% (+/-3.91%) for {'n_neighbors': 7, 'weights': 'distance'}
91.0% (+/-4.62%) for {'n_neighbors': 9, 'weights': 'uniform'}
91.2% (+/-4.63%) for {'n_neighbors': 13, 'weights': 'uniform'}
90.9% (+/-4.23%) for {'n_neighbors': 10, 'weights': 'distance'}
91.2% (+/-3.91%) for {'n_neighbors': 7, 'weights': 'distance'}
91.2% (+/-3.92%) for {'n_neighbors': 10, 'weights': 'uniform'}
90.2% (+/-4.26%) for {'n_neighbors': 3, 'weights': 'uniform'}

#### Train the Best Found Model

In [117]:
# Build a fresh k-NN classifier from the winning hyper-parameters
# (best_params_ holds exactly the searched keys: n_neighbors and weights)
# and fit it on the full training split.
clf = KNeighborsClassifier(**cv.best_params_)
clf.fit(x_train, y_train)

# Accuracy on the held-out test split, which the search never saw.
score = clf.score(x_test, y_test)
print(f"best model score: {score*100:.4}%")

best model score: 97.66%
