In [1]:
from sklearn import datasets

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# ML
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

In [2]:
dataset = datasets.load_diabetes()

In [3]:
X = dataset.data
y = dataset.target
       
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [4]:
# instantiate the KNN classifier
# {'metric': 'minkowski', 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
clf = KNeighborsClassifier()

In [5]:
# get the KNN parameters
clf.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [6]:
# get the KNN parameters
clf.get_params().keys()

dict_keys(['algorithm', 'leaf_size', 'metric', 'metric_params', 'n_jobs', 'n_neighbors', 'p', 'weights'])

In [7]:
n_neighbors = [3, 5, 7, 9, 11, 13, 15, 19, 21, 23, 25, 27, 29]
algos       = ['ball_tree', 'kd_tree']
dist_metric = ['minkowski', 'manhattan', 'euclidean']
p_root      = [1, 2, 3, 4, 5]
weights     = ['uniform', 'distance']
leaf_size   = [15, 30, 40, 50, 60,70]

In [8]:
# define the parameters dict
parameters = dict(
                n_neighbors= n_neighbors,
                algorithm= algos,
                metric= dist_metric,
                p= p_root,
                weights= weights,
                leaf_size= leaf_size
            )

print(parameters)

{'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 19, 21, 23, 25, 27, 29], 'algorithm': ['ball_tree', 'kd_tree'], 'metric': ['minkowski', 'manhattan', 'euclidean'], 'p': [1, 2, 3, 4, 5], 'weights': ['uniform', 'distance'], 'leaf_size': [15, 30, 40, 50, 60, 70]}


In [9]:
# define splits
n_splits = 5

kf  = KFold(n_splits =n_splits, shuffle=True, random_state=100)

In [10]:
randm_src = RandomizedSearchCV(estimator           = clf, 
                               param_distributions = parameters,
                               cv = kf, 
                               n_iter = 100, 
                               verbose=2)

In [11]:
# fit the data to the grid object
randm_src.fit(X, y)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] END algorithm=kd_tree, leaf_size=60, metric=minkowski, n_neighbors=13, p=4, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=60, metric=minkowski, n_neighbors=13, p=4, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=60, metric=minkowski, n_neighbors=13, p=4, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=60, metric=minkowski, n_neighbors=13, p=4, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=60, metric=minkowski, n_neighbors=13, p=4, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=50, metric=manhattan, n_neighbors=9, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=50, metric=manhattan, n_neighbors=9, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, leaf_size=50, metric=manhattan, n_neighbors=9, p=2, weights=distance; total time=   0.0s
[

In [12]:
print(" Results from Random Search " )
print("\n The best estimator across ALL searched params:\n", randm_src.best_estimator_)
print("\n The best score across ALL searched params:\n", randm_src.best_score_)
print("\n The best parameters across ALL searched params:\n", randm_src.best_params_)

 Results from Random Search 

 The best estimator across ALL searched params:
 KNeighborsClassifier(algorithm='ball_tree', leaf_size=50, metric='euclidean',
                     n_neighbors=9, p=4, weights='distance')

 The best score across ALL searched params:
 0.013585291113381002

 The best parameters across ALL searched params:
 {'weights': 'distance', 'p': 4, 'n_neighbors': 9, 'metric': 'euclidean', 'leaf_size': 50, 'algorithm': 'ball_tree'}
