In [31]:
import numpy as np
import pandas as pd

In [32]:
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv')

In [33]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [34]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [35]:
from sklearn.model_selection import cross_val_score,KFold
from sklearn.neighbors import KNeighborsRegressor

In [36]:
knn = KNeighborsRegressor()

In [37]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(knn, X, y, cv=kfold, scoring='r2')

In [38]:
scores.mean()

0.4761976351913221

### GridSearchCV

In [40]:
from sklearn.model_selection import GridSearchCV

In [41]:
knn = KNeighborsRegressor()

In [42]:
param_grid = {
    'n_neighbors':[1,3,5,7,10,12,15,17,20],
    'metric':['minkowski'],
    'weights':['uniform','distance'],
    'algorithm':['ball_tree', 'kd_tree', 'brute'],
    'p':[1,2,3]
}

In [43]:
gcv = GridSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)

In [44]:
gcv.fit(X,y)

Fitting 5 folds for each of 162 candidates, totalling 810 fits
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=1, p=1, weights=distance; total time=   0.

In [45]:
gcv.best_params_

{'algorithm': 'ball_tree',
 'metric': 'minkowski',
 'n_neighbors': 5,
 'p': 1,
 'weights': 'distance'}

In [46]:
gcv.best_score_

0.6117139367845081

In [47]:
gcv.cv_results_

{'mean_fit_time': array([0.0043519 , 0.0034142 , 0.0073617 , 0.00316377, 0.00436206,
        0.00381465, 0.00261188, 0.00235324, 0.00282645, 0.00284901,
        0.00342145, 0.00283728, 0.00233455, 0.0026876 , 0.00260534,
        0.00231962, 0.00256629, 0.00282907, 0.00223298, 0.0025034 ,
        0.00261292, 0.00290732, 0.00332327, 0.00313916, 0.00264726,
        0.00282049, 0.0026371 , 0.00253386, 0.00302629, 0.00371919,
        0.00220976, 0.00273371, 0.00262208, 0.00215335, 0.00290465,
        0.00272703, 0.0022357 , 0.00229468, 0.00203681, 0.00161657,
        0.0031517 , 0.00262914, 0.00210652, 0.00240321, 0.00362277,
        0.0051775 , 0.00456858, 0.00281043, 0.00260997, 0.00271735,
        0.00212946, 0.002209  , 0.00281992, 0.00339947, 0.00229731,
        0.00260472, 0.00242996, 0.00321903, 0.00333834, 0.00283532,
        0.00270853, 0.0022203 , 0.00254798, 0.0032516 , 0.0023241 ,
        0.01117659, 0.00433531, 0.00281343, 0.0022892 , 0.00325518,
        0.00261936, 0.00333929,

In [48]:
pd.DataFrame(gcv.cv_results_)[['param_algorithm',	'param_n_neighbors',	'param_p', 'param_weights', 'mean_test_score']].sort_values('mean_test_score',ascending=False)

Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
67,kd_tree,5,1,distance,0.611714
121,brute,5,1,distance,0.611714
13,ball_tree,5,1,distance,0.611714
19,ball_tree,7,1,distance,0.605716
73,kd_tree,7,1,distance,0.605716
...,...,...,...,...,...
59,kd_tree,1,3,distance,0.304151
113,brute,1,3,distance,0.304151
5,ball_tree,1,3,distance,0.304151
58,kd_tree,1,3,uniform,0.304151


In [25]:
# gcv.predict(new_data)

### RandomizedSearchCV

In [26]:
from sklearn.model_selection import RandomizedSearchCV

In [55]:
rcv = RandomizedSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2, n_iter=10)

In [56]:
rcv.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, metric=minkowski, n_neighbors=20, p=2, weights=uniform; total ti

In [58]:
rcv.best_score_

0.6057158068725681

In [59]:
rcv.best_params_

{'weights': 'distance',
 'p': 1,
 'n_neighbors': 7,
 'metric': 'minkowski',
 'algorithm': 'kd_tree'}