<a href="https://colab.research.google.com/github/dsmohiit/Machine-Learning-Repo/blob/main/Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Read the Boston housing CSV into a DataFrame used by every section below.
df = pd.read_csv(filepath_or_buffer="/content/BostonHousing.csv")

In [4]:
# Preview the first five rows to check the column names and value ranges.
df.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


### Baseline Model - With default hyperparameter values

In [20]:
# `medv` (the last column of the frame) is the regression target;
# every remaining column is used as a feature.
y = df["medv"]
X = df.drop(columns="medv")

In [24]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [25]:
# Baseline setup: 5-fold shuffled cross-validation and a KNN regressor left at
# its default hyperparameters.
# NOTE(review): this splitter is seeded with 1, while the search sections use
# random_state=42, so the baseline and tuned scores are measured on different
# folds -- confirm whether that is intended before comparing them.
# NOTE(review): KNN is distance-based and the features are on very different
# scales (see the preview above); consider scaling them before fitting.
k_fold = KFold(n_splits=5, shuffle=True, random_state=1)
knn = KNeighborsRegressor()

In [27]:
# Cross-validated R2 of the untuned model: one score per fold, then the mean.
scores = cross_val_score(knn, X, y, scoring="r2", cv=k_fold)
print("Mean R2 score is: ", scores.mean())

Mean R2 score is:  0.4761976351913221


### GridSearchCV

In [38]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

In [76]:
# Untuned estimator that the grid search below will configure.
knn = KNeighborsRegressor()

In [77]:
# 5-fold shuffled splitter with a fixed seed, passed as `cv` to the search below.
kfold = KFold(n_splits= 5, shuffle= True, random_state= 42)

In [78]:
# Search space for the exhaustive grid search: 15 * 2 * 3 * 2 = 180 combinations.
# NOTE(review): in the recorded results the three `algorithm` values score
# identically for the same remaining settings, so this key appears to triple
# the number of fits without affecting the ranking.
parameter_grid = {
    "n_neighbors": list(range(1, 16)),        # k = 1 .. 15
    "weights": ["uniform", "distance"],
    "algorithm": ["brute", "kd_tree", "ball_tree"],
    "p": [1, 2],
}

In [79]:
# Exhaustive search over `parameter_grid`, scored by R2 on the `kfold` splits
# and run on all available cores.
gcv = GridSearchCV(
    estimator=knn,
    param_grid=parameter_grid,
    scoring="r2",
    cv=kfold,
    n_jobs=-1,
    verbose=3,
)

In [80]:
# Run the grid search on the full feature matrix and target.
gcv.fit(X, y)

Fitting 5 folds for each of 180 candidates, totalling 900 fits


In [81]:
# Hyperparameter combination that achieved the best mean cross-validated score.
gcv.best_params_

{'algorithm': 'brute', 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [82]:
# Mean cross-validated R2 of the best candidate found by the grid search.
print(gcv.best_score_)

0.6434974189445056


In [83]:
# Rank every grid candidate by its mean cross-validated R2, best first.
(
    pd.DataFrame(gcv.cv_results_)
    .loc[:, ["param_algorithm", "param_n_neighbors", "param_p", "param_weights", "mean_test_score"]]
    .sort_values(by="mean_test_score", ascending=False)
)

Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
17,brute,5,1,distance,0.643497
77,kd_tree,5,1,distance,0.643497
137,ball_tree,5,1,distance,0.643497
21,brute,6,1,distance,0.640962
81,kd_tree,6,1,distance,0.640962
...,...,...,...,...,...
2,brute,1,2,uniform,0.406357
62,kd_tree,1,2,uniform,0.406357
63,kd_tree,1,2,distance,0.406357
123,ball_tree,1,2,distance,0.406357


### RandomizedSearchCV

In [62]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score

In [63]:
# Feature/target split for the randomized search: `medv` (the last column)
# is the target, all other columns are features.
y = df["medv"]
X = df.drop(columns="medv")

In [None]:
# Untuned estimator for the randomized search below.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# recorded run presumably reused the `knn` created in an earlier section --
# re-run top to bottom to confirm.
knn = KNeighborsRegressor()

In [64]:
# Candidate values for the randomized search: 10 * 2 * 3 * 2 = 120 combinations.
para_dist = {
    "n_neighbors": list(range(1, 11)),        # k = 1 .. 10
    "weights": ["uniform", "distance"],
    "algorithm": ["brute", "kd_tree", "ball_tree"],
    "p": [1, 2],
}

In [65]:
# 5-fold shuffled splitter with a fixed seed, passed as `cv` to the search below.
kfold = KFold(n_splits= 5, shuffle= True, random_state= 42)

In [66]:
# Randomized search: evaluates `n_iter` parameter settings sampled from
# `para_dist`, scoring each by R2 on the `kfold` splits, and refits the best
# setting on the full data (refit=True).
# `para_dist` holds 120 combinations, so n_iter=100 covers only part of the
# space and the chosen subset depends on the random draw. Seeding the search
# makes the sampled candidates, and therefore the reported best score and
# parameters, repeatable from run to run.
rcv = RandomizedSearchCV(
    estimator=knn,
    param_distributions=para_dist,
    n_iter=100,
    scoring="r2",
    n_jobs=-1,
    refit=True,
    cv=kfold,
    verbose=3,
    random_state=42,  # fixes which candidates are sampled
)

In [67]:
# Run the randomized search on the full feature matrix and target.
rcv.fit(X, y)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [69]:
# Mean cross-validated R2 of the best sampled candidate.
print(rcv.best_score_)

0.6434974189445056


In [70]:
# Hyperparameter combination of the best sampled candidate.
rcv.best_params_

{'weights': 'distance', 'p': 1, 'n_neighbors': 5, 'algorithm': 'brute'}

In [74]:
# Rank the sampled candidates by their mean cross-validated R2, best first.
(
    pd.DataFrame(rcv.cv_results_)
    .loc[:, ["param_weights", "param_p", "param_n_neighbors", "param_algorithm", "mean_test_score"]]
    .sort_values(by="mean_test_score", ascending=False)
)

Unnamed: 0,param_weights,param_p,param_n_neighbors,param_algorithm,mean_test_score
67,distance,1,5,ball_tree,0.643497
89,distance,1,5,kd_tree,0.643497
38,distance,1,5,brute,0.643497
31,distance,1,6,ball_tree,0.640962
56,distance,1,6,kd_tree,0.640962
...,...,...,...,...,...
92,uniform,2,10,ball_tree,0.462477
24,distance,2,1,ball_tree,0.406357
26,uniform,2,1,ball_tree,0.406357
19,distance,2,1,brute,0.406357
