In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import make_regression
X,y = make_regression(n_samples=1000, n_features=2, noise=3, random_state=1)

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.3, random_state=1)

In [6]:
from sklearn.neighbors import KNeighborsRegressor
reg = KNeighborsRegressor()
reg

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [7]:
reg.fit(X_train,y_train)

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [8]:
ypred = reg.predict(X_test)
ypred

array([ -31.27303942,  -88.14468143,  168.85297226,   22.12939785,
         80.27311394,   20.83384703,  -19.51050133,  156.86509311,
       -159.67902716,  109.91042019,   64.70847727,   24.20010719,
        -76.62806594,  102.64061341,   24.1667387 , -144.06636376,
        159.05068046,   37.60732392,  -15.31142591,  -11.41367664,
       -164.93285599,   15.05024527,   22.53173543,   36.05628259,
         33.28398704,   20.08907639,  -82.17930555,  -86.87411828,
        147.90956025,  204.87861218,  193.1519867 ,   32.02173891,
         99.94951543,   34.02619759,   69.08547936,   30.81498965,
        -80.92377831,  105.63786574, -178.03626419,  -19.07953517,
        146.46089555,  131.52754938,  -43.00693924, -201.06575474,
        -40.26849982,   10.16533245,  -58.218269  ,   93.23650932,
        -54.09413803,  -46.18563326,   50.62859043,   39.3920481 ,
          8.95089607,  -39.18181838,   -3.12276664,   -6.36871729,
        132.99010398,   57.81542445,  -91.27564861,  131.94178

In [9]:
from sklearn.metrics import r2_score
r2_score(y_test,ypred)

0.9900695068947865

In [10]:
#hyper parameter tuning
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_neighbors':[3,5,6,7,9,11,13],
    'weights':['uniform','distance'],
    'algorithm':['auto', 'ball_tree', 'kd_tree','brute'],
    'leaf_size':[20,30,40,50]
}

In [11]:
grid = GridSearchCV(estimator = reg, param_grid = param_grid, cv=5, verbose=3)
grid

0,1,2
,estimator,KNeighborsRegressor()
,param_grid,"{'algorithm': ['auto', 'ball_tree', ...], 'leaf_size': [20, 30, ...], 'n_neighbors': [3, 5, ...], 'weights': ['uniform', 'distance']}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,3
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [12]:
grid.fit(X_train,y_train)

Fitting 5 folds for each of 224 candidates, totalling 1120 fits
[CV 1/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=uniform;, score=0.991 total time=   0.0s
[CV 2/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=uniform;, score=0.994 total time=   0.0s
[CV 3/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=uniform;, score=0.990 total time=   0.0s
[CV 4/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=uniform;, score=0.987 total time=   0.0s
[CV 5/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=uniform;, score=0.989 total time=   0.0s
[CV 1/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=distance;, score=0.993 total time=   0.0s
[CV 2/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=distance;, score=0.995 total time=   0.0s
[CV 3/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=distance;, score=0.992 total time=   0.0s
[CV 4/5] END algorithm=auto, leaf_size=20, n_neighbors=3, weights=distance;, 

0,1,2
,estimator,KNeighborsRegressor()
,param_grid,"{'algorithm': ['auto', 'ball_tree', ...], 'leaf_size': [20, 30, ...], 'n_neighbors': [3, 5, ...], 'weights': ['uniform', 'distance']}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,3
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,3
,weights,'distance'
,algorithm,'brute'
,leaf_size,20
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [13]:
grid.best_params_

{'algorithm': 'brute',
 'leaf_size': 20,
 'n_neighbors': 3,
 'weights': 'distance'}

In [14]:
# predict the best estimator
best_model = grid.best_estimator_
ypred_tuned = best_model.predict(X_test)
ypred_tuned

array([ -32.88832171,  -90.97740821,  179.30815934,   21.13629047,
         79.58006408,   21.53959122,  -17.76257937,  165.23760412,
       -165.02436358,  105.53485332,   71.88313795,   23.13048114,
        -75.62331949,  103.85375127,   21.42839755, -155.4173305 ,
        166.48426468,   37.20727059,  -12.42794663,  -13.65117805,
       -173.60338448,   14.77621169,   21.77290561,   35.53131896,
         33.26806275,   19.20486317,  -75.1511186 ,  -87.5206855 ,
        149.920301  ,  208.68859892,  205.97504468,   30.85928266,
         96.26373639,   33.88203184,   71.93494727,   29.93747158,
        -76.26712297,  107.81191222, -185.89645447,  -19.82860285,
        146.20666021,  129.70062133,  -45.32678971, -193.42671029,
        -42.16454251,    8.83486331,  -61.53250251,   93.77398924,
        -59.07011306,  -47.14850162,   45.90532505,   43.85401268,
          9.70145083,  -38.3432325 ,  -10.19326026,   -6.94789407,
        140.06604714,   57.0767716 ,  -99.70492361,  132.31490

In [15]:
r2_score(y_test,ypred)

0.9900695068947865