In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [3]:
from sklearn.datasets import load_iris

In [4]:
dataset=load_iris()

In [6]:
type(dataset)

sklearn.utils.Bunch

In [11]:
df=pd.DataFrame(dataset.data)
df.columns=dataset.feature_names

In [14]:
dataset.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [15]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [18]:
df.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

In [19]:
X=df

In [20]:
y=dataset.target

In [74]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=54)

## KNN without Hyperparameter Tuning

In [75]:
knn=KNeighborsClassifier()

In [76]:
knn.fit(X_train,y_train)

KNeighborsClassifier()

In [77]:
ypred=knn.predict(X_test)

In [78]:
from sklearn.metrics import classification_report,accuracy_score

In [79]:
accuracy_score(y_test,ypred)

0.9473684210526315

In [31]:
print(classification_report(y_test,ypred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.93      0.93      0.93        15
           2       0.90      0.90      0.90        10

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38



## Finding the best parameters

### GridSearchCV

In [32]:
k=np.arange(1,20)
dist=[1,2]
hyperparams={'n_neighbors':k,
            'p':dist}

In [33]:
knn=KNeighborsClassifier()

In [38]:
gcv=GridSearchCV(knn,hyperparams,cv=7)

In [39]:
gcv.fit(X_train,y_train)

GridSearchCV(cv=7, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19]),
                         'p': [1, 2]})

In [42]:
gcv.best_params_
#gcv.best_score_

{'n_neighbors': 11, 'p': 2}

### RandomizedSearchCV

In [None]:
k=np.arange(1,20)
dist=[1,2]
hyperparams={'n_neighbors':k,
            'p':dist}

In [43]:
rcv=RandomizedSearchCV(knn,hyperparams,cv=7)

In [44]:
rcv.fit(X_train,y_train)

RandomizedSearchCV(cv=7, estimator=KNeighborsClassifier(),
                   param_distributions={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19]),
                                        'p': [1, 2]})

In [46]:
rcv.best_params_
rcv.best_score_

0.9821428571428571

## Training using best parameters

In [48]:
knn=KNeighborsClassifier(n_neighbors=11,p=2) #from GridSearch

In [49]:
knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=11)

In [51]:
y_pred=knn.predict(X_test)

In [52]:
accuracy_score(y_test,y_pred)

0.9210526315789473

In [54]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.88      0.93      0.90        15
           2       0.89      0.80      0.84        10

    accuracy                           0.92        38
   macro avg       0.92      0.91      0.92        38
weighted avg       0.92      0.92      0.92        38



In [57]:
knn=KNeighborsClassifier(n_neighbors=18,p=2)#from GridSearch

In [58]:
knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=18)

In [60]:
ypred=knn.predict(X_test)

In [61]:
accuracy_score(y_test,ypred)

0.868421052631579