## K Nearest Neighbour Classifier

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
from sklearn.datasets import make_classification
x , y  = make_classification(
    n_samples=1000, #100 observations
    n_features= 3, # total features
    n_redundant=1,
    n_classes=2, # binary target  /label
    random_state = 999
)

In [4]:
from sklearn.model_selection import train_test_split


In [5]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.33,random_state=42)

In [6]:
from sklearn.neighbors  import KNeighborsClassifier

In [7]:
classifier = KNeighborsClassifier(n_neighbors=5,algorithm='auto',n_jobs=-1)
classifier.fit(x_train,y_train)

In [8]:
y_pred =  classifier.predict(x_test)

In [9]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [10]:
score = accuracy_score(y_test,y_pred)
report = classification_report(y_test,y_pred)
matrix = confusion_matrix(y_test,y_pred)

In [11]:
print("confusion matrix is ")
print(matrix)
print("classification report is ")
print(report)
print("accuracy score is ",score)

confusion matrix is 
[[158  11]
 [ 20 141]]
classification report is 
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

accuracy score is  0.906060606060606


## Hyperparameter Tuning and Cross Validation

## GridsearchCV

In [12]:
model = KNeighborsClassifier()
algorithm = [ 'ball_tree', 'kd_tree', 'brute']
weights = ['uniform', 'distance']
p = [1,2]
n_neighbors = [1,2,3,4,5,6,7,8,9,10]

In [13]:
params = dict(algorithm = algorithm,weights=weights,p = p,n_neighbors=n_neighbors)

In [14]:
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [15]:
## GridSearchCV
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv,n_jobs=-1)

In [16]:
grid

In [17]:
grid.fit(x_train,y_train)

In [18]:
grid.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [19]:
grid.best_score_

0.9029850746268657

In [20]:
y_pred = grid.predict(x_test)

In [22]:
score=accuracy_score(y_pred,y_test)
print("accuracy of the model is :",score)
print("classification report is : ")
print(classification_report(y_pred,y_test))
print("Confusion matrix")
print(confusion_matrix(y_pred,y_test))

accuracy of the model is : 0.9121212121212121
classification report is : 
              precision    recall  f1-score   support

           0       0.92      0.91      0.91       172
           1       0.90      0.92      0.91       158

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

Confusion matrix
[[156  16]
 [ 13 145]]


## Randomizedsearch CV

In [24]:
from sklearn.model_selection import RandomizedSearchCV

In [25]:
model_random = KNeighborsClassifier()

In [26]:
randomcv=RandomizedSearchCV(estimator=model,param_distributions=params,cv=5,scoring='accuracy')

In [27]:
randomcv.fit(x_train,y_train)

In [28]:
randomcv.best_score_

0.8970149253731343

In [29]:
randomcv.best_params_

{'weights': 'uniform', 'p': 1, 'n_neighbors': 10, 'algorithm': 'ball_tree'}

In [30]:
y_pred_rcv=randomcv.predict(x_test)

In [31]:
score_rcv=accuracy_score(y_pred_rcv,y_test)
print("accuracy of the model is :",score_rcv)
print("classification report is : ")
print(classification_report(y_pred_rcv,y_test))
print("Confusion matrix")
print(confusion_matrix(y_pred_rcv,y_test))

accuracy of the model is : 0.906060606060606
classification report is : 
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       176
           1       0.88      0.92      0.90       154

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

Confusion matrix
[[157  19]
 [ 12 142]]
