# K Nearest Neighbour Classifier

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [6]:
from sklearn.datasets import make_classification
x,y = make_classification(n_samples= 1000,n_features= 3 , n_redundant=1,n_classes = 2,random_state = 999)

In [8]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size =0.33,random_state = 42)

In [10]:
from sklearn.neighbors import KNeighborsClassifier
knn= KNeighborsClassifier(n_neighbors= 5 ,algorithm='auto')

In [11]:
knn.fit(x_train,y_train)
y_pred = knn.predict(x_test)

In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



# HyperParameter Tuning

## GridSearch CV

In [20]:
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 15, 2],
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    #'leaf_size': [20, 30, 40]  # This is optional and used only with ball_tree and kd_tree
}

In [21]:
from sklearn.model_selection import GridSearchCV
cv =  GridSearchCV(
    estimator=knn,
    param_grid=param_grid,  
    refit=True,
    verbose=3,
    cv=5,
    n_jobs = -1
)


In [22]:
cv.fit(x_train, y_train)

Fitting 5 folds for each of 112 candidates, totalling 560 fits


In [23]:
cv.best_params_

{'algorithm': 'auto', 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [24]:
cv1_prediction = cv.predict(x_test)

In [25]:
# Evaluate predictions
print(confusion_matrix(y_test, cv1_prediction))
print(accuracy_score(y_test, cv1_prediction))
print(classification_report(y_test, cv1_prediction))

[[156  13]
 [ 16 145]]
0.9121212121212121
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

