## K Nearest Neighbour Classifier

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [13]:
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=499
)

In [14]:
X

array([[-1.33488836, -0.2652031 ,  1.30531793],
       [-0.83825545,  1.51862308, -1.62231061],
       [-1.33446423, -0.58585688,  1.76969076],
       ...,
       [ 0.18351251, -0.20622187,  0.17222545],
       [-0.72662506, -0.18123932,  0.76397276],
       [ 0.74331362,  0.16024889, -0.74506941]])

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=50)

In [17]:
from sklearn.neighbors import KNeighborsClassifier

In [18]:
classifier = KNeighborsClassifier(n_neighbors=5, algorithm='auto')
classifier.fit(X_train, y_train)

In [19]:
y_pred = classifier.predict(X_test)

In [21]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [22]:
print(confusion_matrix(y_pred, y_test))
print(accuracy_score(y_pred, y_test))
print(classification_report(y_pred, y_test))

[[149  19]
 [  9 153]]
0.9151515151515152
              precision    recall  f1-score   support

           0       0.94      0.89      0.91       168
           1       0.89      0.94      0.92       162

    accuracy                           0.92       330
   macro avg       0.92      0.92      0.92       330
weighted avg       0.92      0.92      0.92       330



In [23]:
from sklearn.model_selection import GridSearchCV

In [45]:
param_grid = {
    'n_neighbors' : [x for x in range(1, 30)],
    'weights' : ['uniform', 'distance'],
    'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute']
}

In [46]:
classifier = KNeighborsClassifier()

In [47]:
clf = GridSearchCV(classifier, param_grid = param_grid, cv = 5, scoring='accuracy', verbose=3)

In [48]:
clf.fit(X_train, y_train)

Fitting 5 folds for each of 232 candidates, totalling 1160 fits
[CV 1/5] END algorithm=auto, n_neighbors=1, weights=uniform;, score=0.948 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=1, weights=uniform;, score=0.896 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=1, weights=uniform;, score=0.910 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=1, weights=uniform;, score=0.963 total time=   0.0s
[CV 5/5] END algorithm=auto, n_neighbors=1, weights=uniform;, score=0.888 total time=   0.0s
[CV 1/5] END algorithm=auto, n_neighbors=1, weights=distance;, score=0.948 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=1, weights=distance;, score=0.896 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=1, weights=distance;, score=0.910 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=1, weights=distance;, score=0.963 total time=   0.0s
[CV 5/5] END algorithm=auto, n_neighbors=1, weights=distance;, score=0.888 total time=   0.0s
[

In [49]:
clf.best_params_

{'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}

In [50]:
y_pred = clf.predict(X_test)

In [51]:
accuracy_score(y_pred, y_test)

0.9151515151515152