In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import make_classification

In [14]:
# Synthetic binary-classification dataset: 1000 samples with 3 features,
# one of which is redundant. The fixed random_state makes it reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

In [15]:
# Display the generated feature matrix (one row per sample, 3 feature columns).
X

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
# Hold out 30% of the samples as a test set; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

In [18]:
# model training
from sklearn.neighbors import KNeighborsClassifier

In [19]:
# Baseline k-nearest-neighbors model with the library's default hyperparameters.
classifier = KNeighborsClassifier()

In [20]:
# Fit the baseline model on the training split only.
classifier.fit(X_train, y_train)

In [21]:
# Predict labels for the held-out test split with the baseline model.
y_pred = classifier.predict(X_test)

In [22]:
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix

In [23]:
# Report accuracy, confusion matrix and per-class metrics (in that order)
# for the current y_pred against the held-out labels.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

0.9066666666666666
[[144  10]
 [ 18 128]]
              precision    recall  f1-score   support

           0       0.89      0.94      0.91       154
           1       0.93      0.88      0.90       146

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



### Hyperparameter tuning

In [24]:
from sklearn.model_selection import GridSearchCV

In [26]:
# Search space for the KNN grid search: 10 x 3 x 2 = 60 candidate combinations.
# NOTE(review): 'algorithm' selects the neighbor-search method; presumably it
# affects speed rather than predictions -- confirm before keeping it in the grid.
param_grid = {
    "n_neighbors": list(range(1, 11)),
    "algorithm": ["ball_tree", "kd_tree", "brute"],
    "p": [1, 2],
}

In [27]:
# Exhaustive search over param_grid with a fresh KNN estimator, scored by accuracy.
# verbose=2 prints one "[CV] END ..." line per individual fit.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    verbose=2,
)

In [28]:
# Run the cross-validated search on the training split only,
# so the test split stays unseen during tuning.
grid.fit(X_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=2; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=2; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=2; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=2; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=1, p=2; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_neighbors=2, p=1; total time=   0.0s
[CV] END ............algorithm=ball_tree, n_nei

In [29]:
# Show the parameter combination the grid search selected as best.
grid.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 9, 'p': 1}

In [30]:
# Predict on the held-out test split with the fitted grid search.
# NOTE: this rebinds y_pred, replacing the baseline classifier's predictions.
y_pred = grid.predict(X_test)

In [31]:
# Same three reports as for the baseline (accuracy, confusion matrix,
# classification report), now for the grid-search predictions in y_pred.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

0.9133333333333333
[[143  11]
 [ 15 131]]
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       154
           1       0.92      0.90      0.91       146

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300

