In [49]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline

In [53]:
import warnings
warnings.filterwarnings('ignore')

In [33]:
X, y = make_classification(
    n_samples = 1000, # 1000 observations
    n_features = 3, # Total Features = 3
    n_redundant = 1,
    n_classes = 2, # Binary Target/Label
    random_state = 999
)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [37]:
classifier = KNeighborsClassifier(n_neighbors=2, algorithm='auto')
classifier.fit(X_train, y_train)

In [39]:
y_pred = classifier.predict(X_test)
print(f"Accuracy Score : {accuracy_score(y_test, y_pred)}")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy Score : 0.8727272727272727
[[158  11]
 [ 31 130]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.88       169
           1       0.92      0.81      0.86       161

    accuracy                           0.87       330
   macro avg       0.88      0.87      0.87       330
weighted avg       0.88      0.87      0.87       330



# Solving the problem with Gridsearch CV

In [75]:
params = {'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9],
          'p': [1, 2],
         'algorithm': ['ball_tree', 'kd_tree', 'auto']}

In [77]:
grid = GridSearchCV(KNeighborsClassifier(), param_grid=params, refit=True, cv=5, verbose=3)

In [79]:
grid.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END algorithm=ball_tree, n_neighbors=2, p=1;, score=0.881 total time=   0.0s
[CV 2/5] END algorithm=ball_tree, n_neighbors=2, p=1;, score=0.903 total time=   0.0s
[CV 3/5] END algorithm=ball_tree, n_neighbors=2, p=1;, score=0.858 total time=   0.0s
[CV 4/5] END algorithm=ball_tree, n_neighbors=2, p=1;, score=0.851 total time=   0.0s
[CV 5/5] END algorithm=ball_tree, n_neighbors=2, p=1;, score=0.866 total time=   0.0s
[CV 1/5] END algorithm=ball_tree, n_neighbors=2, p=2;, score=0.881 total time=   0.0s
[CV 2/5] END algorithm=ball_tree, n_neighbors=2, p=2;, score=0.903 total time=   0.0s
[CV 3/5] END algorithm=ball_tree, n_neighbors=2, p=2;, score=0.858 total time=   0.0s
[CV 4/5] END algorithm=ball_tree, n_neighbors=2, p=2;, score=0.843 total time=   0.0s
[CV 5/5] END algorithm=ball_tree, n_neighbors=2, p=2;, score=0.873 total time=   0.0s
[CV 1/5] END algorithm=ball_tree, n_neighbors=3, p=1;, score=0.933 total time=

In [81]:
grid.best_estimator_

In [83]:
grid.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 9, 'p': 2}

In [85]:
grid.best_score_

0.9029850746268657

In [87]:
y_pred_grid = grid.predict(X_test)

In [89]:
print(f"Accuracy Score : {accuracy_score(y_test, y_pred_grid)}")
print(confusion_matrix(y_test, y_pred_grid))
print(classification_report(y_test, y_pred_grid))

Accuracy Score : 0.9121212121212121
[[156  13]
 [ 16 145]]
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

