### Implementing KNN on a Classification Problem

In [1]:
# importing required libraries

import pandas as pd
import numpy as np

In [3]:
# creating a dataset

from sklearn.datasets import make_classification
X,y=make_classification(
    n_samples=1000,
    n_classes=3,
    n_features=4,
    n_redundant=1,
    n_clusters_per_class=1
)

In [4]:
pd.DataFrame(X)

Unnamed: 0,0,1,2,3
0,1.608922,-0.375037,-0.260706,0.929034
1,-2.151695,2.136324,-1.028767,-0.950903
2,-0.857547,-1.592109,1.584371,-0.814754
3,1.337265,-0.205187,0.125269,0.791170
4,-1.819599,1.720949,-0.604845,-0.819414
...,...,...,...,...
995,-0.058065,0.179764,1.030787,-0.003883
996,-1.163746,0.973230,0.866805,-0.546790
997,-1.030961,0.662333,-0.233400,-0.520042
998,-1.121816,1.467632,-0.700391,-0.432665


In [5]:
y

array([0, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 2, 2, 1, 0, 2, 1,
       1, 2, 2, 2, 0, 0, 1, 1, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 1, 2, 0, 1,
       1, 0, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 2, 1, 0, 2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1,
       2, 2, 2, 2, 1, 2, 0, 2, 0, 2, 1, 1, 0, 1, 0, 0, 1, 1, 0, 2, 2, 2,
       1, 0, 1, 1, 1, 0, 2, 0, 2, 2, 1, 2, 1, 2, 2, 2, 0, 0, 1, 2, 2, 1,
       1, 1, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 1, 0, 1, 2, 2, 1, 2, 1,
       1, 1, 0, 2, 2, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 0, 1, 2, 0, 1, 2, 1,
       0, 1, 0, 0, 2, 1, 2, 1, 1, 2, 0, 1, 2, 1, 2, 0, 1, 2, 0, 0, 1, 2,
       0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 1, 2, 0, 2, 0, 0, 2, 2, 0,
       0, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 0, 0, 1, 0, 1, 2, 2, 1, 1, 0, 2,
       2, 0, 1, 1, 0, 1, 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 2,
       1, 1, 2, 1, 0, 2, 2, 0, 1, 0, 2, 1, 2, 1, 0, 2, 0, 2, 2, 0, 2, 1,
       1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 0, 0, 2, 2,

In [7]:
# creating the KNN ML model

from sklearn.neighbors import KNeighborsClassifier
knn_classifier=KNeighborsClassifier(n_neighbors=5)

In [8]:
# splitting our data into training and testing splits

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=23)

In [9]:
# fitting our data to the model

knn_classifier.fit(X_train, y_train)

In [10]:
# making predictions with our model

y_pred=knn_classifier.predict(X_test)

In [11]:
# scoring our model

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[96  4  1]
 [ 2 76 10]
 [ 2 11 98]]
              precision    recall  f1-score   support

           0       0.96      0.95      0.96       101
           1       0.84      0.86      0.85        88
           2       0.90      0.88      0.89       111

    accuracy                           0.90       300
   macro avg       0.90      0.90      0.90       300
weighted avg       0.90      0.90      0.90       300



In [12]:
# trying for different values of k and finding the best k value

from sklearn.metrics import accuracy_score

highest_acc_score=0
k_best=0
for i in range(1,11):
    test_knn_classifier=KNeighborsClassifier(n_neighbors=i)
    test_knn_classifier.fit(X_train, y_train)
    test_y_pred=test_knn_classifier.predict(X_test)
    acc_score=accuracy_score(y_test, test_y_pred)
    if(acc_score>highest_acc_score):
        highest_acc_score=acc_score
        k_best=i

print(f"Best Parametrs are:\nK:{k_best}\nAccuracy Score:{highest_acc_score}")
    

Best Parametrs are:
K:7
Accuracy Score:0.9166666666666666
