# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('image_bins_stats_rstd.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
X = dataset.iloc[:, 1:].values

In [4]:
print(X)

[[11.47299349  1.94311847  0.         ...  0.          4.2391925
   1.        ]
 [ 0.62627967  0.          0.         ...  0.          5.75003946
   1.        ]
 [10.13296607  0.          0.         ...  0.          5.4924065
   1.        ]
 ...
 [ 0.6653804   0.          0.         ...  0.          5.19082469
   0.        ]
 [ 0.68957349  0.          0.         ...  7.73722214  5.84018037
   0.        ]
 [ 0.70063203  0.          0.         ...  0.          5.531078
   0.        ]]


In [5]:
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [7]:
print(X_train)

[[ 0.75267894  0.          0.         ...  0.          4.72223854
   1.        ]
 [ 0.89413628  0.          0.         ...  0.          6.33519204
   0.        ]
 [ 1.87559177  0.          0.         ...  0.          6.36874859
   1.        ]
 ...
 [10.53910908  2.35589996  0.         ...  0.          8.36097702
   1.        ]
 [ 0.77291508  0.          0.         ...  0.          3.75911814
   1.        ]
 [13.83790214  1.73501636  0.         ...  0.          8.7514359
   1.        ]]


In [8]:
print(y_train)

[1 0 1 ... 1 1 1]


In [9]:
print(X_test)

[[1.60749525 0.         0.         ... 0.         5.41081759 1.        ]
 [0.63356695 0.75504713 0.         ... 0.         5.69443138 1.        ]
 [0.82874811 0.         0.         ... 0.         3.27176973 1.        ]
 ...
 [0.62286496 0.         0.         ... 0.         4.68729276 1.        ]
 [0.79029336 0.35355339 0.         ... 0.         8.65407489 1.        ]
 [1.68777535 0.         0.         ... 0.         4.53368809 1.        ]]


In [10]:
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
print(X_train)

[[-0.48937732 -0.34241367 -0.05590587 ... -0.16019321 -0.41547319
   1.00456598]
 [-0.45175701 -0.34241367 -0.05590587 ... -0.16019321  0.4612004
  -0.99545477]
 [-0.19074079 -0.34241367 -0.05590587 ... -0.16019321  0.47943908
   1.00456598]
 ...
 [ 2.11330516  2.63803803 -0.05590587 ... -0.16019321  1.56225645
   1.00456598]
 [-0.48399556 -0.34241367 -0.05590587 ... -0.16019321 -0.93894905
   1.00456598]
 [ 2.99061289  1.85255749 -0.05590587 ... -0.16019321  1.77447893
   1.00456598]]


In [13]:
print(X_test)

[[-0.26204055 -0.34241367 -0.05590587 ... -0.16019321 -0.04121623
   1.00456598]
 [-0.52105493  0.6127973  -0.05590587 ... -0.16019321  0.11293373
   1.00456598]
 [-0.46914687 -0.34241367 -0.05590587 ... -0.16019321 -1.20383299
   1.00456598]
 ...
 [-0.5239011  -0.34241367 -0.05590587 ... -0.16019321 -0.43446694
   1.00456598]
 [-0.47937384  0.10486712 -0.05590587 ... -0.16019321  1.7215612
   1.00456598]
 [-0.24069021 -0.34241367 -0.05590587 ... -0.16019321 -0.51795426
   1.00456598]]


## Training the K-NN model on the Training set

In [14]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

KNeighborsClassifier()

## Making the Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[2946   13]
 [   1 3040]]


0.9976666666666667

In [16]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2959
           1       1.00      1.00      1.00      3041

    accuracy                           1.00      6000
   macro avg       1.00      1.00      1.00      6000
weighted avg       1.00      1.00      1.00      6000



In [17]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
print(confusion_matrix(y_test, y_pred))

print('Accuracy: {0:.3f}'.format(metrics.accuracy_score(y_test, y_pred)))
print('F1 Score: {0:.3f}'.format(metrics.f1_score(y_test, y_pred)))
print('Sensitivity: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))
print('Precision: {0:.3f}'.format(metrics.precision_score(y_test, y_pred)))
print('Recall: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))

[[2946   13]
 [   1 3040]]
Accuracy: 0.998
F1 Score: 0.998
Sensitivity: 1.000
Precision: 0.996
Recall: 1.000
