# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the per-image bin statistics. The eight bstd_bins* columns are used as
# features; whatever column comes last in the CSV is taken as the class label.
dataset = pd.read_csv('image_bins_stats_bstd.csv')
X = dataset.loc[
    :,
    [
        'bstd_bins0',
        'bstd_bins1',
        'bstd_bins2',
        'bstd_bins3',
        'bstd_bins4',
        'bstd_bins5',
        'bstd_bins6',
        'bstd_bins7',
    ],
]
y = dataset.iloc[:, -1].to_numpy()

In [3]:
# Sanity check: plain-text dump of the selected feature DataFrame.
print(X)

       bstd_bins0  bstd_bins1  bstd_bins2  bstd_bins3  bstd_bins4  bstd_bins5  \
0       16.049200    2.289502         0.0         0.0    0.000000    7.165589   
1        3.308282    0.707107         0.0         0.0    0.000000   11.991367   
2       14.322616    0.353553         0.0         0.0    5.876929    9.913628   
3        3.045115    2.077074         0.0         0.0    0.000000    9.200093   
4        3.343711    0.000000         0.0         0.0    0.000000    0.000000   
...           ...         ...         ...         ...         ...         ...   
23995    3.820777    0.000000         0.0         0.0    0.000000    0.000000   
23996    3.785639    0.000000         0.0         0.0    0.000000    0.000000   
23997    3.741288    0.000000         0.0         0.0    0.000000    0.000000   
23998    3.503925    0.000000         0.0         0.0    0.000000    0.000000   
23999    4.148065    0.000000         0.0         0.0    0.000000    0.000000   

       bstd_bins6  bstd_bin

In [4]:
# Sanity check: label array taken from the last column of the dataset.
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Sanity check: training features from the split (not yet scaled at this point).
print(X_train)

       bstd_bins0  bstd_bins1  bstd_bins2  bstd_bins3  bstd_bins4  bstd_bins5  \
11524    2.462189    0.000000         0.0         0.0    0.000000    6.636120   
15653    0.700565    0.000000         0.0         0.0    0.000000    0.000000   
7256     0.730785    0.000000         0.0         0.0    4.897551    8.710600   
15235    1.123231    0.000000         0.0         0.0    0.000000    0.000000   
9717     9.464486    2.238137         0.0         0.0    3.818913    7.222075   
...           ...         ...         ...         ...         ...         ...   
13123    3.567899    0.000000         0.0         0.0    0.000000    0.000000   
19648    0.707402    0.000000         0.0         0.0    0.000000    0.000000   
9845    14.237212    5.323906         0.0         0.0    1.509482    9.206914   
10799    3.158148    0.000000         0.0         0.0    0.000000    3.307752   
2732    19.035945    3.167779         0.0         0.0    0.000000    8.054865   

       bstd_bins6  bstd_bin

In [7]:
# Sanity check: labels belonging to the training rows.
print(y_train)

[1 0 1 ... 1 1 1]


In [8]:
# Sanity check: held-out test features from the split (not yet scaled at this point).
print(X_test)

       bstd_bins0  bstd_bins1  bstd_bins2  bstd_bins3  bstd_bins4  bstd_bins5  \
5118     1.164218    0.000000         0.0         0.0    0.000000    0.000000   
10284    3.688101    1.718037         0.0         0.0    0.000000    8.675163   
6208     0.639308    0.000000         0.0         0.0    0.000000    3.517225   
3361    17.455010    3.017825         0.0         0.0    0.000000    9.825642   
7068     0.793672    0.000000         0.0         0.0    0.000000    8.448370   
...           ...         ...         ...         ...         ...         ...   
15607    1.236646    0.000000         0.0         0.0    0.000000    2.412471   
9154    19.878765    1.301518         0.0         0.0    5.919044    6.890599   
3464     3.373206    0.000000         0.0         0.0    0.000000    8.452198   
9808     3.544724    0.353553         0.0         0.0    1.335935    8.072601   
6956     0.719335    0.000000         0.0         0.0    0.000000    6.639750   

       bstd_bins6  bstd_bin

In [9]:
# Sanity check: labels belonging to the held-out test rows.
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so the test set does not influence the
# scaling statistics. Note that X_train / X_test are rebound to the scaled
# arrays, so the unscaled splits are no longer available after this cell.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [11]:
# Sanity check: training features after scaling (now the scaler's array output
# rather than the original DataFrame).
print(X_train)

[[-0.34126195 -0.35966761 -0.05321077 ...  0.65019931 -0.15555019
  -0.79206692]
 [-0.72218256 -0.35966761 -0.05321077 ... -0.85101294 -0.15555019
   1.17706822]
 [-0.71564784 -0.35966761 -0.05321077 ...  1.11948501 -0.15555019
   1.64688372]
 ...
 [ 2.204882    4.69503956 -0.05321077 ...  1.23176025 -0.15555019
  -0.0886051 ]
 [-0.19077279 -0.35966761 -0.05321077 ... -0.10273866 -0.15555019
   0.35805987]
 [ 3.24252462  2.64793579 -0.05321077 ...  0.97114558 -0.15555019
   0.09689265]]


In [12]:
# Sanity check: test features after applying the scaler fitted on the training set.
print(X_test)

[[-0.62192552 -0.35966761 -0.05321077 ... -0.85101294 -0.15555019
   0.86538581]
 [-0.07617972  1.27149811 -0.05321077 ...  1.11146848 -0.15555019
   0.03817552]
 [-0.73542832 -0.35966761 -0.05321077 ... -0.05535196 -0.15555019
  -0.45859311]
 ...
 [-0.14427024 -0.35966761 -0.05321077 ...  1.06102959 -0.15555019
  -0.44675755]
 [-0.10718265 -0.02399134 -0.05321077 ...  0.97515788 -0.15555019
  -0.32463582]
 [-0.71812382 -0.35966761 -0.05321077 ...  0.65102049 -0.15555019
   1.52859537]]


## Training the K-NN model on the Training set

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbour classifier. Minkowski distance with p = 2 is the
# ordinary Euclidean distance.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
# Left as the final expression so the fitted estimator's repr is displayed.
classifier.fit(X_train, y_train)

KNeighborsClassifier()

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict the held-out rows, then summarise the errors: the confusion matrix
# has true classes as rows and predicted classes as columns.
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as the final expression so the overall accuracy is shown as the cell output.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[2857  102]
 [ 220 2821]]


0.9463333333333334

In [15]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 and support for the test-set predictions.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
    )
)

              precision    recall  f1-score   support

           0       0.93      0.97      0.95      2959
           1       0.97      0.93      0.95      3041

    accuracy                           0.95      6000
   macro avg       0.95      0.95      0.95      6000
weighted avg       0.95      0.95      0.95      6000



In [16]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

# NOTE(review): `svm` and `sns` are not used in this cell; the imports are
# kept in case other cells rely on them -- confirm and drop if unused.

# Confusion matrix for the test-set predictions (rows: true, columns: predicted).
print(confusion_matrix(y_test, y_pred))

# "Sensitivity" and "Recall" are two names for the same quantity
# (TP / (TP + FN) for the positive class), so compute it once and report it
# under both labels instead of calling recall_score twice.
recall_value = metrics.recall_score(y_test, y_pred)

print('Accuracy: {0:.3f}'.format(metrics.accuracy_score(y_test, y_pred)))
print('F1 Score: {0:.3f}'.format(metrics.f1_score(y_test, y_pred)))
print('Sensitivity: {0:.3f}'.format(recall_value))
print('Precision: {0:.3f}'.format(metrics.precision_score(y_test, y_pred)))
print('Recall: {0:.3f}'.format(recall_value))

[[2857  102]
 [ 220 2821]]
Accuracy: 0.946
F1 Score: 0.946
Sensitivity: 0.928
Precision: 0.965
Recall: 0.928


In [17]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the classifier on the training split; report the
# mean fold accuracy and its spread as percentages.
# NOTE(review): X_train was standardised earlier with a scaler fitted on the
# whole training split, so every validation fold contributed to the scaling
# statistics. Cross-validating a Pipeline (scaler + classifier) on unscaled
# data would avoid that -- confirm whether it matters for this analysis.
accuracies = cross_val_score(
    estimator=classifier,
    X=X_train,
    y=y_train,
    cv=10,
)
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))

Accuracy: 95.09 %
Standard Deviation: 0.43 %
