# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the bin-statistics table from the CSV sitting next to the notebook.
dataset = pd.read_csv('image_bins_stats_bskew.csv')

# Features: the eight columns bskew_bins0 .. bskew_bins7, kept as a DataFrame.
X = dataset[['bskew_bins{}'.format(i) for i in range(8)]]

# Labels: the last column of the table, extracted as a NumPy array.
y = dataset.iloc[:, -1].values

In [3]:
# Preview the feature DataFrame (pandas abbreviates long frames when printed).
print(X)

       bskew_bins0  bskew_bins1  bskew_bins2  bskew_bins3  bskew_bins4  \
0        32.843447     2.701954          0.0          0.0     0.000000   
1         5.277079     0.793701          0.0          0.0     0.000000   
2        29.440846     0.396850          0.0          0.0     6.863098   
3         4.902997     2.390244          0.0          0.0     0.000000   
4         5.179433     0.000000          0.0          0.0     0.000000   
...            ...          ...          ...          ...          ...   
23995     3.820777     0.000000          0.0          0.0     0.000000   
23996     3.785639     0.000000          0.0          0.0     0.000000   
23997     3.741288     0.000000          0.0          0.0     0.000000   
23998     3.503925     0.000000          0.0          0.0     0.000000   
23999     4.148065     0.000000          0.0          0.0     0.000000   

       bskew_bins5  bskew_bins6  bskew_bins7  
0         7.871852     0.000000     7.358843  
1        13.63686

In [4]:
# Preview the label array.
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Preview the training features produced by the split.
print(X_train)

       bskew_bins0  bskew_bins1  bskew_bins2  bskew_bins3  bskew_bins4  \
11524     3.600737     0.000000          0.0          0.0     0.000000   
15653     0.700565     0.000000          0.0          0.0     0.000000   
7256      1.139135     0.000000          0.0          0.0     5.726467   
15235     1.123231     0.000000          0.0          0.0     0.000000   
9717     21.224799     2.511144          0.0          0.0     4.517168   
...            ...          ...          ...          ...          ...   
13123     3.567899     0.000000          0.0          0.0     0.000000   
19648     0.707402     0.000000          0.0          0.0     0.000000   
9845     29.594795     5.929854          0.0          0.0     1.681071   
10799     4.966372     0.000000          0.0          0.0     0.000000   
2732     34.492614     3.638295          0.0          0.0     0.000000   

       bskew_bins5  bskew_bins6  bskew_bins7  
11524     7.516509          0.0     6.581319  
15653     0.00000

In [7]:
# Preview the training labels produced by the split.
print(y_train)

[1 0 1 ... 1 1 1]


In [8]:
# Preview the test features produced by the split.
print(X_test)

       bskew_bins0  bskew_bins1  bskew_bins2  bskew_bins3  bskew_bins4  \
5118      1.757718     0.000000          0.0          0.0     0.000000   
10284     5.558420     2.043469          0.0          0.0     0.000000   
6208      1.043045     0.000000          0.0          0.0     0.000000   
3361     32.988147     3.671563          0.0          0.0     0.000000   
7068      1.226714     0.000000          0.0          0.0     0.000000   
...            ...          ...          ...          ...          ...   
15607     1.236646     0.000000          0.0          0.0     0.000000   
9154     36.650708     1.582749          0.0          0.0     7.033945   
3464      5.047560     0.000000          0.0          0.0     0.000000   
9808      5.498535     0.396850          0.0          0.0     1.637622   
6956      1.074451     0.000000          0.0          0.0     0.000000   

       bskew_bins5  bskew_bins6  bskew_bins7  
5118      0.000000          0.0    12.683617  
10284     9.57134

In [9]:
# Preview the test labels produced by the split.
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training rows
# only, so nothing about the test rows influences the scaling.
sc = StandardScaler().fit(X_train)

# Apply the same fitted scaling to both splits. Note that X_train and X_test
# are rebound to the scaled arrays here; the unscaled frames are no longer
# reachable under these names after this cell runs.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [11]:
# Preview the training features after feature scaling.
print(X_train)

[[-0.36029554 -0.35932595 -0.05276475 ...  0.64152879 -0.15476765
  -0.45450346]
 [-0.67491386 -0.35932595 -0.05276475 ... -0.85196424 -0.15476765
   0.63755883]
 [-0.62733656 -0.35932595 -0.05276475 ...  1.08410914 -0.15476765
   2.00986548]
 ...
 [ 2.45960846  4.57340218 -0.05276475 ...  1.23675523 -0.15476765
   0.32884459]
 [-0.2121478  -0.35932595 -0.05276475 ... -0.10602402 -0.15476765
   0.59632865]
 [ 2.99093678  2.66717658 -0.05276475 ...  0.9061054  -0.15476765
   0.64292792]]


In [12]:
# Preview the test features after feature scaling.
print(X_test)

[[-0.5602311  -0.35932595 -0.05276475 ... -0.85196424 -0.15476765
   1.3827838 ]
 [-0.14792093  1.34052683 -0.05276475 ...  1.04981425 -0.15476765
   0.32027132]
 [-0.63776068 -0.35932595 -0.05276475 ...  0.00334762 -0.15476765
  -0.22042876]
 ...
 [-0.20334032 -0.35932595 -0.05276475 ...  0.99265648 -0.15476765
  -0.14171581]
 [-0.15441735 -0.02920747 -0.05276475 ...  0.90383218 -0.15476765
  -0.00960335]
 [-0.63435373 -0.35932595 -0.05276475 ...  0.65055405 -0.15476765
   1.32849162]]


## Training the K-NN model on the Training set

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# Vote among the 5 nearest neighbours; the Minkowski metric with p=2 is the
# ordinary Euclidean distance.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)

# Kept as the final expression so the fitted estimator is echoed as cell output.
classifier.fit(X_train, y_train)

KNeighborsClassifier()

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict labels for the held-out test rows.
y_pred = classifier.predict(X_test)

# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Final expression: the overall accuracy is echoed as the cell's output value.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[2873   86]
 [ 145 2896]]


0.9615

In [15]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support for the test-set predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.95      0.97      0.96      2959
           1       0.97      0.95      0.96      3041

    accuracy                           0.96      6000
   macro avg       0.96      0.96      0.96      6000
weighted avg       0.96      0.96      0.96      6000



In [16]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Confusion matrix for the test-set predictions (true labels on the rows).
print(confusion_matrix(y_test, y_pred))

# Headline scores, printed one per line with three decimals.
# NOTE(review): "Sensitivity" and "Recall" both come from recall_score, so the
# two lines always show the same number -- confirm whether a different metric
# was intended for one of them.
for label, scorer in (
    ('Accuracy', metrics.accuracy_score),
    ('F1 Score', metrics.f1_score),
    ('Sensitivity', metrics.recall_score),
    ('Precision', metrics.precision_score),
    ('Recall', metrics.recall_score),
):
    print('{0}: {1:.3f}'.format(label, scorer(y_test, y_pred)))

[[2873   86]
 [ 145 2896]]
Accuracy: 0.962
F1 Score: 0.962
Sensitivity: 0.952
Precision: 0.971
Recall: 0.952


In [17]:
from sklearn.base import clone
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 10-fold cross-validation of the K-NN model on the training split.
#
# The scaler lives inside the pipeline so that it is re-fitted on the training
# part of every fold. Scoring the bare classifier on X_train would reuse scaling
# statistics computed from the whole training split, i.e. from rows that each
# fold later treats as unseen validation data.
#
# X_train has already been standardized by the feature-scaling cell; scaling it
# again per fold is harmless, because standardizing a column that was shifted
# and positively rescaled gives the same result as standardizing the raw column.
#
# clone() yields an unfitted copy with the same hyper-parameters, so the fitted
# `classifier` used for the test-set predictions is left untouched.
cv_model = make_pipeline(StandardScaler(), clone(classifier))
accuracies = cross_val_score(estimator=cv_model, X=X_train, y=y_train, cv=10)

# Mean and spread of the per-fold accuracies, as percentages.
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 96.28 %
Standard Deviation: 0.34 %
