# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the per-image statistics table from disk.
dataset = pd.read_csv('image_bins_stats_bkurto.csv')

# Features: the eight columns bkurto_bins0 .. bkurto_bins7 (kept as a DataFrame).
feature_columns = ['bkurto_bins{}'.format(bin_index) for bin_index in range(8)]
X = dataset[feature_columns]

# Target: whatever sits in the last column of the file, as a plain array.
y = dataset.iloc[:, -1].values

In [3]:
# Preview the selected feature columns (pandas abbreviates the printed frame).
print(X)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
0         47.621590      3.092351           0.0           0.0      0.000000   
1          7.154429      0.840896           0.0           0.0      0.000000   
2         43.394240      0.420448           0.0           0.0      7.701832   
3          6.601182      2.619225           0.0           0.0      0.000000   
4          6.869720      0.000000           0.0           0.0      0.000000   
...             ...           ...           ...           ...           ...   
23995      8.170191      0.000000           0.0           0.0      0.000000   
23996      7.859719      0.000000           0.0           0.0      0.000000   
23997      7.887594      0.000000           0.0           0.0      0.000000   
23998      7.406609      0.000000           0.0           0.0      0.000000   
23999      8.664782      0.000000           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
0

In [4]:
# Preview the values taken from the last column of the dataset.
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Inspect the training-split features returned by train_test_split.
print(X_train)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
11524      4.697539      0.000000           0.0           0.0      0.000000   
15653      1.569338      0.000000           0.0           0.0      0.000000   
7256       1.539324      0.000000           0.0           0.0      6.449485   
15235      1.854283      0.000000           0.0           0.0      0.000000   
9717      33.515377      2.692971           0.0           0.0      5.072606   
...             ...           ...           ...           ...           ...   
13123      6.592747      0.000000           0.0           0.0      0.000000   
19648      1.480392      0.000000           0.0           0.0      0.000000   
9845      43.518837      6.420400           0.0           0.0      1.808437   
10799      6.627811      0.000000           0.0           0.0      0.000000   
2732      47.035442      3.984198           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
1

In [7]:
# Inspect the training-split targets returned by train_test_split.
print(y_train)

[1 0 1 ... 1 1 1]


In [8]:
# Inspect the held-out test-split features returned by train_test_split.
print(X_test)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
5118       2.380153      0.000000           0.0           0.0      0.000000   
10284      7.340075      2.300521           0.0           0.0      0.000000   
6208       1.414617      0.000000           0.0           0.0      0.000000   
3361      46.133754      4.357952           0.0           0.0      0.000000   
7068       1.634967      0.000000           0.0           0.0      0.000000   
...             ...           ...           ...           ...           ...   
15607      2.425029      0.000000           0.0           0.0      0.000000   
9154      50.215545      1.812567           0.0           0.0      8.060602   
3464       6.637429      0.000000           0.0           0.0      0.000000   
9808       7.379634      0.420448           0.0           0.0      1.847616   
6956       1.420615      0.000000           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
5

In [9]:
# Inspect the held-out test-split targets returned by train_test_split.
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so the test data
# never influences the scaling parameters.
sc = StandardScaler()
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [11]:
# Inspect the training features after StandardScaler has been applied.
print(X_train)

[[-0.47546603 -0.3577115  -0.05292873 ...  0.63547352 -0.15345066
  -0.70327814]
 [-0.71061902 -0.3577115  -0.05292873 ... -0.85210606 -0.15345066
   1.38611849]
 [-0.71287525 -0.3577115  -0.05292873 ...  1.03913926 -0.15345066
   1.80518852]
 ...
 [ 2.44280714  4.47990857 -0.05292873 ...  1.23048779 -0.15345066
   0.21901956]
 [-0.33036374 -0.3577115  -0.05292873 ... -0.11553012 -0.15345066
   0.34979098]
 [ 2.70715725  2.64428778 -0.05292873 ...  0.8544216  -0.15345066
   0.49548712]]


In [12]:
# Inspect the test features after the training-fitted scaler has been applied.
print(X_test)

[[-0.64966851 -0.3577115  -0.05292873 ... -0.85210606 -0.15345066
   1.20922182]
 [-0.27682141  1.37567687 -0.05292873 ...  0.99129143 -0.15345066
   0.07000771]
 [-0.72224971 -0.3577115  -0.05292873 ...  0.05095222 -0.15345066
  -0.53336385]
 ...
 [-0.32964074 -0.3577115  -0.05292873 ...  0.92684206 -0.15345066
  -0.40704233]
 [-0.27384771 -0.04091369 -0.05292873 ...  0.84241394 -0.15345066
  -0.25378208]
 [-0.72179888 -0.3577115  -0.05292873 ...  0.65644747 -0.15345066
   0.7520308 ]]


## Training the K-NN model on the Training set

In [23]:
from sklearn.neighbors import KNeighborsClassifier

# 5 nearest neighbours; Minkowski distance with p = 2 is the Euclidean metric.
knn_settings = dict(n_neighbors=5, metric='minkowski', p=2)
classifier = KNeighborsClassifier(**knn_settings)

# Kept as the cell's last expression so the fitted estimator is displayed.
classifier.fit(X_train, y_train)

KNeighborsClassifier()

## Making the Confusion Matrix

In [24]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict on the held-out split; y_pred is reused by the later metric cells.
y_pred = classifier.predict(X_test)

# Rows of the matrix are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Last expression: overall accuracy, shown as the cell's output value.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[2860   99]
 [ 225 2816]]


0.946

In [25]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the test-set predictions.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.93      0.97      0.95      2959
           1       0.97      0.93      0.95      3041

    accuracy                           0.95      6000
   macro avg       0.95      0.95      0.95      6000
weighted avg       0.95      0.95      0.95      6000



In [26]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

print(confusion_matrix(y_test, y_pred))

# Each entry pairs an output template with the scorer that fills it in.
# Note: 'Sensitivity' and 'Recall' both call recall_score with the same
# arguments, so the two lines always report the same number.
metric_lines = (
    ('Accuracy: {0:.3f}', metrics.accuracy_score),
    ('F1 Score: {0:.3f}', metrics.f1_score),
    ('Sensitivity: {0:.3f}', metrics.recall_score),
    ('Precision: {0:.3f}', metrics.precision_score),
    ('Recall: {0:.3f}', metrics.recall_score),
)
for line_format, scorer in metric_lines:
    print(line_format.format(scorer(y_test, y_pred)))

[[2860   99]
 [ 225 2816]]
Accuracy: 0.946
F1 Score: 0.946
Sensitivity: 0.926
Precision: 0.966
Recall: 0.926


In [27]:
from sklearn.base import clone
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# X_train was standardized using statistics from the *whole* training split.
# Cross-validating `classifier` directly on it lets every validation fold
# contribute to the scaling of its own training folds (data leakage).
# Wrapping the model in a pipeline makes cross_val_score refit the scaler on
# the training part of each fold only. Re-standardizing already-standardized
# columns is a per-feature affine map, so apart from columns that happen to be
# constant within a fold this matches scaling the raw features inside each fold.
cv_model = make_pipeline(StandardScaler(), clone(classifier))

# 10-fold cross-validation on the training split; `classifier` itself is not refit.
accuracies = cross_val_score(estimator = cv_model, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 94.99 %
Standard Deviation: 0.36 %
