# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the bin-statistics table from the CSV in the working directory.
dataset = pd.read_csv('image_bins_stats_bmean.csv')

# Feature matrix: the eight bmean_bins* columns, picked explicitly by name.
X = dataset.loc[:, [
    'bmean_bins0',
    'bmean_bins1',
    'bmean_bins2',
    'bmean_bins3',
    'bmean_bins4',
    'bmean_bins5',
    'bmean_bins6',
    'bmean_bins7',
]]

# Target vector: the last column of the file, as a NumPy array.
# NOTE(review): presumably this is the class label column - confirm against the CSV.
y = dataset.iloc[:, -1].values

In [3]:
# Text dump of the feature DataFrame; pandas elides the middle rows of a long frame.
print(X)

       bmean_bins0  bmean_bins1  bmean_bins2  bmean_bins3  bmean_bins4  \
0         3.238728   163.416667          0.0          0.0     0.000000   
1         1.294467   147.000000          0.0          0.0     0.000000   
2         3.003968   161.500000          0.0          0.0   153.346154   
3         1.093969   144.250000          0.0          0.0     0.000000   
4         1.368313     0.000000          0.0          0.0     0.000000   
...            ...          ...          ...          ...          ...   
23994     1.479808     0.000000          0.0          0.0     0.000000   
23995     1.482517     0.000000          0.0          0.0     0.000000   
23996     1.491798     0.000000          0.0          0.0     0.000000   
23997     1.523047     0.000000          0.0          0.0     0.000000   
23998     1.701011     0.000000          0.0          0.0     0.000000   

       bmean_bins5  bmean_bins6  bmean_bins7  
0       177.137931     0.000000   214.994616  
1       167.90000

In [4]:
# Text dump of the target array; NumPy abbreviates long arrays with "...".
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for testing; the fixed random_state makes
# the shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.25,
    random_state = 0,
)

In [6]:
# Training features straight after the split (still a DataFrame that keeps the original row labels).
print(X_train)

       bmean_bins0  bmean_bins1  bmean_bins2  bmean_bins3  bmean_bins4  \
6856      0.202875     0.000000          0.0          0.0     0.000000   
19867     0.322276     0.000000          0.0          0.0     0.000000   
22163     1.188329     0.000000          0.0          0.0     0.000000   
15231     0.377778     0.000000          0.0          0.0     0.000000   
9716      2.675175   156.333333          0.0          0.0   143.595745   
...            ...          ...          ...          ...          ...   
13123     1.581616     0.000000          0.0          0.0     0.000000   
19648     0.198078     0.000000          0.0          0.0     0.000000   
9845      3.610692   148.000000        122.1        139.0   131.775281   
10799     3.153682   162.315789          0.0          0.0     0.000000   
2732      4.683243   142.454545          0.0          0.0   136.000000   

       bmean_bins5  bmean_bins6  bmean_bins7  
6856    138.623377     0.000000   160.331154  
19867     0.00000

In [7]:
# Training labels that correspond to the rows of X_train.
print(y_train)

[1 0 0 ... 1 1 1]


In [8]:
# Held-out test features straight after the split (still a DataFrame that keeps the original row labels).
print(X_test)

       bmean_bins0  bmean_bins1  bmean_bins2  bmean_bins3  bmean_bins4  \
5118      0.466565     0.000000          0.0          0.0     0.000000   
10283     1.650472   139.222222          0.0          0.0     0.000000   
6208      0.185614     0.000000          0.0          0.0     0.000000   
3361      4.196513   149.576923          0.0          0.0     0.000000   
7067      0.448416     0.000000          0.0          0.0   130.333333   
...            ...          ...          ...          ...          ...   
11969     1.028877   119.000000          0.0          0.0   115.600000   
15355     0.201008     0.000000          0.0          0.0     0.000000   
3464      1.505532     0.000000          0.0          0.0     0.000000   
8441      1.286241     0.000000          0.0          0.0     0.000000   
12300     1.117400     0.000000          0.0          0.0     0.000000   

       bmean_bins5  bmean_bins6  bmean_bins7  
5118      0.000000          0.0   158.116885  
10283   153.25316

In [9]:
# Held-out test labels that correspond to the rows of X_test.
print(y_test)

[1 1 1 ... 1 1 0]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Learn per-feature mean / standard deviation from the training rows only,
# then apply that same transformation to both splits so the test set never
# influences the scaling statistics.
sc = StandardScaler()
sc.fit(X_train)

# From here on X_train / X_test are the scaler's output, not the
# original DataFrames.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [11]:
# X_train as reassigned from the StandardScaler output.
print(X_train)

[[-0.87848225 -0.4791117  -0.09555524 ...  0.83574953 -0.24323451
  -1.28884863]
 [-0.77970072 -0.4791117  -0.09555524 ... -0.94813652 -0.24323451
  -1.19764222]
 [-0.06321131 -0.4791117  -0.09555524 ... -0.94813652 -0.24323451
  -0.05057695]
 ...
 [ 1.94082047  2.08895958 11.40211115 ...  1.14928465  3.72429511
  -0.03222136]
 [ 1.56273413  2.33736477 -0.09555524 ...  1.30047855 -0.24323451
   1.17607284]
 [ 2.82814733  1.99273578 -0.09555524 ...  1.04376915 -0.24323451
  -0.0432475 ]]


In [12]:
# X_test as reassigned from the output of the scaler fitted on the training set.
print(X_test)

[[-0.66032961 -0.4791117  -0.09555524 ... -0.94813652 -0.24323451
  -1.40618254]
 [ 0.31912146  1.93664905 -0.09555524 ...  1.02401413 -0.24323451
   0.80089953]
 [-0.89276192 -0.4791117  -0.09555524 ...  0.75284467 -0.24323451
  -0.59024702]
 ...
 [ 0.19921233 -0.4791117  -0.09555524 ...  0.95502216 -0.24323451
   0.03344418]
 [ 0.01779207 -0.4791117  -0.09555524 ...  1.31097661 -0.24323451
   0.79864218]
 [-0.12189078 -0.4791117  -0.09555524 ... -0.94813652 -0.24323451
   1.72311742]]


## Training the K-NN model on the Training set

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours vote; Minkowski distance with p = 2 is the
# ordinary Euclidean distance.
classifier = KNeighborsClassifier(
    n_neighbors = 5,
    metric = 'minkowski',
    p = 2,
)

# Kept as the cell's final expression so the fitted estimator is displayed.
classifier.fit(X_train, y_train)

KNeighborsClassifier()

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Predict the held-out rows with the fitted classifier.
y_pred = classifier.predict(X_test)

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true = y_test, y_pred = y_pred)
print(cm)

# Left as the last expression so the notebook displays the accuracy value.
accuracy_score(y_true = y_test, y_pred = y_pred)

[[2848  110]
 [ 201 2841]]


0.9481666666666667

In [15]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 and support for the test-set predictions.
print(classification_report(y_true = y_test, y_pred = y_pred))

              precision    recall  f1-score   support

           0       0.93      0.96      0.95      2958
           1       0.96      0.93      0.95      3042

    accuracy                           0.95      6000
   macro avg       0.95      0.95      0.95      6000
weighted avg       0.95      0.95      0.95      6000



In [16]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

print(confusion_matrix(y_test, y_pred))

# Each entry pairs an output template with the metric that fills it; the
# lines are printed in this order. 'Sensitivity' and 'Recall' both use
# recall_score, so they always report the same number.
for template, scorer in (
    ('Accuracy: {0:.3f}', metrics.accuracy_score),
    ('F1 Score: {0:.3f}', metrics.f1_score),
    ('Sensitivity: {0:.3f}', metrics.recall_score),
    ('Precision: {0:.3f}', metrics.precision_score),
    ('Recall: {0:.3f}', metrics.recall_score),
):
    print(template.format(scorer(y_test, y_pred)))

[[2848  110]
 [ 201 2841]]
Accuracy: 0.948
F1 Score: 0.948
Sensitivity: 0.934
Precision: 0.963
Recall: 0.934


In [17]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 10-fold cross-validation of the K-NN model on the training split.
#
# X_train was standardised earlier with statistics computed over the whole
# training set, so scoring `classifier` directly would let every validation
# fold influence its own preprocessing (data leakage). Wrapping the model in
# a pipeline refits a StandardScaler on the training part of each fold only.
# Standardising is unaffected by an earlier per-feature shift/scale, so this
# gives the same result as scaling each fold from the raw features (as long
# as no feature is constant within a fold).
#
# cross_val_score clones the estimator it is given, so the already fitted
# `classifier` used by the other cells is left untouched.
cv_model = make_pipeline(StandardScaler(), classifier)
accuracies = cross_val_score(estimator = cv_model, X = X_train, y = y_train, cv = 10)

# Mean and spread of the per-fold accuracies, as percentages.
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 95.25 %
Standard Deviation: 0.43 %
