# KNN Classifier

### Import Libraries

In [1]:
import pandas as pd
import numpy  as np

from matplotlib        import pyplot  as plt
from sklearn           import metrics as mt
from sklearn.neighbors import KNeighborsClassifier

### Load Dataset

In [2]:
# Read the three splits from disk. Each split folder provides two CSV files,
# loaded here as a pair of DataFrames in the order (X, y).
X_train, y_train = (
    pd.read_csv(csv_path)
    for csv_path in ('Training/X_training.csv', 'Training/y_training.csv')
)
X_val, y_val = (
    pd.read_csv(csv_path)
    for csv_path in ('Validation/X_validation.csv', 'Validation/y_validation.csv')
)
X_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('Test/X_test.csv', 'Test/y_test.csv')
)

In [3]:
# Turn the training and validation target frames into flat 1-D NumPy arrays
# (presumably each CSV holds a single target column -- confirm against the data).
# y_test is intentionally left untouched, as in the rest of the notebook.
y_train, y_val = (target.values.ravel() for target in (y_train, y_val))

### Training

In [4]:
# Candidate neighbourhood sizes: the odd values 3, 5, ..., 19.
k = np.arange(3, 21, 2)

# One history list per metric; entries are appended in the same order as `k`.
acc_list, precision_list, recall_list, f1_list = [], [], [], []

# (label printed, scoring function, list that stores the score) for each metric,
# in the order they are reported.
metric_table = (
    ('Accuracy', mt.accuracy_score, acc_list),
    ('Precision', mt.precision_score, precision_list),
    ('Recall', mt.recall_score, recall_list),
    ('F1-score', mt.f1_score, f1_list),
)

for i in k:
    print(f'For K={i}')

    # Fit on the training split and predict that same split, so every score
    # recorded below is a training-set score.
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    y_pred = knn.predict(X_train)

    # Compute, print and store each metric in turn.
    for label, scorer, history in metric_table:
        score = scorer(y_train, y_pred)
        print(f'{label} = {score}')
        history.append(score)

For K=3


found 0 physical cores < 1
  File "c:\Users\Felipe\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Accuracy = 0.8321864441839619
Precision = 0.8120079059067492
Recall = 0.7974099529082347
F1-score = 0.8046427252733139
For K=5
Accuracy = 0.7815624353582018
Precision = 0.7558933613500558
Recall = 0.7325633193330788
F1-score = 0.744045503021685
For K=7
Accuracy = 0.7563124870716403
Precision = 0.731248949403261
Recall = 0.6920898561792033
F1-score = 0.7111307276085855
For K=9
Accuracy = 0.7399986209749707
Precision = 0.7138144470140116
Recall = 0.6678439607992872
F1-score = 0.6900644397685429
For K=11
Accuracy = 0.7295594015031373
Precision = 0.7046517266461155
Recall = 0.6473208603792796
F1-score = 0.6747707258826846
For K=13
Accuracy = 0.7189684892780804
Precision = 0.6947234852490219
Recall = 0.627147766323024
F1-score = 0.6592083479656851
For K=15
Accuracy = 0.7106805488519616
Precision = 0.6864515668498822
Recall = 0.6119702176403208
F1-score = 0.6470746559903106
For K=17
Accuracy = 0.7025443011790664
Precision = 0.6796937659496901
Recall = 0.5932289677994146
F1-score = 0.63352475

In [6]:
# Gather the sweep results column by column; every metric list is aligned
# with `k`, so row j describes the model trained with k[j] neighbours.
# NOTE: `best_k` holds this results dict here; the Validation cell later
# rebinds the same name to an integer index.
best_k = dict(
    zip(
        ('K', 'Accuracy', 'Precision', 'Recall', 'F1-score'),
        (k, acc_list, precision_list, recall_list, f1_list),
    )
)
k_dataframe = pd.DataFrame(best_k)

# Last expression of the cell: displays the table in the notebook.
k_dataframe

Unnamed: 0,K,Accuracy,Precision,Recall,F1-score
0,3,0.832186,0.812008,0.79741,0.804643
1,5,0.781562,0.755893,0.732563,0.744046
2,7,0.756312,0.731249,0.69209,0.711131
3,9,0.739999,0.713814,0.667844,0.690064
4,11,0.729559,0.704652,0.647321,0.674771
5,13,0.718968,0.694723,0.627148,0.659208
6,15,0.710681,0.686452,0.61197,0.647075
7,17,0.702544,0.679694,0.593229,0.633525
8,19,0.696808,0.675789,0.577479,0.622778


### Validation

In [7]:
# Evaluate the selected K on the validation split.
# `best_k` is the position in `k` of the highest entry of acc_list
# (the first one on ties), not the K value itself.
# NOTE(review): acc_list holds training-set accuracies, which in the recorded
# sweep fall steadily as K grows, so this always selects the smallest K.
# Consider ranking the candidates by validation accuracy instead.
best_k = int(np.argmax(acc_list))

# The model is fitted on the training split only; the validation split is
# used purely for scoring.
knn = KNeighborsClassifier(n_neighbors=k[best_k]).fit(X_train, y_train)
yhat_val = knn.predict(X_val)

# Validation scores, printed in the same order as in the training sweep.
acc_val = mt.accuracy_score(y_val, yhat_val)
print(f'Accuracy = {acc_val}')

precision_val = mt.precision_score(y_val, yhat_val)
print(f'Precision = {precision_val}')

recall_val = mt.recall_score(y_val, yhat_val)
print(f'Recall = {recall_val}')

f1_val = mt.f1_score(y_val, yhat_val)
print(f'F1-score = {f1_val}')

Accuracy = 0.6762765854757231
Precision = 0.6278511404561825
Recall = 0.6212784913505086
F1-score = 0.6245475239765645


### Test

In [9]:
# Final evaluation: refit the selected K on the training + validation data
# and score it once on the test split (the test data is never used for fitting).
# `best_k` is the position in `k` of the highest training accuracy.
best_k = acc_list.index( max( acc_list ) )

# model definition
knn = KNeighborsClassifier(n_neighbors=k[best_k])

# model training
# The features are stacked with pd.concat so they stay a DataFrame: the model
# is then fitted with the same column names that X_test carries at predict
# time. np.concatenate would return a bare ndarray, and scikit-learn warns
# when a model fitted without feature names is asked to predict on a DataFrame.
# The targets were already flattened to NumPy arrays, so np.concatenate is
# the right tool for them.
knn.fit( pd.concat( [X_train, X_val], ignore_index=True ),
         np.concatenate( (y_train, y_val) ) )

# model performance
y_pred = knn.predict( X_test )

# NOTE: the scores below are TEST scores. They are stored under the *_val
# names used by the Validation cell and therefore overwrite the validation
# values; the names are kept unchanged for compatibility with any later cell.

# accuracy
acc_val = mt.accuracy_score( y_test, y_pred )
print( f'Accuracy = {acc_val}')

# precision
precision_val = mt.precision_score( y_test, y_pred )
print( f'Precision = {precision_val}')

# recall
recall_val = mt.recall_score( y_test, y_pred )
print( f'Recall = {recall_val}')

# f1-score
f1_val = mt.f1_score( y_test, y_pred )
print( f'F1-score = {f1_val}')



Accuracy = 0.6884486154559147
Precision = 0.6480251346499102
Recall = 0.6351957765068191
F1-score = 0.6415463230393246
