In [42]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from statistics import mean
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
dataset_path = 'dataset.csv'
dataset = pd.read_csv(dataset_path)

In [3]:
dataset

Unnamed: 0,Cografya,Yerlesim,AracYogunlugu,YayaYogunlugu,VeriHizi,Guvenilirlik,Gecikme,Kapsama,Standart
0,5,6,6,7,9,6,4,6,1
1,6,2,6,1,3,2,6,7,0
2,5,1,6,9,3,6,6,5,1
3,8,2,3,5,7,5,7,9,0
4,5,7,4,8,5,9,5,6,1
...,...,...,...,...,...,...,...,...,...
9995,4,6,4,4,1,9,9,9,0
9996,1,10,6,10,6,4,8,5,0
9997,4,4,2,6,4,4,9,5,0
9998,4,9,7,2,5,4,2,7,0


In [9]:
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1:]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, shuffle=True, stratify=y)

In [39]:
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, random_state=42, shuffle=True)
num_neighbors = [i+1 for i in range(20)]
results = []

for k in num_neighbors:
    accuracy_list = []
    for train_cv_idx, val_idx in skf.split(x_train, y_train):
        x_train_cv, x_val = x_train.iloc[train_cv_idx], x_train.iloc[val_idx]
        y_train_cv, y_val = y_train.iloc[train_cv_idx].values.ravel(), y_train.iloc[val_idx].values.ravel()
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(x_train_cv, y_train_cv)
        accuracy_list.append(model.score(x_val, y_val))
    print('Validation accuracy for k =', k, '\t:', mean(accuracy_list))
    results.append(mean(accuracy_list))
    
max_val = max(results)
max_idx = results.index(max_val)

print('*' * 50)
print('The best validation accuracy was obtained with k =', num_neighbors[max_idx], 'as', max_val)

Validation accuracy for k = 1 	: 0.753625
Validation accuracy for k = 2 	: 0.751
Validation accuracy for k = 3 	: 0.784125
Validation accuracy for k = 4 	: 0.7785
Validation accuracy for k = 5 	: 0.79775
Validation accuracy for k = 6 	: 0.797
Validation accuracy for k = 7 	: 0.802875
Validation accuracy for k = 8 	: 0.79925
Validation accuracy for k = 9 	: 0.804625
Validation accuracy for k = 10 	: 0.804625
Validation accuracy for k = 11 	: 0.811125
Validation accuracy for k = 12 	: 0.80825
Validation accuracy for k = 13 	: 0.812375
Validation accuracy for k = 14 	: 0.810625
Validation accuracy for k = 15 	: 0.81575
Validation accuracy for k = 16 	: 0.81175
Validation accuracy for k = 17 	: 0.81625
Validation accuracy for k = 18 	: 0.811375
Validation accuracy for k = 19 	: 0.813125
Validation accuracy for k = 20 	: 0.8123750000000001
**************************************************
The best validation accuracy was obtained with k = 17 as 0.81625


In [48]:
model = KNeighborsClassifier(n_neighbors=num_neighbors[max_idx])
model.fit(x_train, y_train.values.ravel())
pred = model.predict(x_test)

cm = confusion_matrix(y_test, pred)
print(cm)

[[1031  160]
 [ 234  575]]


In [49]:
print('Test accuracy:', accuracy_score(y_test, pred))

Test accuracy: 0.803


In [50]:
label_names = ['3GPP', 'IEEE']
report = classification_report(y_test, pred, target_names=label_names)
print(report)

              precision    recall  f1-score   support

        3GPP       0.82      0.87      0.84      1191
        IEEE       0.78      0.71      0.74       809

    accuracy                           0.80      2000
   macro avg       0.80      0.79      0.79      2000
weighted avg       0.80      0.80      0.80      2000

