In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,f1_score
from sklearn.model_selection import GridSearchCV

In [17]:
train = pd.read_csv('data_source/train.csv')
test = pd.read_csv('data_source/test.csv')

x_train_byte = train.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_byte = train.loc[:,'Action']
x_test_byte = test.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_byte = test.loc[:,'Action']

In [18]:
scaler = StandardScaler()
x_train_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_train_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_test_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])

In [19]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train_byte,y_train_byte)
answer = knn.predict(x_test_byte)
print(classification_report(y_test_byte, answer))

              precision    recall  f1-score   support

       allow       1.00      1.00      1.00     11292
        deny       0.99      1.00      0.99      4496
        drop       1.00      1.00      1.00      3856
  reset-both       0.78      0.44      0.56        16

    accuracy                           1.00     19660
   macro avg       0.94      0.86      0.89     19660
weighted avg       1.00      1.00      1.00     19660



In [20]:
parameters = {
    'n_neighbors':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'weights':['uniform', 'distance'],
    'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute']}

knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_byte,y_train_byte)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_byte)
print(classification_report(y_test_byte, answer))

{'algorithm': 'brute', 'n_neighbors': 6, 'weights': 'distance'}
KNeighborsClassifier(algorithm='brute', n_neighbors=6, weights='distance')
              precision    recall  f1-score   support

       allow       1.00      1.00      1.00     11292
        deny       0.99      1.00      0.99      4496
        drop       1.00      1.00      1.00      3856
  reset-both       1.00      0.62      0.77        16

    accuracy                           1.00     19660
   macro avg       1.00      0.90      0.94     19660
weighted avg       1.00      1.00      1.00     19660



In [21]:
knn = KNeighborsClassifier(n_neighbors=6,algorithm='brute',weights='distance')
knn.fit(x_train_byte,y_train_byte)

train_yhat = knn.predict(x_train_byte)
train_f1 = f1_score(y_train_byte,train_yhat,average='weighted')

test_yhat = knn.predict(x_test_byte)
test_f1 = f1_score(y_test_byte,test_yhat,average='weighted')
print('Train Score\n',classification_report(y_train_byte,train_yhat))
print('Test Score\n',classification_report(y_test_byte, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

       allow       1.00      1.00      1.00     26348
        deny       1.00      1.00      1.00     10491
        drop       1.00      1.00      1.00      8995
  reset-both       1.00      0.97      0.99        38

    accuracy                           1.00     45872
   macro avg       1.00      0.99      1.00     45872
weighted avg       1.00      1.00      1.00     45872

Test Score
               precision    recall  f1-score   support

       allow       1.00      1.00      1.00     11292
        deny       0.99      1.00      0.99      4496
        drop       1.00      1.00      1.00      3856
  reset-both       1.00      0.62      0.77        16

    accuracy                           1.00     19660
   macro avg       1.00      0.90      0.94     19660
weighted avg       1.00      1.00      1.00     19660

AVG F1-Score Train: 0.9989747319149351
AVG F1-Score Test: 0.9961038549811314
