In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,RobustScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,f1_score
from sklearn.model_selection import GridSearchCV,ParameterGrid,cross_val_score

In [5]:
x_train = pd.read_csv('data_source/X_train.csv')
y_train = pd.read_csv('data_source/y_train.csv')

x_test = pd.read_csv('data_source/X_test.csv')
y_test = pd.read_csv('data_source/y_test.csv')

x_train_std = x_train.copy()
y_train_std = y_train.copy()

x_train_robust = x_train.copy()
y_train_robust = y_train.copy()

x_test_std = x_test.copy()
y_test_std = y_test.copy()

x_test_robust = x_test.copy()
y_test_robust = y_test.copy()

x_train_std = x_train_std.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_std = y_train_std.loc[:,'Action']
x_test_std = x_test_std.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_std = y_test_std.loc[:,'Action']

x_train_robust = x_train_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_robust = y_train_robust.loc[:,'Action']
x_test_robust = x_test_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_robust = y_test_robust.loc[:,'Action']

In [6]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_train_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_test_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])

x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = robust.fit_transform(x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = robust.fit_transform(x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])

In [7]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train_std,y_train_std)
answer = knn.predict(x_test_std)
print(classification_report(y_test_std, answer))

              precision    recall  f1-score   support

           0       1.00      0.97      0.99     11292
           1       0.97      0.98      0.97      4496
           2       1.00      1.00      1.00      3856
           3       0.04      0.62      0.07        16

    accuracy                           0.98     19660
   macro avg       0.75      0.89      0.76     19660
weighted avg       0.99      0.98      0.98     19660



In [8]:
parameters = {
    'n_neighbors':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'weights':['uniform', 'distance'],
    'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute']}


#### Parameter from Gridsearch

In [52]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_robust,y_train_robust)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_robust)
print(classification_report(y_test_robust, answer))

{'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
KNeighborsClassifier(n_neighbors=9, weights='distance')
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     11292
           1       0.96      0.96      0.96      4496
           2       1.00      1.00      1.00      3856
           3       0.03      0.94      0.06        16

    accuracy                           0.97     19660
   macro avg       0.75      0.96      0.75     19660
weighted avg       0.99      0.97      0.98     19660



In [59]:
knn = KNeighborsClassifier(n_neighbors=9,algorithm='auto',weights='distance')
knn.fit(x_train_robust,y_train_robust)

train_yhat = knn.predict(x_train_robust)
train_f1 = f1_score(y_train_robust,train_yhat,average='macro')

test_yhat = knn.predict(x_test_robust)
test_f1 = f1_score(y_test_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_robust,train_yhat))
print('Test Score\n',classification_report(y_test_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.89      0.94      0.91     25165
           2       1.00      1.00      1.00     26245
           3       0.94      0.89      0.91     25744

    accuracy                           0.96    102984
   macro avg       0.96      0.96      0.96    102984
weighted avg       0.96      0.96      0.96    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.96      0.98     11292
           1       0.96      0.96      0.96      4496
           2       1.00      1.00      1.00      3856
           3       0.03      0.94      0.06        16

    accuracy                           0.97     19660
   macro avg       0.75      0.96      0.75     19660
weighted avg       0.99      0.97      0.98     19660

AVG F1-Score Train: 0.9566764274719737
AVG F1-Score Test: 0.7505109293385833


#### Parameter grid for Standardize transform

In [54]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    knn.set_params(**g)
    knn.fit(x_train_std,y_train_std)
    answer = knn.predict(x_test_std)
    f1 = f1_score(y_test_std,answer,average='macro')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

F1: 0.75928
Grid: {'algorithm': 'ball_tree', 'n_neighbors': 4, 'weights': 'uniform'}


In [60]:
knn = KNeighborsClassifier(n_neighbors=4,algorithm='ball_tree',weights='uniform')
knn.fit(x_train_std,y_train_std)

train_yhat = knn.predict(x_train_std)
train_f1 = f1_score(y_train_std,train_yhat,average='macro')

test_yhat = knn.predict(x_test_std)
test_f1 = f1_score(y_test_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_std,train_yhat))
print('Test Score\n',classification_report(y_test_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       0.99      0.99      0.99     25830
           1       0.77      0.98      0.86     25165
           2       1.00      1.00      1.00     26245
           3       0.96      0.71      0.82     25744

    accuracy                           0.92    102984
   macro avg       0.93      0.92      0.92    102984
weighted avg       0.93      0.92      0.92    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.97      0.99     11292
           1       0.97      0.98      0.97      4496
           2       1.00      1.00      1.00      3856
           3       0.04      0.62      0.08        16

    accuracy                           0.98     19660
   macro avg       0.75      0.89      0.76     19660
weighted avg       0.99      0.98      0.99     19660

AVG F1-Score Train: 0.9195590918206044
AVG F1-Score Test: 0.7592758028332826


### Parameter for Robust transform

In [56]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    knn.set_params(**g)
    knn.fit(x_train_robust,y_train_robust)
    answer = knn.predict(x_test_robust)
    f1 = f1_score(y_test_robust,answer,average='macro')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

F1: 0.75365
Grid: {'algorithm': 'brute', 'n_neighbors': 4, 'weights': 'uniform'}


In [61]:
knn = KNeighborsClassifier(n_neighbors=4,algorithm='brute',weights='uniform')
knn.fit(x_train_robust,y_train_robust)

train_yhat = knn.predict(x_train_robust)
train_f1 = f1_score(y_train_robust,train_yhat,average='macro')

test_yhat = knn.predict(x_test_robust)
test_f1 = f1_score(y_test_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_robust,train_yhat))
print('Test Score\n',classification_report(y_test_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.99      1.00     25830
           1       0.77      0.98      0.86     25165
           2       1.00      1.00      1.00     26245
           3       0.97      0.72      0.82     25744

    accuracy                           0.92    102984
   macro avg       0.93      0.92      0.92    102984
weighted avg       0.93      0.92      0.92    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.97      0.98     11292
           1       0.97      0.98      0.97      4496
           2       1.00      1.00      1.00      3856
           3       0.03      0.62      0.06        16

    accuracy                           0.98     19660
   macro avg       0.75      0.89      0.75     19660
weighted avg       0.99      0.98      0.98     19660

AVG F1-Score Train: 0.9203482948476003
AVG F1-Score Test: 0.7536457020645934


In [71]:
cross_std = cross_val_score(KNeighborsClassifier(n_neighbors=4,algorithm='ball_tree',weights='uniform'),x_train_std,y_train_std,cv=5)
print(round(np.mean(cross_std),4))

0.9137


In [72]:
cross_std = cross_val_score(KNeighborsClassifier(n_neighbors=4,algorithm='brute',weights='uniform'),x_train_robust,y_train_robust,cv=5)
print(round(np.mean(cross_std),4))

0.9146


In [3]:
x_train.columns

Index(['Source Port', 'Destination Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received'],
      dtype='object')

#### Gridsearch for FCLASS

In [9]:
x_train_f_std = x_train.copy()
y_train_f_std = y_train.copy()

x_test_f_std = x_test.copy()
y_test_f_std = y_test.copy()

x_train_f_std = x_train_f_std.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_f_std = y_train_f_std.loc[:,'Action']
x_test_f_std = x_test_f_std.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_f_std = y_test_f_std.loc[:,'Action']


x_train_f_robust = x_train.copy()
y_train_f_robust = y_train.copy()

x_test_f_robust = x_test.copy()
y_test_f_robust = y_test.copy()

x_train_f_robust = x_train_f_robust.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_f_robust = y_train_f_robust.loc[:,'Action']
x_test_f_robust = x_test_f_robust.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_f_robust = y_test_f_robust.loc[:,'Action']

In [10]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_f_std = scaler.fit_transform(x_train_f_std)
x_test_f_std = scaler.fit_transform(x_test_f_std)

x_train_f_robust = robust.fit_transform(x_train_f_robust)
x_test_f_robust = robust.fit_transform(x_test_f_robust)

#### STD

In [11]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_f_std,y_train_f_std)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_f_std)
print(classification_report(y_test_f_std, answer))

{'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
KNeighborsClassifier(n_neighbors=3, weights='distance')
              precision    recall  f1-score   support

           0       1.00      0.78      0.88     11292
           1       0.42      1.00      0.59      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.68     19660
   macro avg       0.36      0.45      0.37     19660
weighted avg       0.67      0.68      0.64     19660



In [12]:
knn = KNeighborsClassifier(n_neighbors=3,algorithm='auto',weights='distance')
knn.fit(x_train_f_std,y_train_f_std)

train_yhat = knn.predict(x_train_f_std)
train_f1 = f1_score(y_train_f_std,train_yhat,average='macro')

test_yhat = knn.predict(x_test_f_std)
test_f1 = f1_score(y_test_f_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_f_std,train_yhat))
print('Test Score\n',classification_report(y_test_f_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.35      1.00      0.52     25165
           2       0.00      0.00      0.00     26245
           3       1.00      0.24      0.38     25744

    accuracy                           0.55    102984
   macro avg       0.59      0.56      0.48    102984
weighted avg       0.59      0.55      0.47    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.78      0.88     11292
           1       0.42      1.00      0.59      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.68     19660
   macro avg       0.36      0.45      0.37     19660
weighted avg       0.67      0.68      0.64     19660

AVG F1-Score Train: 0.47695517855756975
AVG F1-Score Test: 0.3678591135050651


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [13]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_f_robust,y_train_f_robust)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_f_robust)
print(classification_report(y_test_f_robust, answer))

{'algorithm': 'auto', 'n_neighbors': 1, 'weights': 'uniform'}
KNeighborsClassifier(n_neighbors=1)
              precision    recall  f1-score   support

           0       1.00      0.48      0.65     11292
           1       0.49      1.00      0.66      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.50     19660
   macro avg       0.37      0.37      0.33     19660
weighted avg       0.69      0.50      0.52     19660



In [14]:
knn = KNeighborsClassifier(n_neighbors=1,algorithm='auto',weights='uniform')
knn.fit(x_train_f_robust,y_train_f_robust)

train_yhat = knn.predict(x_train_f_robust)
train_f1 = f1_score(y_train_f_robust,train_yhat,average='macro')

test_yhat = knn.predict(x_test_f_robust)
test_f1 = f1_score(y_test_f_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_f_robust,train_yhat))
print('Test Score\n',classification_report(y_test_f_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.35      1.00      0.52     25165
           2       0.00      0.00      0.00     26245
           3       1.00      0.24      0.38     25744

    accuracy                           0.55    102984
   macro avg       0.59      0.56      0.48    102984
weighted avg       0.59      0.55      0.47    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.48      0.65     11292
           1       0.49      1.00      0.66      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.50     19660
   macro avg       0.37      0.37      0.33     19660
weighted avg       0.69      0.50      0.52     19660

AVG F1-Score Train: 0.47695517855756975
AVG F1-Score Test: 0.32651879865532585


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Gridsearch RF

In [15]:
x_train_rf_std = x_train.copy()
y_train_rf_std = y_train.copy()

x_test_rf_std = x_test.copy()
y_test_rf_std = y_test.copy()

x_train_rf_std = x_train_rf_std.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_train_rf_std = y_train_rf_std.loc[:,'Action']
x_test_rf_std = x_test_rf_std.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_test_rf_std = y_test_rf_std.loc[:,'Action']


x_train_rf_robust = x_train.copy()
y_train_rf_robust = y_train.copy()

x_test_rf_robust = x_test.copy()
y_test_rf_robust = y_test.copy()

x_train_rf_robust = x_train_rf_robust.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_train_rf_robust = y_train_rf_robust.loc[:,'Action']
x_test_rf_robust = x_test_rf_robust.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_test_rf_robust = y_test_rf_robust.loc[:,'Action']

In [17]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_rf_std = scaler.fit_transform(x_train_rf_std.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])
x_test_rf_std = scaler.fit_transform(x_test_rf_std.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])

x_train_rf_robust = robust.fit_transform(x_train_rf_robust.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])
x_test_rf_robust = robust.fit_transform(x_test_rf_robust.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])

#### std

In [18]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_rf_std,y_train_rf_std)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_rf_std)
print(classification_report(y_test_rf_std, answer))

{'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
KNeighborsClassifier(n_neighbors=9, weights='distance')
              precision    recall  f1-score   support

           0       1.00      0.78      0.88     11292
           1       0.41      1.00      0.59      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.68     19660
   macro avg       0.35      0.45      0.37     19660
weighted avg       0.67      0.68      0.64     19660



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
knn = KNeighborsClassifier(n_neighbors=9,algorithm='auto',weights='distance')
knn.fit(x_train_rf_std,y_train_rf_std)

train_yhat = knn.predict(x_train_rf_std)
train_f1 = f1_score(y_train_rf_std,train_yhat,average='macro')

test_yhat = knn.predict(x_test_rf_std)
test_f1 = f1_score(y_test_rf_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_rf_std,train_yhat))
print('Test Score\n',classification_report(y_test_rf_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.35      1.00      0.52     25165
           2       0.00      0.00      0.00     26245
           3       1.00      0.24      0.38     25744

    accuracy                           0.55    102984
   macro avg       0.59      0.56      0.48    102984
weighted avg       0.59      0.55      0.47    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.78      0.88     11292
           1       0.41      1.00      0.59      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.68     19660
   macro avg       0.35      0.45      0.37     19660
weighted avg       0.67      0.68      0.64     19660

AVG F1-Score Train: 0.4770480209357309
AVG F1-Score Test: 0.365879562791236


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [20]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_rf_robust,y_train_rf_robust)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_rf_robust)
print(classification_report(y_test_rf_robust, answer))

{'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
KNeighborsClassifier(n_neighbors=9, weights='distance')
              precision    recall  f1-score   support

           0       1.00      0.39      0.56     11292
           1       0.45      1.00      0.62      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.45     19660
   macro avg       0.36      0.39      0.30     19660
weighted avg       0.68      0.45      0.46     19660



In [21]:
knn = KNeighborsClassifier(n_neighbors=9,algorithm='auto',weights='distance')
knn.fit(x_train_rf_robust,y_train_rf_robust)

train_yhat = knn.predict(x_train_rf_robust)
train_f1 = f1_score(y_train_rf_robust,train_yhat,average='macro')

test_yhat = knn.predict(x_test_rf_robust)
test_f1 = f1_score(y_test_rf_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_rf_robust,train_yhat))
print('Test Score\n',classification_report(y_test_rf_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.36      1.00      0.53     25165
           2       0.00      0.00      0.00     26245
           3       0.99      0.26      0.41     25744

    accuracy                           0.56    102984
   macro avg       0.59      0.56      0.48    102984
weighted avg       0.59      0.56      0.48    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.39      0.56     11292
           1       0.45      1.00      0.62      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.45     19660
   macro avg       0.36      0.39      0.30     19660
weighted avg       0.68      0.45      0.46     19660

AVG F1-Score Train: 0.48405744819879537
AVG F1-Score Test: 0.29547386060299125


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Gridsearch NAD

In [22]:
x_train_nad_std = x_train.copy()
y_train_nad_std = y_train.copy()

x_test_nad_std = x_test.copy()
y_test_nad_std = y_test.copy()

x_train_nad_std = x_train_nad_std.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_train_nad_std = y_train_nad_std.loc[:,'Action']
x_test_nad_std = x_test_nad_std.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_test_nad_std = y_test_nad_std.loc[:,'Action']


x_train_nad_robust = x_train.copy()
y_train_nad_robust = y_train.copy()

x_test_nad_robust = x_test.copy()
y_test_nad_robust = y_test.copy()

x_train_nad_robust = x_train_nad_robust.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_train_nad_robust = y_train_nad_robust.loc[:,'Action']
x_test_nad_robust = x_test_nad_robust.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_test_nad_robust = y_test_nad_robust.loc[:,'Action']

In [25]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_nad_std = scaler.fit_transform(x_train_nad_std.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])
x_test_nad_std = scaler.fit_transform(x_test_nad_std.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])

x_train_nad_robust = robust.fit_transform(x_train_nad_robust.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])
x_test_nad_robust = robust.fit_transform(x_test_nad_robust.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])

#### std

In [26]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_nad_std,y_train_nad_std)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_nad_std)
print(classification_report(y_test_nad_std, answer))

{'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
KNeighborsClassifier(weights='distance')
              precision    recall  f1-score   support

           0       0.57      0.97      0.72     11292
           1       0.00      0.00      0.00      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.56     19660
   macro avg       0.14      0.24      0.18     19660
weighted avg       0.33      0.56      0.41     19660



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
knn = KNeighborsClassifier(n_neighbors=5,algorithm='auto',weights='distance')
knn.fit(x_train_nad_std,y_train_nad_std)

train_yhat = knn.predict(x_train_nad_std)
train_f1 = f1_score(y_train_nad_std,train_yhat,average='macro')

test_yhat = knn.predict(x_test_nad_std)
test_f1 = f1_score(y_test_nad_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_nad_std,train_yhat))
print('Test Score\n',classification_report(y_test_nad_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       0.36      1.00      0.53     25830
           1       0.28      0.28      0.28     25165
           2       0.00      0.00      0.00     26245
           3       1.00      0.25      0.40     25744

    accuracy                           0.38    102984
   macro avg       0.41      0.38      0.30    102984
weighted avg       0.41      0.38      0.30    102984

Test Score
               precision    recall  f1-score   support

           0       0.57      0.97      0.72     11292
           1       0.00      0.00      0.00      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.56     19660
   macro avg       0.14      0.24      0.18     19660
weighted avg       0.33      0.56      0.41     19660

AVG F1-Score Train: 0.30335877265770783
AVG F1-Score Test: 0.17886311662855275


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [28]:
knn_best = GridSearchCV(knn, parameters, cv=5)
#knn_best.fit(X,y_train)
knn_best.fit(x_train_nad_robust,y_train_nad_robust)
print(knn_best.best_params_)
print(knn_best.best_estimator_)
answer = knn_best.predict(x_test_nad_robust)
print(classification_report(y_test_nad_robust, answer))

{'algorithm': 'auto', 'n_neighbors': 1, 'weights': 'uniform'}
KNeighborsClassifier(n_neighbors=1)
              precision    recall  f1-score   support

           0       0.48      0.61      0.53     11292
           1       0.68      0.14      0.24      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.38     19660
   macro avg       0.29      0.23      0.19     19660
weighted avg       0.43      0.38      0.36     19660



In [29]:
knn = KNeighborsClassifier(n_neighbors=1,algorithm='auto',weights='uniform')
knn.fit(x_train_nad_robust,y_train_nad_robust)

train_yhat = knn.predict(x_train_nad_robust)
train_f1 = f1_score(y_train_nad_robust,train_yhat,average='macro')

test_yhat = knn.predict(x_test_nad_robust)
test_f1 = f1_score(y_test_nad_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_nad_robust,train_yhat))
print('Test Score\n',classification_report(y_test_nad_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       0.35      1.00      0.52     25830
           1       0.26      0.24      0.25     25165
           2       0.00      0.00      0.00     26245
           3       0.99      0.27      0.42     25744

    accuracy                           0.38    102984
   macro avg       0.40      0.38      0.30    102984
weighted avg       0.40      0.38      0.30    102984

Test Score
               precision    recall  f1-score   support

           0       0.48      0.61      0.53     11292
           1       0.68      0.14      0.24      4496
           2       0.00      0.00      0.00      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.38     19660
   macro avg       0.29      0.23      0.19     19660
weighted avg       0.43      0.38      0.36     19660

AVG F1-Score Train: 0.29822524343756746
AVG F1-Score Test: 0.1926388296012017


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
