In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,RobustScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV,ParameterGrid
from sklearn.naive_bayes import CategoricalNB

In [10]:
train = pd.read_csv('data_source/train.csv')
test = pd.read_csv('data_source/test.csv')

train_robust = train.copy()
test_robust = test.copy()

x_train_robust = train_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_robust = train_robust.loc[:,'Action']
x_test_robust = test_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_robust = test_robust.loc[:,'Action']

In [11]:
scaler = RobustScaler()

x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])

# y_train_robust.replace({'allow':0, 'deny':1, 'drop':2, 'reset-both':3},inplace=True)
# y_test_robust.replace({'allow':0, 'deny':1, 'drop':2, 'reset-both':3},inplace=True)

In [12]:
clf = CategoricalNB()
clf.fit(x_train_robust,y_train_robust)
answer = clf.predict(x_test_robust)
print(classification_report(y_test_robust, answer))

              precision    recall  f1-score   support

       allow       1.00      1.00      1.00     11292
        deny       0.99      0.99      0.99      4496
        drop       1.00      1.00      1.00      3856
  reset-both       0.00      0.00      0.00        16

    accuracy                           1.00     19660
   macro avg       0.75      0.75      0.75     19660
weighted avg       0.99      1.00      1.00     19660



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
parameters = [
{'alpha': [0.01, 0.1, 0.5, 1.0, 10.0],
          'fit_prior': [True, False],
          'class_prior': [None, [0.1,]* 4, ]
         }
]

In [16]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    clf.set_params(**g)
    clf.fit(x_train_robust,y_train_robust)
    answer = clf.predict(x_test_robust)
    f1 = f1_score(y_test_robust,answer,average='weighted')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

F1: 0.99527
Grid: {'alpha': 10.0, 'class_prior': None, 'fit_prior': True}


In [17]:
clf = CategoricalNB(alpha=10.0,class_prior=None,fit_prior=True)
clf.fit(x_train_robust,y_train_robust)

train_yhat = clf.predict(x_train_robust)
train_f1 = f1_score(y_train_robust,train_yhat,average='weighted')

test_yhat = clf.predict(x_test_robust)
test_f1 = f1_score(y_test_robust,test_yhat,average='weighted')
print('Train Score\n',classification_report(y_train_robust,train_yhat))
print('Test Score\n',classification_report(y_test_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Score
               precision    recall  f1-score   support

       allow       1.00      1.00      1.00     26348
        deny       0.99      0.99      0.99     10491
        drop       1.00      1.00      1.00      8995
  reset-both       0.00      0.00      0.00        38

    accuracy                           1.00     45872
   macro avg       0.75      0.75      0.75     45872
weighted avg       1.00      1.00      1.00     45872

Test Score
               precision    recall  f1-score   support

       allow       1.00      1.00      1.00     11292
        deny       0.99      0.99      0.99      4496
        drop       1.00      1.00      1.00      3856
  reset-both       0.00      0.00      0.00        16

    accuracy                           1.00     19660
   macro avg       0.75      0.75      0.75     19660
weighted avg       0.99      1.00      1.00     19660

AVG F1-Score Train: 0.9956863959776495
AVG F1-Score Test: 0.995274550080834


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
