In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,RobustScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV,ParameterGrid,cross_val_score
from sklearn.naive_bayes import CategoricalNB

In [30]:
# train = pd.read_csv('data_source/train.csv')
# test = pd.read_csv('data_source/test.csv')

# train_robust = train.copy()
# test_robust = test.copy()

x_train = pd.read_csv('data_source/X_train.csv')
y_train = pd.read_csv('data_source/y_train.csv')

x_test = pd.read_csv('data_source/X_test.csv')
y_test = pd.read_csv('data_source/y_test.csv')


x_train_std = x_train.copy()
y_train_std = y_train.copy()

x_train_robust = x_train.copy()
y_train_robust = y_train.copy()

x_test_std = x_test.copy()
y_test_std = y_test.copy()

x_test_robust = x_test.copy()
y_test_robust = y_test.copy()

x_train_std = x_train_std.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_std = y_train_std.loc[:,'Action']
x_test_std = x_test_std.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_std = y_test_std.loc[:,'Action']

x_train_robust = x_train_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_robust = y_train_robust.loc[:,'Action']
x_test_robust = x_test_robust.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_robust = y_test_robust.loc[:,'Action']

In [31]:
scaler = StandardScaler()
robust = RobustScaler()

x_train_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_train_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_test_std.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])

x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = robust.fit_transform(x_train_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = robust.fit_transform(x_test_robust.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])


In [32]:
clf = CategoricalNB()
clf.fit(x_train_robust,y_train_robust)
answer = clf.predict(x_test_robust)
print(classification_report(y_test_robust, answer))

              precision    recall  f1-score   support

           0       1.00      0.46      0.63     11292
           1       0.47      0.97      0.64      4496
           2       0.99      1.00      0.99      3856
           3       0.01      0.88      0.02        16

    accuracy                           0.69     19660
   macro avg       0.62      0.83      0.57     19660
weighted avg       0.88      0.69      0.70     19660



In [33]:
parameters = [
{'alpha': [0.01, 0.1, 0.5, 1.0, 10.0],
          'fit_prior': [True, False],
          'class_prior': [None, [0.1]* 4, ]
         }
]

### Parameter Grid for robust

In [13]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    clf.set_params(**g)
    clf.fit(x_train_robust,y_train_robust)
    answer = clf.predict(x_test_robust)
    f1 = f1_score(y_test_robust,answer,average='macro')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

F1: 0.61178
Grid: {'alpha': 0.01, 'class_prior': None, 'fit_prior': False}


In [14]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=False)
clf.fit(x_train_robust,y_train_robust)

train_yhat = clf.predict(x_train_robust)
train_f1 = f1_score(y_train_robust,train_yhat,average='macro')

test_yhat = clf.predict(x_test_robust)
test_f1 = f1_score(y_test_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_robust,train_yhat))
print('Test Score\n',classification_report(y_test_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     25830
           1       0.87      0.96      0.91     25165
           2       1.00      1.00      1.00     26245
           3       0.95      0.86      0.90     25744

    accuracy                           0.95    102984
   macro avg       0.96      0.95      0.95    102984
weighted avg       0.96      0.95      0.95    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.58      0.73     11292
           1       0.54      0.97      0.70      4496
           2       1.00      1.00      1.00      3856
           3       0.01      0.88      0.02        16

    accuracy                           0.75     19660
   macro avg       0.64      0.85      0.61     19660
weighted avg       0.89      0.75      0.77     19660

AVG F1-Score Train: 0.9537144790708485
AVG F1-Score Test: 0.6117765369343429


### Parameter Grid for Std

In [15]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    clf.set_params(**g)
    clf.fit(x_train_std,y_train_std)
    answer = clf.predict(x_test_std)
    f1 = f1_score(y_test_std,answer,average='macro')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

F1: 0.73283
Grid: {'alpha': 0.01, 'class_prior': None, 'fit_prior': True}


In [17]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_robust,y_train_robust)

train_yhat = clf.predict(x_train_std)
train_f1 = f1_score(y_train_std,train_yhat,average='macro')

test_yhat = clf.predict(x_test_std)
test_f1 = f1_score(y_test_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_std,train_yhat))
print('Test Score\n',classification_report(y_test_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.82      0.90     25830
           1       0.84      0.95      0.89     25165
           2       1.00      1.00      1.00     26245
           3       0.83      0.86      0.85     25744

    accuracy                           0.91    102984
   macro avg       0.92      0.91      0.91    102984
weighted avg       0.92      0.91      0.91    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.23      0.38     11292
           1       0.39      0.97      0.56      4496
           2       1.00      1.00      1.00      3856
           3       0.01      0.88      0.01        16

    accuracy                           0.55     19660
   macro avg       0.60      0.77      0.49     19660
weighted avg       0.86      0.55      0.54     19660

AVG F1-Score Train: 0.910731665502352
AVG F1-Score Test: 0.4865793902448771


In [27]:
cross_std = cross_val_score(CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True),x_train_std,y_train_std,cv=5)
print(round(np.mean(cross_std),4))

nan


Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 117, in __call__
    score = scorer(estimator, *args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log_prob_[i][

In [28]:
cross_std = cross_val_score(CategoricalNB(alpha=0.01,class_prior=None,fit_prior=False),x_train_robust,y_train_robust,cv=5)
print(round(np.mean(cross_std),4))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 117, in __call__
    score = scorer(estimator, *args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log_prob_[i][

nan


Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 117, in __call__
    score = scorer(estimator, *args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log_prob_[i][

### Gridsearch for F class

In [34]:
x_train_f_std = x_train.copy()
y_train_f_std = y_train.copy()

x_test_f_std = x_test.copy()
y_test_f_std = y_test.copy()

x_train_f_std = x_train_f_std.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_f_std = y_train_f_std.loc[:,'Action']
x_test_f_std = x_test_f_std.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_f_std = y_test_f_std.loc[:,'Action']


x_train_f_robust = x_train.copy()
y_train_f_robust = y_train.copy()

x_test_f_robust = x_test.copy()
y_test_f_robust = y_test.copy()

x_train_f_robust = x_train_f_robust.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_f_robust = y_train_f_robust.loc[:,'Action']
x_test_f_robust = x_test_f_robust.loc[:,['pkts_received','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_f_robust = y_test_f_robust.loc[:,'Action']

In [35]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_f_std = scaler.fit_transform(x_train_f_std)
x_test_f_std = scaler.fit_transform(x_test_f_std)

x_train_f_robust = robust.fit_transform(x_train_f_robust)
x_test_f_robust = robust.fit_transform(x_test_f_robust)

#### std

In [36]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_f_std,y_train_f_std)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_f_std)
print(classification_report(y_test_f_std, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.06      0.11     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.34      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.23     19660
   macro avg       0.30      0.26      0.11     19660
weighted avg       0.61      0.23      0.13     19660



Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

In [37]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_f_std,y_train_f_std)

train_yhat = clf.predict(x_train_f_std)
train_f1 = f1_score(y_train_f_std,train_yhat,average='macro')

test_yhat = clf.predict(x_test_f_std)
test_f1 = f1_score(y_test_f_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_f_std,train_yhat))
print('Test Score\n',classification_report(y_test_f_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.08      0.16     25830
           1       0.00      0.00      0.00     25165
           2       0.26      1.00      0.41     26245
           3       0.00      0.00      0.00     25744

    accuracy                           0.28    102984
   macro avg       0.32      0.27      0.14    102984
weighted avg       0.32      0.28      0.14    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.06      0.11     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.34      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.23     19660
   macro avg       0.30      0.26      0.11     19660
weighted avg       0.61      0.23      0.13     19660

AVG F1-Score Train: 0.14209792665485574
AVG F1-Score Test: 0.11089337412494472


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [38]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_f_robust,y_train_f_robust)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_f_robust)
print(classification_report(y_test_f_robust, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.41      0.59     11292
           1       0.00      0.00      0.00      4496
           2       0.28      1.00      0.44      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.43     19660
   macro avg       0.32      0.35      0.26     19660
weighted avg       0.63      0.43      0.42     19660



Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

In [39]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_f_robust,y_train_f_robust)

train_yhat = clf.predict(x_train_f_robust)
train_f1 = f1_score(y_train_f_robust,train_yhat,average='macro')

test_yhat = clf.predict(x_test_f_robust)
test_f1 = f1_score(y_test_f_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_f_robust,train_yhat))
print('Test Score\n',classification_report(y_test_f_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.99      1.00     25830
           1       0.00      0.00      0.00     25165
           2       0.37      1.00      0.54     26245
           3       0.96      0.23      0.37     25744

    accuracy                           0.56    102984
   macro avg       0.58      0.56      0.48    102984
weighted avg       0.59      0.56      0.48    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.41      0.59     11292
           1       0.00      0.00      0.00      4496
           2       0.28      1.00      0.44      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.43     19660
   macro avg       0.32      0.35      0.26     19660
weighted avg       0.63      0.43      0.42     19660

AVG F1-Score Train: 0.47637990158968735
AVG F1-Score Test: 0.2561157002183153


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Gridsearch RF

In [40]:
x_train_rf_std = x_train.copy()
y_train_rf_std = y_train.copy()

x_test_rf_std = x_test.copy()
y_test_rf_std = y_test.copy()

x_train_rf_std = x_train_rf_std.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_train_rf_std = y_train_rf_std.loc[:,'Action']
x_test_rf_std = x_test_rf_std.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_test_rf_std = y_test_rf_std.loc[:,'Action']


x_train_rf_robust = x_train.copy()
y_train_rf_robust = y_train.copy()

x_test_rf_robust = x_test.copy()
y_test_rf_robust = y_test.copy()

x_train_rf_robust = x_train_rf_robust.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_train_rf_robust = y_train_rf_robust.loc[:,'Action']
x_test_rf_robust = x_test_rf_robust.loc[:,['Source Port', 'NAT Source Port',
       'NAT Destination Port', 'Bytes', 'Bytes Sent', 'Bytes Received',
       'Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']]
y_test_rf_robust = y_test_rf_robust.loc[:,'Action']

In [41]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_rf_std = scaler.fit_transform(x_train_rf_std.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])
x_test_rf_std = scaler.fit_transform(x_test_rf_std.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])

x_train_rf_robust = robust.fit_transform(x_train_rf_robust.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])
x_test_rf_robust = robust.fit_transform(x_test_rf_robust.loc[:,['Bytes','Bytes Sent', 'Bytes Received','Packets', 'Elapsed Time (sec)', 'pkts_sent', 'pkts_received']])

#### std

In [42]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_rf_std,y_train_rf_std)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_rf_std)
print(classification_report(y_test_rf_std, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.06      0.11     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.34      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.23     19660
   macro avg       0.30      0.27      0.11     19660
weighted avg       0.61      0.23      0.13     19660



Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

In [43]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_rf_std,y_train_rf_std)

train_yhat = clf.predict(x_train_rf_std)
train_f1 = f1_score(y_train_rf_std,train_yhat,average='macro')

test_yhat = clf.predict(x_test_rf_std)
test_f1 = f1_score(y_test_rf_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_rf_std,train_yhat))
print('Test Score\n',classification_report(y_test_rf_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.08      0.16     25830
           1       0.00      0.00      0.00     25165
           2       0.26      1.00      0.41     26245
           3       0.00      0.00      0.00     25744

    accuracy                           0.28    102984
   macro avg       0.32      0.27      0.14    102984
weighted avg       0.32      0.28      0.14    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.06      0.11     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.34      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.23     19660
   macro avg       0.30      0.27      0.11     19660
weighted avg       0.61      0.23      0.13     19660

AVG F1-Score Train: 0.14214976562348094
AVG F1-Score Test: 0.11283508999905505


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [44]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_rf_robust,y_train_rf_robust)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_rf_robust)
print(classification_report(y_test_rf_robust, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.40      0.58     11292
           1       0.25      0.01      0.01      4496
           2       0.29      1.00      0.44      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.43     19660
   macro avg       0.38      0.40      0.26     19660
weighted avg       0.69      0.43      0.42     19660



In [46]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_rf_robust,y_train_rf_robust)

train_yhat = clf.predict(x_train_rf_robust)
train_f1 = f1_score(y_train_rf_robust,train_yhat,average='macro')

test_yhat = clf.predict(x_test_rf_robust)
test_f1 = f1_score(y_test_rf_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_rf_robust,train_yhat))
print('Test Score\n',classification_report(y_test_rf_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     25830
           1       1.00      0.01      0.02     25165
           2       0.39      1.00      0.56     26245
           3       0.96      0.34      0.50     25744

    accuracy                           0.59    102984
   macro avg       0.84      0.58      0.52    102984
weighted avg       0.83      0.59      0.52    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.40      0.58     11292
           1       0.25      0.01      0.01      4496
           2       0.29      1.00      0.44      3856
           3       0.00      0.19      0.00        16

    accuracy                           0.43     19660
   macro avg       0.38      0.40      0.26     19660
weighted avg       0.69      0.43      0.42     19660

AVG F1-Score Train: 0.5178653365285346
AVG F1-Score Test: 0.2595664619312022


### Gridsearch NAD

In [45]:
x_train_nad_std = x_train.copy()
y_train_nad_std = y_train.copy()

x_test_nad_std = x_test.copy()
y_test_nad_std = y_test.copy()

x_train_nad_std = x_train_nad_std.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_train_nad_std = y_train_nad_std.loc[:,'Action']
x_test_nad_std = x_test_nad_std.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_test_nad_std = y_test_nad_std.loc[:,'Action']


x_train_nad_robust = x_train.copy()
y_train_nad_robust = y_train.copy()

x_test_nad_robust = x_test.copy()
y_test_nad_robust = y_test.copy()

x_train_nad_robust = x_train_nad_robust.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_train_nad_robust = y_train_nad_robust.loc[:,'Action']
x_test_nad_robust = x_test_nad_robust.loc[:,[ 'Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']]
y_test_nad_robust = y_test_nad_robust.loc[:,'Action']

In [47]:
scaler = StandardScaler()
robust = RobustScaler()
x_train_nad_std = scaler.fit_transform(x_train_nad_std.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])
x_test_nad_std = scaler.fit_transform(x_test_nad_std.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])

x_train_nad_robust = robust.fit_transform(x_train_nad_robust.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])
x_test_nad_robust = robust.fit_transform(x_test_nad_robust.loc[ :,['Bytes Sent', 'Bytes Received', 'pkts_sent', 'pkts_received']])

#### std

In [48]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_nad_std,y_train_nad_std)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_nad_std)
print(classification_report(y_test_nad_std, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.01      0.03     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.33      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.20     19660
   macro avg       0.30      0.25      0.09     19660
weighted avg       0.61      0.20      0.08     19660



Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

In [50]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_nad_std,y_train_nad_std)

train_yhat = clf.predict(x_train_nad_std)
train_f1 = f1_score(y_train_nad_std,train_yhat,average='macro')

test_yhat = clf.predict(x_test_nad_std)
test_f1 = f1_score(y_test_nad_std,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_nad_std,train_yhat))
print('Test Score\n',classification_report(y_test_nad_std, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.01      0.02     25830
           1       0.00      0.00      0.00     25165
           2       0.26      1.00      0.41     26245
           3       0.00      0.00      0.00     25744

    accuracy                           0.26    102984
   macro avg       0.31      0.25      0.11    102984
weighted avg       0.32      0.26      0.11    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.01      0.03     11292
           1       0.00      0.00      0.00      4496
           2       0.20      1.00      0.33      3856
           3       0.00      0.00      0.00        16

    accuracy                           0.20     19660
   macro avg       0.30      0.25      0.09     19660
weighted avg       0.61      0.20      0.08     19660

AVG F1-Score Train: 0.10615829310498215
AVG F1-Score Test: 0.08922545501514575


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### robust

In [51]:
clf_best = GridSearchCV(clf, parameters, cv=5)
#knn_best.fit(X,y_train)
clf_best.fit(x_train_nad_robust,y_train_nad_robust)
print(clf_best.best_params_)
print(clf_best.best_estimator_)
answer = clf_best.predict(x_test_nad_robust)
print(classification_report(y_test_nad_robust, answer))

Traceback (most recent call last):
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 444, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/base.py", line 668, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 106, in predict
    jll = self._joint_log_likelihood(X)
  File "/Users/itthisak/Desktop/Nida/dads-6003-firewall-ml/venv/lib/python3.10/site-packages/sklearn/naive_bayes.py", line 1530, in _joint_log_likelihood
    jll += self.feature_log

{'alpha': 0.01, 'class_prior': None, 'fit_prior': True}
CategoricalNB(alpha=0.01)
              precision    recall  f1-score   support

           0       1.00      0.42      0.59     11292
           1       0.31      0.01      0.01      4496
           2       0.28      1.00      0.44      3856
           3       0.00      0.19      0.01        16

    accuracy                           0.44     19660
   macro avg       0.40      0.40      0.26     19660
weighted avg       0.70      0.44      0.43     19660



In [49]:
clf = CategoricalNB(alpha=0.01,class_prior=None,fit_prior=True)
clf.fit(x_train_nad_robust,y_train_nad_robust)

train_yhat = clf.predict(x_train_nad_robust)
train_f1 = f1_score(y_train_nad_robust,train_yhat,average='macro')

test_yhat = clf.predict(x_test_nad_robust)
test_f1 = f1_score(y_test_nad_robust,test_yhat,average='macro')
print('Train Score\n',classification_report(y_train_nad_robust,train_yhat))
print('Test Score\n',classification_report(y_test_nad_robust, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.90      0.95     25830
           1       1.00      0.01      0.02     25165
           2       0.38      1.00      0.55     26245
           3       0.83      0.34      0.48     25744

    accuracy                           0.57    102984
   macro avg       0.80      0.56      0.50    102984
weighted avg       0.80      0.57      0.50    102984

Test Score
               precision    recall  f1-score   support

           0       1.00      0.42      0.59     11292
           1       0.31      0.01      0.01      4496
           2       0.28      1.00      0.44      3856
           3       0.00      0.19      0.01        16

    accuracy                           0.44     19660
   macro avg       0.40      0.40      0.26     19660
weighted avg       0.70      0.44      0.43     19660

AVG F1-Score Train: 0.5001103052650867
AVG F1-Score Test: 0.2639220958248781
