In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import classification_report,f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid

In [2]:
train = pd.read_csv('data_source/train.csv')
test = pd.read_csv('data_source/test.csv')

x_train_byte = train.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_train_byte = train.loc[:,'Action']
x_test_byte = test.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)','Bytes Received']]
y_test_byte = test.loc[:,'Action']

In [3]:
scaler = StandardScaler()
x_train_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_train_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])
x_test_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']] = scaler.fit_transform(x_test_byte.loc[:,['Packets','Elapsed Time (sec)','Bytes Received']])


In [4]:
parameters = [{'kernel': ['rbf'], 'gamma': ['scale', 'auto'],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
                    {'kernel': ['sigmoid'], 'gamma': ['scale', 'auto'],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['rbf'], 'gamma': ['scale', 'auto'],
                     'C': [1, 10, 100, 1000]}]

In [5]:
# svc = GridSearchCV(SVC(), parameters, cv=5)
# svc.fit(x_train_byte, y_train_byte)
# print(svc.best_params_)
# print(svc.best_estimator_)
# answer = svc.predict(x_test_byte)
# print(classification_report(y_test_byte, answer)) 

In [6]:
grid = ParameterGrid(parameters)

In [7]:
# for params in grid:
#     clf = SVC(**params)
#     clf.fit(x_train_byte, y_train_byte)
#     print("Parameters:", params)
#     print("Score:", clf.score(x_train_byte, y_train_byte))

In [8]:
clf = svm.SVC()
clf.fit(x_train_byte, y_train_byte)

trainn = clf.predict(x_train_byte)
train_f1 = f1_score(y_train_byte,trainn,average='weighted')

testt = clf.predict(x_test_byte)
test_f1 = f1_score(y_test_byte,testt,average='weighted')
print('Train Score\n',classification_report(y_train_byte,trainn))
print('Test Score\n',classification_report(y_test_byte, testt))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Score
               precision    recall  f1-score   support

       allow       1.00      0.98      0.99     26348
        deny       0.98      0.94      0.96     10491
        drop       0.91      1.00      0.95      8995
  reset-both       0.00      0.00      0.00        38

    accuracy                           0.98     45872
   macro avg       0.72      0.73      0.73     45872
weighted avg       0.98      0.98      0.98     45872

Test Score
               precision    recall  f1-score   support

       allow       1.00      0.98      0.99     11292
        deny       0.98      0.93      0.96      4496
        drop       0.90      1.00      0.95      3856
  reset-both       0.00      0.00      0.00        16

    accuracy                           0.97     19660
   macro avg       0.72      0.73      0.72     19660
weighted avg       0.98      0.97      0.97     19660

AVG F1-Score Train: 0.9754640308170344
AVG F1-Score Test: 0.9741304113402885


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
grid = ParameterGrid(parameters)
best_score = 0
best_params = {}

for params in grid:
    clf = SVC(**params)
    clf.fit(x_train_byte, y_train_byte)
    y_pred = clf.predict(x_train_byte)
    f1 = f1_score(y_train_byte, y_pred,average='weighted')
    if f1 > best_score:
        best_score = f1
        best_params = params

print("Best parameters:", best_params)
print("Best f1 score:", best_score)