## Środa

In [1]:
from sklearn.model_selection import train_test_split, learning_curve
import pandas as pd
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

data = pd.read_parquet('./IDS2017/Final/Wednesday.parquet')

X = data.drop(columns=['Label'])
y = data['Label']
X_train, X_val, Y_train, Y_val = train_test_split(X,
                                                  y, 
                                                  test_size = 0.3, 
                                                  random_state=2022)
print(X_train.shape, X_val.shape)

gnb = GaussianNB()
gnb.fit(X_train, Y_train)

print('Training Accuracy : ', 
      metrics.accuracy_score(Y_train,
                             gnb.predict(X_train))*100)
print('Validation Accuracy : ', 
      metrics.accuracy_score(Y_val, 
                             gnb.predict(X_val))*100)

class_labels = y.unique()
confusion_matrix = metrics.confusion_matrix(Y_val, gnb.predict(X_val))
confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, gnb.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)

(427344, 10) (183148, 10)
Training Accuracy :  30.284501478902243
Validation Accuracy :  30.452420992858237
Confusion Matrix:
                  BENIGN  DoS slowloris  DoS Slowhttptest  DoS Hulk  \
BENIGN              8173          13853              1457     11205   
DoS slowloris         12            322              1878         0   
DoS Slowhttptest       0           1008             45630         0   
DoS Hulk               0             97                 0        96   
DoS GoldenEye          0              3                 0        35   
Heartbleed             0              0                 0         0   

                  DoS GoldenEye  Heartbleed  
BENIGN                    90211           8  
DoS slowloris               876           0  
DoS Slowhttptest           5403           0  
DoS Hulk                   1329           0  
DoS GoldenEye              1550           0  
Heartbleed                    0           2  
Classification Report:
                  precision    

## Piątek

In [2]:
data2 = pd.read_parquet('./IDS2017/Final/Friday.parquet')

X = data2.drop(columns=['Label'])
y = data2['Label']
X_train, X_val, Y_train, Y_val = train_test_split(X,
                                                  y, 
                                                  test_size = 0.3, 
                                                  random_state=2022)
print(X_train.shape, X_val.shape)

gnb2 = GaussianNB()
gnb2.fit(X_train, Y_train)

print('Training Accuracy : ', 
      metrics.accuracy_score(Y_train,
                             gnb2.predict(X_train))*100)
print('Validation Accuracy : ', 
      metrics.accuracy_score(Y_val, 
                             gnb2.predict(X_val))*100)

confusion_matrix = metrics.confusion_matrix(Y_val, gnb2.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, gnb2.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)


(156157, 10) (66925, 10)
Training Accuracy :  82.39207976587664
Validation Accuracy :  82.33395592080687
Confusion Matrix:
        BENIGN   DDoS
BENIGN   16707  11821
DDoS         2  38395
Classification Report:
              precision    recall  f1-score   support

      BENIGN       1.00      0.59      0.74     28528
        DDoS       0.76      1.00      0.87     38397

    accuracy                           0.82     66925
   macro avg       0.88      0.79      0.80     66925
weighted avg       0.86      0.82      0.81     66925



## Połączone DDoSy

In [3]:
data3 = pd.read_parquet('./IDS2017/Final/Combined.parquet')

X = data3.drop(columns=['Label'])
y = data3['Label']
X_train, X_val, Y_train, Y_val = train_test_split(X,
                                                  y, 
                                                  test_size = 0.3, 
                                                  random_state=2022)
print(X_train.shape, X_val.shape)

gnb3 = GaussianNB()
gnb3.fit(X_train, Y_train)

print('Training Accuracy : ', 
      metrics.accuracy_score(Y_train,
                             gnb3.predict(X_train))*100)
print('Validation Accuracy : ', 
      metrics.accuracy_score(Y_val, 
                             gnb3.predict(X_val))*100)

confusion_matrix = metrics.confusion_matrix(Y_val, gnb3.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, gnb3.predict(X_val), target_names=class_labels, zero_division=0)
print("Classification Report:")
print(classification_report)

(935356, 10) (400868, 10)
Training Accuracy :  21.74893837212783
Validation Accuracy :  21.914944570282486
Confusion Matrix:
                  BENIGN   DDoS  DoS GoldenEye  DoS Hulk  DoS Slowhttptest  \
BENIGN             20063    322          33847      3755             26461   
DDoS                   0  20380              0      4134                 0   
DoS GoldenEye          7      0            334      1829                 0   
DoS Hulk               0     13            989     45407                 0   
DoS Slowhttptest       1     76              5         0                62   
DoS slowloris          0      0              2         0                37   
Heartbleed             1      0              0         0                 0   

                  DoS slowloris  Heartbleed  
BENIGN                   220017           0  
DDoS                      13959           0  
DoS GoldenEye               869           0  
DoS Hulk                   5357           0  
DoS Slowhttptest    