## Środa

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

data = pd.read_parquet('./IDS2017/Final/Wednesday.parquet')

X = data.drop(columns=['Label'])
y = data['Label']

X_train, X_val, Y_train, Y_val = train_test_split(X,
                                                  y, 
                                                  test_size = 0.3, 
                                                  random_state=2022)
print(X_train.shape, X_val.shape)

model = RandomForestClassifier()
model.fit(X_train, Y_train)
print('Training Accuracy : ', 
      metrics.accuracy_score(Y_train,
                             model.predict(X_train))*100)
print('Validation Accuracy : ', 
      metrics.accuracy_score(Y_val, 
                             model.predict(X_val))*100)

class_labels = y.unique()
confusion_matrix = metrics.confusion_matrix(Y_val, model.predict(X_val))
confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, model.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)

(427344, 10) (183148, 10)
Training Accuracy :  98.95189823654947
Validation Accuracy :  98.94839146482626
Confusion Matrix:
                  BENIGN  DoS slowloris  DoS Slowhttptest  DoS Hulk  \
BENIGN            124561              2               291        51   
DoS slowloris        748           2339                 0         1   
DoS Slowhttptest     787              0             51253         1   
DoS Hulk              15              0                 1      1495   
DoS GoldenEye         13              0                 0         3   
Heartbleed             0              0                 0         0   

                  DoS GoldenEye  Heartbleed  
BENIGN                        2           0  
DoS slowloris                 0           0  
DoS Slowhttptest              0           0  
DoS Hulk                     11           0  
DoS GoldenEye              1572           0  
Heartbleed                    0           2  
Classification Report:
                  precision    re

## Piątek

In [12]:
data2 = pd.read_parquet('./IDS2017/Final/Friday.parquet')

# Split data into features and labels
X = data2.drop(columns=['Label'])
y = data2['Label']


X_train, X_val, Y_train, Y_val = train_test_split(X, y, test_size=0.3, random_state=2022)
print(X_train.shape, X_val.shape)

model2 = RandomForestClassifier()
model2.fit(X_train, Y_train)

print('Training Accuracy : ', metrics.accuracy_score(Y_train, model2.predict(X_train)) * 100)
print('Validation Accuracy : ', metrics.accuracy_score(Y_val, model2.predict(X_val)) * 100)


confusion_matrix = metrics.confusion_matrix(Y_val, model2.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, model2.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)

(156157, 10) (66925, 10)
Training Accuracy :  99.92059273679695
Validation Accuracy :  99.90885319387374
Confusion Matrix:
        BENIGN   DDoS
BENIGN   28523      5
DDoS        56  38341
Classification Report:
              precision    recall  f1-score   support

      BENIGN       1.00      1.00      1.00     28528
        DDoS       1.00      1.00      1.00     38397

    accuracy                           1.00     66925
   macro avg       1.00      1.00      1.00     66925
weighted avg       1.00      1.00      1.00     66925



## Połączone DDoSy

In [14]:
data3 = pd.read_parquet('./IDS2017/Final/Combined.parquet')


X = data3.drop(columns=['Label'])
y = data3['Label']
X_train, X_val, Y_train, Y_val = train_test_split(X,
                                                  y, 
                                                  test_size = 0.3, 
                                                  random_state=2022)
print(X_train.shape, X_val.shape)

model3 = RandomForestClassifier()
model3.fit(X_train, Y_train)
print('Training Accuracy : ', 
      metrics.accuracy_score(Y_train,
                             model3.predict(X_train))*100)
print('Validation Accuracy : ', 
      metrics.accuracy_score(Y_val, 
                             model3.predict(X_val))*100)


confusion_matrix = metrics.confusion_matrix(Y_val, model2.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, model2.predict(X_val), target_names=class_labels, zero_division=0)
print("Classification Report:")
print(classification_report)

(935356, 10) (400868, 10)
Training Accuracy :  99.22018995975864
Validation Accuracy :  99.20422682778371
Confusion Matrix:
                  BENIGN   DDoS  DoS GoldenEye  DoS Hulk  DoS Slowhttptest  \
BENIGN            304117    348              0         0                 0   
DDoS                  53  38420              0         0                 0   
DoS GoldenEye       2651    388              0         0                 0   
DoS Hulk           47654   4112              0         0                 0   
DoS Slowhttptest    1481      0              0         0                 0   
DoS slowloris       1641      0              0         0                 0   
Heartbleed             3      0              0         0                 0   

                  DoS slowloris  Heartbleed  
BENIGN                        0           0  
DDoS                          0           0  
DoS GoldenEye                 0           0  
DoS Hulk                      0           0  
DoS Slowhttptest     