## Środa

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import metrics


data = pd.read_parquet('./IDS2017/Final/Wednesday.parquet')

X = data.drop(columns=['Label'])
y = data['Label']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, Y_train, Y_val = train_test_split(X_scaled, y, test_size=0.3, random_state=2022)

clf = LogisticRegression(random_state=0, max_iter=1000)

clf.fit(X_train, Y_train)

y_pred_train = clf.predict(X_train)
y_pred_val = clf.predict(X_val)

print('Training Accuracy : ', metrics.accuracy_score(Y_train, y_pred_train) * 100)
print('Validation Accuracy : ', metrics.accuracy_score(Y_val, y_pred_val) * 100)

class_labels = y.unique()
confusion_matrix = metrics.confusion_matrix(Y_val, clf.predict(X_val))
confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, clf.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)

Training Accuracy :  92.7999457111835
Validation Accuracy :  92.86970100683601
Confusion Matrix:
                  BENIGN  DoS slowloris  DoS Slowhttptest  DoS Hulk  \
BENIGN            124074             47               770         4   
DoS slowloris       1097            131              1860         0   
DoS Slowhttptest    5856            304             45878         0   
DoS Hulk            1455              0                62         5   
DoS GoldenEye       1588              0                 0         0   
Heartbleed             1              0                 0         0   

                  DoS GoldenEye  Heartbleed  
BENIGN                       12           0  
DoS slowloris                 0           0  
DoS Slowhttptest              0           3  
DoS Hulk                      0           0  
DoS GoldenEye                 0           0  
Heartbleed                    0           1  
Classification Report:
                  precision    recall  f1-score   support

 

## Piątek

In [2]:
data2 = pd.read_parquet('./IDS2017/Final/Friday.parquet')

X = data2.drop(columns=['Label'])
y = data2['Label']

scaler2 = StandardScaler()
X_scaled = scaler2.fit_transform(X)

X_train, X_val, Y_train, Y_val = train_test_split(X_scaled, y, test_size=0.3, random_state=2022)

clf2 = LogisticRegression(random_state=0, max_iter=1000)

clf2.fit(X_train, Y_train)

y_pred_train = clf2.predict(X_train)
y_pred_val = clf2.predict(X_val)

print('Training Accuracy : ', metrics.accuracy_score(Y_train, y_pred_train) * 100)
print('Validation Accuracy : ', metrics.accuracy_score(Y_val, y_pred_val) * 100)

confusion_matrix = metrics.confusion_matrix(Y_val, clf2.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, clf2.predict(X_val), target_names=class_labels)
print("Classification Report:")
print(classification_report)

Training Accuracy :  97.67477602669109
Validation Accuracy :  97.6421367202092
Confusion Matrix:
        BENIGN   DDoS
BENIGN   27034   1494
DDoS        84  38313
Classification Report:
              precision    recall  f1-score   support

      BENIGN       1.00      0.95      0.97     28528
        DDoS       0.96      1.00      0.98     38397

    accuracy                           0.98     66925
   macro avg       0.98      0.97      0.98     66925
weighted avg       0.98      0.98      0.98     66925



In [3]:
data3 = pd.read_parquet('./IDS2017/Final/Combined.parquet')

X = data3.drop(columns=['Label'])
y = data3['Label']

scaler3 = StandardScaler()
X_scaled = scaler3.fit_transform(X)

X_train, X_val, Y_train, Y_val = train_test_split(X_scaled, y, test_size=0.3, random_state=2022)

clf3 = LogisticRegression(random_state=0, max_iter=1000)

clf3.fit(X_train, Y_train)

y_pred_train = clf3.predict(X_train)
y_pred_val = clf3.predict(X_val)

print('Training Accuracy : ', metrics.accuracy_score(Y_train, y_pred_train) * 100)
print('Validation Accuracy : ', metrics.accuracy_score(Y_val, y_pred_val) * 100)

confusion_matrix = metrics.confusion_matrix(Y_val, clf3.predict(X_val))
class_labels = sorted(Y_val.unique())

confusion_df = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)

print("Confusion Matrix:")
print(confusion_df)

classification_report = metrics.classification_report(Y_val, clf3.predict(X_val), target_names=class_labels, zero_division=0)
print("Classification Report:")
print(classification_report)

Training Accuracy :  93.16570375343719
Validation Accuracy :  93.2279453585719
Confusion Matrix:
                  BENIGN   DDoS  DoS GoldenEye  DoS Hulk  DoS Slowhttptest  \
BENIGN            303471     61             16       917                 0   
DDoS               13970  24503              0         0                 0   
DoS GoldenEye       1127      6            101      1805                 0   
DoS Hulk            5861     12            248     45645                 0   
DoS Slowhttptest    1406     75              0         0                 0   
DoS slowloris       1641      0              0         0                 0   
Heartbleed             2      0              0         0                 0   

                  DoS slowloris  Heartbleed  
BENIGN                        0           0  
DDoS                          0           0  
DoS GoldenEye                 0           0  
DoS Hulk                      0           0  
DoS Slowhttptest              0           0  
Do