# Prototype: Alpha

| Properties      | Data    |
|---------------|-----------|
| *Labels* | `['BENIGN', 'DDoS']` |
| *Normalization* | `Min-Max` |
| *Sample Size* | `1000` per label (2000 rows in total) |
| *Adversarial Attack* | `FGSM` |
| *Explanations* | `SHAP` |


---

In [85]:
# Make the parent of the current working directory importable so that the
# `functions` package used by the cells below can be imported.
import sys
import os

PROJECT_ROOT = os.path.abspath(os.path.join('..'))
# Guard against duplicates: without this check every re-run of the cell
# appends the same directory to sys.path again.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

## Data Preprocessing

In [86]:
import importlib

import functions.data_preprocessing as dp

# Reload so edits made to the module after the kernel started are picked up.
importlib.reload(dp)

# --- Preprocessing configuration ---
label_names = ['BENIGN', 'DDoS']  # labels to include
sample_size = 1000                # sample size for each label
encoding_type = 0                 # binary encoding
norm_type = 0                     # min-max normalization

# Returns the label frame first, then the feature frame.
label_df, feature_df = dp.preprocess(
    encoding_type,
    norm_type,
    label_names=label_names,
    sample_size=sample_size,
)

# Displayed as the cell output: number of rows per label combination.
label_df.value_counts()

--- Combining all CICIDS2017 files ---
Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv
Friday-WorkingHours-Morning.pcap_ISCX.csv
Monday-WorkingHours.pcap_ISCX.csv
Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Tuesday-WorkingHours.pcap_ISCX.csv
Wednesday-workingHours.pcap_ISCX.csv
--- Removing NaN and Infinity values ---
Number of rows with NaN values:  1358
Removing NaN values....
Number of rows with Infinity values: 1509
Removing Infinity values....
--- Extracting labels ---
 Label
BENIGN    2271320
DDoS       128025
Name: count, dtype: int64
--- Sampling balanced data ---
Sample to shape: (2000, 79)
--- Splitting labels and features ---
--- Encoding labels as binary one-hot values ---
--- Removing irrelevant features ---
Removed Zero Columns: [' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' RST Flag Count', ' CWE Flag Count', ' ECE Flag Count', 'Fwd Avg B

BENIGN  ATTACK
False   True      1000
True    False     1000
Name: count, dtype: int64

## Split Data

In [87]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature_df,
    label_df,
    test_size=0.2,
    random_state=42,
)

# Print the shape of each partition on one line.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(1600, 66) (400, 66) (1600, 2) (400, 2)


## Create IDS

In [88]:
import importlib

import functions.intrusion_detection_system as ids

# Reload so edits made to the module after the kernel started are picked up.
importlib.reload(ids)

# Build the intrusion detection model from the train/test partitions.
# NOTE(review): the test partition is passed to the builder too, and the
# training log reports val_* metrics - it may be used as validation data;
# confirm in functions/intrusion_detection_system.
ids_model = ids.build_intrusion_detection_system(
    X_train,
    y_train,
    X_test,
    y_test,
)

Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5545 - loss: 0.6725 - val_accuracy: 0.7406 - val_loss: 0.6267
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7523 - loss: 0.6045 - val_accuracy: 0.7969 - val_loss: 0.5615
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7937 - loss: 0.5342 - val_accuracy: 0.8781 - val_loss: 0.4761
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8997 - loss: 0.4298 - val_accuracy: 0.9594 - val_loss: 0.3629
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9700 - loss: 0.3083 - val_accuracy: 0.9656 - val_loss: 0.2407
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9767 - loss: 0.1991 - val_accuracy: 0.9688 - val_loss: 0.1469
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━

## Generate Attacks

In [89]:
import importlib

import functions.attack_generator as ag

# Reload so edits made to the module after the kernel started are picked up.
importlib.reload(ag)

# Convert the trained IDS into the model object the attack helpers take
# (presumably an ART classifier wrapper - confirm in functions/attack_generator).
art_model = ag.convert_to_art_model(ids_model, X_train)

# Disabled prototype: build target labels so every sample is pushed towards
# BENIGN. Kept for reference; it is not used by the calls below.
# import numpy as np
# target_label = np.zeros_like(y_train)
# target_label[:, 0] = 1 # desired predicted label = [1, 0] = BENIGN
# print(target_label[:3])

# Generate FGSM adversarial examples from the training features.
X_adv_fgsm = ag.generate_fgsm_attacks(art_model, X_train)

# Evaluate the model on the adversarial examples against the training labels.
# Left as the last expression so the returned value is shown as cell output.
ag.evaluate_art_model(art_model, X_adv_fgsm, y_train)

Adversarial FGSM examples generated. Shape: (1600, 66)
Accuracy: 0.3475
              precision    recall  f1-score   support

      BENIGN       0.41      0.69      0.52       801
      ATTACK       0.00      0.00      0.00       799

   micro avg       0.35      0.35      0.35      1600
   macro avg       0.21      0.35      0.26      1600
weighted avg       0.21      0.35      0.26      1600
 samples avg       0.35      0.35      0.35      1600

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 799, FN: 245, TP: 556


0.3475