# Prototype: Alpha

| Properties      | Data    |
|---------------|-----------|
| *Labels* | `['BENIGN', 'DDoS']` |
| *Normalization* | `Min-Max` |
| *Sample Size* | `200` per label (`400` total) |
| *Adversarial Attack* | `FGSM` |
| *Explanations* | `SHAP` |


---

In [17]:
# Make the parent directory importable so modules from the `functions`
# directory can be imported by the cells below.
import os
import sys

# Absolute path of the parent of the current working directory.
# os.path.abspath already normalises '..', so no os.path.join is needed.
_parent_dir = os.path.abspath('..')

# Guard the append: without it, every re-run of this cell would add another
# duplicate entry to sys.path.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

## Data Preprocessing

In [18]:
import importlib

import functions.data_preprocessing as dp

# Pick up edits made to the module since it was first imported in this kernel.
importlib.reload(dp)

# Preprocessing configuration.
label_names = ['BENIGN', 'DDoS']  # labels to keep in the dataset
sample_size = 200                 # number of rows sampled for each label
encoding_type = 0                 # 0 -> binary label encoding
norm_type = 0                     # 0 -> min-max normalization

# preprocess returns the labels first, then the features.
label_df, feature_df = dp.preprocess(
    encoding_type,
    norm_type,
    label_names=label_names,
    sample_size=sample_size,
)

# Show how many rows each label combination has (displayed as the cell output).
label_df.value_counts()

--- Combining all CICIDS2017 files ---
Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv
Friday-WorkingHours-Morning.pcap_ISCX.csv
Monday-WorkingHours.pcap_ISCX.csv
Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Tuesday-WorkingHours.pcap_ISCX.csv
Wednesday-workingHours.pcap_ISCX.csv
--- Removing NaN and Infinity values ---
Number of rows with NaN values:  1358
Removing NaN values....
Number of rows with Infinity values: 1509
Removing Infinity values....
--- Extracting labels ---
 Label
BENIGN    2271320
DDoS       128025
Name: count, dtype: int64
--- Sampling balanced data ---
Sample to shape: (400, 79)
--- Splitting labels and features ---
--- Encoding labels as binary one-hot values ---
--- Removing irrelevant features ---
Removed Zero Columns: [' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' RST Flag Count', ' CWE Flag Count', ' ECE Flag Count', 'Fwd Avg By

BENIGN  ATTACK
False   True      200
True    False     200
Name: count, dtype: int64

## Split Data

In [19]:
from sklearn.model_selection import train_test_split

# Fraction of rows held out for testing, and the seed that makes the
# shuffle repeatable between runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    feature_df,
    label_df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Print the shape of each partition on one line, in the order
# X_train, X_test, y_train, y_test.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(320, 66) (80, 66) (320, 2) (80, 2)


## Create IDS

In [20]:
import importlib

import functions.intrusion_detection_system as ids

# Reload so that changes to the module take effect without restarting the kernel.
importlib.reload(ids)

# Build the intrusion detection model from the train/test partitions.
# NOTE(review): how X_test / y_test are used inside the builder is not
# visible from this notebook - confirm in the module.
ids_model = ids.build_intrusion_detection_system(
    X_train,
    y_train,
    X_test,
    y_test,
)

Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.5538 - loss: 0.7081 - val_accuracy: 0.4219 - val_loss: 0.7160
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5351 - loss: 0.6880 - val_accuracy: 0.4375 - val_loss: 0.6921
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5216 - loss: 0.6695 - val_accuracy: 0.4531 - val_loss: 0.6703
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.6417 - loss: 0.6416 - val_accuracy: 0.6875 - val_loss: 0.6541
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.7860 - loss: 0.6257 - val_accuracy: 0.7031 - val_loss: 0.6406
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7845 - loss: 0.6144 - val_accuracy: 0.7344 - val_loss: 0.6254
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━

## Generate Attacks

In [21]:
import importlib

import functions.attack_generator as ag

# Reload so that changes to the module take effect without restarting the kernel.
importlib.reload(ag)

# Wrap the trained IDS model in the form the attack helpers expect.
art_model = ag.convert_to_art_model(ids_model, X_train)

# Disabled experiment (kept as a note): a target array was built with
# np.zeros_like(y_train) and column 0 set to 1, so that the desired predicted
# label for every sample was [1, 0] = BENIGN. No target is passed to the
# generator below.

# Craft FGSM adversarial examples from the training features.
X_adv_fgsm = ag.generate_fgsm_attacks(art_model, X_train)

# Score the model on the adversarial examples against the original training
# labels. Left as the last expression so its return value is echoed by the cell.
ag.evaluate_art_model(art_model, X_adv_fgsm, y_train)

Adversarial FGSM examples generated. Shape: (320, 66)
Accuracy: 0.4625
              precision    recall  f1-score   support

      BENIGN       0.41      0.22      0.29       156
      ATTACK       0.48      0.69      0.57       164

   micro avg       0.46      0.46      0.46       320
   macro avg       0.44      0.46      0.43       320
weighted avg       0.45      0.46      0.43       320
 samples avg       0.46      0.46      0.46       320

Confusion Matrix: Positive == BENIGN
TN: 113, FP: 51, FN: 121, TP: 35


0.4625

## Explainer

In [59]:
import importlib

import functions.explainer as exp

# Reload so that changes to the module take effect without restarting the kernel.
importlib.reload(exp)


def _explain(shap_explainer, samples, feature_names):
    """Compute SHAP values for `samples`, print their shape, and tabulate them.

    Args:
        shap_explainer: explainer object returned by exp.generate_shap_explainer.
        samples: feature rows to explain.
        feature_names: column labels passed to exp.convert_shap_values_to_pd.

    Returns:
        A pair (raw SHAP values, the converted pandas object).
    """
    values = exp.generate_shap_values(shap_explainer, samples)
    print(values.shape)
    return values, exp.convert_shap_values_to_pd(values, feature_names)


# One explainer, built from the IDS model and the training features, is shared
# by both runs below.
explainer = exp.generate_shap_explainer(ids_model, X_train)

# Explanations for the unmodified training samples.
shap_values, shap_values_df = _explain(explainer, X_train, X_train.columns)

# Explanations for the FGSM adversarial samples; the training column names are
# reused as labels.
shap_values_adv, shap_values_adv_df = _explain(explainer, X_adv_fgsm, X_train.columns)

PermutationExplainer explainer: 321it [00:20,  8.72it/s]                         


(320, 66)


PermutationExplainer explainer: 321it [00:18,  8.00it/s]                         

(320, 66)



