# XAI-Assisted Intrusion Detection System
---

## Load Required Data

In [44]:
# Load the saved IDS model from disk and print its layer summary.
from tensorflow import keras

ids_model_path = '../CICIDS2017/models/ids_dnn_poc.keras'
ids = keras.models.load_model(ids_model_path)
ids.summary()

In [45]:
# Load the saved explainer model; it is applied to the normalized SHAP values
# in the "Predict From Explanations" section.
# Relies on `keras` having been imported by the model-loading cell.
explainer_model_path = '../CICIDS2017/models/ids_explainer_poc.keras'
explainer_ids = keras.models.load_model(explainer_model_path)
explainer_ids.summary()

In [46]:
# Read the test-split features and labels from their CSV exports.
import pandas as pd

x_test_csv = "../CICIDS2017/train_test_split/X_test_poc.csv"
y_test_csv = "../CICIDS2017/train_test_split/y_test_poc.csv"
X_test, y_test = pd.read_csv(x_test_csv), pd.read_csv(y_test_csv)

# Sanity check: both frames are expected to report the same number of rows.
print(X_test.shape, y_test.shape)


(64013, 68) (64013, 2)


In [47]:
# Read the pre-generated adversarial test samples.
# NOTE(review): the "cw" in the file name presumably refers to the attack used
# to craft them (Carlini-Wagner?) — confirm against the generating notebook.
adv_cw_csv = "../CICIDS2017/adversarial_samples/X_test_adv_cw_poc.csv"
X_test_adv_cw = pd.read_csv(adv_cw_csv)
print(X_test_adv_cw.shape)

(2500, 68)


## Create Smaller Sample

In [48]:
import numpy as np

# Number of rows drawn from each of the normal and adversarial sets.
n_samples = 1000

# Draw the normal subset. `sample(...).index` yields index *labels*, so rows are
# selected with `.loc` (label-based) instead of `.iloc` (position-based); the two
# only coincide for the default RangeIndex that `read_csv` produces.
norm_index = X_test.sample(n=n_samples, random_state=30).index
X_norm = X_test.loc[norm_index]
y_norm = y_test.loc[norm_index]
print(X_norm.shape, y_norm.shape)

# Draw the adversarial subset the same way (different seed).
# NOTE(review): the labels are looked up in `y_test`, which assumes row i of
# X_test_adv_cw was crafted from row i of X_test — confirm against the notebook
# that generated the adversarial samples.
adv_index = X_test_adv_cw.sample(n=n_samples, random_state=10).index
X_adv = X_test_adv_cw.loc[adv_index]
y_adv = y_test.loc[adv_index]
print(X_adv.shape, y_adv.shape)

# Targets for the explainer model: later cells pair 1 with the explanations of
# normal samples and 0 with the explanations of adversarial samples.
y_positive = np.array([1] * n_samples)
y_negative = np.array([0] * n_samples)
print(y_positive.shape, y_negative.shape)


(1000, 68) (1000, 2)
(1000, 68) (1000, 2)
(1000,) (1000,)


## Create Datasets

In [49]:
# X = pd.concat([X_norm, X_adv])
# print(X.shape)

# y_ids = pd.concat([y_norm, y_adv])
# print(y_ids.shape)

# y_explainer = np.concatenate([y_positive, y_negative])
# print(y_explainer.shape)

# # shuffle data
# from sklearn.utils import shuffle

# X = shuffle(X, random_state=30)
# y_ids = shuffle(y_ids, random_state=30)
# y_explainer = shuffle(y_explainer, random_state=30)

## Evaluate IDS

In [50]:
import numpy as np
from sklearn.metrics import accuracy_score

# Score the normal sample with the IDS and binarize every output at 0.5,
# giving a boolean matrix with the same layout as y_norm.
ids_scores_norm = ids.predict(X_norm)
y_pred_ids = ids_scores_norm > 0.5
# Alternative kept for reference (collapses the outputs to a single class index):
# y_pred_ids = np.array(y_pred_ids).argmin(axis=1)

ids_accuracy_norm = accuracy_score(y_norm, y_pred_ids)
print(ids_accuracy_norm)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
0.994


In [51]:
# Same evaluation as for the normal sample, now on the adversarial sample.
ids_scores_adv_cw = ids.predict(X_adv)
y_pred_ids_adv_cw = ids_scores_adv_cw > 0.5
# Alternative kept for reference (collapses the outputs to a single class index):
# y_pred_ids_adv_cw = np.array(y_pred_ids_adv_cw).argmin(axis=1)

ids_accuracy_adv_cw = accuracy_score(y_adv, y_pred_ids_adv_cw)
print(ids_accuracy_adv_cw)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
0.825


## Create Explanation

In [52]:
# SHAP explanations of the IDS on the normal sample.
# X_norm is used both as the data handed to the explainer at construction time
# and as the set of rows being explained.
import shap

norm_feature_names = X_norm.columns
explainer_norm = shap.Explainer(ids, X_norm, feature_names=norm_feature_names)
shap_values_norm = explainer_norm(X_norm)

# Reported as (samples, features, model outputs).
print(shap_values_norm.shape)

PermutationExplainer explainer: 1001it [00:58, 14.01it/s]                          

(1000, 68, 2)





In [53]:
# convert shap values to correct format
print(shap_values_norm.shape) # one shap value per feature per sample per class
# Keep only the explanations for model output index 0. The dimensionality guard
# makes the cell safe to re-run: once the class axis has been sliced away, a
# second `[:, :, 0]` on the already two-dimensional values would fail.
# NOTE(review): the original comment read "1 == Benign, 0 == Malicious" —
# confirm which IDS output column index 0 actually corresponds to.
if len(shap_values_norm.shape) == 3:
    shap_values_norm = shap_values_norm[:, :, 0]
print(shap_values_norm.shape) # one shap value per feature per sample

# convert shap values to dataframe (one row per sample, one column per feature)
shap_df = pd.DataFrame(shap_values_norm.values, columns=explainer_norm.feature_names)
print(shap_df.shape)

(1000, 68, 2)
(1000, 68)
(1000, 68)


In [54]:
# SHAP explanations of the IDS on the adversarial sample.
# X_adv is used both as the data handed to the explainer at construction time
# and as the set of rows being explained.
import shap

adv_feature_names = X_adv.columns
explainer = shap.Explainer(ids, X_adv, feature_names=adv_feature_names)
shap_values_adv_cw = explainer(X_adv)

# Reported as (samples, features, model outputs).
print(shap_values_adv_cw.shape)

PermutationExplainer explainer: 1001it [01:00, 13.80it/s]                         

(1000, 68, 2)





In [55]:
# convert shap values to correct format
print(shap_values_adv_cw.shape) # one shap value per feature per sample per class
# Keep only the explanations for model output index 0. The dimensionality guard
# makes the cell safe to re-run: once the class axis has been sliced away, a
# second `[:, :, 0]` on the already two-dimensional values would fail.
# NOTE(review): the original comment read "1 == Benign, 0 == Malicious" —
# confirm which IDS output column index 0 actually corresponds to.
if len(shap_values_adv_cw.shape) == 3:
    shap_values_adv_cw = shap_values_adv_cw[:, :, 0]
print(shap_values_adv_cw.shape) # one shap value per feature per sample

# convert shap values to dataframe (one row per sample, one column per feature)
shap_df_adv_cw = pd.DataFrame(shap_values_adv_cw.values, columns=explainer.feature_names)
print(shap_df_adv_cw.shape)

(1000, 68, 2)
(1000, 68)
(1000, 68)


## Create Explanation Dataset

In [56]:
from sklearn.utils import shuffle

# Stack the SHAP frames: explanations of normal samples first, then those of
# adversarial samples. The target vector follows the same order (1s, then 0s).
explanation_frames = [shap_df, shap_df_adv_cw]
X = pd.concat(explanation_frames)
print(X.shape)

explanation_targets = [y_positive, y_negative]
y = np.concatenate(explanation_targets)
print(y.shape)

# Shuffle features and targets together so rows and labels stay aligned.
X, y = shuffle(X, y, random_state=30)

(2000, 68)
(2000,)


## Normalization

In [57]:
# check for columns which contain only 0 values
# SHAP values are signed, so a column can sum to 0 without being all zeros;
# test every entry instead of the column sum.
print("Columns which contain only 0 values....")
zero_columns = X.columns[(X == 0).all()]
print(f"Zero Columns: {zero_columns}")
# drop columns with only 0 values (plain reassignment instead of an in-place drop)
X = X.drop(columns=zero_columns)
print("Dropped Zero Columns....")
print(X.shape)

Columns which contain only 0 values....
Zero Columns: Index([], dtype='object')
Dropped Zero Columns....
(2000, 68)


In [58]:
from sklearn.preprocessing import MinMaxScaler

# Columns shown before and after scaling as a quick visual check.
preview_columns = [' Destination Port', ' Flow Duration', ' Total Fwd Packets']

print("No Normalization....")
print(X[preview_columns].head(2))

# Rescale every SHAP column with a min-max scaler.
# NOTE(review): the scaler is fit on this evaluation set itself; if the
# explainer model was trained on inputs scaled with a different fitted scaler,
# the two scalings will not match — confirm against the training notebook.
print("Min-Max Normalization....")
min_max_scaler = MinMaxScaler()
scaled_values = min_max_scaler.fit_transform(X)
# The rebuilt frame gets a fresh 0..n-1 index; row order is unchanged, so it
# still lines up positionally with `y`.
min_max_norm_feature_df = pd.DataFrame(scaled_values, columns=X.columns)
print(min_max_norm_feature_df.shape)
print(min_max_norm_feature_df[preview_columns].head(2))

No Normalization....
     Destination Port  Flow Duration  Total Fwd Packets
856         -0.069396       0.081157          -0.005508
364          0.186336      -0.001053           0.000847
Min-Max Normalization....
(2000, 68)
   Destination Port  Flow Duration  Total Fwd Packets
0          0.059083       0.750300           0.808561
1          0.606835       0.365714           0.836614


## Predict From Explanations

In [67]:
# From here on X refers to the min-max-normalized SHAP features (the unscaled
# frame previously bound to X is no longer reachable under that name).
X = min_max_norm_feature_df

In [68]:
from sklearn.metrics import classification_report, confusion_matrix

# Run the explainer model on the normalized SHAP features; it returns one row
# of two scores per sample.
y_attack_pred = explainer_ids.predict(X)
print(y_attack_pred[:2])
# Collapse the two scores to a single label by taking the index of the SMALLER
# score, so a sample whose column-0 score dominates is labelled 1.
# NOTE(review): this presumes output column 0 encodes label 1 — confirm against
# the label encoding used when the explainer model was trained.
y_attack_pred = np.array(y_attack_pred).argmin(axis=1)
print(y_attack_pred[:2])

# print classification report
# zero_division=0 states explicitly that precision for a class that is never
# predicted is reported as 0 (instead of relying on the warn-and-zero default).
print(classification_report(y, y_attack_pred, zero_division=0))

# print confusion matrix
# labels=[0, 1] pins the matrix to 2x2 so the four-way unpack below cannot
# break when one of the classes is missing from both y and the predictions.
tn, fp, fn, tp = confusion_matrix(y, y_attack_pred, labels=[0, 1]).ravel()
print(f"True Negative Rate: {tn/(tn+fp)*100:.2f}%")
print(f"False Positive Rate: {fp/(tn+fp)*100:.2f}%")
print(f"True Positive Rate: {tp/(tp+fn)*100:.2f}%")
print(f"False Negative Rate: {fn/(tp+fn)*100:.2f}%")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 983us/step
[[9.9999994e-01 5.7590014e-20]
 [9.9999994e-01 7.5433025e-23]]
[1 1]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1000
           1       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000

True Negative Rate: 0.00%
False Positive Rate: 100.00%
True Positive Rate: 100.00%
False Negative Rate: 0.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [64]:
# find indices of misclassified samples
correctly_classified_attack_samples = np.where((y == 0) & (y_attack_pred == 0))[0]
print(correctly_classified_attack_samples.shape)

correctly_classified_benign_samples = np.where((y == 1) & (y_attack_pred == 1))[0]
print(correctly_classified_benign_samples.shape)


(998,)
(209,)
