In [3]:
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from keras.models import load_model

from adversarialdefence.utils import ModelUtils, GeneralUtils

### ***Data Loading***

In [4]:
# Read the preprocessed CICIDS2017 CSV and discard its first column in a
# single chained expression (no in-place mutation of the loaded frame).
# NOTE(review): the first column is presumably a saved row index - confirm.
df = (
    pd.read_csv('../csv/CICIDS2017_improved-preprocessed.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
)
df.shape

(1715326, 49)

### ***Data Splitting***

In [5]:
# Partition the dataset on the 'Label' column: rows with Label == 0 form the
# benign subset, rows with Label == 1 form the anomalous subset.
df_benign = df.loc[df['Label'].eq(0)]
df_anomalous = df.loc[df['Label'].eq(1)]

# Report the size of each subset.
print(f'Number of benign samples: {df_benign.shape[0]}')
print(f'Number of anomalous samples: {df_anomalous.shape[0]}')

Number of benign samples: 1432918
Number of anomalous samples: 282408


In [6]:
# Fixed seed so the sampled test set (and every metric computed from it) is
# the same on each Restart & Run All.
RANDOM_STATE = 42

# Keep 20% of the benign rows. The sample is drawn from `df` directly rather
# than from the previous value of `df_benign`, so re-running this cell does
# not keep shrinking the benign subset (20% of 20% of ...).
df_benign = df[df['Label'] == 0].sample(frac=0.20, random_state=RANDOM_STATE)

# Merge the sampled benign rows with all anomalous rows and shuffle them.
X_test = pd.concat([df_benign, df_anomalous]).sample(frac=1, random_state=RANDOM_STATE)
# pop() removes 'Label' from X_test in place and returns it as the target vector.
y_test = X_test.pop('Label')

print(f'Test Dataset: {df_benign.shape[0]} - {df_anomalous.shape[0]}')

Test Dataset: 286584 - 282408


### ***Data Standardization***

In [7]:
# Load the two persisted scalers: one for the DNN input, one for the autoencoder input.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts from a trusted source, and keep the installed
# scikit-learn version consistent with the one that produced them.
std_scaler_dnn, std_scaler_aut = (
    joblib.load(scaler_path)
    for scaler_path in ('../modelli/std_scaler_dnn.bin', '../modelli/std_scaler_aut.bin')
)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [10]:
# Apply each model's own persisted scaler to the same raw test features,
# producing one transformed copy per model.
X_test_dnn, X_test_aut = (
    scaler.transform(X_test) for scaler in (std_scaler_dnn, std_scaler_aut)
)

### ***Testing***

In [8]:
# Restore the two trained Keras models from their HDF5 checkpoints:
# `dnn` is the classifier, `aut` is the autoencoder.
dnn, aut = (
    load_model(checkpoint_path)
    for checkpoint_path in (
        '../modelli/DNN_best_weights_99.hdf5',
        '../modelli/autoencoder_best_weights_96-96.hdf5',
    )
)







In [9]:
def define_target_preds(row):
    """Combine the two detectors' verdicts for a single sample.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing the
            'preds_dnn' and 'preds_aut' entries.

    Returns:
        bool: False only when both entries compare equal to False; True in
        every other case.
    """
    # The explicit `== False` comparisons are kept deliberately: any value
    # that does not compare equal to False counts as a positive verdict.
    both_negative = row['preds_dnn'] == False and row['preds_aut'] == False  # noqa: E712
    return not both_negative

In [11]:
# Obtain one binary prediction vector per model from the project helpers.
# NOTE(review): presumably the supervised helper thresholds the DNN output and
# the unsupervised helper thresholds the autoencoder's reconstruction error -
# confirm against adversarialdefence.utils.ModelUtils.
preds_dnn, preds_aut = (
    ModelUtils.binary_preds_supervised(dnn, X_test_dnn),
    ModelUtils.binary_preds_unsupervised(aut, X_test_aut),
)

In [12]:
# Attach both models' binary predictions to a copy of the test features.
X_test_with_preds = X_test.assign(preds_dnn=preds_dnn, preds_aut=np.array(preds_aut))

# Target-system verdict: False only when BOTH prediction columns equal False,
# True otherwise. This is the same rule as `define_target_preds`, evaluated
# column-wise: a row-wise `DataFrame.apply(..., axis=1)` would call a Python
# function once per row (and build a Series for each row), which is very slow
# on a test set of this size.
X_test_with_preds['preds_target'] = ~(
    X_test_with_preds['preds_dnn'].eq(False) & X_test_with_preds['preds_aut'].eq(False)
)

In [13]:
# Print the header followed by the report. With sep="\n" the single print()
# call emits exactly what two consecutive print() calls would.
# Ground truth and combined predictions are both cast to bool so the report's
# class labels are False / True.
print(
    "Target System classification report on test dataset\n\n",
    classification_report(
        y_true=y_test.astype(bool),
        y_pred=X_test_with_preds['preds_target'].astype(bool),
    ),
    sep="\n",
)

Target System classification report on test dataset


              precision    recall  f1-score   support

       False       0.98      0.94      0.96    286584
        True       0.94      0.98      0.96    282408

    accuracy                           0.96    568992
   macro avg       0.96      0.96      0.96    568992
weighted avg       0.96      0.96      0.96    568992

