In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer, MinMaxScaler, StandardScaler
from sklearn.metrics import auc, accuracy_score, precision_score, recall_score, roc_curve, precision_recall_curve
import tensorflow as tf
from keras.callbacks import Callback
import warnings

2023-05-15 06:54:10.266484: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-15 06:54:10.396666: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-15 06:54:10.396685: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-05-15 06:54:11.014726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
class AUROCEarlyStoppingPruneCallback(Callback):
    """Stop training when a monitored quantity has stopped improving.
    # Arguments
        x_val:
            Input vector of validation data.
        y_val:
            Labels for input vector of validation data.
        trial:
            A :class:`~optuna.trial.Trial` corresponding to the current evaluation of the
            objective function.
        min_delta: minimum change in the monitored quantity
            to qualify as an improvement, i.e. an absolute
            change of less than min_delta, will count as no
            improvement.
        patience: number of epochs that produced the monitored
            quantity with no improvement after which training will
            be stopped.
            Validation quantities may not be produced for every
            epoch, if the validation frequency
            (`model.fit(validation_freq=5)`) is greater than one.
        verbose: verbosity mode.
        mode: one of {auto, min, max}. In `min` mode,
            training will stop when the quantity
            monitored has stopped decreasing; in `max`
            mode it will stop when the quantity
            monitored has stopped increasing; in `auto`
            mode, the direction is automatically inferred
            from the name of the monitored quantity.
        baseline: Baseline value for the monitored quantity to reach.
            Training will stop if the model doesn't show improvement
            over the baseline.
        restore_best_weights: whether to restore model weights from
            the epoch with the best value of the monitored quantity.
            If False, the model weights obtained at the last step of
            training are used.
    """

    def __init__(self,
                 x_val, 
                 y_val, 
                 min_delta=0,
                 patience=0,
                 verbose=0,
                 mode='auto',
                 baseline=None,
                 restore_best_weights=False):
        super(AUROCEarlyStoppingPruneCallback, self).__init__()

        self.x_val = x_val
        self.y_val = y_val
        self.baseline = baseline
        self.patience = patience
        self.verbose = verbose
        self.min_delta = min_delta
        self.wait = 0
        self.stopped_epoch = 0
        self.restore_best_weights = restore_best_weights
        self.best_weights = None

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('EarlyStopping mode %s is unknown, '
                          'fallback to auto mode.' % mode,
                          RuntimeWarning)
            mode = 'auto'

        if mode == 'min':
            self.monitor_op = np.less
        else:
            self.monitor_op = np.greater

        if self.monitor_op == np.greater:
            self.min_delta *= 1
        else:
            self.min_delta *= -1


    def on_train_begin(self, logs=None):
        # Allow instances to be re-used
        self.wait = 0
        self.stopped_epoch = 0
        if self.baseline is not None:
            self.best = self.baseline
        else:
            self.best = np.Inf if self.monitor_op == np.less else -np.Inf

    def on_epoch_end(self, epoch, logs=None):
        current = self.get_AUROC()
        if current is None:
            return
        
        if self.verbose > 0:
            print(f'Epoch #{epoch}\tValidation AUROC: {current}\tBest AUROC: {self.best}')
        

        if self.monitor_op(current - self.min_delta, self.best):
            self.best = current
            self.wait = 0
            if self.restore_best_weights:
                self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                if self.restore_best_weights:
                    if self.verbose > 0:
                        print('Restoring model weights from the end of '
                              'the best epoch')
                    self.model.set_weights(self.best_weights)

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0 and self.verbose > 0:
            print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))
    
    # Evaluation on custom metric
    def get_AUROC(self):
        x_pred = self.model.predict(self.x_val, verbose=0)
        sse = np.mean(abs(self.x_val - x_pred), axis=1)
        fpr, tpr, thresholds = roc_curve(self.y_val, sse)
        return auc(fpr, tpr)


In [3]:
datasets = {
    'NF-UNSW-NB15-V2' : './NF-UNSW-NB15-V2.parquet',
}

features_to_remove = ['L4_SRC_PORT', 'L4_DST_PORT', 'Attack', 'Label']

scaler = MinMaxScaler()

x_train = {}
x_val = {}
x_test = {}

In [4]:
for key, value in datasets.items():
    print(f'Processing {key}')
    print('='*20 + '\n')
    df = pd.read_parquet(value)
    Y = df.Label
    X_train, X_test, y_train, y_val = train_test_split(df, Y, test_size=0.3,stratify=df.Attack, random_state=42)
    del df
    del Y
    gc.collect()
    X_val, X_test, y_val, y_test = train_test_split(X_test, X_test.Label, test_size=0.15, stratify=X_test.Attack, random_state=42)
    X_train = X_train[X_train.Label==0].drop(columns=features_to_remove, axis=1)
    X_val.drop(columns=features_to_remove, axis=1, inplace=True)
    X_test.drop(columns=features_to_remove, axis=1, inplace=True)
    x_train[key] = scaler.fit_transform(X_train)
    x_val[key] = (scaler.transform(X_val), y_val)
    x_test[key] = (scaler.transform(X_test), y_test)
    del X_train
    del X_val
    del X_test
    gc.collect()
print()
print('Finished processing data sources.\n')

Processing NF-UNSW-NB15-V2


Finished processing data sources.



In [11]:
def create_generic_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(39,)))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dense(24, activation='relu'))
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(24, activation='relu'))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dense(39, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model



'''
Function for fitting the model on the training data, and perform intra- and inter-dataset evaluation.

'''
def fit_and_test_model(model, name):

    print(f'Fitting and testing for {name}:')
    print('='*60 + '\n')
    
    #Train the model, using only the training data originating from the specific dataset.
    history = model.fit(
            x_train[name],
            x_train[name],
            epochs=15,
            batch_size=128,
            shuffle=True,
            verbose=0,
            callbacks=[
                AUROCEarlyStoppingPruneCallback(
                        x_val[name][0], 
                        x_val[name][1],
                        min_delta=0.0001,
                        patience=5,
                        mode='max',
                        restore_best_weights=True,
                        verbose=0
                )
            ]

    )
    
    #Evaluation of the model on the test sets of ALL datasets
    
    for test_name in datasets.keys():
        print(f'Evaluation on {test_name}:\n')
        
        # Predictions and losses
        val_predictions = model.predict(x_val[test_name][0], verbose=0)
        test_predictions =  model.predict(x_test[test_name][0], verbose=0)
        val_mae = np.mean(abs(x_val[test_name][0] - val_predictions), axis=1)
        print(x_val[test_name][0])
        test_mae = np.mean(abs(x_test[test_name][0]-test_predictions), axis=1)
    
        #Evaluate model using roc-curve and the Youden J statistic
        fpr, tpr, thresholds = roc_curve(x_val[test_name][1], val_mae)
        J = tpr - fpr
        result = test_mae > thresholds[J.argmax()]
        print('\tUSING ROC-CURVE:')
        print(f'\t\tAccuracy={accuracy_score(x_test[test_name][1], result)}\n\t\tPrecision={precision_score(x_test[test_name][1], result)}\n\t\tRecall={recall_score(x_test[test_name][1], result)}\n')

        #Evaluate model using precision-recall curve
        precision, recall, thresholds = precision_recall_curve(x_val[test_name][1], val_mae)
            #The Euclidean distance between each point on the curve and the upper right point (1,1) (=the ideal point)
        distance = np.sqrt((1-precision)**2 + (1-recall)**2)
            #The F1-score
        f1 = (2*precision*recall)/(precision+recall)
        
        print('\tUSING PR-CURVE and DISTANCE:')
        result = test_mae > thresholds[distance.argmin()]
        print(f'\t\tAccuracy={accuracy_score(x_test[test_name][1], result)}\n\t\tPrecision={precision_score(x_test[test_name][1], result)}\n\t\tRecall={recall_score(x_test[test_name][1], result)}\n')
        
        print('\tUSING PR-CURVE and F1:')
        result = test_mae > thresholds[f1.argmax()]
        print(f'\t\tAccuracy={accuracy_score(x_test[test_name][1], result)}\n\t\tPrecision={precision_score(x_test[test_name][1], result)}\n\t\tRecall={recall_score(x_test[test_name][1], result)}\n')
        print()

    
#Fit and test UNSW
fit_and_test_model(create_generic_model(), 'NF-UNSW-NB15-V2')

Fitting and testing for NF-UNSW-NB15-V2:

Evaluation on NF-UNSW-NB15-V2:

[[3.81679389e-02 4.08163265e-03 3.44502217e-04 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.81679389e-02 0.00000000e+00 4.42060367e-05 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.81679389e-02 0.00000000e+00 4.11989176e-04 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [3.81679389e-02 4.08163265e-03 1.73221147e-04 ... 0.00000000e+00
  0.00000000e+00 6.91842900e-01]
 [3.81679389e-02 0.00000000e+00 4.09771945e-04 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.81679389e-02 0.00000000e+00 3.80947946e-04 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]
	USING ROC-CURVE:
		Accuracy=0.9815220795490135
		Precision=0.6851812138108514
		Recall=0.945546019532406

	USING PR-CURVE and DISTANCE:
		Accuracy=0.983982819560646
		Precision=0.7263427109974424
		Recall=0.9245338857650193

	USING PR-CURVE and F1:
		Accuracy=0.9622052704576977
		Precision=0.0
		Recall=0.0




  f1 = (2*precision*recall)/(precision+recall)
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
x_train['NF-UNSW-NB15-V2'].shape

(1338166, 39)