In [46]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

from efc import EnergyBasedFlowClassifier

from keras.layers import Input
from keras.layers import Dense

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import IsolationForest
from sklearn.metrics import f1_score
from sklearn.metrics import fbeta_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import LocalOutlierFactor
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
import xgboost as xgb

from anomaly_flow.utils.binary_processing import split_flag_columns

In [47]:
APPLY_SYNTHETIC = False
APPLY_REAL = False
SAME_SCALE = True
DATASET = "NF-BoT-IoT-v2-DDoS"

In [48]:
dtypes_netflow = {
    "IPV4_SRC_ADDR":                "object",
    "L4_SRC_PORT":                  "float32",
    "IPV4_DST_ADDR":                "object",
    "L4_DST_PORT":                  "float32",
    "PROTOCOL":                     "float32",
    "L7_PROTO":                     "float64",
    "IN_BYTES":                     "float32",
    "IN_PKTS":                      "float32",
    "OUT_BYTES":                    "float32",
    "OUT_PKTS":                     "float32",
    "TCP_FLAGS":                    "int32",
    "CLIENT_TCP_FLAGS":             "int32",
    "SERVER_TCP_FLAGS":             "int32",
    "FLOW_DURATION_MILLISECONDS":   "float32",
    "DURATION_IN":                  "float32",
    "DURATION_OUT":                 "float32",
    "MIN_TTL":                      "float32",
    "MAX_TTL":                      "float32",
    "LONGEST_FLOW_PKT":             "float32",
    "SHORTEST_FLOW_PKT":            "float32",
    "MIN_IP_PKT_LEN":               "float32",
    "MAX_IP_PKT_LEN":               "float32",
    "SRC_TO_DST_SECOND_BYTES":      "float64",
    "DST_TO_SRC_SECOND_BYTES":      "float64",
    "RETRANSMITTED_IN_BYTES":       "float32",
    "RETRANSMITTED_IN_PKTS":        "float32",
    "RETRANSMITTED_OUT_BYTES":      "float32",
    "RETRANSMITTED_OUT_PKTS":       "float32",
    "SRC_TO_DST_AVG_THROUGHPUT":    "float32",
    "DST_TO_SRC_AVG_THROUGHPUT":    "float32",
    "NUM_PKTS_UP_TO_128_BYTES":     "float32",
    "NUM_PKTS_128_TO_256_BYTES":    "float32",
    "NUM_PKTS_256_TO_512_BYTES":    "float32",
    "NUM_PKTS_512_TO_1024_BYTES":   "float32",
    "NUM_PKTS_1024_TO_1514_BYTES":  "float32",
    "TCP_WIN_MAX_IN":               "float32",
    "TCP_WIN_MAX_OUT":              "float32",
    "ICMP_TYPE":                    "float32",
    "ICMP_IPV4_TYPE":               "float32",
    "DNS_QUERY_ID":                 "float32",
    "DNS_QUERY_TYPE":               "float32",
    "DNS_TTL_ANSWER":               "float32",
    "FTP_COMMAND_RET_CODE":         "float32",
    "Attack":                       "object",
    "Label":                        "float32",
}

In [49]:
FEATURES_TO_DROP = [
    'IPV4_SRC_ADDR', 
    'IPV4_DST_ADDR', 
    'L7_PROTO', 
    'L4_SRC_PORT', 
    'L4_DST_PORT', 
    'FTP_COMMAND_RET_CODE',
    'Attack'
]

In [50]:
df = pd.read_csv(
    "./datasets/NF-UNSW-NB15-v2-downsample.csv.gz",
    dtype=dtypes_netflow
)

In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 478055 entries, 0 to 478054
Data columns (total 45 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   IPV4_SRC_ADDR                478055 non-null  object 
 1   L4_SRC_PORT                  478055 non-null  float32
 2   IPV4_DST_ADDR                478055 non-null  object 
 3   L4_DST_PORT                  478055 non-null  float32
 4   PROTOCOL                     478055 non-null  float32
 5   L7_PROTO                     478055 non-null  float64
 6   IN_BYTES                     478055 non-null  float32
 7   IN_PKTS                      478055 non-null  float32
 8   OUT_BYTES                    478055 non-null  float32
 9   OUT_PKTS                     478055 non-null  float32
 10  TCP_FLAGS                    478055 non-null  int32  
 11  CLIENT_TCP_FLAGS             478055 non-null  int32  
 12  SERVER_TCP_FLAGS             478055 non-null  int32  
 13 

In [52]:
cross_df = pd.read_csv(
    f"./datasets/{DATASET}-downsample.csv.gz",
    dtype=dtypes_netflow
)

In [53]:
threshold = np.finfo(np.float32).max

In [54]:
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)
print(df['Attack'].value_counts())
df.drop(FEATURES_TO_DROP, axis=1, inplace=True)
df = df[df < threshold]
df = split_flag_columns(df)
df.dropna(inplace=True)

Attack
Benign            459044
Exploits            6310
Fuzzers             4462
Generic             3312
Reconnaissance      2556
DoS                 1159
Analysis             460
Backdoor             434
Shellcode            285
Worms                 33
Name: count, dtype: int64
Using cached file: 2e1b49bdf7ef775ccf86409f36645a01.


In [55]:
cross_df.replace([np.inf, -np.inf], np.nan, inplace=True)
cross_df.dropna(inplace=True)
print(cross_df['Attack'].value_counts())
cross_df.drop(FEATURES_TO_DROP, axis=1, inplace=True)
cross_df = cross_df[cross_df < threshold]
cross_df = split_flag_columns(cross_df)
cross_df.dropna(inplace=True)

Attack
DDoS      5499554
Benign      40511
Name: count, dtype: int64
Creating column TCP_FLAGS_BIN
Created column TCP_FLAGS_BIN
Creating column CLIENT_TCP_FLAGS_BIN
Created column CLIENT_TCP_FLAGS_BIN
Creating column SERVER_TCP_FLAGS_BIN
Created column SERVER_TCP_FLAGS_BIN
Creating column URGENT_POINTER
Created column URGENT_POINTER
Creating column ACKNOWLEDGEMENT
Created column ACKNOWLEDGEMENT
Creating column PUSH
Created column PUSH
Creating column RESET
Created column RESET
Creating column SYNCHRONISATION
Created column SYNCHRONISATION
Creating column FIN
Created column FIN
Creating column CLIENT_URGENT_POINTER
Created column CLIENT_URGENT_POINTER
Creating column CLIENT_ACKNOWLEDGEMENT
Created column CLIENT_ACKNOWLEDGEMENT
Creating column CLIENT_PUSH
Created column CLIENT_PUSH
Creating column CLIENT_RESET
Created column CLIENT_RESET
Creating column CLIENT_SYNCHRONISATION
Created column CLIENT_SYNCHRONISATION
Creating column CLIENT_FIN
Created column CLIENT_FIN
Creating column SERVER

In [56]:
cross_x, cross_y = cross_df.drop(['Label', 'Unnamed: 0'], axis=1), cross_df['Label']

X, y = df.drop(['Label'], axis=1), df['Label']

X_train, X_test, y_train, y_test = train_test_split(
                                        X, y, stratify=y,
                                        test_size=0.33, random_state=42
                                   )
cross_x_train, cross_x_test, cross_y_train, cross_y_test = train_test_split(
                                                                cross_x, cross_y,
                                                                stratify=cross_y,
                                                                test_size=0.9,
                                                                random_state=42
                                                           )

# Reescale the models to train and test
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [57]:
if APPLY_SYNTHETIC is True:
    print("Using Synthetic Data")
    synthetic_df = pd.read_parquet(
        f"./datasets/{DATASET}-synthetic.parquet"
    )
    synthetic_df["Label"] = 0
    synthetic_x, synthetic_y = synthetic_df.drop(['Label'], axis=1), synthetic_df['Label']
    synthetic_x = synthetic_x.to_numpy()

    if SAME_SCALE is True:
        synthetic_x = scaler.transform(synthetic_x)
 
    X_train = np.concatenate((X_train, synthetic_x), axis=0)
    y_train = np.concatenate((y_train, synthetic_y))

if SAME_SCALE is True:
    cross_x_train = scaler.transform(cross_x_train)
    cross_x_test = scaler.transform(cross_x_test)
else:
    external_scaler = MinMaxScaler()
    cross_x_train = external_scaler.fit_transform(cross_x_train)

In [58]:
if APPLY_REAL is True:
    X_train = np.concatenate((X_train, cross_x_train), axis=0)
    y_train = np.concatenate((y_train, cross_y_train))

In [59]:
# Define parameter grids for each algorithm
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}

param_grid_lr = {
    'C': [0.1, 1, 10],
    'penalty': ['l1', 'l2'],
}

param_grid_if = {
    'n_estimators': [50, 100, 150],
    'contamination': [0.01, 0.05, 0.1, 0.2]
}

param_grid_xgb = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.1, 0.01, 0.001]
}

param_grid_mlp = {
    'hidden_layer_sizes': [(50,), (100,), (50, 25), (100, 50)],
    'activation': ['relu', 'tanh'],
    'max_iter': [200, 300, 400],
}

param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto', 0.1, 1],
}

param_grid_lof = {
    'contamination': [0.01, 0.02, 0.03, 0.04, 0.05]
}

param_grid_efc = {}

In [60]:
rf_classifier = RandomForestClassifier()
lr_classifier = LogisticRegression(solver='lbfgs', max_iter=1000)
if_classifier = IsolationForest(random_state=42)
xgb_classifier = xgb.XGBClassifier()
mlp_classifier = MLPClassifier(random_state=42)
svm_classifier = SVC(random_state=42)
lof_classifier = LocalOutlierFactor(novelty=True)
efc_classifier = EnergyBasedFlowClassifier()


In [61]:
f1sc = make_scorer(f1_score, average='micro')

# Create a dictionary of classifiers and their respective parameter grids
classifiers = {
    'Random Forest': (rf_classifier, param_grid_rf, 'f1', False),
    'Logistic Regression': (lr_classifier, param_grid_lr, 'f1', False),
    'Isolation Forest': (if_classifier, param_grid_if, 'roc_auc', True), 
    'eXtreme Gradient Boosting': (xgb_classifier, param_grid_xgb, 'f1', False),
    'Multilayer Perceptron': (mlp_classifier, param_grid_mlp, 'f1', False),
    #'Support Vector Machine': (svm_classifier, param_grid_svm, 'f1', False),
    'Local Outlier Factor': (lof_classifier, param_grid_lof, f1sc, True),
    'Energy Based Flow Classifier': (efc_classifier, param_grid_efc, 'f1', False),
}

In [62]:
# Evaluate each classifier using GridSearchCV
for classifier_name, (classifier, param_grid, metric, outlier_discriminator) in classifiers.items():

    grid_search = GridSearchCV(
        classifier,
        param_grid,
        cv=3,
        scoring=metric,
        n_jobs=-1,
        verbose=2
    )
    
    # Make predictions on the test set
    grid_search.fit(X_train, y_train)
    
    # Print the best parameters and the corresponding accuracy on the test set
    print(f"Best parameters for {classifier_name}: {grid_search.best_params_}")

    y_pred = grid_search.predict(X_test)
    cross_pred = grid_search.predict(cross_x_test)
    
    if outlier_discriminator is True:
        y_pred[y_pred == 1] = 0
        y_pred[y_pred == -1] = 1
        cross_pred[cross_pred == 1] = 0
        cross_pred[cross_pred == -1] = 1
    
    # Evaluate the performance on the same silo
    accuracy_value = accuracy_score(y_test, y_pred)
    f1_value = f1_score(y_test, y_pred)
    f2_value = fbeta_score(y_test, y_pred, beta=2)

    print(f"Accuracy on the test set: {accuracy_value:.4f}")
    print(f"F1-Score on the test set: {f1_value:.4f}")
    print(f"F2-Score on the test set: {f2_value:.4f}")

    # Evaluate the performance on cross silos approach
    accuracy_cross = accuracy_score(cross_y_test, cross_pred)
    f1_cross = f1_score(cross_y_test, cross_pred)
    f2_cross = fbeta_score(cross_y_test, cross_pred, beta=2)

    print(f"Accuracy on the cross-evaluation set: {accuracy_cross:.4f}")
    print(f"F1-Score on the cross-evaluation set: {f1_cross:.4f}")
    print(f"F2-Score on the cross-evaluation set: {f2_cross:.4f}")


Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best parameters for Random Forest: {'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 100}
Accuracy on the test set: 0.9971
F1-Score on the test set: 0.9637
F2-Score on the test set: 0.9736
Accuracy on the cross-evaluation set: 0.7488
F1-Score on the cross-evaluation set: 0.8552
F2-Score on the cross-evaluation set: 0.7871
Fitting 3 folds for each of 6 candidates, totalling 18 fits


9 fits failed out of a total of 18.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
  File "l:\Experimentos\anomaly-flow\.env\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "l:\Experimentos\anomaly-flow\.env\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "l:\Experimentos\anomaly-flow\.env\lib\site-packages\sklearn\linear_model\_logistic.py", line 1169, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "l:\Experimentos\anomaly-flow\.env\lib\site-packages\sklearn\linear_model

Best parameters for Logistic Regression: {'C': 10, 'penalty': 'l2'}
Accuracy on the test set: 0.9931
F1-Score on the test set: 0.9171
F2-Score on the test set: 0.9403
Accuracy on the cross-evaluation set: 0.0061
F1-Score on the cross-evaluation set: 0.0002
F2-Score on the cross-evaluation set: 0.0001
Fitting 3 folds for each of 12 candidates, totalling 36 fits




Best parameters for Isolation Forest: {'contamination': 0.01, 'n_estimators': 50}
Accuracy on the test set: 0.9506
F1-Score on the test set: 0.0177
F2-Score on the test set: 0.0131
Accuracy on the cross-evaluation set: 0.0069
F1-Score on the cross-evaluation set: 0.0000
F2-Score on the cross-evaluation set: 0.0000
Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best parameters for eXtreme Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 9, 'n_estimators': 150}
Accuracy on the test set: 0.9971
F1-Score on the test set: 0.9641
F2-Score on the test set: 0.9733
Accuracy on the cross-evaluation set: 0.0071
F1-Score on the cross-evaluation set: 0.0006
F2-Score on the cross-evaluation set: 0.0004
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Best parameters for Multilayer Perceptron: {'activation': 'tanh', 'hidden_layer_sizes': (50, 25), 'max_iter': 200}
Accuracy on the test set: 0.9961
F1-Score on the test set: 0.9522
F2-Score on the test set: 0.9717
Accurac

In [63]:
def eval_learning(y_test, preds):
    acc = accuracy_score(y_test, preds)
    rec = recall_score(y_test, preds)
    prec = precision_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    mcc = matthews_corrcoef(y_test, preds)
    tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()
    missrate = fn / (fn + tp)
    fallout = fp / (fp + tn)
    auc = roc_auc_score(y_test, preds)
    f2_value = fbeta_score(y_test, preds, beta=2)

    return acc, rec, prec, f1, mcc, missrate, fallout, auc, f2_value

In [64]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

def calculate_reconstruction_loss(x, x_hat):
    losses = np.mean(abs(x - x_hat), axis=1)  # MAE
    return losses

class AutoEncoder():
    def __init__(self, train_data, test_data):
        self.x_train, self.y_train = train_data
        self.x_test, self.y_test = test_data

        self.num_features = self.x_test.shape[1]
        print(self.num_features)
        self.model = self.model()
        print("> Loaded", DATASET, " | Trainset:", self.x_train.shape, " | Testset:", self.x_test.shape)
        print("> Train samples:", self.y_train.shape[0], " | Test samples:", self.y_test.value_counts().to_string().replace("\n", ", "))
        self.threshold = 0

    def model(self):
        model = tf.keras.models.Sequential([
            Input(shape=(self.num_features,)),
            Dense(32, activation="relu"),
            Dense(16, activation="relu"),
            Dense(8, activation="relu"),
            Dense(4, activation="relu"),
            Dense(8, activation="relu"),
            Dense(16, activation="relu"),
            Dense(32, activation="relu"),
            Dense(self.num_features, activation="sigmoid")
        ])  
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    def fit(self):
        # training only on benign traffic
        history = self.model.fit(self.x_train[self.y_train == 0], self.x_train[self.y_train == 0], 
                epochs=10, 
                batch_size=128, 
                shuffle=True
                )
        self.threshold = history.history["loss"][-1]
        print(">>> Threshold:", self.threshold)
        return self.model.get_weights(), len(self.x_train), {}

    def evaluate(self):
        inference = self.model.predict(self.x_test)
        loss = self.model.evaluate(self.x_test, self.x_test)
        inference_loss = calculate_reconstruction_loss(self.x_test, inference)

        y_pred = inference_loss > self.threshold

        acc, rec, prec, f1, mcc, missrate, fallout, auc, f2_value = eval_learning(self.y_test, y_pred)

        output_dict = {"acc": acc, "rec": rec, "prec": prec, "f1": f1, "mcc": mcc, "missrate": missrate,
                "fallout": fallout, "auc": auc, "f2-score": f2_value}

        print(output_dict)

        return loss, len(self.x_test), output_dict

    def cross_evaluation(self, cross_test_data):
        x_validation, y_validation = cross_test_data

        inference = self.model.predict(x_validation) 
        loss = self.model.evaluate(x_validation, x_validation)
        inference_loss = calculate_reconstruction_loss(x_validation, inference)
        y_pred = inference_loss > self.threshold
        acc, rec, prec, f1, mcc, missrate, fallout, auc, f2_value = eval_learning(y_validation, y_pred)

        output_dict = {"acc": acc, "rec": rec, "prec": prec, "f1": f1, "mcc": mcc, "missrate": missrate,
                "fallout": fallout, "auc": auc, "f2-score": f2_value}

        print(f"Results:\n{output_dict}")

        return loss, len(self.x_test), output_dict


In [65]:
autoencoder = AutoEncoder((X_train, y_train), (X_test, y_test))
autoencoder.fit() 
autoencoder.evaluate()
autoencoder.cross_evaluation((cross_x_test, cross_y_test))

52
> Loaded NF-BoT-IoT-v2-DDoS  | Trainset: (320296, 52)  | Testset: (157759, 52)
> Train samples: 320296  | Test samples: Label, 0.0    151485, 1.0      6274
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
>>> Threshold: 0.002431360771879554
{'acc': 0.20400737834291546, 'rec': 1.0, 'prec': 0.047584737085605504, 'f1': 0.09084656429414363, 'mcc': 0.0902158256083926, 'missrate': 0.0, 'fallout': 0.8289599630326435, 'auc': 0.5855200184836782, 'f2-score': 0.19987893848163368}
Results:
{'acc': 0.9926876115986594, 'rec': 1.0, 'prec': 0.9926876115986594, 'f1': 0.996330388988832, 'mcc': 0.0, 'missrate': 0.0, 'fallout': 1.0, 'auc': 0.5, 'f2-score': 0.9985289166132563}


(193780300906496.0,
 157759,
 {'acc': 0.9926876115986594,
  'rec': 1.0,
  'prec': 0.9926876115986594,
  'f1': 0.996330388988832,
  'mcc': 0.0,
  'missrate': 0.0,
  'fallout': 1.0,
  'auc': 0.5,
  'f2-score': 0.9985289166132563})