**DSCI 565: Semester Project**

Reference Paper:

FlowTransformer: A Transformer Framework for Flow-based Network Intrusion Detection Systems

Dataset:

Towards a Standard Feature Set for Network Intrusion Detection System Datasets (NetFlow-v2)

https://staff.itee.uq.edu.au/marius/NIDS_datasets/

NF-UNSW-NB15-v2, NF-CSE-CIC-IDS2018-v2

Keyword: REQ

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os, sys
# framework library
# https://github.com/liamdm/FlowTransformer
path = "/content/drive/MyDrive/"

if path not in sys.path:
    sys.path.append(path)

import warnings
warnings.filterwarnings("ignore")
from typing import Tuple, List, Dict, Any, Optional
from enum import Enum

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder

try:
    from tensorflow._api.v2.v2 import keras
except ImportError:
    from tensorflow import keras

import tensorflow as tf
import keras.layers as layers
from keras.layers import Embedding, Dense, Layer, MultiHeadAttention, Dropout, LayerNormalization, Conv1D, Concatenate, Reshape, Flatten, Lambda, GlobalAveragePooling1D

# FlowTransformer framework
from framework.base_preprocessing import BasePreProcessing
from framework.enumerations import CategoricalFormat
from framework.base_input_encoding import BaseInputEncoding
from framework.base_classification_head import BaseClassificationHead
from framework.base_sequential import BaseSequential

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset directory: the "data/" folder under `path` (the Drive root set in the setup cell).
data_path = path + "data/"
# File name of the NetFlow v2 feature list (the `wc` call below reads it from the same data folder).
feature = "NetFlow_v2_Features.csv"
# Dataset CSVs, relative to data_path: index 0 = NF-CSE-CIC-IDS2018-v2, index 1 = NF-UNSW-NB15-v2.
datasets = ["NF-CSE-CIC-IDS2018-v2/NF-CSE-CIC-IDS2018-v2.csv", "NF-UNSW-NB15-v2/NF-UNSW-NB15-v2.csv"]

!wc -l drive/MyDrive/data/NetFlow_v2_Features.csv
!wc -l drive/MyDrive/data/NF-CSE-CIC-IDS2018-v2/NF-CSE-CIC-IDS2018-v2.csv
!wc -l drive/MyDrive/data/NF-UNSW-NB15-v2/NF-UNSW-NB15-v2.csv

44 drive/MyDrive/data/NetFlow_v2_Features.csv
18893709 drive/MyDrive/data/NF-CSE-CIC-IDS2018-v2/NF-CSE-CIC-IDS2018-v2.csv
2390276 drive/MyDrive/data/NF-UNSW-NB15-v2/NF-UNSW-NB15-v2.csv


**Implementation**

NetFlow Collector & Pre-processing

In [None]:
# NetFlow Collector
# REQ

# Pre-processing
# https://github.com/liamdm/FlowTransformer/blob/master/implementations/pre_processings.py
class StandardPreProcessing(BasePreProcessing):
    """Log-scaled min/max normalisation for numerical columns and
    top-N level encoding for categorical columns.

    Numerical columns are shifted by the fitted minimum, passed through
    ``log(x + 1)`` and divided by ``log(range + 1)``. Categorical columns keep
    only the ``n_categorical_levels`` most frequent levels seen during fitting.
    """

    def __init__(self, n_categorical_levels: int, clip_numerical_values:bool=False):
        """
        :param n_categorical_levels: maximum number of levels kept per categorical column
        :param clip_numerical_values: when True, transformed numerical values are clipped to [0, 1]
        """
        super().__init__()
        self.n_categorical_levels:int = n_categorical_levels
        self.clip_numerical_values:bool = clip_numerical_values
        # column name -> (minimum, range) recorded by fit_numerical
        self.min_range = {}
        # column name -> kept levels, most frequent first, recorded by fit_categorical
        self.encoded_levels = {}

    @property
    def name(self) -> str:
        """Display name of this pre-processing step."""
        return "Standard Preprocessing"

    @property
    def parameters(self) -> dict:
        """Constructor settings as a dictionary."""
        return {
            "n_categorical_levels": self.n_categorical_levels,
            "clip_numerical_values": self.clip_numerical_values
        }

    def fit_numerical(self, column_name: str, values: np.array):
        """Record the minimum and the (max - min) range of ``values`` for ``column_name``."""
        v0 = np.min(values)
        v1 = np.max(values)
        r = v1 - v0

        self.min_range[column_name] = (v0, r)

    def transform_numerical(self, column_name: str, values: np.array):
        """Scale ``values`` using the statistics fitted for ``column_name``.

        The input array is left untouched; a new array is returned.

        :return: all-zero float32 array when the fitted range is 0, otherwise
                 ``log(values - min + 1) / log(range + 1)`` (optionally clipped to [0, 1])
        :raises KeyError: if ``column_name`` was never fitted
        """
        col_min, col_range = self.min_range[column_name]

        if col_range == 0:
            return np.zeros_like(values, dtype="float32")

        # Shift so the fitted minimum maps to zero. This deliberately builds a new
        # array: an in-place `values -= col_min` would overwrite the caller's data
        # (and fails outright for an integer array with a float minimum).
        shifted = values - col_min

        # apply a logarithm
        col_values = np.log(shifted + 1)

        # scale so the fitted maximum maps to 1
        col_values *= 1. / np.log(col_range + 1)

        if self.clip_numerical_values:
            col_values = np.clip(col_values, 0., 1.)

        return col_values

    def fit_categorical(self, column_name: str, values: np.array):
        """Remember the ``n_categorical_levels`` most frequent levels of ``values``."""
        levels, level_counts = np.unique(values, return_counts=True)
        # most frequent level first
        sorted_levels = list(sorted(zip(levels, level_counts), key=lambda x: x[1], reverse=True))
        self.encoded_levels[column_name] = [s[0] for s in sorted_levels[:self.n_categorical_levels]]

    def transform_categorical(self, column_name:str, values: np.array, expected_categorical_format: CategoricalFormat):
        """Encode ``values`` with the levels fitted for ``column_name``.

        :return: a uint32 array of level codes when ``expected_categorical_format``
                 is ``CategoricalFormat.Integers``; otherwise the result of
                 ``pd.get_dummies`` over those codes, with columns prefixed by ``column_name``
        :raises KeyError: if ``column_name`` was never fitted
        """
        encoded_levels = self.encoded_levels[column_name]
        print(f"Encoding the {len(encoded_levels)} levels for {column_name}")

        # NOTE(review): the array starts at 1, so any value outside the fitted levels
        # keeps code 1 and is indistinguishable from the most frequent level (which is
        # also assigned level_i + 1 == 1). The original inline comment described 0 as the
        # code for unseen values; switching to zeros would change the set of encoded
        # codes/one-hot columns, so confirm the intended behaviour before changing it.
        result_values = np.ones(len(values), dtype="uint32")
        for level_i, level in enumerate(encoded_levels):
            level_mask = values == level

            # fitted levels are numbered from 1 in order of decreasing frequency
            result_values[level_mask] = level_i + 1

        if expected_categorical_format == CategoricalFormat.Integers:
            return result_values

        v = pd.get_dummies(result_values, prefix=column_name)
        return v

FlowTransformer Framework

In [None]:
# Input Encoder
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/input_encodings.py
class NoInputEncoder(BaseInputEncoding):
    """Input stage that concatenates the feature inputs without any learned encoding."""

    @property
    def name(self):
        """Display name of this input encoding."""
        return "No Input Encoding"

    @property
    def parameters(self):
        """This encoding has no settings."""
        return {}

    @property
    def required_input_format(self) -> CategoricalFormat:
        """Categorical format this encoding asks the pre-processing for."""
        return CategoricalFormat.OneHot

    def apply(self, X, prefix:str=None):
        """Concatenate all inputs in ``X`` into one tensor.

        The first ``n_numeric_features`` entries of ``X`` are treated as numerical
        inputs and the rest as categorical inputs. ``prefix`` is not used here.
        """
        spec = self.model_input_specification
        split_at = spec.n_numeric_features
        numeric_inputs, categorical_inputs = X[:split_at], X[split_at:]

        if spec.categorical_format == CategoricalFormat.Integers:
            warnings.warn("It doesn't make sense to be using integer based inputs without encoding!")
            # integer codes are cast to float32 before being concatenated
            categorical_inputs = [
                Lambda(lambda x: tf.cast(x, tf.float32))(tensor)
                for tensor in categorical_inputs
            ]

        return Concatenate()(numeric_inputs + categorical_inputs)

class EmbedLayerType(Enum):
    """Embedding strategy selector used by CategoricalFeatureEmbed.

    Members carry plain integer values. The previous definition had trailing
    commas after ``0`` and ``1``, which silently turned those two values into
    one-element tuples.
    """
    Dense = 0
    Lookup = 1
    Projection = 2

class RecordLevelEmbed(BaseInputEncoding):
    """Input encoding that concatenates every feature input and maps the result
    through one linear Dense layer of ``embed_dimension`` units."""

    def __init__(self, embed_dimension: int, project:bool = False):
        """
        :param embed_dimension: number of units of the embedding Dense layer
        :param project: when True the Dense layer is created without a bias term
        """
        super().__init__()

        self.embed_dimension: int = embed_dimension
        self.project: bool = project

    @property
    def name(self):
        """Display name; depends on whether the bias-free variant is selected."""
        return "Record Level Projection" if self.project else "Record Level Embedding"

    @property
    def parameters(self):
        """Settings dictionary (the embedding width is reported as ``dimensions_per_feature``)."""
        return {
            "dimensions_per_feature": self.embed_dimension
        }

    @property
    def required_input_format(self) -> CategoricalFormat:
        """Categorical format this encoding asks the pre-processing for."""
        return CategoricalFormat.OneHot

    def apply(self, X:List[keras.Input], prefix: str = None):
        """Concatenate the inputs along the last axis and embed them.

        ``prefix`` is prepended to the names of both layers; ``None`` means no prefix.
        Asserts that the model input specification uses the one-hot categorical format.
        """
        prefix = "" if prefix is None else prefix

        assert self.model_input_specification.categorical_format == CategoricalFormat.OneHot

        merged = Concatenate(name=f"{prefix}feature_concat", axis=-1)(X)
        embed_layer = Dense(
            self.embed_dimension,
            activation="linear",
            use_bias=not self.project,
            name=f"{prefix}embed",
        )
        return embed_layer(merged)

class CategoricalFeatureEmbed(BaseInputEncoding):
    """Input encoding that embeds each categorical field on its own and
    concatenates the embeddings with the (unmodified) numerical inputs.

    The embedding layer per field depends on ``embed_layer_type``:
    ``Lookup`` uses a Keras ``Embedding`` over integer codes; any other value
    uses a linear ``Dense`` layer over one-hot inputs, with a bias only for
    ``EmbedLayerType.Dense``.
    """

    def __init__(self, embed_layer_type: EmbedLayerType, dimensions_per_feature: int):
        """
        :param embed_layer_type: which kind of embedding layer to build per categorical field
        :param dimensions_per_feature: output width of each per-field embedding
        """
        super().__init__()

        self.dimensions_per_feature: int = dimensions_per_feature
        self.embed_layer_type: EmbedLayerType = embed_layer_type

    @property
    def name(self):
        """Display name for the selected embedding type.

        :raises RuntimeError: if ``embed_layer_type`` is not one of the three known members
        """
        if self.embed_layer_type == EmbedLayerType.Dense:
            return "Categorical Feature Embed - Dense"
        elif self.embed_layer_type == EmbedLayerType.Lookup:
            return "Categorical Feature Embed - Lookup"
        elif self.embed_layer_type == EmbedLayerType.Projection:
            return "Categorical Feature Embed - Projection"
        raise RuntimeError()

    @property
    def parameters(self):
        """Settings dictionary."""
        return {
            "dimensions_per_feature": self.dimensions_per_feature
        }

    def apply(self, X:List[keras.Input], prefix:str=None):
        """Build the embedding sub-graph.

        The first ``n_numeric_features`` entries of ``X`` are treated as numerical
        inputs; the remaining entries are matched by position to
        ``categorical_feature_names``.

        :param X: list of model inputs, numerical inputs first
        :param prefix: prepended to the explicit layer names; ``None`` means no prefix
        :return: concatenation of the numerical inputs and all categorical embeddings
        :raises Exception: if the model input specification has not been set yet
        """
        if prefix is None:
            prefix = ""

        if self.model_input_specification is None:
            raise Exception("Please call build() before calling apply!")

        spec = self.model_input_specification
        numerical_feature_inputs = X[:spec.n_numeric_features]
        categorical_feature_inputs = X[spec.n_numeric_features:]

        collected_numeric = Concatenate(name=f"{prefix}concat_numeric")(numerical_feature_inputs)

        collected_categorical = []
        for categorical_field_i, categorical_field_name in enumerate(spec.categorical_feature_names):
            cat_field_x = categorical_feature_inputs[categorical_field_i]

            # '/' is stripped from the field name before it is used in a layer name
            # (presumably because '/' is not accepted in layer names — confirm). Both
            # branches now share the same sanitised name; previously only the Dense
            # branch stripped it.
            layer_field_name = categorical_field_name.replace('/', '')

            if self.embed_layer_type != EmbedLayerType.Lookup:
                assert spec.categorical_format == CategoricalFormat.OneHot

                x = Dense(self.dimensions_per_feature,
                          activation="linear",
                          use_bias=(self.embed_layer_type == EmbedLayerType.Dense),
                          name=f"{prefix}embed_{layer_field_name}")(cat_field_x)
            else:
                assert spec.categorical_format == CategoricalFormat.Integers

                # one extra embedding row beyond the number of fitted levels
                x = Embedding(input_dim=spec.levels_per_categorical_feature[categorical_field_i] + 1, output_dim=self.dimensions_per_feature, input_length=self.sequence_length)(cat_field_x)
                # force the embedding output to (sequence_length, dimensions_per_feature)
                x = Reshape((self.sequence_length, self.dimensions_per_feature), name=f"{prefix}expand_{layer_field_name}")(x)

            collected_categorical.append(x)
        collected_categorical = Concatenate(name=f"{prefix}concat_categorical")(collected_categorical)

        collected = Concatenate()([collected_numeric, collected_categorical])

        return collected

    @property
    def required_input_format(self) -> CategoricalFormat:
        """Integer codes for the Lookup embedding, one-hot for every other type."""
        return CategoricalFormat.Integers if self.embed_layer_type == EmbedLayerType.Lookup else CategoricalFormat.OneHot

# Transformer Models

# Decoder Block
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/transformers/basic/decoder_block.py
class TransformerDecoderBlock(Layer):
    """Decoder-style block: two attention sub-layers followed by a feed-forward
    sub-layer, each wrapped in dropout, a residual connection and layer normalisation.

    NOTE(review): the block is fed a single tensor, so the "encoder output" used
    by the second attention step is the block input itself. The same ``mha``
    layer serves both attention steps, and ``dropout2``/``layernorm2`` serve
    both the second attention step and the feed-forward step — confirm that
    this sharing is intended.
    """

    def __init__(self, input_dimension:int, inner_dimension:int, num_heads:int, dropout_rate=0.1):
        """
        :param input_dimension: attention ``key_dim`` and output width of the feed-forward network
        :param inner_dimension: hidden width of the feed-forward network
        :param num_heads: number of attention heads
        :param dropout_rate: rate used by both dropout layers
        """
        super().__init__()

        self.num_heads = num_heads
        self.input_dimension = input_dimension
        self.inner_dimension = inner_dimension
        self.dropout_rate = dropout_rate

        # attention sub-layer
        self.mha = MultiHeadAttention(num_heads=num_heads, key_dim=input_dimension)
        self.dropout1 = Dropout(dropout_rate)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)

        # position-wise feed-forward network: ReLU expansion, then linear projection back
        expand = Dense(inner_dimension, activation='relu')
        contract = Dense(input_dimension)
        self.ffn = tf.keras.Sequential([expand, contract])
        self.dropout2 = Dropout(dropout_rate)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

    # noinspection PyMethodOverriding
    # SIAN
    def call(self, inputs, training=True, mask=None):
        """Run the block on ``inputs``.

        ``training`` is forwarded to the dropout layers; ``mask`` is accepted but not used.
        """
        # a single tensor plays both roles
        query_seq = inputs
        memory = inputs

        # step 1: self-attention over the query sequence
        attended = self.dropout1(self.mha(query_seq, query_seq), training=training)
        after_self_attn = self.layernorm1(query_seq + attended)

        # step 2: attention with the normalised step-1 output as query and `memory` as value
        attended = self.dropout2(self.mha(after_self_attn, memory), training=training)
        after_cross_attn = self.layernorm2(after_self_attn + attended)

        # step 3: feed-forward network
        transformed = self.dropout2(self.ffn(after_cross_attn), training=training)
        return self.layernorm2(after_cross_attn + transformed)

# Encoder Block
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/transformers/basic/encoder_block.py
class GPT3Attention(layers.Layer):
    """Hand-written multi-head scaled dot-product attention layer."""

    def __init__(self, n_heads, d_model, dropout_rate=0.1):
        """
        :param n_heads: number of attention heads
        :param d_model: width of the q/k/v projections and of the output
        :param dropout_rate: rate of the dropout applied to the attention weights and to the output
        """
        super().__init__()
        self.n_heads = n_heads
        self.d_model = d_model
        # per-head width (integer division of d_model by n_heads)
        self.depth = d_model // n_heads

        # query / key / value projections
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)

        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        # output projection
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to (batch_size, -1, n_heads, depth) and swap axes 1 and 2."""
        per_head = tf.reshape(x, (batch_size, -1, self.n_heads, self.depth))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    # noinspection PyMethodOverriding
    def call(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k``/``v``.

        When ``mask`` is given, ``mask * -1e9`` is added to the attention logits
        before the softmax.
        """
        batch_size = tf.shape(q)[0]

        q_proj, k_proj, v_proj = self.wq(q), self.wk(k), self.wv(v)

        q_heads = self.split_heads(q_proj, batch_size)
        k_heads = self.split_heads(k_proj, batch_size)
        v_heads = self.split_heads(v_proj, batch_size)

        # scaled dot-product attention logits
        logits = tf.matmul(q_heads, k_heads, transpose_b=True)
        logits = logits / tf.math.sqrt(tf.cast(self.depth, tf.float32))

        if mask is not None:
            logits += (mask * -1e9)

        weights = self.dropout(tf.nn.softmax(logits, axis=-1))

        # weighted sum of the values, then merge the heads back into d_model
        context = tf.matmul(weights, v_heads)
        context = tf.transpose(context, perm=[0, 2, 1, 3])
        merged = tf.reshape(context, (batch_size, -1, self.d_model))

        return self.dropout(self.dense(merged))

class MultiHeadAttentionImplementation:
    """Selects the attention layer built by TransformerEncoderBlock.

    Plain integer constants. The previous definition had a trailing comma after
    ``0``, which made ``Keras`` the tuple ``(0,)`` instead of the integer ``0``.
    """
    Keras = 0
    GPT3 = 1

class TransformerEncoderBlock(layers.Layer):
    """Encoder block: an attention sub-layer followed by a two-layer feed-forward
    sub-layer (Dense or kernel-size-1 Conv1D), with dropout and layer normalisation."""

    def __init__(self, input_dimension:int, inner_dimension:int, num_heads:int, dropout_rate=0.1, use_conv:bool=False, prefix:str=None, attn_implementation:MultiHeadAttentionImplementation = MultiHeadAttentionImplementation.Keras):
        """
        :param input_dimension: output width of the second feed-forward layer
        :param inner_dimension: width of the first feed-forward layer; also the attention ``key_dim`` / ``d_model``
        :param num_heads: number of attention heads
        :param dropout_rate: rate of both dropout layers; dropout is skipped in ``call`` when this is not > 0
        :param use_conv: build the feed-forward layers as Conv1D (kernel size 1) instead of Dense
        :param prefix: prepended to the layer names; ``None`` means no prefix
        :param attn_implementation: Keras ``MultiHeadAttention`` or the local ``GPT3Attention``
        """
        if prefix is None:
            prefix = ""

        super().__init__(name=f"{prefix}transformer_encoder")

        if inner_dimension < input_dimension:
            warnings.warn("Typically inner_dimension should be greater than or equal to the input_dimension!")

        self.attn_implementation = attn_implementation
        self.dropout_rate = dropout_rate

        if attn_implementation == MultiHeadAttentionImplementation.Keras:
            self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=inner_dimension, name=f"{prefix}multi_head_attn")
        else:
            # the custom attention is built with its own dropout disabled
            self.attention = GPT3Attention(num_heads, inner_dimension, dropout_rate=0.0)

        norm_epsilon = 1e-6

        self.attention_dropout = layers.Dropout(dropout_rate, name=f"{prefix}attention_dropout")
        self.attention_layer_norm = layers.LayerNormalization(epsilon=norm_epsilon, name=f"{prefix}attention_layer_norm")

        # both feed-forward layers use a ReLU activation
        if use_conv:
            self.feed_forward_0 = Conv1D(filters=inner_dimension, kernel_size=1, activation="relu", name=f"{prefix}feed_forward_0")
            self.feed_forward_1 = Conv1D(filters=input_dimension, kernel_size=1, activation="relu", name=f"{prefix}feed_forward_1")
        else:
            self.feed_forward_0 = Dense(inner_dimension, activation="relu", name=f"{prefix}feed_forward_0")
            self.feed_forward_1 = Dense(input_dimension, activation="relu", name=f"{prefix}feed_forward_1")

        self.feed_forward_dropout = layers.Dropout(dropout_rate, name=f"{prefix}feed_forward_dropout")
        self.feed_forward_layer_norm = layers.LayerNormalization(epsilon=norm_epsilon, name=f"{prefix}feed_forward_layer_norm")

    # noinspection PyMethodOverriding
    # SIAN
    def call(self, inputs, training=True, mask=None):
        """Run the block on ``inputs``.

        ``training`` is forwarded to the dropout layers. ``mask`` is only passed
        on when the GPT3 attention implementation is selected.
        """
        apply_dropout = self.dropout_rate > 0

        if self.attn_implementation == MultiHeadAttentionImplementation.Keras:
            attended = self.attention(inputs, inputs)
        else:
            attended = self.attention(inputs, inputs, inputs, mask)

        attention_output = attended
        if apply_dropout:
            attention_output = self.attention_dropout(attended, training=training)

        # residual connection around attention, then normalise and feed forward
        hidden = self.attention_layer_norm(inputs + attention_output)
        hidden = self.feed_forward_0(hidden)
        hidden = self.feed_forward_1(hidden)

        feed_forward_output = hidden
        if apply_dropout:
            feed_forward_output = self.feed_forward_dropout(hidden, training=training)

        # the final sum uses the attention output itself, not the normalised residual
        return self.feed_forward_layer_norm(attention_output + feed_forward_output)

# Basic Transformers
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/transformers/basic_transformers.py
class BasicTransformer(BaseSequential):
    """Stack of ``n_layers`` transformer blocks: TransformerEncoderBlock by
    default, TransformerDecoderBlock when ``is_decoder`` is set."""

    @property
    def name(self) -> str:
        """Display name reflecting the feed-forward type and the decoder flag."""
        base = "Basic Conv Transformer" if self.use_conv else "Basic Dense Transformer"
        return base + (" Decoder" if self.is_decoder else "")

    @property
    def parameters(self) -> dict:
        """Settings dictionary (``head_size`` reports ``internal_size``)."""
        return {
            "n_layers": self.n_layers,
            "internal_size": self.internal_size,
            "use_conv": self.use_conv,
            "n_heads": self.n_heads,
            "dropout_rate": self.dropout_rate,
            "head_size": self.internal_size
        }

    def __init__(self, n_layers:int, internal_size:int, n_heads:int, use_conv:bool=False, dropout_rate:float=0.1, is_decoder=False):
        """
        :param n_layers: number of stacked blocks
        :param internal_size: passed to each block as its inner dimension
        :param n_heads: number of attention heads per block
        :param use_conv: use Conv1D feed-forward layers (encoder blocks only)
        :param dropout_rate: dropout rate passed to each block
        :param is_decoder: stack decoder blocks instead of encoder blocks
        """
        super().__init__()
        self.n_layers = n_layers
        self.internal_size = internal_size
        self.use_conv = use_conv
        self.n_heads = n_heads
        self.dropout_rate = dropout_rate
        self.is_decoder = is_decoder

    def apply(self, X, prefix: str = None):
        """Apply the stacked blocks to ``X`` and return the result.

        :param X: input tensor; its last dimension is used as each block's input dimension
        :param prefix: prepended to encoder block layer names; ``None`` means no prefix
        :raises NotImplementedError: when a decoder block is requested together with ``use_conv``
        """
        # Without this, a None prefix would be formatted into the layer names as
        # the literal text "None" (e.g. "Noneblock_0_transformer_encoder").
        if prefix is None:
            prefix = ""

        real_size = X.shape[-1]

        m_x = X

        for layer_i in range(self.n_layers):
            if self.is_decoder:
                if self.use_conv:
                    raise NotImplementedError()
                m_x = TransformerDecoderBlock(real_size, self.internal_size, self.n_heads, dropout_rate=self.dropout_rate)(m_x)
            else:
                m_x = TransformerEncoderBlock(real_size, self.internal_size, self.n_heads, dropout_rate=self.dropout_rate, use_conv=self.use_conv, prefix=f"{prefix}block_{layer_i}_")(m_x)

        return m_x

# Named Transformers
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/transformers/named_transformers.py
class GPTSmallTransformer(BaseSequential):
    """Fixed-size stack of 12 TransformerDecoderBlock layers
    (internal size 768, 12 heads, dropout 0.02)."""

    def __init__(self):
        super().__init__()
        self.n_layers = 12
        self.internal_size = 768
        self.n_heads = 12
        # true division, so this is stored as a float
        self.head_size = self.internal_size / self.n_heads
        self.dropout_rate = 0.02
        self.is_decoder = True

    @property
    def name(self) -> str:
        """Display name of this model."""
        return "GPT Model"

    @property
    def parameters(self) -> dict:
        """Settings dictionary."""
        return {
            "n_layers": self.n_layers,
            "internal_size": self.internal_size,
            "n_heads": self.n_heads,
            "dropout_rate": self.dropout_rate,
            "head_size": self.head_size
        }

    def apply(self, X, prefix: str = None):
        """Pass ``X`` through ``n_layers`` freshly built decoder blocks.

        The last dimension of ``X`` is used as each block's input dimension.
        ``prefix`` is accepted but not used.
        """
        feature_width = X.shape[-1]

        out = X
        for _ in range(self.n_layers):
            block = TransformerDecoderBlock(feature_width, self.internal_size, self.n_heads, dropout_rate=self.dropout_rate)
            out = block(out)

        return out


class BERTSmallTransformer(BaseSequential):
    """Fixed-size stack of 12 TransformerEncoderBlock layers
    (internal size 768, 12 heads, dropout 0.02)."""

    def __init__(self):
        super().__init__()
        self.n_layers = 12
        self.internal_size = 768
        self.n_heads = 12
        # true division, so this is stored as a float
        self.head_size = self.internal_size / self.n_heads
        self.dropout_rate = 0.02
        self.is_decoder = False

    @property
    def name(self) -> str:
        """Display name of this model."""
        return "BERT Model"

    @property
    def parameters(self) -> dict:
        """Settings dictionary."""
        return {
            "n_layers": self.n_layers,
            "internal_size": self.internal_size,
            "n_heads": self.n_heads,
            "dropout_rate": self.dropout_rate,
            "head_size": self.head_size
        }

    def apply(self, X, prefix: str = None):
        """Pass ``X`` through ``n_layers`` freshly built encoder blocks.

        The last dimension of ``X`` is used as each block's input dimension.
        NOTE(review): the ``prefix`` argument is not used; block names always
        start with "block_<i>_".
        """
        feature_width = X.shape[-1]

        out = X
        for block_index in range(self.n_layers):
            block = TransformerEncoderBlock(feature_width, self.internal_size, self.n_heads, dropout_rate=self.dropout_rate, prefix=f"block_{block_index}_")
            out = block(out)

        return out

# Classification Head
# ref: https://github.com/liamdm/FlowTransformer/blob/master/implementations/classification_heads.py
class FlattenClassificationHead(BaseClassificationHead):
    """Classification head that flattens the transformer output."""

    @property
    def name(self) -> str:
        """Display name of this head."""
        return "Flatten"

    @property
    def parameters(self) -> dict:
        """This head has no settings."""
        return {}

    def apply(self, X, prefix: str = None):
        """Flatten ``X`` with a layer named ``<prefix>flatten`` (``None`` means no prefix)."""
        prefix = "" if prefix is None else prefix
        return Flatten(name=f"{prefix}flatten")(X)


class FeaturewiseEmbedding(BaseClassificationHead):
    """Classification head that maps the last axis to a single unit with a
    linear Dense layer and flattens the result."""

    def __init__(self, project:bool=False):
        """
        :param project: when True the Dense layer is created without a bias term
        """
        super().__init__()
        self.project: bool = project

    @property
    def name(self):
        """Display name; depends on whether the bias-free variant is selected."""
        return "Featurewise Embed - Projection" if self.project else "Featurewise Embed - Dense"

    @property
    def parameters(self):
        """No settings are reported."""
        return {}

    def apply(self, X, prefix:str=None):
        """Apply the single-unit Dense layer to ``X`` and flatten the output.

        :param prefix: prepended to the Dense layer name; ``None`` means no prefix
        :raises Exception: if the model input specification has not been set yet
        """
        prefix = "" if prefix is None else prefix

        if self.model_input_specification is None:
            raise Exception("Please call build() before calling apply!")

        reducer = Dense(
            1,
            activation="linear",
            use_bias=(not self.project),
            name=f"{prefix}featurewise_embed",
        )
        reduced = reducer(X)

        return Flatten()(reduced)

class GlobalAveragePoolingClassificationHead(BaseClassificationHead):
    """Classification head that applies GlobalAveragePooling1D to the transformer output."""

    @property
    def name(self) -> str:
        """Display name of this head."""
        return "Global Average Pooling"

    @property
    def parameters(self) -> dict:
        """This head has no settings."""
        return {}

    def apply(self, X, prefix: str = None):
        """Pool ``X`` with a layer named ``<prefix>global_avg_pooling_1d`` (``None`` means no prefix)."""
        prefix = "" if prefix is None else prefix
        pooling = GlobalAveragePooling1D(name=f"{prefix}global_avg_pooling_1d")
        return pooling(X)


class LastTokenClassificationHead(BaseClassificationHead):
    """Classification head that keeps only the last entry along the second-to-last axis."""

    def __init__(self):
        super().__init__()

    @property
    def name(self) -> str:
        """Display name of this head."""
        return "Last Token"

    @property
    def parameters(self) -> dict:
        """This head has no settings."""
        return {}

    def apply(self, X, prefix: str = None):
        """Slice ``X[..., -1, :]`` with a Lambda layer named ``<prefix>slice_last``."""
        prefix = "" if prefix is None else prefix

        take_last = Lambda(lambda seq: seq[..., -1, :], name=f"{prefix}slice_last")
        return take_last(X)


class CLSTokenClassificationHead(LastTokenClassificationHead):
    """Last-token head that first appends an extra, flagged entry to the
    sequence before the transformer runs; the inherited ``apply`` then selects
    the final entry of the sequence axis."""

    @property
    def name(self) -> str:
        """Display name of this head."""
        return "CLS Token"

    @property
    def parameters(self) -> dict:
        """This head has no settings."""
        return {}

    def apply_before_transformer(self, X, prefix: str = None):
        """Append one zero-filled entry and one indicator feature to ``X``.

        An all-zero slice is concatenated along axis -2, then a new feature is
        concatenated along axis -1 whose value is 1 only at that appended
        position and 0 elsewhere.

        Assumes ``X`` is (batch, sequence_length, features) — TODO confirm.
        """
        prefix = "" if prefix is None else prefix

        n_steps = self.sequence_length

        batch_size = tf.shape(X)[0]
        flow_size = tf.shape(X)[2]

        # indicator over n_steps + 1 positions: zero everywhere except the last one
        last_position_flag = np.zeros((n_steps + 1,))
        last_position_flag[-1] = 1.
        last_position_flag = tf.convert_to_tensor(last_position_flag, dtype=tf.float32)

        # repeat the indicator for every batch element, then add a trailing feature axis
        flag_feature = tf.ones((batch_size, n_steps + 1,), dtype=tf.float32)
        flag_feature = tf.multiply(flag_feature, last_position_flag)
        flag_feature = tf.expand_dims(flag_feature, axis=-1)

        # the extra all-zero entry appended to the sequence
        zero_entry = tf.zeros((batch_size, 1, flow_size,), dtype=tf.float32)

        extended = Concatenate(axis=-2, name=f'{prefix}cls_vertical')([X, zero_entry])
        flagged = Concatenate(axis=-1, name=f'{prefix}cls_horizontal')([extended, flag_feature])

        return flagged

**Demonstration**

ref:

https://github.com/liamdm/FlowTransformer/blob/master/demonstration.ipynb

https://github.com/liamdm/FlowTransformer/blob/master/FlowTransformer_demo.ipynb

In [None]:
# Folder used as the dataset cache location for the demonstration below.
demonstration_folder = "demonstration"

# Create the folder if it is missing. exist_ok=True keeps the cell safe to re-run
# and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(demonstration_folder, exist_ok=True)

from framework.dataset_specification import DatasetSpecification
# Column layout of the NetFlow v2 datasets used in this demonstration.
flow_format = DatasetSpecification(
        # feature columns handed to the model
        include_fields=['NUM_PKTS_UP_TO_128_BYTES', 'SRC_TO_DST_SECOND_BYTES', 'OUT_PKTS', 'OUT_BYTES', 'NUM_PKTS_128_TO_256_BYTES', 'DST_TO_SRC_AVG_THROUGHPUT', 'DURATION_IN', 'L4_SRC_PORT', 'ICMP_TYPE', 'PROTOCOL', 'SERVER_TCP_FLAGS', 'IN_PKTS', 'NUM_PKTS_512_TO_1024_BYTES', 'CLIENT_TCP_FLAGS', 'TCP_WIN_MAX_IN', 'NUM_PKTS_256_TO_512_BYTES', 'SHORTEST_FLOW_PKT', 'MIN_IP_PKT_LEN', 'LONGEST_FLOW_PKT', 'L4_DST_PORT', 'MIN_TTL', 'DST_TO_SRC_SECOND_BYTES', 'NUM_PKTS_1024_TO_1514_BYTES', 'DURATION_OUT', 'FLOW_DURATION_MILLISECONDS', 'TCP_FLAGS', 'MAX_TTL', 'SRC_TO_DST_AVG_THROUGHPUT', 'ICMP_IPV4_TYPE', 'MAX_IP_PKT_LEN', 'RETRANSMITTED_OUT_BYTES', 'IN_BYTES', 'RETRANSMITTED_IN_BYTES', 'TCP_WIN_MAX_OUT', 'L7_PROTO', 'RETRANSMITTED_OUT_PKTS', 'RETRANSMITTED_IN_PKTS'],
        # the subset of include_fields that is treated as categorical
        categorical_fields=['CLIENT_TCP_FLAGS', 'L4_SRC_PORT', 'TCP_FLAGS', 'ICMP_IPV4_TYPE', 'ICMP_TYPE', 'PROTOCOL', 'SERVER_TCP_FLAGS', 'L4_DST_PORT', 'L7_PROTO'],
        # column holding the class label, and the label value that marks benign traffic
        class_column="Attack",
        benign_label="Benign"
    )

from framework.flow_transformer_parameters import FlowTransformerParameters
from framework.flow_transformer import FlowTransformer

# Pick one implementation for each stage of the FlowTransformer pipeline.
pre_processing = StandardPreProcessing(n_categorical_levels=32)  # keep at most 32 levels per categorical column
encoding = RecordLevelEmbed(64)  # one 64-unit embedding over the concatenated features
transformer = BasicTransformer(n_layers=2, internal_size=128, n_heads=2)  # two encoder blocks (is_decoder defaults to False)
classification_head = LastTokenClassificationHead()

# Define the transformer
ft = FlowTransformer(pre_processing=pre_processing,
                     input_encoding=encoding,
                     sequential_model=transformer,
                     classification_head=classification_head,
                     params=FlowTransformerParameters(window_size=8, mlp_layer_sizes=[128], mlp_dropout=0.1))

from framework.enumerations import EvaluationDatasetSampling
from IPython.display import display

# SIAN
# Load the NF-UNSW-NB15-v2 CSV (datasets[1]) using the column layout defined above.
# The evaluation split is requested as the last rows with evaluation_percent=0.1;
# cache files are written to / read from demonstration_folder.
df = ft.load_dataset("UNSW-NB15",
                data_path+datasets[1],
                specification=flow_format,
                evaluation_dataset_sampling=EvaluationDatasetSampling.LastRows,
                evaluation_percent=0.1,
                cache_path=demonstration_folder)

# Preview the first 500 rows of the returned frame.
display(df.iloc[:500])

Using cache file path: demonstration/UNSW-NB15_0_QdLmZHuh8yOmlGcKBEkf7hepImY0_5EjmvToFWKee8t20u0dFpVzNu4s0.feather
Reading directly from cache demonstration/UNSW-NB15_0_QdLmZHuh8yOmlGcKBEkf7hepImY0_5EjmvToFWKee8t20u0dFpVzNu4s0.feather...


Unnamed: 0,OUT_PKTS,MAX_IP_PKT_LEN,DURATION_IN,RETRANSMITTED_OUT_BYTES,MAX_TTL,RETRANSMITTED_IN_BYTES,IN_PKTS,TCP_WIN_MAX_OUT,RETRANSMITTED_IN_PKTS,NUM_PKTS_128_TO_256_BYTES,...,L7_PROTO_23,L7_PROTO_24,L7_PROTO_25,L7_PROTO_26,L7_PROTO_27,L7_PROTO_28,L7_PROTO_29,L7_PROTO_30,L7_PROTO_31,L7_PROTO_32
0,0.148859,0.565324,0.0,0.000000,0.630549,0.000000,0.000000,0.801390,0.000000,0.0,...,False,False,False,False,False,False,False,False,False,False
1,0.223288,0.565324,0.0,0.284665,0.630549,0.261166,0.160324,0.817828,0.074172,0.0,...,False,False,False,False,False,False,False,False,False,False
2,0.266827,0.575948,0.0,0.326006,0.630549,0.295819,0.218877,0.831726,0.117560,0.0,...,False,False,False,False,False,False,False,False,False,False
3,0.297718,0.598516,0.0,0.353458,0.630549,0.317712,0.255508,0.843766,0.148344,0.0,...,False,False,False,False,False,False,False,False,False,False
4,0.331913,0.598516,0.0,0.384771,0.630549,0.334477,0.293311,0.854385,0.172222,0.0,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.349851,0.598516,0.0,0.395384,0.630549,0.347007,0.312343,0.863885,0.191732,0.0,...,False,False,False,False,False,False,False,False,False,False
496,0.223288,0.565324,0.0,0.284665,0.630549,0.261166,0.160324,0.817828,0.074172,0.0,...,False,False,False,False,False,False,False,False,False,False
497,0.365217,0.598516,0.0,0.405933,0.630549,0.359164,0.328315,0.872479,0.208227,0.0,...,False,False,False,False,False,False,False,False,False,False
498,0.266827,0.575948,0.0,0.326006,0.630549,0.295819,0.218877,0.831726,0.117560,0.0,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Build the transformer model from the components configured on `ft` and print its layer summary
m = ft.build_model()
m.summary()

# Compile the model: Adam optimizer, binary cross-entropy loss, binary accuracy metric,
# with jit_compile=True passed through to Keras
m.compile(optimizer="adam", loss='binary_crossentropy', metrics=['binary_accuracy'], jit_compile=True)

In [None]:
# Run ft.evaluate on the compiled model with the batch size, epoch count, steps per epoch
# and early-stopping patience given here; the call returns three values, unpacked below.
(train_results, eval_results, final_epoch) = ft.evaluate(m, batch_size=128, epochs=5, steps_per_epoch=64, early_stopping_patience=5)

Building eval dataset...
Splitting dataset to featurewise...
Evaluation dataset is built!
Positive samples in eval set: 16271
Negative samples in eval set: 222756
Epoch = 0 / 5 (early stop in 5), step = 0, loss = 0.73607, results = [array(0.7360732, dtype=float32), array(0.546875, dtype=float32)] -- elapsed (train): 0.00s
Epoch = 0 / 5 (early stop in 5), step = 13, loss = 0.36630, results = [array(0.3663045, dtype=float32), array(0.83370537, dtype=float32)] -- elapsed (train): 1.11s
Epoch = 0 / 5 (early stop in 5), step = 30, loss = 0.22226, results = [array(0.22226392, dtype=float32), array(0.9080141, dtype=float32)] -- elapsed (train): 2.20s
Epoch = 0 / 5 (early stop in 5), step = 46, loss = 0.16748, results = [array(0.16748103, dtype=float32), array(0.9333444, dtype=float32)] -- elapsed (train): 3.32s
Epoch = 0 / 5 (early stop in 5), step = 63, loss = 0.13574, results = [array(0.13573794, dtype=float32), array(0.9468994, dtype=float32)] -- elapsed (train): 4.47s
Epoch = 1 / 5 (early