In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [None]:
# Read the dataset CSV into memory.
df = pd.read_csv('/content/drive/MyDrive/5G_NIDD_multiclass_clean.csv', low_memory=False)
print("Original shape:", df.shape)

# The prediction target; it is dropped from the features together with the
# other attack-description columns.
y = df['Label']
target_related = ['Label', 'Attack Type', 'Attack Tool']

# Remove obvious non-learning columns (addresses, MAC/OUI ids, timestamps).
drop_cols = [
    'SrcMac', 'DstMac', 'SrcAddr', 'DstAddr', 'StartTime', 'LastTime',
    'SrcOui', 'DstOui',
]

# errors='ignore' skips any listed column that is absent from the frame;
# afterwards only numeric dtypes are kept as features.
X = (
    df.drop(columns=target_related, errors='ignore')
      .drop(columns=drop_cols, errors='ignore')
      .select_dtypes(include=[np.number])
)

print("After numeric selection:", X.shape)


Original shape: (1215890, 112)
After numeric selection: (1215890, 86)


In [None]:
# Turn +/-inf into NaN first so a single fillna() zeroes every non-finite entry.
X = X.replace([np.inf, -np.inf], np.nan).fillna(0)


In [None]:
# Fit the ANOVA F-test selector on the training rows only, so held-out test
# rows never influence which features are kept (avoids selection leakage).
# The index split below uses the same test_size / stratification / random_state
# as the notebook's later train_test_split call and is intended to reproduce
# the same row partition; keep the two in sync if either is changed.
train_rows, _ = train_test_split(
    np.arange(len(X)),
    test_size=0.2,
    stratify=y,
    random_state=42
)

selector = SelectKBest(score_func=f_classif, k=36)
selector.fit(X.iloc[train_rows], y.iloc[train_rows])

# Apply the fitted column mask to every row: downstream cells expect the
# full matrix restricted to the k selected columns.
X_selected = selector.transform(X)

selected_features = X.columns[selector.get_support()]
print("Selected Features:", selected_features.tolist())


 60 61 62 63 64 65 66 67 68 69 70 71 72 77 78 79 80] are constant.
  f = msb / msw


Selected Features: ['Rank', 'Seq', 'Dur', 'RunTime', 'Mean', 'Sum', 'Min', 'Max', 'sTos', 'dTos', 'sTtl', 'dTtl', 'sHops', 'dHops', 'TotPkts', 'SrcPkts', 'DstPkts', 'TotBytes', 'SrcBytes', 'DstBytes', 'Offset', 'sMeanPktSz', 'dMeanPktSz', 'Loss', 'SrcLoss', 'DstLoss', 'pLoss', 'SrcWin', 'DstWin', 'sVid', 'dVid', 'SrcTCPBase', 'DstTCPBase', 'TcpRtt', 'SynAck', 'AckDat']


In [None]:
# Map the class labels onto consecutive integer ids.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# The encoder keeps one entry per distinct label it was fitted on.
num_classes = len(le.classes_)
print("Classes:", num_classes)


Classes: 20


In [None]:
# Hold out 20% of the rows for testing, stratified on the encoded labels,
# with a fixed seed so the partition is repeatable.
split_options = dict(test_size=0.2, stratify=y_encoded, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y_encoded, **split_options
)


In [None]:
# Learn the scaling statistics from the training partition only ...
scaler = StandardScaler().fit(X_train)

# ... then apply that same fitted transform to both partitions.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Add a trailing channel axis: (samples, features) -> (samples, features, 1).
# The sample axis is pinned and the feature axis is inferred, so a change in
# the number of selected features can never silently regroup values across
# rows the way a hard-coded width could.  Re-running the cell on already
# reshaped arrays leaves their shape unchanged.
X_train = X_train.reshape(len(X_train), -1, 1)
X_test  = X_test.reshape(len(X_test), -1, 1)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

class PositionalEncoding(tf.keras.layers.Layer):
    """Adds a fixed sinusoidal positional-encoding table to its input.

    The table is computed once in ``__init__`` with shape
    ``(1, sequence_len, d_model)`` and added to the input in ``call``.

    Args:
        sequence_len: Number of positions to precompute.
        d_model: Width of the encoding (number of columns in the table).
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``), which
            Keras also passes when rebuilding the layer from its config.
    """

    def __init__(self, sequence_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Stored so get_config() can describe how to rebuild this layer.
        self.sequence_len = sequence_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(sequence_len, d_model)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "sequence_len": self.sequence_len,
            "d_model": self.d_model,
        })
        return config

    def get_angles(self, pos, i, d_model):
        """Return ``pos * 1 / 10000^(2*(i//2)/d_model)`` (broadcast over pos and i)."""
        angle_rates = 1 / tf.pow(10000.0, (2 * (i//2)) / tf.cast(d_model, tf.float32))
        return pos * angle_rates

    def positional_encoding(self, position, d_model):
        """Build the encoding table of shape ``(1, position, d_model)``."""

        # Column vector of positions and row vector of channel indices.
        pos = tf.range(position, dtype=tf.float32)[:, tf.newaxis]
        i   = tf.range(d_model, dtype=tf.float32)[tf.newaxis, :]

        angles = self.get_angles(pos, i, d_model)

        # Sine of the even-indexed angle columns.
        sines = tf.sin(angles[:, 0::2])

        # Cosine of the odd-indexed angle columns.
        cosines = tf.cos(angles[:, 1::2])

        # Concatenated, not interleaved: all sine columns come first,
        # followed by all cosine columns.
        pos_encoding = tf.concat([sines, cosines], axis=-1)

        # Leading axis of size 1 lets the table broadcast over the batch.
        return pos_encoding[tf.newaxis, ...]

    def call(self, x):
        # Slice the table to the input's length along axis 1 before adding.
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]


In [None]:
def transformer_block(x, d_model, num_heads, ff_dim, dropout):
    """Apply one post-norm Transformer encoder block to ``x``.

    Self-attention and a two-layer feed-forward network are each followed by
    dropout, a residual addition and layer normalization.

    Args:
        x: Input tensor; its last axis must match ``d_model`` for the residual
            additions to be valid.
        d_model: Output width of the feed-forward sub-layer.
        num_heads: Number of attention heads; each head uses
            ``d_model // num_heads`` as its key dimension.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout: Dropout rate used after both sub-layers.

    Returns:
        The normalized output tensor of the block.
    """
    # Self-attention sub-layer: the input serves as both query and value.
    self_attention = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=d_model // num_heads,
    )
    attended = self_attention(x, x)
    attended = Dropout(dropout)(attended)
    normed = LayerNormalization(epsilon=1e-6)(x + attended)

    # Feed-forward sub-layer: expand to ff_dim, project back to d_model.
    hidden = Dense(ff_dim, activation="relu")(normed)
    projected = Dense(d_model)(hidden)
    projected = Dropout(dropout)(projected)

    return LayerNormalization(epsilon=1e-6)(normed + projected)



In [None]:
def Transformer_IDS(
        d_model=64,
        num_heads=4,
        ff_dim=128,
        num_layers=2,
        dropout=0.3,
        dense_units=256,
        seq_len=36,
        n_classes=None):
    """Build the (uncompiled) Transformer-encoder classifier.

    Each input sample is a sequence of ``seq_len`` scalar values. The scalars
    are projected to ``d_model`` channels, a positional encoding is added, the
    result passes through ``num_layers`` encoder blocks, is average-pooled over
    the sequence axis and classified by a small dense head.

    Args:
        d_model: Embedding width used inside the encoder blocks.
        num_heads: Attention heads per encoder block.
        ff_dim: Hidden width of each block's feed-forward sub-layer.
        num_layers: Number of stacked encoder blocks.
        dropout: Dropout rate for the encoder blocks and the classifier head.
        dense_units: Width of the first classifier layer (the second layer
            uses half of it).
        seq_len: Length of the input sequence. Defaults to 36, the value that
            was previously hard-coded.
        n_classes: Size of the softmax output. When ``None`` the notebook-level
            ``num_classes`` variable is used, as before.

    Returns:
        A ``Model`` mapping ``(batch, seq_len, 1)`` inputs to class
        probabilities.
    """
    if n_classes is None:
        # Backward-compatible default: read the count computed earlier in the
        # notebook at call time.
        n_classes = num_classes

    inp = Input(shape=(seq_len, 1))

    # Project features into embedding space
    x = Dense(d_model)(inp)

    # Add positional encoding (table length tied to the input length)
    x = PositionalEncoding(seq_len, d_model)(x)

    # Stacked Transformer encoders
    for _ in range(num_layers):
        x = transformer_block(x, d_model, num_heads, ff_dim, dropout)

    # Collapse the sequence axis into one vector per sample
    x = GlobalAveragePooling1D()(x)

    # Classifier
    x = Dense(dense_units, activation="relu")(x)
    x = Dense(dense_units//2, activation="relu")(x)
    x = Dropout(dropout)(x)

    out = Dense(n_classes, activation="softmax")(x)

    return Model(inp, out)


In [None]:
!pip install keras-tuner




In [None]:
import tensorflow as tf
from tensorflow.keras import backend as K

def focal_loss(gamma=2., alpha=0.25):
    """Create a focal-loss function usable as a Keras ``loss``.

    Args:
        gamma: Exponent of the ``(1 - p)`` modulating factor.
        alpha: Constant scale applied to the modulating factor.

    Returns:
        A callable ``loss(y_true, y_pred)`` returning the batch-mean of the
        per-sample loss, where the per-sample loss is summed over axis 1.
        Presumably ``y_true`` is one-hot and ``y_pred`` holds probabilities
        with shape (batch, classes) - confirm against the caller.
    """

    def loss(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)

        # Keep probabilities strictly inside (0, 1) so log() stays finite.
        tiny = 1e-7
        probs = tf.clip_by_value(y_pred, tiny, 1. - tiny)

        # Down-weighting factor: small where the predicted probability is high.
        modulating = alpha * tf.pow(1 - probs, gamma)

        # Element-wise cross entropy; zero wherever the target is zero.
        cross_entropy = -targets * tf.math.log(probs)

        per_sample = tf.reduce_sum(modulating * cross_entropy, axis=1)
        return tf.reduce_mean(per_sample)

    return loss

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Distinct class ids present in the training labels.
classes = np.unique(y_train)

# 'balanced' weighting: inversely proportional to class frequency in y_train.
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train,
)

# Lookup of class id -> weight, as consumed later by model.fit(class_weight=...).
class_weights = {cls: weight for cls, weight in zip(classes, balanced_weights)}
print(class_weights)


{np.int64(0): np.float64(0.64811172410117), np.int64(1): np.float64(0.6491671115856914), np.int64(2): np.float64(3.325965944060726), np.int64(3): np.float64(4.20650406504065), np.int64(4): np.float64(37.819284603421465), np.int64(5): np.float64(44.74296228150874), np.int64(6): np.float64(1.3619983757596124), np.int64(7): np.float64(4.309374446216552), np.int64(8): np.float64(5.309563318777292), np.int64(9): np.float64(5.274438781043271), np.int64(10): np.float64(1.9601644365629534), np.int64(11): np.float64(4.803516049382716), np.int64(12): np.float64(5.220652640618291), np.int64(13): np.float64(5.217292426517915), np.int64(14): np.float64(1.5948189926547744), np.int64(15): np.float64(1.9197000197355436), np.int64(16): np.float64(0.12998123867505493), np.int64(17): np.float64(0.21242149215139894), np.int64(18): np.float64(6.053721682847897), np.int64(19): np.float64(5.8995147986414365)}


In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class ids.  The rank check makes the cell safe to
# re-run: labels that are already one-hot (2-D) are left untouched instead of
# being encoded a second time.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes)


In [None]:
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping

def build_model(hp):
    """Build and compile one Transformer_IDS candidate for KerasTuner.

    Args:
        hp: KerasTuner hyperparameter container; every ``hp.Choice`` call
            below registers one tunable value.

    Returns:
        The model compiled with Adam, the focal loss and an accuracy metric.
    """
    # A dict literal evaluates its values in order, so the hyperparameters are
    # registered in exactly the sequence they are listed here.
    architecture = {
        "d_model": hp.Choice("d_model", [32, 64, 128]),
        "num_heads": hp.Choice("heads", [2, 4, 8]),
        "ff_dim": hp.Choice("ff", [64, 128, 256]),
        "num_layers": hp.Choice("layers", [1, 2, 3]),
        "dropout": hp.Choice("dropout", [0.2, 0.3, 0.5]),
        "dense_units": hp.Choice("dense", [128, 256, 512]),
    }
    candidate = Transformer_IDS(**architecture)

    learning_rate = hp.Choice("lr", [1e-3, 1e-4, 5e-4])

    candidate.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=focal_loss(gamma=2, alpha=0.25),
        metrics=["accuracy"],
    )

    return candidate


# Hyperband search over build_model's hyperparameters.  A fixed seed makes the
# sampled configurations repeatable between runs.
tuner = kt.Hyperband(
    build_model,
    objective="val_accuracy",
    max_epochs=7,
    factor=3,
    seed=42,
    directory="tuning_transformer",
    project_name="5g_transformer"
)

# subset tuning: search on a random 25% of the training rows.  A dedicated
# seeded generator keeps the subset identical across kernel restarts.
tune_rng = np.random.default_rng(42)
sample_idx = tune_rng.choice(len(X_train), size=int(len(X_train)*0.25), replace=False)
X_tune = X_train[sample_idx]
y_tune = y_train[sample_idx]

stop_early = EarlyStopping(monitor='val_loss', patience=3)

# No `epochs` argument here: Hyperband assigns each trial its own epoch budget
# (bounded by max_epochs above) and overrides any value passed to search().
tuner.search(
    X_tune, y_tune,
    validation_split=0.2,
    batch_size=512,
    callbacks=[stop_early],
    verbose=1
)

best_hps = tuner.get_best_hyperparameters(1)[0]
print(best_hps.values)


Trial 10 Complete [00h 01m 23s]
val_accuracy: 0.8756065368652344

Best val_accuracy So Far: 0.8756065368652344
Total elapsed time: 00h 09m 24s
{'d_model': 32, 'heads': 8, 'ff': 64, 'layers': 3, 'dropout': 0.2, 'dense': 256, 'lr': 0.001, 'tuner/epochs': 7, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [None]:
# Instantiate a new model from the best hyperparameters found by the tuner.
model = tuner.hypermodel.build(best_hps)

# Both callbacks rely on their default monitored quantity.
training_callbacks = [
    EarlyStopping(patience=8, restore_best_weights=True),
    ReduceLROnPlateau(patience=4),
]

# Train on the full training partition; 10% of it is held out for validation
# and the balanced class weights are applied to the loss.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=512,
    class_weight=class_weights,
    callbacks=training_callbacks,
)


Epoch 1/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 25ms/step - accuracy: 0.6352 - loss: 0.1516 - val_accuracy: 0.8341 - val_loss: 0.0485 - learning_rate: 0.0010
Epoch 2/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 17ms/step - accuracy: 0.8292 - loss: 0.0475 - val_accuracy: 0.8661 - val_loss: 0.0317 - learning_rate: 0.0010
Epoch 3/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 17ms/step - accuracy: 0.8671 - loss: 0.0340 - val_accuracy: 0.8999 - val_loss: 0.0260 - learning_rate: 0.0010
Epoch 4/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 17ms/step - accuracy: 0.8768 - loss: 0.0298 - val_accuracy: 0.8800 - val_loss: 0.0275 - learning_rate: 0.0010
Epoch 5/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 18ms/step - accuracy: 0.8845 - loss: 0.0274 - val_accuracy: 0.9000 - val_loss: 0.0220 - learning_rate: 0.0010
Epoch 6/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━

In [None]:
# Sanity check on the test labels: their shape and (up to ten of) the
# distinct values they contain.
print(y_test.shape)
distinct_label_values = np.unique(y_test)
print(distinct_label_values[:10])


(243178, 20)
[0. 1.]


In [None]:
# Convert one-hot test labels back to integer class ids.  The rank check keeps
# the cell re-runnable: once y_test is already 1-D, argmax over axis 1 would
# raise instead of being a no-op.
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis=1)

# Predicted class = index of the highest predicted probability per sample.
pred_probs = model.predict(X_test)
pred = np.argmax(pred_probs, axis=1)

print(classification_report(y_test, pred))



[1m7600/7600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     18761
           1       0.98      0.94      0.96     18730
           2       0.56      0.91      0.69      3656
           3       0.74      0.92      0.82      2890
           4       0.38      0.66      0.49       322
           5       0.37      0.17      0.24       271
           6       0.96      0.75      0.84      8927
           7       0.79      0.75      0.77      2822
           8       0.94      0.90      0.92      2290
           9       0.97      0.88      0.92      2305
          10       0.87      0.84      0.85      6203
          11       0.66      0.64      0.65      2531
          12       0.47      0.93      0.63      2329
          13       0.36      0.00      0.00      2331
          14       0.93      0.94      0.94      7624
          15       0.98      0.96      0.97      6334
   