In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [None]:
# Sanity check: list the dataset CSV so a missing file is noticed before the load cell runs.
!ls /content/drive/MyDrive/5G_NIDD_multiclass_clean.csv

/content/drive/MyDrive/5G_NIDD_multiclass_clean.csv


In [None]:
# Read the full CSV in one pass (low_memory=False disables chunked dtype inference).
df = pd.read_csv(
    '/content/drive/MyDrive/5G_NIDD_multiclass_clean.csv',
    low_memory=False,
)
print("Original shape:", df.shape)

# Columns treated as obvious non-learning inputs (addresses, MAC/OUI ids, timestamps).
drop_cols = [
    'SrcMac', 'DstMac', 'SrcAddr', 'DstAddr',
    'StartTime', 'LastTime', 'SrcOui', 'DstOui',
]

# Classification target.
y = df['Label']

# Feature frame: strip the label-related columns, then the non-learning
# columns, and finally keep only numeric dtypes. errors='ignore' makes each
# drop tolerate columns that are absent from the file.
X = (
    df.drop(columns=['Label', 'Attack Type', 'Attack Tool'], errors='ignore')
      .drop(columns=drop_cols, errors='ignore')
      .select_dtypes(include=[np.number])
)

print("After numeric selection:", X.shape)


Original shape: (1215890, 112)
After numeric selection: (1215890, 86)


In [None]:
# Turn +/-inf into NaN first so that a single fill step zeroes every non-finite value.
X = X.replace([np.inf, -np.inf], np.nan).fillna(0)


In [None]:
# Rank features with the ANOVA F-test and keep the 36 highest-scoring ones.
#
# The selector is fitted on the training rows only, so the held-out test rows
# cannot influence which features are chosen (fitting on every row leaks test
# information into the model pipeline).
#
# NOTE: the arguments below must stay identical to the train_test_split call
# that later produces X_train / X_test (same test_size, same stratification
# target, same random_state) so that both calls should select the same rows.
# Stratifying on the raw labels partitions the rows into the same classes as
# stratifying on their integer encoding.
fit_idx, _ = train_test_split(
    np.arange(len(X)),
    test_size=0.2,
    stratify=y,
    random_state=42,
)

selector = SelectKBest(score_func=f_classif, k=36)
selector.fit(X.iloc[fit_idx], y.iloc[fit_idx])

# Apply the fitted column mask to every row; X_selected keeps one row per
# sample and 36 columns, exactly as downstream cells expect.
X_selected = selector.transform(X)

selected_features = X.columns[selector.get_support()]
print("Selected Features:", selected_features.tolist())


 60 61 62 63 64 65 66 67 68 69 70 71 72 77 78 79 80] are constant.
  f = msb / msw


Selected Features: ['Rank', 'Seq', 'Dur', 'RunTime', 'Mean', 'Sum', 'Min', 'Max', 'sTos', 'dTos', 'sTtl', 'dTtl', 'sHops', 'dHops', 'TotPkts', 'SrcPkts', 'DstPkts', 'TotBytes', 'SrcBytes', 'DstBytes', 'Offset', 'sMeanPktSz', 'dMeanPktSz', 'Loss', 'SrcLoss', 'DstLoss', 'pLoss', 'SrcWin', 'DstWin', 'sVid', 'dVid', 'SrcTCPBase', 'DstTCPBase', 'TcpRtt', 'SynAck', 'AckDat']


In [None]:
# Fit the encoder on the target column, then map every label to its integer id.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# Every fitted class occurs in y, so the encoder's class list has one entry
# per distinct encoded value.
num_classes = len(le.classes_)
print("Classes:", num_classes)


Classes: 20


In [None]:
# Hold out 20% of the rows for testing. Stratifying on the encoded labels keeps
# the class proportions the same in both partitions; the fixed seed makes the
# partition repeatable.
split_kwargs = dict(test_size=0.2, stratify=y_encoded, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y_encoded, **split_kwargs
)


In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same transform to both partitions.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Add a trailing channel axis so each sample becomes a (n_features, 1) sequence
# for the recurrent layers. The row and feature counts are taken from the data
# instead of a hard-coded 36: with a literal width and -1 for the rows, a
# different feature count would either raise or silently fold the extra
# columns into extra rows.
n_features = X_train.shape[1]

X_train = X_train.reshape(len(X_train), n_features, 1)
X_test = X_test.reshape(len(X_test), n_features, 1)


In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

def BiGRU_Model(drop_rate=0.5, gru_units=128, dense_units=256,
                input_shape=(36, 1), n_classes=None):
    """Build an (uncompiled) stacked bidirectional-GRU classifier.

    Architecture: two bidirectional GRU layers, each followed by batch
    normalization, then two ReLU dense layers, dropout and a softmax output.

    Args:
        drop_rate: Dropout rate applied just before the output layer.
        gru_units: Units per direction in each GRU layer.
        dense_units: Width of the first dense layer; the second dense layer
            uses ``dense_units // 2``.
        input_shape: Shape of one input sample, ``(timesteps, channels)``.
            Defaults to ``(36, 1)``, the shape this function previously
            hard-coded.
        n_classes: Number of output classes. When ``None`` the notebook-level
            ``num_classes`` variable is used, matching the previous behavior.

    Returns:
        A Keras ``Model`` mapping ``input_shape`` inputs to class probabilities.
    """
    if n_classes is None:
        # Fall back to the global defined in the label-encoding cell.
        n_classes = num_classes

    inp = Input(shape=input_shape)

    # Sequence modelling: the first layer returns the full sequence so the
    # second recurrent layer has a sequence to consume.
    x = Bidirectional(GRU(gru_units, return_sequences=True))(inp)
    x = BatchNormalization()(x)

    x = Bidirectional(GRU(gru_units))(x)
    x = BatchNormalization()(x)

    # Classifier head.
    x = Dense(dense_units, activation="relu")(x)
    x = Dense(dense_units // 2, activation="relu")(x)
    x = Dropout(drop_rate)(x)

    out = Dense(n_classes, activation="softmax")(x)

    return Model(inp, out)


In [None]:
!pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.8-py3-none-any.whl.metadata (5.6 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.8-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.8 kt-legacy-1.0.5


In [None]:
import tensorflow as tf

def focal_loss(gamma=2.0, alpha=0.25):
    """Build a multi-class focal loss for integer (sparse) class labels.

    Args:
        gamma: Focusing exponent applied to ``(1 - p_t)``; larger values
            down-weight well-classified samples more strongly.
        alpha: Constant factor that scales every sample's loss.

    Returns:
        A callable ``loss(y_true, y_pred)`` that returns one loss value per
        sample. The batch reduction is left to Keras so that ``class_weight``
        / ``sample_weight`` passed to ``fit`` weight each sample individually;
        reducing to a scalar inside the loss would make those weights act only
        as a single batch-level scale factor.
    """

    def loss(y_true, y_pred):
        # Labels can arrive as (batch,) or (batch, 1) depending on the Keras
        # version; flatten so one_hot always yields a (batch, classes) tensor.
        labels = tf.reshape(tf.cast(y_true, tf.int32), [-1])

        # Take the one-hot width from the prediction tensor instead of a
        # notebook global, so the loss always matches the model's output.
        one_hot_labels = tf.one_hot(labels, depth=tf.shape(y_pred)[-1])

        ce = tf.keras.losses.categorical_crossentropy(one_hot_labels, y_pred)

        # exp(-ce) recovers the probability assigned to the true class.
        pt = tf.exp(-ce)

        # Per-sample focal loss; Keras applies sample weights and averages.
        return alpha * tf.pow(1 - pt, gamma) * ce

    return loss


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Distinct encoded labels present in the training partition.
classes = np.unique(y_train)

# 'balanced' weighting: one weight per entry of `classes`, in the same order.
balanced = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)

# Keras expects a {class_id: weight} mapping.
class_weights = {label: weight for label, weight in zip(classes, balanced)}
print(class_weights)


{np.int64(0): np.float64(0.64811172410117), np.int64(1): np.float64(0.6491671115856914), np.int64(2): np.float64(3.325965944060726), np.int64(3): np.float64(4.20650406504065), np.int64(4): np.float64(37.819284603421465), np.int64(5): np.float64(44.74296228150874), np.int64(6): np.float64(1.3619983757596124), np.int64(7): np.float64(4.309374446216552), np.int64(8): np.float64(5.309563318777292), np.int64(9): np.float64(5.274438781043271), np.int64(10): np.float64(1.9601644365629534), np.int64(11): np.float64(4.803516049382716), np.int64(12): np.float64(5.220652640618291), np.int64(13): np.float64(5.217292426517915), np.int64(14): np.float64(1.5948189926547744), np.int64(15): np.float64(1.9197000197355436), np.int64(16): np.float64(0.12998123867505493), np.int64(17): np.float64(0.21242149215139894), np.int64(18): np.float64(6.053721682847897), np.int64(19): np.float64(5.8995147986414365)}


In [None]:
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping

def build_model(hp):
    """Hypermodel factory for KerasTuner.

    Samples dropout rate, GRU width, dense width and learning rate from `hp`,
    builds the BiGRU network with them and compiles it with Adam, the focal
    loss (gamma=2, alpha=0.25) and an accuracy metric.

    Args:
        hp: The KerasTuner hyperparameter container for the current trial.

    Returns:
        The compiled Keras model.
    """
    # Architecture choices (registered with `hp` in this order).
    arch_kwargs = {
        "drop_rate": hp.Choice("dropout", [0.3, 0.5, 0.6]),
        "gru_units": hp.Choice("gru_units", [64, 128, 256]),
        "dense_units": hp.Choice("dense", [128, 256, 512]),
    }
    net = BiGRU_Model(**arch_kwargs)

    # Optimizer step size.
    learning_rate = hp.Choice("lr", [1e-2, 1e-3, 1e-4])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    net.compile(
        optimizer=optimizer,
        loss=focal_loss(gamma=2, alpha=0.25),
        metrics=["accuracy"],
    )
    return net


# Hyperband search over build_model's hyperparameters, ranked by validation
# accuracy. The tuner is seeded so the sampled configurations are repeatable.
tuner = kt.Hyperband(
    build_model,
    objective="val_accuracy",
    max_epochs=7,
    factor=3,
    seed=42,
    directory="tuning_bigru",
    project_name="5g_bigru_only"
)

# ---- Tune on subset ----
# Draw a 25% subset of the training rows without replacement. A seeded
# generator is used so the same subset is drawn on every run.
rng = np.random.default_rng(42)
sample_idx = rng.choice(len(X_train), size=int(len(X_train) * 0.25), replace=False)
X_tune = X_train[sample_idx]
y_tune = y_train[sample_idx]

stop_early = EarlyStopping(monitor='val_loss', patience=3)

# No `epochs` argument is passed: Hyperband sets the epoch budget of every
# trial itself (bounded by max_epochs above), so a value given here would be
# overridden.
tuner.search(
    X_tune, y_tune,
    validation_split=0.2,
    batch_size=512,
    callbacks=[stop_early],
    verbose=1
)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best Hyperparameters:")
print(best_hps.values)


Trial 10 Complete [00h 03m 56s]
val_accuracy: 0.856094241142273

Best val_accuracy So Far: 0.8606998920440674
Total elapsed time: 00h 23m 56s
Best Hyperparameters:
{'dropout': 0.5, 'gru_units': 256, 'dense': 256, 'lr': 0.001, 'tuner/epochs': 7, 'tuner/initial_epoch': 3, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0004'}


In [None]:
# Rebuild a fresh model from the best hyperparameters found by the tuner.
model = tuner.hypermodel.build(best_hps)

# Training-time callbacks: stop when the monitored metric stalls for 8 epochs
# (restoring the best weights), and reduce the learning rate after 4 stalled epochs.
final_callbacks = [
    EarlyStopping(patience=8, restore_best_weights=True),
    ReduceLROnPlateau(patience=4),
]

# Train on the full training partition, holding out 10% for validation and
# applying the balanced class weights.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=512,
    class_weight=class_weights,
    callbacks=final_callbacks,
)


Epoch 1/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 92ms/step - accuracy: 0.7463 - loss: 0.0941 - val_accuracy: 0.8541 - val_loss: 0.0630 - learning_rate: 0.0010
Epoch 2/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 91ms/step - accuracy: 0.8577 - loss: 0.0400 - val_accuracy: 0.8681 - val_loss: 0.0328 - learning_rate: 0.0010
Epoch 3/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 91ms/step - accuracy: 0.8734 - loss: 0.0336 - val_accuracy: 0.8931 - val_loss: 0.0260 - learning_rate: 0.0010
Epoch 4/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 91ms/step - accuracy: 0.8800 - loss: 0.0303 - val_accuracy: 0.8796 - val_loss: 0.0339 - learning_rate: 0.0010
Epoch 5/10
[1m1710/1710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 91ms/step - accuracy: 0.8854 - loss: 0.0285 - val_accuracy: 0.8577 - val_loss: 0.0650 - learning_rate: 0.0010
Epoch 6/10
[1m1710/1710[0m [32m━━━━━━━━━━━

In [None]:
# Class probabilities for every test row, kept under their own name so that
# `pred` only ever holds the predicted class ids.
pred_proba = model.predict(X_test)
pred = np.argmax(pred_proba, axis=1)

# Report per-class metrics under the original label names rather than the
# opaque encoded integers. Passing `labels` explicitly keeps the rows aligned
# with `target_names` even if a class is absent from the test predictions.
class_ids = np.arange(len(le.classes_))
print(classification_report(
    y_test,
    pred,
    labels=class_ids,
    target_names=[str(name) for name in le.classes_],
))


[1m7600/7600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 5ms/step
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     18761
           1       0.98      0.98      0.98     18730
           2       0.80      0.92      0.86      3656
           3       0.80      0.88      0.83      2890
           4       0.36      0.61      0.46       322
           5       0.32      0.46      0.38       271
           6       0.95      0.98      0.96      8927
           7       0.96      0.95      0.95      2822
           8       0.92      0.91      0.92      2290
           9       0.88      0.90      0.89      2305
          10       0.90      0.88      0.89      6203
          11       0.71      0.72      0.72      2531
          12       0.52      0.81      0.64      2329
          13       0.72      0.19      0.30      2331
          14       0.98      0.95      0.97      7624
          15       0.99      0.98      0.98      6334
   