<a href="https://colab.research.google.com/github/Vidit122/Mini-Project/blob/main/TransformerBi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder, QuantileTransformer
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Reshape, Conv2D, DepthwiseConv2D,
    BatchNormalization, ReLU, GlobalAveragePooling2D,
    Bidirectional, GRU, Dense, Dropout, Concatenate
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the dataset from CSV. low_memory=False makes pandas parse the file in a
# single pass instead of in chunks, which avoids mixed-dtype columns.
df = pd.read_csv("5G_NIDD_FULL_9CLASS_MIXED.csv", low_memory=False)

# Quick overview: frame dimensions, the first ten column names and the number of
# rows per raw label. (Printed once; the original cell repeated these lines.)
print(df.shape)
print(df.columns[:10])
print(df["Label"].value_counts())

(1693627, 51)
Index(['Max', 'AckDat', 'DstLoss', 'dDSb', 'Sum', 'Mean', 'SrcTCPBase', 'sDSb',
       'dTtl', 'TotBytes'],
      dtype='object')
Label
Benign         477737
UDPFlood1      467717
UDPFlood2      286197
Goldeneye1      93803
Goldeneye2      93650
SYNFlood1       44636
Torshammer1     38120
Torshammer2     31669
Slowloris1      31015
ICMPFlood1      18279
ICMPFlood2      14452
SYNFlood2       14108
Slowloris2      12656
TCPConnect2     11653
TCPConnect1     11645
SYNScan2        11526
SYNScan1        11450
UDPScan2        10305
UDPScan1        10043
SSH1             1608
SSH2             1358
Name: count, dtype: int64
(1693627, 51)
Index(['Max', 'AckDat', 'DstLoss', 'dDSb', 'Sum', 'Mean', 'SrcTCPBase', 'sDSb',
       'dTtl', 'TotBytes'],
      dtype='object')
Label
Benign         477737
UDPFlood1      467717
UDPFlood2      286197
Goldeneye1      93803
Goldeneye2      93650
SYNFlood1       44636
Torshammer1     38120
Torshammer2     31669
Slowloris1      31015
ICMPFlood1    

In [None]:
# ALWAYS start from df
X = df.drop(columns=["Label"])
y = df["Label"]

# Force numeric
X = X.apply(pd.to_numeric, errors="coerce")
X.replace([np.inf, -np.inf], np.nan, inplace=True)
X.fillna(X.mean(), inplace=True)

print("X shape BEFORE slicing:", X.shape)
 # MUST be > 36 columns

X shape BEFORE slicing: (1693627, 50)


In [None]:
# Raw capture label -> coarse traffic class.
# Built from (coarse class, raw labels) groups; a coarse class of None marks raw
# labels whose rows are meant to be dropped (SSH is not used).
label_map = {
    raw_label: coarse_class
    for coarse_class, raw_labels in (
        # HTTP floods
        ("HTTP_Flood", ("Goldeneye1", "Goldeneye2", "Torshammer1", "Torshammer2")),
        # Slow DoS
        ("Slowrate_DoS", ("Slowloris1", "Slowloris2")),
        # UDP flood
        ("UDP_Flood", ("UDPFlood1", "UDPFlood2")),
        # SYN flood
        ("SYN_Flood", ("SYNFlood1", "SYNFlood2")),
        # ICMP flood
        ("ICMP_Flood", ("ICMPFlood1", "ICMPFlood2")),
        # Scans
        ("UDP_Scan", ("UDPScan1", "UDPScan2")),
        ("SYN_Scan", ("SYNScan1", "SYNScan2")),
        ("TCP_Connect_Scan", ("TCPConnect1", "TCPConnect2")),
        # Benign (already correct)
        ("Benign", ("Benign",)),
        # SSH (DROP — not used)
        (None, ("SSH1", "SSH2")),
    )
    for raw_label in raw_labels
}

In [None]:
# Collapse the raw per-scenario labels into the coarse classes of `label_map`.
#
# The coarse class names are added as identity entries so this cell is safe to
# re-run: with a plain .map(label_map), a second run would find none of the
# already-collapsed names among the keys, turn them into NaN, and the dropna
# below would then silently discard every row except "Benign".
relabel = {cls: cls for cls in label_map.values() if cls is not None}
relabel.update(label_map)  # raw-label entries take precedence over identity entries
df["Label"] = df["Label"].map(relabel)

# Remove rows mapped to None (SSH etc.) as well as any label missing from the map.
df = df.dropna(subset=["Label"])

In [None]:
# Show the distinct class names that remain after relabelling, and how many there are.
label_column = df["Label"]
print(sorted(label_column.unique()))
print(f"Number of classes: {label_column.nunique()}")

['Benign', 'HTTP_Flood', 'ICMP_Flood', 'SYN_Flood', 'SYN_Scan', 'Slowrate_DoS', 'TCP_Connect_Scan', 'UDP_Flood', 'UDP_Scan']
Number of classes: 9


In [None]:
# Features & labels
X = df.drop(columns=["Label"])
y = df["Label"]

X = X.apply(pd.to_numeric, errors="coerce")
X.replace([np.inf, -np.inf], np.nan, inplace=True)
X.fillna(X.mean(), inplace=True)

# Convert to numeric
X = X.apply(pd.to_numeric, errors="coerce")

# Remove inf first
X.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill NaNs BEFORE QT
X.fillna(X.mean(), inplace=True)


# EXACT 36 features (fixed, no filtering later)
X = X.iloc[:, :36]

# Encode labels
le = LabelEncoder()
y_enc = le.fit_transform(y)
y_onehot = tf.keras.utils.to_categorical(y_enc, 9)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.2, random_state=42, stratify=y_enc
)


# Quantile transform (paper style)
qt = QuantileTransformer(
    n_quantiles=1000,
    output_distribution="normal",
    random_state=42
)
X_train = qt.fit_transform(X_train)
X_test  = qt.transform(X_test)

X_train = np.nan_to_num(X_train, nan=0.0, posinf=0.0, neginf=0.0)
X_test  = np.nan_to_num(X_test,  nan=0.0, posinf=0.0, neginf=0.0)

# Reshape
X_train = X_train.reshape(-1, 36, 1)
X_test  = X_test.reshape(-1, 36, 1)


  return fnb._ureduce(a,


In [None]:
# Sanity checks after preprocessing.
# Count NaNs and Infs over BOTH splits; the original looked for NaNs only in
# X_train and for Infs only in X_test.
print("NaNs in X:", np.isnan(X_train).sum() + np.isnan(X_test).sum())
print("Infs in X:", np.isinf(X_train).sum() + np.isinf(X_test).sum())
# Every one-hot row must sum to exactly 1.
print("y unique sums:", np.unique(y_train.sum(axis=1)))

NaNs in X: 0
Infs in X: 0
y unique sums: [1.]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization,
    MultiHeadAttention, Add, Reshape, GlobalAveragePooling1D
)
from tensorflow.keras.models import Model


class _LearnedPositionalEmbedding(tf.keras.layers.Layer):
    """Add a trainable vector per sequence position to every token.

    Holds a (seq_len, d_model) weight table that is broadcast-added to an input
    of shape (batch, seq_len, d_model).
    """

    def __init__(self, seq_len, d_model, **kwargs):
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.d_model = d_model

    def build(self, input_shape):
        # One learned d_model-sized vector for each of the seq_len positions.
        self.position_table = self.add_weight(
            name="position_table",
            shape=(self.seq_len, self.d_model),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        # (batch, seq_len, d_model) + (seq_len, d_model) broadcasts over batch.
        return inputs + self.position_table

    def get_config(self):
        # Expose constructor arguments so the layer can be re-created from config.
        config = super().get_config()
        config.update({"seq_len": self.seq_len, "d_model": self.d_model})
        return config


def Transformer_Bi_MHA_Rowwise(
    seq_len=36,
    d_model=64,
    num_heads=4,
    ff_dim=128,
    num_classes=9,
    use_positional_embedding=True
):
    """
    Drop-in replacement for MobileNetV1 + BiGRU
    Input: (seq_len, 1), i.e. (36, 1) with the defaults

    Single transformer encoder block (self-attention + feed-forward, each with
    a residual connection and layer normalisation), followed by average
    pooling over the sequence axis and a dense classification head.

    Args:
        seq_len: number of positions in the input sequence.
        d_model: width of the per-position representation.
        num_heads: number of attention heads; each head uses
            key_dim = d_model // num_heads.
        ff_dim: hidden width of the feed-forward sub-layer.
        num_classes: size of the softmax output.
        use_positional_embedding: when True (default) a learned per-position
            vector is added after the input projection. Without it every
            position goes through the same Dense layer, self-attention is
            order-agnostic and the result is average-pooled, so the output
            would be identical for any permutation of the input positions.
            Pass False to build the architecture without positional
            information.

    Returns:
        An uncompiled Keras Model mapping (batch, seq_len, 1) to
        (batch, num_classes) class probabilities.
    """

    inp = Input(shape=(seq_len, 1))

    # Project to d_model
    x = Dense(d_model)(inp)   # (seq_len, d_model)

    # Inject position identity so the encoder can tell the positions apart.
    if use_positional_embedding:
        x = _LearnedPositionalEmbedding(seq_len, d_model)(x)

    # ---- Transformer Encoder Block ----
    # Multi-Head Self Attention (bidirectional by default: no causal mask is applied)
    attn_out = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=d_model // num_heads
    )(x, x)

    # Residual connection + normalisation around the attention sub-layer
    x = Add()([x, attn_out])
    x = LayerNormalization(epsilon=1e-6)(x)

    # Feed Forward Network
    ffn = Dense(ff_dim, activation="relu")(x)
    ffn = Dense(d_model)(ffn)

    # Residual connection + normalisation around the feed-forward sub-layer
    x = Add()([x, ffn])
    x = LayerNormalization(epsilon=1e-6)(x)

    # ---- Pooling ----
    # Average over the sequence axis -> (d_model,)
    x = GlobalAveragePooling1D()(x)

    # ---- Projection Head (same as paper style) ----
    x = Dense(256, activation="relu")(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.5)(x)

    out = Dense(num_classes, activation="softmax")(x)

    model = Model(inp, out)
    return model


In [None]:
# Build the network with its default hyper-parameters.
model = Transformer_Bi_MHA_Rowwise()

# Adam optimiser with each gradient's norm clipped to 1.0.
adam = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

# Train on the training split; validation_split=0.1 makes Keras hold out a
# tenth of the supplied samples for validation.
training_options = dict(epochs=10, batch_size=64, validation_split=0.1, verbose=1)
model.fit(X_train, y_train, **training_options)


Epoch 1/10
[1m19020/19020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m879s[0m 46ms/step - accuracy: 0.6745 - loss: 0.7969 - val_accuracy: 0.7088 - val_loss: 0.6381
Epoch 2/10
[1m 6838/19020[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m8:46[0m 43ms/step - accuracy: 0.6973 - loss: 0.6865

KeyboardInterrupt: 