# CWE 399: Hyperparameter Tuning

```
Vectorized Code Gadget Input --> BiLSTM + Attention -----------\
                                                                |
                                                                |--> Concatenate --> Dense --> Output
                                                                |
Raw Code Text Input    -->   Grayscale Image -->   CNN --------/
```

In [2]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from math import floor, sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Bidirectional, LSTM, LeakyReLU,
                                     Concatenate, Conv2D, MaxPool2D, Flatten)
from tensorflow.keras.optimizers import Adam
from tensorflow.image import resize
from tensorflow.keras import backend as K
import optuna
from optuna.integration import TFKerasPruningCallback

SEED = 41
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.config.experimental.enable_op_determinism()


class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal", trainable=True)
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros", trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        e = K.squeeze(e, axis=-1)
        alpha = K.softmax(e)
        alpha = K.expand_dims(alpha, axis=-1)
        context = x * alpha
        context = K.sum(context, axis=1)
        return context


def load_blstm_data(filename):
    df = pd.read_pickle(filename)
    vectors = np.stack(df["vector"].values)
    labels = df["val"].values.astype(np.int32)
    valid_mask = (labels == 0) | (labels == 1)
    return vectors[valid_mask], labels[valid_mask]


def parse_file(filename):
    codes, labels = [], []
    with open(filename, "r", encoding="utf8") as file:
        gadget, label = [], None
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            if '-' * 10 in stripped:
                if label in (0, 1) and gadget:
                    codes.append("\n".join(gadget))
                    labels.append(label)
                gadget, label = [], None
            elif stripped.split()[0].isdigit():
                if stripped.isdigit():
                    value = int(stripped)
                    label = value if value in (0, 1) else None
                else:
                    gadget.append(stripped)
            else:
                gadget.append(stripped)
        if label in (0, 1) and gadget:
            codes.append("\n".join(gadget))
            labels.append(label)

    return np.array(codes), np.array(labels)


def code_to_image(code_sample, target_size=32):
    byte_array = bytearray(code_sample, 'utf-8')
    flat = np.array(byte_array, dtype=np.uint8)
    size = floor(sqrt(len(flat)))
    cropped = flat[:size * size].reshape((size, size))
    padded = np.zeros((target_size, target_size), dtype=np.uint8)
    h, w = cropped.shape
    padded[:min(h, target_size), :min(w, target_size)] = cropped[:min(h, target_size), :min(w, target_size)]
    padded = np.expand_dims(padded, axis=-1).astype(np.float32)
    return resize(padded, (target_size, target_size)).numpy() / 255.0


def load_hybrid_data(vector_pkl, code_txt):
    blstm_X, blstm_y = load_blstm_data(vector_pkl)
    code_texts, code_y = parse_file(code_txt)

    # Filter to minimum common length to ensure sync
    min_len = min(len(blstm_X), len(code_texts))
    blstm_X, blstm_y = blstm_X[:min_len], blstm_y[:min_len]
    code_texts, code_y = code_texts[:min_len], code_y[:min_len]

    match_mask = (blstm_y == code_y)
    blstm_X, labels, code_texts = blstm_X[match_mask], blstm_y[match_mask], code_texts[match_mask]

    print(f"Original dataset class distribution: {dict(zip(*np.unique(labels, return_counts=True)))}")

    X1_train_all, X1_test, codes_train_all, codes_test, y_train_all, y_test = train_test_split(
        blstm_X, code_texts, labels, test_size=0.2, stratify=labels, random_state=SEED
    )

    print(f"Train split before balancing: {dict(zip(*np.unique(y_train_all, return_counts=True)))}")
    print(f"Test set class distribution: {dict(zip(*np.unique(y_test, return_counts=True)))}")

    pos_idx = np.where(y_train_all == 1)[0]
    neg_idx = np.where(y_train_all == 0)[0]
    rng = np.random.default_rng(SEED)
    neg_sample = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    bal_idx = np.concatenate([pos_idx, neg_sample])
    rng.shuffle(bal_idx)

    X1_bal, codes_bal, y_bal = X1_train_all[bal_idx], codes_train_all[bal_idx], y_train_all[bal_idx]
    print(f"Balanced training class distribution: {dict(zip(*np.unique(y_bal, return_counts=True)))}")

    X1_train, X1_val, codes_train, codes_val, y_train, y_val = train_test_split(
        X1_bal, codes_bal, y_bal, test_size=0.2, stratify=y_bal, random_state=SEED
    )

    X2_train = np.stack([code_to_image(c) for c in codes_train])
    X2_val = np.stack([code_to_image(c) for c in codes_val])
    X2_test = np.stack([code_to_image(c) for c in codes_test])

    return X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test


def build_hybrid_model(blstm_input_shape, hp, cnn_input_shape=(32, 32, 1)):
    input_blstm = Input(shape=blstm_input_shape)
    x_lstm = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(input_blstm)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(hp["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(hp["dropout_lstm"])(x_lstm)

    input_cnn = Input(shape=cnn_input_shape)
    x_cnn = Conv2D(hp["cnn_filters1"], (3, 3), padding="same", activation="relu")(input_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(hp["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(hp["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(hp["dropout_cnn"])(x_cnn)

    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(hp["dense_units"], activation="relu")(merged)
    merged = Dropout(hp["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_blstm, input_cnn], outputs=output)
    model.compile(optimizer=Adam(hp["lr"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model


def objective(trial):
    hp = {
        "lstm_units": trial.suggest_categorical("lstm_units", [64, 128, 192]),
        "dense_units": trial.suggest_categorical("dense_units", [64, 128, 192]),
        "dropout_lstm": trial.suggest_float("dropout_lstm", 0.2, 0.5),
        "dropout_cnn": trial.suggest_float("dropout_cnn", 0.2, 0.5),
        "dropout_final": trial.suggest_float("dropout_final", 0.2, 0.5),
        "cnn_filters1": trial.suggest_categorical("cnn_filters1", [32, 64]),
        "cnn_filters2": trial.suggest_categorical("cnn_filters2", [64, 128]),
        "lr": trial.suggest_float("lr", 1e-4, 1e-2, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64])
    }

    model = build_hybrid_model(X1_train.shape[1:], hp)
    history = model.fit(
        [X1_train, X2_train], y_train,
        validation_data=([X1_val, X2_val], y_val),
        epochs=10,
        batch_size=hp["batch_size"],
        verbose=0,
        callbacks=[TFKerasPruningCallback(trial, "val_accuracy")]
    )
    return max(history.history["val_accuracy"])


def train_hybrid_with_optuna(vector_pkl, code_txt):
    global X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test
    X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test = load_hybrid_data(vector_pkl, code_txt)

    study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=20)

    print(f"Best trial: {study.best_trial.number}")
    print(f"Best hyperparameters: {study.best_params}")

    best_hp = study.best_params
    X1_full = np.concatenate([X1_train, X1_val])
    X2_full = np.concatenate([X2_train, X2_val])
    y_full = np.concatenate([y_train, y_val])

    model = build_hybrid_model(X1_full.shape[1:], best_hp)
    model.fit(
        [X1_full, X2_full], y_full,
        epochs=30,
        batch_size=best_hp["batch_size"],
        verbose=1
    )

    print("\nEvaluating on test set...")
    loss, acc = model.evaluate([X1_test, X2_test], y_test)
    print(f"Test Accuracy: {acc:.4f}")

    y_probs = model.predict([X1_test, X2_test])
    y_pred = (y_probs > 0.5).astype(int).flatten()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


# Run
vector_file = "cwe399_cgd_gadget_vectors.pkl"
code_file = "cwe399_cgd.txt"
train_hybrid_with_optuna(vector_file, code_file)

Original dataset class distribution: {0: 14600, 1: 7285}
Train split before balancing: {0: 11680, 1: 5828}
Test set class distribution: {0: 2920, 1: 1457}
Balanced training class distribution: {0: 5828, 1: 5828}


[I 2025-05-25 20:56:35,659] A new study created in memory with name: no-name-db4a376c-ba77-4856-9adc-2c06c7f2855b
[I 2025-05-25 20:57:15,947] Trial 0 finished with value: 0.9391080737113953 and parameters: {'lstm_units': 64, 'dense_units': 64, 'dropout_lstm': 0.2374121939836571, 'dropout_cnn': 0.27534292883328343, 'dropout_final': 0.2272902913249341, 'cnn_filters1': 32, 'cnn_filters2': 64, 'lr': 0.007900455844014219, 'batch_size': 64}. Best is trial 0 with value: 0.9391080737113953.
[I 2025-05-25 20:58:52,872] Trial 1 finished with value: 0.9069468379020691 and parameters: {'lstm_units': 192, 'dense_units': 64, 'dropout_lstm': 0.28303036037080276, 'dropout_cnn': 0.3431597129903241, 'dropout_final': 0.32800876068097073, 'cnn_filters1': 32, 'cnn_filters2': 128, 'lr': 0.0003213687536580007, 'batch_size': 64}. Best is trial 0 with value: 0.9391080737113953.
[I 2025-05-25 21:00:02,305] Trial 2 finished with value: 0.9215266108512878 and parameters: {'lstm_units': 128, 'dense_units': 64, 'dr

Best trial: 0
Best hyperparameters: {'lstm_units': 64, 'dense_units': 64, 'dropout_lstm': 0.2374121939836571, 'dropout_cnn': 0.27534292883328343, 'dropout_final': 0.2272902913249341, 'cnn_filters1': 32, 'cnn_filters2': 64, 'lr': 0.007900455844014219, 'batch_size': 64}
Epoch 1/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.5647 - loss: 0.6858
Epoch 2/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.8278 - loss: 0.3798
Epoch 3/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8808 - loss: 0.2701
Epoch 4/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8953 - loss: 0.2333
Epoch 5/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.9079 - loss: 0.2070
Epoch 6/30
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.9148 - loss: 0.1914
Epoc

# CWE 399: Final Model

```
Vectorized Code Gadget Input --> BiLSTM + Attention -----------\
                                                                |
                                                                |--> Concatenate --> Dense --> Output
                                                                |
Raw Code Text Input    -->   Grayscale Image -->   CNN --------/
```

In [6]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from math import floor, sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Bidirectional, LSTM, LeakyReLU,
                                     Concatenate, Conv2D, MaxPool2D, Flatten)
from tensorflow.keras.optimizers import Adam
from tensorflow.image import resize
from tensorflow.keras import backend as K

SEED = 41
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.config.experimental.enable_op_determinism()

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal", trainable=True)
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros", trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        e = K.squeeze(e, axis=-1)
        alpha = K.softmax(e)
        alpha = K.expand_dims(alpha, axis=-1)
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

def load_blstm_data(filename):
    df = pd.read_pickle(filename)
    vectors = np.stack(df["vector"].values)
    labels = df["val"].values.astype(np.int32)
    valid_mask = (labels == 0) | (labels == 1)
    return vectors[valid_mask], labels[valid_mask]

def parse_file(filename):
    codes, labels = [], []
    with open(filename, "r", encoding="utf8") as file:
        gadget, label = [], None
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            if '-' * 10 in stripped:
                if label in (0, 1) and gadget:
                    codes.append("\n".join(gadget))
                    labels.append(label)
                gadget, label = [], None
            elif stripped.split()[0].isdigit():
                if stripped.isdigit():
                    value = int(stripped)
                    label = value if value in (0, 1) else None
                else:
                    gadget.append(stripped)
            else:
                gadget.append(stripped)
        if label in (0, 1) and gadget:
            codes.append("\n".join(gadget))
            labels.append(label)
    return np.array(codes), np.array(labels)

def code_to_image(code_sample, target_size=32):
    byte_array = bytearray(code_sample, 'utf-8')
    flat = np.array(byte_array, dtype=np.uint8)
    size = floor(sqrt(len(flat)))
    cropped = flat[:size * size].reshape((size, size))
    padded = np.zeros((target_size, target_size), dtype=np.uint8)
    h, w = cropped.shape
    padded[:min(h, target_size), :min(w, target_size)] = cropped[:min(h, target_size), :min(w, target_size)]
    padded = np.expand_dims(padded, axis=-1).astype(np.float32)
    return resize(padded, (target_size, target_size)).numpy() / 255.0

def load_final_data(vector_pkl, code_txt):
    blstm_X, blstm_y = load_blstm_data(vector_pkl)
    code_texts, code_y = parse_file(code_txt)

    min_len = min(len(blstm_X), len(code_texts))
    blstm_X, blstm_y = blstm_X[:min_len], blstm_y[:min_len]
    code_texts, code_y = code_texts[:min_len], code_y[:min_len]

    match_mask = (blstm_y == code_y)
    blstm_X, labels, code_texts = blstm_X[match_mask], blstm_y[match_mask], code_texts[match_mask]

    print(f"Original dataset class distribution: {dict(zip(*np.unique(labels, return_counts=True)))}")

    X1_train_all, X1_test, codes_train_all, codes_test, y_train_all, y_test = train_test_split(
        blstm_X, code_texts, labels, test_size=0.2, stratify=labels, random_state=SEED
    )

    print(f"Train split before balancing: {dict(zip(*np.unique(y_train_all, return_counts=True)))}")
    print(f"Test set class distribution: {dict(zip(*np.unique(y_test, return_counts=True)))}")

    pos_idx = np.where(y_train_all == 1)[0]
    neg_idx = np.where(y_train_all == 0)[0]
    rng = np.random.default_rng(SEED)
    neg_sample = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    bal_idx = np.concatenate([pos_idx, neg_sample])
    rng.shuffle(bal_idx)

    X1_train = X1_train_all[bal_idx]
    codes_train = codes_train_all[bal_idx]
    y_train = y_train_all[bal_idx]

    print(f"Balanced training class distribution: {dict(zip(*np.unique(y_train, return_counts=True)))}")

    X2_train = np.stack([code_to_image(c) for c in codes_train])
    X2_test = np.stack([code_to_image(c) for c in codes_test])

    return X1_train, X2_train, y_train, X1_test, X2_test, y_test

def build_final_model(blstm_input_shape, cnn_input_shape=(32, 32, 1)):
    hp = {
        'lstm_units': 64,
        'dense_units': 64,
        'dropout_lstm': 0.2374121939836571,
        'dropout_cnn': 0.27534292883328343,
        'dropout_final': 0.2272902913249341,
        'cnn_filters1': 32,
        'cnn_filters2': 64,
        'lr': 0.007900455844014219,
    }

    input_blstm = Input(shape=blstm_input_shape)
    x_lstm = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(input_blstm)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(hp["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(hp["dropout_lstm"])(x_lstm)

    input_cnn = Input(shape=cnn_input_shape)
    x_cnn = Conv2D(hp["cnn_filters1"], (3, 3), padding="same", activation="relu")(input_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(hp["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(hp["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(hp["dropout_cnn"])(x_cnn)

    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(hp["dense_units"], activation="relu")(merged)
    merged = Dropout(hp["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_blstm, input_cnn], outputs=output)
    model.compile(optimizer=Adam(hp["lr"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model

def train_and_evaluate(vector_pkl, code_txt):
    X1_train, X2_train, y_train, X1_test, X2_test, y_test = load_final_data(vector_pkl, code_txt)

    model = build_final_model(X1_train.shape[1:])
    model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=64, verbose=1)

    print("\nEvaluating on test set...")
    loss, acc = model.evaluate([X1_test, X2_test], y_test)
    print(f"Test Accuracy: {acc:.4f}")

    y_probs = model.predict([X1_test, X2_test])
    y_pred = (y_probs > 0.5).astype(int).flatten()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")

# Run
vector_file = "cwe399_cgd_gadget_vectors.pkl"
code_file = "cwe399_cgd.txt"
train_and_evaluate(vector_file, code_file)


Original dataset class distribution: {0: 14600, 1: 7285}
Train split before balancing: {0: 11680, 1: 5828}
Test set class distribution: {0: 2920, 1: 1457}
Balanced training class distribution: {0: 5828, 1: 5828}
Epoch 1/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 47ms/step - accuracy: 0.6172 - loss: 0.6314
Epoch 2/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 46ms/step - accuracy: 0.8662 - loss: 0.2929
Epoch 3/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 46ms/step - accuracy: 0.8882 - loss: 0.2369
Epoch 4/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 45ms/step - accuracy: 0.9038 - loss: 0.2103
Epoch 5/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - accuracy: 0.9132 - loss: 0.1974
Epoch 6/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 45ms/step - accuracy: 0.9217 - loss: 0.1721
Epoch 7/20
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

# CWE 119: Hyperparameter Tuning

```
Vectorized Code Gadget Input --> BiLSTM + Attention -----------\
                                                                |
                                                                |--> Concatenate --> Dense --> Output
                                                                |
Raw Code Text Input    -->   Grayscale Image -->   CNN --------/
```

In [4]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from math import floor, sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Bidirectional, LSTM, LeakyReLU,
                                     Concatenate, Conv2D, MaxPool2D, Flatten)
from tensorflow.keras.optimizers import Adam
from tensorflow.image import resize
from tensorflow.keras import backend as K
import optuna
from optuna.integration import TFKerasPruningCallback

SEED = 41
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.config.experimental.enable_op_determinism()


class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal", trainable=True)
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros", trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        e = K.squeeze(e, axis=-1)
        alpha = K.softmax(e)
        alpha = K.expand_dims(alpha, axis=-1)
        context = x * alpha
        context = K.sum(context, axis=1)
        return context


def load_blstm_data(filename):
    df = pd.read_pickle(filename)
    vectors = np.stack(df["vector"].values)
    labels = df["val"].values.astype(np.int32)
    valid_mask = (labels == 0) | (labels == 1)
    return vectors[valid_mask], labels[valid_mask]


def parse_file(filename):
    codes, labels = [], []
    with open(filename, "r", encoding="utf8") as file:
        gadget, label = [], None
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            if '-' * 10 in stripped:
                if label in (0, 1) and gadget:
                    codes.append("\n".join(gadget))
                    labels.append(label)
                gadget, label = [], None
            elif stripped.split()[0].isdigit():
                if stripped.isdigit():
                    value = int(stripped)
                    label = value if value in (0, 1) else None
                else:
                    gadget.append(stripped)
            else:
                gadget.append(stripped)
        if label in (0, 1) and gadget:
            codes.append("\n".join(gadget))
            labels.append(label)

    return np.array(codes), np.array(labels)


def code_to_image(code_sample, target_size=32):
    byte_array = bytearray(code_sample, 'utf-8')
    flat = np.array(byte_array, dtype=np.uint8)
    size = floor(sqrt(len(flat)))
    cropped = flat[:size * size].reshape((size, size))
    padded = np.zeros((target_size, target_size), dtype=np.uint8)
    h, w = cropped.shape
    padded[:min(h, target_size), :min(w, target_size)] = cropped[:min(h, target_size), :min(w, target_size)]
    padded = np.expand_dims(padded, axis=-1).astype(np.float32)
    return resize(padded, (target_size, target_size)).numpy() / 255.0


def load_hybrid_data(vector_pkl, code_txt):
    blstm_X, blstm_y = load_blstm_data(vector_pkl)
    code_texts, code_y = parse_file(code_txt)

    # Filter to minimum common length to ensure sync
    min_len = min(len(blstm_X), len(code_texts))
    blstm_X, blstm_y = blstm_X[:min_len], blstm_y[:min_len]
    code_texts, code_y = code_texts[:min_len], code_y[:min_len]

    match_mask = (blstm_y == code_y)
    blstm_X, labels, code_texts = blstm_X[match_mask], blstm_y[match_mask], code_texts[match_mask]

    print(f"Original dataset class distribution: {dict(zip(*np.unique(labels, return_counts=True)))}")

    X1_train_all, X1_test, codes_train_all, codes_test, y_train_all, y_test = train_test_split(
        blstm_X, code_texts, labels, test_size=0.2, stratify=labels, random_state=SEED
    )

    print(f"Train split before balancing: {dict(zip(*np.unique(y_train_all, return_counts=True)))}")
    print(f"Test set class distribution: {dict(zip(*np.unique(y_test, return_counts=True)))}")

    pos_idx = np.where(y_train_all == 1)[0]
    neg_idx = np.where(y_train_all == 0)[0]
    rng = np.random.default_rng(SEED)
    neg_sample = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    bal_idx = np.concatenate([pos_idx, neg_sample])
    rng.shuffle(bal_idx)

    X1_bal, codes_bal, y_bal = X1_train_all[bal_idx], codes_train_all[bal_idx], y_train_all[bal_idx]
    print(f"Balanced training class distribution: {dict(zip(*np.unique(y_bal, return_counts=True)))}")

    X1_train, X1_val, codes_train, codes_val, y_train, y_val = train_test_split(
        X1_bal, codes_bal, y_bal, test_size=0.2, stratify=y_bal, random_state=SEED
    )

    X2_train = np.stack([code_to_image(c) for c in codes_train])
    X2_val = np.stack([code_to_image(c) for c in codes_val])
    X2_test = np.stack([code_to_image(c) for c in codes_test])

    return X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test


def build_hybrid_model(blstm_input_shape, hp, cnn_input_shape=(32, 32, 1)):
    input_blstm = Input(shape=blstm_input_shape)
    x_lstm = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(input_blstm)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(hp["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(hp["dropout_lstm"])(x_lstm)

    input_cnn = Input(shape=cnn_input_shape)
    x_cnn = Conv2D(hp["cnn_filters1"], (3, 3), padding="same", activation="relu")(input_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(hp["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(hp["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(hp["dropout_cnn"])(x_cnn)

    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(hp["dense_units"], activation="relu")(merged)
    merged = Dropout(hp["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_blstm, input_cnn], outputs=output)
    model.compile(optimizer=Adam(hp["lr"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model


def objective(trial):
    hp = {
        "lstm_units": trial.suggest_categorical("lstm_units", [64, 128, 192]),
        "dense_units": trial.suggest_categorical("dense_units", [64, 128, 192]),
        "dropout_lstm": trial.suggest_float("dropout_lstm", 0.2, 0.5),
        "dropout_cnn": trial.suggest_float("dropout_cnn", 0.2, 0.5),
        "dropout_final": trial.suggest_float("dropout_final", 0.2, 0.5),
        "cnn_filters1": trial.suggest_categorical("cnn_filters1", [32, 64]),
        "cnn_filters2": trial.suggest_categorical("cnn_filters2", [64, 128]),
        "lr": trial.suggest_float("lr", 1e-4, 1e-2, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64])
    }

    model = build_hybrid_model(X1_train.shape[1:], hp)
    history = model.fit(
        [X1_train, X2_train], y_train,
        validation_data=([X1_val, X2_val], y_val),
        epochs=10,
        batch_size=hp["batch_size"],
        verbose=0,
        callbacks=[TFKerasPruningCallback(trial, "val_accuracy")]
    )
    return max(history.history["val_accuracy"])


def train_hybrid_with_optuna(vector_pkl, code_txt):
    global X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test
    X1_train, X1_val, X1_test, X2_train, X2_val, X2_test, y_train, y_val, y_test = load_hybrid_data(vector_pkl, code_txt)

    study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=20)

    print(f"Best trial: {study.best_trial.number}")
    print(f"Best hyperparameters: {study.best_params}")

    best_hp = study.best_params
    X1_full = np.concatenate([X1_train, X1_val])
    X2_full = np.concatenate([X2_train, X2_val])
    y_full = np.concatenate([y_train, y_val])

    model = build_hybrid_model(X1_full.shape[1:], best_hp)
    model.fit(
        [X1_full, X2_full], y_full,
        epochs=30,
        batch_size=best_hp["batch_size"],
        verbose=1
    )

    print("\nEvaluating on test set...")
    loss, acc = model.evaluate([X1_test, X2_test], y_test)
    print(f"Test Accuracy: {acc:.4f}")

    y_probs = model.predict([X1_test, X2_test])
    y_pred = (y_probs > 0.5).astype(int).flatten()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


# Run
vector_file = "cwe119_cgd_gadget_vectors.pkl"
code_file = "cwe119_cgd.txt"
train_hybrid_with_optuna(vector_file, code_file)

Original dataset class distribution: {0: 29313, 1: 10440}
Train split before balancing: {0: 23450, 1: 8352}
Test set class distribution: {0: 5863, 1: 2088}
Balanced training class distribution: {0: 8352, 1: 8352}


[I 2025-05-25 21:45:49,172] A new study created in memory with name: no-name-00ba71d9-c424-44b8-9e50-badbd991b091
[I 2025-05-25 21:50:49,194] Trial 0 finished with value: 0.8308889269828796 and parameters: {'lstm_units': 192, 'dense_units': 192, 'dropout_lstm': 0.2742058267404428, 'dropout_cnn': 0.37941882727940535, 'dropout_final': 0.4445811161973845, 'cnn_filters1': 64, 'cnn_filters2': 64, 'lr': 0.00033987384024195146, 'batch_size': 32}. Best is trial 0 with value: 0.8308889269828796.
[I 2025-05-25 21:53:40,339] Trial 1 finished with value: 0.8548339009284973 and parameters: {'lstm_units': 128, 'dense_units': 192, 'dropout_lstm': 0.29717900056976815, 'dropout_cnn': 0.4787097987303838, 'dropout_final': 0.4239948401118443, 'cnn_filters1': 64, 'cnn_filters2': 64, 'lr': 0.00017342834914139184, 'batch_size': 32}. Best is trial 1 with value: 0.8548339009284973.
[I 2025-05-25 21:56:42,350] Trial 2 finished with value: 0.8575276732444763 and parameters: {'lstm_units': 128, 'dense_units': 64,

Best trial: 2
Best hyperparameters: {'lstm_units': 128, 'dense_units': 64, 'dropout_lstm': 0.44019520286998604, 'dropout_cnn': 0.42522026456908246, 'dropout_final': 0.49689209860503014, 'cnn_filters1': 64, 'cnn_filters2': 128, 'lr': 0.003948431087230644, 'batch_size': 32}
Epoch 1/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 44ms/step - accuracy: 0.6205 - loss: 0.6461
Epoch 2/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 43ms/step - accuracy: 0.7288 - loss: 0.5399
Epoch 3/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 45ms/step - accuracy: 0.7670 - loss: 0.4852
Epoch 4/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 43ms/step - accuracy: 0.8096 - loss: 0.4172
Epoch 5/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 41ms/step - accuracy: 0.8178 - loss: 0.3876
Epoch 6/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 42ms/step - accuracy: 0.8281 - loss: 0.

# CWE 119: Final Model

```
Vectorized Code Gadget Input --> BiLSTM + Attention -----------\
                                                                |
                                                                |--> Concatenate --> Dense --> Output
                                                                |
Raw Code Text Input    -->   Grayscale Image -->   CNN --------/
```

In [10]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from math import floor, sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Bidirectional, LSTM, LeakyReLU,
                                     Concatenate, Conv2D, MaxPool2D, Flatten)
from tensorflow.keras.optimizers import Adam
from tensorflow.image import resize
from tensorflow.keras import backend as K

SEED = 41
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.config.experimental.enable_op_determinism()

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal", trainable=True)
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros", trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        e = K.squeeze(e, axis=-1)
        alpha = K.softmax(e)
        alpha = K.expand_dims(alpha, axis=-1)
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

def load_blstm_data(filename):
    df = pd.read_pickle(filename)
    vectors = np.stack(df["vector"].values)
    labels = df["val"].values.astype(np.int32)
    valid_mask = (labels == 0) | (labels == 1)
    return vectors[valid_mask], labels[valid_mask]

def parse_file(filename):
    codes, labels = [], []
    with open(filename, "r", encoding="utf8") as file:
        gadget, label = [], None
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            if '-' * 10 in stripped:
                if label in (0, 1) and gadget:
                    codes.append("\n".join(gadget))
                    labels.append(label)
                gadget, label = [], None
            elif stripped.split()[0].isdigit():
                if stripped.isdigit():
                    value = int(stripped)
                    label = value if value in (0, 1) else None
                else:
                    gadget.append(stripped)
            else:
                gadget.append(stripped)
        if label in (0, 1) and gadget:
            codes.append("\n".join(gadget))
            labels.append(label)
    return np.array(codes), np.array(labels)

def code_to_image(code_sample, target_size=32):
    byte_array = bytearray(code_sample, 'utf-8')
    flat = np.array(byte_array, dtype=np.uint8)
    size = floor(sqrt(len(flat)))
    cropped = flat[:size * size].reshape((size, size))
    padded = np.zeros((target_size, target_size), dtype=np.uint8)
    h, w = cropped.shape
    padded[:min(h, target_size), :min(w, target_size)] = cropped[:min(h, target_size), :min(w, target_size)]
    padded = np.expand_dims(padded, axis=-1).astype(np.float32)
    return resize(padded, (target_size, target_size)).numpy() / 255.0

def load_final_data(vector_pkl, code_txt):
    blstm_X, blstm_y = load_blstm_data(vector_pkl)
    code_texts, code_y = parse_file(code_txt)

    min_len = min(len(blstm_X), len(code_texts))
    blstm_X, blstm_y = blstm_X[:min_len], blstm_y[:min_len]
    code_texts, code_y = code_texts[:min_len], code_y[:min_len]

    match_mask = (blstm_y == code_y)
    blstm_X, labels, code_texts = blstm_X[match_mask], blstm_y[match_mask], code_texts[match_mask]

    print(f"Original dataset class distribution: {dict(zip(*np.unique(labels, return_counts=True)))}")

    X1_train_all, X1_test, codes_train_all, codes_test, y_train_all, y_test = train_test_split(
        blstm_X, code_texts, labels, test_size=0.2, stratify=labels, random_state=SEED
    )

    print(f"Train split before balancing: {dict(zip(*np.unique(y_train_all, return_counts=True)))}")
    print(f"Test set class distribution: {dict(zip(*np.unique(y_test, return_counts=True)))}")

    pos_idx = np.where(y_train_all == 1)[0]
    neg_idx = np.where(y_train_all == 0)[0]
    rng = np.random.default_rng(SEED)
    neg_sample = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    bal_idx = np.concatenate([pos_idx, neg_sample])
    rng.shuffle(bal_idx)

    X1_train = X1_train_all[bal_idx]
    codes_train = codes_train_all[bal_idx]
    y_train = y_train_all[bal_idx]

    print(f"Balanced training class distribution: {dict(zip(*np.unique(y_train, return_counts=True)))}")

    X2_train = np.stack([code_to_image(c) for c in codes_train])
    X2_test = np.stack([code_to_image(c) for c in codes_test])

    return X1_train, X2_train, y_train, X1_test, X2_test, y_test

def build_final_model(blstm_input_shape, cnn_input_shape=(32, 32, 1)):
    hp = {
        'lstm_units': 128,
        'dense_units': 64,
        'dropout_lstm': 0.44019520286998604,
        'dropout_cnn': 0.42522026456908246,
        'dropout_final': 0.49689209860503014,
        'cnn_filters1': 64,
        'cnn_filters2': 128,
        'lr': 0.003948431087230644,
    }

    input_blstm = Input(shape=blstm_input_shape)
    x_lstm = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(input_blstm)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(hp["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(hp["dropout_lstm"])(x_lstm)

    input_cnn = Input(shape=cnn_input_shape)
    x_cnn = Conv2D(hp["cnn_filters1"], (3, 3), padding="same", activation="relu")(input_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(hp["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(hp["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(hp["dropout_cnn"])(x_cnn)

    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(hp["dense_units"], activation="relu")(merged)
    merged = Dropout(hp["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_blstm, input_cnn], outputs=output)
    model.compile(optimizer=Adam(hp["lr"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model

def train_and_evaluate(vector_pkl, code_txt):
    X1_train, X2_train, y_train, X1_test, X2_test, y_test = load_final_data(vector_pkl, code_txt)

    model = build_final_model(X1_train.shape[1:])
    model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, verbose=1)

    print("\nEvaluating on test set...")
    loss, acc = model.evaluate([X1_test, X2_test], y_test)
    print(f"Test Accuracy: {acc:.4f}")

    y_probs = model.predict([X1_test, X2_test])
    y_pred = (y_probs > 0.5).astype(int).flatten()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")

# Run
vector_file = "cwe119_cgd_gadget_vectors.pkl"
code_file = "cwe119_cgd.txt"
train_and_evaluate(vector_file, code_file)

Original dataset class distribution: {0: 29313, 1: 10440}
Train split before balancing: {0: 23450, 1: 8352}
Test set class distribution: {0: 5863, 1: 2088}
Balanced training class distribution: {0: 8352, 1: 8352}
Epoch 1/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 60ms/step - accuracy: 0.6056 - loss: 0.6597
Epoch 2/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 56ms/step - accuracy: 0.7250 - loss: 0.5531
Epoch 3/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 54ms/step - accuracy: 0.7572 - loss: 0.5042
Epoch 4/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 58ms/step - accuracy: 0.7922 - loss: 0.4397
Epoch 5/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 55ms/step - accuracy: 0.8300 - loss: 0.3680
Epoch 6/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 61ms/step - accuracy: 0.8450 - loss: 0.3426
Epoch 7/20
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0