# CWE 399: Hyperparameter Tuning

```
Vectorized Code Gadget Input --> BiLSTM + Attention ------\
                                                            --> Concatenate --> Dense --> Output
Vectorized Code Gadget Input --> CNN on expanded dims ----/
```

In [None]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import optuna
from optuna.integration import TFKerasPruningCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout, LeakyReLU,
    LSTM, Bidirectional, Concatenate, Lambda
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Reproducibility: one seed is applied to every random number source used in this cell.
# SEED is also reused further down as the random_state of both train_test_split calls
# and as the seed of the undersampling generator.
SEED = 41
# Python's built-in `random` module.
random.seed(SEED)
# NumPy's legacy global random state.
np.random.seed(SEED)
# TensorFlow's global seed.
tf.random.set_seed(SEED)
# Request deterministic op implementations from TensorFlow.
tf.config.experimental.enable_op_determinism()

# -----------------------------
# Load vector data
# -----------------------------
def load_vector_data(filename):
    """Read a pickled DataFrame and return its vectors and labels as arrays.

    Args:
        filename: Path of a pickle file readable by ``pd.read_pickle``. The
            frame must have a "vector" column (one array per row, all of the
            same shape) and a "val" column holding the labels.

    Returns:
        A ``(vectors, labels)`` tuple: the per-row vectors stacked along a new
        leading axis as float32, and the labels cast to int.
    """
    frame = pd.read_pickle(filename)
    stacked = np.stack(frame["vector"].values)
    return stacked.astype(np.float32), frame["val"].values.astype(int)

# -----------------------------
# Custom attention layer
# -----------------------------
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    Each position along axis 1 gets a score ``tanh(x . W + b)``; the scores are
    softmax-normalised and used to weight the positions before summing them.
    Assumes a (batch, steps, features) input, as produced in this file by the
    BiLSTM with ``return_sequences=True``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = input_shape[-1]
        # The bias has one entry per position on axis 1, so that axis must
        # have a known, fixed length when the layer is built.
        n_steps = input_shape[1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(n_steps, 1),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # One scalar score per position, with the trailing singleton axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise the scores and restore the trailing axis for broadcasting.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        return K.sum(x * weights, axis=1)

# -----------------------------
# Build model with hyperparams
# -----------------------------
def build_model(input_shape, hp):
    """Assemble and compile the two-branch (BiLSTM+attention / CNN) classifier.

    Args:
        input_shape: Per-sample input shape handed to ``Input``.
        hp: Mapping with the keys "lstm_units", "dense_units", "cnn_filters1",
            "cnn_filters2", "dropout_lstm", "dropout_cnn", "dropout_final"
            and "learning_rate". Extra keys are ignored.

    Returns:
        A Keras ``Model`` with one sigmoid output, compiled with Adam,
        binary cross-entropy loss and the accuracy metric.
    """
    dense_width = hp["dense_units"]
    inputs = Input(shape=input_shape)

    # Branch 1: bidirectional LSTM over the sequence, pooled by attention.
    seq_branch = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(inputs)
    seq_branch = AttentionLayer()(seq_branch)
    seq_branch = Dense(dense_width)(seq_branch)
    seq_branch = LeakyReLU()(seq_branch)
    seq_branch = Dropout(hp["dropout_lstm"])(seq_branch)

    # Branch 2: add a trailing channel axis so the same input can feed Conv2D,
    # then run two conv + max-pool stages.
    conv_branch = Lambda(lambda t: tf.expand_dims(t, axis=-1))(inputs)
    for filters_key in ("cnn_filters1", "cnn_filters2"):
        conv_branch = Conv2D(
            hp[filters_key], (3, 3), padding="same", activation="relu"
        )(conv_branch)
        conv_branch = MaxPool2D()(conv_branch)
    conv_branch = Flatten()(conv_branch)
    conv_branch = Dense(dense_width, activation="relu")(conv_branch)
    conv_branch = Dropout(hp["dropout_cnn"])(conv_branch)

    # Head: fuse both branches and classify.
    fused = Concatenate()([seq_branch, conv_branch])
    fused = Dense(dense_width, activation="relu")(fused)
    fused = Dropout(hp["dropout_final"])(fused)
    prediction = Dense(1, activation="sigmoid")(fused)

    classifier = Model(inputs=inputs, outputs=prediction)
    classifier.compile(
        optimizer=Adam(hp["learning_rate"]),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

# -----------------------------
# Objective function for Optuna (with pruning)
# -----------------------------
def objective(trial):
    """Optuna objective: train one candidate model and score it on validation data.

    Reads the module-level ``X_train``, ``y_train``, ``X_val`` and ``y_val``
    arrays, which must be assigned before the study is optimised.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values for pruning.

    Returns:
        The highest ``val_accuracy`` recorded in any epoch of this trial.
    """
    # A study builds many models in one process; clear Keras' global state so
    # models from earlier trials do not accumulate in memory.
    K.clear_session()

    hp = {
        "lstm_units": trial.suggest_categorical("lstm_units", [64, 128, 256]),
        "cnn_filters1": trial.suggest_categorical("cnn_filters1", [16, 32, 64]),
        "cnn_filters2": trial.suggest_categorical("cnn_filters2", [32, 64, 128]),
        "dense_units": trial.suggest_categorical("dense_units", [64, 128, 256]),
        "dropout_lstm": trial.suggest_float("dropout_lstm", 0.2, 0.5),
        "dropout_cnn": trial.suggest_float("dropout_cnn", 0.2, 0.5),
        "dropout_final": trial.suggest_float("dropout_final", 0.2, 0.5),
        # suggest_loguniform is deprecated; log=True samples the same
        # log-uniform range without the deprecation warning.
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128]),
    }

    model = build_model(input_shape=X_train.shape[1:], hp=hp)

    early_stopper = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True, verbose=0)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=hp["batch_size"],
        callbacks=[
            early_stopper,
            # Reports val_accuracy to Optuna each epoch so the study's pruner
            # can stop unpromising trials early.
            TFKerasPruningCallback(trial, "val_accuracy"),
        ],
        verbose=0
    )

    # The score is the best epoch by val_accuracy. Early stopping tracks
    # val_loss, so the restored weights may come from a different epoch.
    return max(history.history["val_accuracy"])

# -----------------------------
# Train function w/ tuning
# -----------------------------
def _class_counts(labels):
    """Return a {label: count} dict used when printing class distributions."""
    return dict(zip(*np.unique(labels, return_counts=True)))


def _balance_by_undersampling(X, y, seed):
    """Return a shuffled, class-balanced subset of (X, y).

    The larger of the two classes (labels 0 and 1) is randomly undersampled,
    without replacement, to the size of the smaller one.

    Raises:
        ValueError: If either class has no samples.
    """
    pos_idx = np.where(y == 1)[0]
    neg_idx = np.where(y == 0)[0]
    if len(pos_idx) == 0 or len(neg_idx) == 0:
        raise ValueError("Both classes (0 and 1) must be present to balance the training set.")

    rng = np.random.default_rng(seed)
    # When negatives are at least as numerous as positives this draws exactly
    # what the previous inline code drew, so existing results are unchanged.
    # The else branch covers the case that used to raise inside rng.choice.
    if len(neg_idx) >= len(pos_idx):
        neg_idx = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    else:
        pos_idx = rng.choice(pos_idx, size=len(neg_idx), replace=False)

    balanced_idx = np.concatenate([pos_idx, neg_idx])
    rng.shuffle(balanced_idx)
    return X[balanced_idx], y[balanced_idx]


def _print_test_metrics(model, X_test, y_test):
    """Evaluate ``model`` on the test split and print accuracy, TPR, FPR, FNR, precision and F1."""
    print("\nEvaluating model on test set...")
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc:.4f}")

    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    # labels=[0, 1] forces a 2x2 matrix even when one class is missing from
    # both y_test and y_pred, so the four-way unpack cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Guard the ratios against an empty negative or positive class.
    fpr = fp / (fp + tn) if (fp + tn) else 0.0
    fnr = fn / (fn + tp) if (fn + tp) else 0.0

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


def train_hybrid_cnn_lstm(pkl_file):
    """Tune, retrain and evaluate the hybrid CNN/BiLSTM model on one dataset.

    Steps: load the vectors, hold out a stratified 20% test split, balance the
    remaining data by undersampling, carve a stratified 20% validation split
    out of the balanced data, run a 20-trial Optuna study, retrain the best
    configuration on train + validation, and print test-set metrics.

    Args:
        pkl_file: Path of the pickled DataFrame passed to ``load_vector_data``.

    Raises:
        ValueError: If the training split does not contain both classes.
    """
    # objective() reads these names from module scope, so they are published
    # as globals before the study starts.
    global X_train, X_val, y_train, y_val

    X, y = load_vector_data(pkl_file)
    print(f"Original dataset class distribution: {_class_counts(y)}")

    X_train_all, X_test, y_train_all, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED
    )

    print(f"Train split before balancing: {_class_counts(y_train_all)}")
    print(f"Test set class distribution: {_class_counts(y_test)}")

    # Balance the training data only; the test split keeps its original ratio.
    X_train_balanced, y_train_balanced = _balance_by_undersampling(X_train_all, y_train_all, SEED)
    print(f"Balanced training class distribution: {_class_counts(y_train_balanced)}")

    # Validation split
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_balanced, y_train_balanced, test_size=0.2, stratify=y_train_balanced, random_state=SEED
    )
    print(f"Final training class distribution: {_class_counts(y_train)}")
    print(f"Validation class distribution: {_class_counts(y_val)}")

    # Run Optuna with pruning enabled
    print("Running Optuna hyperparameter tuning (with pruning)...")
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5)
    )
    study.optimize(objective, n_trials=20)

    print(f"Best trial: {study.best_trial.number}")
    print(f"Best hyperparameters: {study.best_params}")

    # Retrain the best configuration on train + validation. No validation data
    # is left, so both callbacks monitor the training loss.
    best_hp = study.best_params
    X_full = np.concatenate([X_train, X_val])
    y_full = np.concatenate([y_train, y_val])
    model = build_model(input_shape=X_full.shape[1:], hp=best_hp)

    # NOTE(review): EarlyStopping's patience (2) is shorter than
    # ReduceLROnPlateau's (3), so the learning-rate reduction will rarely get
    # a chance to fire before training stops. Confirm this is intended.
    lr_scheduler = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)
    early_stopper = EarlyStopping(monitor="loss", patience=2, restore_best_weights=True, verbose=1)

    model.fit(
        X_full, y_full,
        epochs=30,
        batch_size=best_hp["batch_size"],
        callbacks=[lr_scheduler, early_stopper],
        verbose=1
    )

    _print_test_metrics(model, X_test, y_test)

# Run the tune -> retrain -> evaluate pipeline on the CWE-399 gadget vectors.
train_hybrid_cnn_lstm("cwe399_cgd_gadget_vectors.pkl")

[I 2025-05-24 17:45:50,533] A new study created in memory with name: no-name-ddde0659-c389-4be2-b3c9-9369051c9fab


Original dataset class distribution: {0: 14600, 1: 7285}
Train split before balancing: {0: 11680, 1: 5828}
Test set class distribution: {0: 2920, 1: 1457}
Balanced training class distribution: {0: 5828, 1: 5828}
Final training class distribution: {0: 4662, 1: 4662}
Validation class distribution: {0: 1166, 1: 1166}
Running Optuna hyperparameter tuning (with pruning)...


  "learning_rate": trial.suggest_loguniform("learning_rate", 1e-4, 1e-2),
[I 2025-05-24 17:48:43,549] Trial 0 finished with value: 0.9301029443740845 and parameters: {'lstm_units': 64, 'cnn_filters1': 16, 'cnn_filters2': 32, 'dense_units': 128, 'dropout_lstm': 0.43498018806936434, 'dropout_cnn': 0.29313006045819584, 'dropout_final': 0.33092831126041217, 'learning_rate': 0.0040012046538794276, 'batch_size': 32}. Best is trial 0 with value: 0.9301029443740845.
[I 2025-05-24 17:53:16,403] Trial 1 finished with value: 0.9206689596176147 and parameters: {'lstm_units': 256, 'cnn_filters1': 64, 'cnn_filters2': 64, 'dense_units': 256, 'dropout_lstm': 0.21162349447376705, 'dropout_cnn': 0.4234054337603, 'dropout_final': 0.39768971433503764, 'learning_rate': 0.00019066966613780277, 'batch_size': 128}. Best is trial 0 with value: 0.9301029443740845.
[I 2025-05-24 18:00:44,006] Trial 2 finished with value: 0.9335334300994873 and parameters: {'lstm_units': 256, 'cnn_filters1': 16, 'cnn_filters2': 1

Best trial: 16
Best hyperparameters: {'lstm_units': 64, 'cnn_filters1': 32, 'cnn_filters2': 32, 'dense_units': 64, 'dropout_lstm': 0.3354719785627817, 'dropout_cnn': 0.45088206221631577, 'dropout_final': 0.25222072883126656, 'learning_rate': 0.0004554889354724233, 'batch_size': 32}
Epoch 1/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 71ms/step - accuracy: 0.6958 - loss: 0.5561 - learning_rate: 4.5549e-04
Epoch 2/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 66ms/step - accuracy: 0.8822 - loss: 0.2780 - learning_rate: 4.5549e-04
Epoch 3/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 72ms/step - accuracy: 0.9149 - loss: 0.2050 - learning_rate: 4.5549e-04
Epoch 4/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 72ms/step - accuracy: 0.9171 - loss: 0.1814 - learning_rate: 4.5549e-04
Epoch 5/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 68ms/step - accuracy: 0.9339 - loss: 0.

# CWE 399: Final Model

```
Vectorized Code Gadget Input --> BiLSTM + Attention ------\
                                                            --> Concatenate --> Dense --> Output
Vectorized Code Gadget Input --> CNN on expanded dims ----/
```

In [15]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout, LeakyReLU,
    LSTM, Bidirectional, Concatenate, Lambda
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Reproducibility: one seed is applied to every random number source used in this cell.
# SEED is also reused further down as the random_state of train_test_split and as
# the seed of the undersampling generator.
SEED = 41
# Python's built-in `random` module.
random.seed(SEED)
# NumPy's legacy global random state.
np.random.seed(SEED)
# TensorFlow's global seed.
tf.random.set_seed(SEED)
# Request deterministic op implementations from TensorFlow.
tf.config.experimental.enable_op_determinism()

# -----------------------------
# Load vector data
# -----------------------------
def load_vector_data(filename):
    """Read a pickled DataFrame and return its vectors and labels as arrays.

    Args:
        filename: Path of a pickle file readable by ``pd.read_pickle``. The
            frame must have a "vector" column (one array per row, all of the
            same shape) and a "val" column holding the labels.

    Returns:
        A ``(vectors, labels)`` tuple: the per-row vectors stacked along a new
        leading axis as float32, and the labels cast to int.
    """
    frame = pd.read_pickle(filename)
    stacked = np.stack(frame["vector"].values)
    return stacked.astype(np.float32), frame["val"].values.astype(int)

# -----------------------------
# Custom attention layer
# -----------------------------
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    Each position along axis 1 gets a score ``tanh(x . W + b)``; the scores are
    softmax-normalised and used to weight the positions before summing them.
    Assumes a (batch, steps, features) input, as produced in this file by the
    BiLSTM with ``return_sequences=True``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = input_shape[-1]
        # The bias has one entry per position on axis 1, so that axis must
        # have a known, fixed length when the layer is built.
        n_steps = input_shape[1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(n_steps, 1),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # One scalar score per position, with the trailing singleton axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise the scores and restore the trailing axis for broadcasting.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        return K.sum(x * weights, axis=1)

# -----------------------------
# Build model with best hyperparams
# -----------------------------
def build_best_model(input_shape, hp=None):
    """Build and compile the hybrid BiLSTM+attention / CNN classifier.

    The defaults are the best hyperparameters reported by the CWE-399 tuning
    run. Individual values can be overridden through ``hp`` instead of editing
    this function.

    Args:
        input_shape: Per-sample input shape handed to ``Input``.
        hp: Optional mapping of overrides for any of "lstm_units",
            "cnn_filters1", "cnn_filters2", "dense_units", "dropout_lstm",
            "dropout_cnn", "dropout_final" and "learning_rate". Keys the model
            does not use (for example "batch_size") are ignored, so a study's
            ``best_params`` dict can be passed directly.

    Returns:
        A Keras ``Model`` with one sigmoid output, compiled with Adam,
        binary cross-entropy loss and the accuracy metric.
    """
    params = {
        'lstm_units': 64,
        'cnn_filters1': 32,
        'cnn_filters2': 32,
        'dense_units': 64,
        'dropout_lstm': 0.3354719785627817,
        'dropout_cnn': 0.45088206221631577,
        'dropout_final': 0.25222072883126656,
        'learning_rate': 0.0004554889354724233
    }
    if hp:
        params.update(hp)

    input_layer = Input(shape=input_shape)

    # Branch 1: bidirectional LSTM over the sequence, pooled by attention.
    x_lstm = Bidirectional(LSTM(params["lstm_units"], return_sequences=True))(input_layer)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(params["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(params["dropout_lstm"])(x_lstm)

    # Branch 2: add a trailing channel axis so the same input can feed Conv2D.
    x_cnn = Lambda(lambda x: tf.expand_dims(x, axis=-1))(input_layer)
    x_cnn = Conv2D(params["cnn_filters1"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(params["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(params["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(params["dropout_cnn"])(x_cnn)

    # Head: fuse both branches and classify.
    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(params["dense_units"], activation="relu")(merged)
    merged = Dropout(params["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer=Adam(params["learning_rate"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model

# -----------------------------
# Training and evaluation
# -----------------------------
def _class_counts(labels):
    """Return a {label: count} dict used when printing class distributions."""
    return dict(zip(*np.unique(labels, return_counts=True)))


def _balance_by_undersampling(X, y, seed):
    """Return a shuffled, class-balanced subset of (X, y).

    The larger of the two classes (labels 0 and 1) is randomly undersampled,
    without replacement, to the size of the smaller one.

    Raises:
        ValueError: If either class has no samples.
    """
    pos_idx = np.where(y == 1)[0]
    neg_idx = np.where(y == 0)[0]
    if len(pos_idx) == 0 or len(neg_idx) == 0:
        raise ValueError("Both classes (0 and 1) must be present to balance the training set.")

    rng = np.random.default_rng(seed)
    # When negatives are at least as numerous as positives this draws exactly
    # what the previous inline code drew, so existing results are unchanged.
    # The else branch covers the case that used to raise inside rng.choice.
    if len(neg_idx) >= len(pos_idx):
        neg_idx = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    else:
        pos_idx = rng.choice(pos_idx, size=len(neg_idx), replace=False)

    balanced_idx = np.concatenate([pos_idx, neg_idx])
    rng.shuffle(balanced_idx)
    return X[balanced_idx], y[balanced_idx]


def _print_test_metrics(model, X_test, y_test):
    """Evaluate ``model`` on the test split and print accuracy, TPR, FPR, FNR, precision and F1."""
    print("\nEvaluating model on test set...")
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc:.4f}")

    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    # labels=[0, 1] forces a 2x2 matrix even when one class is missing from
    # both y_test and y_pred, so the four-way unpack cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Guard the ratios against an empty negative or positive class.
    fpr = fp / (fp + tn) if (fp + tn) else 0.0
    fnr = fn / (fn + tp) if (fn + tp) else 0.0

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


def train_final_model(pkl_file, batch_size=32, epochs=30):
    """Train the tuned model on the balanced training data and report test metrics.

    Steps: load the vectors, hold out a stratified 20% test split, balance the
    remaining data by undersampling, train the model from ``build_best_model``
    and print test-set metrics.

    Args:
        pkl_file: Path of the pickled DataFrame passed to ``load_vector_data``.
        batch_size: Mini-batch size for training. The default of 32 is the
            batch size of the best tuning trial.
        epochs: Number of training epochs.

    Raises:
        ValueError: If the training split does not contain both classes.
    """
    X, y = load_vector_data(pkl_file)
    print(f"Original dataset class distribution: {_class_counts(y)}")

    X_train_all, X_test, y_train_all, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED
    )
    print(f"Train split before balancing: {_class_counts(y_train_all)}")
    print(f"Test set class distribution: {_class_counts(y_test)}")

    # Balance the training data only; the test split keeps its original ratio.
    X_train, y_train = _balance_by_undersampling(X_train_all, y_train_all, SEED)
    print(f"Balanced training class distribution: {_class_counts(y_train)}")

    model = build_best_model(input_shape=X_train.shape[1:])

    # No validation data is held out here, so the scheduler watches the training loss.
    lr_scheduler = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)

    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[lr_scheduler],
        verbose=1
    )

    _print_test_metrics(model, X_test, y_test)

# Train and evaluate the final (tuned) model on the CWE-399 gadget vectors.
train_final_model("cwe399_cgd_gadget_vectors.pkl")

Original dataset class distribution: {0: 14600, 1: 7285}
Train split before balancing: {0: 11680, 1: 5828}
Test set class distribution: {0: 2920, 1: 1457}
Balanced training class distribution: {0: 5828, 1: 5828}
Epoch 1/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 76ms/step - accuracy: 0.7027 - loss: 0.5382 - learning_rate: 4.5549e-04
Epoch 2/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 75ms/step - accuracy: 0.8807 - loss: 0.2822 - learning_rate: 4.5549e-04
Epoch 3/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 75ms/step - accuracy: 0.9063 - loss: 0.2192 - learning_rate: 4.5549e-04
Epoch 4/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 76ms/step - accuracy: 0.9165 - loss: 0.1910 - learning_rate: 4.5549e-04
Epoch 5/30
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 76ms/step - accuracy: 0.9241 - loss: 0.1720 - learning_rate: 4.5549e-04
Epoch 6/30
[1m365/365[0m [32m━━━━━━

# CWE 119: Hyperparameter Tuning

```
Vectorized Code Gadget Input --> BiLSTM + Attention ------\
                                                            --> Concatenate --> Dense --> Output
Vectorized Code Gadget Input --> CNN on expanded dims ----/
```

In [17]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import optuna
from optuna.integration import TFKerasPruningCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout, LeakyReLU,
    LSTM, Bidirectional, Concatenate, Lambda
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Reproducibility: one seed is applied to every random number source used in this cell.
# SEED is also reused further down as the random_state of both train_test_split calls
# and as the seed of the undersampling generator.
SEED = 41
# Python's built-in `random` module.
random.seed(SEED)
# NumPy's legacy global random state.
np.random.seed(SEED)
# TensorFlow's global seed.
tf.random.set_seed(SEED)
# Request deterministic op implementations from TensorFlow.
tf.config.experimental.enable_op_determinism()

# -----------------------------
# Load vector data
# -----------------------------
def load_vector_data(filename):
    """Read a pickled DataFrame and return its vectors and labels as arrays.

    Args:
        filename: Path of a pickle file readable by ``pd.read_pickle``. The
            frame must have a "vector" column (one array per row, all of the
            same shape) and a "val" column holding the labels.

    Returns:
        A ``(vectors, labels)`` tuple: the per-row vectors stacked along a new
        leading axis as float32, and the labels cast to int.
    """
    frame = pd.read_pickle(filename)
    stacked = np.stack(frame["vector"].values)
    return stacked.astype(np.float32), frame["val"].values.astype(int)

# -----------------------------
# Custom attention layer
# -----------------------------
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    Each position along axis 1 gets a score ``tanh(x . W + b)``; the scores are
    softmax-normalised and used to weight the positions before summing them.
    Assumes a (batch, steps, features) input, as produced in this file by the
    BiLSTM with ``return_sequences=True``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = input_shape[-1]
        # The bias has one entry per position on axis 1, so that axis must
        # have a known, fixed length when the layer is built.
        n_steps = input_shape[1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(n_steps, 1),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # One scalar score per position, with the trailing singleton axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise the scores and restore the trailing axis for broadcasting.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        return K.sum(x * weights, axis=1)

# -----------------------------
# Build model with hyperparams
# -----------------------------
def build_model(input_shape, hp):
    """Assemble and compile the two-branch (BiLSTM+attention / CNN) classifier.

    Args:
        input_shape: Per-sample input shape handed to ``Input``.
        hp: Mapping with the keys "lstm_units", "dense_units", "cnn_filters1",
            "cnn_filters2", "dropout_lstm", "dropout_cnn", "dropout_final"
            and "learning_rate". Extra keys are ignored.

    Returns:
        A Keras ``Model`` with one sigmoid output, compiled with Adam,
        binary cross-entropy loss and the accuracy metric.
    """
    dense_width = hp["dense_units"]
    inputs = Input(shape=input_shape)

    # Branch 1: bidirectional LSTM over the sequence, pooled by attention.
    seq_branch = Bidirectional(LSTM(hp["lstm_units"], return_sequences=True))(inputs)
    seq_branch = AttentionLayer()(seq_branch)
    seq_branch = Dense(dense_width)(seq_branch)
    seq_branch = LeakyReLU()(seq_branch)
    seq_branch = Dropout(hp["dropout_lstm"])(seq_branch)

    # Branch 2: add a trailing channel axis so the same input can feed Conv2D,
    # then run two conv + max-pool stages.
    conv_branch = Lambda(lambda t: tf.expand_dims(t, axis=-1))(inputs)
    for filters_key in ("cnn_filters1", "cnn_filters2"):
        conv_branch = Conv2D(
            hp[filters_key], (3, 3), padding="same", activation="relu"
        )(conv_branch)
        conv_branch = MaxPool2D()(conv_branch)
    conv_branch = Flatten()(conv_branch)
    conv_branch = Dense(dense_width, activation="relu")(conv_branch)
    conv_branch = Dropout(hp["dropout_cnn"])(conv_branch)

    # Head: fuse both branches and classify.
    fused = Concatenate()([seq_branch, conv_branch])
    fused = Dense(dense_width, activation="relu")(fused)
    fused = Dropout(hp["dropout_final"])(fused)
    prediction = Dense(1, activation="sigmoid")(fused)

    classifier = Model(inputs=inputs, outputs=prediction)
    classifier.compile(
        optimizer=Adam(hp["learning_rate"]),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

# -----------------------------
# Objective function for Optuna (with pruning)
# -----------------------------
def objective(trial):
    """Optuna objective: train one candidate model and score it on validation data.

    Reads the module-level ``X_train``, ``y_train``, ``X_val`` and ``y_val``
    arrays, which must be assigned before the study is optimised.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values for pruning.

    Returns:
        The highest ``val_accuracy`` recorded in any epoch of this trial.
    """
    # A study builds many models in one process; clear Keras' global state so
    # models from earlier trials do not accumulate in memory.
    K.clear_session()

    hp = {
        "lstm_units": trial.suggest_categorical("lstm_units", [64, 128, 256]),
        "cnn_filters1": trial.suggest_categorical("cnn_filters1", [16, 32, 64]),
        "cnn_filters2": trial.suggest_categorical("cnn_filters2", [32, 64, 128]),
        "dense_units": trial.suggest_categorical("dense_units", [64, 128, 256]),
        "dropout_lstm": trial.suggest_float("dropout_lstm", 0.2, 0.5),
        "dropout_cnn": trial.suggest_float("dropout_cnn", 0.2, 0.5),
        "dropout_final": trial.suggest_float("dropout_final", 0.2, 0.5),
        # suggest_loguniform is deprecated; log=True samples the same
        # log-uniform range without the deprecation warning.
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128]),
    }

    model = build_model(input_shape=X_train.shape[1:], hp=hp)

    early_stopper = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True, verbose=0)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=hp["batch_size"],
        callbacks=[
            early_stopper,
            # Reports val_accuracy to Optuna each epoch so the study's pruner
            # can stop unpromising trials early.
            TFKerasPruningCallback(trial, "val_accuracy"),
        ],
        verbose=0
    )

    # The score is the best epoch by val_accuracy. Early stopping tracks
    # val_loss, so the restored weights may come from a different epoch.
    return max(history.history["val_accuracy"])

# -----------------------------
# Train function w/ tuning
# -----------------------------
def _class_counts(labels):
    """Return a {label: count} dict used when printing class distributions."""
    return dict(zip(*np.unique(labels, return_counts=True)))


def _balance_by_undersampling(X, y, seed):
    """Return a shuffled, class-balanced subset of (X, y).

    The larger of the two classes (labels 0 and 1) is randomly undersampled,
    without replacement, to the size of the smaller one.

    Raises:
        ValueError: If either class has no samples.
    """
    pos_idx = np.where(y == 1)[0]
    neg_idx = np.where(y == 0)[0]
    if len(pos_idx) == 0 or len(neg_idx) == 0:
        raise ValueError("Both classes (0 and 1) must be present to balance the training set.")

    rng = np.random.default_rng(seed)
    # When negatives are at least as numerous as positives this draws exactly
    # what the previous inline code drew, so existing results are unchanged.
    # The else branch covers the case that used to raise inside rng.choice.
    if len(neg_idx) >= len(pos_idx):
        neg_idx = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    else:
        pos_idx = rng.choice(pos_idx, size=len(neg_idx), replace=False)

    balanced_idx = np.concatenate([pos_idx, neg_idx])
    rng.shuffle(balanced_idx)
    return X[balanced_idx], y[balanced_idx]


def _print_test_metrics(model, X_test, y_test):
    """Evaluate ``model`` on the test split and print accuracy, TPR, FPR, FNR, precision and F1."""
    print("\nEvaluating model on test set...")
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc:.4f}")

    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    # labels=[0, 1] forces a 2x2 matrix even when one class is missing from
    # both y_test and y_pred, so the four-way unpack cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Guard the ratios against an empty negative or positive class.
    fpr = fp / (fp + tn) if (fp + tn) else 0.0
    fnr = fn / (fn + tp) if (fn + tp) else 0.0

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


def train_hybrid_cnn_lstm(pkl_file):
    """Tune, retrain and evaluate the hybrid CNN/BiLSTM model on one dataset.

    Steps: load the vectors, hold out a stratified 20% test split, balance the
    remaining data by undersampling, carve a stratified 20% validation split
    out of the balanced data, run a 20-trial Optuna study, retrain the best
    configuration on train + validation, and print test-set metrics.

    Args:
        pkl_file: Path of the pickled DataFrame passed to ``load_vector_data``.

    Raises:
        ValueError: If the training split does not contain both classes.
    """
    # objective() reads these names from module scope, so they are published
    # as globals before the study starts.
    global X_train, X_val, y_train, y_val

    X, y = load_vector_data(pkl_file)
    print(f"Original dataset class distribution: {_class_counts(y)}")

    X_train_all, X_test, y_train_all, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED
    )

    print(f"Train split before balancing: {_class_counts(y_train_all)}")
    print(f"Test set class distribution: {_class_counts(y_test)}")

    # Balance the training data only; the test split keeps its original ratio.
    X_train_balanced, y_train_balanced = _balance_by_undersampling(X_train_all, y_train_all, SEED)
    print(f"Balanced training class distribution: {_class_counts(y_train_balanced)}")

    # Validation split
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_balanced, y_train_balanced, test_size=0.2, stratify=y_train_balanced, random_state=SEED
    )
    print(f"Final training class distribution: {_class_counts(y_train)}")
    print(f"Validation class distribution: {_class_counts(y_val)}")

    # Run Optuna with pruning enabled
    print("Running Optuna hyperparameter tuning (with pruning)...")
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5)
    )
    study.optimize(objective, n_trials=20)

    print(f"Best trial: {study.best_trial.number}")
    print(f"Best hyperparameters: {study.best_params}")

    # Retrain the best configuration on train + validation. No validation data
    # is left, so both callbacks monitor the training loss.
    best_hp = study.best_params
    X_full = np.concatenate([X_train, X_val])
    y_full = np.concatenate([y_train, y_val])
    model = build_model(input_shape=X_full.shape[1:], hp=best_hp)

    # NOTE(review): EarlyStopping's patience (2) is shorter than
    # ReduceLROnPlateau's (3), so the learning-rate reduction will rarely get
    # a chance to fire before training stops. Confirm this is intended.
    lr_scheduler = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)
    early_stopper = EarlyStopping(monitor="loss", patience=2, restore_best_weights=True, verbose=1)

    model.fit(
        X_full, y_full,
        epochs=30,
        batch_size=best_hp["batch_size"],
        callbacks=[lr_scheduler, early_stopper],
        verbose=1
    )

    _print_test_metrics(model, X_test, y_test)

# Run the tune -> retrain -> evaluate pipeline on the CWE-119 gadget vectors.
train_hybrid_cnn_lstm("cwe119_cgd_gadget_vectors.pkl")

Original dataset class distribution: {0: 29313, 1: 10440}
Train split before balancing: {0: 23450, 1: 8352}
Test set class distribution: {0: 5863, 1: 2088}
Balanced training class distribution: {0: 8352, 1: 8352}


[I 2025-05-24 20:37:11,526] A new study created in memory with name: no-name-13090652-d3d3-4dd0-b881-b108806cc3cc


Final training class distribution: {0: 6681, 1: 6682}
Validation class distribution: {0: 1671, 1: 1670}
Running Optuna hyperparameter tuning (with pruning)...


  "learning_rate": trial.suggest_loguniform("learning_rate", 1e-4, 1e-2),
[I 2025-05-24 20:44:27,047] Trial 0 finished with value: 0.8258006572723389 and parameters: {'lstm_units': 256, 'cnn_filters1': 64, 'cnn_filters2': 64, 'dense_units': 64, 'dropout_lstm': 0.36577380531075965, 'dropout_cnn': 0.24058217131266618, 'dropout_final': 0.22533772040065905, 'learning_rate': 0.0009890403095221465, 'batch_size': 128}. Best is trial 0 with value: 0.8258006572723389.
[I 2025-05-24 20:48:05,837] Trial 1 finished with value: 0.838371753692627 and parameters: {'lstm_units': 64, 'cnn_filters1': 16, 'cnn_filters2': 64, 'dense_units': 128, 'dropout_lstm': 0.20612973550888686, 'dropout_cnn': 0.32762572826322267, 'dropout_final': 0.3392077049654727, 'learning_rate': 0.002956332636611597, 'batch_size': 128}. Best is trial 1 with value: 0.838371753692627.
[I 2025-05-24 20:56:08,216] Trial 2 finished with value: 0.8150254487991333 and parameters: {'lstm_units': 256, 'cnn_filters1': 64, 'cnn_filters2': 12

Best trial: 7
Best hyperparameters: {'lstm_units': 128, 'cnn_filters1': 16, 'cnn_filters2': 32, 'dense_units': 128, 'dropout_lstm': 0.3946052251938623, 'dropout_cnn': 0.3900848497439904, 'dropout_final': 0.21345658079480226, 'learning_rate': 0.0012561055586646933, 'batch_size': 32}
Epoch 1/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 78ms/step - accuracy: 0.6571 - loss: 0.6038 - learning_rate: 0.0013
Epoch 2/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 78ms/step - accuracy: 0.7855 - loss: 0.4582 - learning_rate: 0.0013
Epoch 3/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 78ms/step - accuracy: 0.8218 - loss: 0.3875 - learning_rate: 0.0013
Epoch 4/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 78ms/step - accuracy: 0.8474 - loss: 0.3419 - learning_rate: 0.0013
Epoch 5/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 79ms/step - accuracy: 0.8630 - loss: 0.3076 - learning_

# CWE 119: Final Model

```
Vectorized Code Gadget Input --> BiLSTM + Attention ------\
                                                            --> Concatenate --> Dense --> Output
Vectorized Code Gadget Input --> CNN on expanded dims ----/
```

In [18]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout, LeakyReLU,
    LSTM, Bidirectional, Concatenate, Lambda
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Reproducibility: one seed is applied to every random number source used in this cell.
# SEED is also reused further down as the random_state of train_test_split and as
# the seed of the undersampling generator.
SEED = 41
# Python's built-in `random` module.
random.seed(SEED)
# NumPy's legacy global random state.
np.random.seed(SEED)
# TensorFlow's global seed.
tf.random.set_seed(SEED)
# Request deterministic op implementations from TensorFlow.
tf.config.experimental.enable_op_determinism()

# -----------------------------
# Load vector data
# -----------------------------
def load_vector_data(filename):
    """Read a pickled DataFrame and return its vectors and labels as arrays.

    Args:
        filename: Path of a pickle file readable by ``pd.read_pickle``. The
            frame must have a "vector" column (one array per row, all of the
            same shape) and a "val" column holding the labels.

    Returns:
        A ``(vectors, labels)`` tuple: the per-row vectors stacked along a new
        leading axis as float32, and the labels cast to int.
    """
    frame = pd.read_pickle(filename)
    stacked = np.stack(frame["vector"].values)
    return stacked.astype(np.float32), frame["val"].values.astype(int)

# -----------------------------
# Custom attention layer
# -----------------------------
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    Each position along axis 1 gets a score ``tanh(x . W + b)``; the scores are
    softmax-normalised and used to weight the positions before summing them.
    Assumes a (batch, steps, features) input, as produced in this file by the
    BiLSTM with ``return_sequences=True``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = input_shape[-1]
        # The bias has one entry per position on axis 1, so that axis must
        # have a known, fixed length when the layer is built.
        n_steps = input_shape[1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(n_steps, 1),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # One scalar score per position, with the trailing singleton axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise the scores and restore the trailing axis for broadcasting.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        return K.sum(x * weights, axis=1)

# -----------------------------
# Build model with best hyperparams
# -----------------------------
def build_best_model(input_shape, hp=None):
    """Build and compile the hybrid BiLSTM+attention / CNN classifier.

    The defaults are the best hyperparameters reported by the CWE-119 tuning
    run. Individual values can be overridden through ``hp`` instead of editing
    this function.

    Args:
        input_shape: Per-sample input shape handed to ``Input``.
        hp: Optional mapping of overrides for any of "lstm_units",
            "cnn_filters1", "cnn_filters2", "dense_units", "dropout_lstm",
            "dropout_cnn", "dropout_final" and "learning_rate". Keys the model
            does not use (for example "batch_size") are ignored, so a study's
            ``best_params`` dict can be passed directly.

    Returns:
        A Keras ``Model`` with one sigmoid output, compiled with Adam,
        binary cross-entropy loss and the accuracy metric.
    """
    params = {
        'lstm_units': 128,
        'cnn_filters1': 16,
        'cnn_filters2': 32,
        'dense_units': 128,
        'dropout_lstm': 0.3946052251938623,
        'dropout_cnn': 0.3900848497439904,
        'dropout_final': 0.21345658079480226,
        'learning_rate': 0.0012561055586646933
    }
    if hp:
        params.update(hp)

    input_layer = Input(shape=input_shape)

    # Branch 1: bidirectional LSTM over the sequence, pooled by attention.
    x_lstm = Bidirectional(LSTM(params["lstm_units"], return_sequences=True))(input_layer)
    x_lstm = AttentionLayer()(x_lstm)
    x_lstm = Dense(params["dense_units"])(x_lstm)
    x_lstm = LeakyReLU()(x_lstm)
    x_lstm = Dropout(params["dropout_lstm"])(x_lstm)

    # Branch 2: add a trailing channel axis so the same input can feed Conv2D.
    x_cnn = Lambda(lambda x: tf.expand_dims(x, axis=-1))(input_layer)
    x_cnn = Conv2D(params["cnn_filters1"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Conv2D(params["cnn_filters2"], (3, 3), padding="same", activation="relu")(x_cnn)
    x_cnn = MaxPool2D()(x_cnn)
    x_cnn = Flatten()(x_cnn)
    x_cnn = Dense(params["dense_units"], activation="relu")(x_cnn)
    x_cnn = Dropout(params["dropout_cnn"])(x_cnn)

    # Head: fuse both branches and classify.
    merged = Concatenate()([x_lstm, x_cnn])
    merged = Dense(params["dense_units"], activation="relu")(merged)
    merged = Dropout(params["dropout_final"])(merged)
    output = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer=Adam(params["learning_rate"]), loss="binary_crossentropy", metrics=["accuracy"])
    return model

# -----------------------------
# Training and evaluation
# -----------------------------
def _class_counts(labels):
    """Return a {label: count} dict used when printing class distributions."""
    return dict(zip(*np.unique(labels, return_counts=True)))


def _balance_by_undersampling(X, y, seed):
    """Return a shuffled, class-balanced subset of (X, y).

    The larger of the two classes (labels 0 and 1) is randomly undersampled,
    without replacement, to the size of the smaller one.

    Raises:
        ValueError: If either class has no samples.
    """
    pos_idx = np.where(y == 1)[0]
    neg_idx = np.where(y == 0)[0]
    if len(pos_idx) == 0 or len(neg_idx) == 0:
        raise ValueError("Both classes (0 and 1) must be present to balance the training set.")

    rng = np.random.default_rng(seed)
    # When negatives are at least as numerous as positives this draws exactly
    # what the previous inline code drew, so existing results are unchanged.
    # The else branch covers the case that used to raise inside rng.choice.
    if len(neg_idx) >= len(pos_idx):
        neg_idx = rng.choice(neg_idx, size=len(pos_idx), replace=False)
    else:
        pos_idx = rng.choice(pos_idx, size=len(neg_idx), replace=False)

    balanced_idx = np.concatenate([pos_idx, neg_idx])
    rng.shuffle(balanced_idx)
    return X[balanced_idx], y[balanced_idx]


def _print_test_metrics(model, X_test, y_test):
    """Evaluate ``model`` on the test split and print accuracy, TPR, FPR, FNR, precision and F1."""
    print("\nEvaluating model on test set...")
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc:.4f}")

    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    # labels=[0, 1] forces a 2x2 matrix even when one class is missing from
    # both y_test and y_pred, so the four-way unpack cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    tpr = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Guard the ratios against an empty negative or positive class.
    fpr = fp / (fp + tn) if (fp + tn) else 0.0
    fnr = fn / (fn + tp) if (fn + tp) else 0.0

    print(f"TPR (Recall): {tpr:.4f}")
    print(f"FPR         : {fpr:.4f}")
    print(f"FNR         : {fnr:.4f}")
    print(f"Precision   : {precision:.4f}")
    print(f"F1 Score    : {f1:.4f}")


def train_final_model(pkl_file, batch_size=32, epochs=30):
    """Train the tuned model on the balanced training data and report test metrics.

    Steps: load the vectors, hold out a stratified 20% test split, balance the
    remaining data by undersampling, train the model from ``build_best_model``
    and print test-set metrics.

    Args:
        pkl_file: Path of the pickled DataFrame passed to ``load_vector_data``.
        batch_size: Mini-batch size for training. The default of 32 is the
            batch size of the best tuning trial.
        epochs: Number of training epochs.

    Raises:
        ValueError: If the training split does not contain both classes.
    """
    X, y = load_vector_data(pkl_file)
    print(f"Original dataset class distribution: {_class_counts(y)}")

    X_train_all, X_test, y_train_all, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED
    )
    print(f"Train split before balancing: {_class_counts(y_train_all)}")
    print(f"Test set class distribution: {_class_counts(y_test)}")

    # Balance the training data only; the test split keeps its original ratio.
    X_train, y_train = _balance_by_undersampling(X_train_all, y_train_all, SEED)
    print(f"Balanced training class distribution: {_class_counts(y_train)}")

    model = build_best_model(input_shape=X_train.shape[1:])

    # No validation data is held out here, so the scheduler watches the training loss.
    lr_scheduler = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)

    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[lr_scheduler],
        verbose=1
    )

    _print_test_metrics(model, X_test, y_test)

# Train and evaluate the final (tuned) model on the CWE-119 gadget vectors.
train_final_model("cwe119_cgd_gadget_vectors.pkl")

Original dataset class distribution: {0: 29313, 1: 10440}
Train split before balancing: {0: 23450, 1: 8352}
Test set class distribution: {0: 5863, 1: 2088}
Balanced training class distribution: {0: 8352, 1: 8352}
Epoch 1/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 81ms/step - accuracy: 0.6364 - loss: 0.6174 - learning_rate: 0.0013
Epoch 2/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 81ms/step - accuracy: 0.7741 - loss: 0.4756 - learning_rate: 0.0013
Epoch 3/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 80ms/step - accuracy: 0.8150 - loss: 0.4033 - learning_rate: 0.0013
Epoch 4/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 80ms/step - accuracy: 0.8368 - loss: 0.3565 - learning_rate: 0.0013
Epoch 5/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 80ms/step - accuracy: 0.8510 - loss: 0.3267 - learning_rate: 0.0013
Epoch 6/30
[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m