In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import (
    classification_report, precision_recall_curve, confusion_matrix, ConfusionMatrixDisplay,
    accuracy_score, precision_score, recall_score, fbeta_score
)

In [None]:
# Read the flattened sequence table; every column other than "label" is a feature.
df = pd.read_csv("../data/processed/lstm_sequences_alemari_normalised.csv")
y = df["label"].to_numpy().astype(np.int64)
X = df.drop(columns="label").to_numpy().astype(np.float32)
# Fold each flat row into 5 time steps and let NumPy infer the per-step width
# (the original note gives the result as (N, seq_len=5, input_size=4)).
X = X.reshape(len(X), 5, -1)

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class LSTMClassifier(nn.Module):
    """Two-class sequence classifier: a stacked LSTM followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout rate passed to ``nn.LSTM``. It is forced to 0.0 when
            ``num_layers`` is 1, because ``nn.LSTM`` only applies dropout
            between stacked layers and warns when a non-zero rate is given
            for a single layer.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()
        # Guard here so callers do not each have to special-case one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        # Attribute names `lstm` and `fc` define the state_dict keys; keep them.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 2).

        ``x`` is consumed batch-first, i.e. (batch, seq_len, input_size).
        """
        # h_n stacks the final hidden state of every layer; [-1] is the top layer.
        _, (h_n, _) = self.lstm(x)
        out = self.fc(h_n[-1])
        # Log-probabilities pair with nn.NLLLoss; exponentiate to get probabilities.
        return F.log_softmax(out, dim=1)

In [None]:
def _fp_sweep_record(m, t, fbeta, prec, rec, fp, state_dict, arch):
    """Package one (class weight, threshold) evaluation as a plain result dict."""
    return {
        "m": float(m),
        "thr": float(t),
        "Fbeta": float(fbeta),
        "precision": float(prec),
        "recall": float(rec),
        "false_positives": int(fp),
        "state_dict": state_dict,
        "arch": arch,
    }


def grid_search_lstm_fp_exact(X, y, m_grid, arch_grid, fp_exact=64, beta=2.0):
    """Sweep architectures, class weights and thresholds for a target false-positive count.

    For every architecture in ``arch_grid`` and every positive-class weight in
    ``m_grid`` a fresh ``LSTMClassifier`` is trained full-batch for 100 epochs
    on a stratified 80% split. The held-out 20% is then scored and 500
    thresholds in [0.01, 0.99] are swept. The preferred result is the one with
    the highest F-beta among those whose false-positive count is within 1 of
    ``fp_exact``. If none qualifies, the result whose false-positive count is
    closest to ``fp_exact`` is used (ties broken by higher F-beta).

    NOTE: model, class weight and threshold are all selected on the held-out
    split itself, so the metrics returned here are optimistic.

    Args:
        X: Array indexed as (N, seq_len, input_size); ``X.shape[2]`` is used
            as the model's input size.
        y: Array of 0/1 labels aligned with ``X``.
        m_grid: Iterable of weights applied to class 1 in the NLL loss
            (class 0 is always weighted 1.0).
        arch_grid: Iterable of dicts with keys ``hidden_dim``, ``layers``,
            ``dropout`` and ``lr``.
        fp_exact: Target number of false positives on the held-out split.
            The original author's note suggests 64 for a 1% and 32 for a 0.5%
            budget; what those percentages are taken of is not shown here.
        beta: Beta of the F-beta score used for ranking.

    Returns:
        The selected result dict with keys ``m``, ``thr``, ``Fbeta``,
        ``precision``, ``recall``, ``false_positives``, ``state_dict`` and
        ``arch``.

    Raises:
        RuntimeError: If no configuration was evaluated (e.g. an empty grid).

    Side effects:
        Writes the held-out indices, the selected weights and a JSON config
        under ``../configs`` and ``../models``. The file names are fixed to
        ``..._4feat_exact64...`` regardless of ``fp_exact``.
    """
    torch.manual_seed(42)
    np.random.seed(42)

    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, test_idx = next(split.split(X, y))
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]
    torch.save(test_idx, "../configs/lstm_test_mask_4feat_exact64.pt")

    # These do not depend on the architecture or class weight, so build them once.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    labels = y_test
    thresholds = np.linspace(0.01, 0.99, 500)

    best_result = None
    best_f2 = -1
    closest_result = None
    min_fp_diff = float("inf")

    for arch in arch_grid:
        print(f"\n Trying arch: {arch}")
        for m in m_grid:
            model = LSTMClassifier(input_size=X.shape[2],
                                   hidden_size=arch["hidden_dim"],
                                   num_layers=arch["layers"],
                                   dropout=arch["dropout"] if arch["layers"] > 1 else 0.0).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=arch["lr"])
            class_weights = torch.tensor([1.0, m], dtype=torch.float32).to(device)
            criterion = nn.NLLLoss(weight=class_weights)

            # Full-batch training: one optimizer step per epoch.
            model.train()
            for epoch in range(100):
                optimizer.zero_grad()
                out = model(X_train_tensor)
                loss = criterion(out, y_train_tensor)
                loss.backward()
                optimizer.step()

            model.eval()
            with torch.no_grad():
                logits = model(X_test_tensor).cpu()
                # The model emits log-probabilities; exp() recovers P(class 1).
                probs = torch.exp(logits)[:, 1].numpy()

            # Training is finished, so one state_dict per model is enough for
            # every threshold recorded below.
            trained_weights = model.state_dict()

            for t in thresholds:
                preds = (probs > t).astype(int)
                fp = ((labels == 0) & (preds == 1)).sum()
                tp = ((labels == 1) & (preds == 1)).sum()
                fn = ((labels == 1) & (preds == 0)).sum()
                # The small epsilon keeps the ratios defined when a denominator is 0.
                prec = tp / (tp + fp + 1e-8)
                rec = tp / (tp + fn + 1e-8)
                fbeta = (1 + beta**2) * prec * rec / (beta**2 * prec + rec + 1e-8)
                fp_diff = abs(fp - fp_exact)

                # Preferred candidate: within one false positive of the target.
                if fp_diff <= 1 and fbeta > best_f2:
                    best_f2 = fbeta
                    best_result = _fp_sweep_record(m, t, fbeta, prec, rec, fp,
                                                   trained_weights, arch)

                # Fallback candidate: nearest false-positive count, ties by F-beta.
                if fp_diff < min_fp_diff or (
                    fp_diff == min_fp_diff
                    and fbeta > (closest_result or {}).get("Fbeta", 0)
                ):
                    min_fp_diff = fp_diff
                    closest_result = _fp_sweep_record(m, t, fbeta, prec, rec, fp,
                                                      trained_weights, arch)

    result_to_use = best_result if best_result else closest_result
    if not result_to_use:
        raise RuntimeError("No viable config found.")

    fname = "../models/best_lstm_model_4feat_exact64.pth"
    torch.save(result_to_use["state_dict"], fname)
    with open("../configs/lstm_best_config_4feat_exact64.json", "w") as f:
        # The weights are not JSON-serialisable and are already saved above.
        json.dump({k: v for k, v in result_to_use.items() if k != "state_dict"}, f, indent=2)
    print(f"\nSaved best LSTM model (FP ≈ {fp_exact}, F{beta:g} = {result_to_use['Fbeta']:.4f})")
    return result_to_use


In [None]:
# Candidate architectures for the sweep, written as
# (hidden_dim, lr, layers, dropout) rows and expanded into one dict each.
arch_grid = [
    dict(hidden_dim=hidden_dim, lr=lr, layers=layers, dropout=dropout)
    for hidden_dim, lr, layers, dropout in (
        (64, 0.001, 2, 0.2),
        (128, 0.001, 3, 0.3),
        (32, 0.0005, 2, 0.1),
        (64, 0.0005, 1, 0.0),
        (128, 0.001, 2, 0.1),
    )
]

In [None]:
# Weights tried for class 1 in the loss: from 1.0 up to (but excluding) 4.0
# in steps of 0.1, rounded to tidy up floating-point noise.
m_grid = np.arange(1.0, 4.0, 0.1).round(2)

# Run the sweep
best_result = grid_search_lstm_fp_exact(
    X=X,
    y=y,
    m_grid=m_grid,
    arch_grid=arch_grid,
    fp_exact=64,
)

In [None]:
# Load the held-out indices written by grid_search_lstm_fp_exact. The path must
# match the "_4feat_" file that function saves; the earlier non-"_4feat_" name
# pointed at a file this notebook never writes.
# The file holds a pickled NumPy index array, which torch.load rejects under the
# weights_only=True default of recent PyTorch releases. It is produced locally by
# this notebook, so full unpickling is acceptable here; do not reuse this for
# files from untrusted sources.
test_idx = torch.load("../configs/lstm_test_mask_4feat_exact64.pt", weights_only=False)
X_test, y_test = X[test_idx], y[test_idx]

X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)

# Rebuild the winning architecture so the saved weights fit it exactly.
arch = best_result["arch"]
model = LSTMClassifier(input_size=X.shape[2],
                       hidden_size=arch["hidden_dim"],
                       num_layers=arch["layers"],
                       dropout=arch["dropout"] if arch["layers"] > 1 else 0.0).to(device)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(
    torch.load("../models/best_lstm_model_4feat_exact64.pth", map_location=device)
)

model.eval()
with torch.no_grad():
    logits = model(X_test_tensor).cpu()
    # The model returns log-probabilities; exp() gives P(class 1).
    probs = torch.exp(logits)[:, 1].numpy()

# Threshold and prediction: reuse the threshold selected by the sweep.
threshold = best_result["thr"]
preds = (probs > threshold).astype(int)
labels = y_test

# Classification report
print("\nClassification Report:")
print(classification_report(labels, preds, target_names=["Legit", "Phishing"]))

# F2-score
f2 = fbeta_score(labels, preds, beta=2.0)
print(f"\nF2-Score (beta=2.0): {f2:.4f}")

# Confusion matrix
cm = confusion_matrix(labels, preds)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["Legit", "Phishing"],
            yticklabels=["Legit", "Phishing"])
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix (Threshold = {:.4f})".format(threshold))
plt.tight_layout()
plt.show()

# Precision-recall curve, with the operating point chosen by the sweep in red.
prec, rec, thr = precision_recall_curve(labels, probs)
plt.figure(figsize=(8,6))
plt.plot(rec, prec, label="PR Curve")
plt.scatter(best_result["recall"], best_result["precision"], color='red',
            label=f"Best Threshold = {threshold:.4f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
