In [None]:
! pip install tensorflow

In [None]:
# =========================================================
# PYTORCH + SKLEARN ANOMALY DETECTION PIPELINE (MIXED DTYPES)
# Works with your dataset columns (TARGET = is_attack)
# Handles categorical/string columns via OneHotEncoder
# =========================================================

# =========================================================
# SECTION 1 — IMPORTS + SETTINGS
# =========================================================
import os, json
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    confusion_matrix, classification_report,
    precision_score, recall_score, f1_score, accuracy_score,
    roc_auc_score, average_precision_score
)

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor

from pandas.api.types import is_numeric_dtype

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Seed NumPy and PyTorch so that splits, model initialisation and shuffling are repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run PyTorch on the GPU when one is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset location. The default is a machine-specific absolute path, so it can be
# overridden without editing the notebook by setting the ANOMALY_DATA_PATH
# environment variable before starting the kernel.
DATA_PATH = os.environ.get("ANOMALY_DATA_PATH", r"E:\NIT surathkal\data.csv")
TARGET = "is_attack"                 # binary label column (1 = attack, 0 = normal)
INCLUDE_RISK_FEATURE = False         # when False, the "risk_score" column is dropped from the features
OUTPUT_DIR = "./outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# =========================================================
# SECTION 2 — HELPERS (save + metrics)
# =========================================================
def to_py(obj):
    """
    Convert a NumPy value into a plain Python object that ``json`` can serialise.

    - ``np.bool_``    -> ``bool``
    - ``np.integer``  -> ``int``
    - ``np.floating`` -> ``float``
    - ``np.ndarray``  -> (nested) ``list``

    Any other object is returned unchanged.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it json.dump raises TypeError on NumPy booleans.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj

def save_json(path, payload):
    """
    Write `payload` to `path` as indented UTF-8 JSON.

    The payload is walked recursively first: dict keys are passed through
    `to_py` and stringified, dict values and list/tuple items are converted
    recursively, and every leaf is passed through `to_py`.

    Args:
        path: destination file path (overwritten if it exists).
        payload: nested structure of dicts / lists / tuples / scalars.
    """
    def convert(x):
        if isinstance(x, dict):
            return {str(to_py(k)): convert(v) for k, v in x.items()}
        # Tuples are handled like lists: json would emit them as arrays anyway,
        # but without recursion any NumPy scalar inside a tuple fails to serialise.
        if isinstance(x, (list, tuple)):
            return [convert(v) for v in x]
        return to_py(x)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(convert(payload), f, indent=2)

def evaluate_from_scores(y_true, scores, threshold, model_name, out_dir):
    """
    Threshold anomaly scores, compute binary-classification metrics and save them.

    scores: higher => more anomalous
    threshold: predict anomaly if score > threshold

    Args:
        y_true: true labels, cast to int (0 = normal, 1 = attack).
        scores: one anomaly score per sample, cast to float.
        threshold: decision cut-off applied with a strict ``>``.
        model_name: stored under the "model" key of the metrics dict.
        out_dir: directory that receives the artefacts (created if missing).

    Returns:
        (metrics, cm, report): the metrics dict, the 2x2 confusion matrix with
        label order [0, 1], and the text classification report.

    Side effects:
        Writes confusion_matrix.csv, metrics.json and classification_report.txt
        into `out_dir`.
    """
    y_true = np.asarray(y_true).astype(int)
    scores = np.asarray(scores).astype(float)
    y_pred = (scores > threshold).astype(int)

    # labels=[0, 1] forces a 2x2 matrix even if one class is never predicted.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    metrics = {
        "model": model_name,
        "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
        "accuracy": float(accuracy_score(y_true, y_pred)),
        "precision": float(precision_score(y_true, y_pred, zero_division=0)),
        "recall": float(recall_score(y_true, y_pred, zero_division=0)),
        "f1": float(f1_score(y_true, y_pred, zero_division=0)),
        # Ranking metrics use the raw scores, not the thresholded predictions.
        "roc_auc": float(roc_auc_score(y_true, scores)),
        "pr_auc": float(average_precision_score(y_true, scores)),
        "threshold": float(threshold),
    }

    # zero_division=0 keeps the report consistent with the scalar metrics above
    # when a class receives no predictions.
    report = classification_report(y_true, y_pred, digits=4, zero_division=0)

    os.makedirs(out_dir, exist_ok=True)

    # Save confusion matrix. The index is written on purpose: it carries the
    # true-label row names, without which the CSV rows are ambiguous.
    pd.DataFrame(
        cm,
        index=["true_0_normal", "true_1_attack"],
        columns=["pred_0_normal", "pred_1_attack"]
    ).to_csv(os.path.join(out_dir, "confusion_matrix.csv"))

    save_json(os.path.join(out_dir, "metrics.json"), metrics)

    with open(os.path.join(out_dir, "classification_report.txt"), "w", encoding="utf-8") as f:
        f.write(report)

    return metrics, cm, report

def tune_threshold_by_f1(y_true, scores, percentiles=np.linspace(80, 99.9, 80)):
    """
    Sweep percentile cut-offs of `scores` and keep the one with the highest F1.

    scores: higher => more anomalous. For every percentile the cut-off is the
    corresponding percentile of `scores`; samples strictly above it are flagged.
    On ties the earliest percentile wins.

    Returns a dict with keys "f1", "threshold", "precision", "recall" and
    "percentile" describing the best cut-off found.
    """
    labels = np.asarray(y_true).astype(int)
    values = np.asarray(scores).astype(float)

    winner = {"f1": -1, "threshold": None, "precision": None, "recall": None, "percentile": None}
    for pct in percentiles:
        cutoff = np.percentile(values, pct)
        flagged = (values > cutoff).astype(int)
        candidate_f1 = f1_score(labels, flagged, zero_division=0)

        # Only a strict improvement replaces the current best.
        if not candidate_f1 > winner["f1"]:
            continue

        winner["f1"] = float(candidate_f1)
        winner["threshold"] = float(cutoff)
        winner["precision"] = float(precision_score(labels, flagged, zero_division=0))
        winner["recall"] = float(recall_score(labels, flagged, zero_division=0))
        winner["percentile"] = float(pct)
    return winner

# =========================================================
# SECTION 3 — LOAD DATA
# =========================================================
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found at: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)
df.columns = [c.strip() for c in df.columns]

if TARGET not in df.columns:
    raise ValueError(f"TARGET='{TARGET}' not found. Available columns: {df.columns.tolist()}")

print("Loaded:", DATA_PATH)
print("Shape:", df.shape)
print("Attack rate:", df[TARGET].mean())

# =========================================================
# SECTION 4 — FEATURES / LABELS (DROP LEAKY / ID COLS)
# =========================================================
drop_cols = [TARGET]

# drop common index column
if "Unnamed: 0" in df.columns:
    drop_cols.append("Unnamed: 0")

# drop leakage column (often only present when attack)
if "attack_type" in df.columns:
    drop_cols.append("attack_type")

# drop identifiers (recommended)
for c in ["user_id", "ip_address", "device_id", "timestamp"]:
    if c in df.columns:
        drop_cols.append(c)

# optional risk_score exclusion
if not INCLUDE_RISK_FEATURE and "risk_score" in df.columns:
    drop_cols.append("risk_score")

X = df.drop(columns=[c for c in drop_cols if c in df.columns])
y = df[TARGET].astype(int).values

# =========================================================
# SECTION 5 — TRAIN/VAL/TEST SPLIT
# =========================================================
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.20, random_state=SEED, stratify=y
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.20, random_state=SEED, stratify=y_train_full
)

# Unsupervised training: fit ONLY on NORMAL (non-attack) samples
X_train_normal = X_train[y_train == 0]

print("\nSplit sizes:")
print("Train full:", X_train_full.shape, "Test:", X_test.shape)
print("Train:", X_train.shape, "Val:", X_val.shape)
print("Train normals only:", X_train_normal.shape)

# =========================================================
# SECTION 6 — PREPROCESSOR (NUMERIC + CATEGORICAL SAFE)
# =========================================================
num_cols = [c for c in X.columns if is_numeric_dtype(X[c])]
cat_cols = [c for c in X.columns if c not in num_cols]

print("\nFeature types:")
print("Numeric:", len(num_cols), num_cols)
print("Categorical:", len(cat_cols), cat_cols)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", RobustScaler(with_centering=True, with_scaling=True), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ],
    remainder="drop"
)

X_train_normal_p = preprocessor.fit_transform(X_train_normal)
X_val_p = preprocessor.transform(X_val)
X_test_p = preprocessor.transform(X_test)

# Save preprocessed splits (optional)
pd.DataFrame(X_train_normal_p).to_csv(os.path.join(OUTPUT_DIR, "X_train_normal_preprocessed.csv"), index=False)
pd.DataFrame(X_val_p).to_csv(os.path.join(OUTPUT_DIR, "X_val_preprocessed.csv"), index=False)
pd.DataFrame(X_test_p).to_csv(os.path.join(OUTPUT_DIR, "X_test_preprocessed.csv"), index=False)
pd.Series(y_val, name="y_val").to_csv(os.path.join(OUTPUT_DIR, "y_val.csv"), index=False)
pd.Series(y_test, name="y_test").to_csv(os.path.join(OUTPUT_DIR, "y_test.csv"), index=False)

# =========================================================
# SECTION 7 — MODEL 1: ISOLATION FOREST
# =========================================================
def _fit_and_score_detector(detector, label, folder):
    """
    Fit one scikit-learn detector on the normal-only training matrix and evaluate it.

    Steps: create `folder`, fit on X_train_normal_p, score the validation and test
    matrices with the negated decision_function (higher => more anomalous), tune
    the threshold by F1 on validation, evaluate on test, save
    threshold_tuning.json and print the results.

    Returns (val_scores, test_scores, best, metrics, cm, report).
    """
    os.makedirs(folder, exist_ok=True)
    detector.fit(X_train_normal_p)

    scores_val = -detector.decision_function(X_val_p)
    scores_test = -detector.decision_function(X_test_p)

    tuned = tune_threshold_by_f1(y_val, scores_val)
    metrics, cm, report = evaluate_from_scores(y_test, scores_test, tuned["threshold"], label, folder)
    save_json(os.path.join(folder, "threshold_tuning.json"), tuned)

    print(f"\n=== {label} ===")
    print(metrics)
    print(report)
    return scores_val, scores_test, tuned, metrics, cm, report


iso_dir = os.path.join(OUTPUT_DIR, "IsolationForest")
iso = IsolationForest(
    n_estimators=400,
    max_samples="auto",
    contamination="auto",
    random_state=SEED,
    n_jobs=-1
)
(val_scores_iso, test_scores_iso, best_iso,
 metrics_iso, cm_iso, rep_iso) = _fit_and_score_detector(iso, "IsolationForest", iso_dir)

# =========================================================
# SECTION 8 — MODEL 2: ONE-CLASS SVM
# =========================================================
svm_dir = os.path.join(OUTPUT_DIR, "OneClassSVM")
ocsvm = OneClassSVM(kernel="rbf", nu=0.05, gamma="scale")
(val_scores_svm, test_scores_svm, best_svm,
 metrics_svm, cm_svm, rep_svm) = _fit_and_score_detector(ocsvm, "OneClassSVM", svm_dir)

# =========================================================
# SECTION 9 — MODEL 3: LOCAL OUTLIER FACTOR (NOVELTY MODE)
# =========================================================
lof_dir = os.path.join(OUTPUT_DIR, "LocalOutlierFactor")
lof = LocalOutlierFactor(
    n_neighbors=35,
    novelty=True,
    metric="minkowski"
)
(val_scores_lof, test_scores_lof, best_lof,
 metrics_lof, cm_lof, rep_lof) = _fit_and_score_detector(lof, "LocalOutlierFactor", lof_dir)

# =========================================================
# SECTION 10 — MODEL 4: AUTOENCODER (PYTORCH)
# =========================================================
ae_dir = os.path.join(OUTPUT_DIR, "Autoencoder_Torch")
os.makedirs(ae_dir, exist_ok=True)

input_dim = X_train_normal_p.shape[1]

class AutoEncoder(nn.Module):
    """
    Fully-connected autoencoder: d -> 128 -> 64 -> 16 -> 64 -> 128 -> d.

    Hidden layers use ReLU; BatchNorm and Dropout follow the 128- and 64-unit
    encoder layers and the 64-unit decoder layer. The output layer is linear.
    All layers live in a single ``nn.Sequential`` stored as ``self.net``.
    """

    def __init__(self, d: int):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the random stream in the same sequence as the network runs.
        encoder_layers = [
            nn.Linear(d, 128), nn.ReLU(), nn.BatchNorm1d(128), nn.Dropout(0.15),
            nn.Linear(128, 64), nn.ReLU(), nn.BatchNorm1d(64), nn.Dropout(0.10),
            nn.Linear(64, 16), nn.ReLU(),
        ]
        decoder_layers = [
            nn.Linear(16, 64), nn.ReLU(), nn.BatchNorm1d(64), nn.Dropout(0.10),
            nn.Linear(64, 128), nn.ReLU(),
            nn.Linear(128, d),
        ]
        self.net = nn.Sequential(*encoder_layers, *decoder_layers)

    def forward(self, x):
        """Return the reconstruction of `x` (same trailing dimension d)."""
        reconstruction = self.net(x)
        return reconstruction

def mae_per_row(recon, x):
    """Mean absolute difference between `recon` and `x`, averaged over dim 1 (one value per row)."""
    abs_err = (recon - x).abs()
    return abs_err.mean(dim=1)

def train_ae_min_epoch_early_stop(
    model, train_loader, val_tensor,
    min_epochs=15, patience=7, max_epochs=60, lr=1e-3
):
    """
    Train an autoencoder with MAE loss, checkpointing and patience-based early stopping.

    Args:
        model: the autoencoder to train (modified in place).
        train_loader: yields 1-tuples ``(xb,)`` of input batches.
        val_tensor: validation inputs, evaluated in a single forward pass per epoch.
        min_epochs: epochs without improvement are not counted against `patience`
            before this epoch number.
        patience: number of counted non-improving epochs that triggers the stop.
        max_epochs: hard upper bound on the number of epochs.
        lr: Adam learning rate.

    Returns:
        dict with per-epoch "train_loss" and "val_loss" lists.

    Side effects:
        Writes the best weights to ``<ae_dir>/best_ae.pt`` and, at the end,
        restores the best weights seen during THIS call into `model`.
    """
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.L1Loss()  # MAE

    history = {"train_loss": [], "val_loss": []}
    best_val = float("inf")
    best_state = None  # in-memory copy of the best weights of this run
    best_path = os.path.join(ae_dir, "best_ae.pt")
    wait = 0

    # The validation tensor never changes, so move it to the device once.
    xv = val_tensor.to(DEVICE)

    for epoch in range(1, max_epochs + 1):
        model.train()
        losses = []
        for (xb,) in train_loader:
            xb = xb.to(DEVICE)
            opt.zero_grad(set_to_none=True)
            recon = model(xb)
            loss = criterion(recon, xb)
            loss.backward()
            opt.step()
            losses.append(loss.item())

        model.eval()
        with torch.no_grad():
            rv = model(xv)
            vloss = criterion(rv, xv).item()

        tloss = float(np.mean(losses))
        history["train_loss"].append(tloss)
        history["val_loss"].append(vloss)

        print(f"Epoch {epoch:03d} | train_loss={tloss:.6f} | val_loss={vloss:.6f}")

        if vloss < best_val - 1e-8:
            best_val = vloss
            wait = 0
            # Snapshot on CPU so later optimiser steps cannot alter the saved weights.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            torch.save(best_state, best_path)
        elif epoch >= min_epochs:
            wait += 1
            if wait >= patience:
                print("Early stopping triggered.")
                break

    # Restore from the in-memory snapshot rather than from whatever file exists on
    # disk: if the loss never improved (e.g. NaN), a best_ae.pt left over from an
    # earlier run must not be loaded silently.
    if best_state is not None:
        model.load_state_dict(best_state)

    return history

# Torch tensors
X_train_t = torch.tensor(X_train_normal_p, dtype=torch.float32)
X_val_t = torch.tensor(X_val_p, dtype=torch.float32)
X_test_t = torch.tensor(X_test_p, dtype=torch.float32)

# Early stopping monitors reconstruction loss, so it must only see NORMAL validation
# rows, mirroring the normal-only training set. With attack rows included, the
# selected checkpoint would be rewarded for reconstructing attacks well, which is the
# opposite of what a reconstruction-error detector needs.
X_val_normal_t = torch.tensor(X_val_p[y_val == 0], dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train_t), batch_size=1024, shuffle=True, drop_last=False)

ae = AutoEncoder(input_dim).to(DEVICE)
hist = train_ae_min_epoch_early_stop(ae, train_loader, X_val_normal_t, min_epochs=15, patience=7, max_epochs=60, lr=1e-3)

# Score the FULL validation and test sets (normals and attacks): the per-row
# reconstruction error is the anomaly score (higher => more anomalous).
ae.eval()
with torch.no_grad():
    val_recon = mae_per_row(ae(X_val_t.to(DEVICE)), X_val_t.to(DEVICE)).cpu().numpy()
    test_recon = mae_per_row(ae(X_test_t.to(DEVICE)), X_test_t.to(DEVICE)).cpu().numpy()

best_ae = tune_threshold_by_f1(y_val, val_recon)
metrics_ae, cm_ae, rep_ae = evaluate_from_scores(y_test, test_recon, best_ae["threshold"], "Autoencoder_Torch", ae_dir)
save_json(os.path.join(ae_dir, "threshold_tuning.json"), best_ae)

pd.DataFrame(hist).to_csv(os.path.join(ae_dir, "train_history.csv"), index=False)

print("\n=== Autoencoder (PyTorch) ===")
print(metrics_ae)
print(rep_ae)

# =========================================================
# SECTION 11 — FINAL SUMMARY
# =========================================================
summary = pd.DataFrame([metrics_iso, metrics_svm, metrics_lof, metrics_ae])
summary.to_csv(os.path.join(OUTPUT_DIR, "summary_all_models.csv"), index=False)

print("\n✅ DONE. Saved outputs to:", os.path.abspath(OUTPUT_DIR))
print(" - Per-model folders contain: confusion_matrix.csv, metrics.json, classification_report.txt")
print(" - Summary: summary_all_models.csv")


Loaded: C:\Users\varsh\OneDrive\Desktop\NITK\data.csv
Shape: (103000, 18)
Attack rate: 0.02912621359223301

Split sizes:
Train full: (82400, 11) Test: (20600, 11)
Train: (65920, 11) Val: (16480, 11)
Train normals only: (64000, 11)

Feature types:
Numeric: 8 ['resource', 'auth_method', 'hour', 'day_of_week', 'login_success', 'new_device', 'geo_anomaly', 'privilege_escalation']
Categorical: 3 ['geo_country', 'login_result', 'privilege_level']

=== IsolationForest ===
{'model': 'IsolationForest', 'tn': 18857, 'fp': 1143, 'fn': 15, 'tp': 585, 'accuracy': 0.9437864077669903, 'precision': 0.3385416666666667, 'recall': 0.975, 'f1': 0.5025773195876289, 'roc_auc': 0.9579930416666667, 'pr_auc': 0.2671231957993937, 'threshold': 0.06535240841251712}
              precision    recall  f1-score   support

           0     0.9992    0.9428    0.9702     20000
           1     0.3385    0.9750    0.5026       600

    accuracy                         0.9438     20600
   macro avg     0.6689    0.9589 

In [None]:
# =========================================================
# FULL PIPELINE (ATTENTION ONLY — NO GATED FUSION):
# 1) Load data (TARGET = is_attack)
# 2) Split train/val/test (stratified)
# 3) Fit preprocessor on TRAIN NORMAL ONLY (robust scale + onehot)
# 4) Train 3 unsupervised models on TRAIN NORMAL ONLY:
#       IsolationForest, OneClassSVM, LOF(novelty)
# 5) Extract meta-features from the 3 models for train_full/val/test
# 6) Train ATTENTION-ONLY fusion (PyTorch) supervised using y labels
# 7) Evaluate on test + print metrics + save outputs
# =========================================================

import os, json
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    confusion_matrix, classification_report,
    precision_score, recall_score, f1_score, accuracy_score,
    roc_auc_score, average_precision_score
)

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor

from pandas.api.types import is_numeric_dtype

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# -----------------------------
# SETTINGS
# -----------------------------
# Seed NumPy and PyTorch so that splits, model initialisation and shuffling are repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run PyTorch on the GPU when one is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset location. The default is a machine-specific absolute path, so it can be
# overridden without editing the notebook by setting the ANOMALY_DATA_PATH
# environment variable before starting the kernel.
DATA_PATH = os.environ.get("ANOMALY_DATA_PATH", r"C:\Users\varsh\OneDrive\Desktop\NITK\data.csv")
TARGET = "is_attack"
OUTPUT_DIR = "./outputs_attention_only"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -----------------------------
# HELPERS
# -----------------------------
def evaluate_probs(y_true, probs, threshold=0.5):
    """
    Turn probabilities into hard predictions and compute classification metrics.

    A sample is predicted positive when its probability is >= `threshold`.
    ROC-AUC and PR-AUC are computed from the raw probabilities.

    Returns (metrics, cm, report): the metrics dict, the 2x2 confusion matrix
    with label order [0, 1], and the text classification report.
    """
    labels = np.asarray(y_true).astype(int)
    p_attack = np.asarray(probs).astype(float)
    decisions = (p_attack >= threshold).astype(int)

    cm = confusion_matrix(labels, decisions, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Built key by key in the order the results are reported.
    metrics = {"tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp)}
    metrics["accuracy"] = float(accuracy_score(labels, decisions))
    metrics["precision"] = float(precision_score(labels, decisions, zero_division=0))
    metrics["recall"] = float(recall_score(labels, decisions, zero_division=0))
    metrics["f1"] = float(f1_score(labels, decisions, zero_division=0))
    metrics["roc_auc"] = float(roc_auc_score(labels, p_attack))
    metrics["pr_auc"] = float(average_precision_score(labels, p_attack))
    metrics["threshold"] = float(threshold)

    report = classification_report(labels, decisions, digits=4)
    return metrics, cm, report

def save_text(path, s):
    """Write the string `s` to `path` as UTF-8, replacing any existing file."""
    with open(path, mode="w", encoding="utf-8") as out:
        out.write(s)

def save_json(path, payload):
    """
    Write `payload` to `path` as indented UTF-8 JSON, converting NumPy values first.

    Dict keys are converted and stringified; dict values and list/tuple items are
    converted recursively; NumPy booleans, integers, floats and arrays become
    bool, int, float and (nested) list respectively.

    Args:
        path: destination file path (overwritten if it exists).
        payload: nested structure of dicts / lists / tuples / scalars.
    """
    def to_py(obj):
        # np.bool_ is not an np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_): return bool(obj)
        if isinstance(obj, (np.integer,)): return int(obj)
        if isinstance(obj, (np.floating,)): return float(obj)
        if isinstance(obj, (np.ndarray,)): return obj.tolist()
        return obj
    def convert(x):
        if isinstance(x, dict):
            return {str(to_py(k)): convert(v) for k, v in x.items()}
        # Tuples are recursed into like lists; otherwise NumPy scalars inside
        # a tuple reach json.dump unconverted and raise TypeError.
        if isinstance(x, (list, tuple)):
            return [convert(v) for v in x]
        return to_py(x)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(convert(payload), f, indent=2)

def tune_threshold_by_f1_probs(y_true, probs, thresholds=np.linspace(0.01, 0.99, 99)):
    """
    Pick the probability threshold that maximises F1.

    A sample is predicted positive when its probability is >= the threshold.
    On ties the earliest threshold in `thresholds` wins.

    Args:
        y_true: true binary labels (any array-like, cast to int).
        probs: predicted positive-class probabilities (any array-like, cast to float).
        thresholds: candidate thresholds to try, in order.

    Returns:
        dict with keys "f1", "threshold", "precision" and "recall" for the best
        threshold. If `thresholds` is empty the defaults (threshold 0.5, f1 -1)
        are returned.
    """
    # Coerce once so plain lists work too; `list >= float` would raise TypeError.
    y_true = np.asarray(y_true).astype(int)
    probs = np.asarray(probs).astype(float)

    best = {"f1": -1, "threshold": 0.5, "precision": 0, "recall": 0}
    for t in thresholds:
        pred = (probs >= t).astype(int)
        f1 = f1_score(y_true, pred, zero_division=0)
        if f1 > best["f1"]:
            best = {
                "f1": float(f1),
                "threshold": float(t),
                "precision": float(precision_score(y_true, pred, zero_division=0)),
                "recall": float(recall_score(y_true, pred, zero_division=0)),
            }
    return best

# =========================================================
# 1) LOAD DATA
# =========================================================
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found at: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)
df.columns = [c.strip() for c in df.columns]

if TARGET not in df.columns:
    raise ValueError(f"TARGET='{TARGET}' not found. Available columns: {df.columns.tolist()}")

print("Loaded:", DATA_PATH)
print("Shape:", df.shape)
print("Attack rate:", df[TARGET].mean())

# =========================================================
# 2) FEATURES / LABELS (DROP LEAKY + IDS)
# =========================================================
drop_cols = [TARGET]

if "Unnamed: 0" in df.columns:
    drop_cols.append("Unnamed: 0")

if "attack_type" in df.columns:
    drop_cols.append("attack_type")

for c in ["user_id", "ip_address", "device_id", "timestamp"]:
    if c in df.columns:
        drop_cols.append(c)

X = df.drop(columns=[c for c in drop_cols if c in df.columns])
y = df[TARGET].astype(int).values

# =========================================================
# 3) SPLIT train/val/test
# =========================================================
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.20, random_state=SEED, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.20, random_state=SEED, stratify=y_train_full
)

X_train_normal = X_train[y_train == 0]

print("\nSplit sizes:")
print("Train full:", X_train_full.shape, "Test:", X_test.shape)
print("Train:", X_train.shape, "Val:", X_val.shape)
print("Train normals only:", X_train_normal.shape)

# =========================================================
# 4) PREPROCESSOR (fit on normal training only)
# =========================================================
num_cols = [c for c in X.columns if is_numeric_dtype(X[c])]
cat_cols = [c for c in X.columns if c not in num_cols]

print("\nNumeric cols:", len(num_cols))
print("Categorical cols:", len(cat_cols))

preprocessor = ColumnTransformer(
    transformers=[
        ("num", RobustScaler(with_centering=True, with_scaling=True), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ],
    remainder="drop"
)

X_train_normal_p = preprocessor.fit_transform(X_train_normal)
X_train_full_p   = preprocessor.transform(X_train_full)
X_val_p          = preprocessor.transform(X_val)
X_test_p         = preprocessor.transform(X_test)

print("\nTransformed shapes:")
print("X_train_normal_p:", X_train_normal_p.shape)
print("X_train_full_p:  ", X_train_full_p.shape)
print("X_val_p:         ", X_val_p.shape)
print("X_test_p:        ", X_test_p.shape)

# =========================================================
# 5) TRAIN 3 UNSUPERVISED MODELS (on normals)
# =========================================================
iso = IsolationForest(
    n_estimators=400,
    max_samples="auto",
    contamination="auto",
    random_state=SEED,
    n_jobs=-1
)
ocsvm = OneClassSVM(kernel="rbf", nu=0.05, gamma="scale")
lof = LocalOutlierFactor(n_neighbors=35, novelty=True, metric="minkowski")

iso.fit(X_train_normal_p)
ocsvm.fit(X_train_normal_p)
lof.fit(X_train_normal_p)

def anomaly_scores(model, Xp):
    """Return the negated `model.decision_function(Xp)`, so that higher => more anomalous."""
    raw = model.decision_function(Xp)
    return -raw

# =========================================================
# 6) META-FEATURES FROM 3 MODELS  => [N, 3, 3]
# =========================================================
def build_meta_features(Xp, score_stats=None):
    """
    Build the [N, 3, 3] meta-feature tensor from the three fitted detectors.

    For each detector (iso, ocsvm, lof, in that order) three values per sample
    are produced:
      0. the raw anomaly score (higher => more anomalous),
      1. the z-score of that score using `score_stats`,
      2. the rank of that score within `Xp`, scaled to [0, 1].

    Args:
        Xp: preprocessed feature matrix to score.
        score_stats: optional ``(mu, sd)`` pair. When None, the mean and standard
            deviation are computed from `Xp` itself and returned for reuse.

    Returns:
        (feats, score_stats): float32 array of shape [N, 3, 3] and the
        ``(mu, sd)`` pair that was used.

    NOTE(review): the rank feature is always computed relative to the rows of the
    current call, so it depends on which samples are scored together — confirm
    this is intended for validation/test data.
    """
    per_model = [anomaly_scores(detector, Xp) for detector in (iso, ocsvm, lof)]
    S = np.vstack(per_model).T  # [N, 3]

    if score_stats is not None:
        mu, sd = score_stats
    else:
        mu = S.mean(axis=0, keepdims=True)
        sd = S.std(axis=0, keepdims=True) + 1e-8  # epsilon guards against division by zero
        score_stats = (mu, sd)

    Z = (S - mu) / sd  # [N, 3]

    # Rank -> [0, 1]: the lowest score in a column gets 0, the highest gets 1.
    n_rows, n_models = S.shape
    grid = np.linspace(0, 1, n_rows)
    R = np.zeros_like(S)
    for col in range(n_models):
        sort_idx = np.argsort(S[:, col])
        R[sort_idx, col] = grid

    feats = np.stack([S, Z, R], axis=-1)  # [N, 3, 3]
    return feats.astype(np.float32), score_stats

X_train_full_feats, stats = build_meta_features(X_train_full_p, score_stats=None)
X_val_feats, _ = build_meta_features(X_val_p, score_stats=stats)
X_test_feats, _ = build_meta_features(X_test_p, score_stats=stats)

print("\nMeta-feature shapes:")
print("train:", X_train_full_feats.shape, "val:", X_val_feats.shape, "test:", X_test_feats.shape)

# =========================================================
# 7) ATTENTION-ONLY FUSION MODEL (NO GATE)
# =========================================================
class AttentionOnlyFusion(nn.Module):
    """
    Self-attention fusion over per-model meta-features.

    Input:  [B, M, D]  M=3 models, D=3 meta-features
    Output: logit [B]

    Each model's D features are projected to `d_model`, the M tokens attend to
    each other with multi-head self-attention, the attended tokens are averaged
    over M, passed through a LayerNorm -> Linear -> ReLU -> Dropout block and
    mapped to a single logit.
    """

    def __init__(self, d_in=3, d_model=32, n_heads=4, dropout=0.1):
        super().__init__()
        self.proj = nn.Linear(d_in, d_model)
        self.attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)

        pool_layers = [
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        self.pool = nn.Sequential(*pool_layers)

        self.head = nn.Linear(d_model, 1)

    def forward(self, x):
        tokens = self.proj(x)                             # [B, M, d_model]
        attended = self.attn(tokens, tokens, tokens)[0]   # [B, M, d_model]
        pooled = self.pool(attended.mean(dim=1))          # [B, d_model]
        return self.head(pooled).squeeze(1)               # [B]

def make_loader(X_feats, y, batch_size=512, shuffle=True):
    """
    Wrap meta-features and labels in a DataLoader of float32 tensors.

    Args:
        X_feats: array-like of features, converted to a float32 tensor.
        y: array-like of labels (ndarray, list, pandas Series, ...), converted to
            a float32 tensor.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the data every epoch.

    Returns:
        DataLoader yielding ``(features, labels)`` batches; the last, possibly
        smaller, batch is kept.
    """
    Xt = torch.tensor(np.asarray(X_feats), dtype=torch.float32)
    # np.asarray accepts any array-like, whereas `y.astype(...)` required an ndarray.
    yt = torch.tensor(np.asarray(y, dtype=np.float32), dtype=torch.float32)
    return DataLoader(TensorDataset(Xt, yt), batch_size=batch_size, shuffle=shuffle, drop_last=False)

# Train the fusion model on the TRAIN split only. X_train_full still contains the
# validation rows (X_val is split out of X_train_full), so fitting on it would make
# the validation-based early stopping and threshold tuning operate on data the model
# has already seen. `stats` is reused so train/val/test z-scores share one scale.
X_train_feats, _ = build_meta_features(preprocessor.transform(X_train), score_stats=stats)

train_loader = make_loader(X_train_feats, y_train, batch_size=512, shuffle=True)
val_loader = make_loader(X_val_feats, y_val, batch_size=1024, shuffle=False)
test_loader = make_loader(X_test_feats, y_test, batch_size=1024, shuffle=False)

model = AttentionOnlyFusion(d_in=3, d_model=32, n_heads=4, dropout=0.1).to(DEVICE)
criterion = nn.BCEWithLogitsLoss()  # expects raw logits and float targets
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

def predict_probs(loader):
    """
    Run the global `model` over `loader` and return labels and attack probabilities.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        loader: DataLoader yielding ``(features, labels)`` batches.

    Returns:
        (y, probs): int label array and float array of sigmoid probabilities,
        both in loader order.
    """
    model.eval()
    all_probs, all_y = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(DEVICE)
            logits = model(xb)
            # torch.sigmoid is numerically stable; 1 / (1 + np.exp(-logits))
            # overflows in float32 for strongly negative logits.
            all_probs.append(torch.sigmoid(logits).cpu().numpy())
            all_y.append(yb.numpy())
    y = np.concatenate(all_y).astype(int)
    probs = np.concatenate(all_probs)
    return y, probs

# Train the fusion model for up to 50 epochs, keeping the weights with the best
# validation F1 (at a fixed 0.5 threshold) and stopping early when it stalls.
best_f1 = -1.0       # best validation F1 seen so far
best_state = None    # CPU copy of the model weights at that epoch
patience = 8         # consecutive non-improving epochs tolerated before stopping
wait = 0             # current count of consecutive non-improving epochs

for epoch in range(1, 51):
    # predict_probs() below puts the model in eval mode, so switch back every epoch.
    model.train()
    losses = []

    for xb, yb in train_loader:
        xb = xb.to(DEVICE)
        yb = yb.to(DEVICE)

        opt.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        opt.step()
        losses.append(loss.item())

    # Validation metrics for this epoch, with predictions cut at 0.5.
    yv, pv = predict_probs(val_loader)
    val_metrics, _, _ = evaluate_probs(yv, pv, threshold=0.5)

    print(f"Epoch {epoch:02d} | loss={np.mean(losses):.4f} | val_f1={val_metrics['f1']:.4f} | val_pr_auc={val_metrics['pr_auc']:.4f}")

    # An epoch only counts as an improvement if F1 rises by more than 1e-6.
    if val_metrics["f1"] > best_f1 + 1e-6:
        best_f1 = val_metrics["f1"]
        # Clone to CPU so later optimiser steps cannot change the snapshot.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping.")
            break

# Restore the best-scoring weights before any further evaluation.
if best_state is not None:
    model.load_state_dict(best_state)

# =========================================================
# 8) THRESHOLD TUNING ON VAL + TEST METRICS
# =========================================================
yv, pv = predict_probs(val_loader)
best_thr = tune_threshold_by_f1_probs(yv, pv)
print("\nBest VAL threshold by F1:", best_thr)

yt, pt = predict_probs(test_loader)
test_metrics, test_cm, test_report = evaluate_probs(yt, pt, threshold=best_thr["threshold"])

print(f"\n=== Attention-Only Fusion TEST @ thr={best_thr['threshold']:.2f} ===")
print("Accuracy :", test_metrics["accuracy"])
print("Precision:", test_metrics["precision"])
print("Recall   :", test_metrics["recall"])
print("F1       :", test_metrics["f1"])
print("ROC-AUC  :", test_metrics["roc_auc"])
print("PR-AUC   :", test_metrics["pr_auc"])
print("\nConfusion Matrix:\n", test_cm)
print("\nClassification Report:\n", test_report)

# =========================================================
# 9) SAVE OUTPUTS
# =========================================================
fusion_dir = os.path.join(OUTPUT_DIR, "Fusion_Attention_Only")
os.makedirs(fusion_dir, exist_ok=True)

pd.DataFrame(
    test_cm,
    index=["true_0_normal", "true_1_attack"],
    columns=["pred_0_normal", "pred_1_attack"]
).to_csv(os.path.join(fusion_dir, "confusion_matrix.csv"))

save_json(os.path.join(fusion_dir, "metrics.json"), test_metrics)
save_text(os.path.join(fusion_dir, "classification_report.txt"), test_report)

pd.DataFrame({"y_true": yt, "p_attack": pt}).to_csv(os.path.join(fusion_dir, "test_predictions.csv"), index=False)

print("\n✅ DONE. Outputs saved to:", os.path.abspath(fusion_dir))


Loaded: C:\Users\varsh\OneDrive\Desktop\NITK\data.csv
Shape: (103000, 18)
Attack rate: 0.02912621359223301

Split sizes:
Train full: (82400, 11) Test: (20600, 11)
Train: (65920, 11) Val: (16480, 11)
Train normals only: (64000, 11)

Numeric cols: 8
Categorical cols: 3

Transformed shapes:
X_train_normal_p: (64000, 16)
X_train_full_p:   (82400, 16)
X_val_p:          (16480, 16)
X_test_p:         (20600, 16)

Meta-feature shapes:
train: (82400, 3, 3) val: (16480, 3, 3) test: (20600, 3, 3)
Epoch 01 | loss=0.0818 | val_f1=0.7689 | val_pr_auc=0.8702
Epoch 02 | loss=0.0384 | val_f1=0.7799 | val_pr_auc=0.8740
Epoch 03 | loss=0.0318 | val_f1=0.8069 | val_pr_auc=0.8848
Epoch 04 | loss=0.0281 | val_f1=0.8089 | val_pr_auc=0.8931
Epoch 05 | loss=0.0269 | val_f1=0.8089 | val_pr_auc=0.9025
Epoch 06 | loss=0.0268 | val_f1=0.8049 | val_pr_auc=0.8921
Epoch 07 | loss=0.0258 | val_f1=0.8094 | val_pr_auc=0.9116
Epoch 08 | loss=0.0262 | val_f1=0.8089 | val_pr_auc=0.9043
Epoch 09 | loss=0.0257 | val_f1=0.808

In [None]:
# ============================
# PRINT METRICS (VAL + TEST)
# ============================

# 1) Validation metrics @ tuned threshold
val_metrics, val_cm, val_report = evaluate_probs(yv, pv, threshold=best_thr["threshold"])

print("\n================ VALIDATION METRICS ================")
print(f"Threshold : {best_thr['threshold']:.2f}")
print(f"Accuracy  : {val_metrics['accuracy']:.4f}")
print(f"Precision : {val_metrics['precision']:.4f}")
print(f"Recall    : {val_metrics['recall']:.4f}")
print(f"F1-score  : {val_metrics['f1']:.4f}")
print(f"ROC-AUC   : {val_metrics['roc_auc']:.4f}")
print(f"PR-AUC    : {val_metrics['pr_auc']:.4f}")
print("Confusion Matrix:\n", val_cm)
print("\nClassification Report:\n", val_report)

# 2) Test metrics @ tuned threshold
test_metrics, test_cm, test_report = evaluate_probs(yt, pt, threshold=best_thr["threshold"])

print("\n=================== TEST METRICS ===================")
print(f"Threshold : {best_thr['threshold']:.2f}")
print(f"Accuracy  : {test_metrics['accuracy']:.4f}")
print(f"Precision : {test_metrics['precision']:.4f}")
print(f"Recall    : {test_metrics['recall']:.4f}")
print(f"F1-score  : {test_metrics['f1']:.4f}")
print(f"ROC-AUC   : {test_metrics['roc_auc']:.4f}")
print(f"PR-AUC    : {test_metrics['pr_auc']:.4f}")
print("Confusion Matrix:\n", test_cm)
print("\nClassification Report:\n", test_report)



Threshold : 0.19
Accuracy  : 0.9904
Precision : 0.9257
Recall    : 0.7271
F1-score  : 0.8145
ROC-AUC   : 0.9957
PR-AUC    : 0.9116
Confusion Matrix:
 [[15972    28]
 [  131   349]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9919    0.9982    0.9950     16000
           1     0.9257    0.7271    0.8145       480

    accuracy                         0.9904     16480
   macro avg     0.9588    0.8627    0.9048     16480
weighted avg     0.9899    0.9904    0.9898     16480


Threshold : 0.19
Accuracy  : 0.9907
Precision : 0.9113
Recall    : 0.7533
F1-score  : 0.8248
ROC-AUC   : 0.9961
PR-AUC    : 0.9170
Confusion Matrix:
 [[19956    44]
 [  148   452]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9926    0.9978    0.9952     20000
           1     0.9113    0.7533    0.8248       600

    accuracy                         0.9907     20600
   macro avg     0.9520    0.8756    0.91

In [None]:
print("Meta-feature tensor shape:", X_train_full_feats.shape)  # (N, 3, 3)
print("Models (tokens):", X_train_full_feats.shape[1])         # 3
print("Features per model:", X_train_full_feats.shape[2])      # 3
print("Total features per sample (flattened):", X_train_full_feats.shape[1] * X_train_full_feats.shape[2])  # 9


Meta-feature tensor shape: (82400, 3, 3)
Models (tokens): 3
Features per model: 3
Total features per sample (flattened): 9


In [None]:
X_train_flat = X_train_full_feats.reshape(X_train_full_feats.shape[0], -1)
print("Flattened shape:", X_train_flat.shape)  # (N, 9)


Flattened shape: (82400, 9)


# Feature extraction with a Transformer-based variational autoencoder (VAE)

In [None]:
# =========================================================
# TRANSFORMER VAE FEATURE EXTRACTION + 3 MODELS + METRICS
# =========================================================

import os, json
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from pandas.api.types import is_numeric_dtype

from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor

from sklearn.metrics import (
    confusion_matrix, classification_report,
    precision_score, recall_score, f1_score, accuracy_score,
    roc_auc_score, average_precision_score
)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# -----------------------------
# SETTINGS
# -----------------------------
# Seed the NumPy and PyTorch global random number generators.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The dataset location is machine-specific. It can be overridden through the
# DATA_PATH environment variable so the notebook runs elsewhere without code
# edits; when the variable is unset the original local path is used.
DATA_PATH = os.environ.get("DATA_PATH", r"C:\Users\varsh\OneDrive\Desktop\NITK\data.csv")
TARGET = "is_attack"
OUTPUT_DIR = "./outputs_vae_transformer"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Drop leaky / ID columns (recommended)
DROP_IDS = ["Unnamed: 0", "user_id", "ip_address", "device_id", "timestamp"]
DROP_LEAKY = ["attack_type"]  # often leakage

# -----------------------------
# METRICS HELPERS
# -----------------------------
def tune_threshold_by_f1(y_true, scores, percentiles=np.linspace(80, 99.9, 80)):
    """
    Sweep candidate cut-offs over `scores` and keep the one with the best F1.

    Each candidate threshold is the p-th percentile of `scores`; a sample is
    predicted positive when its score is strictly greater than the threshold.

    y_true      : array-like of labels, cast to int
    scores      : array-like of anomaly scores, cast to float
    percentiles : iterable of percentiles (0-100) to try as thresholds

    Returns a dict with keys "f1", "threshold", "precision", "recall" and
    "percentile" describing the winning candidate. Only a strict F1
    improvement replaces the current winner, so ties keep the candidate seen
    first. With no candidates the placeholder (f1=-1, everything else None)
    is returned unchanged.
    """
    labels = np.asarray(y_true).astype(int)
    anomaly_scores = np.asarray(scores).astype(float)

    winner = {"f1": -1, "threshold": None, "precision": None, "recall": None, "percentile": None}
    for pct in percentiles:
        cutoff = np.percentile(anomaly_scores, pct)
        flagged = (anomaly_scores > cutoff).astype(int)
        candidate_f1 = f1_score(labels, flagged, zero_division=0)

        # Skip anything that does not strictly beat the best F1 so far.
        if not (candidate_f1 > winner["f1"]):
            continue

        # Precision / recall are only needed for the new winner.
        winner = dict(
            f1=float(candidate_f1),
            threshold=float(cutoff),
            precision=float(precision_score(labels, flagged, zero_division=0)),
            recall=float(recall_score(labels, flagged, zero_division=0)),
            percentile=float(pct),
        )
    return winner

def evaluate_from_scores(y_true, scores, threshold, model_name):
    """
    Threshold anomaly scores and compute classification and ranking metrics.

    scores: higher => more anomalous

    y_true     : array-like of 0/1 labels, cast to int
    scores     : array-like of anomaly scores, cast to float
    threshold  : a sample is predicted as class 1 when score > threshold
    model_name : label stored under the "model" key of the metrics dict

    Returns (metrics, cm, report):
      metrics : dict with model, tn, fp, fn, tp, accuracy, precision, recall,
                f1, roc_auc, pr_auc and threshold
      cm      : 2x2 confusion matrix with label order [0, 1]
      report  : text classification report for labels [0, 1]

    ROC-AUC is reported as NaN when `y_true` does not contain both classes,
    and PR-AUC as NaN when it contains no positive sample, because the
    metrics are undefined there (scikit-learn raises or warns in that case,
    depending on version). Every other metric is still returned.
    """
    y_true = np.asarray(y_true).astype(int)
    scores = np.asarray(scores).astype(float)
    y_pred = (scores > threshold).astype(int)

    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Guard the ranking metrics: they need positives (and, for ROC, negatives).
    has_positive = bool((y_true == 1).any())
    has_negative = bool((y_true == 0).any())
    if has_positive and has_negative:
        roc_auc = float(roc_auc_score(y_true, scores))
    else:
        roc_auc = float("nan")
    if has_positive:
        pr_auc = float(average_precision_score(y_true, scores))
    else:
        pr_auc = float("nan")

    metrics = {
        "model": model_name,
        "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
        "accuracy": float(accuracy_score(y_true, y_pred)),
        "precision": float(precision_score(y_true, y_pred, zero_division=0)),
        "recall": float(recall_score(y_true, y_pred, zero_division=0)),
        "f1": float(f1_score(y_true, y_pred, zero_division=0)),
        "roc_auc": roc_auc,
        "pr_auc": pr_auc,
        "threshold": float(threshold),
    }
    # Same label set and zero_division handling as the confusion matrix and
    # the scalar metrics above, so the report always has rows for 0 and 1.
    report = classification_report(
        y_true, y_pred, labels=[0, 1], digits=4, zero_division=0
    )
    return metrics, cm, report

def print_metrics_block(title, metrics, cm, report):
    """
    Print one model's evaluation: a titled banner, the scalar metrics, the
    confusion matrix and the classification report.

    title   : text shown inside the banner line
    metrics : mapping with keys accuracy, precision, recall, f1, roc_auc,
              pr_auc and threshold (all formatted as floats)
    cm      : confusion matrix, printed with its default string form
    report  : classification report text
    """
    header_and_scalars = [
        f"\n================ {title} ================",
        f"Accuracy : {metrics['accuracy']:.4f}",
        f"Precision: {metrics['precision']:.4f}",
        f"Recall   : {metrics['recall']:.4f}",
        f"F1       : {metrics['f1']:.4f}",
        f"ROC-AUC  : {metrics['roc_auc']:.4f}",
        f"PR-AUC   : {metrics['pr_auc']:.4f}",
        f"Threshold: {metrics['threshold']:.6f}",
    ]
    print("\n".join(header_and_scalars))
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", report)

# =========================================================
# 1) LOAD DATA
# =========================================================
# Fail fast with a readable message when the CSV is missing.
dataset_found = os.path.exists(DATA_PATH)
if not dataset_found:
    raise FileNotFoundError(f"Dataset not found at: {DATA_PATH}")

# Read the file and strip stray whitespace around the header names so the
# column lookups below match.
df = pd.read_csv(DATA_PATH)
df.columns = list(map(str.strip, df.columns))

# The label column must exist before anything else is built from the frame.
available_columns = df.columns.tolist()
if TARGET not in available_columns:
    raise ValueError(f"TARGET='{TARGET}' not found. Available: {df.columns.tolist()}")

attack_rate = df[TARGET].mean()
print("Loaded:", DATA_PATH)
print("Shape:", df.shape)
print("Attack rate:", attack_rate)

# =========================================================
# 2) BUILD X,y (drop ids/leaky)
# =========================================================
# Columns removed from the feature matrix: the label itself plus whichever
# identifier / leakage columns are actually present in this file.
drop_cols = [TARGET] + [name for name in DROP_IDS + DROP_LEAKY if name in df.columns]

# errors="ignore" skips any listed column that is absent from the frame.
X = df.drop(columns=drop_cols, errors="ignore")
y = df[TARGET].astype(int).to_numpy()

# =========================================================
# 3) SPLIT train/val/test
# =========================================================
# Hold out 20% of the rows for testing, then carve 20% of the remainder off
# as validation. Both splits are stratified on the label and use SEED.
HOLDOUT_FRACTION = 0.20

X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y,
    test_size=HOLDOUT_FRACTION, random_state=SEED, stratify=y,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full,
    test_size=HOLDOUT_FRACTION, random_state=SEED, stratify=y_train_full,
)

# Keep only the training rows labelled 0 (the normal class).
is_normal_train = (y_train == 0)
X_train_normal = X_train[is_normal_train]

print("\nSplit sizes:")
print("Train full:", X_train_full.shape, "Test:", X_test.shape)
print("Train:", X_train.shape, "Val:", X_val.shape)
print("Train normals only:", X_train_normal.shape)

# =========================================================
# 4) PREPROCESS (fit on normal only)
# =========================================================
# Partition the feature columns by dtype in a single pass: numeric columns
# are scaled, everything else is one-hot encoded.
num_cols, cat_cols = [], []
for col_name in X.columns:
    bucket = num_cols if is_numeric_dtype(X[col_name]) else cat_cols
    bucket.append(col_name)

numeric_scaler = RobustScaler(with_centering=True, with_scaling=True)
# handle_unknown="ignore": categories unseen during fit do not raise at transform time.
categorical_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_scaler, num_cols),
        ("cat", categorical_encoder, cat_cols),
    ],
    remainder="drop",
)

# Fit on the normal-only training rows, then apply the fitted transform to
# the other splits.
X_train_normal_p = preprocessor.fit_transform(X_train_normal)
X_train_full_p, X_val_p, X_test_p = (
    preprocessor.transform(split_frame)
    for split_frame in (X_train_full, X_val, X_test)
)

print("\nPreprocessed dims:", X_train_full_p.shape[1])

# =========================================================
# 5) TRANSFORMER VAE FOR TABULAR (PATCH TRANSFORMER)
# =========================================================
class PatchTransformerVAE(nn.Module):
    """
    Variational autoencoder for dense feature vectors with a Transformer encoder.

    For an input batch [B, F]:
      1. zero-pad the feature axis up to a multiple of `patch_size`
      2. cut it into P = ceil(F / patch_size) patches and project each patch
         to `d_model`
      3. run the patch tokens through a TransformerEncoder and mean-pool them
      4. map the pooled vector to the latent parameters (mu, logvar)
      5. decode a sampled latent with an MLP to the padded width and trim
         back to F columns

    Feature extraction = mu (latent embedding).
    """

    def __init__(self, feat_dim, patch_size=32, d_model=128, n_heads=4, n_layers=2,
                 latent_dim=32, dropout=0.1):
        super().__init__()
        self.feat_dim = feat_dim
        self.patch_size = patch_size

        # Patch bookkeeping: how many patches cover the features, the padded
        # width they span, and how many zero columns must be appended.
        self.n_patches = int(np.ceil(feat_dim / patch_size))
        self.pad_dim = self.n_patches * patch_size
        self.pad_needed = self.pad_dim - feat_dim

        # Keep the creation order of the layers below: parameter
        # initialisation draws from the global RNG, so reordering them would
        # change the weights obtained for a given seed.
        self.patch_proj = nn.Linear(patch_size, d_model)

        token_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
        )
        self.encoder_tf = nn.TransformerEncoder(token_layer, num_layers=n_layers)

        # Applied to the mean-pooled token representation.
        self.pool = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model),
            nn.GELU(),
        )

        # Two heads producing the latent mean and log-variance.
        self.mu = nn.Linear(d_model, latent_dim)
        self.logvar = nn.Linear(d_model, latent_dim)

        # Decoder: latent -> hidden -> full padded vector (trimmed in forward).
        hidden_width = d_model * 2
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_width),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_width, self.pad_dim),
        )

    def _pad(self, x):
        """Append zero columns so the feature axis has width `pad_dim`; no-op if none are needed."""
        if self.pad_needed <= 0:
            return x
        zero_cols = x.new_zeros((x.size(0), self.pad_needed))  # same dtype/device as x
        return torch.cat([x, zero_cols], dim=1)

    def encode(self, x):
        """Map x [B, F] to the latent parameters (mu, logvar), each [B, latent_dim]."""
        batch = x.size(0)
        padded = self._pad(x)                                           # [B, pad_dim]
        patches = padded.view(batch, self.n_patches, self.patch_size)   # [B, P, patch]
        tokens = self.encoder_tf(self.patch_proj(patches))              # [B, P, d_model]
        pooled = self.pool(tokens.mean(dim=1))                          # [B, d_model]
        return self.mu(pooled), self.logvar(pooled)

    def reparameterize(self, mu, logvar):
        """Draw z = mu + eps * std with eps ~ N(0, I) and std = exp(0.5 * logvar)."""
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std

    def forward(self, x):
        """Return (recon [B, F], mu, logvar); recon is decoded from a sampled latent."""
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        recon_padded = self.decoder(z)                # [B, pad_dim]
        recon = recon_padded[:, :self.feat_dim]       # drop the padding columns -> [B, F]
        return recon, mu, logvar

def vae_loss(recon, x, mu, logvar, beta=1.0):
    """
    VAE objective: reconstruction MSE plus beta times the KL term.

    Both terms are means over every element of their tensors. The KL term is
    -0.5 * mean(1 + logvar - mu^2 - exp(logvar)).

    Returns (total, recon_loss, kl). `total` keeps its graph for
    backpropagation; the two components are detached, for logging only.
    """
    squared_error = (recon - x).pow(2)
    recon_loss = squared_error.mean()

    kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
    kl = -0.5 * kl_elements.mean()

    total = recon_loss + beta * kl
    return total, recon_loss.detach(), kl.detach()

# ---- Train VAE on normal-only data
# The model is fitted on the preprocessed NORMAL training rows only and
# early-stopped on the validation loss; the best-scoring weights are restored
# at the end.
feat_dim = X_train_normal_p.shape[1]
# NOTE(review): the recorded run of this cell reports 16 preprocessed dims.
# With patch_size=32 that gives ceil(16/32) = 1 patch, i.e. the Transformer
# encoder sees a single token per row — confirm this is intended, or use a
# patch_size smaller than the feature count.
vae = PatchTransformerVAE(
    feat_dim=feat_dim,
    patch_size=32,
    d_model=128,
    n_heads=4,
    n_layers=2,
    latent_dim=32,
    dropout=0.1
).to(DEVICE)

opt = torch.optim.AdamW(vae.parameters(), lr=1e-3, weight_decay=1e-4)

Xtr = torch.tensor(X_train_normal_p, dtype=torch.float32)
# NOTE(review): X_val_p is the whole validation split (normal and attack rows),
# so the early-stopping loss below is measured on a mixed set even though the
# labels themselves are never read here — confirm that is intended.
Xv  = torch.tensor(X_val_p, dtype=torch.float32)  # only for monitoring recon, not labels

train_loader = DataLoader(TensorDataset(Xtr), batch_size=1024, shuffle=True, drop_last=False)

best_val = float("inf")   # lowest validation loss seen so far
best_state = None         # CPU copy of the weights that produced best_val
patience, wait = 8, 0     # stop after `patience` consecutive epochs without improvement
EPOCHS = 50
BETA = 0.5  # you can try 0.1..1.0

print("\nTraining Transformer-VAE on NORMAL samples...")
for epoch in range(1, EPOCHS + 1):
    # ---- one pass over the shuffled normal-only training data
    vae.train()
    losses = []
    for (xb,) in train_loader:
        xb = xb.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        recon, mu, logvar = vae(xb)
        loss, rloss, kl = vae_loss(recon, xb, mu, logvar, beta=BETA)
        loss.backward()
        opt.step()
        losses.append(loss.item())

    # quick val recon monitoring
    # The full validation tensor is pushed through the model in one batch.
    # eval() does not switch off the latent sampling in forward(), so this
    # loss still contains sampling noise.
    vae.eval()
    with torch.no_grad():
        xv = Xv.to(DEVICE)
        recon_v, mu_v, logvar_v = vae(xv)
        val_loss, val_rloss, val_kl = vae_loss(recon_v, xv, mu_v, logvar_v, beta=BETA)

    # train= is the unweighted mean of the per-batch losses; recon= and kl=
    # are the validation components.
    print(f"Epoch {epoch:02d} | train={np.mean(losses):.6f} | val={val_loss.item():.6f} | recon={val_rloss.item():.6f} | kl={val_kl.item():.6f}")

    # Early stopping: an improvement must beat the best loss by more than 1e-7.
    if val_loss.item() < best_val - 1e-7:
        best_val = val_loss.item()
        # Snapshot the weights on the CPU so later updates cannot alter them.
        best_state = {k: v.detach().cpu().clone() for k, v in vae.state_dict().items()}
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping VAE.")
            break

# Restore the best checkpoint (best_state stays None only if no epoch improved on inf).
if best_state is not None:
    vae.load_state_dict(best_state)

# =========================================================
# 6) FEATURE EXTRACTION FROM VAE (use mu)
# =========================================================
def extract_mu(Xp):
    """
    Encode preprocessed rows with the trained VAE and return the latent means.

    Xp : preprocessed feature matrix (expected [N, F]); converted to a float32
         tensor on DEVICE and encoded in a single pass.

    Returns a NumPy array holding `mu` for every row. The log-variance is
    discarded and no sampling takes place. Runs in eval mode with gradients
    disabled.
    """
    vae.eval()
    inputs = torch.tensor(Xp, dtype=torch.float32, device=DEVICE)
    with torch.no_grad():
        latent_mean, _ = vae.encode(inputs)
    return latent_mean.cpu().numpy()

# Latent means for every split, computed in the same order as before:
# full training set, normal-only training rows, validation, test.
Z_train_full, Z_train_norm, Z_val, Z_test = (
    extract_mu(split_p)
    for split_p in (X_train_full_p, X_train_normal_p, X_val_p, X_test_p)
)

n_latent_features = Z_train_full.shape[1]
print("\nExtracted feature shape (latent mu):", Z_train_full.shape)
print("=> Number of extracted features per sample =", n_latent_features)

# Optional: VAE reconstruction error as anomaly score too
def recon_error(Xp):
    """
    Per-sample reconstruction error (mean absolute error) of the trained VAE.

    The reconstruction is decoded from the latent mean `mu`, not from a random
    draw z = mu + eps * std. The model's forward() samples fresh noise on
    every call and eval() does not disable that, so scoring through forward()
    would give the same row a different score each time it is evaluated and
    make the tuned threshold non-repeatable.

    Xp : preprocessed feature matrix (expected [N, F]); converted to a float32
         tensor on DEVICE.

    Returns a NumPy array with one score per row; higher => more anomalous.
    """
    vae.eval()
    Xt = torch.tensor(Xp, dtype=torch.float32, device=DEVICE)
    with torch.no_grad():
        mu, _ = vae.encode(Xt)
        # Mirror forward(): decode to the padded width, then trim to F columns.
        recon = vae.decoder(mu)[:, :vae.feat_dim]
        err = torch.mean(torch.abs(recon - Xt), dim=1)  # MAE per sample
    return err.cpu().numpy()

# Reconstruction-error scores for validation first, then test.
val_scores_vae, test_scores_vae = (
    recon_error(split_p) for split_p in (X_val_p, X_test_p)
)

# =========================================================
# 7) TRAIN 3 MODELS ON EXTRACTED FEATURES (NORMAL ONLY)
# =========================================================
# Three unsupervised detectors, all fitted on the latent means of the
# normal-only training rows.
iso = IsolationForest(
    n_estimators=400,
    contamination="auto",
    random_state=SEED,
    n_jobs=-1,
)
ocsvm = OneClassSVM(kernel="rbf", nu=0.05, gamma="scale")
lof = LocalOutlierFactor(
    n_neighbors=35,
    novelty=True,        # required so decision_function can score unseen rows
    metric="minkowski",
)

for detector in (iso, ocsvm, lof):
    detector.fit(Z_train_norm)

def score_model(model, Z):
    """
    Anomaly score from a fitted detector: its `decision_function`, negated.

    The sign flip turns the result into "higher => more anomalous".
    """
    decision_values = model.decision_function(Z)
    return -decision_values

# =========================================================
# 8) EVALUATION (threshold tuned on VAL by F1)
# =========================================================
def _tune_then_test(val_scores, test_scores, model_name, title):
    """Tune the threshold on validation scores, evaluate on test scores, print the block.

    Returns (best, metrics, cm, report) so the caller can keep every piece.
    """
    best = tune_threshold_by_f1(y_val, val_scores)
    metrics, cm, report = evaluate_from_scores(y_test, test_scores, best["threshold"], model_name)
    print_metrics_block(title, metrics, cm, report)
    return best, metrics, cm, report


results = []

# ---- VAE reconstruction (not one of the 3 models, but useful baseline)
best_vae, m_vae, cm_vae, rep_vae = _tune_then_test(
    val_scores_vae, test_scores_vae, "VAE_ReconError", "VAE Reconstruction Error"
)
results.append(m_vae)

# ---- IsolationForest on VAE features
val_scores_iso, test_scores_iso = score_model(iso, Z_val), score_model(iso, Z_test)
best_iso, m_iso, cm_iso, rep_iso = _tune_then_test(
    val_scores_iso, test_scores_iso, "IsolationForest_on_VAE", "IsolationForest on VAE Features"
)
results.append(m_iso)

# ---- OneClassSVM on VAE features
val_scores_svm, test_scores_svm = score_model(ocsvm, Z_val), score_model(ocsvm, Z_test)
best_svm, m_svm, cm_svm, rep_svm = _tune_then_test(
    val_scores_svm, test_scores_svm, "OneClassSVM_on_VAE", "OneClassSVM on VAE Features"
)
results.append(m_svm)

# ---- LOF on VAE features
val_scores_lof, test_scores_lof = score_model(lof, Z_val), score_model(lof, Z_test)
best_lof, m_lof, cm_lof, rep_lof = _tune_then_test(
    val_scores_lof, test_scores_lof, "LOF_on_VAE", "LOF on VAE Features"
)
results.append(m_lof)

# =========================================================
# 9) SUMMARY TABLE
# =========================================================
# One row per evaluated model, written as CSV into OUTPUT_DIR.
summary = pd.DataFrame(results)
summary_path = os.path.join(OUTPUT_DIR, "summary_metrics.csv")
summary.to_csv(summary_path, index=False)

# Columns shown in the printed comparison (confusion-matrix counts omitted).
summary_columns = ["model", "accuracy", "precision", "recall", "f1", "roc_auc", "pr_auc", "threshold"]
print("\n✅ Saved summary:", os.path.abspath(summary_path))
print(summary.loc[:, summary_columns])


Loaded: C:\Users\varsh\OneDrive\Desktop\NITK\data.csv
Shape: (103000, 18)
Attack rate: 0.02912621359223301

Split sizes:
Train full: (82400, 11) Test: (20600, 11)
Train: (65920, 11) Val: (16480, 11)
Train normals only: (64000, 11)

Preprocessed dims: 16

Training Transformer-VAE on NORMAL samples...
Epoch 01 | train=0.166227 | val=0.135743 | recon=0.108393 | kl=0.054700
Epoch 02 | train=0.130481 | val=0.129589 | recon=0.095021 | kl=0.069137
Epoch 03 | train=0.124651 | val=0.125028 | recon=0.087346 | kl=0.075363
Epoch 04 | train=0.118826 | val=0.120647 | recon=0.077162 | kl=0.086971
Epoch 05 | train=0.115769 | val=0.117973 | recon=0.072682 | kl=0.090583
Epoch 06 | train=0.113622 | val=0.116482 | recon=0.069781 | kl=0.093403
Epoch 07 | train=0.112233 | val=0.115705 | recon=0.068639 | kl=0.094132
Epoch 08 | train=0.111249 | val=0.114914 | recon=0.068729 | kl=0.092370
Epoch 09 | train=0.110775 | val=0.114052 | recon=0.065036 | kl=0.098032
Epoch 10 | train=0.110427 | val=0.114519 | recon=0.

In [None]:
# Give each latent dimension a name so the matrix can be inspected as a table.
latent_shape = Z_train_full.shape
latent_dim = latent_shape[1]
latent_feature_names = [f"z_mu_{i}" for i in range(latent_dim)]
latent_df = pd.DataFrame(Z_train_full, columns=latent_feature_names)

print("Extracted latent features:", latent_feature_names)
print("Latent feature matrix shape:", latent_shape)
print(latent_df.head())


Extracted latent features: ['z_mu_0', 'z_mu_1', 'z_mu_2', 'z_mu_3', 'z_mu_4', 'z_mu_5', 'z_mu_6', 'z_mu_7', 'z_mu_8', 'z_mu_9', 'z_mu_10', 'z_mu_11', 'z_mu_12', 'z_mu_13', 'z_mu_14', 'z_mu_15', 'z_mu_16', 'z_mu_17', 'z_mu_18', 'z_mu_19', 'z_mu_20', 'z_mu_21', 'z_mu_22', 'z_mu_23', 'z_mu_24', 'z_mu_25', 'z_mu_26', 'z_mu_27', 'z_mu_28', 'z_mu_29', 'z_mu_30', 'z_mu_31']
Latent feature matrix shape: (82400, 32)
     z_mu_0    z_mu_1    z_mu_2    z_mu_3    z_mu_4    z_mu_5    z_mu_6  \
0 -0.906946 -0.189529  0.002966  0.041748  1.001947 -0.021322  0.045265   
1 -0.431586 -0.207037 -0.033935  0.040579 -0.030113 -0.045382  0.016980   
2 -0.803885 -0.214530 -0.018176  0.027154 -0.263716 -0.023137  0.030048   
3  0.525172 -0.142282 -0.043666  0.037461  0.198523 -0.026219  0.045868   
4  0.913633  0.127134  0.012624 -0.048581 -0.360730  0.015022 -0.036639   

     z_mu_7    z_mu_8    z_mu_9  ...   z_mu_22   z_mu_23   z_mu_24   z_mu_25  \
0  0.038647  0.008978 -0.023701  ...  0.021850 -0.019756 -

In [None]:
# Save the trained model to a file
# NOTE(review): there is no visible top-level assignment to `model` in the
# nearby cells (the objects trained above are `vae`, `iso`, `ocsvm` and
# `lof`). Confirm which object an earlier cell bound to `model`; if none did,
# this cell raises NameError on a fresh Restart & Run All.
import joblib
model_filename = 'trained_model.pkl'  # relative path: written to the current working directory
joblib.dump(model, model_filename)
print(f"Model saved to {model_filename}")