In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Anomaly Detection on Tabular Data with 8 Models + Mutation Analysis

Models:
1. Isolation Forest
2. One-Class SVM
3. LocalOutlierFactor
4. Simple Autoencoder
5. Transformer-based model (EnergyBasedTabTransformer)
6. LSTM-based model
7. MLP-based model
8. Deep SVDD

Mutation Analysis: A1/A2 (large shifts => want anomaly=1)
                    A3     (random noise => want stability)
"""

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, TensorDataset
from transformers import AlbertModel
from torch.optim import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix)
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import random

############################################################################
# 1) Global Setup & Data Loading
############################################################################
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

file_path = 'vertebral.csv'
df = pd.read_csv(file_path)#.drop('ID', axis=1)

def clean_dataframe(df):
    for col in df.select_dtypes(include=['object']):
        df[col] = df[col].replace(r"[:\\[\\],]\'", '', regex=True)
    target_col = "Class"
    majority_value = df[target_col].value_counts().idxmax()
    df[target_col] = df[target_col].apply(lambda x: 1 if x == majority_value else 0)
    df = df.fillna(df.mean())
    return df

df = clean_dataframe(df)

target_col = "Class"

numeric_cols = df.select_dtypes(include=[np.number]).columns.drop(target_col)
all_cols     = [c for c in df.columns if c != target_col]
categorical_cols = list(set(all_cols) - set(numeric_cols))

scaler = MinMaxScaler(feature_range=(-1, 1))
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])



train_df, test_df = train_test_split(df, test_size=0.1, random_state=SEED)
train_df = train_df.reset_index(drop=True)
test_df  = test_df.reset_index(drop=True)

X_train = train_df[numeric_cols].values
y_train = train_df[target_col].values
X_test  = test_df[numeric_cols].values
y_test  = test_df[target_col].values

print("Train shape:", train_df.shape)
print("Test shape:",  test_df.shape)
print("Train label distribution:\n", train_df[target_col].value_counts())
print("Test label distribution:\n",  test_df[target_col].value_counts())

############################################################################
# 2) Traditional Baselines: IF, OCSVM, LOF, Autoencoder
############################################################################
# 2.1 Isolation Forest
from sklearn.ensemble import IsolationForest

iso_model = IsolationForest(n_estimators=300, random_state=SEED)
iso_model.fit(X_train)
iso_pred = iso_model.predict(X_test)   # +1=normal, -1=outlier
iso_pred_label = np.where(iso_pred == -1, 1, 0)

iso_acc  = accuracy_score(y_test, iso_pred_label)
iso_prec = precision_score(y_test, iso_pred_label, zero_division=0)
iso_rec  = recall_score(y_test, iso_pred_label, zero_division=0)
iso_f1   = f1_score(y_test, iso_pred_label, zero_division=0)

print("=== Isolation Forest ===")
print(f"Acc={iso_acc:.4f}, Prec={iso_prec:.4f}, Rec={iso_rec:.4f}, F1={iso_f1:.4f}")

# 2.2 One-Class SVM
from sklearn.svm import OneClassSVM

oc_svm = OneClassSVM(kernel='rbf', nu=0.1, gamma='auto')
oc_svm.fit(X_train[y_train==0])
svm_pred = oc_svm.predict(X_test)   # +1=normal, -1=outlier
svm_pred_label = np.where(svm_pred==-1, 1, 0)

svm_acc  = accuracy_score(y_test, svm_pred_label)
svm_prec = precision_score(y_test, svm_pred_label, zero_division=0)
svm_rec  = recall_score(y_test, svm_pred_label, zero_division=0)
svm_f1   = f1_score(y_test, svm_pred_label, zero_division=0)

print("\n=== One-Class SVM ===")
print(f"Acc={svm_acc:.4f}, Prec={svm_prec:.4f}, Rec={svm_rec:.4f}, F1={svm_f1:.4f}")

# 2.3 LocalOutlierFactor
from sklearn.neighbors import LocalOutlierFactor
lof_model = LocalOutlierFactor(n_neighbors=5, contamination=0.1, novelty=True)
lof_model.fit(X_train)
lof_pred = lof_model.predict(X_test)  # +1=normal, -1=outlier
lof_pred_label = np.where(lof_pred == -1, 1, 0)

lof_acc  = accuracy_score(y_test, lof_pred_label)
lof_prec = precision_score(y_test, lof_pred_label, zero_division=0)
lof_rec  = recall_score(y_test, lof_pred_label, zero_division=0)
lof_f1   = f1_score(y_test, lof_pred_label, zero_division=0)

print("\n=== Local Outlier Factor ===")
print(f"Acc={lof_acc:.4f}, Prec={lof_prec:.4f}, Rec={lof_rec:.4f}, F1={lof_f1:.4f}")

# 2.4 Simple Autoencoder
import torch.utils.data as torchdata

class SimpleAutoencoder(nn.Module):
    def __init__(self, input_dim=7, hidden_dim=4):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

X_train_normal = X_train[y_train == 0]
tensor_train_normal = torch.tensor(X_train_normal, dtype=torch.float32)
train_loader_ae = torchdata.DataLoader(tensor_train_normal, batch_size=32, shuffle=True)

autoenc = SimpleAutoencoder(input_dim=len(numeric_cols), hidden_dim=4).to(device)
optim_ae = torch.optim.Adam(autoenc.parameters(), lr=1e-3)
criterion_ae = nn.MSELoss()

epochs_ae = 20
for ep in range(epochs_ae):
    autoenc.train()
    for batch_x in train_loader_ae:
        batch_x = batch_x.to(device)
        optim_ae.zero_grad()
        recon = autoenc(batch_x)
        loss = criterion_ae(recon, batch_x)
        loss.backward()
        optim_ae.step()

autoenc.eval()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
with torch.no_grad():
    recon_test = autoenc(X_test_tensor)
    errors = torch.mean((recon_test - X_test_tensor)**2, dim=1).cpu().numpy()
threshold = np.percentile(errors, 90)
ae_pred_label = (errors > threshold).astype(int)

ae_acc  = accuracy_score(y_test, ae_pred_label)
ae_prec = precision_score(y_test, ae_pred_label, zero_division=0)
ae_rec  = recall_score(y_test, ae_pred_label, zero_division=0)
ae_f1   = f1_score(y_test, ae_pred_label, zero_division=0)

print("\n=== Simple Autoencoder ===")
print(f"Acc={ae_acc:.4f}, Prec={ae_prec:.4f}, Rec={ae_rec:.4f}, F1={ae_f1:.4f}")

############################################################################
# 3) Torch-based Baselines: Transformer, LSTM, MLP, Deep SVDD
############################################################################

# Helper: numeric dataset for classification
class NumericDataset(torchdata.Dataset):
    def __init__(self, X, y):
        super().__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset_torch = NumericDataset(X_train, y_train)
test_dataset_torch  = NumericDataset(X_test,  y_test)

train_loader_torch = torchdata.DataLoader(train_dataset_torch, batch_size=32, shuffle=True)
test_loader_torch  = torchdata.DataLoader(test_dataset_torch,  batch_size=32)

##################### 3.1 Transformer-based Model ###########################
from transformers import AlbertModel

class EnergyBasedTabTransformer(nn.Module):
    def __init__(self, albert_model, input_dim):
        super().__init__()
        self.albert_encoder = albert_model.encoder
        self.config = albert_model.config
        self.embedding_size = self.config.embedding_size

        # We do a simple numeric -> embedding linear transform
        self.numeric_linear = nn.Linear(input_dim, self.embedding_size)
        # We'll skip categorical for simplicity, or we can just treat them as none
        self.cls_token = nn.Parameter(torch.zeros(1,1,self.embedding_size))
        self.energy_layer = nn.Linear(self.config.hidden_size, 1)
        self.classifier   = nn.Linear(self.config.hidden_size, 2)

    def _convert_mask(self, attention_mask, dtype):
        extended_mask = attention_mask.unsqueeze(1).unsqueeze(2).to(dtype=dtype)
        return (1.0 - extended_mask) * -10000.0

    def forward(self, numeric_batch):
        """
        numeric_batch shape = (B, input_dim).
        We'll reshape: numeric->(B,1,embedding_size), plus [CLS] => total length=2
        Then pass into the ALBERT encoder (bypassing usual text).
        """
        bsz = numeric_batch.size(0)
        numeric_embed = self.numeric_linear(numeric_batch).unsqueeze(1)  # (B,1,embedding_size)

        cls_tok = self.cls_token.expand(bsz, -1, -1)                     # (B,1,embedding_size)
        seq_embeds = torch.cat([cls_tok, numeric_embed], dim=1)          # (B,2,embedding_size)
        attention_mask = torch.ones(bsz, seq_embeds.size(1), device=seq_embeds.device)
        extended_mask  = self._convert_mask(attention_mask, seq_embeds.dtype)

        outputs = self.albert_encoder(hidden_states=seq_embeds,
                                      attention_mask=extended_mask,
                                      head_mask=[None]*self.config.num_hidden_layers,
                                      output_hidden_states=True)
        cls_embedding = outputs.hidden_states[-1][:,0,:]  # (B, hidden_size)

        energy_score = self.energy_layer(cls_embedding).squeeze(-1)  # (B,)
        logits       = self.classifier(cls_embedding)                # (B,2)
        return energy_score, logits

albert_model = AlbertModel.from_pretrained("albert-base-v2")
transformer_model = EnergyBasedTabTransformer(albert_model, input_dim=len(numeric_cols)).to(device)

##################### 3.2 LSTM-based Model ###########################
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim=16):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_dim, batch_first=True)
        self.fc_classifier = nn.Linear(hidden_dim, 2)
        self.fc_energy     = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # x shape (B, input_dim)
        # Treat each feature as a "time step" => shape => (B, input_dim, 1)
        x = x.unsqueeze(-1)
        lstm_out, (h,c) = self.lstm(x)
        # lstm_out shape: (B, input_dim, hidden_dim)
        # We'll use last time step => out = lstm_out[:,-1,:]
        out = lstm_out[:, -1, :]
        energy_score = self.fc_energy(out).squeeze(-1)   # (B,)
        logits       = self.fc_classifier(out)           # (B,2)
        return energy_score, logits

lstm_model = LSTMModel(input_dim=len(numeric_cols), hidden_dim=16).to(device)

##################### 3.3 MLP-based Model ###########################
class MLPModel(nn.Module):
    def __init__(self, input_dim, hidden_dims=[32, 16]):
        super().__init__()
        layers=[]
        prev = input_dim
        for hd in hidden_dims:
            layers.append(nn.Linear(prev, hd))
            layers.append(nn.ReLU())
            prev=hd
        self.encoder = nn.Sequential(*layers)
        self.classifier = nn.Linear(prev, 2)
        self.energy_layer= nn.Linear(prev, 1)

    def forward(self, x):
        z = self.encoder(x)
        logits = self.classifier(z)
        energy_score = self.energy_layer(z).squeeze(-1)
        return energy_score, logits

mlp_model = MLPModel(input_dim=len(numeric_cols)).to(device)

##################### 3.4 Deep SVDD-based Model ###########################
class DeepSVDDModel(nn.Module):
    def __init__(self, input_dim, hidden_dims=[32,16], latent_dim=2):
        super().__init__()
        layers=[]
        prev = input_dim
        for hd in hidden_dims:
            layers.append(nn.Linear(prev, hd))
            layers.append(nn.ReLU())
            prev=hd
        layers.append(nn.Linear(prev, latent_dim))
        self.encoder = nn.Sequential(*layers)
        self.classifier = nn.Linear(latent_dim, 2)

    def forward(self, x):
        latent = self.encoder(x)
        logits = self.classifier(latent)  # (B,2)
        # energy => norm of latent
        energy_score = torch.norm(latent, p=2, dim=1)
        return energy_score, logits

svdd_model = DeepSVDDModel(input_dim=len(numeric_cols), hidden_dims=[32,16], latent_dim=2).to(device)

##################### Training & Evaluate Helper ###########################
def train_deep_classifier(model, train_loader, lr=1e-3, epochs=10):
    model.train()
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for ep in range(epochs):
        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            _, logits = model(Xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

def predict_labels(model, X):
    model.eval()
    with torch.no_grad():
        X_torch = torch.tensor(X, dtype=torch.float32).to(device)
        _, logits = model(X_torch)
        preds = torch.argmax(logits, dim=1).cpu().numpy()
    return preds

# Train each new model & Evaluate
for dl_model, name in [(transformer_model, "Transformer"),
                       (lstm_model,        "LSTM"),
                       (mlp_model,         "MLP"),
                       (svdd_model,        "DeepSVDD")]:
    train_deep_classifier(dl_model, train_loader_torch, lr=1e-3, epochs=10)
    dl_pred_label = predict_labels(dl_model, X_test)
    dl_acc  = accuracy_score(y_test, dl_pred_label)
    dl_prec = precision_score(y_test, dl_pred_label, zero_division=0)
    dl_rec  = recall_score(y_test, dl_pred_label, zero_division=0)
    dl_f1   = f1_score(y_test, dl_pred_label, zero_division=0)
    print(f"\n=== {name} ===")
    print(f"Acc={dl_acc:.4f}, Prec={dl_prec:.4f}, Rec={dl_rec:.4f}, F1={dl_f1:.4f}")

############################################################################
# 4) MUTATION ANALYSIS FOR ALL MODELS
############################################################################
numeric_cols_for_mutation = [c for c in numeric_cols if c != "Class"]
def anomaly_injection_a1_a2(
    X,
    mutation_fraction=0.05,
    min_mutations=2,
    num_features=1,
    intensity="moderate"
):
    intensity_levels = {
        "weak":[0.25,0.5,0.75],
        "moderate":[1,1.5,2],
        "strong":[8,9,10]
    }
    if intensity not in intensity_levels:
        raise ValueError("Invalid intensity for A1/A2")

    std_multipliers = intensity_levels[intensity]
    # Use numeric_cols_for_mutation to ensure we never mutate "Class"
    # or any other label column
    if len(numeric_cols_for_mutation) < num_features:
        raise ValueError("Not enough numeric columns for A1/A2")

    selected_features = random.sample(list(numeric_cols_for_mutation), num_features)
    X = X.copy()
    mutated_indices_set= set()
    mutated_feature_names=[]

    for feature in selected_features:
        std_dev = X[feature].std()
        mutation_factors = [m*std_dev for m in std_multipliers]
        mutation_factor  = np.random.choice(mutation_factors)

        num_mutations = max(min_mutations,int(mutation_fraction*len(X)))
        num_mutations = min(num_mutations, len(X))

        mutated_rows = np.random.choice(X.index, size=num_mutations, replace=False)
        mutated_indices_set.update(mutated_rows)

        before_vals = X.loc[mutated_rows, feature].copy()
        small_value = std_dev*0.025
        X.loc[mutated_rows, feature] = np.where(
            X.loc[mutated_rows, feature]==0,
            small_value/10,
            X.loc[mutated_rows, feature]
        )
        X.loc[mutated_rows, feature] += mutation_factor*np.sign(X.loc[mutated_rows, feature])
        after_vals = X.loc[mutated_rows, feature].copy()

        #print(f"A1/A2 injected in {feature} factor={mutation_factor:.3f} (intensity={intensity})")
        #print(pd.DataFrame({"Before":before_vals,"After":after_vals}),"\n")

        mutated_feature_names.append(feature)

    '''if len(selected_features)>1:
        print(f"A2 mutated columns (together): {mutated_feature_names}\n")
    else:
        print(f"A1 mutated column: {mutated_feature_names}\n")'''

    mutated_indices = sorted(list(mutated_indices_set))
    return X, mutated_indices, mutated_feature_names

def anomaly_injection_a3(
    X,
    mutation_fraction=0.05,
    min_mutations=2,
    num_features=1,
    intensity="moderate"
):
    intensity_levels = {
        "weak":0.1,
        "moderate":0.5,
        "strong":1.0
    }
    if intensity not in intensity_levels:
        raise ValueError("Invalid intensity for A3")

    noise_level = intensity_levels[intensity]

    # again, forcibly skip "Class"
    if len(numeric_cols_for_mutation) < num_features:
        raise ValueError("Not enough numeric columns for A3")

    selected_features= random.sample(numeric_cols_for_mutation, num_features)
    X = X.copy()
    mutated_indices_set=set()

    for feature in selected_features:
        std_dev = X[feature].std()
        num_mutations= max(min_mutations,int(mutation_fraction*len(X)))
        num_mutations= min(num_mutations,len(X))

        mutation_rows = np.random.choice(X.index, size=num_mutations, replace=False)
        mutated_indices_set.update(mutation_rows)

        before_vals = X.loc[mutation_rows, feature].values
        noise = np.random.normal(loc=0, scale=noise_level*std_dev, size=num_mutations)
        X.loc[mutation_rows, feature] = np.clip(
            X.loc[mutation_rows, feature] + noise,
            a_min=0,
            a_max=None
        )
        after_vals = X.loc[mutation_rows, feature].values

        #print(f"A3 injected in {feature}, intensity={intensity}, noise_level={noise_level}, std={std_dev:.3f}")
        #print("Before:\n", before_vals)
        #print("After:\n", after_vals,"\n")

    mutated_indices = sorted(list(mutated_indices_set))
    return X, mutated_indices, selected_features

def predict_baseline(df_data, baseline_predict_func):
    X_temp = df_data[numeric_cols].values
    return baseline_predict_func(X_temp)

def iso_predict_func(X):
    p = iso_model.predict(X)   # +1=normal, -1=outlier
    return np.where(p == -1, 1, 0)

def svm_predict_func(X):
    p = oc_svm.predict(X)      # +1=normal, -1=outlier
    return np.where(p == -1, 1, 0)

def lof_predict_func(X):
    p = lof_model.predict(X)   # +1=normal, -1=outlier
    return np.where(p == -1, 1, 0)

def autoenc_predict_func(X):
    X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
    with torch.no_grad():
        recon = autoenc(X_tensor)
        errs = torch.mean((recon - X_tensor)**2, dim=1).cpu().numpy()
    return (errs > threshold).astype(int)

# New deep models:
def transformer_predict_func(X):
    return predict_labels(transformer_model, X)

def lstm_predict_func(X):
    return predict_labels(lstm_model, X)

def mlp_predict_func(X):
    return predict_labels(mlp_model, X)

def svdd_predict_func(X):
    return predict_labels(svdd_model, X)

# Evaluate mutated sets
def predict_baseline(df_data, baseline_predict_func):
    # same approach => pass numeric columns
    X_temp = df_data[numeric_cols].values
    return baseline_predict_func(X_temp)

def evaluate_mutation_a1_a2_baseline(df_mutated, mutated_indices, baseline_predict_func):
    y_mut = df_mutated[target_col].values
    pred  = predict_baseline(df_mutated, baseline_predict_func)
    acc  = accuracy_score(y_mut, pred)
    prec = precision_score(y_mut, pred, zero_division=0)
    rec  = recall_score(y_mut, pred, zero_division=0)
    f1   = f1_score(y_mut, pred, zero_division=0)

    mutated_rows_pred = pred[mutated_indices]
    predicted_anomaly = np.sum(mutated_rows_pred == 1)
    total_mut = len(mutated_indices)
    mutation_score = 100.0 * predicted_anomaly / total_mut if total_mut>0 else 0.0
    return acc, prec, rec, f1, mutation_score

def evaluate_mutation_a3_stability_baseline(df_mutated, mutated_indices, orig_preds, baseline_predict_func):
    y_mut = df_mutated[target_col].values
    new_pred = predict_baseline(df_mutated, baseline_predict_func)
    acc  = accuracy_score(y_mut, new_pred)
    prec = precision_score(y_mut, new_pred, zero_division=0)
    rec  = recall_score(y_mut, new_pred, zero_division=0)
    f1   = f1_score(y_mut, new_pred, zero_division=0)

    changed = 0
    for row_idx in mutated_indices:
        if orig_preds[row_idx] != new_pred[row_idx]:
            changed +=1
    total = len(mutated_indices)
    stability_score = 100.0*(total-changed)/total if total>0 else 100.0
    return acc, prec, rec, f1, stability_score

def run_mutation_for_baseline(baseline_name, baseline_predict_func, orig_preds):
    print(f"\n=== Mutation for {baseline_name} ===")
    # A1
    df_a1, mut_idxs_a1, _ = anomaly_injection_a1_a2(
        X=test_df.copy(), mutation_fraction=0.1, min_mutations=2,
        num_features=1, intensity="strong"
    )
    a1_acc,a1_prec,a1_rec,a1_f1,a1_score = evaluate_mutation_a1_a2_baseline(df_a1, mut_idxs_a1, baseline_predict_func)
    print(f"\n-- {baseline_name} A1 Results --")
    print(f"Acc={a1_acc:.4f}, Prec={a1_prec:.4f}, Rec={a1_rec:.4f}, F1={a1_f1:.4f}")
    #print(f"Mutation Score (A1)={a1_score:.1f}%")

    # A2
    df_a2, mut_idxs_a2, _ = anomaly_injection_a1_a2(
        X=test_df.copy(), mutation_fraction=0.1, min_mutations=2,
        num_features=3, intensity="strong"
    )
    a2_acc,a2_prec,a2_rec,a2_f1,a2_score = evaluate_mutation_a1_a2_baseline(df_a2, mut_idxs_a2, baseline_predict_func)
    print(f"\n-- {baseline_name} A2 Results --")
    print(f"Acc={a2_acc:.4f}, Prec={a2_prec:.4f}, Rec={a2_rec:.4f}, F1={a2_f1:.4f}")
    #print(f"Mutation Score (A2)={a2_score:.1f}%")

    # A3
    df_a3, mut_idxs_a3, _ = anomaly_injection_a3(
        X=test_df.copy(), mutation_fraction=0.1, min_mutations=2,
        num_features=2, intensity="strong"
    )
    a3_acc,a3_prec,a3_rec,a3_f1,a3_stability = evaluate_mutation_a3_stability_baseline(
        df_a3, mut_idxs_a3, orig_preds, baseline_predict_func
    )
    print(f"\n-- {baseline_name} A3 Results --")
    print(f"Acc={a3_acc:.4f}, Prec={a3_prec:.4f}, Rec={a3_rec:.4f}, F1={a3_f1:.4f}")
    #print(f"Stability Score (A3)={a3_stability:.1f}%")

############################################################################
# 5) Final: Mutation Analysis for All 8 Models
############################################################################
print("\n========== BASELINE MUTATION ANALYSIS ==========")
# 5.1 Traditional scikit-learn:
iso_orig_pred = iso_predict_func(X_test)
run_mutation_for_baseline("IsolationForest", iso_predict_func, iso_orig_pred)

svm_orig_pred = svm_predict_func(X_test)
run_mutation_for_baseline("OneClassSVM", svm_predict_func, svm_orig_pred)

lof_orig_pred = lof_predict_func(X_test)
run_mutation_for_baseline("LocalOutlierFactor", lof_predict_func, lof_orig_pred)

ae_orig_pred = autoenc_predict_func(X_test)
run_mutation_for_baseline("Autoencoder", autoenc_predict_func, ae_orig_pred)

# 5.2 Deep Models:
transformer_orig_pred = transformer_predict_func(X_test)
run_mutation_for_baseline("Transformer", transformer_predict_func, transformer_orig_pred)

lstm_orig_pred = lstm_predict_func(X_test)
run_mutation_for_baseline("LSTM", lstm_predict_func, lstm_orig_pred)

mlp_orig_pred = mlp_predict_func(X_test)
run_mutation_for_baseline("MLP", mlp_predict_func, mlp_orig_pred)

svdd_orig_pred = svdd_predict_func(X_test)
run_mutation_for_baseline("DeepSVDD", svdd_predict_func, svdd_orig_pred)

print("\nAll baseline mutation analyses completed.")


Train shape: (279, 7)
Test shape: (31, 7)
Train label distribution:
 1    187
0     92
Name: Class, dtype: int64
Test label distribution:
 1    23
0     8
Name: Class, dtype: int64
=== Isolation Forest ===
Acc=0.4516, Prec=1.0000, Rec=0.2609, F1=0.4138

=== One-Class SVM ===
Acc=0.6774, Prec=0.9333, Rec=0.6087, F1=0.7368

=== Local Outlier Factor ===
Acc=0.2903, Prec=1.0000, Rec=0.0435, F1=0.0833

=== Simple Autoencoder ===
Acc=0.3548, Prec=1.0000, Rec=0.1304, F1=0.2308





=== Transformer ===
Acc=0.7419, Prec=0.7419, Rec=1.0000, F1=0.8519

=== LSTM ===
Acc=0.7419, Prec=0.7419, Rec=1.0000, F1=0.8519

=== MLP ===
Acc=0.7419, Prec=0.7419, Rec=1.0000, F1=0.8519

=== DeepSVDD ===
Acc=0.8065, Prec=0.8400, Rec=0.9130, F1=0.8750


=== Mutation for IsolationForest ===

-- IsolationForest A1 Results --
Acc=0.4516, Prec=1.0000, Rec=0.2609, F1=0.4138

-- IsolationForest A2 Results --
Acc=0.4839, Prec=0.8889, Rec=0.3478, F1=0.5000

-- IsolationForest A3 Results --
Acc=0.3871, Prec=0.7500, Rec=0.2609, F1=0.3871

=== Mutation for OneClassSVM ===

-- OneClassSVM A1 Results --
Acc=0.6452, Prec=0.8750, Rec=0.6087, F1=0.7179

-- OneClassSVM A2 Results --
Acc=0.7419, Prec=0.9412, Rec=0.6957, F1=0.8000

-- OneClassSVM A3 Results --
Acc=0.7097, Prec=0.9375, Rec=0.6522, F1=0.7692

=== Mutation for LocalOutlierFactor ===

-- LocalOutlierFactor A1 Results --
Acc=0.3226, Prec=0.7500, Rec=0.1304, F1=0.2222

-- LocalOutlierFactor A2 Results --
Acc=0.4516, Prec=0.8000, Rec=0.3478, 