# 🌈 Spectral Data Anomaly Detection using AutoEncoder Latent Space

This notebook demonstrates four anomaly-detection methods built on an AutoEncoder (its reconstruction error plus three detectors fitted on its latent space):
- Reconstruction Error (AE)
- Mahalanobis Distance
- Gaussian Mixture Model (GMM)
- One-Class SVM

You can apply the same workflow to your own spectral dataset.

## 📥 Load Your Spectral Data

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load your data here.
# X should be a 2-D array of shape (n_samples, n_features).
# For demonstration we draw uniform random data from a *seeded* generator so
# that rerunning the notebook reproduces the same numbers (the split below is
# seeded as well).
rng = np.random.default_rng(42)
X = rng.random((1000, 200))  # Replace with your spectral data

# Split into training and test (80% / 20%)
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

## 🔧 Train AutoEncoder

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class AutoEncoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    Args:
        input_dim: Number of features of each input sample.
        latent_dim: Size of the bottleneck (latent) representation.
        hidden_dim: Width of the hidden layer used by both the encoder and
            the decoder. Defaults to 128, the value that used to be
            hard-coded, so existing two-argument calls are unaffected.
    """

    def __init__(self, input_dim: int, latent_dim: int, hidden_dim: int = 128):
        super().__init__()
        # The encoder ends in a plain linear layer, so the latent code is
        # not passed through any activation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        )
        # Mirror image of the encoder, mapping the latent code back to the
        # input dimensionality.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(reconstruction, latent)`` tuple: the decoder output and the
            encoder output it was computed from.
        """
        z = self.encoder(x)
        return self.decoder(z), z

# Model size: the input width follows the data, the bottleneck is fixed.
input_dim = X_train.shape[1]
latent_dim = 16

model = AutoEncoder(input_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# The dataset holds only the inputs: an autoencoder's target is its input.
train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32))
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Training loop
model.train()
for epoch in range(20):
    total_loss = 0.0
    for batch in train_loader:
        x = batch[0]
        x_hat, _ = model(x)
        loss = criterion(x_hat, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # `loss` is the mean over the batch; weight it by the batch size so
        # the (usually shorter) last batch is not over-represented.
        total_loss += loss.item() * x.size(0)
    # Report the per-sample mean rather than a sum of batch means, so the
    # number does not depend on the batch count and is on the same scale as
    # a per-sample reconstruction MSE.
    epoch_loss = total_loss / len(train_data)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

## 🧠 Extract Latent Features and Reconstruction Error

In [None]:
# Inference only: put the model in eval mode and disable gradient tracking
# for the forward passes.
model.eval()

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

with torch.no_grad():
    X_train_recon, Z_train = model(X_train_tensor)
    X_test_recon, Z_test = model(X_test_tensor)

# Latent codes as NumPy arrays for the detectors that follow
Z_train, Z_test = Z_train.numpy(), Z_test.numpy()

# Per-sample reconstruction error: squared error averaged over the features
recon_error_train = (X_train_tensor - X_train_recon).pow(2).mean(dim=1).numpy()
recon_error_test = (X_test_tensor - X_test_recon).pow(2).mean(dim=1).numpy()

## 📏 Mahalanobis Distance

In [None]:
from scipy.spatial import distance
from numpy.linalg import inv

# Mean and covariance of the training latent codes.
mean_vec = Z_train.mean(axis=0)
cov_mat = np.cov(Z_train, rowvar=False)
# Use the pseudo-inverse instead of a plain inverse: a latent covariance can
# be singular or badly conditioned (e.g. collapsed or strongly correlated
# latent dimensions), where `inv` raises or returns unstable values.
# `atleast_2d` covers the single-latent-dimension case, where `np.cov`
# returns a 0-d array.
inv_covmat = np.linalg.pinv(np.atleast_2d(cov_mat))

def mahalanobis(x, mean, inv_cov):
    """Return the Mahalanobis distance of every row of ``x`` from ``mean``.

    Args:
        x: 2-D array-like of shape (n_samples, n_features).
        mean: 1-D array-like of length n_features.
        inv_cov: (n_features, n_features) inverse covariance matrix.

    Returns:
        1-D NumPy array of length n_samples with
        ``sqrt((x_i - mean) @ inv_cov @ (x_i - mean))`` for each row ``x_i``.
    """
    diff = np.asarray(x) - np.asarray(mean)
    # Row-wise quadratic form computed in one vectorised pass instead of a
    # Python-level loop over samples.
    sq_dist = np.sum((diff @ np.asarray(inv_cov)) * diff, axis=1)
    # Round-off can push a near-zero quadratic form slightly below zero;
    # clamp so the square root never yields NaN.
    return np.sqrt(np.maximum(sq_dist, 0.0))

# Mahalanobis distance of every test latent code from `mean_vec`
maha_scores_test = mahalanobis(x=Z_test, mean=mean_vec, inv_cov=inv_covmat)
# Map distance to a confidence in (0, 1]: distance 0 gives 1, and the value
# decays towards 0 as the distance grows.
maha_confidence = np.divide(1, 1 + maha_scores_test)

## 📊 Gaussian Mixture Model Likelihood

In [None]:
from sklearn.mixture import GaussianMixture

# Fit a 3-component, full-covariance mixture to the training latent codes.
gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=42)
gmm.fit(Z_train)
gmm_log_likelihood = gmm.score_samples(Z_test)  # per-sample log-likelihood
gmm_scores = np.exp(gmm_log_likelihood)  # convert to actual likelihood
# p / (p + 1) is the logistic sigmoid of log p. Evaluating it from the
# log-likelihood avoids the inf / inf = nan that the direct ratio produces
# when exp() overflows:
#   sigmoid(t) = exp(-log(1 + exp(-t))) = exp(-logaddexp(0, -t))
gmm_confidence = np.exp(-np.logaddexp(0.0, -gmm_log_likelihood))

## 🧪 One-Class SVM

In [None]:
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import MinMaxScaler

# One-class SVM fitted on the training latent codes.
ocsvm = OneClassSVM(gamma='auto', nu=0.05)
ocsvm.fit(Z_train)
svm_scores = ocsvm.decision_function(Z_test)
# Calibrate the [0, 1] scaling on the *training* decision scores. Fitting the
# scaler on the test scores makes the confidence depend on whatever else is
# in the test batch (a single test sample would always map to 0) and always
# stretches the batch to span the full 0..1 range.
svm_scaler = MinMaxScaler().fit(ocsvm.decision_function(Z_train).reshape(-1, 1))
# Test scores outside the training range are clipped back into [0, 1].
svm_confidence = np.clip(
    svm_scaler.transform(svm_scores.reshape(-1, 1)).flatten(), 0.0, 1.0
)

## 📈 Compare All Confidence Scores

In [None]:
import matplotlib.pyplot as plt

# Histogram settings shared by every plot below
hist_style = dict(bins=50, alpha=0.6)

# Figure 1: distribution of the reconstruction error on the test set
plt.figure()
plt.hist(recon_error_test, label='AE MSE', **hist_style)
plt.title('AE Reconstruction Error (Test Set)')
plt.legend()
plt.show()

# Figure 2: the three confidence scores overlaid on one set of axes
confidence_series = (
    ('Mahalanobis Confidence', maha_confidence),
    ('GMM Confidence', gmm_confidence),
    ('SVM Confidence', svm_confidence),
)
plt.figure()
for series_label, series_values in confidence_series:
    plt.hist(series_values, label=series_label, **hist_style)
plt.title('Confidence Score Comparison')
plt.legend()
plt.show()