## Anomaly Detection

This notebook implements and compares common anomaly detection approaches:

1. Gaussian Anomaly Detection (multivariate)
2. Isolation Forest
3. One-Class SVM
4. Local Outlier Factor (LOF)
5. Robust Covariance (EllipticEnvelope)
6. PCA Reconstruction Error
7. Autoencoder (Keras/TensorFlow)


#### Imports & Helpers

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    precision_recall_fscore_support,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    classification_report
)

from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.covariance import EllipticEnvelope
from sklearn.decomposition import PCA

# Seed NumPy's global random number generator with a fixed value so that
# code drawing from np.random produces the same numbers on every fresh run.
np.random.seed(42)


#### Utility Functions

In [2]:
def evaluate_history(y_true,y_pred,title="Model"):
    """
    Print binary anomaly-detection metrics for a set of predictions.

    Labels follow the convention 0 = normal, 1 = anomaly. Precision, recall
    and F1 are reported for the anomaly class (label 1).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1).
    y_pred : array-like
        Predicted labels (0 or 1).
    title : str, default "Model"
        Name shown in the printed section header.

    Returns
    -------
    None
        Everything is printed: the confusion matrix, precision / recall / F1
        for the anomaly class, and the per-class classification report.
    """
    # Pin the label set instead of letting scikit-learn infer it from the data.
    # Without this, a split (or a model) that only ever produces one class
    # yields a 1x1 confusion matrix and makes classification_report reject the
    # two-entry target_names list.
    class_labels = [0, 1]
    class_names = ["normal", "anomaly"]

    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true,
        y_pred,
        average='binary',
        zero_division=0
    )
    # Rows are true classes, columns are predicted classes, ordered as class_labels.
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)

    print(f"\n=== {title} ===")
    print("Confusion Matrix:")
    print(cm)
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1:        {f1:.4f}")
    print("\nClassification report:")
    print(
        classification_report(
            y_true,
            y_pred,
            labels=class_labels,
            target_names=class_names,
            zero_division=0,
        )
    )

def plot_2d_results(X, y_true, y_pred, title=""):
    """
    Scatter-plot 2D points coloured by true label, with predicted anomalies overlaid.

    Labels follow the convention 0 = normal, 1 = anomaly.

    Parameters
    ----------
    X : array-like
        Feature matrix; columns 0 and 1 are used as the x and y coordinates.
    y_true : array-like
        Ground-truth labels, one per row of X.
    y_pred : array-like
        Predicted labels, one per row of X.
    title : str, default ""
        Figure title.

    Returns
    -------
    None
        The figure is displayed with plt.show().
    """
    # Coerce to NumPy up front: X[mask, 0] is not valid on a DataFrame, and
    # `some_list == 0` evaluates to a single bool rather than an element-wise
    # mask. Flattening also lets (n, 1) column vectors act as row masks.
    points = np.asarray(X)
    truth = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()

    is_normal = truth == 0
    is_anomaly = truth == 1
    is_flagged = predicted == 1

    _, ax = plt.subplots(figsize=(7, 6))

    # Ground truth: normals first, then anomalies drawn slightly larger.
    ax.scatter(points[is_normal, 0], points[is_normal, 1],
               s=18, alpha=0.6, label="True Normal")
    ax.scatter(points[is_anomaly, 0], points[is_anomaly, 1],
               s=30, alpha=0.8, label="True Anomaly")

    # Predictions go on top as large 'x' markers so hits and misses are both visible.
    ax.scatter(points[is_flagged, 0], points[is_flagged, 1],
               s=80, marker="x", label="Predicted Anomaly")

    ax.set_title(title)
    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")
    ax.legend()
    ax.grid(True, alpha=0.2)
    plt.show()

def pick_threshold_from_scores(scores, contamination=0.05):
    """
    Derive a score cut-off from an expected contamination ratio.

    Scores are assumed to grow with "anomalousness". The returned value is
    the (1 - contamination) quantile of `scores`, so that roughly the top
    `contamination` fraction of scores lies at or above it.

    Parameters
    ----------
    scores : array-like
        Anomaly scores.
    contamination : float, default 0.05
        Fraction of samples expected to be anomalous.

    Returns
    -------
    The quantile value computed by np.quantile.
    """
    # Quantile level below which the presumed-normal share of scores falls.
    normal_fraction = 1 - contamination
    return np.quantile(scores, normal_fraction)



