# Anomaly Detection

This notebook demonstrates:
- Statistical methods (Z-Score, IQR, Mahalanobis)
- Isolation Forest
- Local Outlier Factor (LOF)
- One-Class SVM
- Ensemble anomaly detection
- Time series anomaly detection

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from sklearn.metrics import precision_score, recall_score, f1_score

# Fix the global NumPy seed once so the stochastic cells below
# (np.random.uniform / normal / choice) give the same draws on every
# Restart & Run All.
SEED = 42
np.random.seed(SEED)
print('Libraries loaded')

## 1. Generate Data with Anomalies

In [None]:
# "Normal" population: two blobs with unit standard deviation.
X_normal, _ = make_blobs(n_samples=1000, centers=2, cluster_std=1.0, random_state=42)
# Anomalies: 50 points drawn uniformly from the square [-8, 8) x [-8, 8).
X_anomaly = np.random.uniform(-8, 8, (50, 2))
X_all = np.vstack([X_normal, X_anomaly])
# Ground-truth labels in the same order as X_all: 1 = normal, -1 = anomaly.
y_true = np.repeat([1.0, -1.0], [len(X_normal), len(X_anomaly)])

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(*X_normal.T, c='blue', s=10, alpha=0.5, label='Normal')
ax.scatter(*X_anomaly.T, c='red', s=30, marker='x', label='Anomaly')
ax.legend()
ax.set_title('Data with Anomalies')
fig.tight_layout()
plt.show()

## 2. Isolation Forest

In [None]:
# Fit an Isolation Forest on the full (contaminated) data set.
# `contamination` is the expected share of outliers; it sets the score
# threshold used by fit_predict (-1 = anomaly, 1 = normal).
iso = IsolationForest(n_estimators=200, contamination=0.05, random_state=42)
y_iso = iso.fit_predict(X_all)
# Per-sample decision scores: negative = anomalous, positive = normal.
scores = iso.decision_function(X_all)

# Score contour.
# Build the grid from the data extent (plus padding) instead of a fixed
# [-10, 10] box: make_blobs places centers anywhere in (-10, 10) by default,
# so points can lie outside a fixed box and the contour would be clipped.
grid_pad = 2.0
x_lo, y_lo = X_all.min(axis=0) - grid_pad
x_hi, y_hi = X_all.max(axis=0) + grid_pad
xx, yy = np.meshgrid(np.linspace(x_lo, x_hi, 100), np.linspace(y_lo, y_hi, 100))
Z = iso.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

fig, ax = plt.subplots(figsize=(10, 7))
filled = ax.contourf(xx, yy, Z, levels=20, cmap='RdBu')
fig.colorbar(filled, ax=ax, label='Decision score (negative = anomalous)')
# The zero level of the decision function is the inlier/outlier boundary.
ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')
c = ['red' if p == -1 else 'blue' for p in y_iso]
ax.scatter(X_all[:, 0], X_all[:, 1], c=c, s=15, alpha=0.6, edgecolors='k', linewidths=0.3)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title(f'Isolation Forest ({(y_iso==-1).sum()} anomalies)')
plt.tight_layout(); plt.show()

## 3. Local Outlier Factor

In [None]:
# Local Outlier Factor with 20 neighbours per point; fit_predict labels
# the training points directly (-1 = anomaly, 1 = normal).
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
y_lof = lof.fit_predict(X_all)

# Side-by-side view of the points flagged by each detector.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
panels = {'Isolation Forest': y_iso, 'LOF': y_lof}
for ax, (name, preds) in zip(axes, panels.items()):
    is_outlier = preds == -1
    colors = ['red' if flagged else 'blue' for flagged in is_outlier]
    ax.scatter(X_all[:, 0], X_all[:, 1], c=colors, s=15, alpha=0.6)
    ax.set_title(f'{name} ({(preds==-1).sum()} anomalies)')
fig.tight_layout()
plt.show()

## 4. One-Class SVM and Detector Comparison

In [None]:
# Binary ground truth for the sklearn metrics: 1 = anomaly (positive class),
# 0 = normal.
y_true_bin = (y_true == -1).astype(int)

# One-Class SVM
# The features are standardized before fitting the RBF-kernel model.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_all)
# nu upper-bounds the fraction of training errors; it is set to the same
# 5% used as `contamination` for the other two detectors.
ocsvm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.05)
y_svm = ocsvm.fit_predict(X_scaled)

# Score every detector against the known labels.
results = {}
for name, preds in [('IsolationForest', y_iso), ('LOF', y_lof), ('OneClassSVM', y_svm)]:
    p_bin = (preds == -1).astype(int)
    results[name] = {
        'Detected': int(p_bin.sum()),
        'Precision': precision_score(y_true_bin, p_bin),
        'Recall': recall_score(y_true_bin, p_bin),
        'F1': f1_score(y_true_bin, p_bin)
    }

# orient='index' gives one row per detector and one column per metric, so
# 'Detected' stays an integer column instead of being upcast to float
# (which is what pd.DataFrame(results).T does, showing counts as e.g. 52.0).
pd.DataFrame.from_dict(results, orient='index').round(3)

## 5. Time Series Anomaly Detection

In [None]:
# Synthetic series: level 50, sine seasonality (period 100, amplitude 5),
# Gaussian noise (sd 2), plus four injected spikes of magnitude 20 with a
# random sign.
n = 500
t = np.arange(n)
ts = 50 + 5 * np.sin(2 * np.pi * t / 100) + np.random.normal(0, 2, n)
for idx in [100, 200, 300, 400]:
    ts[idx] += np.random.choice([-1, 1]) * 20

ts_df = pd.DataFrame({'value': ts})
window = 30
# Compute the baseline from the *previous* `window` observations only
# (shift(1) drops the current point from its own window). If the point being
# scored is included, a spike inflates the rolling mean and std it is compared
# against, which caps its |z| near (window - 1) / sqrt(window) and masks it.
baseline = ts_df['value'].shift(1).rolling(window)
ts_df['roll_mean'] = baseline.mean()
# A perfectly flat window has std 0; use NaN there so z is undefined
# rather than +/-inf.
ts_df['roll_std'] = baseline.std().replace(0, np.nan)
ts_df['z'] = (ts_df['value'] - ts_df['roll_mean']) / ts_df['roll_std']
# NaN z-scores (warm-up rows, flat windows) compare False: never flagged.
ts_df['anomaly'] = ts_df['z'].abs() > 3

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(ts_df['value'], 'b-', alpha=0.7, lw=0.8, label='Value')
ax.plot(ts_df['roll_mean'], 'g-', lw=1.5, label=f'Rolling mean (previous {window} points)')
ax.fill_between(ts_df.index, ts_df['roll_mean']-3*ts_df['roll_std'],
                ts_df['roll_mean']+3*ts_df['roll_std'], alpha=0.2, color='green',
                label='±3 rolling std')
anom = ts_df[ts_df['anomaly']]
ax.scatter(anom.index, anom['value'], c='red', s=60, zorder=5, label=f'{len(anom)} anomalies')
ax.set_xlabel('Time step')
ax.set_ylabel('Value')
ax.set_title('Rolling Z-Score Anomaly Detection'); ax.legend()
plt.tight_layout(); plt.show()