<a href="https://colab.research.google.com/github/hannapalya/anomaly_detection_syndromic/blob/main/IF_LSTM_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
Ensemble (per-signal chooser) of two attached adapters:
- LSTM autoencoder: LSTM_AE_curr.py -> fit_and_score(...)
- Isolation Forest: IsolationForest_tuned.py -> fit_and_score(...)

Selection logic:
  1) For each candidate, tune a threshold on validation scores to achieve >= SPEC_TARGET specificity.
  2) Among candidates that meet the spec floor, choose the one with the HIGHEST SENSITIVITY.
     Tie-breakers: higher specificity, then higher F_beta.
  3) If none meet SPEC_TARGET, pick the one whose specificity is closest to SPEC_TARGET (higher better),
     then highest sensitivity.

Robust per-sim stitching: if a candidate returns per-sim test score lengths that differ from the
constructed label lengths, we truncate to the common length and log a WARN (no crash).
"""

import os, sys, numpy as np, pandas as pd
from typing import Any, Dict, List, Tuple
from sklearn.metrics import confusion_matrix

# ===== USER CONFIG =====
# Folder that holds the per-signal CSVs read by load_data():
#   simulated_totals_sig{N}.csv and simulated_outbreaks_sig{N}.csv
DATA_DIR      = "/content"     # <- set your data folder
# Signal ids to process (4..16 inclusive); each id selects one pair of CSV files.
SIGNALS       = list(range(4, 17))
# 364 = 52 weeks * 7 days, so a "year" is a whole number of weeks.
DAYS_PER_YEAR = 364
TRAIN_YEARS   = 6
TRAIN_DAYS    = TRAIN_YEARS * DAYS_PER_YEAR
# 49 weeks (343 days): length of the series tail that test labels are cut from;
# together with TRAIN_DAYS it is also the minimum length a simulation must have.
VALID_DAYS    = 49 * 7
# Seeds NumPy's global RNG and the RandomState used for the train/held-out split;
# also forwarded to both adapters as rng_state.
RNG_STATE     = 42

# Selection constraints / scoring
SPEC_TARGET = 0.97     # tail specificity floor on validation
SENS_FLOOR  = 0.00     # (optional) hard floor for sensitivity; keep 0 to purely prioritize sens after spec
BETA        = 0.5      # tie-break metric (beta<1 favors specificity)

# ===== SAFE IMPORT of ADAPTERS =====
# ===== IMPORT YOUR MODELS (ADAPTERS) =====
import importlib.util, importlib, sys, pathlib

def import_adapter(path_str: str, func_name: str, module_name: str):
    """Execute a Python source file as a fresh module and return one attribute of it.

    Args:
        path_str: Path of the adapter source file (relative paths resolve
            against the current working directory).
        func_name: Name of the attribute (normally a function) to fetch.
        module_name: Alias the module is created under and registered as in
            ``sys.modules``; any module already cached under it is replaced.

    Returns:
        The object named ``func_name`` from the freshly executed module.

    Raises:
        FileNotFoundError: ``path_str`` does not exist.
        ImportError: no import spec/loader can be built for the file
            (e.g. the suffix is not a recognised Python source suffix).
        AttributeError: the module does not define ``func_name``.
    """
    p = pathlib.Path(path_str)
    if not p.exists():
        raise FileNotFoundError(f"Adapter not found at: {p}")
    # Always drop any cached module with the same alias
    sys.modules.pop(module_name, None)
    spec = importlib.util.spec_from_file_location(module_name, str(p))
    # spec_from_file_location returns None when it cannot pick a loader;
    # fail with a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for adapter: {p}")
    mod = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks the module up by name
    # while it runs can find it.
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)  # executes the file (fresh)
    except BaseException:
        # Do not leave a half-initialised module behind under the alias.
        sys.modules.pop(module_name, None)
        raise
    if not hasattr(mod, func_name):
        raise AttributeError(f"{p.name} is missing `{func_name}()`")
    return getattr(mod, func_name)

# Use stable, unique aliases to prevent TF/IF re-register issues
lstm_fit_and_score = import_adapter("LSTM_AE_curr.py", "fit_and_score", module_name="adapter_lstm")
iso_fit_and_score  = import_adapter("IsolationForest_tuned.py", "fit_and_score", module_name="adapter_if")
from r_comparator_metrics import (
    compute_sensitivity_R, compute_specificity_R, compute_fpr_R,
    compute_pod_R, compute_timeliness_R, IDX_RANGE
)


# ===== HELPERS =====
def load_data(sig: int, data_dir=None):
    """Load the totals and binarised outbreak tables for one signal.

    Reads ``simulated_totals_sig{sig}.csv`` and
    ``simulated_outbreaks_sig{sig}.csv``. If the totals table has a column
    named ``date``, ``Date``, ``ds`` or ``timestamp`` (first match wins), that
    column is dropped from the totals and, when present, from the outbreaks.

    Args:
        sig: Signal id used to build both file names.
        data_dir: Folder containing the CSVs; defaults to ``DATA_DIR``.

    Returns:
        ``(X, Y)``: the totals DataFrame and an integer DataFrame that is 1
        where the outbreak value is > 0 and 0 elsewhere.
    """
    base_dir = DATA_DIR if data_dir is None else data_dir
    X = pd.read_csv(os.path.join(base_dir, f"simulated_totals_sig{sig}.csv"))
    Y = pd.read_csv(os.path.join(base_dir, f"simulated_outbreaks_sig{sig}.csv"))
    date_col = next((c for c in ["date","Date","ds","timestamp"] if c in X.columns), None)
    if date_col:
        X = X.drop(columns=[date_col])
        if date_col in Y.columns:
            Y = Y.drop(columns=[date_col])
    # Binarise only after the date column is gone: comparing a string date
    # column with 0 raises TypeError.
    Y = (Y > 0).astype(int)
    return X, Y

def cross_sim_split(sims: List[dict], rng: np.random.RandomState, train_frac=0.6):
    """Shuffle the simulations and cut them into a training part and a held-out part.

    The shuffle is done in place on ``sims`` using ``rng``. The first
    ``int(len(sims) * train_frac)`` shuffled entries form the training list;
    everything after that is returned as the held-out list.
    """
    rng.shuffle(sims)
    cut = int(len(sims) * train_frac)
    train_part = sims[:cut]
    held_part = sims[cut:]
    return train_part, held_part

def sens_spec(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float,float]:
    """Return ``(sensitivity, specificity)`` for binary labels and predictions.

    Counts come from a 2x2 confusion matrix over the labels 0 and 1.
    A rate whose denominator is zero (no positives / no negatives) is
    reported as 0.0.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=[0,1])
    tn, fp, fn, tp = matrix.ravel()
    n_pos = tp + fn
    n_neg = tn + fp
    if n_pos > 0:
        sensitivity = tp / n_pos
    else:
        sensitivity = 0.0
    if n_neg > 0:
        specificity = tn / n_neg
    else:
        specificity = 0.0
    return sensitivity, specificity

def f_beta(s: float, sp: float, beta=BETA) -> float:
    """F-beta style combination of sensitivity ``s`` and specificity ``sp``.

    Computes ``(1 + beta^2) * s * sp / (beta^2 * s + sp)``, with 1e-12 added
    to the denominator. Returns 0.0 when ``beta^2 * s + sp`` is not positive.
    """
    beta_sq = beta * beta
    weighted = beta_sq * s + sp
    if weighted <= 0:
        return 0.0
    return (1 + beta_sq) * s * sp / (weighted + 1e-12)

def _score_alarms(yhat: np.ndarray, val_sims: list, val_lengths: list,
                  O_full_val: np.ndarray, idx_range: np.ndarray) -> Tuple[float, float]:
    """Split flat 0/1 alarms into per-sim columns and score them.

    Returns ``(sensitivity, specificity)`` from the R-comparator metrics, or
    ``(0.0, 0.0)`` when the alarms cannot be split into exactly one chunk per
    validation simulation.
    """
    # Build alarm matrix A from yhat predictions (split by simulation lengths)
    A_list = []
    offset = 0
    for L in val_lengths:
        if L > 0 and offset < len(yhat):
            A_list.append(yhat[offset:offset+L])
            offset += L

    if not A_list or len(A_list) != len(val_sims):
        return 0.0, 0.0

    # Zero-pad shorter chunks at the end so every column has the same length.
    max_len = max(len(a) for a in A_list)
    A = np.column_stack(
        [np.pad(a, (0, max_len - len(a)), mode='constant') for a in A_list]
    )
    # NOTE(review): A has max(val_lengths) rows while O_full_val has one row per
    # day of the full series; whether the R-comparator functions accept that
    # mismatch is not visible from this file — confirm against r_comparator_metrics.
    s = compute_sensitivity_R(A, O_full_val)
    sp = compute_specificity_R(A, O_full_val, idx_range)
    return s, sp


def tune_threshold(scores: np.ndarray, val_sims: list, val_lengths: list,
                   spec_target=SPEC_TARGET, sens_floor=SENS_FLOOR, p_max=20.0) -> float:
    """
    Given anomaly scores (higher = more anomalous), choose a percentile-based threshold
    that meets the specificity target.

    Candidate thresholds are the (100 - p)-th percentiles of ALL scores for
    200 values of p spread evenly over [0.5, p_max]. The function receives no
    labels aligned to ``scores``, so negatives cannot be isolated here.

    Selection:
      1) Among candidates with specificity >= ``spec_target`` (and, when
         ``sens_floor`` > 0, sensitivity >= ``sens_floor``), take the highest
         sensitivity; ties go to higher specificity, then higher F_beta.
      2) If none qualify, take the candidate whose specificity is closest to
         ``spec_target``; ties go to higher specificity, then higher sensitivity.

    Args:
        scores: Flat validation scores, one chunk per validation sim laid end to end.
        val_sims: Validation simulations; each dict's ``"y"`` array is one label column.
        val_lengths: Length of each sim's chunk inside ``scores``.
        spec_target: Specificity floor.
        sens_floor: Optional sensitivity floor for step 1 (0 disables it).
        p_max: Largest tail percentage tried.

    Returns:
        The chosen threshold, or ``inf`` (no alarms) when ``scores`` is empty
        or no candidate could be ranked.
    """
    if len(scores) == 0:  # degenerate; no alarms
        return float("inf")

    # Build O_full for R-comparator metrics
    O_full_val = np.stack([d["y"] for d in val_sims], axis=1)
    # Rows 2205..2547, i.e. the last 343 rows of a 2548-row series.
    # NOTE(review): presumably the validation tail of a 7 * 364-day series — confirm.
    idx_range = np.arange(2205, 2548, dtype=int)

    base = scores
    grid = np.linspace(0.5, p_max, num=200)  # 0.5%..20% tail

    # Score every candidate once; both selection passes reuse the results.
    candidates = []
    for p in grid:
        thr = float(np.percentile(base, 100 - p))
        yhat = (scores >= thr).astype(int)
        s, sp = _score_alarms(yhat, val_sims, val_lengths, O_full_val, idx_range)
        candidates.append((thr, s, sp))

    # Primary: meet spec target, then maximize sensitivity
    best_t = None
    best_key = None
    for thr, s, sp in candidates:
        meets_floor = sens_floor <= 0 or s >= sens_floor
        if sp >= spec_target and meets_floor:
            key = (s, sp, f_beta(s, sp, BETA))  # prioritize sens, then spec, then F_beta
            if best_key is None or key > best_key:
                best_key = key
                best_t = thr
    if best_t is not None:
        return best_t

    # Fallback: if none meet spec target, pick closest spec to target (higher better), then sens
    best_gap = float("inf")
    best_s = -1.0
    best_sp = -1.0
    for thr, s, sp in candidates:
        gap = abs(sp - spec_target)
        # prefer higher spec if gaps tie, then higher sens
        if (gap < best_gap) or (gap == best_gap and (sp > best_sp or (sp == best_sp and s > best_s))):
            best_t, best_gap, best_sp, best_s = thr, gap, sp, s

    # best_t stays None only if no gap was comparable (e.g. NaN metrics);
    # honour the float return type with the "no alarms" threshold.
    return float("inf") if best_t is None else best_t


# ===== MAIN =====
np.random.seed(RNG_STATE)
rng = np.random.RandomState(RNG_STATE)

# One result row per processed signal, keyed by signal id.
summary = {}

for S in SIGNALS:
    print(f"\n--- Signal {S} (Ensemble: LSTM vs IF) ---")
    Xsig, Ysig = load_data(S)

    # Build sim dicts: one per CSV column, keeping only series long enough to
    # cover the training span plus the validation/test tail.
    sims: List[dict] = []
    for sim_idx, col in enumerate(Xsig.columns):
        x = Xsig[col].to_numpy(np.float32, copy=False)
        y = Ysig[col].to_numpy(np.int32, copy=False)
        if len(x) >= TRAIN_DAYS + VALID_DAYS:
            sims.append(dict(sim=f"sig{S}_sim{sim_idx}", x=x, y=y))
    if not sims:
        print("  No complete sims; skip.")
        continue

    # 60% of the sims train the models; the rest is halved into val / test.
    train_sims, held_sims = cross_sim_split(sims, rng, train_frac=0.6)
    mid = max(1, len(held_sims)//2)
    val_sims  = held_sims[:mid]
    # With a single held-out sim it serves as both validation and test set.
    test_sims = held_sims[mid:] if len(held_sims) > 1 else held_sims
    print(f"  Using {len(train_sims)} train sims, {len(val_sims)} val, {len(test_sims)} test")

    # === get candidates from adapters ===
    cand_results: List[Dict[str,Any]] = []
    cand_results.append(lstm_fit_and_score(S, train_sims, val_sims, test_sims, rng_state=RNG_STATE))
    cand_results.append(iso_fit_and_score(S,  train_sims, val_sims, test_sims, rng_state=RNG_STATE))

    # === tune thresholds on validation & pick model ===
    best = None
    best_key = None  # tuple for prioritization

    for res in cand_results:
        yv = res["val_labels"].astype(np.int32)
        sv = res["val_scores"].astype(np.float32)

        # tune_threshold only scores a threshold when the flat scores split
        # into exactly one chunk per validation sim, so pass one length per sim.
        # NOTE(review): assumes the adapter concatenates equal-length per-sim
        # chunks in val_sims order — confirm against the adapters. If the
        # scores do not divide evenly, fall back to a single chunk.
        n_val = len(val_sims)
        if n_val > 0 and len(sv) > 0 and len(sv) % n_val == 0:
            val_lengths = [len(sv) // n_val] * n_val
        else:
            val_lengths = [len(sv)] if len(sv) > 0 else [0]
        thr = tune_threshold(sv, val_sims, val_lengths, SPEC_TARGET, SENS_FLOOR)
        yhat = (sv >= thr).astype(int)
        s, sp = sens_spec(yv, yhat)

        # primary decision key: (meets_spec, sensitivity, specificity, F_beta)
        meets = (sp >= SPEC_TARGET)
        key = (1 if meets else 0, s, sp, f_beta(s, sp, BETA))

        res.update(thr=float(thr), val_sens=float(s), val_spec=float(sp), decision_key=key)
        print(f"  {res['name']}@win{res['window']} → val sens={s:.3f}, spec={sp:.3f}, thr={thr:.6f}")

        if (best_key is None) or (key > best_key):
            best, best_key = res, key

    print(f"  Selected: {best['name']} (win={best['window']}), thr≈{best['thr']:.6f}, "
          f"val sens={best['val_sens']:.3f}, val spec={best['val_spec']:.3f}")

    # === test on tail ===
    # Build per-sim labels using the selected window: take the last VALID_DAYS
    # labels and drop the first window-1 of them.
    test_labels_splits: List[np.ndarray] = []
    for d in test_sims:
        y_tail = d["y"][-VALID_DAYS:]
        y_al   = y_tail[best["window"]-1:]
        test_labels_splits.append(y_al.astype(np.int32))

    # Sanity: number of sims must match (explicit raise; an assert would be
    # stripped under `python -O`).
    if len(best["test_scores_splits"]) != len(test_labels_splits):
        raise ValueError("Adapter must return test_scores_splits aligned to test_sims.")

    # Robust stacking with truncation if lengths drift
    preds_cols, labels_cols = [], []
    for idx, (scores, ycol) in enumerate(zip(best["test_scores_splits"], test_labels_splits)):
        if len(scores) != len(ycol):
            L = min(len(scores), len(ycol))
            print(f"  WARN: sim#{idx} length mismatch (scores={len(scores)}, labels={len(ycol)}); truncating to {L}.")
            scores = scores[:L]; ycol = ycol[:L]
        if len(scores) == 0:
            # keep empty column — will be ignored when stacking if all are empty
            preds_cols.append(np.zeros((0,), dtype=int))
            labels_cols.append(ycol[:0])
            continue
        preds_cols.append((scores >= best["thr"]).astype(int))
        labels_cols.append(ycol)

    if all(len(c) == 0 for c in preds_cols):
        print("  No test windows; skipping metrics.")
        continue

    # Equalize lengths across sims by trimming to min length (conservative, avoids ragged arrays)
    min_len = min(len(c) for c in preds_cols if len(c) > 0)
    preds_cols = [c[:min_len] for c in preds_cols if len(c) > 0]
    labels_cols = [c[:min_len] for c in labels_cols if len(c) > 0]

    A = np.stack(preds_cols, axis=1)   # [T, J]
    O = np.stack(labels_cols, axis=1)  # [T, J]

    # NOTE(review): metric_sensitivity / metric_specificity / metric_pod /
    # metric_timeliness are not defined or imported in the visible part of this
    # file. They presumably need to be defined, or mapped onto the compute_*_R
    # functions from r_comparator_metrics — confirm the signatures first.
    sens = metric_sensitivity(A, O)
    spec = metric_specificity(A, O)
    pod  = metric_pod(A, O)
    tim  = metric_timeliness(A, O)
    print(f"  TEST → Sens={sens:.3f}, Spec={spec:.3f}, POD={pod:.3f}, Tim={tim:.3f}")

    summary[S] = dict(
        model=best["name"], window=int(best["window"]), thr=float(best["thr"]),
        val_sens=float(best["val_sens"]), val_spec=float(best["val_spec"]),
        sensitivity=float(sens), specificity=float(spec), pod=float(pod), timeliness=float(tim)
    )

# ===== SUMMARY =====
# Report: print the per-signal table and the mean of the four test metrics,
# then write the table to CSV. Nothing is written when no signal produced a row.
if not summary:
    print("\nNo results to summarize.")
else:
    df = pd.DataFrame.from_dict(summary, orient="index")
    print("\n=== ENSEMBLE SUMMARY (per-signal chosen model) ===")
    print(df)
    metric_means = df[["sensitivity","specificity","pod","timeliness"]].mean(numeric_only=True)
    print("\nMeans:\n", metric_means)
    df.to_csv("Ensemble_LSTM_IF_results.csv")
    print("Saved: Ensemble_LSTM_IF_results.csv")



--- Signal 4 (Ensemble: LSTM vs IF) ---
  Using 60 train sims, 20 val, 20 test
  LSTM-AE@win14 → val sens=0.881, spec=0.972, thr=0.633695
  IF@win14 → val sens=0.904, spec=0.979, thr=0.109523
  Selected: IF (win=14), thr≈0.109523, val sens=0.904, val spec=0.979
  TEST → Sens=0.871, Spec=0.980, POD=1.000, Tim=0.105

--- Signal 5 (Ensemble: LSTM vs IF) ---
  Using 60 train sims, 20 val, 20 test
  LSTM-AE@win14 → val sens=0.665, spec=0.970, thr=0.027969
  IF@win14 → val sens=0.630, spec=0.970, thr=0.015706
  Selected: LSTM-AE (win=14), thr≈0.027969, val sens=0.665, val spec=0.970
  TEST → Sens=0.643, Spec=0.973, POD=0.900, Tim=0.176

--- Signal 6 (Ensemble: LSTM vs IF) ---
  Using 60 train sims, 20 val, 20 test
  LSTM-AE@win14 → val sens=0.497, spec=0.977, thr=0.292464
  IF@win14 → val sens=0.474, spec=0.970, thr=0.023712
  Selected: LSTM-AE (win=14), thr≈0.292464, val sens=0.497, val spec=0.977
  TEST → Sens=0.424, Spec=0.979, POD=0.500, Tim=0.545

--- Signal 7 (Ensemble: LSTM vs IF) --