# 📐 SpectraMind V50 — 05 · Uncertainty Calibration & Cycle Consistency

Goals:
- Evaluate calibration quality (empirical vs. nominal coverage of the predicted μ ± kσ intervals)
- Apply **temperature scaling** (global σ scale) and optional per-bin scaling
- Emit a **calibrated submission CSV** with updated σ
- (Optional) Run a **cycle-consistency** sanity check via forward simulation (requires the `spectramind` CLI)
- Log a **reproducibility entry** to `v50_debug_log.md`

> CLI-first and reproducibility-friendly. Cells skip gracefully if inputs/CLI aren't present.


## 🔧 Environment & Paths

In [None]:
from pathlib import Path
import json, os, sys, platform, shutil
from datetime import datetime

# Every location is resolved relative to the directory the notebook runs from.
ROOT = Path.cwd()
ART = ROOT / "artifacts"
DIAG_DIR, SUBMIT_DIR, CAL_DIR = (
    ART / subdir for subdir in ("diagnostics", "submission", "calibration")
)
# Only the calibration output directory is created here.
CAL_DIR.mkdir(parents=True, exist_ok=True)

# Inputs this notebook looks for (either may be absent)
DIAG_SUMMARY = DIAG_DIR / "diagnostic_summary.json"  # expected to hold residual/sigma stats if generated
SUBMISSION_CSV = SUBMIT_DIR / "submission.csv"

# Files this notebook writes
CAL_REPORT_JSON = CAL_DIR / "calibration_report.json"
CAL_FACTORS_JSON = CAL_DIR / "sigma_scale_factors.json"
CALIBRATED_SUBMISSION_CSV = SUBMIT_DIR / "submission_calibrated.csv"
LOG_MD = ROOT / "v50_debug_log.md"

# Snapshot of the runtime and of every resolved path, printed as JSON.
env = {
    "timestamp": datetime.now().isoformat(timespec="seconds"),
    "python": " ".join(sys.version.split("\n")),
    "platform": platform.platform(),
    "cwd": str(ROOT),
    "paths": {
        label: str(location)
        for label, location in (
            ("ART", ART),
            ("DIAG_DIR", DIAG_DIR),
            ("SUBMIT_DIR", SUBMIT_DIR),
            ("CAL_DIR", CAL_DIR),
            ("DIAG_SUMMARY", DIAG_SUMMARY),
            ("SUBMISSION_CSV", SUBMISSION_CSV),
            ("CAL_REPORT_JSON", CAL_REPORT_JSON),
            ("CAL_FACTORS_JSON", CAL_FACTORS_JSON),
            ("CALIBRATED_SUBMISSION_CSV", CALIBRATED_SUBMISSION_CSV),
            ("LOG_MD", LOG_MD),
        )
    },
}
print(json.dumps(env, indent=2))


## 🩺 CLI sanity (optional)

In [None]:
import shutil, subprocess

def check_cli(cmd="spectramind", args=None):
    """Probe whether a command-line tool is installed and responds.

    Args:
        cmd: Executable name (looked up on PATH) or path to an executable.
        args: Iterable of arguments for the probe call. ``None`` (the
            default) means ``["--version"]``.

    Returns:
        ``{"available": False}`` when the executable cannot be located;
        ``{"available": True, "output": <text>}`` when the probe call succeeds;
        ``{"available": True, "error": <message>}`` when the executable exists
        but the call fails (non-zero exit, timeout, OS error, undecodable output).
    """
    # A None sentinel avoids sharing one mutable default list between calls,
    # and list() lets callers pass a tuple or any other iterable.
    probe_args = ["--version"] if args is None else list(args)

    exe = shutil.which(cmd)
    if not exe:
        print(f"ℹ️ '{cmd}' CLI not found on PATH. CLI-dependent steps will be skipped.")
        return {"available": False}
    try:
        # Invoke the path that was actually resolved rather than repeating the lookup.
        out = subprocess.check_output(
            [exe, *probe_args], stderr=subprocess.STDOUT, text=True, timeout=30
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Best-effort probe: report the failure instead of aborting the notebook.
        print(f"⚠️ CLI call failed: {e}")
        return {"available": True, "error": str(e)}
    print(out)
    return {"available": True, "output": out}

# Probe the CLI once with the default command/arguments and keep the result dict.
cli_info = check_cli()
cli_info  # bare expression so the notebook displays the result


## 📊 Load diagnostics & compute temperature scaling

In [None]:
import json, math
from statistics import median
import numpy as np

def load_diag_summary(path: Path):
    """Read the diagnostic summary JSON stored at *path*.

    Returns the parsed JSON value. When the file does not exist, or reading or
    parsing it fails, a warning is printed and ``None`` is returned instead.
    """
    if path.exists():
        try:
            with path.open("r", encoding="utf-8") as handle:
                return json.load(handle)
        except Exception as err:
            print("⚠️ Failed to parse diagnostic summary:", err)
            return None

    print("⚠️ No diagnostic summary found:", path)
    return None

def _residual_sigma_pair(record):
    """Return ``(residuals, sigmas)`` cut to a common length, or ``None`` if *record* has no usable pair."""
    if not isinstance(record, dict):
        # Malformed entries (None, strings, numbers, ...) are skipped, not fatal.
        return None
    r = record.get("residuals") or record.get("resid") or record.get("residual")
    s = record.get("sigma") or record.get("sigmas") or record.get("pred_sigma")
    if not (isinstance(r, list) and isinstance(s, list)):
        return None
    m = min(len(r), len(s))
    return r[:m], s[:m]


def extract_resid_sigma(diag_data):
    """Attempt to extract arrays of residuals and predicted sigmas from a generic diagnostic JSON.

    Recognised layouts, checked in this order:
      - ``{"items": [{"residuals": [...], "sigma": [...]}, ...]}``
      - ``{"planets": {"id": {"residuals": [...], "sigma": [...]}}}``
      - flat ``{"residuals": [...], "sigma": [...]}``
    Accepted key aliases are ``residuals``/``resid``/``residual`` and
    ``sigma``/``sigmas``/``pred_sigma``. Within a record the two lists are
    truncated to their common length; records that are not dicts or lack a
    list-valued pair are skipped.

    Returns:
        ``(res, sg)`` as float NumPy arrays flattened across records; both are
        empty when nothing could be extracted.
    """
    resids = []
    sigmas = []
    if diag_data is None:
        return np.array([]), np.array([])

    if isinstance(diag_data, dict):
        if isinstance(diag_data.get("items"), list):
            records = diag_data["items"]
        elif isinstance(diag_data.get("planets"), dict):
            records = diag_data["planets"].values()
        else:
            # Flat layout: the summary itself is the single record.
            records = [diag_data]

        for rec in records:
            pair = _residual_sigma_pair(rec)
            if pair is None:
                continue
            resids.extend(pair[0])
            sigmas.extend(pair[1])

    res = np.array(resids, dtype=float) if resids else np.array([])
    sg = np.array(sigmas, dtype=float) if sigmas else np.array([])
    print("Extracted residuals:", res.shape, "sigmas:", sg.shape)
    return res, sg

def compute_temperature_scaling(res, sg, eps=1e-12):
    """Compute a global scale alpha for sigma to improve calibration.

    Two estimates are produced:
      - RMS-based: sqrt(mean(res^2) / mean(sigma^2))
      - Median-abs-based: median(|res|/sigma) / median(|N(0,1)|), with
        median(|Z|) = 0.674489...
    The median estimate is preferred when it lies in [0.1, 10]; otherwise the
    RMS estimate is used.

    Pairs with a non-finite residual or sigma, or a non-positive sigma, are
    ignored. If the chosen alpha is not a finite positive number (for example
    when every residual is exactly zero), alpha falls back to the neutral 1.0
    so that applying it can never zero out or corrupt the sigmas.

    Args:
        res: NumPy array of residuals.
        sg: NumPy array of predicted sigmas, same size as *res*.
        eps: Small constant guarding the divisions.

    Returns:
        Dict with float entries ``alpha_rms``, ``alpha_med`` and ``alpha``,
        plus a ``note`` string whenever a fallback was taken.
    """
    neutral = {"alpha_rms": 1.0, "alpha_med": 1.0, "alpha": 1.0, "note": "insufficient data"}
    if res.size == 0 or sg.size == 0 or res.size != sg.size:
        return neutral

    # Keep only pairs that can contribute a meaningful |res| / sigma ratio.
    with np.errstate(invalid="ignore"):
        usable = np.isfinite(res) & np.isfinite(sg) & (sg > 0)
    if not usable.any():
        return neutral
    r = res[usable]
    s = sg[usable]

    with np.errstate(over="ignore", invalid="ignore"):
        rms_res = np.sqrt(np.mean(r**2))
        rms_sig = np.sqrt(np.mean((s + eps)**2))
        alpha_rms = rms_res / (rms_sig + eps)

        med_abs_z = 0.6744897501960817  # median |N(0,1)|
        alpha_med = np.median(np.abs(r) / (s + eps)) / med_abs_z

    # Choose alpha preferring robust median, falling back to rms if extreme
    alpha = alpha_med if 0.1 <= alpha_med <= 10 else alpha_rms

    out = {"alpha_rms": float(alpha_rms), "alpha_med": float(alpha_med), "alpha": float(alpha)}
    if not (np.isfinite(alpha) and alpha > 0):
        # A zero, negative, or non-finite scale would destroy the submitted sigmas.
        out["alpha"] = 1.0
        out["note"] = "degenerate scale estimate; alpha reset to 1.0"
    return out

def nominal_coverage_checks(res, sg, alphas=(1.0,), eps=1e-12):
    """Report empirical coverage of the ±1σ (~68.27%) and ±1.96σ (~95%) bands.

    For each scale in *alphas*, sigma is multiplied by that scale and the
    fraction of residuals falling inside each band is recorded under the key
    ``str(scale)`` as ``{"cov_68": ..., "cov_95": ...}``. An empty dict is
    returned when either array is empty or their sizes differ.
    """
    if res.size == 0 or sg.size == 0 or res.size != sg.size:
        return {}

    abs_res = np.abs(res)
    padded_sigma = sg + eps

    def _coverage(scale):
        z_scores = abs_res / (scale * padded_sigma)
        return {
            "cov_68": float(np.mean(z_scores <= 1.0)),
            "cov_95": float(np.mean(z_scores <= 1.96)),
        }

    return {str(scale): _coverage(scale) for scale in alphas}

# Run the calibration chain on whatever diagnostics are available:
# load -> extract residual/sigma arrays -> estimate alpha -> measure coverage.
diag = load_diag_summary(DIAG_SUMMARY)
res, sg = extract_resid_sigma(diag)
scales = compute_temperature_scaling(res, sg)
cov = nominal_coverage_checks(res, sg, alphas=(1.0, scales.get("alpha", 1.0)))
print("Proposed scaling:", json.dumps(scales, indent=2))
print("Coverage:", json.dumps(cov, indent=2))

# Small calibration report: sample count, both scale estimates, coverage table.
report = dict(
    timestamp=datetime.now().isoformat(timespec="seconds"),
    counts=int(min(res.size, sg.size)),
    scales=scales,
    coverage=cov,
)
CAL_REPORT_JSON.write_text(json.dumps(report, indent=2), encoding="utf-8")

# The global factor also gets its own file.
CAL_FACTORS_JSON.write_text(
    json.dumps({"global_sigma_scale": scales.get("alpha", 1.0)}, indent=2),
    encoding="utf-8",
)

print("Saved:", CAL_REPORT_JSON, "and", CAL_FACTORS_JSON)


## 📈 Visualize calibration (optional)

In [None]:
# Build an empirical CDF of |res|/sigma and overlay nominal 68/95 thresholds
import numpy as np
import matplotlib.pyplot as plt

def plot_empirical_cdf(res, sg, alpha=1.0, title="Empirical |z| CDF"):
    """Plot the empirical CDF of |z| = |res| / (alpha * sigma).

    Dashed vertical lines are drawn at |z| = 1.0 and |z| = 1.96. Prints a
    message and returns without plotting when either array is empty or the
    sizes differ.
    """
    has_data = res.size != 0 and sg.size != 0 and res.size == sg.size
    if not has_data:
        print("No data to plot.")
        return

    ordered = np.sort(np.abs(res) / (alpha*(sg+1e-12)))
    # Fraction of samples strictly below each sorted value: 0, 1/n, ..., (n-1)/n.
    cum_frac = np.linspace(0, 1, len(ordered), endpoint=False)

    plt.figure(figsize=(6,4))
    plt.plot(ordered, cum_frac)
    for threshold in (1.0, 1.96):
        plt.axvline(threshold, linestyle="--")
    plt.title(title)
    plt.xlabel("|z| = |res| / (alpha * sigma)")
    plt.ylabel("CDF")
    plt.show()

# Raw and scaled views (if data available)
# Draw the raw view and the alpha-scaled view, but only when paired data exists.
if not (res.size and sg.size and res.size == sg.size):
    print("⚠️ Skipping plots — residual/sigma arrays unavailable.")
else:
    plot_empirical_cdf(res, sg, alpha=1.0, title="Empirical |z| CDF (alpha=1.0)")
    plot_empirical_cdf(
        res,
        sg,
        # Floor alpha so the |z| division inside the plot stays well-defined.
        alpha=max(1e-6, float(scales.get("alpha", 1.0))),
        title=f"Empirical |z| CDF (alpha={scales.get('alpha', 1.0):.3f})",
    )


## 🛠️ Apply scaling to submission σ columns and save calibrated CSV

In [None]:
import pandas as pd
import numpy as np

def detect_sigma_columns(df: pd.DataFrame):
    """Return the column labels of *df* that look like sigma columns.

    A column qualifies when its label, converted to text and lower-cased,
    contains ``"sigma"`` (this also covers labels ending in ``"_sigma"``).
    Labels are returned unchanged and in their original order; non-string
    labels are tolerated rather than raising.
    """
    return [c for c in df.columns if "sigma" in str(c).lower()]

def scale_submission_sigmas(sub_csv: Path, out_csv: Path, alpha: float):
    """Multiply every sigma-like column of a submission CSV by *alpha* and save the result.

    Args:
        sub_csv: Path of the submission CSV to read.
        out_csv: Path of the calibrated CSV to write (parent directory is
            created if missing).
        alpha: Global sigma scale. Values that are non-numeric, non-finite,
            zero, or negative are replaced by the neutral 1.0.

    Returns:
        ``(ok, sigma_cols)``. ``ok`` is False when the input is missing or
        unreadable, or when the detected sigma columns are not numeric.
        ``sigma_cols`` lists the scaled columns; it is empty when no
        sigma-like column was found, in which case the data is written
        unchanged and ``ok`` is True.
    """
    if not sub_csv.exists():
        print("⚠️ Submission not found:", sub_csv)
        return False, []
    try:
        df = pd.read_csv(sub_csv)
    except Exception as e:
        print("❌ Failed to read submission CSV:", e)
        return False, []

    sigma_cols = detect_sigma_columns(df)
    if sigma_cols:
        try:
            alpha = float(alpha)
        except (TypeError, ValueError):
            alpha = float("nan")
        if not (np.isfinite(alpha) and alpha > 0):
            # Zero or negative scaling would emit invalid uncertainties.
            print(f"⚠️ Unusable alpha={alpha}; falling back to 1.0.")
            alpha = 1.0
        try:
            df[sigma_cols] = df[sigma_cols].astype(float) * alpha
        except (TypeError, ValueError) as e:
            print("❌ Sigma-like columns are not numeric:", e)
            return False, []
    else:
        # Fallback: if columns alternate mu/sigma per bin with patterns, user can adapt here
        print("⚠️ No sigma-like columns detected; no scaling applied.")

    out_csv.parent.mkdir(parents=True, exist_ok=True)
    # Let pandas write the file itself: routing the CSV text through
    # Path.write_text() uses the locale encoding and doubles the line
    # terminator on platforms whose os.linesep is "\r\n".
    df.to_csv(out_csv, index=False)
    if sigma_cols:
        print(f"✅ Wrote calibrated submission with alpha={alpha} to", out_csv)
    return True, sigma_cols

# Use the fitted global scale; fall back to 1.0 when `scales` is empty/None or has no "alpha".
alpha = float((scales or {}).get("alpha", 1.0))
# `ok` is False when the submission CSV is missing or unreadable; `used_cols` lists the scaled columns.
ok, used_cols = scale_submission_sigmas(SUBMISSION_CSV, CALIBRATED_SUBMISSION_CSV, alpha)
print("Sigma columns scaled:", used_cols)


## 🔬 (Optional) Per-bin scaling (if binwise stats available)

In [None]:
# If DIAG_SUMMARY contains per-bin RMSE and mean sigma per wavelength/bin,
# compute per-bin alpha and (optionally) save a separate per-bin-calibrated submission.
import numpy as np
import pandas as pd

# Outputs of the optional per-bin calibration; separate files from the global-scale outputs.
PERBIN_FACTORS_JSON = CAL_DIR / "sigma_scale_perbin.json"
CALIBRATED_PERBIN_SUBMISSION_CSV = SUBMIT_DIR / "submission_calibrated_perbin.csv"

def compute_perbin_alphas(diag_data):
    """Derive one sigma scale factor per bin from per-bin diagnostics.

    Expects ``diag_data["per_bin"]`` to be a dict holding equal-length lists
    ``rmse`` and ``mean_sigma`` (alias ``sigma_mean``). For each bin the
    factor is ``sqrt(rmse^2 / (mean_sigma^2 + 1e-12))``. Bins whose mean sigma
    is not positive, or whose factor is non-finite or not positive, get the
    neutral factor 1.0 so that applying the factors can never zero out or
    corrupt a sigma column (and the list always serialises to valid JSON).

    Returns:
        List of float factors, or ``None`` when the expected per-bin lists are
        absent, differ in length, or contain non-numeric entries.
    """
    if not isinstance(diag_data, dict):
        return None
    per = diag_data.get("per_bin")
    if not isinstance(per, dict):
        return None

    rmse = per.get("rmse")
    msig = per.get("mean_sigma") or per.get("sigma_mean")
    if not (isinstance(rmse, list) and isinstance(msig, list) and len(rmse) == len(msig)):
        return None

    try:
        rmse_arr = np.array(rmse, dtype=float)
        msig_arr = np.array(msig, dtype=float)
    except (TypeError, ValueError):
        # Non-numeric entries: treat the per-bin stats as unavailable.
        return None

    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
        raw = np.sqrt(rmse_arr**2 / (msig_arr**2 + 1e-12))
        usable = (msig_arr > 0) & np.isfinite(raw) & (raw > 0)
    return np.where(usable, raw, 1.0).tolist()

# Compute per-bin factors from the diagnostics; None or an empty list means "nothing to do".
perbin_alphas = compute_perbin_alphas(diag)
if perbin_alphas:
    with open(PERBIN_FACTORS_JSON, "w", encoding="utf-8") as f:
        json.dump({"per_bin_alphas": perbin_alphas}, f, indent=2)
    print("Saved per-bin scale factors:", PERBIN_FACTORS_JSON)

    # Attempt to apply to submission if it has distinct sigma columns per-bin
    try:
        df = pd.read_csv(SUBMISSION_CSV)
        # Same heuristic as the global-scaling step, so both agree on the columns.
        sigma_cols = detect_sigma_columns(df)
        # If sigma columns equal in number to perbin_alphas, map directly
        # NOTE(review): assumes sigma columns appear in the same order as the per-bin stats — confirm.
        if sigma_cols and len(sigma_cols) == len(perbin_alphas):
            df[sigma_cols] = df[sigma_cols].astype(float) * np.array(perbin_alphas, dtype=float)
            # Let pandas write the file itself: routing the CSV text through
            # Path.write_text() uses the locale encoding and doubles the line
            # terminator on platforms whose os.linesep is "\r\n".
            df.to_csv(CALIBRATED_PERBIN_SUBMISSION_CSV, index=False)
            print("✅ Wrote per-bin calibrated submission:", CALIBRATED_PERBIN_SUBMISSION_CSV)
        else:
            print("ℹ️ Per-bin scaling not applied: mismatch between sigma columns and per-bin factors.")
    except Exception as e:
        # Optional step: report the problem and carry on with the notebook.
        print("ℹ️ Per-bin scaling skipped due to error:", e)
else:
    print("ℹ️ No per-bin stats detected; skipping per-bin scaling.")


## 🔁 (Optional) Cycle-consistency via forward simulation

In [None]:
import shutil, subprocess

def run_cycle_consistency(sub_csv: Path):
    """Run the forward-simulation cycle check on *sub_csv* through the ``spectramind`` CLI.

    Returns True when the simulate command exits successfully. Returns False
    when the CLI is not on PATH or the command fails for any reason; a
    message is printed in both cases.
    """
    if not shutil.which("spectramind"):
        print("ℹ️ spectramind CLI not found; skipping forward-sim cycle test.")
        return False

    sim_dir = CAL_DIR / "simulated_observations"
    # Hypothetical CLI signature; adjust to your repo's `simulate` subcommand if present
    cmd = [
        "spectramind",
        "simulate",
        "--from-spectra",
        str(sub_csv),
        "--out",
        str(sim_dir),
    ]
    print("Running:", " ".join(cmd))
    try:
        subprocess.check_call(cmd, timeout=3600)
        print("✅ Forward simulation produced artifacts in:", sim_dir)
        return True
    except Exception as err:
        print("ℹ️ Forward-sim step failed or not implemented:", err)
        return False

# Try on calibrated submission if present; fallback to original
# Prefer the calibrated submission; otherwise fall back to the original one.
target_csv = SUBMISSION_CSV
if CALIBRATED_SUBMISSION_CSV.exists():
    target_csv = CALIBRATED_SUBMISSION_CSV

if target_csv.exists():
    _ = run_cycle_consistency(target_csv)
else:
    _ = print("ℹ️ No submission CSV available for cycle check.")


## 🧾 Append run metadata to `v50_debug_log.md`

In [None]:
from datetime import datetime

# Markdown log entry summarising this run: when and where it ran, which inputs
# were present, which alpha was used, and whether the calibrated CSV exists now.
# Relies on ROOT, DIAG_SUMMARY, SUBMISSION_CSV, CALIBRATED_SUBMISSION_CSV,
# `scales` and the `platform` import from earlier cells.
entry = f"""### Notebook: 05_uncertainty_calibration_and_cycle_consistency.ipynb
- timestamp: {datetime.now().isoformat(timespec="seconds")}
- cwd: {ROOT}
- python: {platform.python_version()}
- actions:
  - diag_summary_present: {DIAG_SUMMARY.exists()}
  - submission_csv_present: {SUBMISSION_CSV.exists()}
  - alpha_used: {(scales or {}).get("alpha", 1.0)}
  - calibrated_submission_csv_exists: {CALIBRATED_SUBMISSION_CSV.exists()}
"""
# Append mode keeps earlier entries; a failed write is reported, not raised.
try:
    with open(LOG_MD, "a", encoding="utf-8") as f:
        f.write(entry + "\n")
    print(f"Appended notebook log entry to {LOG_MD}")
except Exception as e:
    print(f"⚠️ Could not append to {LOG_MD}: {e}")
