# 🛰️ SpectraMind V50 — Physics‑Informed Modeling & Symbolic Constraints (Notebook 06)

**Purpose.** Add *physics‑informed, symbolic constraints* to the SpectraMind V50 pipeline and run diagnostics for
constraint violations and cycle‑consistency. The notebook adheres to the CLI‑first, Hydra‑safe workflow used in 00–05.

**Sections**
1. Pre‑flight & environment capture
2. Compose Hydra overrides for symbolic losses
3. Train with symbolic constraints
4. Symbolic diagnostics (rule ranking/overlays)
5. Cycle‑consistency (simulate light curves from μ → validate)
6. Artifacts & next steps

> Degrades gracefully: if the `spectramind` CLI is not available, the notebook switches to **DRY‑RUN** and still produces configs/logs/placeholder artifacts to keep the workflow reproducible.


In [None]:
# ░░ Pre‑flight: environment, run IDs, paths, CLI detection ░░
import os, sys, json, platform, shutil, subprocess, datetime, pathlib

# Timestamp the run in UTC. datetime.utcnow() is deprecated since Python 3.12,
# so take a timezone-aware "now" instead; the formatted string is unchanged.
RUN_TS = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
RUN_ID = f"physics_informed_{RUN_TS}"

# Everything this notebook writes lives under ROOT_OUT/<RUN_ID>/.
ROOT_OUT = "/mnt/data/physics_informed"
ARTIFACTS = os.path.join(ROOT_OUT, RUN_ID)
LOGS = os.path.join(ARTIFACTS, "logs")
CFG_OUT = os.path.join(ARTIFACTS, "configs")
DIAG_OUT = os.path.join(ARTIFACTS, "diagnostics")
SIM_OUT = os.path.join(ARTIFACTS, "simulation")

# Create the full layout up front so later cells can write without checking.
for p in (ROOT_OUT, ARTIFACTS, LOGS, CFG_OUT, DIAG_OUT, SIM_OUT):
    os.makedirs(p, exist_ok=True)

def which(cmd: str):
    """Report whether `cmd` resolves to an executable via `shutil.which`."""
    resolved = shutil.which(cmd)
    return resolved is not None


# The notebook runs in DRY-RUN mode exactly when the `spectramind` CLI is missing.
CLI_PRESENT = which("spectramind")
DRY_RUN = not CLI_PRESENT

def git_cmd(args):
    """Run `git <args>` and return its stripped output, or None on any failure.

    stderr is merged into stdout and the call is limited to 5 seconds. A
    non-zero exit status, a timeout, a missing `git` binary, or output that
    cannot be decoded all result in None.
    """
    try:
        completed = subprocess.run(
            ["git", *args],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            timeout=5,
            check=True,
        )
        # Decode inside the try so undecodable output also maps to None.
        return completed.stdout.decode().strip()
    except Exception:
        return None

# Where this run writes each category of output.
run_paths = {
    "artifacts": ARTIFACTS,
    "logs": LOGS,
    "configs": CFG_OUT,
    "diagnostics": DIAG_OUT,
    "simulation": SIM_OUT,
}

# Git state of the working tree; each value is None when the git call fails.
git_queries = {
    "commit": ["rev-parse", "HEAD"],
    "branch": ["rev-parse", "--abbrev-ref", "HEAD"],
    "status": ["status", "--porcelain"],
}
git_state = {label: git_cmd(argv) for label, argv in git_queries.items()}

# Environment snapshot saved next to the run's artifacts and echoed below.
env = {
    "python": " ".join(sys.version.split("\n")),
    "platform": platform.platform(),
    "cli_present": CLI_PRESENT,
    "dry_run": DRY_RUN,
    "run_id": RUN_ID,
    "paths": run_paths,
    "git": git_state,
}
env_json = json.dumps(env, indent=2)
with open(os.path.join(ARTIFACTS, "env.json"), "w") as f:
    f.write(env_json)

print("=== SpectraMind V50 — Notebook 06 ===")
print(env_json)


## Configuration knobs (Hydra overrides)

**Symbolic losses** enabled here:
- `nonnegativity` — penalize negative flux/μ
- `smoothness` — L2 gradient/curvature prior on μ
- `fft_coherence` — spectral structure coherence
- `molecular_priors` — optional rule pack (H₂O/CO₂/CH₄ bands)

> Start with small weights and increase gradually while monitoring GLL and violation dashboards.


In [None]:
import json, os

# Symbolic-loss switches and weights.
symbolic_overrides = {
    "loss.symbolic.enable": "true",
    "loss.symbolic.weights.nonnegativity": "1.0",
    "loss.symbolic.weights.smoothness": "0.05",
    "loss.symbolic.weights.fft_coherence": "0.10",
    "loss.symbolic.molecular_priors.enable": "true",
    "loss.symbolic.molecular_priors.pack": "default_v1",
}

# Diagnostics, training, and data/model selection.
run_overrides = {
    "diagnostics.symbolic.top_k": "12",
    "training.max_epochs": "12",
    "training.batch_size": "16",
    "data": "ariel_nominal",
    "model": "v50",
    "training.seed": "1337",
}

# Merge in this order: the key order is what gets saved and later passed to the CLI.
overrides = {**symbolic_overrides, **run_overrides}

# Persist the overrides alongside the run so the exact settings are recorded.
cfg_file = os.path.join(CFG_OUT, "symbolic_overrides.json")
overrides_json = json.dumps(overrides, indent=2)
with open(cfg_file, "w") as f:
    f.write(overrides_json)

print("Saved overrides ->", cfg_file)
print(overrides_json)


## Helper: robust CLI runner (uses DRY‑RUN when CLI not present)

In [None]:
import shlex, subprocess, time

def _read_log_text(path):
    """Return the text of a captured log file, or "" if it cannot be opened.

    Undecodable bytes are replaced rather than raising, so one stray byte in
    a child's output does not discard the whole log.
    """
    try:
        with open(path, "r", errors="replace") as fh:
            return fh.read()
    except OSError:
        return ""


def run_cli(cmd_list, log_name="run"):
    """Run a command, capturing stdout/stderr into `<log_name>.log` / `.err` under LOGS.

    When the module-level DRY_RUN flag is true nothing is executed: the
    command line that would have run is written to the log file, appended to
    `dry_run_placeholder.txt` under ARTIFACTS, and returned as `stdout`.

    Args:
        cmd_list: Command and arguments as a list (no shell is involved).
        log_name: Basename, without extension, of the log files in LOGS.

    Returns:
        A dict with keys `cmd`, `dry_run`, `returncode`, `stdout`, `stderr`
        and `elapsed_sec`. `returncode` is 0 in DRY-RUN and 99 when the
        process could not be started or waited on.
    """
    log_path = os.path.join(LOGS, f"{log_name}.log")
    err_path = os.path.join(LOGS, f"{log_name}.err")
    start = time.time()
    result = {"cmd": cmd_list, "dry_run": DRY_RUN, "returncode": 0, "stdout": "", "stderr": ""}

    if DRY_RUN:
        msg = f"[DRY-RUN] Would execute: {' '.join(shlex.quote(c) for c in cmd_list)}\n"
        result["stdout"] = msg
        with open(log_path, "w") as f:
            f.write(msg)
        with open(err_path, "w") as f:
            f.write("")
        # Accumulate every skipped command in one placeholder artifact.
        placeholder = os.path.join(ARTIFACTS, "dry_run_placeholder.txt")
        with open(placeholder, "a") as f:
            f.write(msg)
        # Keep the result shape identical to a real run.
        result["elapsed_sec"] = round(time.time() - start, 3)
        return result

    with open(log_path, "wb") as out, open(err_path, "wb") as err:
        try:
            proc = subprocess.Popen(cmd_list, stdout=out, stderr=err, env=os.environ.copy())
            proc.wait()
            result["returncode"] = proc.returncode
        except Exception as e:
            # Launch/wait failure: report it through the same .err file and a
            # sentinel return code instead of raising into the notebook.
            result["returncode"] = 99
            with open(err_path, "ab") as errf:
                errf.write(str(e).encode())

    # Read the captured output back with handles that are closed promptly.
    result["stdout"] = _read_log_text(log_path)
    result["stderr"] = _read_log_text(err_path)
    result["elapsed_sec"] = round(time.time() - start, 3)
    print(f"[rc={result['returncode']}] logs: {log_path}")
    return result


## Train with physics‑informed symbolic constraints

In [None]:
# Base training command; outputs are redirected into this run's artifact folder.
cmd = [
    "spectramind",
    "train",
    "--config-name",
    "config_v50.yaml",
    f"+outputs.root_dir={ARTIFACTS}",
]
# Every saved override is forwarded as a `+key=value` argument.
cmd.extend(f"+{key}={value}" for key, value in overrides.items())
cmd.append("+training.fast_mode=true")  # if supported

res_train = run_cli(cmd, log_name="01_train_symbolic")
print(res_train["stdout"][:500])

train_rc = res_train["returncode"]
if train_rc not in (None, 0):
    print("Training non-zero return code:", train_rc)


## Symbolic diagnostics & rule ranking

In [None]:
# Rule ranking: reuse the top-k value from the saved overrides (default "12").
top_k = overrides.get("diagnostics.symbolic.top_k", "12")
cmd_diag = ["spectramind", "diagnose", "symbolic-rank", "--top-k", top_k, "--export", DIAG_OUT]
res_diag = run_cli(cmd_diag, log_name="02_diagnose_symbolic_rank")
print(res_diag["stdout"][:500])

# HTML dashboard written into the diagnostics folder.
dashboard_html = os.path.join(DIAG_OUT, "diagnostic_report_v1.html")
cmd_dash = ["spectramind", "diagnose", "dashboard", "--out", dashboard_html, "--show-logic-graph"]
res_dash = run_cli(cmd_dash, log_name="03_diagnose_dashboard")
print(res_dash["stdout"][:500])


## Cycle‑consistency: simulate → validate

In [None]:
# Write a tiny all-zero placeholder μ CSV when none exists, so the commands
# below always have an input file (DRY-RUN friendly).
mu_csv = os.path.join(ARTIFACTS, "pred_mu.csv")
if not os.path.exists(mu_csv):
    import csv

    header = ["planet_id", *(f"mu_{i:03d}" for i in range(283))]
    placeholder_rows = [[pid, *([0.0] * 283)] for pid in ("P0001", "P0002", "P0003")]
    with open(mu_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(placeholder_rows)

# Step 1: simulate light curves from the μ table.
cmd_sim = [
    "spectramind", "simulate-lightcurve-from-mu",
    "--mu-csv", mu_csv,
    "--out", SIM_OUT,
]
res_sim = run_cli(cmd_sim, log_name="04_simulate_from_mu")
print(res_sim["stdout"][:500])

# Step 2: validate the simulation output against the same μ table.
cycle_report = os.path.join(DIAG_OUT, "cycle_consistency.json")
cmd_cc = [
    "spectramind", "validate", "cycle-consistency",
    "--sim-dir", SIM_OUT,
    "--mu-ref", mu_csv,
    "--out", cycle_report,
]
res_cc = run_cli(cmd_cc, log_name="05_cycle_consistency")
print(res_cc["stdout"][:500])


## Browse produced artifacts

In [None]:
import os

def tree(path, prefix=""):
    """Return the lines of an ASCII-art listing of everything under `path`.

    Entries are sorted by name; directories are followed by their own
    contents, indented beneath them. `prefix` is the indentation carried
    down from the parent levels.
    """
    entries = sorted(os.listdir(path))
    last_index = len(entries) - 1
    rendered = []
    for index, entry in enumerate(entries):
        is_last = index == last_index
        branch = "└── " if is_last else "├── "
        rendered.append(f"{prefix}{branch}{entry}")
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            # Below the last entry there is no vertical rail to continue.
            indent = "    " if is_last else "│   "
            rendered += tree(child, prefix + indent)
    return rendered

# List everything this run produced, one tree line per row.
print("ARTIFACTS TREE:", ARTIFACTS)
print(*tree(ARTIFACTS), sep="\n")

# Show the dashboard location only if the report file actually exists.
dash_path = os.path.join(DIAG_OUT, "diagnostic_report_v1.html")
dash_label = dash_path if os.path.exists(dash_path) else "(not found)"
print("\nDashboard:", dash_label)


## Pipeline sketch (Mermaid)

```mermaid
flowchart LR
  A[Calibrated data] --> B[Train with symbolic losses]
  B --> C[Predict μ, σ]
  C --> D[Symbolic diagnostics<br/>rule ranking & overlays]
  C --> E[Simulate lightcurves from μ]
  E --> F[Cycle‑consistency validation]
  D --> G[Dashboard / Reports]
  F --> G
```

## Next steps
- Sweep symbolic weights via `spectramind ablate` and compare GLL vs. violation score.
- Enable molecule‑specific prior packs where available; monitor per‑band improvements.
- Integrate outputs into your unified HTML report and CI for regression checks.
