<details>
<summary>
📊 MASTER REPORT GENERATOR + other stuff
</summary>
<div style="padding: 15px;">

**One-cell “Master Report Index” generator**
Drop it at the end of `02_Data_Validation.ipynb`. It auto-discovers any reports that exist in `Level_3/reports/`, adds file size + last-modified timestamps, and writes a pretty `index.html` you can open in a browser or commit to your repo.

```python
# ============================================================
# 📚 Build Master HTML Index for Data Validation Reports
# ============================================================
from pathlib import Path
from datetime import datetime

# Directory that is scanned for reports; index.html is written here as well.
# Created up front so the cell also works before any report has been generated.
REPORT_DIR = Path("Level_3/reports")
REPORT_DIR.mkdir(parents=True, exist_ok=True)

# Known report files we try to link, as (display label, file name inside REPORT_DIR).
# An entry is only linked if its file is present when the rows are collected.
candidates = [
    ("Missingness ‚Äî Pre",          "missingness_pre.html"),
    ("Missingness ‚Äî Post",         "missingness_post.html"),
    ("Missingness ‚Äî Œî (post‚àípre)", "missingness_delta.html"),
    ("Low-Variance Summary",       "low_variance_summary.html"),
    ("Numeric Coercion Status",    "numeric_coercion_status.html"),
    ("Categorical Levels Audit",   "categorical_levels_audit.html"),
    ("Invalid / Out-of-Range (CSV)", "invalid_out_of_range.csv"),
    ("Validation Summary (CSV)",   "validation_summary.csv"),
]

def fmt_size(n: int) -> str:
    """Format a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 and is then
    printed without decimals ("0 B", "1 KB", "5 MB", ...). Anything that is
    still >= 1024 after the TB step is reported in PB with one decimal.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = n
    position = 0
    while position < len(units):
        if value < 1024:
            return f"{value:.0f} {units[position]}"
        value = value / 1024
        position += 1
    return f"{value:.1f} PB"

# Collect one (label, file name, readable size, last-modified) tuple per report found on disk.
rows = []
for label, fname in candidates:
    report_file = REPORT_DIR / fname
    if not report_file.exists():
        continue  # report not generated (yet): leave it out of the index
    info = report_file.stat()
    modified = datetime.fromtimestamp(info.st_mtime)
    rows.append((label, fname, fmt_size(info.st_size), modified.strftime("%Y-%m-%d %H:%M:%S")))

# Timestamp shown in the page header; the page is built even when no report was found.
generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Table body: one <tr> per collected report, or a single placeholder row when none exist.
if rows:
    table_rows = "".join(
        f'<tr><td>{label}</td><td><a href="{fname}">{fname}</a></td><td>{size}</td><td>{mtime}</td></tr>'
        for (label, fname, size, mtime) in rows
    )
else:
    table_rows = '<tr><td colspan="4" class="empty">No reports found yet. Run the validation cells to generate artifacts.</td></tr>'

# Full page. CSS braces are doubled because this is an f-string.
html = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Data Validation ‚Äî Report Index</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
  :root {{
    --bg:#0b1020; --card:#11162a; --text:#e8edf7; --muted:#a8b3c7; --accent:#7aa2ff; --border:#1e2440;
  }}
  body {{
    background: var(--bg); color: var(--text); font: 14px/1.45 system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
    padding: 32px; margin: 0;
  }}
  .wrap {{ max-width: 980px; margin: 0 auto; }}
  h1 {{ margin: 0 0 10px; font-size: 26px; }}
  p.sub {{ color: var(--muted); margin: 0 0 24px; }}
  table {{
    width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border);
    border-radius: 12px; overflow: hidden;
  }}
  th, td {{ padding: 12px 14px; border-bottom: 1px solid var(--border); text-align: left; }}
  th {{ background: #131a31; color: #cfd8ea; font-weight: 700; font-size: 13px; letter-spacing: .02em; }}
  tr:last-child td {{ border-bottom: 0; }}
  a {{ color: var(--accent); text-decoration: none; }}
  a:hover {{ text-decoration: underline; }}
  .tag {{ background:#1a2242; color:#aecdff; padding:2px 8px; border-radius:999px; font-size:12px; }}
  .empty {{ color: var(--muted); font-style: italic; padding: 14px; }}
</style>
</head>
<body>
<div class="wrap">
  <h1>Data Validation ‚Äî Report Index</h1>
  <p class="sub">Generated: {generated_at}</p>
  <table role="grid">
    <thead>
      <tr>
        <th>Report</th>
        <th>File</th>
        <th>Size</th>
        <th>Last Modified</th>
      </tr>
    </thead>
    <tbody>
      {table_rows}
    </tbody>
  </table>

  <p style="margin-top:18px;color:#a8b3c7">
    Tip: commit this folder to your repo so reviewers can browse your validation trail.
  </p>
</div>
</body>
</html>"""

# The index sits next to the reports, so the relative hrefs above resolve.
index_path = REPORT_DIR / "index.html"
index_path.write_text(html, encoding="utf-8")
print(f"üìñ Master index written to: {index_path.resolve()}")
```

**How to use**

1. Run your validation cells (missingness, low-variance, numeric coercion, categorical audit, range checks).
2. Run the **Master Index** cell above.
3. Open `Level_3/reports/index.html` in your browser — one click to every artifact.

Want me to add this same idea to your **Statistics** and **Modeling** notebooks so each stage has its own mini-portal?

In [None]:

```python
# A tiny, reusable **report I/O toolkit** you can paste near the top of each notebook
# (or into a `utils/report_io.py`). It standardizes *where* and *how* you save artifacts across Validation / Statistics / Modeling.

# ============================================================
# 📦 Report I/O Helpers — standardized saves under Level_3/reports
# ============================================================
from pathlib import Path
from datetime import datetime
import json

# PyYAML is optional: if the import fails for any reason, `yaml` is set to None
# and save_yaml() checks for that before use.
try:
    import yaml  # optional, only if you save YAML
except Exception:
    yaml = None

# Base directory that relative paths handed to the save_* helpers are resolved against.
REPORT_ROOT = Path("Level_3/reports")

def _ensure_parent(p: Path) -> Path:
    """Create the directory that will contain *p* (including missing ancestors) and return *p*."""
    containing_dir = p.parent
    containing_dir.mkdir(parents=True, exist_ok=True)
    return p

def _resolve(rel_path: str | Path) -> Path:
    """Map *rel_path* to its location under REPORT_ROOT and make sure its parent directory exists.

    Relative input such as 'statistics/corr.html' is placed below REPORT_ROOT.
    Absolute input is honoured as given (relative paths are preferred).
    """
    candidate = Path(rel_path)
    target = candidate if candidate.is_absolute() else REPORT_ROOT / candidate
    return _ensure_parent(target)

def with_timestamp(rel_path: str | Path, stamp: str | None = None, fmt: str = "%Y%m%d-%H%M%S") -> Path:
    """
    Insert a timestamp before extension: 'dir/file.html' -> 'dir/file_YYYYmmdd-HHMMSS.html'
    """
    p = Path(rel_path)
    ts = stamp or datetime.now().strftime(fmt)
    return p.with_name(f"{p.stem}_{ts}{p.suffix}")

# ---------- HTML (Styler or raw HTML string) ----------
def save_styler(styler, rel_path: str | Path) -> Path:
    """Write a pandas Styler as HTML via its ``to_html`` method.

    *rel_path* is resolved with ``_resolve``. The written path is printed
    and returned.
    """
    destination = _resolve(rel_path)
    styler.to_html(destination)
    print(f"üóÇÔ∏è Saved HTML (Styler): {destination.resolve()}")
    return destination

def save_html(html: str, rel_path: str | Path) -> Path:
    """Write a raw HTML string as UTF-8 to the path *rel_path* resolves to; return that path."""
    destination = _resolve(rel_path)
    destination.write_text(html, encoding="utf-8")
    print(f"üóÇÔ∏è Saved HTML: {destination.resolve()}")
    return destination

# ---------- Tables (CSV / JSON / YAML) ----------
def save_csv(df, rel_path: str | Path, **to_csv_kwargs) -> Path:
    """Write *df* as CSV to the path *rel_path* resolves to and return that path.

    The index column is omitted by default. Extra keyword arguments are
    forwarded to ``df.to_csv``; passing ``index=True`` explicitly overrides
    the default.
    """
    # Apply the default without hard-coding the keyword in the call: with
    # `to_csv(path, index=False, **kwargs)` a caller-supplied `index=` raised
    # "got multiple values for keyword argument 'index'".
    to_csv_kwargs.setdefault("index", False)
    path = _resolve(rel_path)
    df.to_csv(path, **to_csv_kwargs)
    print(f"üìÑ Saved CSV: {path.resolve()} (rows={len(df)})")
    return path

def save_json(obj, rel_path: str | Path, **json_kwargs) -> Path:
    """Serialize *obj* as JSON (UTF-8) to the path *rel_path* resolves to and return that path.

    Output is indented by 2 spaces by default. Extra keyword arguments are
    forwarded to ``json.dumps``; an explicit ``indent=`` overrides the default.
    """
    # Default, not hard-coded: `json.dumps(obj, indent=2, **kwargs)` raised a
    # duplicate-keyword TypeError as soon as a caller passed its own `indent`.
    json_kwargs.setdefault("indent", 2)
    path = _resolve(rel_path)
    path.write_text(json.dumps(obj, **json_kwargs), encoding="utf-8")
    print(f"üìÑ Saved JSON: {path.resolve()}")
    return path

def save_yaml(obj, rel_path: str | Path) -> Path:
    """Dump *obj* as YAML (UTF-8, key order preserved) to the path *rel_path* resolves to.

    Raises:
        ImportError: if PyYAML could not be imported (module-level ``yaml`` is None).
    """
    # Explicit raise instead of `assert`: assertions are removed under `python -O`,
    # which would turn this check into an AttributeError on None further down.
    if yaml is None:
        raise ImportError("pyyaml not installed ‚Äî run `pip install pyyaml`")
    path = _resolve(rel_path)
    path.write_text(yaml.safe_dump(obj, sort_keys=False), encoding="utf-8")
    print(f"üìÑ Saved YAML: {path.resolve()}")
    return path

def save_text(text: str, rel_path: str | Path) -> Path:
    """Write plain *text* as UTF-8 to the path *rel_path* resolves to; return that path."""
    destination = _resolve(rel_path)
    destination.write_text(text, encoding="utf-8")
    print(f"üìù Saved text: {destination.resolve()}")
    return destination

# ---------- Figures ----------
def save_fig(fig, rel_path: str | Path, dpi: int = 150, tight: bool = True, transparent: bool = False) -> Path:
    """Save a Matplotlib figure through ``fig.savefig`` (pass ``plt.gcf()`` if needed).

    ``bbox_inches="tight"`` is only passed when *tight* is true; *dpi* and
    *transparent* are always forwarded. The written path is printed and returned.
    """
    destination = _resolve(rel_path)
    savefig_options = {"dpi": dpi}
    if tight:
        savefig_options["bbox_inches"] = "tight"
    savefig_options["transparent"] = transparent
    fig.savefig(destination, **savefig_options)
    print(f"üñºÔ∏è Saved figure: {destination.resolve()}")
    return destination

# ---------- Archives ----------
def save_zip(rel_zip_path: str | Path, files: list[Path | str]) -> Path:
    """Bundle one or more files (plots, htmls, etc.) into a ZIP under reports/.

    Files below REPORT_ROOT keep their path relative to it inside the archive;
    any other file is stored under its bare file name. Inputs that do not
    exist are left out of the archive and listed in a printed notice.

    Returns:
        The path of the written ZIP file.
    """
    import zipfile

    zip_path = _resolve(rel_zip_path)
    skipped = []
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for f in files:
            f = Path(f)
            if not f.exists():
                # Still best-effort, but remember it so the omission is visible.
                skipped.append(f)
                continue
            # Store with a pretty, relative arcname inside the zip.
            # relative_to() raises ValueError when f is not below REPORT_ROOT.
            try:
                arc = f.relative_to(REPORT_ROOT)
            except ValueError:
                arc = f.name
            zf.write(f, arcname=str(arc))
    print(f"üóúÔ∏è Saved ZIP: {zip_path.resolve()}")
    if skipped:
        print(f"Skipped {len(skipped)} missing file(s): {', '.join(str(s) for s in skipped)}")
    return zip_path
```

### 🛠️ Common patterns (copy/paste)

```python
# 1) Save a Styler (Validation or Stats tables)
#    Relative paths are resolved under REPORT_ROOT; parent folders are created as needed.
_, sty = some_function_returning_df_and_styler()
save_styler(sty, "statistics/bvc_test_table.html")  # -> Level_3/reports/statistics/bvc_test_table.html

# 2) Timestamped export to avoid overwriting
#    with_timestamp() turns 'bvc_test_table.html' into 'bvc_test_table_YYYYmmdd-HHMMSS.html'.
path = save_styler(sty, with_timestamp("statistics/bvc_test_table.html"))

# 3) Save CSV logs / violations (written without the index column by default)
save_csv(violations_df, "validation/invalid_out_of_range.csv")

# 4) Save a Matplotlib figure (defaults: dpi=150, tight bounding box)
import matplotlib.pyplot as plt
fig = plt.figure()
# ... draw ...
save_fig(fig, "modeling/roc_auc.png")

# 5) Bundle multiple artifacts
#    Only files that exist are added; paths under REPORT_ROOT keep their relative layout in the zip.
files = [
    REPORT_ROOT/"statistics/bvc_box_violin_1.html",
    REPORT_ROOT/"statistics/bvc_box_violin_2.html",
    REPORT_ROOT/"statistics/bvc_box_violin_3.html",
]
save_zip("statistics/bvc_box_violin.zip", files)

# 6) Save config/state
save_json(model_config, "modeling/model_config.json")
# or YAML (requires PyYAML)
# save_yaml(model_config, "modeling/model_config.yaml")
```

### ✅ Why this helps

* **One path convention** → everything lands under `Level_3/reports/…`.
* **Minimal repetition** → fewer mistakes, cleaner cells.
* **Timestamp option** → keep historical runs without manual renaming.
* **ZIP utility** → ship plot galleries and artifacts neatly.

In [None]:
# **A tiny `register_artifact(label, rel_path)` that appends to a lightweight JSON ledger you can render into the master index later.**
# Perfect — here’s a tiny **artifact ledger** you can drop in your helpers block. It lets you:

# * `register_artifact(label, rel_path, stage, tags, meta)` → append/update a JSON ledger
# * `list_artifacts(...)` → query the ledger
# * `render_index_from_ledger(output)` → build a pretty HTML index from the ledger (so you don’t have to rescan the filesystem)

# ============================================================
# 📒 Artifact Ledger — register & index reports under Level_3/reports
# ============================================================
from pathlib import Path
from datetime import datetime
import json, os

# Root folder for all report artifacts; ledger paths are stored relative to it.
REPORT_ROOT = Path("Level_3/reports")
# JSON file holding the list of registered artifacts.
LEDGER_PATH = REPORT_ROOT / "_ledger.json"
# Create the root eagerly so the ledger can be written on first use.
REPORT_ROOT.mkdir(parents=True, exist_ok=True)

def _now_iso():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"

def _fmt_size(n: int) -> str:
    """Render a byte count as a compact string such as '0 B', '12 KB' or '3 MB'.

    Each step divides by 1024; units up to TB are shown without decimals,
    and whatever remains after TB is shown in PB with one decimal.
    """
    remaining = n
    for suffix in ("B", "KB", "MB", "GB", "TB"):
        if remaining < 1024:
            return f"{remaining:.0f} {suffix}"
        remaining = remaining / 1024
    return f"{remaining:.1f} PB"

def _load_ledger() -> list[dict]:
    """Read the artifact ledger from LEDGER_PATH.

    Returns:
        The list of ledger entries. An empty list is returned when the file
        is missing, unreadable, not valid JSON, or does not contain a JSON
        list; in the last three cases a warning is printed, because the next
        ledger save will replace the file.
    """
    if not LEDGER_PATH.exists():
        return []
    try:
        data = json.loads(LEDGER_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f"Warning: could not read ledger {LEDGER_PATH} ({exc}); treating it as empty.")
        return []
    if not isinstance(data, list):
        print(f"Warning: ledger {LEDGER_PATH} does not contain a JSON list; treating it as empty.")
        return []
    # Callers use dict access on every row, so drop anything that is not a JSON object.
    return [row for row in data if isinstance(row, dict)]

def _save_ledger(rows: list[dict]) -> None:
    """Write *rows* to LEDGER_PATH as 2-space-indented JSON (UTF-8), creating its folder if needed."""
    ledger_dir = LEDGER_PATH.parent
    ledger_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(rows, indent=2)
    LEDGER_PATH.write_text(payload, encoding="utf-8")

def register_artifact(
    label: str,
    rel_path: str | Path,
    stage: str | None = None,             # e.g., "validation", "statistics", "modeling"
    tags: list[str] | None = None,        # e.g., ["bvc","effect-size","html"]
    meta: dict | None = None              # any extras you want to record
) -> dict:
    """
    Record (or update) an artifact in the ledger. Idempotent by path.

    Args:
        label: Human-readable name of the artifact.
        rel_path: Location of the artifact. May be relative to REPORT_ROOT
            ('statistics/x.html') or already prefixed with REPORT_ROOT, as
            the paths returned by the save_* helpers are.
        stage: Optional stage name; stored as "" when omitted.
        tags: Optional list of tags; stored as [] when omitted.
        meta: Optional extra data; stored as {} when omitted.

    Returns:
        The ledger entry that was written. If the file exists, its size and
        mtime are filled in; otherwise ``exists`` is False and size is 0.
    """
    requested = Path(rel_path)

    # Strip a leading REPORT_ROOT by path components, not by string prefix.
    # A prefix test also matches siblings such as 'Level_3/reports_old'. The
    # stripped form is used whether or not the file exists yet, so the ledger
    # key never changes between registrations of the same artifact.
    relative = requested
    for root in (REPORT_ROOT, REPORT_ROOT.resolve()):
        try:
            relative = requested.relative_to(root)
        except ValueError:
            continue
        break

    # Joining with an absolute `relative` (a path outside the root) yields that path unchanged.
    abs_path = REPORT_ROOT / relative
    entry = {
        "label": label,
        # Forward slashes: this value is the ledger key and is also used as an href.
        "rel_path": relative.as_posix(),
        "stage": stage or "",
        "tags": list(tags) if tags else [],
        "meta": meta or {},
        "registered_at": _now_iso()
    }

    # enrich with file stats if present
    if abs_path.exists():
        st = abs_path.stat()
        entry.update({
            "exists": True,
            "size": st.st_size,
            "size_h": _fmt_size(st.st_size),
            "mtime": datetime.fromtimestamp(st.st_mtime).strftime("%Y-%m-%d %H:%M:%S")
        })
    else:
        entry.update({"exists": False, "size": 0, "size_h": "0 B", "mtime": ""})

    # load/update ledger by unique key = rel_path
    rows = _load_ledger()
    idx = next((i for i, r in enumerate(rows) if r.get("rel_path") == entry["rel_path"]), None)
    if idx is None:
        rows.append(entry)
    else:
        # keep original registered_at, update the rest
        entry["registered_at"] = rows[idx].get("registered_at", entry["registered_at"])
        rows[idx] = entry
    _save_ledger(rows)
    print(f"üìí Registered: {entry['label']} ‚Üí {entry['rel_path']} ({entry['size_h']})")
    return entry

def list_artifacts(stage: str | None = None, tag: str | None = None) -> list[dict]:
    """
    Return ledger entries, optionally narrowed by stage and/or tag.

    A falsy *stage* or *tag* (None or "") disables that filter. When both are
    given an entry must match the stage exactly and contain the tag.
    """
    def _matches(row: dict) -> bool:
        if stage and row.get("stage") != stage:
            return False
        if tag and tag not in (row.get("tags") or []):
            return False
        return True

    return [row for row in _load_ledger() if _matches(row)]

def render_index_from_ledger(output_rel: str | Path = "index_from_ledger.html", title="Report Index (Ledger)") -> Path:
    """
    Build a single HTML index using only the ledger (fast & consistent).

    Args:
        output_rel: Output file, relative to REPORT_ROOT.
        title: Page title, used for both <title> and the <h1> heading.

    Returns:
        The path of the written HTML file.

    Every value taken from the ledger, and the title, is HTML-escaped before
    it is placed in the page. Labels or paths containing &, <, > or quotes
    would otherwise break the markup or the single-quoted href attribute.
    """
    from html import escape  # function-scope import keeps the block self-contained

    rows = _load_ledger()
    # `or ""` keeps sorting working when a hand-edited ledger holds null values.
    rows.sort(key=lambda r: (str(r.get("stage") or ""), str(r.get("label") or "")))
    generated_at = _now_iso()
    safe_title = escape(str(title))

    def tr(r):
        # escape() also encodes single and double quotes, so the value is safe inside href='...'.
        link = escape(str(r.get("rel_path", "")))
        label = escape(str(r.get("label", "")))
        exists = "‚úÖ" if r.get("exists") else "‚ùå"
        size = escape(str(r.get("size_h", "")))
        mtime = escape(str(r.get("mtime", "")))
        stage = escape(str(r.get("stage", "")))
        tags = escape(", ".join(str(t) for t in (r.get("tags") or [])))
        return f"<tr><td>{stage}</td><td>{label}</td><td><a href='{link}'>{link}</a></td><td>{exists}</td><td>{size}</td><td>{mtime}</td><td>{tags}</td></tr>"

    # CSS braces are doubled because the template is an f-string.
    page = f"""<!doctype html>
<html><head><meta charset="utf-8"><title>{safe_title}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
  :root {{ --bg:#0b1020; --card:#11162a; --text:#e8edf7; --muted:#a8b3c7; --accent:#7aa2ff; --border:#1e2440; }}
  body {{ background:var(--bg); color:var(--text); font:14px/1.45 system-ui,-apple-system,Segoe UI,Roboto,sans-serif; padding:32px; margin:0; }}
  .wrap {{ max-width:1100px; margin:0 auto; }}
  h1 {{ margin:0 0 10px; font-size:26px; }} p.sub {{ color:var(--muted); margin:0 0 24px; }}
  table {{ width:100%; border-collapse:collapse; background:var(--card); border:1px solid var(--border); border-radius:12px; overflow:hidden; }}
  th,td {{ padding:12px 14px; border-bottom:1px solid var(--border); text-align:left; }}
  th {{ background:#131a31; color:#cfd8ea; font-weight:700; font-size:13px; letter-spacing:.02em; }}
  tr:last-child td {{ border-bottom:0; }} a {{ color:var(--accent); text-decoration:none; }} a:hover {{ text-decoration:underline; }}
  .empty {{ color: var(--muted); font-style: italic; padding: 14px; }}
</style></head>
<body><div class="wrap">
<h1>{safe_title}</h1><p class="sub">Generated: {generated_at}</p>
<table role="grid">
<thead><tr><th>Stage</th><th>Label</th><th>File</th><th>Exists</th><th>Size</th><th>Last Modified</th><th>Tags</th></tr></thead>
<tbody>
{("".join(tr(r) for r in rows)) if rows else "<tr><td colspan='7' class='empty'>Ledger empty. Register some artifacts.</td></tr>"}
</tbody>
</table>
</div></body></html>"""

    out_path = REPORT_ROOT / output_rel
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(page, encoding="utf-8")
    print(f"üìñ Ledger index written to: {out_path.resolve()}")
    return out_path


### How to use (quick patterns)

# After saving a Styler/HTML/PNG/CSV, register it:
# rel_path is given relative to Level_3/reports; size and mtime are filled in if the file exists.
register_artifact(
    label="Missingness ‚Äî Pre",
    rel_path="missingness_pre.html",
    stage="validation",
    tags=["missing","html"]
)

# `meta` takes any extra details you want stored with the entry.
register_artifact(
    label="BvC Test Table",
    rel_path="statistics/bvc_test_table.html",
    stage="statistics",
    tags=["bvc","html","tests"],
    meta={"alpha": 0.05, "effect_size": "cohen_d"}
)

# List by stage or tag:
list_artifacts(stage="statistics")
list_artifacts(tag="missing")

# Generate a ledger-driven index (e.g., at end of each stage):
# The output path is relative to Level_3/reports.
render_index_from_ledger("index_from_ledger.html", title="Level 3 ‚Äî All Artifacts (Ledger)")

# **Tips**

# * Call `register_artifact(...)` right after each `save_*` call so the ledger stays in sync.
# * If you overwrite a file, re-register it — the entry updates with the latest size/mtime.
# * Use `stage` values like `"validation"`, `"statistics"`, `"modeling"`, `"insights"` to keep things tidy.