In [None]:
import os, json, nbformat as nbf

# Where the generated review notebook is written (relative path).
NB_PATH = "notebooks/01_data_audit_and_family_mapping.ipynb"

# Empty v4 notebook; the cell list is attached to it just before writing.
nb = nbf.v4.new_notebook()

# Introductory markdown cells, built in display order:
# title/intro, document links, dataset summary placeholder, artifacts header.
cells = [
    nbf.v4.new_markdown_cell(markdown_text)
    for markdown_text in (
        # Title and review-only disclaimer
        """# Phase 1–2 — Data Audit & Family Mapping (Review Notebook)

> This notebook is prepared **for review only**. Core code is intentionally minimized or removed.
> See `DATA_PROFILE.md` for audit details and `artifacts/` for review-safe artifacts.
""",
        # Pointers to the accompanying documents
        """## Documents
- **DATA_PROFILE.md** — Audit summary (range, duplicates, gaps, notes)
- **.artifacts.README.md** — Content & usage scope
- **NOTICE.md** — Evaluation-only; no IP transfer
""",
        # Dataset summary; some entries are placeholders to be filled in by hand
        """## Dataset Summary (from DATA_PROFILE.md)
- Format: 5-of-50
- Period: 2020-03-06 → 2025-09-30
- Rows: *(fill in)*
- Range/Schema: PASS
- In-row duplicates: PASS
- Missing dates/gaps: *(summary)*
""",
        # Header for the display-only code cells that follow
        """## Artifacts Snapshot (review-safe)
Below cells **only display** a few rows or summary from artifacts if present.
""",
    )
]

# Code cell: read-only preview of artifacts/bond_stats.csv.
# When the file exists the cell shows the first 10 rows plus the row count and
# column names; otherwise it prints a "not found" message.
# "\\n" is escaped here so the notebook cell receives a literal "\n" escape.
bond_stats_cell_source = """# Display a few lines from bond_stats.csv (if available)
import os, pandas as pd
path = "artifacts/bond_stats.csv"
if os.path.exists(path):
    df = pd.read_csv(path)
    display(df.head(10))
    print("\\nRows:", len(df), "| Columns:", list(df.columns))
else:
    print("bond_stats.csv not found (expected at artifacts/bond_stats.csv)")"""
cells.append(nbf.v4.new_code_cell(bond_stats_cell_source))

# Code cell: read-only preview of artifacts/dna_map.json.
# The cell prints the number of families and edges plus small samples.
# Every len() call is guarded by a type check so that a JSON null (or another
# unexpected value) under "families"/"edges" prints None instead of raising
# TypeError, and a non-object top level is reported instead of failing on .get().
# "\\n" is escaped here so the notebook cell receives a literal "\n" escape.
cells.append(nbf.v4.new_code_cell("""# Display DNA map sample info (if available)
import os, json
path = "artifacts/dna_map.json"
if os.path.exists(path):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if isinstance(data, dict):
        fam = data.get("families", {})
        edges = data.get("edges", [])
        # Count only containers; anything else (e.g. null) is reported as None
        fam_count = len(fam) if isinstance(fam, (dict, list)) else None
        edge_count = len(edges) if isinstance(edges, (dict, list)) else None
        print("families:", fam_count, "| edges:", edge_count)
        if isinstance(edges, list) and edges[:3]:
            print("\\nSample edges:", edges[:3])
        if isinstance(fam, dict):
            sample_keys = list(fam.keys())[:5]
            print("\\nSample family keys:", sample_keys)
        elif isinstance(fam, list):
            print("\\nSample families:", fam[:3])
    else:
        print("dna_map.json: expected a JSON object at the top level, got", type(data).__name__)
else:
    print("dna_map.json not found (expected at artifacts/dna_map.json)")"""))

# Code cell: read-only preview of artifacts/components_k1.json and
# artifacts/components_k2.json.
# For each file that exists the cell prints its element count (or "(unknown)"
# when the loaded value has no length) and up to five sample items or keys;
# a missing file only produces a "not found" message.
components_cell_source = """# Components (k1/k2) preview (if available)
import os, json
for k in [1, 2]:
    path = f"artifacts/components_k{k}.json"
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            comp = json.load(f)
        try:
            size = len(comp)
        except TypeError:
            size = "(unknown)"
        print(f"components_k{k}.json — count:", size)
        if isinstance(comp, list):
            print("sample:", comp[:5])
        elif isinstance(comp, dict):
            keys = list(comp.keys())[:5]
            print("sample keys:", keys)
    else:
        print(f"components_k{k}.json not found")"""
cells.append(nbf.v4.new_code_cell(components_cell_source))

# Closing markdown cells, appended in order:
# the high-level family-mapping summary, then the IP-protection note.
cells.extend(
    nbf.v4.new_markdown_cell(markdown_text)
    for markdown_text in (
        """## Family Mapping (High-level)
- Families derived from k1..k4 components with bond scoring (PMI/Lift).
- This notebook only **shows summary**; generation logic is proprietary.
""",
        """---
**Note:** To protect IP, code cells above are limited to *display-only* logic.
For evaluation needs (additional tables/plots), we can add more **read-only** views without exposing core algorithms.
""",
    )
)

# Attach the assembled cells to the notebook and write it to NB_PATH.
nb["cells"] = cells
# os.path.dirname() returns "" when NB_PATH has no directory component, and
# os.makedirs("") raises FileNotFoundError, so only create a directory when
# the path actually names one.
out_dir = os.path.dirname(NB_PATH)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
nbf.write(nb, NB_PATH)
print("✓ Wrote", NB_PATH)