# CNT Lab — Setup Template
Run the next cell to initialize the environment for this notebook.

In [2]:
import cntlab as cl
cl.nb.init()


[2025-10-08 20:17:46,086] INFO cntlab: CNTLab notebook initialized
[2025-10-08 20:17:46,087] INFO cntlab: CNT Paths(root=C:\Users\caleb\CNT_Lab)


→ CNTLab ready.
   Root: C:\Users\caleb\CNT_Lab
   Figures: C:\Users\caleb\CNT_Lab\artifacts\figures
   Tables: C:\Users\caleb\CNT_Lab\artifacts\tables
   Metrics: C:\Users\caleb\CNT_Lab\artifacts\metrics


{'root': 'C:\\Users\\caleb\\CNT_Lab',
 'figures': 'C:\\Users\\caleb\\CNT_Lab\\artifacts\\figures',
 'tables': 'C:\\Users\\caleb\\CNT_Lab\\artifacts\\tables',
 'metrics': 'C:\\Users\\caleb\\CNT_Lab\\artifacts\\metrics'}

In [5]:
# === CNT Lab — One-Cell Migrator ============================================
# Scans source folders, classifies files by extension, copies or moves them into
# CNT_LAB_DIR/artifacts/* (figures, tables, metrics, models, blobs), and logs each
# entry into the manifest with sha256 + tags inferred from path components.
#
# Usage:
# 1) Adjust SOURCE_ROOTS below (defaults include your Windows path).
# 2) Set DRY_RUN=False to actually copy/move.
# 3) Set MODE to "copy" (default) or "move".
# 4) Re-run as needed; duplicates (same hash) are skipped.
# ============================================================================

import os, re, json, shutil, hashlib
from pathlib import Path
import cntlab as cl

# --- CONFIG -----------------------------------------------------------------
# Add/adjust any roots you want to sweep:
SOURCE_ROOTS = [
    Path(r"C:\Users\caleb\cnt_genome"),
    Path.home() / "cnt_genome",  # fallback if you run elsewhere
]

# Dry-run prints what it *would* do without touching files:
DRY_RUN = False         # <-- set to False to perform the migration
MODE    = "copy"       # "copy" or "move"

# Skip these dirs by name anywhere in the tree:
EXCLUDE_DIRS = {".git", ".hg", ".svn", "__pycache__", ".ipynb_checkpoints", ".venv", "env", "venv", "node_modules"}

# File classification (extend as you wish):
IMG_EXTS    = {".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff", ".gif", ".pdf"}
TABLE_EXTS  = {".csv", ".tsv", ".parquet", ".feather", ".xls", ".xlsx"}
METRIC_EXTS = {".json", ".yaml", ".yml", ".toml", ".jsonl"}
MODEL_EXTS  = {".pt", ".pth", ".ckpt", ".h5", ".hdf5", ".joblib", ".pkl", ".sav", ".onnx", ".pb", ".tflite", ".npz"}

# Tag helpers: words to tag if they appear in the path (case-insensitive)
KEYWORDS = ["kuramoto", "ising", "genome", "atlas", "eeg", "granger", "topo", "drift", "resonance", "fss", "cnt", "mega"]

# ----------------------------------------------------------------------------
P = cl.P
logger = cl.log.get_logger("migrate")

def sha256(path: Path) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            h.update(chunk)
    return h.hexdigest()

def load_known_hashes() -> set[str]:
    mf = P.manifests / "manifest.jsonl"
    known = set()
    if mf.exists():
        with mf.open("r", encoding="utf-8") as f:
            for line in f:
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if rec.get("sha256"):
                    known.add(rec["sha256"])
    return known

def classify(ext: str) -> str:
    e = ext.lower()
    if e in IMG_EXTS:   return "figure"
    if e in TABLE_EXTS: return "table"
    if e in METRIC_EXTS:return "metrics"
    if e in MODEL_EXTS: return "model"
    return "blob"

def slug(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    return s.strip("-")

def infer_tags(rel_parts: list[str], ext: str) -> list[str]:
    tags = {"migrated", ext.lstrip(".").lower()}
    # include up to first 3 directory names as coarse tags
    for p in rel_parts[:3]:
        if not p: 
            continue
        tags.add(slug(p))
    # keyword hits
    joined = "/".join(rel_parts).lower()
    for k in KEYWORDS:
        if k in joined:
            tags.add(k)
    return sorted(t for t in tags if t)

def ensure_dirs():
    for d in [P.figures, P.tables, P.metrics, P.models, P.artifacts, P.manifests, P.logs]:
        d.mkdir(parents=True, exist_ok=True)

def target_dir_for(kind: str) -> Path:
    return {
        "figure":  P.figures,
        "table":   P.tables,
        "metrics": P.metrics,
        "model":   P.models,
        "blob":    P.artifacts,
    }[kind]

def migrate():
    ensure_dirs()
    known = load_known_hashes()
    stats = {"scanned":0, "skipped_dupe":0, "migrated":0, "errors":0}
    by_kind = {"figure":0, "table":0, "metrics":0, "model":0, "blob":0}

    # Prepare source roots that exist
    roots = [r for r in SOURCE_ROOTS if r.exists()]
    if not roots:
        print("No SOURCE_ROOTS exist. Adjust paths and retry.")
        return

    for root in roots:
        print(f"Scanning: {root}")
        for dirpath, dirnames, filenames in os.walk(root):
            # strip excluded
            dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
            base = Path(dirpath)
            rel_base = base.relative_to(root) if base != root else Path("")

            for fn in filenames:
                src = base / fn
                stats["scanned"] += 1

                ext = src.suffix.lower()
                kind = classify(ext)
                rel_parts = [p for p in rel_base.parts if p]
                tags = infer_tags(rel_parts + [src.stem], ext)
                try:
                    h = sha256(src)
                except Exception as e:
                    logger.error(f"hash fail: {src} | {e}")
                    stats["errors"] += 1
                    continue

                if h in known:
                    stats["skipped_dupe"] += 1
                    continue

                # Deterministic-ish rename using content hash prefix:
                short = h[:8]
                tgt_dir = target_dir_for(kind)
                tgt_name = f"migrated__{slug(src.stem)}__{short}{ext}"
                dst = tgt_dir / tgt_name

                msg = f"{MODE.upper():4} {kind:7} {src}  ->  {dst}   tags={tags}"
                if DRY_RUN:
                    print("[DRY]", msg)
                else:
                    try:
                        dst.parent.mkdir(parents=True, exist_ok=True)
                        if MODE == "move":
                            shutil.move(str(src), str(dst))
                        else:
                            shutil.copy2(str(src), str(dst))
                        # Log to manifest
                        cl.manifest.log_artifact(dst, kind=kind, tags=tags, meta={"source": str(src)})
                        stats["migrated"] += 1
                        by_kind[kind] += 1
                        known.add(h)
                        logger.info(msg)
                    except Exception as e:
                        logger.error(f"migrate fail: {src} -> {dst} | {e}")
                        stats["errors"] += 1

    print("\n== MIGRATION SUMMARY ==")
    for k,v in stats.items(): print(f"{k:14}: {v}")
    if not DRY_RUN:
        print("By kind:", by_kind)
    print("Manifest:", (P.manifests / "manifest.jsonl"))

migrate()


[2025-10-08 20:21:01,279] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNTAlpha.ipynb  ->  C:\Users\caleb\CNT_Lab\artifacts\migrated__cntalpha__9c54888e.ipynb   tags=['cnt', 'cntalpha', 'ipynb', 'migrated']
[2025-10-08 20:21:01,307] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNTGenome  ->  C:\Users\caleb\CNT_Lab\artifacts\migrated__cntgenome__39ac6c67   tags=['cnt', 'cntgenome', 'genome', 'migrated']
[2025-10-08 20:21:01,321] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNTGenome.ipynb  ->  C:\Users\caleb\CNT_Lab\artifacts\migrated__cntgenome__703ea35e.ipynb   tags=['cnt', 'cntgenome', 'genome', 'ipynb', 'migrated']
[2025-10-08 20:21:01,335] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNT_3D_GenomicField_clean.ipynb  ->  C:\Users\caleb\CNT_Lab\artifacts\migrated__cnt-3d-genomicfield-clean__44829feb.ipynb   tags=['cnt', 'cnt-3d-genomicfield-clean', 'ipynb', 'migrated']
[2025-10-08 20:21:01,351] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNT_3

Scanning: C:\Users\caleb\cnt_genome


[2025-10-08 20:22:14,330] INFO migrate: COPY blob    C:\Users\caleb\cnt_genome\CNT_CognitiveAlphabet_Bundle.zip  ->  C:\Users\caleb\CNT_Lab\artifacts\migrated__cnt-cognitivealphabet-bundle__cc7e149f.zip   tags=['cnt', 'cnt-cognitivealphabet-bundle', 'migrated', 'zip']
[2025-10-08 20:22:14,347] INFO migrate: COPY figure  C:\Users\caleb\cnt_genome\CNT_colored_3D_field.png  ->  C:\Users\caleb\CNT_Lab\artifacts\figures\migrated__cnt-colored-3d-field__ab11c5a3.png   tags=['cnt', 'cnt-colored-3d-field', 'migrated', 'png']
[2025-10-08 20:22:14,373] INFO migrate: COPY table   C:\Users\caleb\cnt_genome\CNT_edges.csv  ->  C:\Users\caleb\CNT_Lab\artifacts\tables\migrated__cnt-edges__204dcd26.csv   tags=['cnt', 'cnt-edges', 'csv', 'migrated']
[2025-10-08 20:22:14,385] INFO migrate: COPY table   C:\Users\caleb\cnt_genome\CNT_genomic_modules.csv  ->  C:\Users\caleb\CNT_Lab\artifacts\tables\migrated__cnt-genomic-modules__2ee69350.csv   tags=['cnt', 'cnt-genomic-modules', 'csv', 'migrated']
[2025-10-0

Scanning: C:\Users\caleb\cnt_genome

== MIGRATION SUMMARY ==
scanned       : 1414
skipped_dupe  : 742
migrated      : 672
errors        : 0
By kind: {'figure': 288, 'table': 95, 'metrics': 101, 'model': 11, 'blob': 177}
Manifest: C:\Users\caleb\CNT_Lab\artifacts\manifests\manifest.jsonl


In [6]:
from pathlib import Path
import shutil, cntlab as cl

P = cl.P
dst = P.root / "notebooks" / "archive"
dst.mkdir(parents=True, exist_ok=True)

moved = 0
for p in (P.artifacts).glob("*.ipynb"):
    newp = dst / p.name
    shutil.move(str(p), str(newp))
    cl.manifest.log_artifact(newp, kind="notebook",
                             tags=["migrated","notebook"],
                             meta={"relocated_from": str(p)})
    moved += 1

print(f"Relocated {moved} notebook(s) → {dst}")


Relocated 28 notebook(s) → C:\Users\caleb\CNT_Lab\notebooks\archive


In [7]:
import pandas as pd, cntlab as cl
rows = []
for k in ["figure","table","metrics","model","notebook","blob"]:
    for rec in cl.manifest.find_artifacts(kind=k):
        rows.append({"kind":k,"path":rec["path"],"tags":";".join(rec["tags"]), "ts":rec["ts"], "size":rec.get("size")})
df = pd.DataFrame(rows).sort_values("ts")
idx_path = cl.io.save_df(df, module="inventory", dataset="all", desc="manifest_index", fmt="csv", tags=["index","manifest"])
print("Index saved →", idx_path)
df.tail(10)


Index saved → C:\Users\caleb\CNT_Lab\artifacts\tables\inventory__all__manifest_index__20251008-202444.csv


Unnamed: 0,kind,path,tags,ts,size
503,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,438661
502,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,37612
501,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,450902
500,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,257554
498,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,17545
497,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,21007
496,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,31093
495,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,134343
511,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,617
499,notebook,C:\Users\caleb\CNT_Lab\notebooks\archive\migra...,migrated;notebook,2025-10-09T00:24:17Z,199733


In [8]:
# Count by kind
import collections, json, cntlab as cl
cnts = collections.Counter(rec["kind"] for rec in cl.manifest.find_artifacts(kind=None))
cnts

# Find anything with 'resonance'
[x["path"] for x in cl.manifest.find_artifacts(tags_any=["resonance"])][:5]


['C:\\Users\\caleb\\CNT_Lab\\artifacts\\tables\\migrated__cnt-genomic-resonance-map__df629623.csv',
 'C:\\Users\\caleb\\CNT_Lab\\artifacts\\tables\\migrated__cnt-genomic-resonance-scored__ab1e21a4.csv',
 'C:\\Users\\caleb\\CNT_Lab\\artifacts\\tables\\migrated__cnt-genomic-resonance-scored-v2__6c4fb256.csv']