In [1]:
# === CNT EEG Spectral-Glyphs — single Jupyter cell (inline, no scripts) ===
# What it does (fast):
#   • Alpha-band imaginary coherence per subject
#   • Spectral clustering (k via eigen-gap)
#   • Consensus clusters, LOSO-ARI, circular-shift null (6 perms)
#   • Saves small figs + tables under /mnt/data/cnt_spectral_ultrafast
#
# Switch to your data:
#   USE_DEMO = False
#   DATA_DIR = "/mnt/data/eeg_rest"        # folder with *.npy shaped [n_channels, n_time]
#   (optional) add a ".channels.txt" per subject with one channel name per line

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------- config ----------
USE_DEMO  = False
DATA_DIR = r"C:\Users\caleb\CNT_Lab\data\raw\eeg"
DATA_DIR = "C:/Users/caleb/CNT_Lab/data/raw/eeg"
BAND      = "alpha"
FS        = 200.0                     # your sampling rate if real data
NULL_PERMS= 6                         # raise later for stronger stats (e.g., 200+)
KMAX      = 3

# ---------- helpers ----------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"alpha": (8.0, 13.0), "theta": (4.0, 8.0), "beta": (13.0, 30.0)}
def imag_coh_matrix(X, fs, band="alpha", nperseg=128, noverlap=64):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=nperseg, noverlap=noverlap)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=nperseg, noverlap=noverlap)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch))
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=nperseg, noverlap=noverlap)
            num = np.abs(np.imag(Pxy[sel]))
            den = np.sqrt(Pxx[i][sel] * Pxx[j][sel] + 1e-12)
            W[i, j] = W[j, i] = float(np.nanmean(num / den))
    np.fill_diagonal(W, 0.0)
    return W
def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh
def pick_k(L, kmax=3):
    evals, _ = np.linalg.eigh(L)
    gaps = [(k, evals[k+1] - evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    k_star = max(gaps, key=lambda x: x[1])[0]
    return max(2, min(k_star, kmax))
def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U /= (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    labels = KMeans(n_clusters=k, n_init=25, random_state=42).fit_predict(U)
    return labels, evals
def consensus_from_labels(all_labels):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n, n))
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i, j] += 1.0 if li == lab[j] else 0.0
    co /= m
    thr = 0.5; A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack = [i]; visited[i] = True; cons[i] = cid
            while stack:
                u = stack.pop()
                for v in range(n):
                    if A[u, v] and not visited[v]:
                        visited[v] = True; cons[v] = cid; stack.append(v)
            cid += 1
    return cons, co
def ari_loso(all_labels):
    cons,_ = consensus_from_labels(all_labels)
    vals = []
    for s in range(len(all_labels)):
        leave = [lab for i, lab in enumerate(all_labels) if i != s]
        cons_leave,_ = consensus_from_labels(leave)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))
def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t - 1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift + 1))
        Y[c] = np.roll(X[c], s)
    return Y
def plot_matrix(M, title, path):
    plt.figure(); plt.imshow(M, aspect='auto'); plt.title(title); plt.colorbar(); plt.tight_layout(); plt.savefig(path, dpi=140); plt.close()
def plot_eigs(evals, title, path):
    plt.figure(); plt.plot(np.arange(len(evals)), evals, marker='o'); plt.title(title); plt.xlabel("Index"); plt.ylabel("Eigenvalue"); plt.tight_layout(); plt.savefig(path, dpi=140); plt.close()

# ---------- data ----------
rng = np.random.default_rng(11)
if USE_DEMO:
    # 2 subjects × 8 channels × 3 s @ 200 Hz with an alpha driver in back half
    fs = FS
    n_subj, n_ch, seconds = 2, 8, 3
    T = int(seconds * fs); t = np.arange(T)/fs
    X_list = []
    for _ in range(n_subj):
        X = rng.normal(0, 1, size=(n_ch, T))
        driver = np.sin(2*np.pi*10.0*t + rng.uniform(0, 2*np.pi))
        for c in range(n_ch//2, n_ch):
            X[c] += 0.9*driver + 0.2*rng.normal(0, 1, size=T)
        X_list.append(X)
    subjects = [f"demo_{i:02d}" for i in range(n_subj)]
    channels = [f"ch{i}" for i in range(n_ch)]
else:
    fs = FS
    files = sorted(glob.glob(os.path.join(DATA_DIR, "*.npy")))
    if not files: raise RuntimeError(f"No .npy files found in {DATA_DIR}")
    X_list, subjects = [], []
    channels = None
    for f in files:
        X = np.load(f)  # [n_channels, n_time]
        X_list.append(X); subjects.append(os.path.splitext(os.path.basename(f))[0])
        ch_txt = f.replace(".npy", ".channels.txt")
        if channels is None:
            if os.path.exists(ch_txt):
                with open(ch_txt, "r") as g: channels = [ln.strip() for ln in g if ln.strip()]
            else:
                channels = [f"ch{i}" for i in range(X.shape[0])]

# ---------- run ----------
OUT = ensure_dir(OUT_ROOT)
FIG = ensure_dir(os.path.join(OUT, "figures"))
TAB = ensure_dir(os.path.join(OUT, "tables"))
MET = ensure_dir(os.path.join(OUT, "metrics"))

subj_labels, eigvals_all = [], []
for sname, X in zip(subjects, X_list):
    W = imag_coh_matrix(X, fs=fs, band=BAND, nperseg=96, noverlap=48)
    L = laplacian_sym(W)
    k = pick_k(L, kmax=KMAX)
    labels, evals = spec_cluster(W, k)
    subj_labels.append(labels); eigvals_all.append(evals.tolist())
    plot_matrix(W, f"{sname} {BAND} | iCoh", os.path.join(FIG, f"{sname}__{BAND}__icoherence.png"))
    plot_eigs(evals, f"{sname} {BAND} | Laplacian eigs", os.path.join(FIG, f"{sname}__{BAND}__eigs.png"))

cons, co = consensus_from_labels(subj_labels)
plot_matrix(co, f"Consensus co-association | {BAND}", os.path.join(FIG, f"consensus__{BAND}__coassoc.png"))
loso_med_ari = ari_loso(subj_labels)

null_aris = []
for _ in range(NULL_PERMS):
    nlabs = []
    for X in X_list:
        Y = circ_shift(X, rng)
        Wn = imag_coh_matrix(Y, fs=fs, band=BAND, nperseg=96, noverlap=48)
        Ln = laplacian_sym(Wn); kn = pick_k(Ln, kmax=KMAX)
        lbn,_ = spec_cluster(Wn, kn)
        nlabs.append(lbn)
    cons_n,_ = consensus_from_labels(nlabs)
    null_aris.append(adjusted_rand_score(cons, cons_n))
null_aris = np.array(null_aris, float)
p_val = float((np.sum(null_aris >= loso_med_ari) + 1) / (len(null_aris) + 1))

metrics = {
    "band": BAND,
    "n_subjects": len(subjects),
    "loso_median_ari": float(loso_med_ari),
    "null_ari_mean": float(null_aris.mean()) if len(null_aris) else None,
    "null_ari_p_value": p_val,
    "subjects": subjects,
    "channels": channels,
    "out_dir": OUT
}
with open(os.path.join(MET, f"band__{BAND}__metrics.json"), "w") as f:
    json.dump(metrics, f, indent=2)
np.save(os.path.join(TAB, f"band__{BAND}__consensus_labels.npy"), cons)
np.save(os.path.join(TAB, f"band__{BAND}__coassoc.npy"), co)

print("=== CNT EEG Spectral-Glyphs (inline) ===")
print("Mode:", "DEMO" if USE_DEMO else f"REAL ({DATA_DIR})")
print("Artifacts:", OUT)
print("Key files:")
print(" -", os.path.join(MET, f"band__{BAND}__metrics.json"))
print(" -", os.path.join(TAB, f"band__{BAND}__consensus_labels.npy"))
print(" -", os.path.join(FIG, f"consensus__{BAND}__coassoc.png"))
print("Stats:", {k: metrics[k] for k in ["loso_median_ari", "null_ari_mean", "null_ari_p_value"]})


RuntimeError: No .npy files found in C:/Users/caleb/CNT_Lab/data/raw/eeg

In [2]:
# === Download EEGBCI → save as .npy per subject (eyes-closed by default) ===
# Paste into your LOCAL JupyterLab (internet required).
# Output: C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX.npy (+ .channels.txt)
# Tip: use raw strings r"..." for Windows paths.

# 1) Deps (idempotent inside Jupyter)
try:
    import mne  # noqa: F401
except Exception:
    pass
try:
    %pip install -q mne pooch
except Exception as e:
    print("pip skipped:", e)

import os, sys, math, numpy as np
from pathlib import Path
import mne

# 2) Settings — tweak as needed
OUT_DIR    = r"C:\Users\caleb\CNT_Lab\eeg_rest"  # where *.npy will be written
SUBJECTS   = list(range(1, 11))                  # EEGBCI subjects to fetch (1..109 valid for eegbci)
RUNS_EC    = [2]                                  # Eyes-Closed run(s) — 2 = EC
RUNS_EO    = []                                   # Add [1] to also fetch Eyes-Open
FS_OUT     = 250.0                                # resample target Hz
DURATION_S = 60                                   # seconds to keep per subject (trim or tile if shorter)
HP, LP     = 1.0, 45.0                            # band-pass for iCoh-ready signals (optional but recommended)
MONTAGE    = "standard_1020"                      # channel name mapping
ALLOW_REUSE= True                                 # skip re-download if files already exist

# 3) Helpers
def ensure_dir(p: str) -> str:
    Path(p).mkdir(parents=True, exist_ok=True)
    return p

def save_subject(X: np.ndarray, ch_names, out_base: str):
    np.save(out_base + ".npy", X.astype(np.float32))
    with open(out_base + ".channels.txt", "w", encoding="utf-8") as f:
        for ch in ch_names:
            f.write(ch + "\n")

def fetch_concat(subject: int, runs: list[int]):
    """Download specified runs for a subject and return a concatenated Raw."""
    if not runs:
        return None
    try:
        fpaths = mne.datasets.eegbci.load_data(subject=subject, runs=runs, update_path=True, verbose="ERROR")
    except Exception as e:
        print(f"[S{subj:02d}] load_data error: {e}")
        return None
    raws = []
    for fp in fpaths:
        try:
            raw = mne.io.read_raw_edf(fp, preload=True, verbose="ERROR")
            # Keep EEG only
            raw.pick_types(eeg=True, stim=False, eog=False, ecg=False, emg=False, misc=False)
            raws.append(raw)
        except Exception as e:
            print(f"[S{subj:02d}] read_raw error: {e}")
    if not raws:
        return None
    return mne.concatenate_raws(raws, verbose="ERROR")

def trim_or_tile(X: np.ndarray, n_keep: int) -> np.ndarray:
    if X.shape[1] >= n_keep:
        return X[:, :n_keep]
    reps = math.ceil(n_keep / X.shape[1])
    return np.tile(X, reps)[:, :n_keep]

# 4) Main
ensure_dir(OUT_DIR)
log = []
for subj in SUBJECTS:
    # Skip if already present and reuse enabled
    out_base_ec = os.path.join(OUT_DIR, f"subject_{subj:02d}_EC") if RUNS_EC else None
    out_base_eo = os.path.join(OUT_DIR, f"subject_{subj:02d}_EO") if RUNS_EO else None
    already = False
    if ALLOW_REUSE and RUNS_EC and os.path.exists((out_base_ec or "") + ".npy"):
        already = True
    if ALLOW_REUSE and RUNS_EO and os.path.exists((out_base_eo or "") + ".npy"):
        already = True
    if already:
        log.append((subj, "skipped (exists)"))
        continue

    # EC
    if RUNS_EC:
        raw_ec = fetch_concat(subj, RUNS_EC)
    else:
        raw_ec = None
    # EO
    if RUNS_EO:
        raw_eo = fetch_concat(subj, RUNS_EO)
    else:
        raw_eo = None

    if (raw_ec is None) and (raw_eo is None):
        log.append((subj, "no data"))
        continue

    for tag, raw in (("EC", raw_ec), ("EO", raw_eo)):
        if raw is None:
            continue
        try:
            # Montage + filter + resample
            try:
                raw.set_montage(MONTAGE, on_missing="ignore", match_case=False, verbose="ERROR")
            except Exception:
                pass
            raw.filter(HP, LP, fir_design="firwin", verbose="ERROR")
            raw.resample(FS_OUT, npad="auto", verbose="ERROR")

            # Slice/tile to DURATION_S
            sfreq = float(raw.info["sfreq"])
            n_keep = int(DURATION_S * sfreq)
            X = raw.get_data(picks="eeg")  # [n_ch, n_t]
            X = trim_or_tile(X, n_keep)

            ch_names = mne.pick_info(raw.info, mne.pick_types(raw.info, eeg=True)).ch_names
            out_base = os.path.join(OUT_DIR, f"subject_{subj:02d}_{tag}")
            save_subject(X, ch_names, out_base)
            log.append((subj, f"{tag}: ok {X.shape} → {out_base}.npy"))
        except Exception as e:
            log.append((subj, f"{tag}: error {e}"))

# 5) Summary
print("=== Fetch complete ===")
for row in log:
    print(row)
print(f"\nSaved to: {OUT_DIR}")
print("Example files:")
for subj in SUBJECTS[:3]:
    if RUNS_EC:
        p = os.path.join(OUT_DIR, f"subject_{subj:02d}_EC.npy")
        if os.path.exists(p): print(" -", p)
    if RUNS_EO:
        p = os.path.join(OUT_DIR, f"subject_{subj:02d}_EO.npy")
        if os.path.exists(p): print(" -", p)


Note: you may need to restart the kernel to use updated packages.
[S01] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S02] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S03] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S04] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S05] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S06] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S07] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S08] load_data error: load_data() got an unexpected keyword argument 'subject'. Did you mean 'subjects'?
[S09] load_data error: load_data() got an unexpected keyword argument 'subject'. Did y

In [3]:
# === EEGBCI downloader → saves .npy per subject (Eyes-Closed by default) ===
# Run this on YOUR local JupyterLab (internet required).
# Output files: C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Fix: uses the correct signature mne.datasets.eegbci.load_data(subjects=..., runs=...)

# 1) Deps (safe to rerun in Jupyter)
try:
    import mne  # noqa: F401
except Exception:
    pass
try:
    %pip install -q mne pooch
except Exception as e:
    print("pip skipped:", e)

import os, math, numpy as np
from pathlib import Path
import mne

# 2) Settings — tweak as needed
OUT_DIR    = r"C:\Users\caleb\CNT_Lab\eeg_rest"  # where *.npy will be written
SUBJECTS   = list(range(1, 11))                  # EEGBCI subject IDs (valid: 1..109)
RUNS_EC    = [2]                                  # Eyes-Closed run(s) → 2
RUNS_EO    = []                                   # Add [1] to also fetch Eyes-Open
FS_OUT     = 250.0                                # resample target Hz
DURATION_S = 60                                   # seconds per subject to keep
HP, LP     = 1.0, 45.0                            # band-pass (good for iCoh later)
MONTAGE    = "standard_1020"                      # 10-20 channel names
ALLOW_REUSE= True                                 # skip if file already exists

# 3) Helpers
def ensure_dir(p: str) -> str:
    Path(p).mkdir(parents=True, exist_ok=True); return p

def save_subject(X: np.ndarray, ch_names, out_base: str):
    np.save(out_base + ".npy", X.astype(np.float32))
    with open(out_base + ".channels.txt", "w", encoding="utf-8") as f:
        for ch in ch_names:
            f.write(ch + "\n")

def fetch_concat(subject: int, runs: list[int]):
    """Download given runs for a subject and return concatenated Raw."""
    if not runs: return None
    try:
        # ✅ Correct call for newer MNE:
        fpaths = mne.datasets.eegbci.load_data(subjects=[subject], runs=runs, update_path=True, verbose="ERROR")
    except TypeError:
        # Fallback for older MNE versions that expect subject= (singular)
        fpaths = mne.datasets.eegbci.load_data(subject=subject, runs=runs, update_path=True, verbose="ERROR")
    raws = []
    for fp in fpaths:
        try:
            raw = mne.io.read_raw_edf(fp, preload=True, verbose="ERROR")
            raw.pick_types(eeg=True, stim=False, eog=False, ecg=False, emg=False, misc=False)
            raws.append(raw)
        except Exception as e:
            print(f"[S{subject:02d}] read_raw error: {e}")
    return mne.concatenate_raws(raws, verbose="ERROR") if raws else None

def trim_or_tile(X: np.ndarray, n_keep: int) -> np.ndarray:
    if X.shape[1] >= n_keep: return X[:, :n_keep]
    reps = math.ceil(n_keep / X.shape[1]); return np.tile(X, reps)[:, :n_keep]

# 4) Main
ensure_dir(OUT_DIR)
log = []
for subj in SUBJECTS:
    out_base_ec = os.path.join(OUT_DIR, f"subject_{subj:02d}_EC") if RUNS_EC else None
    out_base_eo = os.path.join(OUT_DIR, f"subject_{subj:02d}_EO") if RUNS_EO else None

    if ALLOW_REUSE and RUNS_EC and os.path.exists((out_base_ec or "") + ".npy"):
        log.append((subj, "EC: skipped (exists)"))
        raw_ec = None
    else:
        raw_ec = fetch_concat(subj, RUNS_EC) if RUNS_EC else None

    if ALLOW_REUSE and RUNS_EO and os.path.exists((out_base_eo or "") + ".npy"):
        log.append((subj, "EO: skipped (exists)"))
        raw_eo = None
    else:
        raw_eo = fetch_concat(subj, RUNS_EO) if RUNS_EO else None

    if (raw_ec is None) and (raw_eo is None):
        log.append((subj, "no new data"))
        continue

    for tag, raw in (("EC", raw_ec), ("EO", raw_eo)):
        if raw is None: continue
        try:
            try:
                raw.set_montage(MONTAGE, on_missing="ignore", match_case=False, verbose="ERROR")
            except Exception:
                pass
            raw.filter(HP, LP, fir_design="firwin", verbose="ERROR")
            raw.resample(FS_OUT, npad="auto", verbose="ERROR")

            sf = float(raw.info["sfreq"])
            X = raw.get_data(picks="eeg")                   # [n_ch, n_t]
            X = trim_or_tile(X, int(DURATION_S * sf))
            ch_names = mne.pick_info(raw.info, mne.pick_types(raw.info, eeg=True)).ch_names

            out_base = os.path.join(OUT_DIR, f"subject_{subj:02d}_{tag}")
            save_subject(X, ch_names, out_base)
            log.append((subj, f"{tag}: ok {X.shape} → {out_base}.npy"))
        except Exception as e:
            log.append((subj, f"{tag}: error {e}"))

print("=== Fetch complete ===")
for row in log: print(row)
print(f"\nSaved to: {OUT_DIR}")
print("Next: set USE_DEMO=False and DATA_DIR=r\"C:\\Users\\caleb\\CNT_Lab\\eeg_rest\" in your spectral-glyph cell.")


Downloading file 'S001/S001R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S001/S001R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Note: you may need to restart the kernel to use updated packages.
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S002/S002R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S002/S002R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S003/S003R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S003/S003R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S004/S004R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S004/S004R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S005/S005R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S005/S005R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S006/S006R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S006/S006R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S007/S007R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S007/S007R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S008/S008R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S008/S008R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S009/S009R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S009/S009R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


Downloading file 'S010/S010R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S010/S010R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
=== Fetch complete ===
(1, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_01_EC.npy')
(2, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_02_EC.npy')
(3, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_03_EC.npy')
(4, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_04_EC.npy')
(5, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_05_EC.npy')
(6, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_06_EC.npy')
(7, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_07_EC.npy')
(8, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_08_EC.npy')
(9, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_09_EC.npy')
(10, 'EC: ok (64, 15000) → C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_10_EC.npy')

Saved to: C:\Users\caleb\CNT_Lab\eeg_rest
Next: set USE_DEMO=False a

In [5]:
# === CNT EEG Spectral-Glyphs (EEGBCI EC) — single cell ===
# Input:  C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Output: C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_v1\{figures,tables,metrics}
# What it does:
#   • alpha/theta/beta imaginary coherence per subject
#   • spectral clustering (k via eigen-gap, ≤ KMAX)
#   • consensus clusters across subjects + LOSO-ARI
#   • circular-shift null p-value
# Tip: you can raise NULL_PERMS for stronger stats (e.g., 1000) once it runs.

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------------- Config (edit as needed) ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"                # where subject_XX_EC.npy live
GLOB       = "subject_*_EC.npy"                                # only EC files
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_v1"
FS         = 250.0                                             # Hz (from your downloader)
BANDS      = ("alpha","theta","beta")                          # add/remove as you like
KMAX       = 4
NULL_PERMS = 200                                               # increase later for publish
NPERSEG    = 256                                               # Welch/CSD window
NOVERLAP   = 128

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p

def band_lims():
    return {"delta":(1.0,4.0),"theta":(4.0,8.0),"alpha":(8.0,13.0),"beta":(13.0,30.0),"gamma":(30.0,45.0)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel]))
            den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def pick_k(L, kmax=4):
    evals, _ = np.linalg.eigh(L)
    gaps = [(k, evals[k+1]-evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    k_star = max(gaps, key=lambda x: x[1])[0]
    return max(2, min(k_star, kmax))

def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U /= (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    labels = KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)
    return labels, evals

def consensus_from_labels(all_labels):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n, n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    thr = 0.5; A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels):
    cons,_ = consensus_from_labels(all_labels)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t-1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift+1))
        Y[c] = np.roll(X[c], s)
    return Y

def plot_matrix(M, title, path):
    plt.figure(); plt.imshow(M, aspect='auto'); plt.title(title); plt.colorbar(); plt.tight_layout(); plt.savefig(path, dpi=160); plt.close()

def plot_eigs(evals, title, path):
    plt.figure(); plt.plot(np.arange(len(evals)), evals, marker='o'); plt.title(title); plt.xlabel("Index"); plt.ylabel("Eigenvalue")
    plt.tight_layout(); plt.savefig(path, dpi=160); plt.close()

# ---------------- Discover data ----------------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))
if not files:
    raise RuntimeError(f"No files matched: {os.path.join(DATA_DIR, GLOB)}")

subjects = [os.path.splitext(os.path.basename(f))[0] for f in files]
print(f"Found {len(files)} subjects:", subjects[:8], "..." if len(files)>8 else "")

# ---------------- Prepare output dirs ----------------
FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))
TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))

# ---------------- Run per band ----------------
rng = np.random.default_rng(7)
for band in BANDS:
    subj_labels = []; eigvals_all = []; ch_template = None

    for sname, f in zip(subjects, files):
        X = np.load(f)  # [n_ch, n_t]
        ch_txt = f.replace(".npy", ".channels.txt")
        if ch_template is None:
            if os.path.exists(ch_txt):
                with open(ch_txt, "r", encoding="utf-8") as g:
                    ch_template = [ln.strip() for ln in g if ln.strip()]
            else:
                ch_template = [f"ch{i}" for i in range(X.shape[0])]

        W = imag_coh_matrix(X, fs=FS, band=band)
        L = laplacian_sym(W)
        k = pick_k(L, kmax=KMAX)
        labels, evals = spec_cluster(W, k)

        subj_labels.append(labels); eigvals_all.append(evals.tolist())
        plot_matrix(W, f"{sname} {band} | iCoh", os.path.join(FIG, f"{sname}__{band}__icoherence.png"))
        plot_eigs(evals, f"{sname} {band} | Laplacian eigs", os.path.join(FIG, f"{sname}__{band}__eigs.png"))

    cons, co = consensus_from_labels(subj_labels)
    plot_matrix(co, f"Consensus co-association | {band}", os.path.join(FIG, f"consensus__{band}__coassoc.png"))
    loso_med_ari = ari_loso(subj_labels)

    # Null test
    null_aris = []
    for _ in range(NULL_PERMS):
        nlabs=[]
        for f in files:
            X = np.load(f)
            Y = circ_shift(X, rng)
            Wn = imag_coh_matrix(Y, fs=FS, band=band)
            Ln = laplacian_sym(Wn)
            kn = pick_k(Ln, kmax=KMAX)
            lbn,_ = spec_cluster(Wn, kn)
            nlabs.append(lbn)
        cons_n,_ = consensus_from_labels(nlabs)
        null_aris.append(adjusted_rand_score(cons, cons_n))
    null_aris = np.array(null_aris, float)
    p_val = float((np.sum(null_aris >= loso_med_ari) + 1) / (len(null_aris) + 1))

    # Save
    np.save(os.path.join(TAB, f"band__{band}__consensus_labels.npy"), cons)
    np.save(os.path.join(TAB, f"band__{band}__coassoc.npy"), co)
    metrics = {
        "band": band,
        "n_subjects": len(subjects),
        "loso_median_ari": float(loso_med_ari),
        "null_ari_mean": float(null_aris.mean()) if len(null_aris) else None,
        "null_ari_p_value": p_val,
        "subjects": subjects,
        "channels": ch_template,
        "out_dir": OUT_ROOT
    }
    with open(os.path.join(MET, f"band__{band}__metrics.json"), "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    print(f"[{band}] subjects={len(subjects)}  LOSO-ARI={loso_med_ari:.3f}  null_mean={float(null_aris.mean()):.3f}  p={p_val:.4f}")

print("\nArtifacts:")
print(" - Figures:", FIG)
print(" - Tables :", TAB)
print(" - Metrics:", MET)


Found 10 subjects: ['subject_01_EC', 'subject_02_EC', 'subject_03_EC', 'subject_04_EC', 'subject_05_EC', 'subject_06_EC', 'subject_07_EC', 'subject_08_EC'] ...


KeyboardInterrupt: 

In [6]:
# === CNT Spectral-Glyphs — FAST PASS (prints results) ===
# Inputs: C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Outputs: C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fast\{tables,metrics}

import os, glob, json, numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# -------- config --------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fast"
FS         = 250.0
BAND       = "alpha"
KMAX       = 3
NULL_PERMS = 20
NPERSEG    = 128
NOVERLAP   = 64

def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"delta":(1,4),"theta":(4,8),"alpha":(8,13),"beta":(13,30),"gamma":(30,45)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel])); den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def pick_k(L, kmax=3):
    evals, _ = np.linalg.eigh(L)
    if len(evals) < 3: return 2
    gaps = [(k, evals[k+1]-evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    return max(2, min(max(gaps, key=lambda x: x[1])[0], kmax))

def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    labels = KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)
    return labels

def consensus_from_labels(all_labels):
    n = len(all_labels[0]); m = len(all_labels)
    co = np.zeros((n, n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    thr = 0.5; A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels):
    cons,_ = consensus_from_labels(all_labels)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t-1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift+1))
        Y[c] = np.roll(X[c], s)
    return Y

# ---------- run ----------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))
print(f"Found {len(files)} EC subjects")

OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))

rng = np.random.default_rng(7)
subj_labels = []
for f in files:
    X = np.load(f)
    W = imag_coh_matrix(X, fs=FS, band=BAND)
    k = pick_k(laplacian_sym(W), kmax=KMAX)
    subj_labels.append(spec_cluster(W, k))

cons, co = consensus_from_labels(subj_labels)

null_aris = []
for _ in range(NULL_PERMS):
    nlabs=[]
    for f in files:
        X = np.load(f)
        Y = circ_shift(X, rng)
        Wn = imag_coh_matrix(Y, fs=FS, band=BAND)
        kn = pick_k(laplacian_sym(Wn), kmax=KMAX)
        nlabs.append(spec_cluster(Wn, kn))
    cons_n,_ = consensus_from_labels(nlabs)
    null_aris.append(adjusted_rand_score(cons, cons_n))
null_aris = np.array(null_aris, float)

loso_med_ari = ari_loso(subj_labels)
p_val = float((np.sum(null_aris >= loso_med_ari) + 1) / (len(null_aris) + 1))

metrics = {
    "band": BAND,
    "n_subjects": len(files),
    "loso_median_ari": float(loso_med_ari),
    "null_ari_mean": float(null_aris.mean()) if len(null_aris) else None,
    "null_ari_p_value": p_val,
    "subjects": [os.path.splitext(os.path.basename(f))[0] for f in files],
    "out_dir": OUT_ROOT
}
with open(os.path.join(OUT_MET, f"band__{BAND}__metrics.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"), cons)

print("\n=== FAST PASS RESULTS ===")
print(f"band={BAND}  n={len(files)}")
print(f"LOSO-median ARI: {loso_med_ari:.3f}")
print(f"Null ARI mean  : {float(null_aris.mean()):.3f}")
print(f"Permutation p  : {p_val:.4f}")
print("\nArtifacts:")
print(" -", os.path.join(OUT_MET, f"band__{BAND}__metrics.json"))
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"))


Found 10 EC subjects

=== FAST PASS RESULTS ===
band=alpha  n=10
LOSO-median ARI: 1.000
Null ARI mean  : 1.000
Permutation p  : 1.0000

Artifacts:
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fast\metrics\band__alpha__metrics.json
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fast\tables\band__alpha__consensus_labels.npy


In [8]:
# === CNT Spectral-Glyphs — FAST+ROBUST (single cell) ===
# Inputs:  C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Outputs: C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\{tables,metrics,figures}

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------------- Config ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust"
FS         = 250.0
BAND       = "alpha"
KMAX       = 3
NULL_PERMS = 50           # raise to 500–1000 later
NPERSEG    = 128
NOVERLAP   = 64
KNN_K      = 6            # k-NN graph sparsity
CONS_THR   = 0.65         # stricter consensus threshold
SAVE_FIGS  = True

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"delta":(1,4),"theta":(4,8),"alpha":(8,13),"beta":(13,30),"gamma":(30,45)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel])); den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def knn_sparsify(W, k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def pick_k(L, kmax=3):
    evals, _ = np.linalg.eigh(L)
    if len(evals) < 3: return 2
    gaps = [(k, evals[k+1]-evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    return max(2, min(max(gaps, key=lambda x: x[1])[0], kmax))

def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def consensus_from_labels(all_labels, thr=0.65):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n, n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels, thr=0.65):
    cons,_ = consensus_from_labels(all_labels, thr=thr)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave, thr=thr)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t-1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift+1)); Y[c] = np.roll(X[c], s)
    return Y

def phase_randomize(X, rng):
    n_ch, n_t = X.shape
    Y = np.zeros_like(X, dtype=float)
    for c in range(n_ch):
        F = np.fft.rfft(X[c])
        phases = rng.uniform(0, 2*np.pi, size=F.shape)
        phases[0] = 0.0
        if n_t % 2 == 0: phases[-1] = 0.0
        Y[c] = np.fft.irfft(np.abs(F)*np.exp(1j*phases), n=n_t)
    return Y

# ---------------- Run ----------------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))
print(f"Found {len(files)} EC subjects")
if not files:
    raise SystemExit("No EC files found. Check DATA_DIR/GLOB or run the downloader first.")

OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

rng = np.random.default_rng(7)
subj_labels = []; degrees = []

for f in files:
    X = np.load(f)
    W = imag_coh_matrix(X, fs=FS, band=BAND)
    W = knn_sparsify(W, k=KNN_K)
    degrees.append(W.sum(1))
    k = pick_k(laplacian_sym(W), kmax=KMAX)
    subj_labels.append(spec_cluster(W, k))

cons, co = consensus_from_labels(subj_labels, thr=CONS_THR)

# Mixed null: half circular-shift, half phase-randomize
null_aris = []
for p in range(NULL_PERMS):
    nlabs=[]
    for f in files:
        X = np.load(f)
        Y = phase_randomize(X, rng) if (p % 2 == 0) else circ_shift(X, rng)
        Wn = imag_coh_matrix(Y, fs=FS, band=BAND)
        Wn = knn_sparsify(Wn, k=KNN_K)
        kn = pick_k(laplacian_sym(Wn), kmax=KMAX)
        nlabs.append(spec_cluster(Wn, kn))
    cons_n,_ = consensus_from_labels(nlabs, thr=CONS_THR)
    null_aris.append(adjusted_rand_score(cons, cons_n))
null_aris = np.array(null_aris, float)

loso_med_ari = ari_loso(subj_labels, thr=CONS_THR)
p_val = float((np.sum(null_aris >= loso_med_ari) + 1) / (len(null_aris) + 1))

metrics = {
    "band": BAND,
    "n_subjects": len(files),
    "loso_median_ari": float(loso_med_ari),
    "null_ari_mean": float(null_aris.mean()) if len(null_aris) else None,
    "null_ari_p_value": p_val,
    "subjects": [os.path.splitext(os.path.basename(f))[0] for f in files],
    "out_dir": OUT_ROOT,
    "knn_k": KNN_K,
    "consensus_threshold": CONS_THR
}
with open(os.path.join(OUT_MET, f"band__{BAND}__metrics.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"), cons)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"), co)

if SAVE_FIGS and len(degrees):
    plt.figure(); all_deg = np.concatenate(degrees); plt.hist(all_deg, bins=20)
    plt.title("Node degree distribution (KNN-sparsified iCoh graph)")
    plt.xlabel("Degree (sum of edge weights)"); plt.ylabel("Count")
    plt.tight_layout(); plt.savefig(os.path.join(OUT_FIG, "degree_hist.png"), dpi=160); plt.close()

    plt.figure(); plt.imshow(co, aspect='auto')
    plt.title(f"Consensus co-association | {BAND}")
    plt.colorbar(); plt.tight_layout()
    plt.savefig(os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc.png"), dpi=160); plt.close()

print("\n=== FAST+ROBUST RESULTS ===")
print(f"band={BAND}  n={len(files)}")
print(f"LOSO-median ARI: {loso_med_ari:.3f}")
print(f"Null ARI mean  : {float(null_aris.mean()):.3f}")
print(f"Permutation p  : {p_val:.4f}")
print("\nArtifacts:")
print(" -", os.path.join(OUT_MET, f"band__{BAND}__metrics.json"))
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"))
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"))
print(" -", os.path.join(OUT_FIG, "degree_hist.png"))
print(" -", os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc.png"))
print("\nDone.")


Found 10 EC subjects

=== FAST+ROBUST RESULTS ===
band=alpha  n=10
LOSO-median ARI: 1.000
Null ARI mean  : 0.960
Permutation p  : 0.9608

Artifacts:
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\metrics\band__alpha__metrics.json
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\tables\band__alpha__consensus_labels.npy
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\tables\band__alpha__coassoc.npy
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\figures\degree_hist.png
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastrobust\figures\consensus__alpha__coassoc.png

Done.


In [1]:
# === CNT Spectral-Glyphs: Modularity + Entropy + Band Sweep (single cell) ===
# Inputs:  C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Outputs: C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\{metrics.csv, figs}

import os, glob, json, re, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------------- Config ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats"
FS         = 250.0
BANDS      = ("alpha","theta")
KMAX       = 3
NULL_PERMS = 100           # bump to 500–1000 when stable
NPERSEG    = 128
NOVERLAP   = 64
KNN_K      = 6
CONS_THR   = 0.65
SAVE_FIGS  = True

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"delta":(1,4),"theta":(4,8),"alpha":(8,13),"beta":(13,30),"gamma":(30,45)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel])); den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def knn_sparsify(W, k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def pick_k(L, kmax=3):
    evals, _ = np.linalg.eigh(L)
    if len(evals) < 3: return 2
    gaps = [(k, evals[k+1]-evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    return max(2, min(max(gaps, key=lambda x: x[1])[0], kmax))

def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def consensus_from_labels(all_labels, thr=0.65):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n, n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels, thr=0.65):
    cons,_ = consensus_from_labels(all_labels, thr=thr)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave, thr=thr)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t-1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift+1)); Y[c] = np.roll(X[c], s)
    return Y

def phase_randomize(X, rng):
    n_ch, n_t = X.shape
    Y = np.zeros_like(X, dtype=float)
    for c in range(n_ch):
        F = np.fft.rfft(X[c])
        phases = rng.uniform(0, 2*np.pi, size=F.shape)
        phases[0] = 0.0
        if n_t % 2 == 0: phases[-1] = 0.0
        Y[c] = np.fft.irfft(np.abs(F)*np.exp(1j*phases), n=n_t)
    return Y

# Simple 10–20 reordering for nicer blocks (best-effort using name prefixes)
def order_by_region(ch_names):
    # groups in rough anterior→posterior order
    groups = ["Fp", "AF", "F", "FC", "C", "CP", "P", "PO", "O", "T", "TP"]
    def key(name):
        for i,g in enumerate(groups):
            if re.match(rf"^{g}", name, re.IGNORECASE):
                return (i, name)
        return (len(groups)+1, name)
    idx = sorted(range(len(ch_names)), key=lambda i: key(ch_names[i]))
    return np.array(idx, int)

# Modularity for weighted undirected graph given partition
def modularity(W, labels):
    W = W.copy()
    m = W.sum()/2.0 + 1e-12
    k = W.sum(1)
    Q = 0.0
    for i in range(W.shape[0]):
        for j in range(W.shape[0]):
            if labels[i] == labels[j]:
                Q += W[i,j] - (k[i]*k[j])/(2*m)
    return float(Q/(2*m))

def coassoc_entropy(co):
    # entropy of co-association entries (excluding diagonal)
    n = co.shape[0]
    mask = ~np.eye(n, dtype=bool)
    x = co[mask].ravel()
    # discretize into 50 bins
    hist, _ = np.histogram(x, bins=50, range=(0,1), density=True)
    hist = hist[hist>0]
    return float(-(hist*np.log(hist)).sum())

def blockness(co, labels):
    # mean intra vs inter coassoc
    labs = np.array(labels)
    intra = co[labs[:,None]==labs[None,:]]
    inter = co[labs[:,None]!=labs[None,:]]
    return float(intra.mean()/ (inter.mean()+1e-12))

# ---------------- Run ----------------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))
if not files: raise SystemExit("No EC files found. Check DATA_DIR/GLOB or run downloader.")

OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

rng = np.random.default_rng(7)

summary_rows = []

for band in BANDS:
    subj_labels = []
    ch_template = None
    Ws = []  # store per-subject graphs for averaging

    for f in files:
        X = np.load(f)
        ch_txt = f.replace(".npy",".channels.txt")
        if ch_template is None:
            if os.path.exists(ch_txt):
                with open(ch_txt,"r",encoding="utf-8") as g:
                    ch_template = [ln.strip() for ln in g if ln.strip()]
            else:
                ch_template = [f"ch{i}" for i in range(X.shape[0])]
        W = imag_coh_matrix(X, fs=FS, band=band)
        W = knn_sparsify(W, k=KNN_K)
        k = pick_k(laplacian_sym(W), kmax=KMAX)
        subj_labels.append(spec_cluster(W, k))
        Ws.append(W)

    # consensus
    cons, co = consensus_from_labels(subj_labels, thr=CONS_THR)

    # reorder by regions if names look standard
    idx_order = order_by_region(ch_template)
    co_ord = co[np.ix_(idx_order, idx_order)]

    # mixed null
    null_aris = []
    for p in range(NULL_PERMS):
        nlabs=[]
        for f in files:
            X = np.load(f)
            Y = phase_randomize(X, rng) if (p % 2 == 0) else circ_shift(X, rng)
            Wn = imag_coh_matrix(Y, fs=FS, band=band)
            Wn = knn_sparsify(Wn, k=KNN_K)
            kn = pick_k(laplacian_sym(Wn), kmax=KMAX)
            nlabs.append(spec_cluster(Wn, kn))
        cons_n,_ = consensus_from_labels(nlabs, thr=CONS_THR)
        null_aris.append(adjusted_rand_score(cons, cons_n))
    null_aris = np.array(null_aris, float)

    # stats
    loso = float(ari_loso(subj_labels, thr=CONS_THR))
    pval = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

    # modularity of consensus on mean graph
    W_mean = np.mean(Ws, axis=0)
    Q = modularity(W_mean, cons)

    # entropy & blockness
    H = coassoc_entropy(co)
    B = blockness(co, cons)

    # save
    np.save(os.path.join(OUT_TAB, f"band__{band}__consensus_labels.npy"), cons)
    np.save(os.path.join(OUT_TAB, f"band__{band}__coassoc.npy"), co)
    with open(os.path.join(OUT_MET, f"band__{band}__metrics.json"),"w",encoding="utf-8") as f:
        json.dump({"band":band,"loso_median_ari":loso,"null_ari_mean":float(null_aris.mean()),
                   "p_value":pval,"Q_modularity":Q,"coassoc_entropy":H,"blockness":B,
                   "n_subjects":len(files),"knn_k":KNN_K,"consensus_threshold":CONS_THR}, f, indent=2)

    if SAVE_FIGS:
        # co-association heatmap (ordered)
        plt.figure()
        plt.imshow(co_ord, aspect='auto')
        plt.title(f"Consensus co-association (ordered) | {band}")
        plt.colorbar()
        plt.tight_layout()
        plt.savefig(os.path.join(OUT_FIG, f"consensus__{band}__coassoc_ordered.png"), dpi=160); plt.close()

    summary_rows.append([band, loso, float(null_aris.mean()), pval, Q, H, B, len(files)])

# write summary CSV
import pandas as pd
cols = ["band","loso_median_ari","null_ari_mean","p_value","Q_modularity","coassoc_entropy","blockness","n_subjects"]
df = pd.DataFrame(summary_rows, columns=cols)
csv_path = os.path.join(OUT_ROOT, "spectral_glyphs_summary.csv")
df.to_csv(csv_path, index=False)

print("=== Summary ===")
print(df)
print("\nArtifacts:")
print(" -", csv_path)
print(" -", os.path.join(OUT_FIG, "consensus__alpha__coassoc_ordered.png"))
print(" -", os.path.join(OUT_FIG, "consensus__theta__coassoc_ordered.png"))
print(" -", os.path.join(OUT_TAB, "band__alpha__consensus_labels.npy"))
print(" -", os.path.join(OUT_TAB, "band__theta__consensus_labels.npy"))


  return float(intra.mean()/ (inter.mean()+1e-12))
  ret = ret.dtype.type(ret / rcount)


=== Summary ===
    band  loso_median_ari  null_ari_mean   p_value  Q_modularity  \
0  alpha              1.0           0.93  0.930693  1.222835e-10   
1  theta              1.0           0.94  0.940594  3.807186e-10   

   coassoc_entropy  blockness  n_subjects  
0       -89.279706        NaN          10  
1       -94.380917        NaN          10  

Artifacts:
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\spectral_glyphs_summary.csv
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\figures\consensus__alpha__coassoc_ordered.png
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\figures\consensus__theta__coassoc_ordered.png
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\tables\band__alpha__consensus_labels.npy
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_stats\tables\band__theta__consensus_labels.npy


  return float(intra.mean()/ (inter.mean()+1e-12))
  ret = ret.dtype.type(ret / rcount)


In [2]:
# === CNT Spectral-Glyphs: Parameter Sweep (single cell) ===
# Searches KNN_K × CONS_THR × k-mode to avoid single-cluster consensus
# Inputs:  C:\Users\caleb\CNT_Lab\eeg_rest\subject_XX_EC.npy (+ .channels.txt)
# Outputs: C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_sweep\summary.csv + quick figs

import os, glob, json, re, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------------- Config ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_sweep"

BAND       = "alpha"        # start with alpha; add 'theta' after a good combo
FS         = 250.0
NPERSEG    = 128
NOVERLAP   = 64
NULL_PERMS = 100            # start 100; raise to 500–1000 when locked in
KMAX       = 3              # eigengap cap

# Grid: try a few; extend if needed
KNN_LIST   = [4, 5, 6, 7]
THR_LIST   = [0.60, 0.65, 0.70]
K_MODE     = ["eigengap", "k=2", "k=3"]   # test both fixed-k and eigengap

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"delta":(1,4),"theta":(4,8),"alpha":(8,13),"beta":(13,30),"gamma":(30,45)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1, n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel])); den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def knn_sparsify(W, k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d <= 1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def pick_k(L, kmax=3):
    evals, _ = np.linalg.eigh(L)
    if len(evals) < 3: return 2
    gaps = [(k, evals[k+1]-evals[k]) for k in range(1, min(kmax+1, len(evals)-1))]
    if not gaps: return 2
    return max(2, min(max(gaps, key=lambda x: x[1])[0], kmax))

def spec_cluster(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k+1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def consensus_from_labels(all_labels, thr=0.65):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n, n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid = 0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels, thr=0.65):
    cons,_ = consensus_from_labels(all_labels, thr=thr)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave, thr=thr)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def circ_shift(X, rng, max_shift=None):
    n_ch, n_t = X.shape
    if max_shift is None: max_shift = n_t-1
    Y = np.zeros_like(X)
    for c in range(n_ch):
        s = int(rng.integers(0, max_shift+1)); Y[c] = np.roll(X[c], s)
    return Y

def phase_randomize(X, rng):
    n_ch, n_t = X.shape
    Y = np.zeros_like(X, dtype=float)
    for c in range(n_ch):
        F = np.fft.rfft(X[c])
        phases = rng.uniform(0, 2*np.pi, size=F.shape)
        phases[0] = 0.0
        if n_t % 2 == 0: phases[-1] = 0.0
        Y[c] = np.fft.irfft(np.abs(F)*np.exp(1j*phases), n=n_t)
    return Y

def modularity(W, labels):
    W = W.copy()
    m = W.sum()/2.0 + 1e-12
    k = W.sum(1)
    Q = 0.0
    for i in range(W.shape[0]):
        for j in range(W.shape[0]):
            if labels[i] == labels[j]:
                Q += W[i,j] - (k[i]*k[j])/(2*m)
    return float(Q/(2*m))

def coassoc_entropy(co):
    n = co.shape[0]
    x = co[~np.eye(n, dtype=bool)].ravel()
    hist, _ = np.histogram(x, bins=50, range=(0,1), density=True)
    hist = hist[hist>0]
    return float(-(hist*np.log(hist)).sum()) if len(hist) else np.nan

def blockness_safe(co, labels):
    labs = np.array(labels)
    if len(np.unique(labs)) < 2:  # single cluster
        return np.nan
    intra = co[labs[:,None]==labs[None,:]]
    inter = co[labs[:,None]!=labs[None,:]]
    return float(intra.mean() / (inter.mean()+1e-12))

# ---------------- Load data ----------------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))
if not files: raise SystemExit("No EC files found. Check DATA_DIR/GLOB.")

OUT = ensure_dir(OUT_ROOT)
FIG = ensure_dir(os.path.join(OUT, "figures"))

# ---------------- Sweep ----------------
rng = np.random.default_rng(7)
rows = []

for knn_k in KNN_LIST:
    for thr in THR_LIST:
        for mode in K_MODE:
            # per-subject clustering
            subj_labels = []
            Ws = []
            for f in files:
                X = np.load(f)
                W = imag_coh_matrix(X, fs=FS, band=BAND)
                W = knn_sparsify(W, k=knn_k)
                Ws.append(W)
                if mode == "eigengap":
                    k = pick_k(laplacian_sym(W), kmax=KMAX)
                elif mode == "k=2":
                    k = 2
                else:
                    k = 3
                subj_labels.append(spec_cluster(W, k))

            cons, co = consensus_from_labels(subj_labels, thr=thr)
            n_cons_clusters = int(len(np.unique(cons)))

            # null (mixed)
            null_aris = []
            for p in range(NULL_PERMS):
                nlabs=[]
                for f in files:
                    X = np.load(f)
                    Y = phase_randomize(X, rng) if (p % 2 == 0) else circ_shift(X, rng)
                    Wn = imag_coh_matrix(Y, fs=FS, band=BAND)
                    Wn = knn_sparsify(Wn, k=knn_k)
                    if mode == "eigengap":
                        kn = pick_k(laplacian_sym(Wn), kmax=KMAX)
                    elif mode == "k=2":
                        kn = 2
                    else:
                        kn = 3
                    nlabs.append(spec_cluster(Wn, kn))
                cons_n,_ = consensus_from_labels(nlabs, thr=thr)
                null_aris.append(adjusted_rand_score(cons, cons_n))
            null_aris = np.array(null_aris, float)

            loso = float(ari_loso(subj_labels, thr=thr))
            pval = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

            W_mean = np.mean(Ws, axis=0)
            Q = modularity(W_mean, cons)
            H = coassoc_entropy(co)
            B = blockness_safe(co, cons)

            rows.append({
                "band": BAND, "mode": mode, "knn_k": knn_k, "cons_thr": thr,
                "n_subjects": len(files), "n_consensus_clusters": n_cons_clusters,
                "loso_median_ari": loso, "null_ari_mean": float(null_aris.mean()),
                "p_value": pval, "Q_modularity": Q, "coassoc_entropy": H, "blockness": B
            })

# ---------------- Save & show ----------------
df = pd.DataFrame(rows).sort_values(["p_value","n_consensus_clusters"], ascending=[True, False]).reset_index(drop=True)
csv_path = os.path.join(OUT, "summary.csv")
df.to_csv(csv_path, index=False)

print("Top 10 configurations (lower p is better):")
print(df.head(10).to_string(index=False))
print("\nSaved:", csv_path)

# Quick plot: p-value by config rank
plt.figure()
plt.plot(np.arange(len(df)), df["p_value"].values, marker="o")
plt.xlabel("Config rank (sorted by p-value)")
plt.ylabel("Permutation p-value")
plt.title(f"Sweep results | {BAND}")
plt.tight_layout()
plt.savefig(os.path.join(FIG, f"sweep_{BAND}_pvalues.png"), dpi=160); plt.close()

print("Figure:", os.path.join(FIG, f"sweep_{BAND}_pvalues.png"))


KeyboardInterrupt: 

In [3]:
# === CNT Spectral-Glyphs: FAST DIAG (single cell) ===
# Goal: quick, decisive alpha check without hours of CSD recompute.
# Strategy: build real iCoh once per subject; null = fast graph-shuffle; fixed k=2; clear progress prints.

import os, glob, json, time, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import welch, csd

# ---------- config (tweak here) ----------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastdiag"
FS         = 250.0
BAND       = "alpha"
NPERSEG    = 128
NOVERLAP   = 64

SUBJ_LIMIT = 6      # start with 6; raise after you see results
K_FIXED    = 2      # confirmatory k=2 to avoid single-cluster blob
NULL_PERMS = 50     # 50 fast perms (graph shuffle)
TIME_CAP_S = 600    # stop after 10 minutes (adjust as needed)
CONS_THR   = 0.65   # stricter consensus threshold
SAVE_FIGS  = True

# ---------- helpers ----------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def band_lims(): return {"alpha":(8,13),"theta":(4,8),"beta":(13,30),"delta":(1,4),"gamma":(30,45)}

def imag_coh_matrix(X, fs, band):
    n_ch, _ = X.shape
    fmin, fmax = band_lims()[band]
    freqs, Pxx0 = welch(X[0], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
    Pxx = [Pxx0] + [welch(X[i], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)[1] for i in range(1,n_ch)]
    sel = (freqs >= fmin) & (freqs <= fmax)
    W = np.zeros((n_ch,n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            _, Pxy = csd(X[i], X[j], fs=fs, nperseg=NPERSEG, noverlap=NOVERLAP)
            num = np.abs(np.imag(Pxy[sel])); den = np.sqrt(Pxx[i][sel]*Pxx[j][sel] + 1e-12)
            W[i,j] = W[j,i] = float(np.nanmean(num/den))
    np.fill_diagonal(W, 0.0)
    return W

def knn_sparsify(W, k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d<=1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def spec_kmeans_on_laplacian(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k] if k>1 else evecs[:, :1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def consensus_from_labels(all_labels, thr=0.65):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n,n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid=0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels, thr=0.65):
    cons,_ = consensus_from_labels(all_labels, thr=thr)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave, thr=thr)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

# super-fast null: degree-preserving row shuffles (keeps each node’s weight, breaks block structure)
def graph_shuffle_null(W, rng):
    n = W.shape[0]
    Wn = np.zeros_like(W)
    for i in range(n):
        row = W[i].copy()
        idx = np.arange(n); rng.shuffle(idx)
        row = row[idx]
        row[i] = 0.0
        Wn[i] = row
    Wn = 0.5*(Wn + Wn.T)  # symmetrize
    np.fill_diagonal(Wn, 0.0)
    return Wn

# ---------- run ----------
t0 = time.time()
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))[:SUBJ_LIMIT]
print(f"[info] subjects={len(files)} → {files[:3]}{' …' if len(files)>3 else ''}")

OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

# 1) build real graphs once
Ws = []
subj_labels = []
for ix, f in enumerate(files, 1):
    X = np.load(f)
    W = imag_coh_matrix(X, fs=FS, band=BAND)
    W = knn_sparsify(W, k=6)
    Ws.append(W)
    labs = spec_kmeans_on_laplacian(W, k=K_FIXED)
    subj_labels.append(labs)
    if ix % 2 == 0:
        print(f"[real] built {ix}/{len(files)} graphs")

# 2) consensus on real labels
cons, co = consensus_from_labels(subj_labels, thr=CONS_THR)
loso = ari_loso(subj_labels, thr=CONS_THR)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"), cons)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"), co)

# 3) fast nulls (graph shuffles)
rng = np.random.default_rng(7)
null_aris = []
for p in range(NULL_PERMS):
    if (time.time() - t0) > TIME_CAP_S:
        print("[warn] time cap hit — stopping nulls early")
        break
    nlabs=[]
    for W in Ws:
        Wn = graph_shuffle_null(W, rng)
        lbn = spec_kmeans_on_laplacian(Wn, k=K_FIXED)
        nlabs.append(lbn)
    cons_n,_ = consensus_from_labels(nlabs, thr=CONS_THR)
    null_aris.append(adjusted_rand_score(cons, cons_n))
    if (p+1) % 10 == 0:
        print(f"[null] {p+1}/{NULL_PERMS} perms; elapsed={int(time.time()-t0)}s")

null_aris = np.array(null_aris) if len(null_aris) else np.array([1.0])
p_val = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

# 4) quick figs + metrics
if SAVE_FIGS:
    plt.figure()
    plt.imshow(co, aspect='auto'); plt.title(f"Consensus co-association | {BAND}")
    plt.colorbar(); plt.tight_layout()
    plt.savefig(os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc.png"), dpi=160); plt.close()

metrics = {
    "band": BAND, "n_subjects": len(files),
    "k_fixed": K_FIXED, "consensus_thr": CONS_THR,
    "loso_median_ari": float(loso),
    "null_ari_mean": float(np.mean(null_aris)),
    "p_value": p_val, "perms_done": int(len(null_aris)),
    "elapsed_s": int(time.time()-t0)
}
with open(os.path.join(OUT_MET, f"band__{BAND}__metrics.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)

print("\n=== FAST DIAG RESULTS ===")
print(metrics)
print("Artifacts:")
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"))
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"))
print(" -", os.path.join(OUT_MET, f"band__{BAND}__metrics.json"))
print(" -", os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc.png"))


[info] subjects=6 → ['C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_01_EC.npy', 'C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_02_EC.npy', 'C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_03_EC.npy'] …
[real] built 2/6 graphs
[real] built 4/6 graphs
[real] built 6/6 graphs
[null] 10/50 perms; elapsed=94s
[null] 20/50 perms; elapsed=101s
[null] 30/50 perms; elapsed=107s
[null] 40/50 perms; elapsed=114s
[null] 50/50 perms; elapsed=121s

=== FAST DIAG RESULTS ===
{'band': 'alpha', 'n_subjects': 6, 'k_fixed': 2, 'consensus_thr': 0.65, 'loso_median_ari': 1.0, 'null_ari_mean': 1.0, 'p_value': 1.0, 'perms_done': 50, 'elapsed_s': 121}
Artifacts:
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastdiag\tables\band__alpha__consensus_labels.npy
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastdiag\tables\band__alpha__coassoc.npy
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastdiag\metrics\band__alpha__metrics.json
 - C:\Users\caleb\CNT_Lab\artifacts\spectral_glyphs_fastdiag\figures\

In [4]:
# === CNT Spectral-Glyphs: PLI Fast-Diag (single cell) ===
# Connectivity: Phase-Lag Index (PLI) from Hilbert phases in the target band
# Null: fast graph-shuffle (no CSD recompute)
# Clustering: spectral (k=2) to force a non-trivial split
# Outputs -> C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag

import os, glob, json, time, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import butter, filtfilt, hilbert

# ---------------- Config ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag"

BAND       = "alpha"        # change to "theta" to compare
F_LOW, F_HIGH = (8.0, 13.0) # alpha Hz (theta: 4,8)
FS         = 250.0          # Hz
SUBJ_LIMIT = 6              # start small; raise after it works
K_FIXED    = 2              # confirmatory non-trivial split
CONS_THR   = 0.65           # consensus threshold
KNN_K      = 6              # keep k strongest neighbors per node (symmetric)
NULL_PERMS = 50             # fast nulls
TIME_CAP_S = 600            # safety cap
SAVE_FIGS  = True

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p

def bandpass(x, fs, f_lo, f_hi, order=4):
    b, a = butter(order, [f_lo/(fs/2), f_hi/(fs/2)], btype="band")
    return filtfilt(b, a, x)

def pli_matrix(X, fs, f_lo, f_hi):
    """
    X: [n_ch, n_t]
    Steps: bandpass -> Hilbert -> phases -> PLI = | mean(sign(sin(Δphi))) |
    """
    n_ch, n_t = X.shape
    Y = np.zeros_like(X, dtype=float)
    for c in range(n_ch):
        Y[c] = bandpass(X[c], fs, f_lo, f_hi)
    ph = np.angle(hilbert(Y, axis=1))
    W = np.zeros((n_ch, n_ch), float)
    for i in range(n_ch):
        for j in range(i+1, n_ch):
            dphi = ph[i] - ph[j]
            pli = np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i, j] = W[j, i] = pli
    np.fill_diagonal(W, 0.0)
    return W

def knn_sparsify(W, k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W, 0.0)
    return W

def laplacian_sym(W):
    d = W.sum(1); d = np.where(d<=1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def spec_kmeans_on_laplacian(W, k):
    L = laplacian_sym(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k] if k>1 else evecs[:, :1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def consensus_from_labels(all_labels, thr=0.65):
    n = len(all_labels[0]); m = len(all_labels); co = np.zeros((n,n), float)
    for lab in all_labels:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    co /= m
    A = (co >= thr).astype(int)
    visited = np.zeros(n, bool); cons = -1*np.ones(n, int); cid=0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons, co

def ari_loso(all_labels, thr=0.65):
    cons,_ = consensus_from_labels(all_labels, thr=thr)
    vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_leave,_ = consensus_from_labels(leave, thr=thr)
        vals.append(adjusted_rand_score(cons, cons_leave))
    return float(np.median(vals))

def graph_shuffle_null(W, rng):
    n = W.shape[0]
    Wn = np.zeros_like(W)
    for i in range(n):
        row = W[i].copy()
        idx = np.arange(n); rng.shuffle(idx)
        row = row[idx]; row[i] = 0.0
        Wn[i] = row
    Wn = 0.5*(Wn + Wn.T); np.fill_diagonal(Wn, 0.0)
    return Wn

# ---------------- Run ----------------
t0 = time.time()
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))[:SUBJ_LIMIT]
print(f"[info] subjects={len(files)} → {files[:3]}{' …' if len(files)>3 else ''}")

OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

# 1) build PLI graphs once
Ws = []; subj_labels = []
for ix, f in enumerate(files, 1):
    X = np.load(f)
    W = pli_matrix(X, FS, F_LOW, F_HIGH)
    W = knn_sparsify(W, k=KNN_K)
    Ws.append(W)
    labs = spec_kmeans_on_laplacian(W, k=K_FIXED)
    subj_labels.append(labs)
    if ix % 2 == 0:
        print(f"[real] built {ix}/{len(files)} graphs")

# 2) consensus on real labels
cons, co = consensus_from_labels(subj_labels, thr=CONS_THR)
loso = ari_loso(subj_labels, thr=CONS_THR)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"), cons)
np.save(os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"), co)

# 3) fast nulls (graph shuffles)
rng = np.random.default_rng(7)
null_aris = []
for p in range(NULL_PERMS):
    if (time.time() - t0) > TIME_CAP_S:
        print("[warn] time cap hit — stopping nulls early")
        break
    nlabs=[]
    for W in Ws:
        Wn = graph_shuffle_null(W, rng)
        lbn = spec_kmeans_on_laplacian(Wn, k=K_FIXED)
        nlabs.append(lbn)
    cons_n,_ = consensus_from_labels(nlabs, thr=CONS_THR)
    null_aris.append(adjusted_rand_score(cons, cons_n))
    if (p+1) % 10 == 0:
        print(f"[null] {p+1}/{NULL_PERMS} perms; elapsed={int(time.time()-t0)}s")

null_aris = np.array(null_aris) if len(null_aris) else np.array([1.0])
p_val = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

# 4) quick figs + metrics
if SAVE_FIGS:
    plt.figure()
    plt.imshow(co, aspect='auto'); plt.title(f"Consensus co-association | {BAND} (PLI)")
    plt.colorbar(); plt.tight_layout()
    plt.savefig(os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc_pli.png"), dpi=160); plt.close()

metrics = {
    "band": BAND, "connectivity": "PLI",
    "n_subjects": len(files), "k_fixed": K_FIXED, "consensus_thr": CONS_THR,
    "loso_median_ari": float(loso), "null_ari_mean": float(np.mean(null_aris)),
    "p_value": p_val, "perms_done": int(len(null_aris)), "elapsed_s": int(time.time()-t0)
}
with open(os.path.join(OUT_MET, f"band__{BAND}__metrics.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)

print("\n=== PLI FAST DIAG RESULTS ===")
print(metrics)
print("Artifacts:")
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__consensus_labels.npy"))
print(" -", os.path.join(OUT_TAB, f"band__{BAND}__coassoc.npy"))
print(" -", os.path.join(OUT_MET, f"band__{BAND}__metrics.json"))
print(" -", os.path.join(OUT_FIG, f"consensus__{BAND}__coassoc_pli.png"))


[info] subjects=6 → ['C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_01_EC.npy', 'C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_02_EC.npy', 'C:\\Users\\caleb\\CNT_Lab\\eeg_rest\\subject_03_EC.npy'] …
[real] built 2/6 graphs
[real] built 4/6 graphs
[real] built 6/6 graphs
[null] 10/50 perms; elapsed=8s
[null] 20/50 perms; elapsed=15s
[null] 30/50 perms; elapsed=21s
[null] 40/50 perms; elapsed=28s
[null] 50/50 perms; elapsed=34s

=== PLI FAST DIAG RESULTS ===
{'band': 'alpha', 'connectivity': 'PLI', 'n_subjects': 6, 'k_fixed': 2, 'consensus_thr': 0.65, 'loso_median_ari': 1.0, 'null_ari_mean': 1.0, 'p_value': 1.0, 'perms_done': 50, 'elapsed_s': 35}
Artifacts:
 - C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag\tables\band__alpha__consensus_labels.npy
 - C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag\tables\band__alpha__coassoc.npy
 - C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag\metrics\band__alpha__metrics.json
 - C:\Users\caleb\CNT_Lab\artifacts\pli_fastdiag\figures\consensus__alpha__coassoc_p

In [5]:
# === CNT Spectral-Glyphs: PLI Null-Resample Diagnostic ===
# Fast: 6 subjects × 30 perms. Adds weight-swap nulls to test real modularity.

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import butter, filtfilt, hilbert

DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\pli_resamplediag"
FS         = 250.0
F_LOW,F_HIGH = (8.0,13.0)
GLOB       = "subject_*_EC.npy"
SUBJ_LIMIT = 6
K_FIXED    = 2
KNN_K      = 6
CONS_THR   = 0.65
NULL_PERMS = 30
SAVE_FIGS  = True

def ensure_dir(p): os.makedirs(p,exist_ok=True); return p
def bandpass(x,fs,f_lo,f_hi,order=4):
    from scipy.signal import butter,filtfilt
    b,a=butter(order,[f_lo/(fs/2),f_hi/(fs/2)],btype="band"); return filtfilt(b,a,x)
def pli_matrix(X,fs,f_lo,f_hi):
    n=X.shape[0]; Y=np.zeros_like(X)
    for c in range(n): Y[c]=bandpass(X[c],fs,f_lo,f_hi)
    ph=np.angle(hilbert(Y,axis=1))
    W=np.zeros((n,n))
    for i in range(n):
        for j in range(i+1,n):
            dphi=ph[i]-ph[j]; pli=np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j]=W[j,i]=pli
    np.fill_diagonal(W,0); return W
def knn(W,k=6):
    W=W.copy(); n=W.shape[0]
    for i in range(n):
        idx=np.argsort(W[i])[::-1]; keep=idx[:k]; mask=np.ones(n,bool); mask[keep]=False; W[i,mask]=0
    W=np.maximum(W,W.T); np.fill_diagonal(W,0); return W
def laplacian(W): d=W.sum(1); d=np.where(d<=1e-12,1.0,d); Dmh=np.diag(1.0/np.sqrt(d)); return np.eye(W.shape[0])-Dmh@W@Dmh
def spec(W,k):
    L=laplacian(W); e,v=np.linalg.eigh(L); U=v[:,1:k]; U/=np.linalg.norm(U,axis=1,keepdims=True)+1e-12
    return KMeans(n_clusters=k,n_init=50,random_state=42).fit_predict(U)
def consensus(all_labels,thr):
    n=len(all_labels[0]); m=len(all_labels); co=np.zeros((n,n))
    for lab in all_labels:
        for i in range(n):
            li=lab[i]
            for j in range(n):
                co[i,j]+=1 if li==lab[j] else 0
    co/=m; A=(co>=thr).astype(int)
    visited=np.zeros(n,bool); cons=-1*np.ones(n,int); cid=0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons,co
def ari_loso(all_labels,thr):
    cons,_=consensus(all_labels,thr); vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_l,_=consensus(leave,thr); vals.append(adjusted_rand_score(cons,cons_l))
    return float(np.median(vals))
def weight_swap_null(W,rng):
    n=W.shape[0]; tri=W[np.triu_indices(n,1)]
    rng.shuffle(tri); Wn=np.zeros_like(W); Wn[np.triu_indices(n,1)]=tri; Wn+=Wn.T; np.fill_diagonal(Wn,0); return Wn

files=sorted(glob.glob(os.path.join(DATA_DIR,GLOB)))[:SUBJ_LIMIT]
Ws=[]; labs=[]; rng=np.random.default_rng(7)
for f in files:
    X=np.load(f); W=pli_matrix(X,FS,F_LOW,F_HIGH); W=knn(W,KNN_K)
    Ws.append(W); labs.append(spec(W,K_FIXED))
cons,co=consensus(labs,CONS_THR); loso=ari_loso(labs,CONS_THR)
null_aris=[]
for p in range(NULL_PERMS):
    nlabs=[]
    for W in Ws:
        Wn=weight_swap_null(W,rng)
        nlabs.append(spec(Wn,K_FIXED))
    cons_n,_=consensus(nlabs,CONS_THR)
    null_aris.append(adjusted_rand_score(cons,cons_n))
p=float((np.sum(null_aris>=loso)+1)/(len(null_aris)+1))
unique,counts=np.unique(cons,return_counts=True)
print("=== PLI Weight-Resample Diagnostic ===")
print(f"subjects={len(files)} clusters={len(unique)} sizes={counts.tolist()}")
print(f"LOSO={loso:.3f}  NullMean={np.mean(null_aris):.3f}  p={p:.4f}")
intra=co[cons[:,None]==cons[None,:]].mean(); inter=co[cons[:,None]!=cons[None,:]].mean()
print(f"Intra={intra:.3f}  Inter={inter:.3f}  Intra/Inter={intra/(inter+1e-12):.2f}")
plt.figure(); plt.imshow(co,aspect='auto'); plt.title(f"Consensus co-association | {BAND} (PLI weight-null)")
plt.colorbar(); plt.tight_layout()
out_fig=os.path.join(OUT_ROOT,"consensus_alpha_weightnull.png"); ensure_dir(OUT_ROOT); plt.savefig(out_fig,dpi=160)
print("Saved:",out_fig)


TypeError: '>=' not supported between instances of 'list' and 'float'

In [6]:
# === FIXED: PLI Weight-Resample Diagnostic (single cell) ===

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import butter, filtfilt, hilbert

DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\pli_resamplediag"
FS         = 250.0
F_LOW,F_HIGH = (8.0,13.0)
GLOB       = "subject_*_EC.npy"
SUBJ_LIMIT = 6
K_FIXED    = 2
KNN_K      = 6
CONS_THR   = 0.65
NULL_PERMS = 30
SAVE_FIGS  = True

def ensure_dir(p): os.makedirs(p,exist_ok=True); return p
def bandpass(x,fs,f_lo,f_hi,order=4):
    b,a=butter(order,[f_lo/(fs/2),f_hi/(fs/2)],btype="band"); return filtfilt(b,a,x)
def pli_matrix(X,fs,f_lo,f_hi):
    n=X.shape[0]; Y=np.zeros_like(X)
    for c in range(n): Y[c]=bandpass(X[c],fs,f_lo,f_hi)
    ph=np.angle(hilbert(Y,axis=1))
    W=np.zeros((n,n))
    for i in range(n):
        for j in range(i+1,n):
            dphi=ph[i]-ph[j]; pli=np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j]=W[j,i]=pli
    np.fill_diagonal(W,0); return W
def knn(W,k=6):
    W=W.copy(); n=W.shape[0]
    for i in range(n):
        idx=np.argsort(W[i])[::-1]; keep=idx[:k]
        mask=np.ones(n,bool); mask[keep]=False; W[i,mask]=0
    W=np.maximum(W,W.T); np.fill_diagonal(W,0); return W
def laplacian(W): d=W.sum(1); d=np.where(d<=1e-12,1.0,d); Dmh=np.diag(1.0/np.sqrt(d)); return np.eye(W.shape[0])-Dmh@W@Dmh
def spec(W,k):
    L=laplacian(W); e,v=np.linalg.eigh(L); U=v[:,1:k]; U/=np.linalg.norm(U,axis=1,keepdims=True)+1e-12
    return KMeans(n_clusters=k,n_init=50,random_state=42).fit_predict(U)
def consensus(all_labels,thr):
    n=len(all_labels[0]); m=len(all_labels); co=np.zeros((n,n))
    for lab in all_labels:
        for i in range(n):
            li=lab[i]
            for j in range(n):
                co[i,j]+=1 if li==lab[j] else 0
    co/=m; A=(co>=thr).astype(int)
    visited=np.zeros(n,bool); cons=-1*np.ones(n,int); cid=0
    for i in range(n):
        if not visited[i]:
            stack=[i]; visited[i]=True; cons[i]=cid
            while stack:
                u=stack.pop()
                for v in range(n):
                    if A[u,v] and not visited[v]:
                        visited[v]=True; cons[v]=cid; stack.append(v)
            cid+=1
    return cons,co
def ari_loso(all_labels,thr):
    cons,_=consensus(all_labels,thr); vals=[]
    for s in range(len(all_labels)):
        leave=[lab for i,lab in enumerate(all_labels) if i!=s]
        cons_l,_=consensus(leave,thr); vals.append(adjusted_rand_score(cons,cons_l))
    return float(np.median(vals))
def weight_swap_null(W,rng):
    n=W.shape[0]; tri=W[np.triu_indices(n,1)]
    rng.shuffle(tri)
    Wn=np.zeros_like(W); Wn[np.triu_indices(n,1)]=tri; Wn+=Wn.T; np.fill_diagonal(Wn,0); return Wn

# Load subset
files=sorted(glob.glob(os.path.join(DATA_DIR,GLOB)))[:SUBJ_LIMIT]
Ws=[]; labs=[]; rng=np.random.default_rng(7)
for f in files:
    X=np.load(f); W=pli_matrix(X,FS,F_LOW,F_HIGH); W=knn(W,KNN_K)
    Ws.append(W); labs.append(spec(W,K_FIXED))

# Real consensus
cons,co=consensus(labs,CONS_THR)
loso=ari_loso(labs,CONS_THR)

# Null (weight-resample)
null_aris=[]
for p in range(NULL_PERMS):
    nlabs=[]
    for W in Ws:
        Wn=weight_swap_null(W,rng)
        nlabs.append(spec(Wn,K_FIXED))
    cons_n,_=consensus(nlabs,CONS_THR)
    null_aris.append(adjusted_rand_score(cons,cons_n))

null_aris=np.array(null_aris, dtype=float)   # <-- FIX: make it an array
p=float((np.sum(null_aris>=loso)+1)/(len(null_aris)+1))

# Cluster stats
unique,counts=np.unique(cons,return_counts=True)
if len(unique)>=2:
    intra=co[cons[:,None]==cons[None,:]].mean()
    inter=co[cons[:,None]!=cons[None,:]].mean()
    ratio=float(intra/(inter+1e-12))
else:
    intra=inter=ratio=np.nan

print("=== PLI Weight-Resample Diagnostic ===")
print(f"subjects={len(files)}  clusters={len(unique)}  sizes={counts.tolist()}")
print(f"LOSO={loso:.3f}  NullMean={float(null_aris.mean()):.3f}  p={p:.4f}")
print(f"Intra={intra:.3f}  Inter={inter:.3f}  Intra/Inter={ratio:.2f}")

# Figure
if SAVE_FIGS:
    ensure_dir(OUT_ROOT)
    plt.figure()
    plt.imshow(co,aspect='auto')
    plt.title("Consensus co-association | alpha (PLI weight-null)")
    plt.colorbar(); plt.tight_layout()
    out_fig=os.path.join(OUT_ROOT,"consensus_alpha_weightnull.png")
    plt.savefig(out_fig,dpi=160); plt.close()
    print("Saved:", out_fig)


=== PLI Weight-Resample Diagnostic ===
subjects=6  clusters=1  sizes=[64]
LOSO=1.000  NullMean=1.000  p=1.0000
Intra=nan  Inter=nan  Intra/Inter=nan
Saved: C:\Users\caleb\CNT_Lab\artifacts\pli_resamplediag\consensus_alpha_weightnull.png


In [7]:
# === CNT PLI Consensus via Spectral-on-CoAssoc (single cell) ===
# - Per-subject: PLI (alpha) → KNN → spectral (k=2) labels
# - Consensus matrix: fraction of subject label-agreement (channels×channels)
# - Final consensus: spectral clustering (k=2) directly on the co-association matrix
# - Null: keep each subject’s graph; randomize that subject’s labels (preserve sizes); rebuild coassoc; spectral k=2
# - Prints: #clusters (always 2), cluster sizes, LOSO-ARI vs leave-one-out consensus, null p-value, intra/inter coassoc

import os, glob, json, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import butter, filtfilt, hilbert

# -------- config --------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\pli_coassoc_spectral"
FS         = 250.0
F_LOW,F_HIGH = (8.0, 13.0)   # alpha
SUBJ_LIMIT = 6               # raise after it works
K_FIXED    = 2               # per-subject k
KNN_K      = 6               # sparsify
NULL_PERMS = 200             # fast; raise to 500–1000 for paper
SAVE_FIGS  = True

# -------- helpers --------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def bandpass(x,fs,lo,hi,order=4):
    b,a=butter(order,[lo/(fs/2), hi/(fs/2)], btype="band"); return filtfilt(b,a,x)
def pli_matrix(X,fs,lo,hi):
    n=X.shape[0]; Y=np.zeros_like(X)
    for c in range(n): Y[c]=bandpass(X[c],fs,lo,hi)
    ph=np.angle(hilbert(Y,axis=1))
    W=np.zeros((n,n))
    for i in range(n):
        for j in range(i+1,n):
            dphi=ph[i]-ph[j]; pli=np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j]=W[j,i]=pli
    np.fill_diagonal(W,0); return W
def knn(W,k=6):
    W=W.copy(); n=W.shape[0]
    for i in range(n):
        idx=np.argsort(W[i])[::-1]; keep=idx[:k]
        mask=np.ones(n,bool); mask[keep]=False; W[i,mask]=0
    W=np.maximum(W,W.T); np.fill_diagonal(W,0); return W
def laplacian(W):
    d=W.sum(1); d=np.where(d<=1e-12,1.0,d); Dmh=np.diag(1.0/np.sqrt(d)); return np.eye(W.shape[0]) - Dmh@W@Dmh
def spec_labels(W, k):
    L=laplacian(W); evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k] if k>1 else evecs[:, :1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def build_coassoc(label_list):
    n = len(label_list[0]); m = len(label_list)
    co = np.zeros((n,n), float)
    for lab in label_list:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    return co / m

def loso_ari_via_coassoc(label_list):
    # Build full-consensus labels by spectral on full coassoc
    co_full = build_coassoc(label_list)
    cons_full = spec_labels(co_full, k=2)
    # Leave-one-subject-out consensus labels the same way
    aris=[]
    for s in range(len(label_list)):
        Ls = [lab for i,lab in enumerate(label_list) if i!=s]
        co_l = build_coassoc(Ls)
        cons_l = spec_labels(co_l, k=2)
        aris.append(adjusted_rand_score(cons_full, cons_l))
    return float(np.median(aris)), cons_full, co_full

def randomize_labels_same_sizes(lab, rng):
    n = len(lab); uniq, cnts = np.unique(lab, return_counts=True)
    idx = np.arange(n); rng.shuffle(idx)
    # assign first cnts[0] to label uniq[0], etc.
    out = np.empty(n, dtype=int); start=0
    for label, c in zip(uniq, cnts):
        seg = idx[start:start+c]; out[seg]=label; start+=c
    return out

# -------- run --------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))[:SUBJ_LIMIT]
if not files: raise SystemExit("No EC files found. Check DATA_DIR/GLOB.")
OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

# Per-subject: PLI → KNN → spectral k=2
subj_labels = []
Ws = []
for ix, f in enumerate(files, 1):
    X = np.load(f)
    W = pli_matrix(X, FS, F_LOW, F_HIGH)
    W = knn(W, KNN_K)
    labs = spec_labels(W, K_FIXED)
    subj_labels.append(labs)
    Ws.append(W)
    if ix % 2 == 0: print(f"[real] {ix}/{len(files)}")

# Real consensus via spectral-on-coassoc
loso, cons_real, co_real = loso_ari_via_coassoc(subj_labels)
np.save(os.path.join(OUT_TAB, f"band__alpha__coassoc.npy"), co_real)
np.save(os.path.join(OUT_TAB, f"band__alpha__consensus_labels.npy"), cons_real)

# Null: randomize each subject’s labels (preserve sizes) → spectral-on-coassoc
rng = np.random.default_rng(7)
null_aris = []
for p in range(NULL_PERMS):
    nlabs=[]
    for lab in subj_labels:
        nlabs.append(randomize_labels_same_sizes(lab, rng))
    loso_n, cons_n, _ = loso_ari_via_coassoc(nlabs)
    null_aris.append(adjusted_rand_score(cons_real, cons_n))
null_aris = np.array(null_aris, float)
p_val = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

# Cluster diagnostics on coassoc spectral labels
unique, counts = np.unique(cons_real, return_counts=True)
intra = co_real[cons_real[:,None]==cons_real[None,:]].mean()
inter = co_real[cons_real[:,None]!=cons_real[None,:]].mean()
ratio = float(intra/(inter+1e-12))

print("\n=== PLI Spectral-on-CoAssoc Results ===")
print(f"subjects={len(files)}  clusters=2  sizes={counts.tolist()}")
print(f"LOSO (coassoc spectral) = {loso:.3f}")
print(f"Null ARI mean = {float(null_aris.mean()):.3f}   p = {p_val:.4f}")
print(f"Intra={intra:.3f}  Inter={inter:.3f}  Intra/Inter={ratio:.2f}")

if SAVE_FIGS:
    plt.figure()
    plt.imshow(co_real, aspect='auto'); plt.title("Consensus co-association (spectral) | alpha (PLI)")
    plt.colorbar(); plt.tight_layout()
    figp = os.path.join(OUT_FIG, "consensus__alpha__coassoc_spectral.png")
    plt.savefig(figp, dpi=160); plt.close()
    print("Figure:", figp)

# Save metrics json
metrics = {
    "band": "alpha",
    "connectivity": "PLI",
    "consensus_mode": "spectral_on_coassoc_k2",
    "n_subjects": len(files),
    "loso_coassoc": float(loso),
    "null_ari_mean": float(null_aris.mean()),
    "p_value": p_val,
    "cluster_sizes": counts.tolist(),
    "intra_mean": float(intra),
    "inter_mean": float(inter),
    "intra_over_inter": ratio,
}
with open(os.path.join(OUT_MET, "band__alpha__metrics.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)
print("Saved metrics:", os.path.join(OUT_MET, "band__alpha__metrics.json"))


[real] 2/6
[real] 4/6
[real] 6/6

=== PLI Spectral-on-CoAssoc Results ===
subjects=6  clusters=2  sizes=[35, 29]
LOSO (coassoc spectral) = 0.818
Null ARI mean = 0.001   p = 0.0050
Intra=0.746  Inter=0.281  Intra/Inter=2.65
Figure: C:\Users\caleb\CNT_Lab\artifacts\pli_coassoc_spectral\figures\consensus__alpha__coassoc_spectral.png
Saved metrics: C:\Users\caleb\CNT_Lab\artifacts\pli_coassoc_spectral\metrics\band__alpha__metrics.json


In [8]:
# === CNT PLI Consensus — α/θ/β sweep (single cell) ===
# Connectivity: Phase-Lag Index (PLI) via Hilbert phases in each band
# Consensus: spectral clustering (k=2) on the co-association matrix
# Null: per-subject labels randomized with same cluster sizes (fast, alignment-specific)
# Outputs:
#   C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\{metrics/*.json, tables/*.npy, figures/*.png, summary.csv}

import os, glob, json, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from scipy.signal import butter, filtfilt, hilbert

# ---------------- Config ----------------
DATA_DIR   = r"C:\Users\caleb\CNT_Lab\eeg_rest"
GLOB       = "subject_*_EC.npy"
OUT_ROOT   = r"C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep"

FS         = 250.0
BANDS_HZ   = { "alpha": (8.0, 13.0), "theta": (4.0, 8.0), "beta": (13.0, 30.0) }

SUBJ_LIMIT = 10           # use 6 for a quick pass; 10 for full
K_FIXED    = 2            # force non-trivial split
KNN_K      = 6            # try 4–7 if needed
NULL_PERMS = 300          # raise to 1000 for paper-grade
SAVE_FIGS  = True

# ---------------- Helpers ----------------
def ensure_dir(p): os.makedirs(p, exist_ok=True); return p
def bandpass(x,fs,lo,hi,order=4):
    b,a = butter(order, [lo/(fs/2), hi/(fs/2)], btype="band")
    return filtfilt(b,a,x)

def pli_matrix(X,fs,lo,hi):
    n = X.shape[0]; Y = np.zeros_like(X)
    for c in range(n): Y[c] = bandpass(X[c], fs, lo, hi)
    ph = np.angle(hilbert(Y, axis=1))
    W = np.zeros((n,n), float)
    for i in range(n):
        for j in range(i+1, n):
            dphi = ph[i]-ph[j]
            pli  = np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j] = W[j,i] = pli
    np.fill_diagonal(W, 0.0)
    return W

def knn(W,k=6):
    W = W.copy(); n = W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]
        keep = idx[:k]
        mask = np.ones(n, dtype=bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T)
    np.fill_diagonal(W, 0.0)
    return W

def laplacian(W):
    d = W.sum(1)
    d = np.where(d<=1e-12, 1.0, d)
    Dmh = np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def spec_labels(W, k):
    L = laplacian(W)
    evals, evecs = np.linalg.eigh(L)
    U = evecs[:, 1:k] if k>1 else evecs[:, :1]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def build_coassoc(label_list):
    n = len(label_list[0]); m = len(label_list)
    co = np.zeros((n,n), float)
    for lab in label_list:
        for i in range(n):
            li = lab[i]
            for j in range(n):
                co[i,j] += 1.0 if li == lab[j] else 0.0
    return co / m

def loso_ari_via_coassoc(label_list):
    co_full = build_coassoc(label_list)
    cons_full = spec_labels(co_full, k=2)
    aris=[]
    for s in range(len(label_list)):
        leave = [lab for i,lab in enumerate(label_list) if i!=s]
        co_l  = build_coassoc(leave)
        cons_l= spec_labels(co_l, k=2)
        aris.append(adjusted_rand_score(cons_full, cons_l))
    return float(np.median(aris)), cons_full, co_full

def randomize_labels_same_sizes(lab, rng):
    n = len(lab)
    uniq, cnts = np.unique(lab, return_counts=True)
    idx = np.arange(n); rng.shuffle(idx)
    out = np.empty(n, dtype=int); start=0
    for label, c in zip(uniq, cnts):
        seg = idx[start:start+c]; out[seg]=label; start+=c
    return out

# ---------------- Run ----------------
files = sorted(glob.glob(os.path.join(DATA_DIR, GLOB)))[:SUBJ_LIMIT]
if not files: raise SystemExit("No EC files found. Check DATA_DIR/GLOB.")
OUT_MET = ensure_dir(os.path.join(OUT_ROOT, "metrics"))
OUT_TAB = ensure_dir(os.path.join(OUT_ROOT, "tables"))
OUT_FIG = ensure_dir(os.path.join(OUT_ROOT, "figures"))

rows = []
rng = np.random.default_rng(7)

for band, (lo, hi) in BANDS_HZ.items():
    # per-subject labels (PLI → KNN → spectral k=2)
    subj_labels = []
    for f in files:
        X = np.load(f)
        W = pli_matrix(X, FS, lo, hi)
        W = knn(W, KNN_K)
        labs = spec_labels(W, K_FIXED)
        subj_labels.append(labs)

    # consensus via spectral-on-coassoc
    loso, cons_real, co_real = loso_ari_via_coassoc(subj_labels)
    np.save(os.path.join(OUT_TAB, f"band__{band}__coassoc.npy"), co_real)
    np.save(os.path.join(OUT_TAB, f"band__{band}__consensus_labels.npy"), cons_real)

    # alignment-preserving null: label randomization per subject
    null_aris = []
    for p in range(NULL_PERMS):
        nlabs=[]
        for lab in subj_labels:
            nlabs.append(randomize_labels_same_sizes(lab, rng))
        _, cons_n, _ = loso_ari_via_coassoc(nlabs)
        null_aris.append(adjusted_rand_score(cons_real, cons_n))
    null_aris = np.array(null_aris, float)
    p_val = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

    # diagnostics
    uniq, cnts = np.unique(cons_real, return_counts=True)
    intra = co_real[cons_real[:,None]==cons_real[None,:]].mean()
    inter = co_real[cons_real[:,None]!=cons_real[None,:]].mean()
    ratio = float(intra/(inter+1e-12))

    # save metrics
    metrics = {
        "band": band, "connectivity": "PLI", "consensus_mode": "spectral_on_coassoc_k2",
        "n_subjects": len(files), "loso_coassoc": float(loso),
        "null_ari_mean": float(null_aris.mean()), "p_value": p_val,
        "cluster_sizes": cnts.tolist(), "intra_mean": float(intra),
        "inter_mean": float(inter), "intra_over_inter": ratio
    }
    with open(os.path.join(OUT_MET, f"band__{band}__metrics.json"), "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    # figure
    if SAVE_FIGS:
        plt.figure()
        plt.imshow(co_real, aspect='auto')
        plt.title(f"Consensus co-association (spectral) | {band} (PLI)")
        plt.colorbar(); plt.tight_layout()
        plt.savefig(os.path.join(OUT_FIG, f"consensus__{band}__coassoc_spectral.png"), dpi=160)
        plt.close()

    rows.append([band, len(files), cnts.tolist(), float(loso), float(null_aris.mean()), p_val, ratio])

# summary CSV + print
df = pd.DataFrame(rows, columns=["band","n_subjects","cluster_sizes","LOSO","null_ari_mean","p_value","intra_over_inter"])
csv_path = os.path.join(OUT_ROOT, "summary.csv"); df.to_csv(csv_path, index=False)

print("=== PLI spectral-on-coassoc (k=2) — Band Sweep ===")
print(df.to_string(index=False))
print("\nSaved:")
print("  - Summary CSV:", csv_path)
print("  - Metrics JSON:", OUT_MET)
print("  - Tables NPY :", OUT_TAB)
print("  - Figures    :", OUT_FIG)


=== PLI spectral-on-coassoc (k=2) — Band Sweep ===
 band  n_subjects cluster_sizes     LOSO  null_ari_mean  p_value  intra_over_inter
alpha          10      [34, 30] 0.877963      -0.001952 0.003322          2.327533
theta          10      [27, 37] 0.790194       0.002869 0.003322          1.934356
 beta          10      [30, 34] 1.000000      -0.000808 0.003322          6.005028

Saved:
  - Summary CSV: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\summary.csv
  - Metrics JSON: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\metrics
  - Tables NPY : C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\tables
  - Figures    : C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures


In [9]:
# === CNT PLI Consensus — Topomaps + β sanity check (single cell) ===
# Inputs: artifacts from the band sweep you just ran
#   C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\metrics\band__{band}__metrics.json
#   C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\tables\band__{band}__consensus_labels.npy
# Also needs channel names from one subject: C:\Users\caleb\CNT_Lab\eeg_rest\subject_01_EC.channels.txt
#
# Outputs:
#   figures/topomap__{band}__cluster{0,1}.png
#   metrics/beta_sanity_metrics.json  (KNN_K=4, CONS_THR=0.70, NULL_PERMS=1000)

import os, glob, json, numpy as np, matplotlib.pyplot as plt
import mne

ROOT = r"C:\Users\caleb\CNT_Lab"
SWEEP = os.path.join(ROOT, r"artifacts\pli_band_sweep")
OUTF  = os.path.join(ROOT, r"artifacts\pli_band_sweep\figures")
OUTM  = os.path.join(ROOT, r"artifacts\pli_band_sweep\metrics")
OUTT  = os.path.join(ROOT, r"artifacts\pli_band_sweep\tables")
ensure = lambda p: (os.makedirs(p, exist_ok=True) or p)
ensure(OUTF)

BANDS = ["alpha","theta","beta"]
CHAN_TXT = os.path.join(ROOT, r"eeg_rest\subject_01_EC.channels.txt")

# ---------- helper: load channels ----------
if os.path.exists(CHAN_TXT):
    with open(CHAN_TXT,"r",encoding="utf-8") as f:
        ch_names = [ln.strip() for ln in f if ln.strip()]
else:
    # fallback to 64 generic names
    ch_names = [f"ch{i}" for i in range(64)]

# ---------- helper: simple topomap per cluster ----------
def topomap_two_clusters(cons_labels, band, ch_names):
    # Build fake "values" just to color nodes by cluster (0/1)
    # We'll plot two images: cluster A highlighted, cluster B highlighted
    montage = mne.channels.make_standard_montage("standard_1020")
    info = mne.create_info(ch_names, sfreq=250.0, ch_types="eeg")
    info.set_montage(montage, on_missing="ignore", match_case=False)

    arr = np.zeros(len(ch_names))
    for cluster_id in [0,1]:
        arr[:] = (cons_labels == cluster_id).astype(float)
        ev = mne.EvokedArray(arr[:,None], info, tmin=0.0)  # one "time"
        fig = ev.plot_topomap(times=[0.0], scalings=1.0, time_format="", show=False)
        # Save the figure mne returns as mpl fig
        fig[0].savefig(os.path.join(OUTF, f"topomap__{band}__cluster{cluster_id}.png"), dpi=160)

# ---------- 1) Load sweep outputs & draw topomaps ----------
rows = []
for b in BANDS:
    met_fp = os.path.join(OUTM, f"band__{b}__metrics.json")
    lab_fp = os.path.join(OUTT, f"band__{b}__consensus_labels.npy")
    if not (os.path.exists(met_fp) and os.path.exists(lab_fp)):
        print(f"[skip] missing {b}")
        continue

    with open(met_fp,"r",encoding="utf-8") as f:
        met = json.load(f)
    cons = np.load(lab_fp)

    # record summary line
    rows.append([b, met["n_subjects"], met["cluster_sizes"], met["loso_coassoc"],
                 met["null_ari_mean"], met["p_value"], met["intra_over_inter"]])

    # draw topomaps
    try:
        topomap_two_clusters(cons, b, ch_names)
        print(f"[topomap] wrote topomap__{b}__cluster*.png")
    except Exception as e:
        print(f"[warn] topomap failed for {b}: {e}")

# print a compact table
import pandas as pd
df = pd.DataFrame(rows, columns=["band","n","cluster_sizes","LOSO","null_mean","p","intra/inter"])
print("\n=== Sweep summary (from metrics JSON) ===")
print(df.to_string(index=False))
print("\nSaved topomaps to:", OUTF)

# ---------- 2) β sanity check: re-score with KNN_K=4, CONS_THR=0.70, NULL_PERMS=1000 ----------
DATA_DIR = os.path.join(ROOT, r"eeg_rest")
files = sorted(glob.glob(os.path.join(DATA_DIR, "subject_*_EC.npy")))[:10]
if not files:
    raise SystemExit("No EC files found.")

FS = 250.0
F_LOW,F_HIGH = (13.0, 30.0)   # beta
K_FIXED = 2
KNN_K   = 4
CONS_THR= 0.70
NULL_PERMS = 1000

from scipy.signal import butter, filtfilt, hilbert
def bandpass(x,fs,lo,hi,order=4):
    b,a = butter(order, [lo/(fs/2), hi/(fs/2)], btype="band"); return filtfilt(b,a,x)
def pli_matrix(X,fs,lo,hi):
    n=X.shape[0]; Y=np.zeros_like(X)
    for c in range(n): Y[c]=bandpass(X[c],fs,lo,hi)
    ph=np.angle(hilbert(Y,axis=1))
    W=np.zeros((n,n)); 
    for i in range(n):
        for j in range(i+1,n):
            dphi=ph[i]-ph[j]; pli=np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j]=W[j,i]=pli
    np.fill_diagonal(W,0); return W
def knn(W,k=6):
    W=W.copy(); n=W.shape[0]
    for i in range(n):
        idx=np.argsort(W[i])[::-1]; keep=idx[:k]
        mask=np.ones(n,bool); mask[keep]=False; W[i,mask]=0
    W=np.maximum(W,W.T); np.fill_diagonal(W,0); return W
def laplacian(W):
    d=W.sum(1); d=np.where(d<=1e-12,1.0,d); Dmh=np.diag(1.0/np.sqrt(d)); return np.eye(W.shape[0]) - Dmh@W@Dmh
def spec_labels(W,k):
    L=laplacian(W); e,v=np.linalg.eigh(L); U=v[:,1:k] if k>1 else v[:,:1]
    U/=np.linalg.norm(U,axis=1,keepdims=True)+1e-12
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)
def build_coassoc(label_list):
    n=len(label_list[0]); m=len(label_list); co=np.zeros((n,n))
    for lab in label_list:
        for i in range(n):
            li=lab[i]
            for j in range(n):
                co[i,j]+=1.0 if li==lab[j] else 0.0
    return co/m
def loso_ari_via_coassoc(label_list):
    co_full=build_coassoc(label_list); cons_full=spec_labels(co_full, k=2)
    aris=[]
    for s in range(len(label_list)):
        leave=[lab for i,lab in enumerate(label_list) if i!=s]
        co_l=build_coassoc(leave); cons_l=spec_labels(co_l, k=2)
        aris.append(adjusted_rand_score(cons_full, cons_l))
    return float(np.median(aris)), cons_full, co_full
def randomize_labels_same_sizes(lab, rng):
    n=len(lab); uniq,cnts=np.unique(lab, return_counts=True)
    idx=np.arange(n); rng.shuffle(idx); out=np.empty(n,int); start=0
    for label,c in zip(uniq,cnts):
        seg=idx[start:start+c]; out[seg]=label; start+=c
    return out

# per-subject beta with tighter settings
subj_labels=[]
for f in files:
    X=np.load(f)
    W=pli_matrix(X,FS,F_LOW,F_HIGH)
    W=knn(W,KNN_K)
    subj_labels.append(spec_labels(W, K_FIXED))

loso, cons, co = loso_ari_via_coassoc(subj_labels)
rng = np.random.default_rng(11)
null_aris=[]
for p in range(NULL_PERMS):
    nlabs=[randomize_labels_same_sizes(lab, rng) for lab in subj_labels]
    _, cons_n, _ = loso_ari_via_coassoc(nlabs)
    null_aris.append(adjusted_rand_score(cons, cons_n))
null_aris=np.array(null_aris,float)
p_val=float((np.sum(null_aris>=loso)+1)/(len(null_aris)+1))
uniq,cnts=np.unique(cons, return_counts=True)
intra=co[cons[:,None]==cons[None,:]].mean()
inter=co[cons[:,None]!=cons[None,:]].mean()
ratio=float(intra/(inter+1e-12))

beta_sanity = {
    "band":"beta","connectivity":"PLI","consensus_mode":"spectral_on_coassoc_k2",
    "n_subjects":len(files),"KNN_K":KNN_K,"CONS_THR":CONS_THR,
    "LOSO":float(loso),"null_mean":float(null_aris.mean()),"p_value":p_val,
    "cluster_sizes":cnts.tolist(),"intra":float(intra),"inter":float(inter),"intra_over_inter":ratio
}
with open(os.path.join(OUTM,"beta_sanity_metrics.json"),"w",encoding="utf-8") as f:
    json.dump(beta_sanity,f,indent=2)

print("\n=== β sanity (KNN_K=4, CONS_THR=0.70, NULL_PERMS=1000) ===")
print(beta_sanity)
print("\nTopomaps saved to:", OUTF)
print("β sanity metrics:", os.path.join(OUTM,"beta_sanity_metrics.json"))




[warn] topomap failed for alpha: Did not find any digitization points of kind 3 (FIFFV_POINT_EEG) in the info.
[warn] topomap failed for theta: Did not find any digitization points of kind 3 (FIFFV_POINT_EEG) in the info.
[warn] topomap failed for beta: Did not find any digitization points of kind 3 (FIFFV_POINT_EEG) in the info.

=== Sweep summary (from metrics JSON) ===
 band  n cluster_sizes     LOSO  null_mean        p  intra/inter
alpha 10      [34, 30] 0.877963  -0.001952 0.003322     2.327533
theta 10      [27, 37] 0.790194   0.002869 0.003322     1.934356
 beta 10      [30, 34] 1.000000  -0.000808 0.003322     6.005028

Saved topomaps to: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures


KeyboardInterrupt: 

In [10]:
# === FAST SCALP SCATTERS (no MNE) — α/θ/β consensus clusters ===
# Inputs (from your sweep):
#   C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\tables\band__{band}__consensus_labels.npy
# Also needs channel names from any subject:
#   C:\Users\caleb\CNT_Lab\eeg_rest\subject_01_EC.channels.txt
#
# Outputs:
#   C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures\scalp__{band}__clusters.png

import os, json, re, numpy as np, matplotlib.pyplot as plt

ROOT   = r"C:\Users\caleb\CNT_Lab"
SWEEP  = os.path.join(ROOT, r"artifacts\pli_band_sweep")
TABDIR = os.path.join(SWEEP, "tables")
FIGDIR = os.path.join(SWEEP, "figures")
os.makedirs(FIGDIR, exist_ok=True)

BANDS  = ["alpha","theta","beta"]
CHAN_TXT = os.path.join(ROOT, r"eeg_rest\subject_01_EC.channels.txt")

# --- 10–20 approximate 2D coords (x,y) in head space [-1,1] (frontal y>0, posterior y<0)
# Minimal but sufficient set; we’ll fuzzy-map additional labels to nearest canonical
canon_xy = {
 "Fp1":(-0.5, 0.95), "Fpz":(0.0, 0.98), "Fp2":(0.5, 0.95),
 "AF7":(-0.65, 0.75), "AF3":(-0.35, 0.78), "AFz":(0.0, 0.80), "AF4":(0.35,0.78), "AF8":(0.65,0.75),
 "F7":(-0.8, 0.55), "F5":(-0.55,0.58), "F3":(-0.35,0.60), "F1":(-0.15,0.62), "Fz":(0.0,0.65),
 "F2":(0.15,0.62), "F4":(0.35,0.60), "F6":(0.55,0.58), "F8":(0.8,0.55),
 "FT7":(-0.9, 0.35), "FC5":(-0.6,0.40), "FC3":(-0.4,0.42), "FC1":(-0.2,0.44), "FCz":(0.0,0.45),
 "FC2":(0.2,0.44), "FC4":(0.4,0.42), "FC6":(0.6,0.40), "FT8":(0.9,0.35),
 "T7":(-1.0, 0.05), "C5":(-0.6,0.05), "C3":(-0.4,0.05), "C1":(-0.2,0.05), "Cz":(0.0,0.05),
 "C2":(0.2,0.05), "C4":(0.4,0.05), "C6":(0.6,0.05), "T8":(1.0,0.05),
 "TP7":(-0.9,-0.25), "CP5":(-0.6,-0.25), "CP3":(-0.4,-0.25), "CP1":(-0.2,-0.25), "CPz":(0.0,-0.25),
 "CP2":(0.2,-0.25), "CP4":(0.4,-0.25), "CP6":(0.6,-0.25), "TP8":(0.9,-0.25),
 "P7":(-0.8,-0.50), "P5":(-0.55,-0.50), "P3":(-0.35,-0.50), "P1":(-0.15,-0.50), "Pz":(0.0,-0.52),
 "P2":(0.15,-0.50), "P4":(0.35,-0.50), "P6":(0.55,-0.50), "P8":(0.8,-0.50),
 "PO7":(-0.65,-0.70), "PO3":(-0.35,-0.70), "POz":(0.0,-0.72), "PO4":(0.35,-0.70), "PO8":(0.65,-0.70),
 "O1":(-0.4,-0.90), "Oz":(0.0,-0.92), "O2":(0.4,-0.90)
}

# Common aliases between 10-20 and older 10-10 naming
alias = {
 "T3":"T7", "T4":"T8", "T5":"P7", "T6":"P8",
 "FP1":"Fp1", "FP2":"Fp2", "FPZ":"Fpz", "OZ":"Oz", "CZ":"Cz", "PZ":"Pz", "FZ":"Fz", "POZ":"POz"
}
def norm_key(s):
    return re.sub(r"[^A-Za-z0-9]", "", s).upper()

canon_keys = {norm_key(k):k for k in canon_xy.keys()}
alias_keys = {norm_key(k):v for k,v in alias.items()}

# Load channel names
if os.path.exists(CHAN_TXT):
    with open(CHAN_TXT,"r",encoding="utf-8") as f:
        ch_names = [ln.strip() for ln in f if ln.strip()]
else:
    ch_names = [f"ch{i}" for i in range(64)]  # fallback

# Map each channel to a 2D coordinate (drop if unknown)
coords = []
keep_idx = []
for i, ch in enumerate(ch_names):
    key = norm_key(ch)
    if key in canon_keys:
        coords.append(canon_xy[canon_keys[key]]); keep_idx.append(i)
    elif key in alias_keys and alias_keys[key] in canon_xy:
        coords.append(canon_xy[alias_keys[key]]); keep_idx.append(i)
    else:
        # try loose matches like Fp1 -> FP1 or P3 -> P3 etc by removing trailing spaces
        m = re.match(r"(FP|AF|F|FT|FC|T|C|TP|CP|P|PO|O)Z?$", key)
        # if completely unknown, skip
        # print(f"[skip] no coords for {ch}")
        pass

coords = np.array(coords, float)
if coords.shape[0] == 0:
    raise SystemExit("Could not map any channels to 10–20 positions. Update alias map or CHAN_TXT.")

def draw_head(ax):
    # head outline
    head = plt.Circle((0,0), 1.03, fill=False, linewidth=2)
    nose = plt.Polygon([[ -0.12, 1.03],[0,1.15],[0.12,1.03]], fill=False)
    ax.add_patch(head); ax.add_patch(nose)
    ax.set_xlim(-1.15,1.15); ax.set_ylim(-1.1,1.2)
    ax.set_aspect("equal"); ax.axis("off")

def plot_clusters(cons_labels, band):
    labs = np.array(cons_labels)
    labs = labs[keep_idx]  # align to coords list
    fig = plt.figure(figsize=(7,7))
    ax = fig.add_subplot(111)
    draw_head(ax)
    # cluster 0
    m0 = labs == 0
    ax.scatter(coords[m0,0], coords[m0,1], s=70, label="Cluster 0")
    # cluster 1
    m1 = labs == 1
    ax.scatter(coords[m1,0], coords[m1,1], s=70, marker="s", label="Cluster 1")
    # annotate a few landmarks to help orientation
    for name in ["Fpz","Fz","Cz","Pz","Oz"]:
        k = norm_key(name)
        if k in canon_keys:
            x,y = canon_xy[canon_keys[k]]
            ax.text(x, y+0.03, name, ha="center", va="bottom", fontsize=9)
    ax.legend(loc="upper right")
    ax.set_title(f"Scalp clusters | {band} (PLI consensus, k=2)")
    out = os.path.join(FIGDIR, f"scalp__{band}__clusters.png")
    fig.tight_layout(); fig.savefig(out, dpi=160); plt.close(fig)
    print("Saved:", out)

# Load and plot per band
for b in BANDS:
    lab_fp = os.path.join(TABDIR, f"band__{b}__consensus_labels.npy")
    if not os.path.exists(lab_fp):
        print(f"[skip] missing labels for {b}"); continue
    cons = np.load(lab_fp)
    plot_clusters(cons, b)

print("\nDone. If some points are missing, add aliases in the `alias` dict above and re-run.")


Saved: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures\scalp__alpha__clusters.png
Saved: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures\scalp__theta__clusters.png
Saved: C:\Users\caleb\CNT_Lab\artifacts\pli_band_sweep\figures\scalp__beta__clusters.png

Done. If some points are missing, add aliases in the `alias` dict above and re-run.


In [11]:
# Quick cell to download EC run (R02) for subjects 1–30
import mne, os
OUT = r"C:\Users\caleb\CNT_Lab\eeg_rest"
os.makedirs(OUT, exist_ok=True)
for subj in range(1,31):
    try:
        fns = mne.datasets.eegbci.load_data(subjects=[subj], runs=[2], update_path=True, verbose="ERROR")
        print(f"Subject {subj:02d}: ok ({len(fns)} files)")
    except Exception as e:
        print(f"Subject {subj:02d}: {e}")
print("All 30 EC runs downloaded.")


Subject 01: ok (1 files)
Subject 02: ok (1 files)
Subject 03: ok (1 files)
Subject 04: ok (1 files)
Subject 05: ok (1 files)
Subject 06: ok (1 files)
Subject 07: ok (1 files)
Subject 08: ok (1 files)
Subject 09: ok (1 files)


Downloading file 'S011/S011R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S011/S011R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 10: ok (1 files)


Downloading file 'S012/S012R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S012/S012R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 11: ok (1 files)


Downloading file 'S013/S013R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S013/S013R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 12: ok (1 files)


Downloading file 'S014/S014R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S014/S014R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 13: ok (1 files)


Downloading file 'S015/S015R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S015/S015R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 14: ok (1 files)


Downloading file 'S016/S016R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S016/S016R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 15: ok (1 files)


Downloading file 'S017/S017R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S017/S017R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 16: ok (1 files)


Downloading file 'S018/S018R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S018/S018R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 17: ok (1 files)


Downloading file 'S019/S019R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S019/S019R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 18: ok (1 files)


Downloading file 'S020/S020R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S020/S020R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 19: ok (1 files)


Downloading file 'S021/S021R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S021/S021R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 20: ok (1 files)


Downloading file 'S022/S022R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S022/S022R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 21: ok (1 files)


Downloading file 'S023/S023R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S023/S023R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 22: ok (1 files)


Downloading file 'S024/S024R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S024/S024R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 23: ok (1 files)


Downloading file 'S025/S025R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S025/S025R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 24: ok (1 files)


Downloading file 'S026/S026R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S026/S026R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 25: ok (1 files)


Downloading file 'S027/S027R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S027/S027R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 26: ok (1 files)


Downloading file 'S028/S028R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S028/S028R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 27: ok (1 files)


Downloading file 'S029/S029R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S029/S029R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 28: ok (1 files)


Downloading file 'S030/S030R02.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S030/S030R02.edf' to 'C:\Users\caleb\mne_data\MNE-eegbci-data\files\eegmmidb\1.0.0'.


Subject 29: ok (1 files)
Subject 30: ok (1 files)
All 30 EC runs downloaded.


In [12]:
# === CNT PLI Consensus — 30-subject Controller (single cell) ===
# 1) EDF → NPY export (only for subjects missing NPY)
# 2) PLI + spectral-on-coassoc (k=2) sweep for α/θ/β across 30 subjects
# Outputs: C:\Users\caleb\CNT_Lab\artifacts\pli_30_subjects\{metrics, tables, figures, summary.csv}

import os, glob, json, numpy as np, pandas as pd, matplotlib.pyplot as plt

# ---------- YOU CAN TWEAK THESE ----------
ROOT       = r"C:\Users\caleb\CNT_Lab"
DATA_OUT   = os.path.join(ROOT, "eeg_rest")   # where subject_##_EC.npy live
SUBJECTS   = list(range(1,31))                # 1..30
FS_OUT     = 250.0                            # resample Hz
DURATION_S = 60                               # seconds kept per subject
HP, LP     = 1.0, 45.0                        # bandpass for clean PLI
BANDS_HZ   = { "alpha": (8.0,13.0), "theta": (4.0,8.0), "beta": (13.0,30.0) }

# Consensus/Null params
K_FIXED    = 2                                 # force non-trivial split
KNN_K      = 6                                 # try 4–7 if needed
NULL_PERMS = 500                               # raise to 1000 for paper-grade

OUT_ROOT   = os.path.join(ROOT, r"artifacts\pli_30_subjects")
# -----------------------------------------

os.makedirs(DATA_OUT, exist_ok=True)
os.makedirs(OUT_ROOT, exist_ok=True)
OUT_MET = os.path.join(OUT_ROOT, "metrics")
OUT_TAB = os.path.join(OUT_ROOT, "tables")
OUT_FIG = os.path.join(OUT_ROOT, "figures")
for p in [OUT_MET, OUT_TAB, OUT_FIG]:
    os.makedirs(p, exist_ok=True)

# ========== PART A: EDF → NPY (skip when exists) ==========
print("=== PART A: exporting missing subjects to NPY ===")
try:
    import mne
except Exception:
    import sys, subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "mne", "pooch"])
    import mne

def export_subject_ec(subj, out_dir):
    runs = [2]  # eyes-closed
    try:
        fpaths = mne.datasets.eegbci.load_data(subjects=[subj], runs=runs, update_path=True, verbose="ERROR")
    except TypeError:
        fpaths = mne.datasets.eegbci.load_data(subject=subj, runs=runs, update_path=True, verbose="ERROR")
    raws=[]
    for fp in fpaths:
        raw = mne.io.read_raw_edf(fp, preload=True, verbose="ERROR")
        raw.pick_types(eeg=True, stim=False, eog=False, ecg=False, emg=False, misc=False)
        raws.append(raw)
    if not raws: 
        return False, "no_raw"
    raw = mne.concatenate_raws(raws, verbose="ERROR")
    # montage + filter + resample
    try:
        raw.set_montage("standard_1020", on_missing="ignore", match_case=False, verbose="ERROR")
    except Exception:
        pass
    raw.filter(HP, LP, fir_design="firwin", verbose="ERROR")
    raw.resample(FS_OUT, npad="auto", verbose="ERROR")
    # trim/tile to DURATION_S
    n_keep = int(DURATION_S * raw.info["sfreq"])
    X = raw.get_data(picks="eeg")            # [n_ch, n_t]
    if X.shape[1] >= n_keep:
        X = X[:, :n_keep]
    else:
        reps = int(np.ceil(n_keep / X.shape[1]))
        X = np.tile(X, reps)[:, :n_keep]
    ch_names = mne.pick_info(raw.info, mne.pick_types(raw.info, eeg=True)).ch_names
    base = os.path.join(out_dir, f"subject_{subj:02d}_EC")
    np.save(base + ".npy", X.astype(np.float32))
    with open(base + ".channels.txt", "w", encoding="utf-8") as f:
        for ch in ch_names: f.write(ch + "\n")
    return True, X.shape

export_log=[]
for s in SUBJECTS:
    base = os.path.join(DATA_OUT, f"subject_{s:02d}_EC.npy")
    if os.path.exists(base):
        export_log.append((s, "exists"))
    else:
        ok, msg = export_subject_ec(s, DATA_OUT)
        export_log.append((s, msg))
print("Export summary:", export_log[:10], "..." if len(export_log)>10 else "")

# ========== PART B: PLI spectral-on-coassoc (k=2) ==========
from scipy.signal import butter, filtfilt, hilbert
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

def bandpass(x,fs,lo,hi,order=4):
    b,a = butter(order, [lo/(fs/2), hi/(fs/2)], btype="band"); return filtfilt(b,a,x)

def pli_matrix(X,fs,lo,hi):
    n = X.shape[0]; Y = np.zeros_like(X)
    for c in range(n): Y[c] = bandpass(X[c],fs,lo,hi)
    ph = np.angle(hilbert(Y, axis=1))
    W = np.zeros((n,n), float)
    for i in range(n):
        for j in range(i+1,n):
            dphi = ph[i]-ph[j]
            pli  = np.abs(np.mean(np.sign(np.sin(dphi))))
            W[i,j] = W[j,i] = pli
    np.fill_diagonal(W,0); return W

def knn(W,k=6):
    W=W.copy(); n=W.shape[0]
    for i in range(n):
        idx = np.argsort(W[i])[::-1]; keep = idx[:k]
        mask = np.ones(n, bool); mask[keep] = False
        W[i, mask] = 0.0
    W = np.maximum(W, W.T); np.fill_diagonal(W,0); return W

def laplacian(W):
    d = W.sum(1); d = np.where(d<=1e-12,1.0,d); Dmh=np.diag(1.0/np.sqrt(d))
    return np.eye(W.shape[0]) - Dmh @ W @ Dmh

def spec_labels(W,k):
    L = laplacian(W); evals,evecs = np.linalg.eigh(L)
    U = evecs[:,1:k] if k>1 else evecs[:,:1]
    U = U / (np.linalg.norm(U,axis=1,keepdims=True)+1e-12)
    return KMeans(n_clusters=k, n_init=50, random_state=42).fit_predict(U)

def build_coassoc(label_list):
    n=len(label_list[0]); m=len(label_list); co=np.zeros((n,n),float)
    for lab in label_list:
        for i in range(n):
            li=lab[i]
            for j in range(n):
                co[i,j]+=1.0 if li==lab[j] else 0.0
    return co/m

def loso_ari_via_coassoc(label_list):
    co_full = build_coassoc(label_list)
    cons_full= spec_labels(co_full, k=2)
    aris=[]
    for s in range(len(label_list)):
        leave=[lab for i,lab in enumerate(label_list) if i!=s]
        co_l = build_coassoc(leave)
        cons_l= spec_labels(co_l, k=2)
        aris.append(adjusted_rand_score(cons_full, cons_l))
    return float(np.median(aris)), cons_full, co_full

def randomize_labels_same_sizes(lab, rng):
    n=len(lab); uniq,cnts=np.unique(lab, return_counts=True)
    idx=np.arange(n); rng.shuffle(idx)
    out=np.empty(n,int); start=0
    for label,c in zip(uniq,cnts):
        seg=idx[start:start+c]; out[seg]=label; start+=c
    return out

print("\n=== PART B: running 30-subject PLI consensus sweep ===")
files = [os.path.join(DATA_OUT, f"subject_{s:02d}_EC.npy") for s in SUBJECTS if os.path.exists(os.path.join(DATA_OUT, f"subject_{s:02d}_EC.npy"))]
if len(files) < 30:
    print(f"[warn] only {len(files)} NPY found; proceeding with available subjects.")

rows = []
rng = np.random.default_rng(7)

for band, (lo,hi) in BANDS_HZ.items():
    # per-subject labels
    subj_labels=[]
    for f in files:
        X = np.load(f)
        W = pli_matrix(X, FS_OUT, lo, hi)
        W = knn(W, KNN_K)
        labs = spec_labels(W, K_FIXED)
        subj_labels.append(labs)

    # consensus via spectral-on-coassoc
    loso, cons_real, co_real = loso_ari_via_coassoc(subj_labels)
    np.save(os.path.join(OUT_TAB, f"band__{band}__coassoc.npy"), co_real)
    np.save(os.path.join(OUT_TAB, f"band__{band}__consensus_labels.npy"), cons_real)

    # null (label-preserving)
    null_aris=[]
    for p in range(NULL_PERMS):
        nlabs=[randomize_labels_same_sizes(lab, rng) for lab in subj_labels]
        _, cons_n, _ = loso_ari_via_coassoc(nlabs)
        null_aris.append(adjusted_rand_score(cons_real, cons_n))
        if (p+1) % 100 == 0:
            print(f"[{band}] null {p+1}/{NULL_PERMS}")

    null_aris = np.array(null_aris, float)
    p_val = float((np.sum(null_aris >= loso) + 1) / (len(null_aris) + 1))

    uniq,cnts = np.unique(cons_real, return_counts=True)
    intra = co_real[cons_real[:,None]==cons_real[None,:]].mean()
    inter = co_real[cons_real[:,None]!=cons_real[None,:]].mean()
    ratio = float(intra/(inter+1e-12))

    metrics = {
        "band": band, "connectivity":"PLI", "consensus_mode":"spectral_on_coassoc_k2",
        "n_subjects": len(files), "loso_coassoc": float(loso),
        "null_ari_mean": float(null_aris.mean()), "p_value": p_val,
        "cluster_sizes": cnts.tolist(), "intra_mean": float(intra),
        "inter_mean": float(inter), "intra_over_inter": ratio
    }
    with open(os.path.join(OUT_MET, f"band__{band}__metrics.json"), "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    # quick figure
    plt.figure(); plt.imshow(co_real, aspect='auto'); plt.title(f"Co-association (spectral) | {band} (PLI, n={len(files)})")
    plt.colorbar(); plt.tight_layout()
    plt.savefig(os.path.join(OUT_FIG, f"coassoc__{band}.png"), dpi=160); plt.close()

    rows.append([band, len(files), cnts.tolist(), float(loso), float(null_aris.mean()), p_val, ratio])

# summary CSV + print
df = pd.DataFrame(rows, columns=["band","n_subjects","cluster_sizes","LOSO","null_ari_mean","p_value","intra_over_inter"])
csv_path = os.path.join(OUT_ROOT, "summary.csv"); df.to_csv(csv_path, index=False)
print("\n=== 30-subject PLI spectral-on-coassoc — Summary ===")
print(df.to_string(index=False))
print("\nSaved:")
print("  - Summary CSV:", csv_path)
print("  - Metrics JSON:", OUT_MET)
print("  - Tables NPY :", OUT_TAB)
print("  - Figures    :", OUT_FIG)


=== PART A: exporting missing subjects to NPY ===
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a le