# CNT 3D Genomic Correlate Field — Clean Notebook
Minimal, robust pipeline to render a 3D field of genes with threads across correlates.
- Works with **scored multivariate tables** (e.g., `resonance_score`, `cnt_score`, degrees, tissues)
- Avoids full n^2 correlation matrices (memory-safe)
- Produces: a static **PNG** and optional interactive **HTML**

**Pipeline**
1. Config & imports  
2. Load table → build one feature vector per gene  
3. kNN graph in cosine space (correlate threads)  
4. Mutual-kNN + light pruning (de-spike)  
5. 3D layout & render (PNG + HTML)


In [1]:

# === 1) Config & imports ==================================================
import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# Path to your CSV/TSV
DATA_PATH = r"C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv"  # <- change if needed

# Outputs (written next to the notebook by default)
PNG_PATH  = "CNT_genomic_network_3D.png"
HTML_PATH = "CNT_genomic_network_3D.html"

# Feature selection & behavior
INDEX_COL      = "gene_name"  # fallback to 'rsid' or first non-numeric if missing
FEATURE_WISHLIST = [
    "resonance_score", "cnt_score", "structure_score",
    "gene_deg", "ccre_deg", "tissue_hits", "tissues"
]

# Graph/Render knobs
SEED           = 42
MAX_GENES      = 12000     # cap by variance (raise/lower for density/speed)
K_NEIGHBORS    = 10        # edges per node before filtering
EDGE_CAP       = 200_000   # absolute safety cap
MUTUAL_K       = 8         # keep edge only if mutual top-K (de-spike)
PRUNE_FRACTION = 0.25      # drop weakest 25% edges
USE_SPRING     = True      # False -> instant random 3D
SPRING_ITERS   = 180       # iterations for 3D spring
SPRING_LIMIT   = 6000      # if nodes > limit, auto-skip spring

print("Notebook ready. Edit DATA_PATH if needed and run all cells ↓")


Notebook ready. Edit DATA_PATH if needed and run all cells ↓


In [2]:

# === 2) Load table & assemble per-gene feature matrix =====================
def load_feature_table(path, index_col, wishlist):
    # Robust read
    try:
        df = pd.read_csv(path, sep=None, engine="python")
    except Exception:
        try:
            df = pd.read_csv(path, sep="\t", engine="python")
        except Exception:
            df = pd.read_csv(path, sep=",", engine="python")
    # Pick index
    if index_col not in df.columns:
        for fallback in ("rsid",):
            if fallback in df.columns:
                index_col = fallback; break
        else:
            nonnum = [c for c in df.columns if not np.issubdtype(df[c].dtype, np.number)]
            index_col = nonnum[0] if nonnum else df.columns[0]
    df = df.dropna(subset=[index_col])
    # Feature columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    feat_cols = [c for c in wishlist if c in df.columns]
    if not feat_cols:
        feat_cols = numeric_cols
    use_cols = [index_col] + feat_cols
    F = df[use_cols].copy()
    # Aggregate
    agg = {c: "mean" for c in feat_cols}
    for key in ("resonance_score", "cnt_score", "structure_score"):
        if key in agg: agg[key] = "max"
    GDF = F.groupby(index_col).agg(agg)
    GDF = GDF.dropna(how="all")
    GDF = GDF.fillna(GDF.median(numeric_only=True))
    if GDF.shape[0] > MAX_GENES:
        var = GDF.var(axis=1, numeric_only=True).sort_values(ascending=False)
        GDF = GDF.loc[var.index[:MAX_GENES]]
    return index_col, feat_cols, GDF

INDEX_COL, FEATURES_USED, GDF = load_feature_table(DATA_PATH, INDEX_COL, FEATURE_WISHLIST)
print("Index column:", INDEX_COL)
print("Features used:", FEATURES_USED)
print("Genes (rows):", GDF.shape[0])
GDF.head()


Index column: gene_name
Features used: ['resonance_score', 'cnt_score', 'structure_score', 'gene_deg', 'ccre_deg', 'tissue_hits', 'tissues']
Genes (rows): 12000


Unnamed: 0_level_0,resonance_score,cnt_score,structure_score,gene_deg,ccre_deg,tissue_hits,tissues
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Y_RNA,1.0,7.808606,6.808606,404.0,1.245467,0.0,
HLA-DQA1,1.0,6.89424,5.89424,96.0,7.184971,0.0,
Metazoa_SRP,1.0,6.147494,5.147494,85.0,1.228758,0.0,
ENSG00000298426,1.0,6.542753,5.542753,76.0,6.417178,0.0,
ENSG00000271581,1.0,6.711916,5.711916,65.0,12.461538,0.0,


In [3]:

# === 3) Build kNN graph in cosine space (memory-safe) =====================
def standardize_rows(M):
    M = M.astype("float32")
    mu = np.nanmean(M, axis=0, keepdims=True)
    sd = np.nanstd(M, axis=0, ddof=1, keepdims=True); sd[sd==0]=1.0
    Z = (M - mu) / sd
    Z = np.nan_to_num(Z, nan=0.0, posinf=0.0, neginf=0.0)
    norms = np.linalg.norm(Z, axis=1, keepdims=True); norms[norms==0]=1.0
    return Z / norms

def knn_graph_cosine(Z, genes, k=10, edge_cap=200_000):
    try:
        from sklearn.neighbors import NearestNeighbors
        nn = NearestNeighbors(n_neighbors=min(k+1, Z.shape[0]), metric="cosine", algorithm="brute")
        nn.fit(Z); dists, idxs = nn.kneighbors(Z, return_distance=True)
        dists, idxs = dists[:,1:], idxs[:,1:]; sims = 1.0 - dists
    except Exception:
        K = min(k, Z.shape[0]-1)
        sims = np.empty((Z.shape[0], K), dtype="float32")
        idxs = np.empty((Z.shape[0], K), dtype=np.int32)
        bs = 1024
        for i0 in range(0, Z.shape[0], bs):
            i1 = min(i0+bs, Z.shape[0])
            block = Z[i0:i1] @ Z.T
            for i in range(i1-i0): block[i, i0+i] = -np.inf
            topk = np.argpartition(-block, K, axis=1)[:, :K]
            vals = np.take_along_axis(block, topk, axis=1)
            order = np.argsort(-vals, axis=1); r = np.arange(vals.shape[0])[:, None]
            sims[i0:i1] = vals[r, order].astype("float32")
            idxs[i0:i1] = topk[r, order].astype(np.int32)
    G = nx.Graph(); G.add_nodes_from(genes.tolist()); added = 0
    for i, u in enumerate(genes):
        for j, w in zip(idxs[i], sims[i]):
            v = genes[int(j)]
            if u == v: continue
            wt = float(max(0.0, w))
            if G.has_edge(u, v):
                if wt > G[u][v]["weight"]: G[u][v]["weight"] = wt
            else:
                G.add_edge(u, v, weight=wt); added += 1
        if added >= edge_cap: break
    return G

genes = GDF.index.to_numpy()
Z = standardize_rows(GDF.to_numpy())
G = knn_graph_cosine(Z, genes, k=K_NEIGHBORS, edge_cap=EDGE_CAP)
print("Initial graph:", {"nodes": G.number_of_nodes(), "edges": G.number_of_edges()})


  mu = np.nanmean(M, axis=0, keepdims=True)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Initial graph: {'nodes': 12000, 'edges': 112087}


In [4]:

# === 4) De-spike: mutual-kNN + prune weakest ==============================
def mutual_knn_prune(G, mutual_k=8, prune_fraction=0.25):
    nbrs = {u: sorted(G[u].items(), key=lambda x: x[1].get("weight",0.0), reverse=True)
            for u in G.nodes()}
    topk = {u: set(v for v,_ in nbrs[u][:mutual_k]) for u in G.nodes()}
    H = nx.Graph(); H.add_nodes_from(G.nodes())
    for u,v,d in G.edges(data=True):
        if v in topk[u] and u in topk[v]:
            H.add_edge(u, v, **d)
    if H.number_of_edges()>0 and prune_fraction>0:
        w = np.array([H[u][v].get("weight",0.0) for u,v in H.edges()])
        thr = float(np.quantile(w, prune_fraction))
        H.remove_edges_from([(u,v) for u,v in H.edges() if H[u][v].get("weight",0.0) <= thr])
    return H

H = mutual_knn_prune(G, mutual_k=MUTUAL_K, prune_fraction=PRUNE_FRACTION)
print("Refined graph:", {"nodes": H.number_of_nodes(), "edges": H.number_of_edges()})


Refined graph: {'nodes': 12000, 'edges': 4846}


In [5]:

# === 5) 3D layout & render (PNG + HTML) ===================================
def layout_positions(G, use_spring=True, seed=42, iterations=180, limit=6000):
    rng = np.random.default_rng(seed)
    if use_spring and G.number_of_nodes()<=limit and G.number_of_edges()>0:
        return nx.spring_layout(G, dim=3, seed=seed, weight="weight", iterations=iterations)
    pos = {n: (float(v[0]), float(v[1]), float(v[2]))
           for n, v in zip(G.nodes(), rng.normal(size=(G.number_of_nodes(),3)))}
    for n in pos:
        x,y,z = pos[n]; r = (x*x+y*y+z*z)**0.5 or 1.0
        pos[n] = (x/r, y/r, z/r)
    return pos

pos = layout_positions(H, use_spring=USE_SPRING, seed=SEED, iterations=SPRING_ITERS, limit=SPRING_LIMIT)

fig = plt.figure(figsize=(10,8), dpi=160)
ax = fig.add_subplot(111, projection='3d')
for u,v,d in H.edges(data=True):
    x=[pos[u][0],pos[v][0]]; y=[pos[u][1],pos[v][1]]; z=[pos[u][2],pos[v][2]]
    ax.plot(x,y,z, linewidth=0.5 + 2.0*float(d.get("weight",0.0)), alpha=0.35)
xs=[pos[n][0] for n in H.nodes()]; ys=[pos[n][1] for n in H.nodes()]; zs=[pos[n][2] for n in H.nodes()]
ax.scatter(xs,ys,zs, s=14, alpha=0.85)
ax.set_title(f"CNT 3D Genomic Correlate Field — clean (nodes={H.number_of_nodes()}, edges={H.number_of_edges()})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
fig.tight_layout(); fig.savefig(PNG_PATH, bbox_inches="tight"); plt.close(fig)

try:
    import plotly.graph_objects as go
    edge_x=edge_y=edge_z=[]; edge_x=[]; edge_y=[]; edge_z=[]
    for u,v,d in H.edges(data=True):
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    node_x=[pos[n][0] for n in H.nodes()]
    node_y=[pos[n][1] for n in H.nodes()]
    node_z=[pos[n][2] for n in H.nodes()]
    node_text=[str(n) for n in H.nodes()]
    fig = go.Figure(data=[
        go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode='lines', line=dict(width=1), hoverinfo='none'),
        go.Scatter3d(x=node_x,y=node_y,z=node_z,mode='markers', marker=dict(size=3), text=node_text, hoverinfo='text'),
    ])
    fig.update_layout(title="CNT 3D Genomic Correlate Field — clean",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False)))
    fig.write_html(HTML_PATH, include_plotlyjs='cdn')
except Exception:
    pass

print({"png": os.path.abspath(PNG_PATH), "html": os.path.abspath(HTML_PATH)})


{'png': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.png', 'html': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.html'}


In [6]:

# === 6) (Optional) Community detection & CSV legend =======================
try:
    from networkx.algorithms.community import greedy_modularity_communities
    comms = list(greedy_modularity_communities(H, weight="weight"))
    module_of = {}
    for idx, cset in enumerate(comms):
        for n in cset:
            module_of[n] = idx
    out = pd.DataFrame({"gene": list(H.nodes()), "module": [module_of.get(n, -1) for n in H.nodes()]})
    out.to_csv("CNT_genomic_modules.csv", index=False)
    print("Wrote CNT_genomic_modules.csv with", len(out), "rows and", len(comms), "modules.")
except Exception as e:
    print("Community detection skipped:", e)


Wrote CNT_genomic_modules.csv with 12000 rows and 10420 modules.


In [7]:
# === CNT Colored Correlate Maps: per-module colors + inter-module links ===
# Requires: H (NetworkX graph), pos (node -> (x,y,z)), module dict (gene -> module)

import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from matplotlib import cm

# ------- 0) Gather modules, colors, and quick stats -------
mod_ids = sorted({m for m in (module.get(n, -1) for n in H.nodes()) if m >= 0})
if not mod_ids:
    raise ValueError("No module mapping found. Build `module` (gene->module) first.")

mod_index = {m:i for i,m in enumerate(mod_ids)}
n_mod = len(mod_ids)

# colors (distinct & repeat-safe)
cmap = cm.get_cmap("tab20", max(20, n_mod))
MOD_COLOR = {m: cmap(mod_index[m] % cmap.N) for m in mod_ids}

# node arrays for rendering
node_colors = [MOD_COLOR.get(module.get(n, -1), (0.6,0.6,0.6,1.0)) for n in H.nodes()]
node_sizes  = [10 + 6*H.degree(n) for n in H.nodes()]

# ------- 1) 3D field: colored by module, inter-module links emphasized -------
fig = plt.figure(figsize=(10,8), dpi=160)
ax  = fig.add_subplot(111, projection='3d')

# draw edges (intra faint, inter highlighted)
for u,v,d in H.edges(data=True):
    mu, mv = module.get(u,-1), module.get(v,-1)
    x=[pos[u][0], pos[v][0]]; y=[pos[u][1], pos[v][1]]; z=[pos[u][2], pos[v][2]]
    w = float(d.get("weight",0.0))
    if mu == mv and mu >= 0:
        ax.plot(x,y,z, linewidth=0.5 + 1.2*w, alpha=0.15, color=MOD_COLOR[mu])
    else:
        ax.plot(x,y,z, linewidth=0.6 + 2.0*w, alpha=0.45, color=(0.2,0.2,0.2,1.0))

# nodes (colored by module)
xs=[pos[n][0] for n in H.nodes()]; ys=[pos[n][1] for n in H.nodes()]; zs=[pos[n][2] for n in H.nodes()]
_ = ax.scatter(xs,ys,zs, s=node_sizes, c=node_colors, alpha=0.95)

ax.set_title(f"CNT Colored 3D Field — modules colored; inter-module links bold (modules={n_mod})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
plt.tight_layout()
plt.savefig("CNT_colored_3D_field.png", bbox_inches="tight")
plt.close(fig)

print({"png_3d": os.path.abspath("CNT_colored_3D_field.png")})

# ------- 2) Module-level graph: one node per module, edge width = cross-talk -------
# build module→size and module↔module weight matrix
mod_size = {m: 0 for m in mod_ids}
for n in H.nodes():
    m = module.get(n,-1)
    if m >= 0: mod_size[m] += 1

# aggregate edge weights by module pair
W = pd.DataFrame(0.0, index=mod_ids, columns=mod_ids)
for u,v,d in H.edges(data=True):
    mu, mv = module.get(u,-1), module.get(v,-1)
    if mu < 0 or mv < 0: continue
    w = float(d.get("weight",0.0))
    if mu == mv:
        W.at[mu, mv] += w  # intra (diagonal)
    else:
        W.at[mu, mv] += w
        W.at[mv, mu] += w

# module graph
Gm = nx.Graph()
for m in mod_ids:
    Gm.add_node(m, size=mod_size[m])
for i in mod_ids:
    for j in mod_ids:
        if j <= i: continue
        w = W.at[i,j]
        if w > 0:
            Gm.add_edge(i, j, weight=w)

# layout & draw (2D)
pos2 = nx.spring_layout(Gm, seed=42, weight="weight")
fig2 = plt.figure(figsize=(10,8), dpi=160)
ax2  = fig2.add_subplot(111)

# edges: width ∝ cross-talk weight
max_w = max((d["weight"] for *_, d in Gm.edges(data=True)), default=1.0)
for u,v,d in Gm.edges(data=True):
    lw = 0.5 + 8.0*(d["weight"]/max_w)
    ax2.plot([pos2[u][0], pos2[v][0]], [pos2[u][1], pos2[v][1]], linewidth=lw, alpha=0.35, color="black")

# nodes: size ∝ module size, color per module
sizes2 = [40 + 8*Gm.nodes[m]["size"] for m in Gm.nodes()]
cols2  = [MOD_COLOR[m] for m in Gm.nodes()]
ax2.scatter([pos2[m][0] for m in Gm.nodes()], [pos2[m][1] for m in Gm.nodes()],
            s=sizes2, c=cols2, alpha=0.95)

# labels
for m,(x,y) in pos2.items():
    ax2.text(x, y, f"{m}\n(n={mod_size[m]})", ha="center", va="center")

ax2.set_title("CNT Module Map — node size = module size; edge width = cross-talk")
ax2.set_xticks([]); ax2.set_yticks([])
plt.tight_layout()
plt.savefig("CNT_module_map.png", bbox_inches="tight")
plt.close(fig2)

print({"png_module_map": os.path.abspath("CNT_module_map.png")})

# ------- 3) Module×Module heatmap (weights) -------
fig3 = plt.figure(figsize=(10,8), dpi=160)
ax3  = fig3.add_subplot(111)
im = ax3.imshow(W.values, interpolation="nearest", aspect="auto")
ax3.set_title("Module × Module cross-talk (sum of edge weights)")
ax3.set_xlabel("Module"); ax3.set_ylabel("Module")
ax3.set_xticks(range(n_mod)); ax3.set_yticks(range(n_mod))
ax3.set_xticklabels(mod_ids, rotation=90); ax3.set_yticklabels(mod_ids)
cb = plt.colorbar(im)
cb.set_label("weight")
plt.tight_layout()
plt.savefig("CNT_module_heatmap.png", bbox_inches="tight")
plt.close(fig3)

print({
    "png_heatmap": os.path.abspath("CNT_module_heatmap.png"),
    "modules": n_mod,
    "largest_modules": sorted([(m, mod_size[m])], key=lambda x: x[1], reverse=True)[:5]
})


NameError: name 'module' is not defined

In [8]:
# === Color by module (if modules CSV exists) ===
import os, pandas as pd
mod_map = None
if os.path.exists("CNT_genomic_modules.csv"):
    dfm = pd.read_csv("CNT_genomic_modules.csv")
    mod_map = dict(zip(dfm["gene"], dfm["module"]))

# sizes and (optional) colors by module
sizes = []
colors = []
for n in H.nodes():
    sizes.append(8 + 6*H.degree(n))              # degree-scaled nodes (subtle)
    if mod_map is None:
        colors.append(0)
    else:
        colors.append(mod_map.get(n, -1))

# re-render nodes (replace the scatter in your render block)
ax.scatter([pos[n][0] for n in H.nodes()],
           [pos[n][1] for n in H.nodes()],
           [pos[n][2] for n in H.nodes()],
           s=sizes, alpha=0.85)


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x191032a6e90>

In [9]:
# === Color by module (if modules CSV exists) ===
import os, pandas as pd
mod_map = None
if os.path.exists("CNT_genomic_modules.csv"):
    dfm = pd.read_csv("CNT_genomic_modules.csv")
    mod_map = dict(zip(dfm["gene"], dfm["module"]))

# sizes and (optional) colors by module
sizes = []
colors = []
for n in H.nodes():
    sizes.append(8 + 6*H.degree(n))              # degree-scaled nodes (subtle)
    if mod_map is None:
        colors.append(0)
    else:
        colors.append(mod_map.get(n, -1))

# re-render nodes (replace the scatter in your render block)
ax.scatter([pos[n][0] for n in H.nodes()],
           [pos[n][1] for n in H.nodes()],
           [pos[n][2] for n in H.nodes()],
           s=sizes, alpha=0.85)


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x191032a6ad0>

In [10]:
# === CNT 3D Genomic Correlate Field — one-cell generate ===================
# Uses multivariate features (resonance_score, cnt_score, structure_score, etc.)
# to connect nearest "correlates" in cosine space (z-scored features).
# De-spikes with mutual-kNN, prunes weak threads, and lays out via
# coarse spring (top hubs) + barycentric for the rest, then a short polish.

import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# ---- Paths ----
DATA_PATH = r"C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv"
PNG_PATH  = "CNT_genomic_network_3D.png"
HTML_PATH = "CNT_genomic_network_3D.html"

# ---- Settings (tweak to taste) ----
INDEX_COL       = "gene_name"                # fallback to 'rsid' if missing
FEATURES_WISH   = ["resonance_score","cnt_score","structure_score","gene_deg","ccre_deg","tissue_hits","tissues"]

SEED            = 42
MAX_GENES       = 12000                      # cap by variance
K_NEIGHBORS     = 12                         # base kNN threads per node
MUTUAL_K        = 10                         # require mutual top-K (de-spike)
PRUNE_FRACTION  = 0.15                       # drop weakest 15% edges (after mutual)
MIN_W           = 0.12                       # and drop edges with weight below this
EDGE_CAP        = 300_000                    # absolute edge cap

COARSE_N        = 6000                       # nodes for coarse spring scaffold
COARSE_ITERS    = 220                        # spring iters on scaffold
POLISH_ITERS    = 60                         # short spring polish with scaffold fixed

# ---- Load + per-gene feature matrix ----
def _read_table(p):
    for sep in (None, "\t", ","):
        try:
            return pd.read_csv(p, sep=sep, engine="python")
        except Exception:
            pass
    raise FileNotFoundError(p)

df = _read_table(DATA_PATH)
if INDEX_COL not in df.columns:
    INDEX_COL = "rsid" if "rsid" in df.columns else (next((c for c in df.columns if not np.issubdtype(df[c].dtype, np.number)), df.columns[0]))

feat_cols = [c for c in FEATURES_WISH if c in df.columns]
if not feat_cols:
    feat_cols = df.select_dtypes(include=[np.number]).columns.tolist()
use_cols = [INDEX_COL] + feat_cols

F = df[use_cols].dropna(subset=[INDEX_COL]).copy()
agg = {c:"mean" for c in feat_cols}
for k in ("resonance_score","cnt_score","structure_score"):
    if k in agg: agg[k] = "max"
GDF = F.groupby(INDEX_COL).agg(agg).dropna(how="all")
GDF = GDF.fillna(GDF.median(numeric_only=True))

if GDF.shape[0] > MAX_GENES:
    keep = GDF.var(axis=1, numeric_only=True).sort_values(ascending=False).index[:MAX_GENES]
    GDF = GDF.loc[keep]

genes = GDF.index.to_numpy()
X = GDF.to_numpy().astype("float32")

# ---- Standardize rows; cosine kNN (≈ correlation on z-scores) ----
mu = np.nanmean(X, axis=0, keepdims=True)
sd = np.nanstd(X, axis=0, ddof=1, keepdims=True); sd[sd==0]=1.0
Z = (X - mu) / sd
Z = np.nan_to_num(Z, nan=0.0, posinf=0.0, neginf=0.0)
rn = np.linalg.norm(Z, axis=1, keepdims=True); rn[rn==0]=1.0
Z = Z / rn

try:
    from sklearn.neighbors import NearestNeighbors
    nn = NearestNeighbors(n_neighbors=min(K_NEIGHBORS+1, Z.shape[0]), metric="cosine", algorithm="brute")
    nn.fit(Z)
    dists, idxs = nn.kneighbors(Z, return_distance=True)
    dists, idxs = dists[:,1:], idxs[:,1:]           # drop self
    sims = 1.0 - dists                              # similarity ∈ [0,1]
except Exception:
    # NumPy fallback (chunked top-K)
    K = min(K_NEIGHBORS, Z.shape[0]-1)
    sims = np.empty((Z.shape[0], K), dtype="float32")
    idxs = np.empty((Z.shape[0], K), dtype=np.int32)
    bs = 1024
    for i0 in range(0, Z.shape[0], bs):
        i1 = min(i0+bs, Z.shape[0])
        block = Z[i0:i1] @ Z.T
        for i in range(i1-i0): block[i, i0+i] = -np.inf
        topk = np.argpartition(-block, K, axis=1)[:, :K]
        vals = np.take_along_axis(block, topk, axis=1)
        order = np.argsort(-vals, axis=1)
        r = np.arange(vals.shape[0])[:, None]
        sims[i0:i1] = vals[r, order].astype("float32")
        idxs[i0:i1] = topk[r, order].astype(np.int32)

# ---- Build graph; de-spike with mutual-kNN; prune weak fibers ----
G = nx.Graph(); G.add_nodes_from(genes.tolist())
added = 0
for i, u in enumerate(genes):
    for j, w in zip(idxs[i], sims[i]):
        v = genes[int(j)]
        if u == v: continue
        wt = float(max(0.0, w))
        if G.has_edge(u, v):
            if wt > G[u][v]["weight"]: G[u][v]["weight"] = wt
        else:
            G.add_edge(u, v, weight=wt); added += 1
    if added >= EDGE_CAP: break

# mutual-kNN filter
nbrs = {u: sorted(G[u].items(), key=lambda x: x[1].get("weight",0.0), reverse=True) for u in G.nodes()}
topk = {u: set(v for v,_ in nbrs[u][:MUTUAL_K]) for u in G.nodes()}
H = nx.Graph(); H.add_nodes_from(G.nodes())
for u,v,d in G.edges(data=True):
    if v in topk[u] and u in topk[v]:
        H.add_edge(u, v, **d)

# prune weakest fraction + absolute floor
if H.number_of_edges() > 0 and PRUNE_FRACTION > 0:
    w = np.array([H[u][v].get("weight",0.0) for u,v in H.edges()], dtype=float)
    thr = float(np.quantile(w, PRUNE_FRACTION))
    thr = max(thr, MIN_W)
    H.remove_edges_from([(u,v) for u,v in H.edges() if float(H[u][v].get("weight",0.0)) < thr])

# keep the giant component (clarity)
if H.number_of_nodes() > 0:
    giant = max(nx.connected_components(H), key=len)
    H = H.subgraph(giant).copy()

# ---- Coarse spring + barycentric; short polish ----
rng = np.random.default_rng(SEED)
deg = sorted(H.degree(weight="weight"), key=lambda x: x[1], reverse=True)
coarse_nodes = set([n for n,_ in deg[:min(COARSE_N, H.number_of_nodes())]])
fine_nodes   = [n for n in H.nodes() if n not in coarse_nodes]

Hc = H.subgraph(coarse_nodes).copy()
pos = nx.spring_layout(Hc, dim=3, seed=SEED, weight="weight", iterations=COARSE_ITERS)

for n in fine_nodes:
    nbrs = [v for v in H.neighbors(n) if v in pos]
    if nbrs:
        arr = np.array([pos[v] for v in nbrs], float)
        pos[n] = tuple(arr.mean(axis=0))
    else:
        v = rng.normal(size=3); pos[n] = tuple(v/np.linalg.norm(v))

pos = nx.spring_layout(H, dim=3, seed=SEED, weight="weight",
                       pos=pos, fixed=list(coarse_nodes), iterations=POLISH_ITERS)

# ---- Render (Matplotlib; policy-compliant) ----
fig = plt.figure(figsize=(10,8), dpi=160)
ax = fig.add_subplot(111, projection='3d')
for u,v,d in H.edges(data=True):
    x=[pos[u][0],pos[v][0]]; y=[pos[u][1],pos[v][1]]; z=[pos[u][2],pos[v][2]]
    ax.plot(x,y,z, linewidth=0.5 + 2.0*float(d.get("weight",0.0)), alpha=0.35)
xs=[pos[n][0] for n in H.nodes()]; ys=[pos[n][1] for n in H.nodes()]; zs=[pos[n][2] for n in H.nodes()]
_ = ax.scatter(xs,ys,zs, s=14, alpha=0.85)  # silence artist repr
ax.set_title(f"CNT 3D Genomic Correlate Field — clean (nodes={H.number_of_nodes()}, edges={H.number_of_edges()})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
fig.tight_layout(); fig.savefig(PNG_PATH, bbox_inches="tight"); plt.close(fig)

# ---- Optional interactive (Plotly) ----
try:
    import plotly.graph_objects as go
    edge_x=[]; edge_y=[]; edge_z=[]
    for u,v,d in H.edges(data=True):
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    node_x=[pos[n][0] for n in H.nodes()]
    node_y=[pos[n][1] for n in H.nodes()]
    node_z=[pos[n][2] for n in H.nodes()]
    node_text=[str(n) for n in H.nodes()]
    fig = go.Figure(data=[
        go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode='lines', line=dict(width=1), hoverinfo='none'),
        go.Scatter3d(x=node_x,y=node_y,z=node_z,mode='markers', marker=dict(size=3), text=node_text, hoverinfo='text'),
    ])
    fig.update_layout(title="CNT 3D Genomic Correlate Field — clean",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False)))
    fig.write_html(HTML_PATH, include_plotlyjs='cdn')
except Exception:
    pass

print({"nodes": H.number_of_nodes(), "edges": H.number_of_edges(),
       "png": os.path.abspath(PNG_PATH), "html": os.path.abspath(HTML_PATH)})
# ======================================================================



Mean of empty slice


Degrees of freedom <= 0 for slice.



{'nodes': 288, 'edges': 987, 'png': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.png', 'html': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.html'}


In [11]:
# === CNT Field Profile Switch ==========================================
PROFILE = "Spine"   # options: "Spine", "Veil", "Atlas"

_profiles = {
    "Spine": {   # crisp structure
        "K_NEIGHBORS": 10, "MUTUAL_K": 10, "PRUNE_FRACTION": 0.18, "MIN_W": 0.14,
        "COARSE_N": 6000, "COARSE_ITERS": 240, "POLISH_ITERS": 80,
        "EDGE_CAP": 250_000, "USE_SPRING": True, "SPRING_LIMIT": 20000
    },
    "Veil": {    # denser fabric
        "K_NEIGHBORS": 14, "MUTUAL_K": 8,  "PRUNE_FRACTION": 0.10, "MIN_W": 0.10,
        "COARSE_N": 5000, "COARSE_ITERS": 200, "POLISH_ITERS": 60,
        "EDGE_CAP": 350_000, "USE_SPRING": True, "SPRING_LIMIT": 20000
    },
    "Atlas": {   # publication layout; balanced + exports
        "K_NEIGHBORS": 12, "MUTUAL_K": 10, "PRUNE_FRACTION": 0.15, "MIN_W": 0.12,
        "COARSE_N": 6000, "COARSE_ITERS": 260, "POLISH_ITERS": 100,
        "EDGE_CAP": 300_000, "USE_SPRING": True, "SPRING_LIMIT": 20000
    },
}
locals().update(_profiles[PROFILE])
print(f"[profile] {PROFILE} →", _profiles[PROFILE])


[profile] Spine → {'K_NEIGHBORS': 10, 'MUTUAL_K': 10, 'PRUNE_FRACTION': 0.18, 'MIN_W': 0.14, 'COARSE_N': 6000, 'COARSE_ITERS': 240, 'POLISH_ITERS': 80, 'EDGE_CAP': 250000, 'USE_SPRING': True, 'SPRING_LIMIT': 20000}


In [12]:
# === CNT 3D Genomic Correlate Field — Hardened Generator ==================
import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# ---- Paths ----
DATA_PATH = r"C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv"
PNG_PATH  = "CNT_genomic_network_3D.png"
HTML_PATH = "CNT_genomic_network_3D.html"

# ---- Settings ----
INDEX_COL       = "gene_name"   # fallback to 'rsid' or first non-numeric
FEATURES_WISH   = ["resonance_score","cnt_score","structure_score",
                   "gene_deg","ccre_deg","tissue_hits","tissues"]

SEED            = 42
MAX_GENES       = 12000
MIN_GENES_REQ   = 200           # fail fast if we have fewer
MIN_FEATS_REQ   = 2             # need ≥2 features for multivariate; else auto-fallback to score-kNN
K_NEIGHBORS     = 12
MUTUAL_K        = 10
PRUNE_FRACTION  = 0.15
MIN_W           = 0.12
EDGE_CAP        = 300_000

COARSE_N        = 6000
COARSE_ITERS    = 220
POLISH_ITERS    = 60

def _read_table(p):
    for sep in (None, "\t", ","):
        try:
            return pd.read_csv(p, sep=sep, engine="python")
        except Exception:
            pass
    raise FileNotFoundError(p)

def _pipeline_report(stage, **kw):
    print(f"[{stage}]", " ".join(f"{k}={v}" for k,v in kw.items()))

# ---- Load ----
df = _read_table(DATA_PATH)
if INDEX_COL not in df.columns:
    INDEX_COL = "rsid" if "rsid" in df.columns else (next((c for c in df.columns
                    if not np.issubdtype(df[c].dtype, np.number)), df.columns[0]))
df = df.dropna(subset=[INDEX_COL])
feat_cols = [c for c in FEATURES_WISH if c in df.columns]
if not feat_cols:
    feat_cols = df.select_dtypes(include=[np.number]).columns.tolist()

# Drop all-NaN requested features
non_empty_feats = [c for c in feat_cols if df[c].notna().any()]
feat_cols = non_empty_feats
_pipeline_report("load", rows=len(df), index_col=INDEX_COL, feats=len(feat_cols))

if len(feat_cols) == 0:
    raise ValueError("No usable numeric feature columns found (all empty). Check column names or file.")

# ---- Aggregate to one row per gene ----
agg = {c:"mean" for c in feat_cols}
for k in ("resonance_score","cnt_score","structure_score"):
    if k in agg: agg[k] = "max"
GDF = df[[INDEX_COL] + feat_cols].groupby(INDEX_COL).agg(agg)

# Drop all-NaN genes; drop all-NaN columns (again, post-agg)
GDF = GDF.dropna(how="all")
GDF = GDF.dropna(axis=1, how="all")
# Fill remaining NaNs by column median (keeps geometry sane)
if not GDF.empty:
    GDF = GDF.fillna(GDF.median(numeric_only=True))

# Cap by variance
if GDF.shape[0] > MAX_GENES:
    var = GDF.var(axis=1, numeric_only=True).sort_values(ascending=False)
    GDF = GDF.loc[var.index[:MAX_GENES]]

# Final sanity checks
_pipeline_report("post-agg", genes=GDF.shape[0], feats=GDF.shape[1])
if GDF.shape[0] < MIN_GENES_REQ:
    raise ValueError(f"Too few genes after cleaning: {GDF.shape[0]} < {MIN_GENES_REQ}. "
                     "Lower MIN_GENES_REQ or check filters.")
if GDF.shape[1] < MIN_FEATS_REQ:
    # ---- SCORE-ONLY FALLBACK ------------------------------------------------
    # Pick the most informative single feature
    num = GDF.select_dtypes(include=[np.number])
    score_col = (next((c for c in ["resonance_score","cnt_score","structure_score","phi","psi"] if c in num.columns),
                      num.var().sort_values(ascending=False).index[0]))
    s = num[score_col].dropna()
    s = s.reindex(s.abs().sort_values(ascending=False).index)
    genes = s.index.to_numpy(); vals = s.values.astype(float)

    G = nx.Graph(); G.add_nodes_from(genes)
    for i in range(len(genes)):
        diffs = np.abs(vals - vals[i]); diffs[i] = np.inf
        k = min(K_NEIGHBORS, max(1, len(genes)-1))
        nn = np.argpartition(diffs, k)[:k]
        for j in nn:
            u,v = genes[i], genes[j]
            if u == v: continue
            w = float(np.exp(-diffs[j]))
            if G.has_edge(u,v):
                if w > G[u][v]['weight']: G[u][v]['weight'] = w
            else:
                G.add_edge(u,v,weight=w)
        if G.number_of_edges() >= EDGE_CAP: break
    mode = f"score-kNN('{score_col}')"
else:
    # ---- MULTIVARIATE kNN (cosine on z-scores) -----------------------------
    X = GDF.to_numpy().astype("float32")
    # Standardize columns safely
    mu = np.nanmean(X, axis=0, keepdims=True)
    sd = np.nanstd(X, axis=0, ddof=1, keepdims=True)
    sd[sd == 0] = 1.0
    Z = (X - mu) / sd
    Z = np.nan_to_num(Z, nan=0.0, posinf=0.0, neginf=0.0)
    rn = np.linalg.norm(Z, axis=1, keepdims=True); rn[rn==0]=1.0
    Z = Z / rn

    genes = GDF.index.to_numpy()
    try:
        from sklearn.neighbors import NearestNeighbors
        nn = NearestNeighbors(n_neighbors=min(K_NEIGHBORS+1, Z.shape[0]), metric="cosine", algorithm="brute")
        nn.fit(Z); dists, idxs = nn.kneighbors(Z, return_distance=True)
        dists, idxs = dists[:,1:], idxs[:,1:]
        sims = 1.0 - dists
    except Exception:
        # Chunked NumPy fallback
        K = min(K_NEIGHBORS, Z.shape[0]-1)
        sims = np.empty((Z.shape[0], K), dtype="float32")
        idxs = np.empty((Z.shape[0], K), dtype=np.int32)
        bs = 1024
        for i0 in range(0, Z.shape[0], bs):
            i1 = min(i0+bs, Z.shape[0])
            block = Z[i0:i1] @ Z.T
            for i in range(i1-i0): block[i, i0+i] = -np.inf
            topk = np.argpartition(-block, K, axis=1)[:, :K]
            vals = np.take_along_axis(block, topk, axis=1)
            order = np.argsort(-vals, axis=1)
            r = np.arange(vals.shape[0])[:, None]
            sims[i0:i1] = vals[r, order].astype("float32")
            idxs[i0:i1] = topk[r, order].astype(np.int32)

    G = nx.Graph(); G.add_nodes_from(genes.tolist())
    added = 0
    for i, u in enumerate(genes):
        for j, w in zip(idxs[i], sims[i]):
            v = genes[int(j)]
            if u == v: continue
            wt = float(max(0.0, w))
            if G.has_edge(u, v):
                if wt > G[u][v]["weight"]: G[u][v]["weight"] = wt
            else:
                G.add_edge(u, v, weight=wt); added += 1
        if added >= EDGE_CAP: break
    mode = f"multivariate-kNN({GDF.shape[1]} feats)"

_pipeline_report("graph", mode=mode, nodes=G.number_of_nodes(), edges=G.number_of_edges())

# ---- mutual-kNN + pruning ----
nbrs = {u: sorted(G[u].items(), key=lambda x: x[1].get("weight",0.0), reverse=True) for u in G.nodes()}
topk = {u: set(v for v,_ in nbrs[u][:MUTUAL_K]) for u in G.nodes()}
H = nx.Graph(); H.add_nodes_from(G.nodes())
for u,v,d in G.edges(data=True):
    if v in topk[u] and u in topk[v]:
        H.add_edge(u, v, **d)

if H.number_of_edges() > 0 and PRUNE_FRACTION > 0:
    w = np.array([H[u][v].get("weight",0.0) for u,v in H.edges()], dtype=float)
    thr = float(np.quantile(w, PRUNE_FRACTION))
    thr = max(thr, MIN_W)
    H.remove_edges_from([(u,v) for u,v in H.edges() if float(H[u][v].get("weight",0.0)) < thr])

if H.number_of_nodes() > 0:
    giant = max(nx.connected_components(H), key=len)
    H = H.subgraph(giant).copy()

_pipeline_report("refined", nodes=H.number_of_nodes(), edges=H.number_of_edges())

# ---- layout: coarse spring + barycentric + short polish ----
rng = np.random.default_rng(SEED)
deg = sorted(H.degree(weight="weight"), key=lambda x: x[1], reverse=True)
coarse_nodes = set([n for n,_ in deg[:min(COARSE_N, H.number_of_nodes())]])
fine_nodes   = [n for n in H.nodes() if n not in coarse_nodes]

Hc = H.subgraph(coarse_nodes).copy()
pos = nx.spring_layout(Hc, dim=3, seed=SEED, weight="weight", iterations=COARSE_ITERS)

for n in fine_nodes:
    neigh = [v for v in H.neighbors(n) if v in pos]
    if neigh:
        arr = np.array([pos[v] for v in neigh], float)
        pos[n] = tuple(arr.mean(axis=0))
    else:
        v = rng.normal(size=3); pos[n] = tuple(v/np.linalg.norm(v))

pos = nx.spring_layout(H, dim=3, seed=SEED, weight="weight",
                       pos=pos, fixed=list(coarse_nodes), iterations=POLISH_ITERS)

# ---- render ----
fig = plt.figure(figsize=(10,8), dpi=160)
ax = fig.add_subplot(111, projection='3d')
for u,v,d in H.edges(data=True):
    x=[pos[u][0],pos[v][0]]; y=[pos[u][1],pos[v][1]]; z=[pos[u][2],pos[v][2]]
    ax.plot(x,y,z, linewidth=0.5 + 2.0*float(d.get("weight",0.0)), alpha=0.35)
xs=[pos[n][0] for n in H.nodes()]; ys=[pos[n][1] for n in H.nodes()]; zs=[pos[n][2] for n in H.nodes()]
_ = ax.scatter(xs,ys,zs, s=14, alpha=0.85)
ax.set_title(f"CNT 3D Genomic Correlate Field — {mode} (nodes={H.number_of_nodes()}, edges={H.number_of_edges()})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
fig.tight_layout(); fig.savefig(PNG_PATH, bbox_inches="tight"); plt.close(fig)

# ---- HTML (optional) ----
try:
    import plotly.graph_objects as go
    edge_x=[]; edge_y=[]; edge_z=[]
    for u,v,d in H.edges(data=True):
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    node_x=[pos[n][0] for n in H.nodes()]
    node_y=[pos[n][1] for n in H.nodes()]
    node_z=[pos[n][2] for n in H.nodes()]
    node_text=[str(n) for n in H.nodes()]
    fig = go.Figure(data=[
        go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode='lines', line=dict(width=1), hoverinfo='none'),
        go.Scatter3d(x=node_x,y=node_y,z=node_z,mode='markers', marker=dict(size=3), text=node_text, hoverinfo='text'),
    ])
    fig.update_layout(title=f"CNT 3D Genomic Correlate Field — {mode}",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False)))
    fig.write_html(HTML_PATH, include_plotlyjs='cdn')
except Exception:
    pass

print({"png": os.path.abspath(PNG_PATH), "html": os.path.abspath(HTML_PATH)})
# ======================================================================


[load] rows=119718 index_col=gene_name feats=6
[post-agg] genes=12000 feats=6
[graph] mode=multivariate-kNN(6 feats) nodes=12000 edges=134127
[refined] nodes=288 edges=987
{'png': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.png', 'html': 'C:\\Users\\caleb\\cnt_genome\\CNT_genomic_network_3D.html'}


In [13]:
# === CNT 3D Field — modules, exports, and a touch more fabric ============
# Requires: H (NetworkX graph with edge["weight"]) and pos (dict: node->(x,y,z))
# produced by the generator cell you just ran.

import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# --- (A) Optional: soften pruning a little to add fabric ------------------
# If your H is too sparse, uncomment the next two lines to re-add some edges:
# MIN_W_RESTORE = 0.10              # lower floor
# H = nx.Graph((u,v,d) for u,v,d in H.edges(data=True) if float(d.get("weight",0.0)) >= MIN_W_RESTORE)

# --- (B) Community detection (greedy modularity) -------------------------
try:
    from networkx.algorithms.community import greedy_modularity_communities
    comms = list(greedy_modularity_communities(H, weight="weight"))
    module = {}
    for i, cset in enumerate(comms):
        for n in cset:
            module[n] = i
    modules_df = pd.DataFrame({"gene": list(H.nodes()), "module": [module.get(n, -1) for n in H.nodes()]})
    modules_df.to_csv("CNT_genomic_modules.csv", index=False)
    print(f"[modules] {len(comms)} communities → CNT_genomic_modules.csv")
except Exception as e:
    module = {}
    print("[modules] skipped:", e)

# --- (C) Export node positions and edges ---------------------------------
nodes_out = []
for n in H.nodes():
    x,y,z = pos[n]
    nodes_out.append({"gene": n, "x": x, "y": y, "z": z, "degree": H.degree(n), "module": module.get(n, -1)})
pd.DataFrame(nodes_out).to_csv("CNT_nodes_3D.csv", index=False)

edges_out = []
for u,v,d in H.edges(data=True):
    edges_out.append({"u": u, "v": v, "weight": float(d.get("weight", 0.0))})
pd.DataFrame(edges_out).to_csv("CNT_edges.csv", index=False)
print("[export] wrote CNT_nodes_3D.csv and CNT_edges.csv")

# --- (D) Render with module colors and degree-based node sizes -----------
sizes  = [8 + 6*H.degree(n) for n in H.nodes()]
colors = [module.get(n, -1) for n in H.nodes()]  # integers; HTML hover shows labels

fig = plt.figure(figsize=(10,8), dpi=160)
ax  = fig.add_subplot(111, projection="3d")

# edges
for u,v,d in H.edges(data=True):
    x=[pos[u][0],pos[v][0]]; y=[pos[u][1],pos[v][1]]; z=[pos[u][2],pos[v][2]]
    ax.plot(x,y,z, linewidth=0.6 + 2.0*float(d.get("weight",0.0)), alpha=0.35)

# nodes
xs=[pos[n][0] for n in H.nodes()]
ys=[pos[n][1] for n in H.nodes()]
zs=[pos[n][2] for n in H.nodes()]
_ = ax.scatter(xs, ys, zs, s=sizes, alpha=0.88)  # artist muted

ax.set_title(f"CNT 3D Genomic Correlate Field — modules (nodes={H.number_of_nodes()}, edges={H.number_of_edges()})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
fig.tight_layout()
fig.savefig("CNT_genomic_network_3D_modules.png", bbox_inches="tight")
plt.close(fig)

# --- (E) Interactive HTML with hover gene names --------------------------
try:
    import plotly.graph_objects as go
    edge_x=edge_y=edge_z=[]
    edge_x=[]; edge_y=[]; edge_z=[]
    for u,v,d in H.edges(data=True):
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    node_x=[pos[n][0] for n in H.nodes()]
    node_y=[pos[n][1] for n in H.nodes()]
    node_z=[pos[n][2] for n in H.nodes()]
    node_text=[f"{n} | deg={H.degree(n)} | mod={module.get(n,-1)}" for n in H.nodes()]
    fig = go.Figure(data=[
        go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode="lines", line=dict(width=1), hoverinfo="none"),
        go.Scatter3d(x=node_x,y=node_y,z=node_z,mode="markers",
                     marker=dict(size=3), text=node_text, hoverinfo="text")
    ])
    fig.update_layout(title="CNT 3D Genomic Correlate Field — modules",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False),
                                 yaxis=dict(visible=False),
                                 zaxis=dict(visible=False)))
    fig.write_html("CNT_genomic_network_3D_modules.html", include_plotlyjs="cdn")
    print("[html] CNT_genomic_network_3D_modules.html")
except Exception as e:
    print("[html] skipped:", e)


[modules] 12 communities → CNT_genomic_modules.csv
[export] wrote CNT_nodes_3D.csv and CNT_edges.csv
[html] CNT_genomic_network_3D_modules.html


In [14]:
# === CNT Field: quick insights (modules, hubs, bridges) ================
import pandas as pd, networkx as nx

# Top modules by size
mod_series = pd.Series({n: module.get(n, -1) for n in H.nodes()})
mod_counts = mod_series.value_counts().rename_axis("module").reset_index(name="nodes")
print("\nTop modules by size:\n", mod_counts.head(8))

# Top hubs (degree) and bridges (betweenness)
deg = pd.Series(dict(H.degree())).sort_values(ascending=False)
btw = pd.Series(nx.betweenness_centrality(H, weight="weight")).sort_values(ascending=False)

hubs = deg.head(25).rename("degree").to_frame()
bridges = btw.head(25).rename("betweenness").to_frame()

hubs.to_csv("CNT_top_hubs.csv")
bridges.to_csv("CNT_top_bridges.csv")
print("\nSaved: CNT_top_hubs.csv, CNT_top_bridges.csv")
print("\nTop hubs:\n", hubs.head(10))
print("\nTop bridges:\n", bridges.head(10))



Top modules by size:
    module  nodes
0       0     50
1       1     41
2       2     30
3       4     29
4       3     29
5       5     26
6       6     25
7       7     16

Saved: CNT_top_hubs.csv, CNT_top_bridges.csv

Top hubs:
                  degree
ENSG00000261294      10
RELB                 10
ENSG00000226957      10
IL37                 10
ENSG00000253824      10
ENSG00000267743      10
SARDH                10
HNRNPA1P23           10
ST14                 10
ENSG00000280143      10

Top bridges:
                  betweenness
KDM4B               0.451037
TP53INP1            0.443971
ADAMTS14            0.432811
ENSG00000301269     0.419191
EDN1                0.393411
ENSG00000306145     0.367705
KCNH2               0.279501
STN1                0.260788
JRK                 0.259716
ENSG00000294410     0.255647


In [15]:
# === CNT Module Lens — focus on one (or many) modules, with soft context ===
# Requirements: H (graph), pos (node->(x,y,z)), and `module` dict from your modules cell.

import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# ---- choose which modules to spotlight ----
FOCUS_MODULES = [0]          # e.g., [0] or [0, 3]; change to target different modules
CONTEXT_ALPHA = 0.08         # how faint to draw non-focus nodes/edges
EDGE_W_SCALE  = 2.0          # visual thickness scale for edges
NODE_BASE     = 10           # base node size
NODE_HUB_BUMP = 6            # degree-based size bump

# ---- build masks ----
mod_of = module  # from previous cell (gene -> module id)
focus_nodes = {n for n in H if mod_of.get(n, -1) in FOCUS_MODULES}
context_nodes = [n for n in H if n not in focus_nodes]

# Subgraph for focused edges (both ends in focus)
focus_edges = [(u,v,d) for u,v,d in H.edges(data=True) if u in focus_nodes and v in focus_nodes]
context_edges = [(u,v,d) for u,v,d in H.edges(data=True) if not (u in focus_nodes and v in focus_nodes)]

# ---- render ----
fig = plt.figure(figsize=(10,8), dpi=160)
ax = fig.add_subplot(111, projection="3d")

# context edges (faint)
for u,v,d in context_edges:
    x = [pos[u][0], pos[v][0]]; y = [pos[u][1], pos[v][1]]; z = [pos[u][2], pos[v][2]]
    ax.plot(x, y, z, linewidth=0.5 + EDGE_W_SCALE*float(d.get("weight",0.0)), alpha=CONTEXT_ALPHA)

# focus edges (strong)
for u,v,d in focus_edges:
    x = [pos[u][0], pos[v][0]]; y = [pos[u][1], pos[v][1]]; z = [pos[u][2], pos[v][2]]
    ax.plot(x, y, z, linewidth=0.6 + EDGE_W_SCALE*float(d.get("weight",0.0)), alpha=0.45)

# context nodes (faint)
if context_nodes:
    xs=[pos[n][0] for n in context_nodes]; ys=[pos[n][1] for n in context_nodes]; zs=[pos[n][2] for n in context_nodes]
    sizes=[NODE_BASE + NODE_HUB_BUMP*H.degree(n) for n in context_nodes]
    _ = ax.scatter(xs, ys, zs, s=sizes, alpha=CONTEXT_ALPHA)

# focus nodes (bold)
if focus_nodes:
    xs=[pos[n][0] for n in focus_nodes]; ys=[pos[n][1] for n in focus_nodes]; zs=[pos[n][2] for n in focus_nodes]
    sizes=[NODE_BASE + NODE_HUB_BUMP*H.degree(n) for n in focus_nodes]
    _ = ax.scatter(xs, ys, zs, s=sizes, alpha=0.92)

ax.set_title(f"CNT 3D Module Lens — modules {FOCUS_MODULES} (nodes={len(focus_nodes)})")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
fig.tight_layout()
fig.savefig("CNT_module_lens.png", bbox_inches="tight")
plt.close(fig)

# ---- interactive HTML (hover shows node) ----
try:
    import plotly.graph_objects as go
    # edges first (context faint)
    edge_x=[]; edge_y=[]; edge_z=[]; edge_alpha=[]
    for u,v,d in context_edges:
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    ctx_edges = go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode='lines',
                             line=dict(width=1), hoverinfo='none', opacity=CONTEXT_ALPHA)

    # focus edges strong
    edge_x=[]; edge_y=[]; edge_z=[]
    for u,v,d in focus_edges:
        edge_x += [pos[u][0], pos[v][0], None]
        edge_y += [pos[u][1], pos[v][1], None]
        edge_z += [pos[u][2], pos[v][2], None]
    foc_edges = go.Scatter3d(x=edge_x,y=edge_y,z=edge_z,mode='lines',
                             line=dict(width=2), hoverinfo='none', opacity=0.55)

    # nodes
    ctx = go.Scatter3d(x=[pos[n][0] for n in context_nodes],
                       y=[pos[n][1] for n in context_nodes],
                       z=[pos[n][2] for n in context_nodes],
                       mode='markers', marker=dict(size=2),
                       text=[str(n) for n in context_nodes], hoverinfo='text', opacity=CONTEXT_ALPHA)

    foc = go.Scatter3d(x=[pos[n][0] for n in focus_nodes],
                       y=[pos[n][1] for n in focus_nodes],
                       z=[pos[n][2] for n in focus_nodes],
                       mode='markers', marker=dict(size=4),
                       text=[f"{n} | deg={H.degree(n)} | mod={mod_of.get(n,-1)}" for n in focus_nodes],
                       hoverinfo='text', opacity=0.95)

    fig = go.Figure(data=[ctx_edges, foc_edges, ctx, foc])
    fig.update_layout(title=f"CNT 3D Module Lens — modules {FOCUS_MODULES}",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False),
                                 yaxis=dict(visible=False),
                                 zaxis=dict(visible=False)))
    fig.write_html("CNT_module_lens.html", include_plotlyjs='cdn')
except Exception:
    pass

print({"png": os.path.abspath("CNT_module_lens.png"),
       "html": os.path.abspath("CNT_module_lens.html")})


{'png': 'C:\\Users\\caleb\\cnt_genome\\CNT_module_lens.png', 'html': 'C:\\Users\\caleb\\cnt_genome\\CNT_module_lens.html'}


In [16]:
# === CNT Module Atlas — render all modules as separate 3D plates ==========
# Requires: H (networkx graph), pos (node->(x,y,z)), module dict (gene->module)
# Creates:
#   - CNT_module_<k>.png / CNT_module_<k>.html (one per module)
#   - CNT_module_index.csv (module size & filenames)
#   - CNT_module_gallery.html (simple index page)

import os, math, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

OUT_INDEX_CSV = "CNT_module_index.csv"
OUT_GALLERY   = "CNT_module_gallery.html"

# ---------- collect module sets ----------
if 'module' not in globals() or not isinstance(module, dict):
    # build from greedy modularity if not provided
    from networkx.algorithms.community import greedy_modularity_communities
    comms = list(greedy_modularity_communities(H, weight="weight"))
    module = {}
    for i, cset in enumerate(comms):
        for n in cset:
            module[n] = i

mods = pd.Series({n: module.get(n, -1) for n in H.nodes()})
mods = mods[mods >= 0]
mod_ids = sorted(mods.unique().tolist())

rows = []
for k in mod_ids:
    focus = [n for n in H.nodes() if module.get(n, -1) == k]
    ctx   = [n for n in H.nodes() if module.get(n, -1) != k]

    # edges: focus-only (both ends in focus), context (everything else, faint)
    e_focus = [(u,v,d) for u,v,d in H.edges(data=True) if u in focus and v in focus]
    e_ctx   = [(u,v,d) for u,v,d in H.edges(data=True) if not (u in focus and v in focus)]

    # --- Matplotlib render ---
    fig = plt.figure(figsize=(10,8), dpi=160)
    ax  = fig.add_subplot(111, projection="3d")

    # context edges (faint)
    for u,v,d in e_ctx:
        x=[pos[u][0], pos[v][0]]; y=[pos[u][1], pos[v][1]]; z=[pos[u][2], pos[v][2]]
        ax.plot(x,y,z, linewidth=0.7 + 1.8*float(d.get("weight",0.0)), alpha=0.08)

    # focus edges (strong)
    for u,v,d in e_focus:
        x=[pos[u][0], pos[v][0]]; y=[pos[u][1], pos[v][1]]; z=[pos[u][2], pos[v][2]]
        ax.plot(x,y,z, linewidth=0.8 + 2.0*float(d.get("weight",0.0)), alpha=0.50)

    # context nodes (faint)
    if ctx:
        xs=[pos[n][0] for n in ctx]; ys=[pos[n][1] for n in ctx]; zs=[pos[n][2] for n in ctx]
        sizes=[10 + 6*H.degree(n) for n in ctx]
        _ = ax.scatter(xs,ys,zs, s=sizes, alpha=0.08)

    # focus nodes (bold)
    xs=[pos[n][0] for n in focus]; ys=[pos[n][1] for n in focus]; zs=[pos[n][2] for n in focus]
    sizes=[10 + 6*H.degree(n) for n in focus]
    _ = ax.scatter(xs,ys,zs, s=sizes, alpha=0.95)

    ax.set_title(f"CNT 3D Module Lens — module {k} (nodes={len(focus)})")
    ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
    plt.tight_layout()

    png_path = f"CNT_module_{k}.png"
    fig.savefig(png_path, bbox_inches="tight")
    plt.close(fig)

    # --- optional interactive HTML ---
    html_path = None
    try:
        import plotly.graph_objects as go
        # context edges
        ex,ey,ez=[],[],[]
        for u,v,d in e_ctx:
            ex += [pos[u][0], pos[v][0], None]
            ey += [pos[u][1], pos[v][1], None]
            ez += [pos[u][2], pos[v][2], None]
        ctx_edges = go.Scatter3d(x=ex,y=ey,z=ez,mode='lines',line=dict(width=1),hoverinfo='none',opacity=0.08)

        # focus edges
        ex,ey,ez=[],[],[]
        for u,v,d in e_focus:
            ex += [pos[u][0], pos[v][0], None]
            ey += [pos[u][1], pos[v][1], None]
            ez += [pos[u][2], pos[v][2], None]
        foc_edges = go.Scatter3d(x=ex,y=ey,z=ez,mode='lines',line=dict(width=2),hoverinfo='none',opacity=0.55)

        ctx_nodes = go.Scatter3d(
            x=[pos[n][0] for n in ctx], y=[pos[n][1] for n in ctx], z=[pos[n][2] for n in ctx],
            mode='markers', marker=dict(size=2), text=[str(n) for n in ctx], hoverinfo='text', opacity=0.08
        )
        foc_nodes = go.Scatter3d(
            x=[pos[n][0] for n in focus], y=[pos[n][1] for n in focus], z=[pos[n][2] for n in focus],
            mode='markers', marker=dict(size=4),
            text=[f"{n} | deg={H.degree(n)} | mod={k}" for n in focus], hoverinfo='text', opacity=0.95
        )
        fig3d = go.Figure(data=[ctx_edges, foc_edges, ctx_nodes, foc_nodes])
        fig3d.update_layout(title=f"CNT 3D Module Lens — module {k}",
                            showlegend=False,
                            scene=dict(xaxis=dict(visible=False),
                                       yaxis=dict(visible=False),
                                       zaxis=dict(visible=False)))
        html_path = f"CNT_module_{k}.html"
        fig3d.write_html(html_path, include_plotlyjs='cdn')
    except Exception:
        pass

    rows.append({"module": k, "nodes": len(focus), "png": png_path, "html": html_path})

# ---------- write index + gallery ----------
idx = pd.DataFrame(rows).sort_values(["nodes","module"], ascending=[False, True])
idx.to_csv(OUT_INDEX_CSV, index=False)

with open(OUT_GALLERY, "w", encoding="utf-8") as f:
    f.write("<html><head><meta charset='utf-8'><title>CNT Module Gallery</title></head><body>")
    f.write("<h2>CNT Module Gallery</h2><ul>")
    for _, r in idx.iterrows():
        f.write("<li>")
        if pd.notna(r["html"]):
            f.write(f"<a href='{r['html']}' target='_blank'>Module {int(r['module'])} (n={int(r['nodes'])})</a> ")
        f.write(f"— <a href='{r['png']}' target='_blank'>PNG</a></li>")
    f.write("</ul></body></html>")

print({"modules": len(idx), "largest": idx.head(3).to_dict(orient="records"),
       "index_csv": os.path.abspath(OUT_INDEX_CSV),
       "gallery_html": os.path.abspath(OUT_GALLERY)})


{'modules': 12, 'largest': [{'module': 0, 'nodes': 50, 'png': 'CNT_module_0.png', 'html': 'CNT_module_0.html'}, {'module': 1, 'nodes': 41, 'png': 'CNT_module_1.png', 'html': 'CNT_module_1.html'}, {'module': 2, 'nodes': 30, 'png': 'CNT_module_2.png', 'html': 'CNT_module_2.html'}], 'index_csv': 'C:\\Users\\caleb\\cnt_genome\\CNT_module_index.csv', 'gallery_html': 'C:\\Users\\caleb\\cnt_genome\\CNT_module_gallery.html'}


In [17]:
# === CNT One-Field Atlas: modules, bridges, and centroids in one 3D scene ===
# Requires: H (NetworkX graph), pos (node -> (x,y,z))
# Optional: module dict (gene -> module id). If absent, we compute communities.

import os, numpy as np, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from matplotlib import cm

PNG_PATH  = "CNT_one_field_atlas.png"
HTML_PATH = "CNT_one_field_atlas.html"

# ---- 0) Module map (build if missing) -------------------------------------
if 'module' not in globals() or not isinstance(module, dict) or len(module)==0:
    from networkx.algorithms.community import greedy_modularity_communities
    comms = list(greedy_modularity_communities(H, weight="weight"))
    module = {}
    for i, cset in enumerate(comms):
        for n in cset:
            module[n] = i

# module ids present
mod_ids = sorted({m for m in (module.get(n, -1) for n in H.nodes()) if m >= 0})
if not mod_ids:
    raise ValueError("No modules found. Run community detection first.")

# ---- 1) Colors, sizes, and helper stats -----------------------------------
mod_index = {m:i for i,m in enumerate(mod_ids)}
cmap = cm.get_cmap("tab20", max(20, len(mod_ids)))  # distinct palette
MOD_COLOR = {m: cmap(mod_index[m] % cmap.N) for m in mod_ids}

node_colors = [MOD_COLOR.get(module.get(n, -1), (0.6,0.6,0.6,1.0)) for n in H.nodes()]
node_sizes  = [10 + 6*H.degree(n) for n in H.nodes()]

# module centroids (mean of node positions per module)
centroid = {}
for m in mod_ids:
    nodes_m = [n for n in H.nodes() if module.get(n,-1)==m]
    if nodes_m:
        pts = np.array([pos[n] for n in nodes_m], float)
        centroid[m] = pts.mean(axis=0)
    else:
        centroid[m] = np.zeros(3)

# module sizes
mod_size = {m: sum(1 for n in H.nodes() if module.get(n,-1)==m) for m in mod_ids}

# ---- 2) Cross-talk matrix (sum of inter-module edge weights) --------------
W = pd.DataFrame(0.0, index=mod_ids, columns=mod_ids)
for u,v,d in H.edges(data=True):
    mu, mv = module.get(u,-1), module.get(v,-1)
    if mu < 0 or mv < 0: 
        continue
    w = float(d.get("weight",0.0))
    if mu == mv:
        W.at[mu, mv] += w
    else:
        W.at[mu, mv] += w
        W.at[mv, mu] += w

# for centroid links scaling
max_w = float(W.values.max()) if W.values.size else 1.0
if max_w == 0.0: max_w = 1.0

# ---- 3) One scene: nodes colored, intra faint, inter bold, centroids + links
fig = plt.figure(figsize=(11,9), dpi=170)
ax  = fig.add_subplot(111, projection='3d')

# 3a) draw edges
for u,v,d in H.edges(data=True):
    mu, mv = module.get(u,-1), module.get(v,-1)
    x=[pos[u][0], pos[v][0]]; y=[pos[u][1], pos[v][1]]; z=[pos[u][2], pos[v][2]]
    w = float(d.get("weight",0.0))
    if mu == mv and mu >= 0:
        # intra-module: faint, colored by module
        ax.plot(x,y,z, linewidth=0.5 + 1.2*w, alpha=0.15, color=MOD_COLOR[mu])
    else:
        # inter-module: bold, dark
        ax.plot(x,y,z, linewidth=0.6 + 2.0*w, alpha=0.45, color=(0.15,0.15,0.15,1.0))

# 3b) draw nodes
xs=[pos[n][0] for n in H.nodes()]; ys=[pos[n][1] for n in H.nodes()]; zs=[pos[n][2] for n in H.nodes()]
_ = ax.scatter(xs,ys,zs, s=node_sizes, c=node_colors, alpha=0.95)

# 3c) centroid markers (scaled by module size)
for m in mod_ids:
    cx,cy,cz = centroid[m]
    size = 60 + 10*mod_size[m]
    _ = ax.scatter([cx],[cy],[cz], s=size, alpha=0.95)  # centroid dots (neutral style)

# 3d) centroid-to-centroid links (width ∝ cross-talk)
for i in mod_ids:
    for j in mod_ids:
        if j <= i: 
            continue
        w = W.at[i,j]
        if w <= 0: 
            continue
        ci, cj = centroid[i], centroid[j]
        lw = 0.6 + 6.5*(w/max_w)   # emphasize strong cross-talk
        ax.plot([ci[0],cj[0]],[ci[1],cj[1]],[ci[2],cj[2]], linewidth=lw, alpha=0.55, color=(0,0,0,1))

# cosmetics
ax.set_title("CNT One-Field Atlas — modules (color), bridges (bold), centroids (dots)")
ax.set_xticks([]); ax.set_yticks([]); ax.set_zticks([])
plt.tight_layout()
fig.savefig(PNG_PATH, bbox_inches="tight")
plt.close(fig)

# ---- 4) Optional interactive HTML ----------------------------------------
try:
    import plotly.graph_objects as go
    # edges (two layers)
    edge_x_i=edge_y_i=edge_z_i=[]; edge_x_i=[]; edge_y_i=[]; edge_z_i=[]
    edge_x_e=edge_y_e=edge_z_e=[]; edge_x_e=[]; edge_y_e=[]; edge_z_e=[]
    for u,v,d in H.edges(data=True):
        mu, mv = module.get(u,-1), module.get(v,-1)
        if mu==mv and mu>=0:
            edge_x_i += [pos[u][0], pos[v][0], None]
            edge_y_i += [pos[u][1], pos[v][1], None]
            edge_z_i += [pos[u][2], pos[v][2], None]
        else:
            edge_x_e += [pos[u][0], pos[v][0], None]
            edge_y_e += [pos[u][1], pos[v][1], None]
            edge_z_e += [pos[u][2], pos[v][2], None]
    intra_edges = go.Scatter3d(x=edge_x_i,y=edge_y_i,z=edge_z_i,mode='lines',
                               line=dict(width=1), hoverinfo='none', opacity=0.15)
    inter_edges = go.Scatter3d(x=edge_x_e,y=edge_y_e,z=edge_z_e,mode='lines',
                               line=dict(width=2), hoverinfo='none', opacity=0.45)

    # nodes
    node_x=xs; node_y=ys; node_z=zs
    node_text=[f"{n} | mod={module.get(n,-1)} | deg={H.degree(n)}" for n in H.nodes()]
    nodes3d = go.Scatter3d(x=node_x,y=node_y,z=node_z,mode='markers',
                           marker=dict(size=3), text=node_text, hoverinfo='text', opacity=0.95)

    # centroid links (meta-edges)
    cx=[]; cy=[]; cz=[]; cx2=[]; cy2=[]; cz2=[]
    for i in mod_ids:
        for j in mod_ids:
            if j<=i: continue
            if W.at[i,j] > 0:
                ci, cj = centroid[i], centroid[j]
                cx += [ci[0], cj[0], None]
                cy += [ci[1], cj[1], None]
                cz += [ci[2], cj[2], None]
    cent_links = go.Scatter3d(x=cx,y=cy,z=cz,mode='lines',
                              line=dict(width=4), hoverinfo='none', opacity=0.55)

    # centroid markers
    cen_x=[centroid[m][0] for m in mod_ids]
    cen_y=[centroid[m][1] for m in mod_ids]
    cen_z=[centroid[m][2] for m in mod_ids]
    cen_text=[f"Module {m} (n={mod_size[m]})" for m in mod_ids]
    cents = go.Scatter3d(x=cen_x,y=cen_y,z=cen_z,mode='markers',
                         marker=dict(size=6), text=cen_text, hoverinfo='text', opacity=0.95)

    fig = go.Figure(data=[intra_edges, inter_edges, nodes3d, cent_links, cents])
    fig.update_layout(title="CNT One-Field Atlas — colored modules + bridges + centroids",
                      showlegend=False,
                      scene=dict(xaxis=dict(visible=False),
                                 yaxis=dict(visible=False),
                                 zaxis=dict(visible=False)))
    fig.write_html(HTML_PATH, include_plotlyjs='cdn')
except Exception:
    pass

print({"png": os.path.abspath(PNG_PATH), "html": os.path.abspath(HTML_PATH)})



The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.



{'png': 'C:\\Users\\caleb\\cnt_genome\\CNT_one_field_atlas.png', 'html': 'C:\\Users\\caleb\\cnt_genome\\CNT_one_field_atlas.html'}


In [18]:
# === TEST PACK v1 — Setup & Snapshot ======================================
# Assumes you already built: H (graph), pos (layout), module (gene→module), and GDF (per-gene features).
# If any are missing, this cell will reconstruct the minimal pieces from your CSV.

import os, numpy as np, pandas as pd, networkx as nx

DATA_PATH = r"C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv"  # edit if needed
OUTDIR = "CNT_TESTS"; os.makedirs(OUTDIR, exist_ok=True)

def read_table(p):
    for sep in (None, "\t", ","):
        try: return pd.read_csv(p, sep=sep, engine="python")
        except Exception: pass
    raise FileNotFoundError(p)

# Rebuild GDF if absent
if 'GDF' not in globals():
    df = read_table(DATA_PATH)
    idx_col = "gene_name" if "gene_name" in df.columns else ("rsid" if "rsid" in df.columns else df.columns[0])
    feat_wish = ["resonance_score","cnt_score","structure_score","gene_deg","ccre_deg","tissue_hits","tissues"]
    feats = [c for c in feat_wish if c in df.columns]
    if not feats: feats = df.select_dtypes(include=[np.number]).columns.tolist()
    tmp = df[[idx_col] + feats].dropna(subset=[idx_col]).copy()
    agg = {c:"mean" for c in feats}
    for k in ("resonance_score","cnt_score","structure_score"):
        if k in agg: agg[k] = "max"
    GDF = tmp.groupby(idx_col).agg(agg).dropna(how="all").fillna(tmp.median(numeric_only=True))
    GDF.index.name = "gene"

# Rebuild module map if absent
if 'module' not in globals():
    from networkx.algorithms.community import greedy_modularity_communities
    # Need a small graph: use cosine kNN on standardized features (same as build path)
    X = GDF.to_numpy().astype("float32")
    X = (X - np.nanmean(X,0)) / (np.nanstd(X,0,ddof=1) + 1e-9)
    X = np.nan_to_num(X)
    X = X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-9)
    from sklearn.neighbors import NearestNeighbors
    k = min(10, max(2, X.shape[0]-1))
    nn = NearestNeighbors(n_neighbors=k+1, metric="cosine", algorithm="brute").fit(X)
    d, idx = nn.kneighbors(X); d, idx = d[:,1:], idx[:,1:]
    genes = GDF.index.to_numpy()
    Gtmp = nx.Graph(); Gtmp.add_nodes_from(genes)
    for i,u in enumerate(genes):
        for j,dist in zip(idx[i], d[i]):
            v = genes[int(j)]; w = float(1.0 - dist)
            if u!=v:
                if Gtmp.has_edge(u,v): Gtmp[u][v]['weight'] = max(Gtmp[u][v]['weight'], w)
                else: Gtmp.add_edge(u,v,weight=w)
    comms = list(greedy_modularity_communities(Gtmp, weight="weight"))
    module = {n:i for i,c in enumerate(comms) for n in c}

# Persist current artifacts for tests
pd.DataFrame({"gene": list(GDF.index)} | {c: GDF[c].values for c in GDF.columns}).to_csv(f"{OUTDIR}/GDF_snapshot.csv", index=False)
pd.DataFrame({"gene": list(module.keys()), "module": list(module.values())}).to_csv(f"{OUTDIR}/modules_snapshot.csv", index=False)

# Basic graph metrics
if 'H' in globals():
    giant = max(nx.connected_components(H), key=len) if H.number_of_nodes() else set()
    Gg = H.subgraph(giant).copy() if giant else H
    summary = {
        "nodes": H.number_of_nodes(), "edges": H.number_of_edges(),
        "giant_nodes": Gg.number_of_nodes(), "giant_edges": Gg.number_of_edges(),
        "avg_degree": np.mean([d for _,d in H.degree()]) if H.number_of_nodes() else 0.0,
    }
else:
    summary = {"warning":"No H present; stability & null tests will rebuild ephemeral graphs."}

print("SNAPSHOT:", summary)


SNAPSHOT: {'nodes': 288, 'edges': 987, 'giant_nodes': 288, 'giant_edges': 987, 'avg_degree': np.float64(6.854166666666667)}


In [19]:
# === TEST PACK v1 — Stability & Sensitivity ===============================
# We perturb the build knobs and data a bit, recluster, and compare to your current modules with ARI.

import numpy as np, pandas as pd, networkx as nx
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.neighbors import NearestNeighbors

def build_knn_modules(GDF, k=10, noise=0.0, seed=0):
    rng = np.random.default_rng(seed)
    X = GDF.to_numpy().astype("float32")
    # standardize columns
    X = (X - np.nanmean(X,0)) / (np.nanstd(X,0,ddof=1) + 1e-9)
    X = np.nan_to_num(X)
    if noise>0: X = X + noise*rng.normal(size=X.shape).astype("float32")
    X = X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-9)
    k = min(k, max(2, X.shape[0]-1))
    nn = NearestNeighbors(n_neighbors=k+1, metric="cosine", algorithm="brute").fit(X)
    d, idx = nn.kneighbors(X); d, idx = d[:,1:], idx[:,1:]
    genes = GDF.index.to_numpy()
    G = nx.Graph(); G.add_nodes_from(genes)
    for i,u in enumerate(genes):
        for j,dist in zip(idx[i], d[i]):
            v = genes[int(j)]; w = float(1.0 - dist)
            if u!=v:
                if G.has_edge(u,v): G[u][v]['weight'] = max(G[u][v]['weight'], w)
                else: G.add_edge(u,v,weight=w)
    from networkx.algorithms.community import greedy_modularity_communities
    comms = list(greedy_modularity_communities(G, weight="weight"))
    return {n:i for i,c in enumerate(comms) for n in c}

base_map = dict(pd.read_csv("CNT_TESTS/modules_snapshot.csv").values)
genes = list(GDF.index)
base_labels = np.array([base_map.get(g, -1) for g in genes])

grid = []
for k in (8,10,12,14):
    for noise in (0.0, 0.02, 0.05, 0.10):
        m = build_knn_modules(GDF, k=k, noise=noise, seed=42)
        labels = np.array([m.get(g, -1) for g in genes])
        grid.append({"k":k, "noise":noise, "ARI": float(ARI(base_labels, labels))})

stability = pd.DataFrame(grid).sort_values(["k","noise"])
stability.to_csv("CNT_TESTS/stability_grid.csv", index=False)
print("STABILITY (ARI vs base, 1.0 is perfect agreement):")
print(stability.pivot(index="k", columns="noise", values="ARI").round(3))


STABILITY (ARI vs base, 1.0 is perfect agreement):
noise   0.00   0.02   0.05   0.10
k                                
8      0.016  0.012  0.002  0.000
10     0.014  0.009  0.008 -0.004
12     0.014  0.005  0.008 -0.004
14     0.013  0.005  0.008 -0.003


In [20]:
# === TEST PACK v1 — Nulls, Enrichment, Report ============================
# (A) Modularity vs degree-preserving nulls
# (B) Over-representation of 'tissues' or 'trait' per module
# (C) Continuous-score separation by module (ANOVA)
# Writes: CNT_TESTS/null_modularity.csv, CNT_TESTS/enrichment.csv, CNT_TESTS/report.txt

import re, numpy as np, pandas as pd, networkx as nx
from math import comb
from scipy.stats import hypergeom, f_oneway

mods_df = pd.read_csv("CNT_TESTS/modules_snapshot.csv")
mod_of = dict(mods_df.values)
modules = sorted(mods_df["module"].unique())

# (A) Modularity vs degree-preserving nulls
if 'H' in globals() and H.number_of_edges()>0:
    giant = max(nx.connected_components(H), key=len)
    Gg = H.subgraph(giant).copy()
    part = {n: mod_of.get(n, -1) for n in Gg.nodes()}
    # compute modularity for current partition
    from networkx.algorithms.community.quality import modularity
    current_Q = modularity(Gg, [ [n for n in part if part[n]==m] for m in modules ], weight="weight")
    # nulls: degree-preserving swaps
    Qs = []
    for r in range(30):
        Gn = Gg.copy()
        try:
            nx.double_edge_swap(Gn, nswap=min(5*Gn.number_of_edges(), 50000), max_tries=200000)
        except Exception:
            pass
        Qs.append(modularity(Gn, [ [n for n in part if part[n]==m] for m in modules ], weight="weight"))
    null = pd.DataFrame({"Q_null": Qs})
    null["Q_obs"] = current_Q
    null["z"] = (current_Q - null["Q_null"].mean()) / (null["Q_null"].std(ddof=1) + 1e-9)
    null.to_csv("CNT_TESTS/null_modularity.csv", index=False)
    print(f"MODULARITY: Q_obs={current_Q:.3f}, null_mean={null.Q_null.mean():.3f}, z={null['z'].iloc[0]:.2f}")
else:
    print("MODULARITY: skipped (no H).")

# (B) Enrichment on 'tissues' and 'trait' (if present)
df = read_table(DATA_PATH)
idx_col = "gene_name" if "gene_name" in df.columns else ("rsid" if "rsid" in df.columns else df.columns[0])
universe = set(GDF.index)
df = df[df[idx_col].isin(universe)]

def split_tokens(series):
    toks = []
    for x in series.dropna().astype(str).values:
        toks += [t.strip().lower() for t in re.split(r"[;,/|]", x) if t.strip()]
    return toks

enrich_rows = []
for col in ["tissues","trait"]:
    if col not in df.columns: 
        continue
    # build background counts
    all_tokens = split_tokens(df[col])
    if not all_tokens:
        continue
    vocab = pd.Series(all_tokens).value_counts()
    for m in modules:
        genes_m = set(mods_df.query("module==@m")["gene"])
        toks_m = split_tokens(df[df[idx_col].isin(genes_m)][col])
        counts = pd.Series(toks_m).value_counts()
        for term, k in counts.items():
            K = int(vocab.get(term,0))
            N = int(len(all_tokens))
            n = int(len(toks_m))
            # hypergeometric p (over-representation)
            p = hypergeom.sf(k-1, N, K, n)
            enrich_rows.append({"module": m, "field": col, "term": term, "k_in_module": int(k),
                                "K_in_all": int(K), "n_module_tokens": n, "N_all_tokens": N, "p": p})

if enrich_rows:
    enr = pd.DataFrame(enrich_rows)
    # FDR (Benjamini–Hochberg)
    enr = enr.sort_values("p").reset_index(drop=True)
    m = len(enr); enr["q"] = enr["p"] * m / (np.arange(m)+1)
    enr["q"] = np.minimum.accumulate(enr["q"][::-1])[::-1]
    enr.to_csv("CNT_TESTS/enrichment.csv", index=False)
    print("ENRICHMENT: wrote CNT_TESTS/enrichment.csv (fields: tissues/trait).")
else:
    print("ENRICHMENT: skipped (no tissues/trait tokens).")

# (C) ANOVA: do modules separate continuous scores?
cont_cols = [c for c in ["resonance_score","cnt_score","structure_score","gene_deg","ccre_deg","tissue_hits"] if c in GDF.columns]
anova_rows = []
for c in cont_cols:
    groups = [GDF.loc[mods_df.query("module==@m")["gene"]][c].dropna().values for m in modules]
    if sum(len(g) for g in groups) and sum(len(g)>1 for g in groups) > 1:
        F, p = f_oneway(*[g for g in groups if len(g)>1])
        anova_rows.append({"feature": c, "F": float(F), "p": float(p)})
anova = pd.DataFrame(anova_rows)
anova.to_csv("CNT_TESTS/anova.csv", index=False)

# Write a compact report
with open(f"{OUTDIR}/report.txt","w",encoding="utf-8") as f:
    f.write("=== CNT TEST PACK v1 — REPORT ===\n")
    if 'null' in locals():
        f.write(f"Modularity Q_obs={current_Q:.3f}, null_mean={null.Q_null.mean():.3f}, z={null['z'].iloc[0]:.2f}\n")
    if not anova.empty:
        f.write("\nANOVA (module separation on continuous features):\n")
        for _,r in anova.sort_values("p").iterrows():
            f.write(f"  {r.feature}: F={r.F:.2f}, p={r.p:.2e}\n")
    if enrich_rows:
        sig = enr[enr["q"]<=0.05]
        f.write(f"\nEnrichment (BH q<=0.05): {len(sig)} rows\n")
        f.write("  Example top terms:\n")
        for _,r in sig.head(10).iterrows():
            f.write(f"   - mod {int(r.module)} | {r.field}:{r.term} (k={r.k_in_module}/{r.n_module_tokens}, q={r.q:.2e})\n")
print("DONE → see CNT_TESTS/: stability_grid.csv, null_modularity.csv, enrichment.csv, anova.csv, report.txt")


MODULARITY: Q_obs=0.830, null_mean=-0.002, z=82.90
ENRICHMENT: wrote CNT_TESTS/enrichment.csv (fields: tissues/trait).
DONE → see CNT_TESTS/: stability_grid.csv, null_modularity.csv, enrichment.csv, anova.csv, report.txt



Each of the input arrays is constant; the F statistic is not defined or infinite


Each of the input arrays is constant; the F statistic is not defined or infinite



In [21]:
from pathlib import Path
# === EDIT THIS ===
CSV = Path(r"C:\\Users\\caleb\\cnt_genome\\out\\CNT_genomic_resonance_scored.csv")  # change if needed
OUTDIR = Path("CNT_TESTS")
OUTDIR.mkdir(parents=True, exist_ok=True)
print("Using:", CSV)
print("Out:", OUTDIR.resolve())

Using: C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv
Out: C:\Users\caleb\cnt_genome\CNT_TESTS


In [22]:
import pandas as pd, numpy as np
from pathlib import Path
df = pd.read_csv(CSV)
print(df.shape)
df.head(3)

(119718, 14)


Unnamed: 0,rsid,Chromosome,pos,trait,ccre_id,gene_id,gene_name,tissue_hits,tissues,resonance_score,gene_deg,ccre_deg,structure_score,cnt_score
0,esv2676630,chr16,173448,Glycated hemoglobin levels,EH38E1794437,ENSG00000294455.1,ENSG00000294455,0,,1.0,1,1,1.039721,2.039721
1,rs10000702,chr4,156771179,"Glucose (fasting status unknown, maximum, inv-...",EH38E2338838,ENSG00000248629.1,ENSG00000248629,0,,1.0,7,1,2.426015,3.426015
2,rs1000113,chr5,150860514,Crohn's disease,EH38E2421397,ENSG00000237693.6,IRGM,0,,1.0,1,1,1.039721,2.039721


In [23]:
from sklearn.preprocessing import StandardScaler
numeric_candidates = [c for c in df.columns if df[c].dtype.kind in "if"]
preferred = [c for c in ['pos','tissue_hits','tissues','resonance_score','gene_deg','ccre_deg','structure_score','cnt_score'] if c in df.columns]
num_cols = preferred if len(preferred)>=3 else numeric_candidates
assert 'resonance_score' in df.columns, "Need resonance_score"
if not all(c in df.columns for c in ['x2d','y2d']):
    from umap import UMAP
    X = df[num_cols].fillna(df[num_cols].median()).to_numpy()
    X = StandardScaler().fit_transform(X)
    emb = UMAP(n_components=2, n_neighbors=30, min_dist=0.1, metric='cosine', random_state=42).fit_transform(X)
    df['x2d'], df['y2d'] = emb[:,0], emb[:,1]
print('Has 2D cols:', all(c in df.columns for c in ['x2d','y2d']))


invalid value encountered in divide


invalid value encountered in divide


invalid value encountered in divide



ValueError: Input contains NaN.

In [None]:
# === Replacement cell: robust UMAP builder (handles NaN/±inf/constant cols) ===
import numpy as np
from sklearn.preprocessing import StandardScaler, normalize

def _build_umap_input(df, preferred_cols=None):
    numeric_candidates = [c for c in df.columns if df[c].dtype.kind in "if"]
    if preferred_cols is None:
        preferred_cols = ['pos','tissue_hits','tissues','resonance_score','gene_deg','ccre_deg','structure_score','cnt_score']
    cols = [c for c in preferred_cols if c in df.columns]
    if len(cols) < 3:
        cols = numeric_candidates

    X = df[cols].copy()

    # 1) replace ±inf with NaN
    X = X.replace([np.inf, -np.inf], np.nan)

    # 2) drop all-NaN columns
    all_nan_cols = list(X.columns[X.isna().all(axis=0)])
    if all_nan_cols:
        print("Dropping all-NaN columns:", all_nan_cols)
        X = X.drop(columns=all_nan_cols)

    # 3) median impute; if still NaN, fill 0
    med = X.median(axis=0, skipna=True)
    X = X.fillna(med).fillna(0.0)

    # 4) drop constant columns (avoid zero variance)
    const_cols = [c for c in X.columns if X[c].nunique(dropna=True) <= 1]
    if const_cols:
        print("Dropping constant columns:", const_cols)
        X = X.drop(columns=const_cols)

    if X.shape[1] < 2:
        raise ValueError("Not enough informative numeric columns remain for UMAP.")

    Xa = X.to_numpy(np.float32)
    Xa[~np.isfinite(Xa)] = 0.0

    # protect against zero-norm rows (for cosine metric)
    row_norm = np.linalg.norm(Xa, axis=1)
    zero_rows = (row_norm == 0)
    if zero_rows.any():
        Xa[zero_rows, 0] = 1e-6
        print(f"Zero-norm rows nudged: {int(zero_rows.sum())}")

    Xa = StandardScaler(with_mean=True, with_std=True).fit_transform(Xa)
    Xa = normalize(Xa, norm='l2')
    return Xa

# Only compute if missing
if not all(c in df.columns for c in ["x2d","y2d"]):
    from umap import UMAP
    Xa = _build_umap_input(df)
    emb = UMAP(
        n_components=2,
        n_neighbors=30,   # reduce to 15 if memory is tight
        min_dist=0.1,
        metric='cosine',
        random_state=42
    ).fit_transform(Xa)
    df["x2d"], df["y2d"] = emb[:,0], emb[:,1]

print("Has 2D cols:", all(c in df.columns for c in ["x2d","y2d"]))


In [None]:
import plotly.graph_objects as go
x, y, z = df['x2d'].to_numpy(), df['y2d'].to_numpy(), df['resonance_score'].to_numpy()
name_col = 'gene_name' if 'gene_name' in df.columns else df.columns[0]
q = np.quantile(z, [0.0, 0.5, 0.75, 0.9, 0.97, 1.0])
bands = list(zip(q[:-1], q[1:]))
frames = []
for i,(lo,hi) in enumerate(bands):
    m = (z>=lo)&(z<=hi)
    frames.append(go.Frame(name=f"Band {i+1}: {lo:.3f}-{hi:.3f}",
                           data=[go.Scatter3d(x=x[m], y=y[m], z=z[m], mode='markers',
                                               marker=dict(size=2, opacity=0.85), text=df[name_col][m],
                                               hovertemplate='<b>%{text}</b><br>x=%{x:.2f} y=%{y:.2f}<br>res=%{z:.4f}<extra></extra>')]))
fig = go.Figure(
    data=[go.Scatter3d(x=x, y=y, z=z, mode='markers', marker=dict(size=2, opacity=0.25), text=df[name_col],
                       hovertemplate='<b>%{text}</b><br>x=%{x:.2f} y=%{y:.2f}<br>res=%{z:.4f}<extra></extra>')],
    frames=frames,
)
fig.update_layout(title='CNT 3D Genomic Field • z = resonance_score',
                  scene=dict(xaxis_title='field‑x', yaxis_title='field‑y', zaxis_title='resonance'),
                  sliders=[dict(active=0, steps=[dict(label=f.name, method='animate', args=[[f.name], {'frame': {'duration': 0, 'redraw': True}, 'mode':'immediate', 'fromcurrent': True}]) for f in frames])])
fig.show()

In [None]:
import igraph as ig, leidenalg as la
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import normalized_mutual_info_score as NMI
from scipy.sparse import csr_matrix, tril
from scipy.sparse.csgraph import minimum_spanning_tree, dijkstra

coords3 = np.c_[df['x2d'].to_numpy(), df['y2d'].to_numpy(), df['resonance_score'].to_numpy()]
coords2 = np.c_[df['x2d'].to_numpy(), df['y2d'].to_numpy()]

def leiden_labels(coords, k=12, res=0.8):
    nbrs = NearestNeighbors(n_neighbors=k).fit(coords)
    idx = nbrs.kneighbors(return_distance=False)
    edges = []
    n = coords.shape[0]
    for i in range(n):
        for j in idx[i,1:]:
            edges.append((int(i), int(j)))
    G = ig.Graph(n=n, edges=edges, directed=False)
    part = la.find_partition(G, la.RBConfigurationVertexPartition, resolution_parameter=res)
    return np.array(part.membership)

def morans_I(values, coords, k=12):
    nbrs = NearestNeighbors(n_neighbors=k).fit(coords)
    idx = nbrs.kneighbors(return_distance=False)
    n = len(values)
    mu = values.mean()
    num = 0.0; den = ((values - mu)**2).sum(); w = 0
    for i in range(n):
        for j in idx[i,1:]:
            num += (values[i]-mu)*(values[j]-mu); w += 1
    return float((n/w) * (num/den)), int(w)

# 1) layer persistence via HDBSCAN on z‑bands
layer_persistence_index = None
try:
    import hdbscan
    z = df['resonance_score'].to_numpy()
    q = np.quantile(z, [0.0, 0.5, 0.75, 0.9, 0.97, 1.0])
    bands = list(zip(q[:-1], q[1:]))
    entropies = []
    for lo,hi in bands:
        m = (z>=lo)&(z<=hi)
        if m.sum()<50:
            entropies.append(0.0)
            continue
        X3 = coords3[m]
        cl = hdbscan.HDBSCAN(min_cluster_size=25, min_samples=10).fit_predict(X3)
        labs = cl[cl>=0]
        if labs.size==0:
            entropies.append(0.0)
        else:
            p = np.bincount(labs)/labs.size
            ent = -(p*np.log(p+1e-9)).sum()
            entropies.append(ent)
    layer_persistence_index = float(np.mean(entropies))
except Exception as e:
    print('[warn] HDBSCAN not available or failed:', e)

# 2) Leiden communities 3D vs 2D
labs3 = leiden_labels(coords3)
labs2 = leiden_labels(coords2)
community_NMI_2Dvs3D = float(NMI(labs2, labs3))

# 3) Moran's I on resonance
morI, w_edges = morans_I(df['resonance_score'].to_numpy(), coords3, k=12)

# 4) MST geodesic stretch on top‑N resonance
z = df['resonance_score'].to_numpy()
topN = min(600, len(df))
sel = np.argsort(-z)[:topN]
C = coords3[sel]
nbrs = NearestNeighbors(n_neighbors=8).fit(C)
dist, inds = nbrs.kneighbors(return_distance=True)
rows=[]; cols=[]; data=[]
for i,(d,idxs) in enumerate(zip(dist, inds)):
    for dd,j in zip(d[1:], idxs[1:]):
        rows.append(i); cols.append(int(j)); data.append(float(dd))
from scipy import sparse as sp
W = sp.csr_matrix((data,(rows,cols)), shape=(C.shape[0], C.shape[0]))
W = sp.tril(W) + sp.tril(W, -1).T
M = minimum_spanning_tree(W).tocsr()
rng = np.random.default_rng(42)
pairs = rng.choice(C.shape[0], size=(300,2), replace=False)
geo = dijkstra(M, indices=pairs[:,0], directed=False)[np.arange(pairs.shape[0]), pairs[:,1]]
eu = np.linalg.norm(C[pairs[:,0]]-C[pairs[:,1]], axis=1)
mst_stretch_mean = float(np.nanmean(np.clip(geo/eu, 1.0, None)))

# 5) Optional homology (ripser)
betti_signal = None
try:
    from ripser import ripser
    r = ripser(C, maxdim=1)
    H1 = r['dgms'][1]
    if H1.size:
        pers = (H1[:,1]-H1[:,0])
        betti_signal = float(np.median(pers[np.isfinite(pers)]))
except Exception as e:
    print('[warn] ripser not available or failed:', e)

summary = {
    'layer_persistence_index': layer_persistence_index,
    'community_NMI_2D_vs_3D': community_NMI_2Dvs3D,
    'morans_I_resonance': morI,
    'knn_edge_count': w_edges,
    'mst_geodesic_stretch_mean': mst_stretch_mean,
    'H1_loop_persistence_median(optional)': betti_signal,
}
pd.Series(summary)

In [None]:
B = 100
votes = np.zeros((len(df), B), dtype=int)
rng = np.random.default_rng(42)
for b in range(B):
    jitter = rng.uniform(0, 0.02, size=coords3.shape)
    labs_b = leiden_labels(coords3 + jitter, k=16, res=0.8)
    votes[:, b] = labs_b
mode = np.array([np.bincount(votes[i]).argmax() for i in range(len(df))])
stab = (votes == mode[:, None]).mean(1)
df['consensus_label'] = mode
df['stability'] = stab
df[['gene_name','consensus_label','stability']].head(5) if 'gene_name' in df.columns else df[['consensus_label','stability']].head(5)

In [None]:
from scipy.stats import chi2_contingency, kruskal
enrich = {}
if 'tissues' in df.columns:
    tab = pd.crosstab(df['consensus_label'], df['tissues'])
    chi2, p, dof, exp = chi2_contingency(tab)
    enrich['tissues_chi2_p'] = float(p)
if 'trait' in df.columns:
    tab2 = pd.crosstab(df['consensus_label'], df['trait'])
    chi2, p, dof, exp = chi2_contingency(tab2)
    enrich['trait_chi2_p'] = float(p)
groups = [df.loc[df['consensus_label']==c, 'resonance_score'].values for c in sorted(df['consensus_label'].unique())]
H, p_kw = kruskal(*groups)
enrich['resonance_KW_p'] = float(p_kw)
pd.Series(enrich)

In [None]:
core = df.query('stability >= 0.7')
by_comm = core.groupby('consensus_label')
for c, g in by_comm:
    g.sort_values('resonance_score', ascending=False).to_csv(OUTDIR/f'attractor_comm{c}_core.csv', index=False)

summary_path = OUTDIR/"report.txt"
with open(summary_path, 'w') as f:
    f.write("CNT 3D Genomic Field — UPGRADE Summary\n")
    for k,v in summary.items():
        f.write(f"{k}: {v}\n")
    for k,v in enrich.items():
        f.write(f"{k}: {v}\n")
print('Wrote', summary_path)

In [None]:
try:
    import plotly.graph_objects as go
    is_core = df['stability']>=0.7
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter3d(x=df.loc[~is_core,'x2d'], y=df.loc[~is_core,'y2d'], z=df.loc[~is_core,'resonance_score'], mode='markers', marker=dict(size=2, opacity=0.2), name='rim'))
    fig2.add_trace(go.Scatter3d(x=df.loc[is_core,'x2d'],  y=df.loc[is_core,'y2d'],  z=df.loc[is_core,'resonance_score'], mode='markers', marker=dict(size=3, opacity=0.9), name='core'))
    fig2.update_layout(title='Stable cores (≥0.7) vs rims', scene=dict(xaxis_title='x2d', yaxis_title='y2d', zaxis_title='resonance'))
    fig2.show()
except Exception as e:
    print('[warn] plotly overlay skipped:', e)