# Symbolic Manifold — Entropy & Ricci (Colab) — Self-contained
Fluxo: deps → sync repo → validar grafo → H_rate → Ricci amostrada → integração Fig.2.

In [1]:
# 1) Environment and dependencies
import platform

import psutil

# Report the interpreter version and the total physical RAM in GiB.
print(f"Python: {platform.python_version()}")
print(f"RAM (GB): {round(psutil.virtual_memory().total / 1024**3, 2)}")

# Installs: compatible versions, with Ricci support being optional
import subprocess
import sys


def pipi(*args):
    """Install the given requirement specifiers with pip, quietly.

    The equivalent ``pip install`` command line is echoed first, then pip is
    run as a module of the interpreter executing this code (``sys.executable``)
    with the ``-q`` flag.

    Args:
        *args: Requirement specifiers (or extra pip options) forwarded
            verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    requirements = " ".join(args)
    print("pip install", requirements)

    pip_command = [sys.executable, "-m", "pip", "install", "-q"]
    pip_command.extend(args)
    subprocess.check_call(pip_command)


# Required scientific stack; minimum versions are passed straight to pip.
_BASE_REQUIREMENTS = (
    "networkx>=3.2.1",
    "numpy>=1.24",
    "pandas>=2.1",
    "scipy>=1.11",
    "pyyaml",
    "matplotlib",
    "lxml",
)
pipi(*_BASE_REQUIREMENTS)

# GraphRicciCurvature and POT are optional: a failed install is reported
# but does not abort the cell.
try:
    pipi("GraphRicciCurvature", "pot")
except Exception as install_error:
    print(
        "GraphRicciCurvature/POT opcionais; seguiremos sem kappa se falhar:",
        install_error,
    )
else:
    print("GraphRicciCurvature/POT instalados.")

Python: 3.11.13
RAM (GB): 12.67
pip install networkx>=3.2.1 numpy>=1.24 pandas>=2.1 scipy>=1.11 pyyaml matplotlib lxml
pip install GraphRicciCurvature pot
GraphRicciCurvature/POT instalados.


In [None]:
# 2) GitHub — clone or update the working copy
import os
import subprocess

REPO_URL = "https://github.com/agourakis82/entropic-symbolic-society"
BRANCH = "main"
REPO_DIR = "/content/repo"

# Only treat REPO_DIR as an existing checkout when it actually contains a
# ".git" entry. A bare directory (for example one created by a later cell's
# mkdir before this cell ever ran) is not something git can fetch into.
if not os.path.exists(os.path.join(REPO_DIR, ".git")):
    subprocess.check_call(["git", "clone", "-b", BRANCH, REPO_URL, REPO_DIR])
else:
    # Run git with cwd=REPO_DIR instead of os.chdir(): if any command fails,
    # the notebook's working directory is left untouched.
    for git_args in (
        ["fetch", "origin", BRANCH],
        ["checkout", BRANCH],
        ["pull", "origin", BRANCH],
    ):
        subprocess.check_call(["git", *git_args], cwd=REPO_DIR)

print("Repo em:", REPO_DIR)

In [None]:
# 3) Write the embedded helper scripts to disk
from pathlib import Path

CODE_DIR = Path("/content/repo/NHB_Symbolic_Mainfold/code")
CODE_DIR.mkdir(parents=True, exist_ok=True)

compute_path = CODE_DIR / "compute_entropy_curvature_pro.py"
integrate_path = CODE_DIR / "integrate_entropy_curvature_pro.py"

# The script sources are stored as raw triple-quoted text with REAL newlines
# and real characters. Storing them as a raw single-line literal full of
# "\n" / "\"" escapes would write those escapes literally, producing a
# one-line file that Python treats as a single comment (exit code 0, no
# output). Raw mode is kept so that any backslash added to the embedded code
# later is written to disk unchanged.
compute_src = r'''#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations

"""
compute_entropy_curvature_pro.py (embedded, Colab-ready)
- Entropy rate (H_rate) via streaming power iteration (sem matriz densa).
- Ollivier–Ricci opcional, apenas em arestas amostradas se suportado.
- Compatível com NetworkX 3.x; fallback limpa para ambientes sem GraphRicciCurvature/POT.
- Saída CSV rastreável: graph,beta,H_rate,kappa,regime,nnodes,nedges,weight_attr,curv_method,elapsed_s
"""
import argparse, csv, glob, math, sys, time, warnings
from pathlib import Path
from typing import Optional, Dict
import numpy as np

try:
    import networkx as nx
except Exception as e:
    print("[FATAL] networkx not available:", e, file=sys.stderr)
    sys.exit(1)

_HAS_RICCI = False
try:
    from GraphRicciCurvature.OllivierRicci import OllivierRicci
    _HAS_RICCI = True
except Exception:
    _HAS_RICCI = False

def detect_weight_attr(G: nx.DiGraph) -> Optional[str]:
    for _,_,d in G.edges(data=True):
        if not d: continue
        for c in ("weight","w","freq","frequency","strength","p","prob"):
            if c in d:
                try:
                    float(d[c]); return c
                except Exception:
                    pass
        break
    return None

def load_graph(path: str) -> nx.DiGraph:
    p = path.lower()
    if p.endswith(".graphml"):
        try:
            G = nx.read_graphml(path)
        except Exception as e:
            raise RuntimeError(f"read_graphml failed, install lxml: {e}")
    elif p.endswith(".gpickle") or p.endswith(".pickle"):
        # nx.read_gpickle was removed in NetworkX 3.0; unpickle directly.
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        import pickle
        with open(path, "rb") as fh:
            G = pickle.load(fh)
    elif p.endswith(".edgelist") or p.endswith(".txt"):
        G = nx.read_edgelist(path, create_using=nx.DiGraph)
    else:
        G = nx.read_graphml(path)
    if not isinstance(G, (nx.DiGraph, nx.MultiDiGraph)):
        G = nx.DiGraph(G)
    if not G.is_directed():
        G = nx.DiGraph(G)
    return G

def neighbors_with_probs(G: nx.DiGraph, u, beta: float, wkey: Optional[str]):
    nbrs = list(G.successors(u))
    if not nbrs:
        return np.array([u]), np.array([1.0])
    ws = []
    for v in nbrs:
        d = G[u][v]
        w = d.get(wkey, d.get("weight", 1.0)) if wkey is not None else d.get("weight", 1.0)
        try: w = float(w)
        except Exception: w = 1.0
        ws.append(max(w, 1e-12))
    ws = np.power(np.asarray(ws, dtype=float), float(beta))
    Z = ws.sum()
    probs = ws / Z if (np.isfinite(Z) and Z > 0.0) else np.full(len(ws), 1.0/len(ws))
    return np.asarray(nbrs), probs

def entropy_rate_stream(G: nx.DiGraph, beta: float, wkey: Optional[str], max_iter: int = 200, tol: float = 1e-10) -> float:
    nodes = list(G.nodes())
    n = len(nodes); index = {u:i for i,u in enumerate(nodes)}
    row_probs: Dict = {}; row_entropy = np.zeros(n, dtype=float)
    for u in nodes:
        nbrs, probs = neighbors_with_probs(G, u, beta, wkey)
        row_probs[u] = (nbrs, probs)
        with np.errstate(divide="ignore", invalid="ignore"):
            row_entropy[index[u]] = -np.sum(probs * np.where(probs>0, np.log(probs), 0.0))
    pi = np.full(n, 1.0/n, dtype=float); nxt = np.zeros_like(pi)
    for _ in range(max_iter):
        nxt.fill(0.0)
        for u in nodes:
            i = index[u]; p_i = pi[i]
            nbrs, probs = row_probs[u]
            for v, pv in zip(nbrs, probs):
                nxt[index[v]] += p_i * pv
        s = nxt.sum()
        if s > 0: nxt /= s
        if np.linalg.norm(nxt - pi, 1) < tol:
            pi = nxt; break
        pi, nxt = nxt, pi
    return float(np.dot(pi, row_entropy))

def mean_ollivier_ricci(G: nx.DiGraph, undirected: bool, nsample: Optional[int], alpha: float = 0.5, method: str = "approximate", seed: int = 42):
    if not _HAS_RICCI: return (float("nan"), "ricci_unavailable")
    H = G.to_undirected() if undirected else G
    E = list(H.edges())
    if not E: return (float("nan"), "empty_graph")
    if nsample and 0 < nsample < len(E):
        import random; rng = random.Random(seed)
        E = rng.sample(E, nsample); note = f"_sample{len(E)}"
    else:
        note = "_full"
    try:
        orc = OllivierRicci(H, alpha=alpha, method=method, verbose="ERROR")
        if hasattr(orc, "compute_ricci_curvature_edges"):
            orc.compute_ricci_curvature_edges(E)
        else:
            orc.compute_ricci_curvature()
        vals = []
        for (u,v) in E:
            d = orc.G[u][v]; k = d.get("ricciCurvature", None)
            if k is None: continue
            if math.isfinite(k): vals.append(float(k))
        if not vals: return (float("nan"), f"ollivier_{method}{note}_empty")
        return (float(np.mean(vals)), f"ollivier_{method}{note}")
    except Exception as e:
        warnings.warn(f"OllivierRicci failed: {e}")
        return (float("nan"), f"ollivier_{method}_error")

def run_once(graph_path: str, beta: float, regime: str, no_curv: bool, curv_undirected: bool, curv_sample: Optional[int], ricci_method: str, seed: int):
    t0 = time.time()
    G = load_graph(graph_path); wkey = detect_weight_attr(G)
    H = entropy_rate_stream(G, beta=beta, wkey=wkey)
    if no_curv:
        kappa, method = float("nan"), "ricci_skipped"
    else:
        kappa, method = mean_ollivier_ricci(G, undirected=curv_undirected, nsample=curv_sample, alpha=0.5, method=ricci_method, seed=seed)
    elapsed = time.time() - t0
    return {
        "graph": Path(graph_path).name,
        "beta": float(beta),
        "H_rate": float(H),
        "kappa": kappa,
        "regime": regime,
        "nnodes": int(G.number_of_nodes()),
        "nedges": int(G.number_of_edges()),
        "weight_attr": (wkey or ""),
        "curv_method": method,
        "elapsed_s": round(float(elapsed), 3),
    }

def main():
    ap = argparse.ArgumentParser(description="Compute H_rate and sampled Ollivier–Ricci curvature (memory-safe).")
    ap.add_argument("--graph", required=True)
    ap.add_argument("--beta", nargs="+", type=float, default=[1.0])
    ap.add_argument("--regime", type=str, default="")
    ap.add_argument("--no-curvature", action="store_true")
    ap.add_argument("--curv-undirected", action="store_true")
    ap.add_argument("--curv-sample", type=int, default=500)
    ap.add_argument("--ricci-method", choices=["approximate","OTD","base"], default="approximate")
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--out", required=True)
    args = ap.parse_args()

    paths = sorted(sum([glob.glob(args.graph)], []))
    if not paths:
        print(f"[FATAL] no files matched {args.graph}", file=sys.stderr); sys.exit(2)

    rows = []
    for p in paths:
        for b in args.beta:
            rows.append(run_once(p, b, args.regime, args.no_curvature, args.curv_undirected,
                                 (None if args.curv_sample is None or args.curv_sample<=0 else args.curv_sample),
                                 args.ricci_method, args.seed))

    header = ["graph","beta","H_rate","kappa","regime","nnodes","nedges","weight_attr","curv_method","elapsed_s"]
    with open(args.out, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=header); w.writeheader()
        for r in rows: w.writerow(r)
    print(f"Wrote {args.out}")

if __name__ == "__main__":
    main()
'''

integrate_src = r'''#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
integrate_entropy_curvature_pro.py (embedded)
- Gera `source_fig2_entropy_curvature.csv` a partir de um CSV de entrada.
- Tolera ausência de 'kappa' (cria coluna NaN) e permite filtro por beta.
"""
import argparse, sys
import pandas as pd
import numpy as np

def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="inp", required=True, help="CSV com H_rate/kappa/regime/beta")
    ap.add_argument("--out", dest="out", default="source_fig2_entropy_curvature.csv")
    ap.add_argument("--select-beta", type=float, default=None)
    args = ap.parse_args()

    df = pd.read_csv(args.inp)
    if "H_rate" not in df.columns or "regime" not in df.columns:
        print("[ERROR] Input CSV must have columns: H_rate and regime", file=sys.stderr)
        sys.exit(2)
    if "kappa" not in df.columns:
        df["kappa"] = np.nan

    if args.select_beta is not None and "beta" in df.columns:
        df = df[df["beta"].round(8) == float(args.select_beta)]
        if df.empty:
            print(f"[ERROR] no rows for beta={args.select_beta}", file=sys.stderr)
            sys.exit(3)

    out = df[["H_rate","kappa","regime"]].copy()
    out.to_csv(args.out, index=False)
    print(f"Wrote {args.out} ({len(out)} rows)")

if __name__ == "__main__":
    main()
'''

# Written as UTF-8 because the sources contain non-ASCII characters and
# declare "coding: utf-8" in their headers.
compute_path.write_text(compute_src, encoding="utf-8")
integrate_path.write_text(integrate_src, encoding="utf-8")
print("Escrevi:", compute_path, "e", integrate_path)

In [None]:
# 4) Validate the graph (.graphml)
from pathlib import Path

GRAPH_PATH = "/content/repo/NHB_Symbolic_Mainfold/data/word_network.graphml"  # adjust if needed
p = Path(GRAPH_PATH)
# Report presence and byte size; size is shown as 0 when the file is missing.
print(f"exists: {p.exists()} | size: {p.stat().st_size if p.exists() else 0}")

import networkx as nx

# Parse the file once up front so a broken GraphML surfaces here, with the
# exception repr printed before it is re-raised.
try:
    G = nx.read_graphml(str(p))
except Exception as e:
    print(f"read_graphml ERROR -> {e!r}")
    raise
else:
    print(
        f"OK read_graphml | nodes: {G.number_of_nodes()} "
        f"edges: {G.number_of_edges()} directed? {G.is_directed()}"
    )

In [None]:
# 5) Parameters
# Shared settings read by the run/integrate cells of this notebook.
# Value forwarded as --beta to the compute script and as --select-beta to
# the integration script.
BETA = 1.0
# Label forwarded as --regime; the compute script copies it into the CSV.
REGIME = "integrated"
# Intended to control curvature on the undirected view of the graph
# (cf. the compute script's --curv-undirected flag).
CURV_UNDIRECTED = True
# Number of edges forwarded as --curv-sample.
CURV_SAMPLE = 500
# Value forwarded as --ricci-method.
RICCI_METHOD = "approximate"
# Output CSV of the curvature run; also the input of the integration step.
OUT_CSV = "/content/repo/NHB_Symbolic_Mainfold/code/entropy_curvature_integrated_kappa_sampled.csv"
# Output CSV of the integration step (source data for Fig. 2).
SRC_FIG2 = "/content/repo/NHB_Symbolic_Mainfold/code/source_fig2_entropy_curvature.csv"
print("GRAPH_PATH=", GRAPH_PATH)

In [None]:
# 6) Run H_rate only (curvature skipped)
import shlex
import subprocess
import sys

# Launch the script with sys.executable, i.e. the same interpreter that the
# install cell ran pip under, so the packages installed there are visible to
# the child process. A bare "python" may resolve to a different interpreter.
cmd = [
    sys.executable,
    "/content/repo/NHB_Symbolic_Mainfold/code/compute_entropy_curvature_pro.py",
    "--graph",
    GRAPH_PATH,
    "--beta",
    str(BETA),
    "--regime",
    REGIME,
    "--no-curvature",
    "--out",
    # Sibling file of OUT_CSV with an "_Honly" suffix.
    OUT_CSV.replace(".csv", "_Honly.csv"),
]
# Echo a shell-quoted version of the command for copy/paste debugging.
print(">>", " ".join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print("RC=", p.returncode)
print("STDOUT:\n", p.stdout)
print("STDERR:\n", p.stderr)
# Stop the notebook here if the script failed; its output is printed above.
assert p.returncode == 0, "Falha ao computar H_rate; ver STDERR acima."

In [None]:
# 7) Run sampled Ricci curvature, with one fallback attempt
import shlex
import subprocess
import sys

# Launch with sys.executable so the child process uses the interpreter the
# dependencies were installed into, rather than whatever "python" is on PATH.
cmd = [
    sys.executable,
    "/content/repo/NHB_Symbolic_Mainfold/code/compute_entropy_curvature_pro.py",
    "--graph",
    GRAPH_PATH,
    "--beta",
    str(BETA),
    "--regime",
    REGIME,
]
# Honor the CURV_UNDIRECTED parameter instead of always passing the flag.
if CURV_UNDIRECTED:
    cmd.append("--curv-undirected")
cmd += [
    "--curv-sample",
    str(int(CURV_SAMPLE)),
    "--ricci-method",
    RICCI_METHOD,
    "--out",
    OUT_CSV,
]
print(">>", " ".join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print("RC=", p.returncode)
print("STDOUT:\n", p.stdout)
print("STDERR:\n", p.stderr)
if p.returncode != 0:
    # Retry once with a fixed sample of 200 edges and the "approximate"
    # method; both options are always present in cmd, so they are patched
    # in place right after their flags.
    print("\n[INFO] Fallback: CURV_SAMPLE=200, RICCI=approximate")
    cmd[cmd.index("--curv-sample") + 1] = "200"
    cmd[cmd.index("--ricci-method") + 1] = "approximate"
    print(">>", " ".join(shlex.quote(c) for c in cmd))
    p2 = subprocess.run(cmd, capture_output=True, text=True)
    print("RC=", p2.returncode)
    print("STDOUT:\n", p2.stdout)
    print("STDERR:\n", p2.stderr)
    # Stop the notebook if even the fallback run failed.
    assert p2.returncode == 0, "Ricci falhou mesmo com fallback; ver STDERR acima."

In [None]:
# 8) Integrate results into the Fig. 2 source CSV
import os
import shlex
import subprocess
import sys

# The integration script reads OUT_CSV, so fail early with a clear message
# when the previous cells have not produced it.
assert os.path.exists(
    OUT_CSV
), "CSV de saída não encontrado — rode as células anteriores."
# Launch with sys.executable so the child process uses the interpreter the
# dependencies were installed into, rather than whatever "python" is on PATH.
cmd = [
    sys.executable,
    "/content/repo/NHB_Symbolic_Mainfold/code/integrate_entropy_curvature_pro.py",
    "--in",
    OUT_CSV,
    "--out",
    SRC_FIG2,
    "--select-beta",
    str(BETA),
]
print(">>", " ".join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print("RC=", p.returncode)
print("STDOUT:\n", p.stdout)
print("STDERR:\n", p.stderr)
# Stop the notebook here if integration failed; its output is printed above.
assert p.returncode == 0, "Falha na integração; ver STDERR acima."

In [None]:
# 9) Preview
import os

import pandas as pd

# Show the first rows of each generated CSV under a labelled banner, in the
# order: H-only run, curvature run, Fig. 2 source.
for label, csv_file in (
    ("H_only", OUT_CSV.replace(".csv", "_Honly.csv")),
    ("OUT_CSV", OUT_CSV),
    ("SRC_FIG2", SRC_FIG2),
):
    print(f"--- {label} (head) ---")
    display(pd.read_csv(csv_file).head())
print("Arquivos gerados:\n", OUT_CSV, "\n", SRC_FIG2)