# Symbolic Manifold — Entropy & Ricci Curvature (Colab, GitHub-integrated)
**Objetivo:** rodar **H_rate** e **curvatura de Ollivier–Ricci** no grafo semântico, integrando com seu repositório GitHub.

**Fluxo:** 
1. Checagem de ambiente → 2. Clone/sync do repositório → 3. Instalação de dependências → 4. Gravação dos scripts no repositório → 5. Configuração de caminhos e parâmetros → 6. Execução (rápida e segura) → 7. Geração do `source_fig2_entropy_curvature.csv` → 8. Pré-visualização dos CSVs → 9. (Opcional) Commit & Push para o seu GitHub.


In [ ]:
#@title 1) Checar ambiente (RAM)
# Report the interpreter version and the total physical memory of the runtime.
import platform
import psutil

BYTES_PER_GIB = 1024**3
total_ram_gib = psutil.virtual_memory().total / BYTES_PER_GIB
print("Python:", platform.python_version())
print("RAM (GB):", round(total_ram_gib, 2))


In [ ]:
#@title 2) Clonar/atualizar o repositório (GitHub)
# Clone the repository on the first run; on later runs update the existing checkout.
# Every git call goes through subprocess with check=True so a failed clone/fetch/pull
# stops the cell instead of being silently ignored.
import os, subprocess, shlex, sys
REPO_URL = "https://github.com/agourakis82/entropic-symbolic-society"  #@param {type:"string"}
BRANCH   = "main"  #@param {type:"string"}
REPO_DIR = "/content/repo"  # destination folder


def _git(*git_args, cwd=None):
    """Echo and run one git command; raise CalledProcessError if it fails."""
    cmd = ["git", *git_args]
    print(">>", " ".join(shlex.quote(part) for part in cmd))
    # Arguments are passed as a list (no shell), so URL/branch values are never re-parsed.
    subprocess.run(cmd, check=True, cwd=cwd)


if not os.path.exists(REPO_DIR):
    _git("clone", "-b", BRANCH, REPO_URL, REPO_DIR)
else:
    _git("fetch", "origin", BRANCH, cwd=REPO_DIR)
    _git("checkout", BRANCH, cwd=REPO_DIR)
    _git("pull", "origin", BRANCH, cwd=REPO_DIR)
# Leave the working directory at /content regardless of which branch ran.
os.chdir("/content")
print("Repo pronto em:", REPO_DIR)


In [ ]:
#@title 3) Instalar dependências (pin compatível, NX<3.0)
# Core scientific stack; networkx is pinned to the 2.x series (>=2.8.8,<3.0).
!pip -q install "networkx>=2.8.8,<3.0" numpy pandas pyyaml matplotlib scipy
# Curvature packages are optional: if this install fails, the `|| echo` fallback
# prints a notice instead of failing the cell.
!pip -q install GraphRicciCurvature pot || echo 'Ricci opcional; prossiga mesmo assim'

In [ ]:
#@title 4) Garantir scripts robustos no repositório
# Target locations for the two helper scripts whose sources are defined below as string literals.
from pathlib import Path
CODE_DIR = Path(REPO_DIR)/"NHB_Symbolic_Mainfold"/"code"
CODE_DIR.mkdir(parents=True, exist_ok=True)  # create the folder if the checkout lacks it
pro_path = CODE_DIR/"compute_entropy_curvature_pro.py"
integrate_path = CODE_DIR/"integrate_entropy_curvature_pro.py"
# When True, script files that already exist are rewritten from the strings below.
OVERWRITE = True  #@param {type:"boolean"}

# Robust versions of the scripts (includes Ricci computation on sampled edges)
compute_pro_src = r'''#!/usr/bin/env python3
from __future__ import annotations
import argparse, csv, glob, math, os, random, sys, time, warnings
from pathlib import Path
from typing import Optional, Tuple
import numpy as np
import networkx as nx
try:
    from GraphRicciCurvature.OllivierRicci import OllivierRicci
    _HAS_RICCI=True
except Exception:
    _HAS_RICCI=False
def detect_weight_attr(G):
    """Guess which edge attribute holds a numeric weight.

    Only the first edge whose attribute dict is non-empty is inspected. The
    candidate names are tried in a fixed priority order and the first one
    whose value converts with ``float()`` is returned. Returns ``None`` when
    the graph has no attributed edge or that edge has no usable candidate.
    """
    candidates = ("weight", "w", "freq", "frequency", "strength", "p", "prob")
    # Edges with empty attribute dicts are skipped; the first attributed edge decides.
    first_attrs = next((attrs for _, _, attrs in G.edges(data=True) if attrs), None)
    if first_attrs is None:
        return None
    for name in candidates:
        if name not in first_attrs:
            continue
        try:
            float(first_attrs[name])
        except Exception:
            # Present but not numeric: keep trying lower-priority names.
            continue
        return name
    return None
def load_graph(path:str):
    """Load a graph file and return it as a directed networkx graph.

    The reader is chosen from the (case-insensitive) file extension:
    ``.gpickle``/``.pickle`` -> pickled graph, ``.edgelist``/``.txt`` -> edge
    list read as ``DiGraph``, anything else (including ``.graphml``) ->
    GraphML. An undirected result is converted with ``nx.DiGraph``.
    """
    import pickle
    lowered = path.lower()
    if lowered.endswith(('.gpickle', '.pickle')):
        reader = getattr(nx, 'read_gpickle', None)
        if reader is not None:
            G = reader(path)
        else:
            # networkx 3.0 removed read_gpickle; its migration guide recommends
            # plain pickle.load instead.
            # NOTE: unpickling runs arbitrary code - only load files you trust.
            with open(path, 'rb') as fh:
                G = pickle.load(fh)
    elif lowered.endswith(('.edgelist', '.txt')):
        G = nx.read_edgelist(path, create_using=nx.DiGraph)
    else:
        # '.graphml' and every unrecognised extension are parsed as GraphML.
        G = nx.read_graphml(path)
    if not G.is_directed():
        G = nx.DiGraph(G)
    return G
def neighbors_with_probs(G,u,beta,wkey):
    """Return the successors of ``u`` and the transition probabilities to them.

    Each edge weight is read from ``wkey`` (falling back to ``'weight'``, then
    to 1.0), coerced to float (1.0 when that fails), floored at 1e-12, raised
    to the power ``beta`` and normalised. A node without successors yields a
    single self-transition with probability 1. If the normaliser is not a
    positive finite number, the distribution falls back to uniform.

    Returns a pair of numpy arrays ``(neighbours, probabilities)``.
    """
    successors = list(G.successors(u))
    if len(successors) == 0:
        return np.array([u]), np.array([1.0])

    def _edge_weight(attrs):
        # Preferred key first, generic 'weight' next, 1.0 as the last resort.
        raw = attrs.get('weight', 1.0)
        if wkey is not None:
            raw = attrs.get(wkey, raw)
        try:
            value = float(raw)
        except Exception:
            value = 1.0
        return max(value, 1e-12)

    out_edges = G[u]
    floored = [_edge_weight(out_edges[v]) for v in successors]
    powered = np.power(np.asarray(floored, dtype=float), float(beta))
    total = powered.sum()
    if np.isfinite(total) and total > 0:
        probs = powered / total
    else:
        probs = np.full(len(powered), 1.0 / len(powered))
    return np.asarray(successors), probs
def entropy_rate_stream(G,beta,wkey,max_iter=200,tol=1e-10):
    """Entropy rate (in nats) of the random walk defined by ``neighbors_with_probs``.

    The stationary distribution is approximated by power iteration starting
    from the uniform distribution, for at most ``max_iter`` steps or until the
    L1 change between successive iterates drops below ``tol``. The result is
    ``sum_i pi_i * H_i`` where ``H_i`` is the Shannon entropy of row ``i``.

    Returns ``nan`` for a graph without nodes (the rate is undefined there and
    the uniform start ``1/n`` cannot be formed).
    """
    nodes = list(G.nodes())
    n = len(nodes)
    if n == 0:
        return float('nan')
    index = {u: i for i, u in enumerate(nodes)}

    # Flatten the transition structure once into (source, target, probability)
    # triples so the iteration below does no per-step dictionary lookups.
    src = []
    dst = []
    prob_parts = []
    row_entropy = np.zeros(n)
    for i, u in enumerate(nodes):
        nbrs, probs = neighbors_with_probs(G, u, beta, wkey)
        src.extend([i] * len(nbrs))
        dst.extend(index[v] for v in nbrs)
        prob_parts.append(probs)
        with np.errstate(divide='ignore', invalid='ignore'):
            row_entropy[i] = -np.sum(probs * np.where(probs > 0, np.log(probs), 0.0))
    src = np.asarray(src, dtype=np.intp)
    dst = np.asarray(dst, dtype=np.intp)
    edge_probs = np.concatenate(prob_parts)

    pi = np.full(n, 1.0 / n)
    for _ in range(max_iter):
        # bincount adds the weights in array order, i.e. row by row, neighbour by neighbour.
        nxt = np.bincount(dst, weights=pi[src] * edge_probs, minlength=n)
        total = nxt.sum()
        if total > 0:
            nxt /= total
        converged = np.linalg.norm(nxt - pi, 1) < tol
        pi = nxt
        if converged:
            break
    return float(np.dot(pi, row_entropy))
def mean_ollivier_ricci(G,undirected,nsample,alpha=0.5,method='approximate',seed=42):
    """Mean Ollivier-Ricci curvature over all edges, or over a random edge sample.

    Parameters:
        G: graph to analyse; converted with ``to_undirected()`` when ``undirected`` is true.
        nsample: number of edges to sample; a falsy value or one outside
            ``(0, number_of_edges)`` means every edge is used.
        alpha, method: forwarded to ``OllivierRicci``.
        seed: seed of the private ``random.Random`` used for sampling.

    Returns ``(mean_curvature, note)``. The mean is ``nan`` when the library is
    missing, the graph has no edges, no finite curvature was obtained, or the
    computation raised; ``note`` says which case applied.
    """
    import random
    import sys
    if not _HAS_RICCI:
        return (float('nan'), 'ricci_unavailable')
    H = G.to_undirected() if undirected else G
    E = list(H.edges())
    if not E:
        return (float('nan'), 'empty_graph')
    if nsample and 0 < nsample < len(E):
        E = random.Random(seed).sample(E, nsample)
        note = f'_sample{len(E)}'
    else:
        note = '_full'
    try:
        # NOTE(review): `method` is passed through unchanged - confirm that the
        # installed GraphRicciCurvature accepts every value offered by the CLI.
        orc = OllivierRicci(H, alpha=alpha, method=method, verbose='ERROR')
        per_edge = None
        if hasattr(orc, 'compute_ricci_curvature_edges'):
            # Per the library docs this returns {(u, v): curvature} and does not
            # write the values onto orc.G, so the returned mapping must be kept.
            per_edge = orc.compute_ricci_curvature_edges(E)
        else:
            orc.compute_ricci_curvature()
        vals = []
        for (u, v) in E:
            k = None
            if isinstance(per_edge, dict):
                k = per_edge.get((u, v), per_edge.get((v, u)))
            if k is None and orc.G.has_edge(u, v):
                # Fallback for code paths that annotate the graph instead.
                k = orc.G[u][v].get('ricciCurvature', None)
            if k is None:
                continue
            if math.isfinite(k):
                vals.append(float(k))
        if not vals:
            return (float('nan'), f'ollivier_{method}{note}_empty')
        return (float(np.mean(vals)), f'ollivier_{method}{note}')
    except Exception as e:
        # Best effort: keep the run alive, but say why curvature is missing.
        print(f'[WARN] Ollivier-Ricci computation failed: {type(e).__name__}: {e}', file=sys.stderr)
        return (float('nan'), f'ollivier_{method}_error')
def main():
    """Command-line entry point: one CSV row per (graph file, beta) pair.

    ``--graph`` is a glob pattern. For every matching file the graph is loaded,
    the mean curvature is computed once (it does not depend on beta) and the
    entropy rate is computed for each value given to ``--beta``. Exits with
    status 2 when the pattern matches nothing.
    """
    ap=argparse.ArgumentParser()
    ap.add_argument('--graph', required=True)
    ap.add_argument('--beta', nargs='+', type=float, default=[1.0])
    ap.add_argument('--regime', type=str, default='')
    ap.add_argument('--no-curvature', action='store_true')
    ap.add_argument('--curv-undirected', action='store_true')
    ap.add_argument('--curv-sample', type=int, default=500)
    ap.add_argument('--ricci-method', choices=['OTD','base','approximate'], default='approximate')
    ap.add_argument('--seed', type=int, default=42)
    ap.add_argument('--out', required=True)
    args=ap.parse_args()
    paths=sorted(glob.glob(args.graph))
    if not paths:
        print('[FATAL] no files matched', args.graph, file=sys.stderr); sys.exit(2)
    header=['graph','beta','H_rate','kappa','regime','nnodes','nedges','weight_attr','curv_method','elapsed_s']
    # A non-positive sample size means "use every edge".
    nsample = args.curv_sample if args.curv_sample > 0 else None
    rows=[]
    for p in paths:
        t0=time.time()
        G=load_graph(p)
        wkey=detect_weight_attr(G)
        if args.no_curvature:
            kappa,method=float('nan'),'ricci_skipped'
        else:
            kappa,method=mean_ollivier_ricci(
                G,
                undirected=args.curv_undirected,
                nsample=nsample,
                method=args.ricci_method,
                seed=args.seed,
            )
        # Loading and curvature are shared by all betas of this graph.
        shared_s=time.time()-t0
        for beta in args.beta:
            tb=time.time()
            H=entropy_rate_stream(G, beta=beta, wkey=wkey)
            rows.append({
                'graph':Path(p).name,
                'beta':float(beta),
                'H_rate':float(H),
                'kappa':kappa,
                'regime':args.regime,
                'nnodes':G.number_of_nodes(),
                'nedges':G.number_of_edges(),
                'weight_attr':(wkey or ''),
                'curv_method':method,
                'elapsed_s':round(shared_s+(time.time()-tb),3)
            })
    with open(args.out,'w',newline='',encoding='utf-8') as f:
        w=csv.DictWriter(f, fieldnames=header)
        w.writeheader()
        w.writerows(rows)
    print('Wrote', args.out)
if __name__=='__main__': main()
'''
integrate_src = r'''#!/usr/bin/env python3
import argparse, pandas as pd, sys
# Reduce the compute script's CSV to the three columns H_rate, kappa and regime,
# optionally keeping only the rows of one beta value.
ap=argparse.ArgumentParser()
ap.add_argument('--in', dest='inp', required=True)
ap.add_argument('--out', dest='out', default='source_fig2_entropy_curvature.csv')
ap.add_argument('--select-beta', type=float, default=None)
args=ap.parse_args()
df=pd.read_csv(args.inp)
req=['H_rate','kappa','regime']
missing=[c for c in req if c not in df.columns]
if missing:
    # Exit status 2 signals an unusable input file; say which columns are absent.
    print(f"[FATAL] {args.inp} is missing required column(s): {', '.join(missing)}", file=sys.stderr)
    sys.exit(2)
if args.select_beta is not None:
    if 'beta' not in df.columns:
        print(f"[FATAL] --select-beta was given but {args.inp} has no 'beta' column", file=sys.stderr)
        sys.exit(2)
    # Round both sides so a value with more than 8 decimals can still match.
    target=round(float(args.select_beta), 8)
    df=df[df['beta'].round(8)==target]
    if df.empty:
        print(f"[WARN] no rows with beta={target}; the output will contain only the header", file=sys.stderr)
out=df[req].copy(); out.to_csv(args.out, index=False)
print('Wrote', args.out)
'''
# Write each script when overwriting is enabled or the file is not there yet.
for target_path, script_text in ((pro_path, compute_pro_src), (integrate_path, integrate_src)):
    if OVERWRITE or not target_path.exists():
        target_path.write_text(script_text, encoding='utf-8')
print('Scripts prontos em', CODE_DIR)


In [ ]:
#@title 5) Parâmetros de execução
# Input graph handed to the compute script via --graph.
GRAPH_PATH = "/content/repo/NHB_Symbolic_Mainfold/data/word_network.graphml"  #@param {type:"string"}
# Passed as --beta to the compute step and as --select-beta to the integrate step.
BETA = 1.0  #@param {type:"number"}
# Label written to the `regime` column of the output CSV.
REGIME = "integrated"  #@param {type:"string"}
# When True, --curv-undirected is added to the compute command line.
CURV_UNDIRECTED = True  #@param {type:"boolean"}
# Number of edges sampled for curvature (--curv-sample); a value <= 0 makes the script use every edge.
CURV_SAMPLE = 500  #@param {type:"integer"}
# Value for --ricci-method.
RICCI_METHOD = "approximate"  #@param ["approximate","OTD","base"]
# Output of the compute step and input of the integrate step.
OUT_CSV = "/content/repo/NHB_Symbolic_Mainfold/code/entropy_curvature_integrated_kappa_sampled.csv"  #@param {type:"string"}
# Three-column CSV (H_rate, kappa, regime) produced by the integrate step.
SRC_FIG2 = "/content/repo/NHB_Symbolic_Mainfold/code/source_fig2_entropy_curvature.csv"  #@param {type:"string"}
print(GRAPH_PATH)


In [ ]:
#@title 6) Executar (H_rate + kappa amostrada)
# Build the compute-script command line from the form parameters and run it.
import shlex
import subprocess
import sys
from pathlib import Path

compute_script = Path(REPO_DIR)/'NHB_Symbolic_Mainfold'/'code'/'compute_entropy_curvature_pro.py'
cmd = [
  # Use the kernel's own interpreter so the script sees the packages installed in this runtime.
  sys.executable, str(compute_script),
  '--graph', GRAPH_PATH,
  '--beta', str(BETA),
  '--regime', REGIME,
]
if CURV_UNDIRECTED: cmd += ['--curv-undirected']
if CURV_SAMPLE is not None: cmd += ['--curv-sample', str(int(CURV_SAMPLE))]
cmd += ['--ricci-method', RICCI_METHOD, '--out', OUT_CSV]
print('>>', ' '.join(shlex.quote(c) for c in cmd))
# check=True stops the cell when the script exits with a non-zero status.
subprocess.run(cmd, check=True)
print('OK:', OUT_CSV)


In [ ]:
#@title 7) Gerar `source_fig2_entropy_curvature.csv` (β=1.0)
# Reduce the compute output to the figure source CSV, keeping only rows of the selected beta.
import shlex
import subprocess
import sys
from pathlib import Path

integrate_script = Path(REPO_DIR)/'NHB_Symbolic_Mainfold'/'code'/'integrate_entropy_curvature_pro.py'
cmd=[
  # Use the kernel's own interpreter so the script sees the packages installed in this runtime.
  sys.executable, str(integrate_script),
  '--in', OUT_CSV,
  '--out', SRC_FIG2,
  '--select-beta', str(BETA)
]
print('>>', ' '.join(shlex.quote(c) for c in cmd))
# check=True stops the cell when the script exits with a non-zero status.
subprocess.run(cmd, check=True)
print('OK:', SRC_FIG2)


In [ ]:
#@title 8) Pré-visualizar CSVs gerados
# Show the first rows of both generated CSV files, each under its own banner.
import pandas as pd
previews = (
    ('--- OUT_CSV (head) ---', OUT_CSV),
    ('--- SRC_FIG2 (head) ---', SRC_FIG2),
)
for banner, csv_path in previews:
    print(banner)
    display(pd.read_csv(csv_path).head())


## 9) (Opcional) Commit & Push de volta ao GitHub
Para *push*, você precisa de um **Personal Access Token (PAT)** com permissão para o repositório.

- Crie um token (ou use um já existente) e informe-o quando a célula abaixo solicitar (a entrada via `getpass` não é exibida na tela).  
- **Atenção:** em Colab, o token fica no histórico da sessão. Evite compartilhar o notebook com o token inserido.


In [ ]:
#@title Commit & Push
# Optionally commit the two generated CSV files and push HEAD using a Personal Access Token.
from getpass import getpass
from urllib.parse import quote
import os, subprocess
DO_PUSH = False  #@param {type:"boolean"}
GITHUB_USER = ""  #@param {type:"string"}
if DO_PUSH:
    if not GITHUB_USER.strip():
        raise ValueError("Preencha GITHUB_USER antes de ativar DO_PUSH.")
    TOKEN = getpass("GitHub Personal Access Token (escopo repo): ")
    if not TOKEN:
        raise ValueError("Token vazio; push cancelado.")
    os.chdir(REPO_DIR)
    subprocess.run(["git","config","user.email", f"{GITHUB_USER}@users.noreply.github.com"], check=True)
    subprocess.run(["git","config","user.name", GITHUB_USER], check=True)
    subprocess.run(["git","add","NHB_Symbolic_Mainfold/code/entropy_curvature_integrated_kappa_sampled.csv","NHB_Symbolic_Mainfold/code/source_fig2_entropy_curvature.csv"], check=True)
    # `git diff --cached --quiet` exits 0 when nothing is staged; committing in that
    # state would fail and abort the cell before the push.
    nothing_staged = subprocess.run(["git","diff","--cached","--quiet"]).returncode == 0
    if nothing_staged:
        print("Nenhuma alteração para commit; enviando o HEAD atual.")
    else:
        subprocess.run(["git","commit","-m","Add entropy/curvature outputs (Colab)"] , check=True)
    # Percent-encode the credentials so special characters cannot break the URL.
    safe_user = quote(GITHUB_USER, safe="")
    safe_token = quote(TOKEN, safe="")
    remote_https = REPO_URL.replace("https://", f"https://{safe_user}:{safe_token}@", 1)
    # No check=True here: CalledProcessError would print the full command, token included.
    push = subprocess.run(["git","push", remote_https, "HEAD"], capture_output=True, text=True)
    for stream_text in (push.stdout, push.stderr):
        if stream_text:
            # Scrub the token from git's own output before showing it.
            print(stream_text.replace(safe_token, "***").replace(TOKEN, "***"), end="")
    if push.returncode != 0:
        raise RuntimeError(f"git push falhou (código {push.returncode}); URL remota omitida para proteger o token.")
    print("Push realizado com sucesso.")
else:
    print("Push desativado (DO_PUSH=False)")
