# Symbolic Manifold — From GitHub (Colab)

**Objetivo**: clonar o seu repositório do GitHub, gravar os scripts utilitários no diretório `NHB_Symbolic_Mainfold/code/`, calcular H_rate e a curvatura de Ricci (com fallback), integrar os resultados e pré-visualizar a Fig. 2.


In [ ]:
# 1) Dependências
import subprocess, sys
def pipi(*args):
    """Install the given requirement specifiers with pip, quietly.

    Echoes the equivalent ``pip install`` command, then runs pip as a module
    of the interpreter executing this notebook. ``subprocess.check_call``
    raises ``CalledProcessError`` when pip exits with a non-zero status.
    """
    requirements = list(args)
    print('pip install', ' '.join(requirements))
    pip_command = [sys.executable, '-m', 'pip', 'install', '-q'] + requirements
    subprocess.check_call(pip_command)

# Required packages. psutil is listed explicitly because the RAM report at the
# end of this cell imports it; previously it was only available when the
# runtime happened to ship it.
pipi('networkx>=3.2.1', 'numpy>=1.24', 'pandas>=2.1', 'scipy>=1.11', 'matplotlib', 'lxml', 'pyyaml', 'psutil')
try:
    # Optional curvature stack: a failed install must not abort the notebook.
    pipi('GraphRicciCurvature', 'pot')
    HAS_RICCI = True
    print('GraphRicciCurvature/POT instalados.')
except Exception as e:
    HAS_RICCI = False
    print('Ricci opcional; seguirá sem kappa se falhar:', e)

# Short report of the runtime (Python version and total RAM).
import platform, psutil
print('Python:', platform.python_version())
print('RAM (GB):', round(psutil.virtual_memory().total/1024**3, 2))

In [ ]:
# 2) GitHub — clone or update the repository
# NOTE: the lines starting with `!` and `%` are IPython shell/magic commands,
# so this cell only runs inside a notebook kernel, not as plain Python.
import os
REPO_URL = 'https://github.com/agourakis82/entropic-symbolic-society'  # <-- adjust if needed
BRANCH = 'main'
REPO_DIR = '/content/repo'

if not os.path.exists(REPO_DIR):
    # No local copy yet: clone the repository and check out BRANCH.
    !git clone -b $BRANCH $REPO_URL $REPO_DIR
else:
    # Local copy exists: enter it, fetch/checkout/pull BRANCH, then change
    # back to /content.
    %cd $REPO_DIR
    !git fetch origin $BRANCH
    !git checkout $BRANCH
    !git pull origin $BRANCH
    %cd /content
print('Repo em:', REPO_DIR)

In [ ]:
# 3) Write the helper scripts (compute/integrate) into the repository
from pathlib import Path
# Target directory for the generated scripts; created if it does not exist.
CODE_DIR = Path(REPO_DIR, 'NHB_Symbolic_Mainfold', 'code')
CODE_DIR.mkdir(exist_ok=True, parents=True)

compute_src = r'''#!/usr/bin/env python3
from __future__ import annotations
import argparse, csv, glob, math, sys, time
from pathlib import Path
import numpy as np
import networkx as nx
try:
    from GraphRicciCurvature.OllivierRicci import OllivierRicci
    _HAS_RICCI=True
except Exception:
    _HAS_RICCI=False

def detect_weight_attr(G):
    """Guess which edge attribute holds the edge weight.

    Edges are scanned in iteration order. On each edge, a 'weight' attribute
    that float() accepts wins immediately; otherwise the first attribute of
    that edge whose value is an int or float (booleans excluded) is returned.

    Returns:
        The attribute name, or None when no edge carries a usable attribute.
    """
    for _, _, attrs in G.edges(data=True):
        if 'weight' in attrs:
            try:
                float(attrs['weight'])
            except (TypeError, ValueError, OverflowError):
                # Unusable 'weight' on this edge; fall through to its other
                # attributes instead of swallowing every possible exception.
                pass
            else:
                return 'weight'
        for key, value in attrs.items():
            # bool is a subclass of int, but a flag is not a weight.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return key
    return None

def load_graph(p):
    """Load a graph file and return it as a directed NetworkX graph.

    The reader is chosen from the case-insensitive file extension:
    .graphml, .gpickle/.pickle, .edgelist/.txt; any other extension is read
    as GraphML. A result that is not already a DiGraph/MultiDiGraph is
    converted with nx.DiGraph.
    """
    path = str(p)        # original spelling, used for the actual read
    ext = path.lower()   # lower-cased copy, used only for extension checks
    if ext.endswith(('.gpickle', '.pickle')):
        # nx.read_gpickle no longer exists in NetworkX 3.x; a gpickle file is
        # an ordinary pickle of the graph object.
        # SECURITY: unpickling can execute arbitrary code; only load files
        # from a trusted source.
        import pickle
        with open(path, 'rb') as fh:
            G = pickle.load(fh)
    elif ext.endswith(('.edgelist', '.txt')):
        G = nx.read_edgelist(path, create_using=nx.DiGraph)
    else:
        G = nx.read_graphml(path)
    if not isinstance(G, (nx.DiGraph, nx.MultiDiGraph)):
        G = nx.DiGraph(G)
    return G

def build_P(G, wkey):
    """Build row-normalised transition probabilities for a walk on G.

    Args:
        G: graph exposing nodes() and edges(data=True).
        wkey: edge attribute read as the weight; a missing attribute counts
            as 1.0.

    Returns:
        Dict of dicts with P[u][v] = (total weight of u->v edges) divided by
        (total outgoing weight of u). A node whose outgoing weight is not
        positive gets an empty row.
    """
    weighted_edges = [(u, v, float(d.get(wkey, 1.0))) for u, v, d in G.edges(data=True)]
    out_weight = {u: 0.0 for u in G.nodes()}
    for u, _, w in weighted_edges:
        out_weight[u] += w
    P = {u: {} for u in G.nodes()}
    for u, v, w in weighted_edges:
        total = out_weight[u]
        if total > 0:
            # Accumulate rather than assign: parallel u->v edges (multigraphs)
            # must add up, otherwise the row no longer sums to 1.
            P[u][v] = P[u].get(v, 0.0) + w / total
    return P

def stationary(P, nodes):
    """Solve for the stationary distribution of the chain described by P.

    Builds the dense system (P^T - I) pi = 0, replaces its last equation by
    sum(pi) = 1, and solves it; a least-squares solve is used when the
    matrix is singular. Negative components are clipped to 0 and the result
    is renormalised when its sum is positive.

    Args:
        P: dict of dicts, P[u][v] = probability of moving from u to v.
        nodes: sequence of node keys fixing the matrix ordering.

    Returns:
        Dict mapping each node to its stationary probability (empty when
        nodes is empty). Note the n x n dense matrix: memory grows
        quadratically with the number of nodes.
    """
    n = len(nodes)
    if n == 0:
        # Nothing to solve; the row replacement below would index an empty array.
        return {}
    idx = {u: i for i, u in enumerate(nodes)}
    A = np.zeros((n, n))
    np.fill_diagonal(A, -1.0)
    # One pass over the existing transitions instead of probing every
    # (node, row) pair.
    for src, row in P.items():
        for dst, prob in row.items():
            i = idx.get(dst)
            if i is not None:  # targets outside `nodes` are ignored
                A[i, idx[src]] += prob
    b = np.zeros(n)
    A[-1, :] = 1.0
    b[-1] = 1.0
    try:
        pi = np.linalg.solve(A, b)
    except np.linalg.LinAlgError:
        pi, *_ = np.linalg.lstsq(A, b, rcond=None)
    pi = np.maximum(pi, 0)
    total = pi.sum()
    if total > 0:
        pi = pi / total
    return {u: float(pi[i]) for u, i in idx.items()}

def H_rate(P, pi):
    """Return the entropy rate of the chain in bits per step.

    For every state with a non-empty row, the base-2 Shannon entropy of that
    row is weighted by the state's probability in pi (0 when the state is
    absent from pi) and the weighted values are added up.
    """
    from math import log2
    total = 0.0
    for state, transitions in P.items():
        if transitions:
            row_entropy = 0.0
            for prob in transitions.values():
                if prob > 0:
                    row_entropy -= prob * log2(prob)
            total += pi.get(state, 0.0) * row_entropy
    return float(total)

def mean_ricci(G, wkey, undirected, nsample, method):
    """Return the mean Ollivier-Ricci edge curvature of G and a method label.

    Args:
        G: NetworkX graph.
        wkey: edge attribute passed to OllivierRicci as the weight.
        undirected: when true, curvature is computed on G.to_undirected().
        nsample: when positive and smaller than the number of edges that
            received a curvature value, the mean is taken over a fixed-seed
            random subset of that size. Sampling happens after the full
            curvature computation, so it does not shorten the run.
        method: requested transport method. It is forwarded to OllivierRicci
            only when it is one of the method names that library accepts;
            any other value leaves the library default in place. The label
            always echoes the requested value.

    Returns:
        (kappa, label). kappa is NaN when GraphRicciCurvature is unavailable,
        when no edge received a curvature, or when the computation raised.
    """
    if not _HAS_RICCI:
        return float('nan'), 'ricci_unavailable'
    graph = G.to_undirected() if undirected else G
    options = {'alpha': 0.5, 'weight': wkey, 'verbose': 'ERROR'}
    # Previously `method` only appeared in the label and never reached the
    # solver, so the --ricci-method flag had no effect on the computation.
    if method in ('OTD', 'ATD', 'Sinkhorn', 'OTDSinkhornMix'):
        options['method'] = method
    try:
        orc = OllivierRicci(graph, **options)
        orc.compute_ricci_curvature()
        vals = [
            float(attrs['ricciCurvature'])
            for _, _, attrs in orc.G.edges(data=True)
            if isinstance(attrs.get('ricciCurvature', None), (int, float))
        ]
        if not vals:
            return float('nan'), 'ollivier_empty'
        if nsample and 0 < nsample < len(vals):
            rng = np.random.default_rng(42)  # fixed seed keeps the subset reproducible
            vals = rng.choice(vals, size=nsample, replace=False)
        label = f'ollivier_{method}_post{nsample}' if nsample else f'ollivier_{method}_full'
        return float(np.mean(vals)), label
    except Exception as e:
        # Best effort by design: a curvature failure must not abort the run.
        return float('nan'), f'ollivier_error:{e.__class__.__name__}'

def main():
    """Command-line entry point: compute H_rate and mean curvature for one graph.

    Reads a GraphML file, forces a directed graph, picks (or creates) the
    edge-weight attribute, computes the transition probabilities, their
    stationary distribution, the entropy rate and the mean Ricci curvature,
    and writes a single-row CSV to the path given by --out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--graph', required=True)
    parser.add_argument('--beta', type=float, default=1.0)
    parser.add_argument('--regime', type=str, default='integrated')
    parser.add_argument('--curv-undirected', action='store_true')
    parser.add_argument('--curv-sample', type=int, default=300)
    parser.add_argument('--ricci-method', choices=['approximate','OTD','base'], default='approximate')
    parser.add_argument('--out', required=True)
    args = parser.parse_args()

    started = time.time()
    G = nx.read_graphml(args.graph)
    if not G.is_directed():
        G = nx.DiGraph(G)
    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()

    wkey = detect_weight_attr(G)
    if wkey is None:
        # No numeric edge attribute found: fall back to unit weights.
        wkey = 'weight'
        for _, _, attrs in G.edges(data=True):
            attrs['weight'] = 1.0

    P = build_P(G, wkey)
    pi = stationary(P, list(G.nodes()))
    H = H_rate(P, pi)
    kappa, method = mean_ricci(G, wkey, args.curv_undirected, args.curv_sample, args.ricci_method)
    elapsed = time.time() - started

    # Key order doubles as the CSV column order.
    record = {
        'graph': Path(args.graph).name,
        'beta': float(args.beta),
        'H_rate': float(H),
        'kappa': float('nan') if kappa != kappa else float(kappa),
        'regime': args.regime,
        'nnodes': int(node_count),
        'nedges': int(edge_count),
        'weight_attr': wkey,
        'curv_method': method,
        'elapsed_s': round(float(elapsed), 3),
    }
    with open(args.out, 'w', newline='') as handle:
        writer = csv.DictWriter(handle, fieldnames=list(record))
        writer.writeheader()
        writer.writerow(record)
    print('Wrote', args.out)


if __name__ == '__main__':
    main()
'''

integrate_src = r'''#!/usr/bin/env python3
import argparse, pandas as pd, numpy as np
def main():
    """Command-line entry point: reduce a results CSV to the Fig. 2 source table.

    Reads the CSV given by --in, adds an all-NaN 'kappa' column when it is
    missing, optionally keeps only the rows whose 'beta' equals --select-beta
    (within an absolute tolerance of 1e-8), and writes the columns H_rate,
    kappa and regime to --out.

    Raises:
        SystemExit: when --select-beta is given and no row matches.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--in', dest='inp', required=True)
    ap.add_argument('--out', dest='out', default='source_fig2_entropy_curvature.csv')
    ap.add_argument('--select-beta', type=float, default=None)
    args = ap.parse_args()
    df = pd.read_csv(args.inp)
    if 'kappa' not in df.columns:
        df['kappa'] = np.nan
    if args.select_beta is not None and 'beta' in df.columns:
        # Compare with a tolerance on both sides. Rounding only the column
        # can never match a requested beta that is itself not 8-decimal-clean
        # (e.g. 0.30000000000000004).
        beta_values = pd.to_numeric(df['beta'], errors='coerce')
        matches = np.isclose(beta_values, float(args.select_beta), rtol=0.0, atol=1e-8)
        df = df[matches]
        if df.empty:
            raise SystemExit(f'No rows for beta={args.select_beta}')
    df[['H_rate', 'kappa', 'regime']].to_csv(args.out, index=False)
    print('Wrote', args.out)


if __name__ == '__main__':
    main()
'''

# Write both embedded scripts to disk as UTF-8 files inside CODE_DIR.
compute_path = CODE_DIR.joinpath('compute_entropy_curvature_pro.py')
integrate_path = CODE_DIR.joinpath('integrate_entropy_curvature_pro.py')
with open(compute_path, 'w', encoding='utf-8') as _script_file:
    _script_file.write(compute_src)
with open(integrate_path, 'w', encoding='utf-8') as _script_file:
    _script_file.write(integrate_src)
print('Scripts gravados em', CODE_DIR)

In [ ]:
# 4) Paths & parameters
import os

# Input graph (adjust if needed) and the two CSV outputs, all under REPO_DIR.
GRAPH_PATH = '/'.join([REPO_DIR, 'NHB_Symbolic_Mainfold', 'data', 'word_network.graphml'])
OUT_CSV = '/'.join([REPO_DIR, 'NHB_Symbolic_Mainfold', 'code', 'entropy_curvature_integrated.csv'])
SRC_FIG2 = '/'.join([REPO_DIR, 'NHB_Symbolic_Mainfold', 'code', 'source_fig2_entropy_curvature.csv'])

# Values passed on the command line of the compute/integrate scripts.
BETA = 1.0
REGIME = 'integrated'
CURV_UNDIRECTED = True
CURV_SAMPLE = 300
RICCI_METHOD = 'approximate'

print('GRAPH_PATH=', GRAPH_PATH)
graph_file_found = os.path.exists(GRAPH_PATH)
print('Arquivo existe?', graph_file_found)

In [ ]:
# 5) H_rate baseline run ("Honly")
# NOTE: '--curv-sample 0' only turns off the post-hoc sampling of curvature
# values; the script still attempts the full Ricci computation whenever
# GraphRicciCurvature can be imported.
import subprocess, shlex, sys
HONLY = OUT_CSV.replace('.csv','_Honly.csv')
# sys.executable (not a bare 'python') so the script runs under the same
# interpreter the dependencies were installed into.
cmd = [
  sys.executable, str(CODE_DIR/'compute_entropy_curvature_pro.py'),
  '--graph', GRAPH_PATH, '--beta', str(BETA), '--regime', REGIME,
  '--curv-undirected', '--curv-sample', '0', '--out', HONLY
]
print('>>', ' '.join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print('RC=', p.returncode); print('STDOUT:\n', p.stdout); print('STDERR:\n', p.stderr)
assert p.returncode==0, 'Falha no H_rate; ver STDERR.'

In [ ]:
# 6) Ricci run with fallback
import subprocess, shlex, os, shutil, sys
# sys.executable (not a bare 'python') so the script runs under the same
# interpreter the dependencies were installed into.
cmd = [
  sys.executable, str(CODE_DIR/'compute_entropy_curvature_pro.py'),
  '--graph', GRAPH_PATH, '--beta', str(BETA), '--regime', REGIME,
  '--curv-undirected', '--curv-sample', str(int(CURV_SAMPLE)),
  '--ricci-method', RICCI_METHOD, '--out', OUT_CSV
]
print('>>', ' '.join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print('RC=', p.returncode); print('STDOUT:\n', p.stdout); print('STDERR:\n', p.stderr)
# If the run failed or produced no file, reuse the H-only CSV as OUT_CSV.
if p.returncode != 0 or (not os.path.exists(OUT_CSV)):
    print('[Fallback] Usando H_only como OUT_CSV')
    shutil.copyfile(HONLY, OUT_CSV)
print('OK OUT_CSV:', os.path.exists(OUT_CSV), OUT_CSV)

In [ ]:
# 7) Integrate for Fig. 2
import subprocess, shlex, os, sys
assert os.path.exists(OUT_CSV), 'OUT_CSV ausente; ver células anteriores.'
# sys.executable (not a bare 'python') so the script runs under the same
# interpreter the dependencies were installed into.
cmd = [
  sys.executable, str(CODE_DIR/'integrate_entropy_curvature_pro.py'),
  '--in', OUT_CSV, '--out', SRC_FIG2, '--select-beta', str(BETA)
]
print('>>', ' '.join(shlex.quote(c) for c in cmd))
p = subprocess.run(cmd, capture_output=True, text=True)
print('RC=', p.returncode); print('STDOUT:\n', p.stdout); print('STDERR:\n', p.stderr)
assert p.returncode==0, 'Falha na integração.'

In [ ]:
# 8) Preview
import pandas as pd, matplotlib.pyplot as plt

# Load the full results and the reduced Fig. 2 table, then show their heads.
df_all = pd.read_csv(OUT_CSV)
df_src = pd.read_csv(SRC_FIG2)
display(df_all.head())
display(df_src.head())

# Scatter of entropy rate against mean curvature.
plt.figure()
plt.scatter(df_src['H_rate'], df_src['kappa'])
plt.xlabel('H_rate (bits/step)')
plt.ylabel('kappa (mean OR)')
plt.title('Entropy–Curvature (preview)')
plt.show()