# June 16, 2025: simulate baseline graphs

In [1]:
import os 
import numpy as np 
import pandas as pd 
import graph_tool.all as gt 
import pickle 
from itertools import product
from tqdm import tqdm 

In [2]:
class ARGS:
    """Empty attribute bag; run settings are attached to an instance below."""


# Single shared settings object used by the rest of the notebook.
args = ARGS()

# One seed value is reused for both random number generators.
args.SEED = 100

# Seed graph-tool's RNG first, then NumPy's global RNG, with the same value.
gt.seed_rng(args.SEED)
np.random.seed(args.SEED)

In [3]:
# Synthetic graph size: total number of vertices and number of planted blocks.
num_nodes, num_blocks = 500, 4

In [4]:
# Model selection: degree-correction flag and the model key looked up by
# create_state ('h' is the key that maps to gt.NestedBlockState there).
args.dc = False
args.sbm = 'h'

# Only the 'h' model is treated as nested.
args.nested = args.sbm in ['h']

# MCMC equilibration settings forwarded to gt.mcmc_equilibrate.
args.wait = 120  # alternative value noted by the author: 6000
args.force_niter = 1000
args.niter = 10
# Half of the forced iterations; used later as the number of trailing
# partitions kept for the posterior-mode analysis.
args.num_draws = args.force_niter // 2

# Block-model constructor arguments.
args.B = num_blocks
args.gamma = 2.0  # passed as entropy_args['gamma'] for the 'm' model

In [5]:
def create_state(args, ):
    """Return the graph-tool state class and its constructor kwargs.

    The pair is selected by ``args.sbm``, which must be one of:

    * ``'a'`` -- ``gt.PPBlockState`` with no extra arguments
    * ``'d'`` -- ``gt.BlockState``
    * ``'o'`` -- ``gt.OverlapBlockState``
    * ``'h'`` -- ``gt.NestedBlockState``
    * ``'m'`` -- ``gt.ModularityState`` with ``entropy_args={'gamma': args.gamma}``

    The ``'d'``, ``'o'`` and ``'h'`` entries each receive
    ``deg_corr=args.dc`` and ``B=args.B``.

    Returns:
        tuple: ``(state_class, state_args)``.

    Raises:
        KeyError: if ``args.sbm`` is not one of the keys above.
    """
    # Every entry is built eagerly, so args.dc, args.B and args.gamma are
    # all read regardless of which model is requested.
    catalogue = {
        'a': (gt.PPBlockState, dict()),
        'd': (gt.BlockState, dict(deg_corr=args.dc, B=args.B)),
        'o': (gt.OverlapBlockState, dict(deg_corr=args.dc, B=args.B)),
        'h': (gt.NestedBlockState, dict(deg_corr=args.dc, B=args.B)),
        'm': (gt.ModularityState, dict(entropy_args=dict(gamma=args.gamma))),
    }
    state, state_args = catalogue[args.sbm]
    return state, state_args

In [6]:
def mcmc_eq(args, g, state):
    """Run ``gt.mcmc_equilibrate`` on ``state`` and record a trace per callback.

    Args:
        args: settings object providing ``wait``, ``force_niter`` and ``niter``.
        g: graph; only ``g.num_vertices()`` is read here.
        state: block state to equilibrate (passed straight to graph-tool).

    Returns:
        tuple: ``(state, bs, Bs, Bes, dls)`` where ``bs`` holds a copy of
        ``state.b.a`` per callback, ``Bes`` the ``get_Be()`` values, ``dls``
        the ``entropy()`` values, and ``Bs`` is an all-zero array of length
        ``g.num_vertices() + 1`` (the block-count histogram is not filled in).
    """
    partitions = []        # one copied label array per callback
    eff_block_counts = []  # effective number of blocks per callback
    entropies = []         # description length per callback
    # Placeholder histogram over block counts; the nonempty-B counting
    # (get_nonempty_B) is disabled, so this stays zero.
    block_hist = np.zeros(g.num_vertices() + 1)

    def _record(current):
        partitions.append(current.b.a.copy())
        eff_block_counts.append(current.get_Be())
        entropies.append(current.entropy())

    sweep_options = dict(niter=args.niter)
    gt.mcmc_equilibrate(
        state,
        wait=args.wait,
        force_niter=args.force_niter,
        mcmc_args=sweep_options,
        callback=_record,
    )
    return state, partitions, block_hist, eff_block_counts, entropies

def nested_mcmc_eq(args, g, state):
    """Run ``gt.mcmc_equilibrate`` on a nested state and record per-level traces.

    Args:
        args: settings object providing ``wait``, ``force_niter`` and ``niter``.
        g: graph; only ``g.num_vertices()`` is read here.
        state: nested state exposing ``get_levels()``, ``get_bs()`` and
            ``entropy()``.

    Returns:
        tuple: ``(state, bs, Bs, Bes, dls)`` where ``bs`` holds ``get_bs()``
        per callback, ``Bes[l]`` the ``get_Be()`` values of level ``l``,
        ``dls`` the ``entropy()`` values, and ``Bs`` is one all-zero array of
        length ``g.num_vertices() + 1`` per level (the block-count histogram
        is not filled in).
    """
    num_levels = len(state.get_levels())
    hist_size = g.num_vertices() + 1

    hierarchies = []  # one nested partition per callback
    entropies = []    # description length per callback
    # Per-level placeholders; the nonempty-B counting (get_nonempty_B) is
    # disabled, so the histograms stay zero.
    block_hists = [np.zeros(hist_size) for _ in range(num_levels)]
    eff_block_counts = [[] for _ in range(num_levels)]

    def _record(current):
        hierarchies.append(current.get_bs())
        for depth, level_state in enumerate(current.get_levels()):
            eff_block_counts[depth].append(level_state.get_Be())
        entropies.append(current.entropy())

    sweep_options = dict(niter=args.niter)
    gt.mcmc_equilibrate(
        state,
        wait=args.wait,
        force_niter=args.force_niter,
        mcmc_args=sweep_options,
        callback=_record,
    )
    return state, hierarchies, block_hists, eff_block_counts, entropies

In [7]:
def posterior_modes(args, bs):
    """Cluster the sampled partitions ``bs`` into modes.

    Builds a ``gt.ModeClusterState`` (nested or flat according to
    ``args.nested``), equilibrates it with ``wait=1``, ``niter=1`` and
    ``beta=inf``, and returns the resulting state.
    """
    mode_state = gt.ModeClusterState(bs, nested=args.nested)
    # NOTE(review): beta=inf presumably makes the sweeps greedy
    # (zero temperature) -- confirm against the graph-tool documentation.
    greedy_sweep = dict(niter=1, beta=np.inf)
    gt.mcmc_equilibrate(mode_state, wait=1, mcmc_args=greedy_sweep)
    return mode_state

In [8]:
def draw_graph_blocks(g, b, l, pos, folder):
    """Draw ``g`` with each vertex filled according to its label in ``b``.

    Args:
        g: graph to draw.
        b: label sequence indexed by ``int(v)`` for each vertex ``v`` of ``g``.
        l: level number; only used in the output file name.
        pos: vertex positions forwarded to ``gt.graph_draw``.
        folder: directory that receives ``partition-level{l}.svg``.
    """
    # Copy the labels into an integer vertex property that graph_draw can use
    # as the fill colour.
    block_of = g.new_vertex_property('int')
    for vertex in g.vertices():
        block_of[vertex] = b[int(vertex)]

    svg_path = f"{folder}/partition-level{l}.svg"
    gt.graph_draw(
        g,
        pos=pos,
        vertex_fill_color=block_of,
        output_size=(90, 90),
        output=svg_path,
    )

In [9]:
def _load_or_create_layout(g, cache_path='pos.pkl'):
    """Return vertex positions from ``cache_path``, computing and caching them if needed."""
    try:
        # NOTE: unpickling can execute arbitrary code; this path must only
        # ever hold the cache file written by this function.
        with open(cache_path, 'rb') as f:
            [pos] = pickle.load(f)
    except Exception:
        # Missing or unreadable cache: compute a fresh layout and store it.
        # Catching Exception (rather than a bare except) keeps the
        # best-effort fallback while letting Ctrl-C / SystemExit through.
        pos = gt.sfdp_layout(g)
        with open(cache_path, 'wb') as f:
            pickle.dump([pos], f)
    return pos


def _effective_num_modes(modes):
    """Return exp(entropy) of the normalised ``get_M()`` weights of ``modes``."""
    omegas = np.array([mode.get_M() for mode in modes])
    omegas = omegas / omegas.sum()
    # Treat 0 * log(0) as 0 so a zero-weight mode cannot turn the result
    # into NaN; positive weights are unaffected.
    omegas = omegas[omegas > 0]
    return np.exp((-omegas * np.log(omegas)).sum())


def synthesize_per_param(p_in, p_sub, p_out):
    """Generate one synthetic 4-block graph, fit the model and draw the modes.

    Steps:
      1. Build a 4x4 block matrix (``p_in`` on the diagonal, ``p_sub`` between
         blocks 0-1 and 2-3, ``p_out`` between blocks 0-2 and 1-3, zero
         between 0-3 and 1-2), scale it by ``num_nodes`` and sample a graph
         with ``gt.generate_sbm(..., micro_ers=True)``.
      2. Fit the model selected by ``args.sbm``, run MCMC and cluster the
         last ``args.num_draws`` sampled partitions into modes.
      3. Compute the effective number of modes ``E_m`` as the exponential of
         the entropy of the normalised mode weights.
      4. Write ``graph.svg`` and, for the first ``round(E_m)`` modes, up to
         three hierarchy levels as ``mode-<i>/partition-level<l>.svg`` into a
         folder named after ``p_sub``, ``p_out`` and ``E_m``.

    Args:
        p_in: within-block entry of the block matrix (before scaling).
        p_sub: entry linking the block pairs (0, 1) and (2, 3).
        p_out: entry linking the block pairs (0, 2) and (1, 3).

    Relies on the module-level ``args``, ``num_nodes`` and ``num_blocks``.
    NOTE(review): step 4 uses ``get_max_nested`` / ``project_level`` and so
    assumes a nested state (``args.sbm == 'h'``) -- confirm before using other
    models.
    """
    # 1. define the graph
    probs = np.array([
        [p_in, p_sub, p_out, 0.0],
        [p_sub, p_in, 0.0, p_out],
        [p_out, 0.0, p_in, p_sub],
        [0.0, p_out, p_sub, p_in],
    ])
    probs *= num_nodes

    # Equal-sized planted blocks; integer division keeps the repeat count an
    # int instead of handing np.repeat a float.
    b = np.repeat(np.arange(num_blocks), num_nodes // num_blocks)

    g = gt.generate_sbm(
        b,
        probs,
        micro_ers=True,
    )

    # Positions are cached on disk, so every call after the first reuses the
    # same layout file.
    pos = _load_or_create_layout(g)

    # 2. fit model
    state, state_args = create_state(args)
    if args.sbm in ['h']:
        state = gt.minimize_nested_blockmodel_dl(g, state=state, state_args=state_args)
    else:
        state = gt.minimize_blockmodel_dl(g, state, state_args)
    state.entropy()  # return value unused; kept from the original notebook cell

    if args.sbm in ['h']:
        state, bs, Bs, Bes, dls = nested_mcmc_eq(args, g, state)
    else:
        state, bs, Bs, Bes, dls = mcmc_eq(args, g, state)

    # Keep only the trailing draws, i.e. those taken after the chain has had
    # time to equilibrate.
    pmode = posterior_modes(args, bs[-args.num_draws:])

    # 3. effective number of modes
    modes = pmode.get_modes()
    E_m = _effective_num_modes(modes)

    # 4. draw graph partitions
    folder = f'psub-{p_sub:.2f}_pout-{p_out:.2f}_Em-{E_m:.3f}'
    os.makedirs(folder, exist_ok=True)
    gt.graph_draw(
        g,
        pos,
        output_size=(90, 90),
        output=f"{folder}/graph.svg"
    )

    for idx_mode in range(np.round(E_m).astype(int)):
        mode_folder = f'{folder}/mode-{idx_mode}'
        os.makedirs(mode_folder, exist_ok=True)

        # Separate names so the fitted state and the MCMC samples are not
        # overwritten while iterating over the modes.
        mode_bs = modes[idx_mode].get_max_nested()
        mode_state = state.copy(bs=mode_bs)

        # Draw at most three levels, never more than the hierarchy has.
        num_levels = min(3, len(mode_state.get_levels()))
        for level in range(num_levels):
            level_b = mode_state.project_level(level).b.a
            draw_graph_blocks(g, level_b, level, pos, mode_folder)

In [10]:
# Within-block value shared by every run of the sweep.
p_in = 2 * 0.99

# Sweep grids: p_sub over [0.0, 1.0) in steps of 0.1 and p_out over
# [0.0, 0.5) in steps of 0.05 (the stop value is excluded).
p_subs, p_outs = np.arange(0.0, 1.0, 0.1), np.arange(0.0, 0.5, 0.05)

In [11]:
# Run the full (p_sub, p_out) grid. itertools.product has no len(), so the
# total is passed explicitly to let tqdm show a percentage and an ETA.
for p_sub, p_out in tqdm(product(p_subs, p_outs), total=len(p_subs) * len(p_outs)):
    # np.arange values can carry float error; round them back onto the grid
    # before they are used as parameters and in output folder names.
    p_sub = np.round(p_sub, decimals=1)
    p_out = np.round(p_out, decimals=2)
    synthesize_per_param(p_in, p_sub, p_out)

100it [1:50:11, 66.12s/it]
