## Altmap Experiments
### Compare altmap to the map equation using networkx


In [260]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict

# Set the default matplotlib font size for every figure created below.
plt.rcParams.update({'font.size': 20})
# IPython magic; the cell output reports that it selects the Qt5Agg backend
# and populates the interactive namespace from numpy and matplotlib.
%pylab

# Run helpers.py so that its definitions become available in this notebook.
%run helpers.py
# loads the following helper functions:
# infomap(net_path, altmap=False, additional_args='')
# read_tree(tree_path)
# plogq(p, q)
# plogp(p)
# drawNetwork(G, communities)   (later cells also pass labels=False)
# altmap_cost(G, communities)
# create_initfile(G, N_partitions=None, randomized=True)
# generate_two_rings(n_ring=10)

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [3]:
# Ring-of-cliques network, relabelled so that node ids start at 1.
num_cliques = 5
clique_size = 50
N = num_cliques * clique_size  # num nodes
G = nx.convert_node_labels_to_integers(
    nx.ring_of_cliques(num_cliques, clique_size), first_label=1)

# Reference partition: every consecutive run of `clique_size` node ids
# shares one label (0 .. num_cliques-1).
communities = {
    clique * clique_size + offset: clique
    for clique in range(num_cliques)
    for offset in range(1, clique_size + 1)
}

cost = altmap_cost(G, communities)
print(f'Ground Truth L = {cost}\n')

Ground Truth L = -2.310761626697072



In [5]:
# Barbell network with `nodes_connection` extra nodes between the two
# cliques (none here); node ids are shifted to start at 1.
clique_size = 100
nodes_connection = 0
N = 2 * clique_size + nodes_connection  # num nodes
G = nx.convert_node_labels_to_integers(
    nx.barbell_graph(clique_size, nodes_connection), first_label=1)

# Node-id ranges of the two halves of each clique.
half = clique_size // 2
first_lower = range(1, half + 1)
first_upper = range(half + 1, clique_size + 1)
second_lower = range(clique_size + 1, clique_size + half + 1)
second_upper = range(clique_size + half + 1, 2 * clique_size + 1)

# ground truth: ids 1..clique_size -> 1, the remaining ids -> 2
communities = {
    node: (1 if node <= clique_size else 2)
    for node in range(1, 2 * clique_size + 1)
}

cost = altmap_cost(G, communities)
print(f'Ground Truth Cost L = {cost}\n')

# 2 mixed communities: each one takes one half of *both* cliques
communities = {}
for label, blocks in ((1, (first_lower, second_lower)),
                      (2, (first_upper, second_upper))):
    for block in blocks:
        communities.update(dict.fromkeys(block, label))

cost = altmap_cost(G, communities)
print(f'Mixed Communities Cost L = {cost}\n')

# 4 communities: every half-clique is its own community
quarters = (first_lower, first_upper, second_lower, second_upper)
communities = {
    node: label
    for label, block in enumerate(quarters, start=1)
    for node in block
}

cost = altmap_cost(G, communities)
print(f'Four Mixed Communities Cost L = {cost}\n')

Ground Truth Cost L = -0.9985143509845726

Mixed Communities Cost L = -7.506413572699877e-05

Four Mixed Communities Cost L = -0.19950977558666



In [6]:
def barbell_cost(clique_size = 3, print_output=False):
    """Evaluate closed-form altmap cost values for a barbell network.

    The formulas use ``m = nc*(nc-1) + 1`` edges, i.e. two cliques of
    ``nc = clique_size`` nodes each plus one connecting edge.

    Parameters
    ----------
    clique_size : int
        Number of nodes per clique.
    print_output : bool
        When true, print a short report of the three values.

    Returns
    -------
    tuple
        ``(J_init, J_true, J_ind)``; the report labels them "Each node a
        module", "Ground truth" and "Independent sets" respectively.
    """
    nc = clique_size
    n_edges = nc*(nc-1)+1  # number of edges in the network
    total_degree = 2*n_edges
    p_inner = (nc - 1) / total_degree   # stat prob for 'normal' nodes
    p_bridge = nc / total_degree        # stat prob for the 2 connecting nodes

    # "Each node a module" value
    weighted_logs = ((n_edges - nc) * np.log2(3*n_edges - nc**2)
                     + nc*np.log2(2*n_edges - nc))
    J_init = -1 - np.log2(n_edges) + 1/n_edges * weighted_logs

    # "Ground truth" value
    J_true = np.log2(n_edges) - 1.0 - (n_edges-1) / n_edges * np.log2(n_edges-1)

    # "Independent sets" value: contribution of the ordinary nodes plus the
    # contribution involving the connecting nodes, counted twice.
    inner_term = (nc - 2)*p_inner*np.log2(1.0 - 2*p_inner)
    bridge_term = (p_inner + p_bridge)*np.log2(1 - (p_inner + p_bridge))
    J_ind = 2.0 * (inner_term + bridge_term)

    if print_output:
        print(f"\nBarbell network with nc = {nc} nodes per clique:\n")
        print(f"Each node a module - cost = {J_init}")
        print(f"Ground truth cost = {J_true}")
        print(f"Independent sets cost = {J_ind}\n")

    return J_init, J_true, J_ind


# Sweep the clique size from 2 to nc_max and collect the three cost values.
nc_max = 50
nc_list = list(range(2, nc_max + 1))
barbell_costs = np.zeros((len(nc_list), 3))
for i, nc in enumerate(nc_list):
    barbell_costs[i] = barbell_cost(nc)
# One (len(nc_list), 1) column per cost value.
J_init_list, J_true_list, J_ind_list = np.hsplit(barbell_costs, 3)

plt.figure()
for curve, line_style, legend_text in (
        (J_init_list, 'b', 'Initial cost'),
        (J_true_list, 'm', 'Ground truth cost'),
        (J_ind_list, 'r', 'Independent sets cost')):
    plt.plot(nc_list, curve, line_style, label=legend_text)
plt.grid()
plt.title('Barbell network - cost over size')
plt.xlabel('Nodes per clique')
plt.ylabel('Altmap cost')
plt.legend()

<matplotlib.legend.Legend at 0x7f09e9ae1a20>

In [7]:
# Two-rings network from the helper; presumably nodes are numbered 1..N with
# the first n_ring ids on one ring -- TODO confirm against helpers.py.
n_ring = 100
N = 2*n_ring  # num nodes
G = generate_two_rings(n_ring)

# Node-id ranges of the four quarters of the id space.
quarter = N // 4
middle = N // 2
q1 = range(1, quarter + 1)
q2 = range(quarter + 1, middle + 1)
q3 = range(middle + 1, middle + quarter + 1)
q4 = range(middle + quarter + 1, N + 1)

# init: every node is its own community
communities = {node: node for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print(f'Initial Cost L = {cost}\n')

# ground truth: first half of the ids -> 1, second half -> 2
communities = {
    node: (1 if node <= middle else 2)
    for node in range(1, N + 1)
}

cost = altmap_cost(G, communities)
print(f'Ground Truth Cost L = {cost}\n')

# 2 mixed communities: each one takes a quarter from both halves
communities = {}
for label, blocks in ((1, (q1, q3)), (2, (q2, q4))):
    for block in blocks:
        communities.update(dict.fromkeys(block, label))

cost = altmap_cost(G, communities)
print(f'Mixed Communities Cost L = {cost}\n')

# 4 communities: one per quarter (labels follow the original assignment:
# q1 -> 1, q3 -> 2, q2 -> 3, q4 -> 4)
communities = {
    node: label
    for label, block in enumerate((q1, q3, q2, q4), start=1)
    for node in block
}

cost = altmap_cost(G, communities)
print(f'Four Mixed Communities Cost L = {cost}\n')

Initial Cost L = -0.00724344653766134

Ground Truth Cost L = -0.9574362123777358

Mixed Communities Cost L = -0.8367058739707411

Four Mixed Communities Cost L = -1.7993140655681912



In [8]:
def two_rings_cost(ring_size = 3, print_output=False):
    """Evaluate closed-form altmap cost values for a network of two rings.

    The formulas use ``m = 2*n + 1`` edges, i.e. two rings of
    ``n = ring_size`` nodes each plus one connecting edge.

    Parameters
    ----------
    ring_size : int
        Number of nodes per ring. For ``ring_size <= 2`` the term ``p11``
        is not positive, so the 4-communities value is not finite.
    print_output : bool
        When true, print a report of all values, including a lower bound
        that is printed only and not returned.

    Returns
    -------
    tuple
        ``(J_init, J_true, J_ind, J_4_comms)``; the report labels them
        "Each node a module", "Ground truth", "Independent sets" and
        "4 communities" respectively.
    """
    n = ring_size
    m = 2*n+1 # number of edges in the network

    J_init = ((m-3)*np.log2(m-1) + 3*np.log2(2*m-3) - 3) / m - np.log2(m)
    J_true = np.log2(m) - 1.0 - (m-1) / m * np.log2(m-1)
    J_ind = -1.0
    J_lower_bound = -(3 - 3*np.log2(3) + m*np.log2(m))/m

    # 4-communities case: two module types with stationary probabilities
    # p1 and p2; pXnotX is the probability of leaving module X, pXX of
    # staying inside it. (Presumably each ring split in two halves, the
    # p2 halves containing the connecting nodes -- TODO confirm.)
    p1 = n/(2*m)
    p2 = (n+1)/(2*m)
    p1not1 = 1/m
    p11 = p1 - p1not1
    p2not2 = 3/(2*m)
    p22 = p2 - p2not2
    J_4_comms = -2*(p11*np.log2(p11 / (p1**2)) + p1not1*np.log2(p1not1 / (p1*(1-p1))) +
                    p22*np.log2(p22 / (p2**2)) + p2not2*np.log2(p2not2 / (p2*(1-p2))))

    if print_output:
        # Report the local ring size `n`; the previous message read an
        # undefined name `nc` (NameError, or a stale notebook global).
        print(f"\n2 Rings network with n = {n} nodes per ring:\n")
        print(f"Each node a module - cost = {J_init}")
        print(f"Ground truth cost = {J_true}")
        print(f"Independent sets cost = {J_ind}\n")
        print(f"4 communities = {J_4_comms}\n")
        print(f"Lower bound = {J_lower_bound}\n")

    return J_init, J_true, J_ind, J_4_comms

# Sweep the ring size from 3 to n_max and collect the four cost values.
n_max = 50
n_list = list(range(3, n_max + 1))
ring_costs = np.zeros((len(n_list), 4))
for i, n in enumerate(n_list):
    ring_costs[i] = two_rings_cost(n)
# One (len(n_list), 1) column per cost value.
J_init_list, J_true_list, J_ind_list, J_4comms_list = np.hsplit(ring_costs, 4)

plt.figure()
for curve, line_style, legend_text in (
        (J_init_list, 'b', 'Initial cost (N ind sets)'),
        (J_true_list, 'm', 'Ground truth cost'),
        (J_ind_list, 'r', '2 Independent sets cost'),
        (J_4comms_list, 'c', '4 Communities')):
    plt.plot(n_list, curve, line_style, label=legend_text)
plt.grid()
plt.title('2 Rings network - cost over size')
plt.xlabel('Nodes per clique')
plt.ylabel('Altmap cost')
plt.legend()

<matplotlib.legend.Legend at 0x7f09e82e60f0>

In [9]:
# Two fully connected groups (N1 and N2 nodes) with weight p_self on edges
# inside a group and weight p_ext on edges between the groups.
epsilon = 0.15
N1, N2 = 200, 50
N = N1 + N2
p_self = (1.0 - epsilon) / (N/2 - 1)
p_ext = 2.0 *  epsilon / N

group_one = range(1, N1 + 1)
group_two = range(N1 + 1, N + 1)

# generate graph (edges are added in the same order as before: for each
# node of the first group its intra-group edges, then its cross edges;
# afterwards the intra-group edges of the second group)
G = nx.MultiGraph()
G.add_nodes_from(range(1, N + 1))
for u in group_one:
    for v in range(u + 1, N1 + 1):
        G.add_edge(u, v, weight=p_self)

    for v in group_two:
        G.add_edge(u, v, weight=p_ext)

for u in group_two:
    for v in range(u + 1, N + 1):
        G.add_edge(u, v, weight=p_self)

# 2 communities: first N1 ids -> 1, remaining N2 ids -> 2
communities = {
    node: (1 if node <= N1 else 2)
    for node in range(1, N + 1)
}

cost = altmap_cost(G, communities)
print(f'Ground Truth 2 communities L = {cost}')
print(f'Analytical: {-1.0 - plogp(1.0-epsilon) - plogp(epsilon)}\n')

# each node a community
communities = {node: node for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print(f'Ground Truth each node L = {cost}')
print(f'Analytical: {-np.log2(N/(N - 1))}\n')
                

Ground Truth 2 communities L = -0.13539149546531296
Analytical: -0.3901596952835996

Ground Truth each node L = -0.00612389163691027
Analytical: -0.005782352594006178



In [10]:
# Karate club graph with node ids shifted to start at 1.
G = nx.convert_node_labels_to_integers(nx.karate_club_graph(), first_label=1)

In [None]:
# LFR_benchmark_graph has lived in different modules across networkx
# releases: try the current location first and fall back to the legacy
# path this notebook was written against.
try:
    from networkx.generators.community import LFR_benchmark_graph
except ImportError:
    from networkx.algorithms.community.community_generators import LFR_benchmark_graph

N = 250
max_degree = int(0.2*N)
max_community = int(0.2*N)
average_degree = 10
tau1 = 2.1 # Power law exponent for the degree distribution 
tau2 = 1.1 # Power law exponent for the community size distribution
mu = 0.1 # [0.03, 0.75]

G = LFR_benchmark_graph(N, tau1, tau2, mu, average_degree=average_degree, max_degree=max_degree, 
                        max_community=max_community, min_community=25)

# Shift node ids to start at 1. The per-node 'community' attribute is not
# relabelled by this call.
G = nx.convert_node_labels_to_integers(G, first_label=1)

In [237]:
# Build a node -> community-number map from the 'community' node attribute.
# The stored member ids are shifted by +1 to match the 1-based node ids.
ground_truth_communities = {}
num_communities = 1
for n in range(1, N + 1):
    if n not in ground_truth_communities:
        # first unseen node of a community: label all of its members at once
        members = np.asarray(list(G.nodes[n]['community'])) + 1
        for member in members:
            ground_truth_communities[member] = num_communities
        num_communities += 1

# order by node id
ground_truth_communities = OrderedDict(
    (node, ground_truth_communities[node])
    for node in sorted(ground_truth_communities)
)
print(len(ground_truth_communities))
print(f'Number of ground truth communities = {max(ground_truth_communities.values())}')
print(ground_truth_communities)

    

250
Number of ground truth communities = 7
OrderedDict([(1, 1), (2, 2), (3, 1), (4, 3), (5, 4), (6, 2), (7, 5), (8, 3), (9, 3), (10, 3), (11, 1), (12, 4), (13, 4), (14, 5), (15, 4), (16, 1), (17, 3), (18, 2), (19, 6), (20, 5), (21, 4), (22, 5), (23, 5), (24, 5), (25, 4), (26, 2), (27, 1), (28, 5), (29, 3), (30, 5), (31, 5), (32, 2), (33, 2), (34, 2), (35, 2), (36, 3), (37, 4), (38, 3), (39, 6), (40, 6), (41, 1), (42, 1), (43, 4), (44, 2), (45, 4), (46, 2), (47, 2), (48, 4), (49, 3), (50, 4), (51, 7), (52, 2), (53, 1), (54, 5), (55, 7), (56, 5), (57, 1), (58, 6), (59, 6), (60, 1), (61, 6), (62, 7), (63, 7), (64, 7), (65, 6), (66, 6), (67, 4), (68, 4), (69, 7), (70, 1), (71, 4), (72, 4), (73, 1), (74, 7), (75, 7), (76, 1), (77, 5), (78, 7), (79, 5), (80, 7), (81, 5), (82, 4), (83, 5), (84, 4), (85, 6), (86, 7), (87, 4), (88, 4), (89, 7), (90, 1), (91, 7), (92, 3), (93, 4), (94, 1), (95, 5), (96, 6), (97, 4), (98, 7), (99, 7), (100, 6), (101, 5), (102, 6), (103, 5), (104, 7), (105, 4), (1

In [257]:
# Export the graph in Pajek format and run the helper on it
# (altmap=False here).
net_file = workspace_path +  filename + '.net'
nx.write_pajek(G, net_file)
infomap(net_file, altmap=False)
# infomap(workspace_path +  filename + '.net', altmap=True)
# communities = create_initfile(G, randomized=True)
# altmap_cost(G, communities)
# infomap(workspace_path +  filename + '.net', altmap=True, additional_args=' --cluster-data ./workspace/init.tree')

# read results and generate networkx graph

#G = nx.read_pajek(net_path)
df = read_tree(workspace_path +  filename + '.tree')

# node id -> community id, both coerced to plain ints
communities = {
    int(row['node']): int(row['community'])
    for _, row in df.iterrows()
}

print(communities)
labels = communities.values()
# counts the span of label values (max - min + 1)
num_communities = max(labels) - min(labels) + 1
print(f'We found {num_communities} communities.')

cost = altmap_cost(G, communities)
print(f'Achieved cost L = {cost}')

  'Non-string attribute'))


{201: 1, 63: 1, 89: 1, 152: 1, 99: 1, 98: 1, 245: 1, 51: 1, 144: 1, 161: 1, 69: 1, 148: 1, 149: 1, 80: 1, 62: 1, 64: 1, 74: 1, 165: 1, 181: 1, 222: 1, 104: 1, 219: 1, 187: 1, 191: 1, 234: 1, 78: 1, 159: 1, 210: 1, 55: 1, 172: 1, 173: 1, 226: 1, 230: 1, 241: 1, 86: 1, 114: 1, 160: 1, 225: 1, 75: 1, 91: 1, 227: 1, 250: 1, 180: 1, 221: 1, 105: 2, 45: 2, 228: 2, 157: 2, 71: 2, 43: 2, 88: 2, 138: 2, 25: 2, 166: 2, 50: 2, 67: 2, 93: 2, 21: 2, 37: 2, 132: 2, 212: 2, 15: 2, 68: 2, 12: 2, 13: 2, 139: 2, 150: 2, 164: 2, 5: 2, 143: 2, 158: 2, 203: 2, 84: 2, 129: 2, 185: 2, 72: 2, 184: 2, 198: 2, 243: 2, 48: 2, 123: 2, 207: 2, 216: 2, 236: 2, 87: 2, 97: 2, 134: 2, 188: 2, 194: 2, 82: 2, 117: 2, 238: 2, 59: 3, 196: 3, 40: 3, 209: 3, 96: 3, 113: 3, 233: 3, 58: 3, 39: 3, 85: 3, 136: 3, 177: 3, 215: 3, 102: 3, 200: 3, 179: 3, 190: 3, 195: 3, 183: 3, 66: 3, 107: 3, 246: 3, 19: 3, 108: 3, 128: 3, 205: 3, 100: 3, 109: 3, 115: 3, 124: 3, 208: 3, 232: 3, 61: 3, 65: 3, 220: 3, 27: 4, 121: 4, 213: 4, 229: 4,

In [261]:
# Draw the reference partition and the detected one in separate figures,
# then list the (node, label) pairs on which the two mappings disagree.
plt.close('all')

ordered_communities = OrderedDict(sorted(communities.items()))

for heading, partition in (
        ('Ground Truth Communities', ground_truth_communities),
        ('Infomap/Altmap Communities', ordered_communities)):
    plt.figure()
    plt.title(heading)
    drawNetwork(G, partition, labels=False)

print(ordered_communities)
print(ground_truth_communities)

# NOTE(review): this compares raw (node, label) pairs, so two identical
# partitions whose communities are merely numbered differently still show
# up as mismatches -- the printed mappings above use different numbering.
result = ordered_communities.items() ^ ground_truth_communities.items()
print(result)
print(len(result))

OrderedDict([(1, 4), (2, 6), (3, 4), (4, 7), (5, 2), (6, 6), (7, 5), (8, 7), (9, 7), (10, 7), (11, 4), (12, 2), (13, 2), (14, 5), (15, 2), (16, 4), (17, 7), (18, 6), (19, 3), (20, 5), (21, 2), (22, 5), (23, 5), (24, 5), (25, 2), (26, 6), (27, 4), (28, 5), (29, 7), (30, 5), (31, 5), (32, 6), (33, 6), (34, 6), (35, 6), (36, 7), (37, 2), (38, 7), (39, 3), (40, 3), (41, 4), (42, 4), (43, 2), (44, 6), (45, 2), (46, 6), (47, 6), (48, 2), (49, 7), (50, 2), (51, 1), (52, 6), (53, 4), (54, 5), (55, 1), (56, 5), (57, 4), (58, 3), (59, 3), (60, 4), (61, 3), (62, 1), (63, 1), (64, 1), (65, 3), (66, 3), (67, 2), (68, 2), (69, 1), (70, 4), (71, 2), (72, 2), (73, 4), (74, 1), (75, 1), (76, 4), (77, 5), (78, 1), (79, 5), (80, 1), (81, 5), (82, 2), (83, 5), (84, 2), (85, 3), (86, 1), (87, 2), (88, 2), (89, 1), (90, 4), (91, 1), (92, 7), (93, 2), (94, 4), (95, 5), (96, 3), (97, 2), (98, 1), (99, 1), (100, 3), (101, 5), (102, 3), (103, 5), (104, 1), (105, 2), (106, 5), (107, 3), (108, 3), (109, 3), (110,

In [241]:
# Redraw the detected partition (ordered by node id) in a fresh figure,
# using drawNetwork's default labelling.
plt.close('all')
plt.figure()
ordered_communities = OrderedDict(
    (node, communities[node]) for node in sorted(communities)
)
drawNetwork(G, ordered_communities)
