## Altmap Experiments
### Compare Altmap to the map equation using networkx


In [2]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict

plt.rcParams.update({'font.size': 20})
%pylab

%run helpers.py
# loads the following helper functions:
# infomap(net_path, altmap=False, additional_args='')
# read_tree(tree_path)
# plogq(p, q)
# plogp(p)
# drawNetwork(G, communities)
# altmap_cost(G, communities)
# create_initfile(G, N_partitions=None, randomized=True)
# generate_two_rings(n_ring=10)
# 

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Ring-of-cliques test network: num_cliques cliques with clique_size nodes each.
num_cliques = 5
clique_size = 50
N = num_cliques * clique_size  # num nodes
G = nx.convert_node_labels_to_integers(
    nx.ring_of_cliques(num_cliques, clique_size),
    first_label=1,
)

# Ground-truth partition: node labels run from 1 to N and every run of
# clique_size consecutive labels gets one community id (0 .. num_cliques-1).
# NOTE(review): assumes ring_of_cliques numbers the members of a clique
# consecutively -- confirm against the networkx documentation.
communities = {node: (node - 1) // clique_size for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print (f'Ground Truth L = {cost}\n')

Ground Truth L = -2.310761626697072



In [5]:
# Barbell network: two cliques of clique_size nodes joined by a path of
# nodes_connection extra nodes (none here, so the cliques are joined directly).
clique_size = 100
nodes_connection = 0
N = 2 * clique_size + nodes_connection  # num nodes
G = nx.convert_node_labels_to_integers(
    nx.barbell_graph(clique_size, nodes_connection),
    first_label=1,
)

half = clique_size // 2  # size of half a clique

# ground truth: nodes 1..clique_size -> community 1, the rest -> community 2
communities = {
    node: 1 if node <= clique_size else 2
    for node in range(1, 2 * clique_size + 1)
}

cost = altmap_cost(G, communities)
print (f'Ground Truth Cost L = {cost}\n')

# 2 mixed communities: each one takes one half of the first label block and
# one half of the second label block
communities = {
    **dict.fromkeys(range(1, half + 1), 1),
    **dict.fromkeys(range(clique_size + 1, clique_size + half + 1), 1),
    **dict.fromkeys(range(half + 1, clique_size + 1), 2),
    **dict.fromkeys(range(clique_size + half + 1, 2 * clique_size + 1), 2),
}

cost = altmap_cost(G, communities)
print (f'Mixed Communities Cost L = {cost}\n')

# 4 communities: every half of a label block is its own community
communities = {
    **dict.fromkeys(range(1, half + 1), 1),
    **dict.fromkeys(range(half + 1, clique_size + 1), 2),
    **dict.fromkeys(range(clique_size + 1, clique_size + half + 1), 3),
    **dict.fromkeys(range(clique_size + half + 1, 2 * clique_size + 1), 4),
}

cost = altmap_cost(G, communities)
print (f'Four Mixed Communities Cost L = {cost}\n')

Ground Truth Cost L = -0.9985143509845726

Mixed Communities Cost L = -7.506413572699877e-05

Four Mixed Communities Cost L = -0.19950977558666



In [6]:
def barbell_cost(clique_size = 3, print_output=False):
    """Evaluate closed-form cost function values for a barbell network.

    Parameters
    ----------
    clique_size : int
        Number of nodes per clique.
    print_output : bool
        When true, print the three cost values before returning them.

    Returns
    -------
    tuple
        ``(J_init, J_true, J_ind)``: the cost with each node as its own
        module, the ground-truth cost and the independent-sets cost.
    """
    nc = clique_size
    m = nc * (nc - 1) + 1  # number of edges in the network
    two_m = 2 * m
    p0 = (nc - 1) / two_m  # stationary probability of a 'normal' node
    pc = nc / two_m  # stationary probability of each of the 2 connecting nodes

    # independent sets cost
    p_pair = p0 + pc
    inner_term = (nc - 2) * p0 * np.log2(1.0 - 2 * p0)
    pair_term = p_pair * np.log2(1 - p_pair)
    J_ind = 2.0 * (inner_term + pair_term)

    # ground truth cost
    J_true = np.log2(m) - 1.0 - (m - 1) / m * np.log2(m - 1)

    # cost with every node in its own module
    singleton_sum = (m - nc) * np.log2(3 * m - nc**2) + nc * np.log2(2 * m - nc)
    J_init = -1 - np.log2(m) + 1 / m * singleton_sum

    if print_output:
        report = (
            f"\nBarbell network with nc = {nc} nodes per clique:\n",
            f"Each node a module - cost = {J_init}",
            f"Ground truth cost = {J_true}",
            f"Independent sets cost = {J_ind}\n",
        )
        for line in report:
            print (line)

    return J_init, J_true, J_ind


# Sweep the clique size from 2 to nc_max and collect the three cost values.
nc_max = 50
nc_list = list(range(2, nc_max + 1))
J_init_list, J_true_list, J_ind_list = (
    np.zeros((len(nc_list), 1)) for _ in range(3)
)
for i, nc in enumerate(nc_list):
    J_init_list[i], J_true_list[i], J_ind_list[i] = barbell_cost(nc)

# Plot all three cost curves over the clique size.
plt.figure()
curves = (
    (J_init_list, 'b', 'Initial cost'),
    (J_true_list, 'm', 'Ground truth cost'),
    (J_ind_list, 'r', 'Independent sets cost'),
)
for values, fmt, label in curves:
    plt.plot(nc_list, values, fmt, label=label)
plt.grid()
plt.title('Barbell network - cost over size')
plt.xlabel('Nodes per clique')
plt.ylabel('Altmap cost')
plt.legend()

<matplotlib.legend.Legend at 0x7f09e9ae1a20>

In [7]:
# Two-rings network built by the helper from helpers.py.
# NOTE(review): the partitions below assume nodes 1..N/2 form the first ring
# and nodes N/2+1..N the second -- confirm against generate_two_rings.
n_ring = 100
N = 2 * n_ring  # num nodes
G = generate_two_rings(n_ring)

quarter = N // 4
half = N // 2

# init: every node is its own community
communities = {node: node for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print (f'Initial Cost L = {cost}\n')

# ground truth: first half of the labels -> 1, second half -> 2
communities = {node: 1 if node <= half else 2 for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print (f'Ground Truth Cost L = {cost}\n')

# 2 mixed communities: each one takes a quarter of the labels from the first
# half and a quarter from the second half
communities = {
    **dict.fromkeys(range(1, quarter + 1), 1),
    **dict.fromkeys(range(half + 1, half + quarter + 1), 1),
    **dict.fromkeys(range(quarter + 1, half + 1), 2),
    **dict.fromkeys(range(half + quarter + 1, N + 1), 2),
}

cost = altmap_cost(G, communities)
print (f'Mixed Communities Cost L = {cost}\n')

# 4 communities: every quarter of the labels is its own community
communities = {
    **dict.fromkeys(range(1, quarter + 1), 1),
    **dict.fromkeys(range(half + 1, half + quarter + 1), 2),
    **dict.fromkeys(range(quarter + 1, half + 1), 3),
    **dict.fromkeys(range(half + quarter + 1, N + 1), 4),
}

cost = altmap_cost(G, communities)
print (f'Four Mixed Communities Cost L = {cost}\n')

Initial Cost L = -0.00724344653766134

Ground Truth Cost L = -0.9574362123777358

Mixed Communities Cost L = -0.8367058739707411

Four Mixed Communities Cost L = -1.7993140655681912



In [8]:
def two_rings_cost(ring_size = 3, print_output=False):
    """Evaluate closed-form cost function values for a network of two rings.

    Parameters
    ----------
    ring_size : int
        Number of nodes per ring.
    print_output : bool
        When true, print all computed values (including the lower bound,
        which is not part of the return value).

    Returns
    -------
    tuple
        ``(J_init, J_true, J_ind, J_4_comms)``: the cost with each node as
        its own module, the ground-truth cost, the independent-sets cost and
        the cost of the 4-communities partition.
    """
    n = ring_size
    m = 2*n + 1  # number of edges in the network

    J_init = ((m-3)*np.log2(m-1) + 3*np.log2(2*m-3) - 3) / m - np.log2(m)
    J_true = np.log2(m) - 1.0 - (m-1) / m * np.log2(m-1)
    J_ind = -1.0
    J_lower_bound = -(3 - 3*np.log2(3) + m*np.log2(m))/m

    # Probabilities used by the 4-communities cost: p1/p2 are the two module
    # probabilities, pXnotX the part leaving module X, pXX the part staying.
    p1 = n/(2*m)
    p2 = (n+1)/(2*m)
    p1not1 = 1/m
    p11 = p1 - p1not1
    p2not2 = 3/(2*m)
    p22 = p2 - p2not2
    J_4_comms = -2*(p11*np.log2(p11 / (p1**2)) + p1not1*np.log2(p1not1 / (p1*(1-p1))) +
                    p22*np.log2(p22 / (p2**2)) + p2not2*np.log2(p2not2 / (p2*(1-p2))))

    if print_output:
        # Report the local ring size; the previous message interpolated the
        # unrelated name `nc`, which is not defined in this function.
        print (f"\n2 Rings network with n = {n} nodes per ring:\n")
        print (f"Each node a module - cost = {J_init}")
        print (f"Ground truth cost = {J_true}")
        print (f"Independent sets cost = {J_ind}\n")
        print (f"4 communities = {J_4_comms}\n")
        print (f"Lower bound = {J_lower_bound}\n")

    return J_init, J_true, J_ind, J_4_comms

# Sweep the ring size from 3 to n_max and collect the four cost values.
n_max = 50
n_list = list(range(3, n_max + 1))
J_init_list = np.zeros((len(n_list), 1))
J_true_list = np.zeros((len(n_list), 1))
J_ind_list = np.zeros((len(n_list), 1))
J_4comms_list = np.zeros((len(n_list), 1))
for i, n in enumerate(n_list):
    J_init_list[i], J_true_list[i], J_ind_list[i], J_4comms_list[i] = two_rings_cost(n)

# Plot all four cost curves over the ring size.
plt.figure()
plt.plot(n_list, J_init_list, 'b', label='Initial cost (N ind sets)')
plt.plot(n_list, J_true_list, 'm', label='Ground truth cost')
plt.plot(n_list, J_ind_list, 'r', label='2 Independent sets cost')
plt.plot(n_list, J_4comms_list, 'c', label='4 Communities')
plt.grid()
plt.title('2 Rings network - cost over size')
# The x axis is the ring size; the former label 'Nodes per clique' was a
# leftover from the barbell plot.
plt.xlabel('Nodes per ring')
plt.ylabel('Altmap cost')
plt.legend()

<matplotlib.legend.Legend at 0x7f09e82e60f0>

In [9]:
# Weighted two-block network: block 1 has N1 nodes, block 2 has N2 nodes.
# Edges inside a block carry weight p_self, edges between blocks p_ext.
# NOTE(review): p_self is normalised with N/2 - 1 although the blocks have
# different sizes (N1 != N2); the analytical values printed below presumably
# assume equal block sizes -- confirm.
epsilon = 0.15
N1 = 200
N2 = 50
N = N1 + N2
p_self = (1.0 - epsilon) / (N/2 - 1)
p_ext = 2.0 *  epsilon / N

# generate graph
G = nx.MultiGraph()
G.add_nodes_from(range(1, N + 1))
block_2 = range(N1 + 1, N1 + N2 + 1)
for u in range(1, N1 + 1):
    # intra-block edges of block 1 (each pair once) ...
    for v in range(u + 1, N1 + 1):
        G.add_edge(u, v, weight=p_self)
    # ... followed by the edges from u to every node of block 2
    for v in block_2:
        G.add_edge(u, v, weight=p_ext)

# intra-block edges of block 2 (each pair once)
for u in block_2:
    for v in range(u + 1, N1 + N2 + 1):
        G.add_edge(u, v, weight=p_self)

# 2 communities: nodes 1..N1 -> 1, nodes N1+1..N -> 2
communities = {node: 1 if node <= N1 else 2 for node in range(1, N1 + N2 + 1)}

cost = altmap_cost(G, communities)
print (f'Ground Truth 2 communities L = {cost}')
print (f'Analytical: {-1.0 - plogp(1.0-epsilon) - plogp(epsilon)}\n')

# each node a community
communities = {node: node for node in range(1, N + 1)}

cost = altmap_cost(G, communities)
print (f'Ground Truth each node L = {cost}')
print (f'Analytical: {-np.log2(N/(N - 1))}\n')
                

Ground Truth 2 communities L = -0.13539149546531296
Analytical: -0.3901596952835996

Ground Truth each node L = -0.00612389163691027
Analytical: -0.005782352594006178



In [63]:
from networkx.algorithms.community.community_generators import LFR_benchmark_graph

# LFR benchmark parameters
N = 10000
max_degree = int(0.2 * N)
max_community = int(0.2 * N)
average_degree = 10
tau1 = 2.1  # Power law exponent for the degree distribution
tau2 = 1.1  # Power law exponent for the community size distribution
mu = 0.5  # [0.03, 0.75]

G = LFR_benchmark_graph(
    N,
    tau1,
    tau2,
    mu,
    average_degree=average_degree,
    max_degree=max_degree,
    max_community=max_community,
    min_community=25,
)

G = nx.convert_node_labels_to_integers(G, first_label=1)

# extract communities from networkx graph object: walk over the nodes and,
# for every node not labelled yet, give its whole community the next free id
communities_true = {}
num_communities = 0
for n in range(1, N + 1):
    if n not in communities_true:
        num_communities += 1
        # the stored member ids are shifted by one so that they match the
        # relabelled graph (node labels >= 1)
        node_ids = np.asarray(list(G.nodes[n]['community'])) + 1
        communities_true.update(dict.fromkeys(node_ids, num_communities))

# order the mapping by node id
communities_true = OrderedDict(sorted(communities_true.items()))

print (f'Number of ground truth communities = {num_communities}')
print (communities_true)


Number of ground truth communities = 26
OrderedDict([(1, 1), (2, 2), (3, 1), (4, 3), (5, 3), (6, 4), (7, 4), (8, 5), (9, 6), (10, 7), (11, 8), (12, 9), (13, 10), (14, 7), (15, 11), (16, 2), (17, 8), (18, 8), (19, 9), (20, 9), (21, 9), (22, 12), (23, 13), (24, 8), (25, 2), (26, 10), (27, 3), (28, 5), (29, 3), (30, 5), (31, 6), (32, 14), (33, 15), (34, 16), (35, 17), (36, 18), (37, 4), (38, 19), (39, 16), (40, 10), (41, 9), (42, 20), (43, 2), (44, 1), (45, 1), (46, 19), (47, 2), (48, 13), (49, 21), (50, 22), (51, 8), (52, 23), (53, 17), (54, 18), (55, 17), (56, 24), (57, 4), (58, 16), (59, 25), (60, 10), (61, 10), (62, 17), (63, 4), (64, 20), (65, 21), (66, 26), (67, 6), (68, 2), (69, 2), (70, 23), (71, 1), (72, 25), (73, 8), (74, 4), (75, 22), (76, 11), (77, 20), (78, 25), (79, 13), (80, 22), (81, 4), (82, 25), (83, 10), (84, 3), (85, 19), (86, 10), (87, 25), (88, 15), (89, 6), (90, 22), (91, 10), (92, 17), (93, 12), (94, 4), (95, 25), (96, 26), (97, 22), (98, 1), (99, 17), (100, 6), (1

In [None]:
# Export the graph in Pajek format and run Infomap with altmap enabled.
# NOTE(review): workspace_path and filename are not defined in the visible
# cells -- presumably provided by helpers.py or an earlier session; confirm.
net_path = workspace_path +  filename + '.net'
nx.write_pajek(G, net_path)
# infomap(workspace_path +  filename + '.net', altmap=False)
infomap(net_path, altmap=True)
# communities = create_initfile(G, randomized=False)
# altmap_cost(G, communities)
# infomap(workspace_path +  filename + '.net', altmap=True, additional_args=' --cluster-data ./workspace/init.tree')

# read results and generate networkx graph

communities_found = read_communities_from_tree_file()
print (communities_found)
# Count the distinct community ids. The former `max - min + 1` is only
# correct when the ids form a contiguous range.
num_communities = len(set(communities_found.values()))
print (f'We found {num_communities} communities.')

# cost = altmap_cost(G, communities_found)
# print (f'Achieved cost L = {cost}')


  'Non-string attribute'))


OrderedDict([(1, 1126), (2, 242), (3, 2), (4, 735), (5, 9), (6, 245), (7, 828), (8, 5), (9, 212), (10, 14), (11, 1066), (12, 101), (13, 39), (14, 680), (15, 543), (16, 8), (17, 365), (18, 647), (19, 765), (20, 1086), (21, 1074), (22, 1044), (23, 97), (24, 778), (25, 691), (26, 613), (27, 798), (28, 5), (29, 1132), (30, 5), (31, 915), (32, 473), (33, 492), (34, 75), (35, 317), (36, 692), (37, 705), (38, 1045), (39, 848), (40, 422), (41, 101), (42, 1093), (43, 965), (44, 2), (45, 2), (46, 736), (47, 372), (48, 11), (49, 930), (50, 832), (51, 493), (52, 4), (53, 799), (54, 737), (55, 265), (56, 7), (57, 404), (58, 443), (59, 540), (60, 405), (61, 124), (62, 318), (63, 966), (64, 614), (65, 833), (66, 449), (67, 648), (68, 8), (69, 8), (70, 4), (71, 2), (72, 223), (73, 1), (74, 2), (75, 136), (76, 9), (77, 988), (78, 551), (79, 6), (80, 167), (81, 916), (82, 639), (83, 124), (84, 895), (85, 494), (86, 468), (87, 354), (88, 12), (89, 23), (90, 1046), (91, 346), (92, 233), (93, 13), (94, 729

In [61]:
from sklearn.metrics import normalized_mutual_info_score as nmi

# Build both label vectors in the same, explicit node order. Taking
# `.values()` of each dict separately only compares matching nodes if both
# dicts happen to iterate in the same order; a node missing from the found
# partition now raises a KeyError instead of silently misaligning the labels.
node_order = sorted(communities_true)
labels_true = [communities_true[node] for node in node_order]
labels_found = [communities_found[node] for node in node_order]

# normalized mutual information between ground truth and found partition
nmi(labels_true, labels_found, average_method='arithmetic')

0.3469768784633903

In [8]:
# Draw the ground-truth partition and the detected partition, one figure each.
plt.close('all')
partitions_to_draw = (
    ('Ground Truth Communities', communities_true),
    ('Infomap/Altmap Communities', communities_found),
)
for figure_title, partition in partitions_to_draw:
    plt.figure()
    plt.title(figure_title)
    drawNetwork(G, partition, labels=False)



In [241]:
# Draw the current `communities` mapping, ordered by node id, in a new figure.
plt.close('all')
plt.figure()
items_by_node = sorted(communities.items())
ordered_communities = OrderedDict(items_by_node)
drawNetwork(G, ordered_communities)
