## Infomap + NetworkX
Generate and draw a network with NetworkX, colored
according to the community structure found by Infomap.


In [7]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import os

import pandas as pd
from collections import OrderedDict
%pylab


Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
infomap_path = '~/infomap/Infomap'

def infomap(net_path, altmap=False, additional_args=''):
    workspace_path = './workspace/'
    args = ' -2 -u -vvv'
    if altmap:
        args += ' --altmap --to-nodes -p0.05 '
    
    args += additional_args
    
    os.system(infomap_path + ' ' + net_path + ' ' + workspace_path + ' ' + args)

def read_tree(tree_path):    
    df = pd.read_csv(tree_path, sep=' ', header = 1)
    df.columns = ['community', 'flow', 'name', 'node', 'trash']
    df = df.drop(['flow', 'trash'], axis=1)
    
    for i, path in enumerate(df['community']):
        df.iloc[i,0] = path.split(':')[0]
        
    return df

def plogq(p, q):
    if q < 1e-18:
        print (f'Unexpected zero operand in plogq: p={p}, q={q}\n.')
        return 0.0
    
    return p*np.log2(q)

def plogp(p):
    if p < 1e-18:
        return 0.0
    
    return p*np.log2(p)

def drawNetwork(G, communities):
    # position map
    pos = nx.spring_layout(G)
    # community ids
    communities = [v for v in communities.values()]
   
    # Draw edges
    nx.draw_networkx_edges(G, pos)

    # Draw nodes
    nodeCollection = nx.draw_networkx_nodes(G,
        pos = pos,
        node_color = communities,
        cmap = plt.get_cmap('Set3') 
    )

    # Draw node labels
    for n in G.nodes():
        plt.annotate(n,
            xy = pos[n],
            textcoords = 'offset points',
            horizontalalignment = 'center',
            verticalalignment = 'center',
            xytext = [0, 0],
            color = 'k'
        )

    plt.axis('off')
    plt.show()


In [3]:
def altmap_cost(G, communities):
    # compute stationary and conditional distribution for the nodes
    pagerank = nx.pagerank_numpy(G, alpha=0.95)
    p_nodes = np.array([ [val] for val in pagerank.values()])
    p_node_transitions = nx.google_matrix(G, alpha=1.0).T
    
    # print (f'Stationary distribution = {p_nodes}')
    # print (f'Transition matrix = {p_node_transitions}')
    # if we dont trust the page rank results (works for undir networks)
    # p_nodes = np.linalg.matrix_power(p_node_transitions, 100000).dot(p_nodes)
    # p_nodes /= np.sum(p_nodes)
    
    # compute stationary and joint distribution for the communities
    num_communities = max(communities.values()) - min(communities.values()) + 1
    p_comm = np.zeros(num_communities)
    p_comm_stay = np.zeros(num_communities)
    H_x = 0
    H_nodes = 0
    for alpha, node in enumerate(G.nodes):
        comm_idx = communities[node] - 1
        p_comm[comm_idx] += p_nodes[alpha]
        H_nodes -= plogp(p_nodes[alpha])
        
        neighbors = nx.all_neighbors(G, node)
        for neighbor in neighbors:
            beta = neighbor - 1
            H_x += p_nodes[alpha] * plogp(p_node_transitions[beta,alpha])            
            if communities[node] == communities[neighbor]:
                p_comm_stay[comm_idx] += p_nodes[alpha] * p_node_transitions[beta,alpha]
                    
    p_comm_leave = p_comm - p_comm_stay
    
    # print (f'P_comm is {p_comm}.\n')
    # print (f'P_comm_leave is {p_comm_leave}.\n')
    
     # compute altmap cost
    epsilon = 1e-18 # vicinity threshold for numerical stability
    cost_per_module = np.zeros((num_communities,1))
    for i in range(num_communities):
        
        # check for edge cases
        if (p_comm[i] <= epsilon) or (p_comm[i] + epsilon >= 1.0):
            continue
        
        cost_per_module[i] -= plogp(p_comm_stay[i])
        cost_per_module[i] += 2.0 * plogq(p_comm_stay[i], p_comm[i])
        cost_per_module[i] -= plogp(p_comm_leave[i]) 
        cost_per_module[i] += plogq(p_comm_leave[i], p_comm[i] * (1.0 - p_comm[i]))
        #print (f'Cost for module {i+1} is {cost_per_module[i]}.\n')
    
    cost = np.sum(cost_per_module)
    max_cost = H_x + H_nodes
    # print (f'Maximum cost is {max_cost}.\n')
    print (f'AltMap cost is {cost}.')
    # print (f'Total cost would be {max_cost + cost}.\n')
    #print (f'Node entropy H(x) =  {H_x}.\n')
    return cost

# create initial partition file (init.tree)
def create_initfile(G, N_partitions = None, randomized=True):
    
    N = len(G.nodes())
    node_ids = np.asarray(range(1,N+1))
    if randomized:
        np.random.shuffle(node_ids) # randomize node order 
    
    pagerank = nx.pagerank_numpy(G, alpha=0.95)
    p_nodes = np.array([ [val] for val in pagerank.values()])
    
    num_partitions = N_partitions
    if num_partitions == None:
        num_partitions = int(np.sqrt(N))
        
    partition_size = int(N / num_partitions)
    communities = {}
    
    with open(workspace_path + 'init.tree', "w+") as init_file:
        init_file.write('# path flow name node:\n')
        n = 0
        for partition in range(1,num_partitions+1):
            for node_rank in range(1, partition_size + 1):
                if n >= N:
                    break
                node_id = node_ids[n]
                n += 1
                
                node_flow = p_nodes[node_id-1,0]
                init_file.write(str(partition) + ':' + str(node_rank) + ' ' + str(node_flow))
                init_file.write(' ' + '\"' + str(node_id) + '\"' + ' ' +  str(node_id) + '\n')
                communities[node_id] = partition
                
                
        while n < N:
            node_rank += 1
            node_id = node_ids[n]
            n += 1
            
            node_flow = p_nodes[node_id-1,0]
            init_file.write(str(partition) + ':' + str(node_rank) + ' ' + str(node_flow))
            init_file.write(' ' + '\"' + str(node_id) + '\"' + ' ' +  str(node_id) + '\n')
            communities[node_id] = partition
            
    return communities


In [4]:
# params
workspace_path = './workspace/'
filename = 'test'


In [8]:
num_cliques = 2
clique_size = 10
N = num_cliques * clique_size # num nodes
G = nx.ring_of_cliques(num_cliques, clique_size)
G = nx.convert_node_labels_to_integers(G, first_label=1)

communities = {}
for c in range(num_cliques):
    for n in range(1, clique_size+1):
        communities[c*clique_size + n] = c
        
cost = altmap_cost(G, communities)
print (f'Ground Truth L = {cost}\n')

AltMap cost is -0.8493709549963584.
Ground Truth L = -0.8493709549963584



In [12]:
nodes_graph1 = 10
nodes_connection = 0
N = 2 * nodes_graph1 + nodes_connection # num nodes
G = nx.barbell_graph(nodes_graph1, nodes_connection)
G = nx.convert_node_labels_to_integers(G, first_label=1)

# ground truth
communities = {}
for n in range(1, nodes_graph1+1 ):
    communities[n] = 1
    
for n in range(nodes_graph1+1, 2*nodes_graph1+1 ):
    communities[n] = 2

print (communities)
cost = altmap_cost(G, communities)
print (f'Ground Truth Cost L = {cost}\n')

# 2 mixed cliques
communities = {}
for n in range(1, int(clique_size/2)+1 ):
    communities[n] = 1

for n in range(clique_size + 1, clique_size + int(clique_size/2)+1 ):
    communities[n] = 1
    
for n in range(int(clique_size/2) + 1, clique_size + 1 ):
    communities[n] = 2
    
for n in range(clique_size + int(clique_size/2) + 1, 2*clique_size + 1 ):
    communities[n] = 2

print (communities)
cost = altmap_cost(G, communities)
print (f'Mixed Communities Cost L = {cost}\n')

# 4 mixed cliques
communities = {}
for n in range(1, int(clique_size/2)+1 ):
    communities[n] = 1
    
for n in range(int(clique_size/2) + 1, clique_size + 1 ):
    communities[n] = 2

for n in range(clique_size + 1, clique_size + int(clique_size/2)+1 ):
    communities[n] = 3
    
for n in range(clique_size + int(clique_size/2) + 1, 2*clique_size + 1 ):
    communities[n] = 4

print (communities)
cost = altmap_cost(G, communities)
print (f'Four Mixed Communities Cost L = {cost}\n')

{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 2, 20: 2}
AltMap cost is -0.9130372427733722.
Ground Truth Cost L = -0.9130372427733722



NameError: name 'clique_size' is not defined

In [8]:
nodes_graph1 = 10
nodes_connection = 0
N = 2 * nodes_graph1 + nodes_connection # num nodes
G = nx.barbell_graph(nodes_graph1, nodes_connection)
G = nx.convert_node_labels_to_integers(G, first_label=1)

communities = {}
communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 4
communities[5] = 5
communities[6] = 6  
initial_cost = altmap_cost(G, communities)
print (f'Initial cost  = {initial_cost}\n')
#---
communities[1] = 3
communities[2] = 1
communities[3] = 3
communities[4] = 2
communities[5] = 4
communities[6] = 5
cost = altmap_cost(G, communities)
print (f'(1-3) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 3
communities[3] = 3
communities[4] = 2
communities[5] = 4
communities[6] = 5
cost = altmap_cost(G, communities)
print (f'(2-3) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 3
communities[5] = 4
communities[6] = 5
cost = altmap_cost(G, communities)
print (f'(3-4) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 4
communities[5] = 3
communities[6] = 5
cost = altmap_cost(G, communities)
print (f'(3-5) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 4
communities[5] = 5
communities[6] = 3
cost = altmap_cost(G, communities)
print (f'(3-6) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 4
communities[5] = 5
communities[6] = 5
cost = altmap_cost(G, communities)
print (f'(3-6) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 2
communities[3] = 3
communities[4] = 4
communities[5] = 5
communities[6] = 4
cost = altmap_cost(G, communities)
print (f'(3-6) delta L = {cost - initial_cost}\n')

#-----
print ('Best partition:')
communities[1] = 1
communities[2] = 1
communities[3] = 1
communities[4] = 2
communities[5] = 2
communities[6] = 2
cost = altmap_cost(G, communities)
print (f'(1-2) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 1
communities[3] = 1
communities[4] = 1
communities[5] = 2
communities[6] = 2
cost = altmap_cost(G, communities)
print (f'(1-4) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 1
communities[3] = 1
communities[4] = 2
communities[5] = 1
communities[6] = 2
cost = altmap_cost(G, communities)
print (f'(1-5) delta L = {cost - initial_cost}\n')

communities[1] = 1
communities[2] = 1
communities[3] = 1
communities[4] = 2
communities[5] = 2
communities[6] = 1
cost = altmap_cost(G, communities)
print (f'(1-6) delta L = {cost - initial_cost}\n')


KeyError: 7

In [55]:
N = 40 # num nodes
G = nx.MultiGraph()
G.add_nodes_from(range(1, N+1))

for n in range(1,N):
    G.add_edge(n, n+1, weight=1)

G.add_edge(1, int(N/2), weight=1)
G.add_edge(int(N/2) + 1, N, weight=1)

# ground truth
communities = {}
for n in range(1, int(N / 2)+1 ):
    communities[n] = 1
    
for n in range(int(N / 2)+1, N+1 ):
    communities[n] = 2

print (communities)
cost = altmap_cost(G, communities)
print (f'Ground Truth Cost L = {cost}\n')

# 2 mixed cliques
communities = {}
for n in range(1, int(N / 4)+1 ):
    communities[n] = 1

for n in range(int(N / 2) + 1, int(N / 2) + int(N / 4) + 1 ):
    communities[n] = 1
    
for n in range(int(N / 4)+1, int(N / 2) + 1 ):
    communities[n] = 2
    
for n in range(int(N / 2) + int(N / 4) + 1, N + 1 ):
    communities[n] = 2

print (communities)
cost = altmap_cost(G, communities)
print (f'Mixed Communities Cost L = {cost}\n')

# 4 mixed cliques
communities = {}
for n in range(1, int(N / 4)+1 ):
    communities[n] = 1

for n in range(int(N / 2) + 1, int(N / 2) + int(N / 4) + 1 ):
    communities[n] = 2
    
for n in range(int(N / 4)+1, int(N / 2) + 1 ):
    communities[n] = 3
    
for n in range(int(N / 2) + int(N / 4) + 1, N + 1 ):
    communities[n] = 4

print (communities)
cost = altmap_cost(G, communities)
print (f'Four Mixed Communities Cost L = {cost}\n')

# 2 alternating communities
communities = {}
for n in range(1, int(N / 2) + 1):
    communities[2*n] = 1
    communities[2*n - 1] = 2


print (communities)
cost = altmap_cost(G, communities)
print (f'Two Alternating Communities Cost L = {cost}\n')

{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 2, 22: 2, 23: 2, 24: 2, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 2, 31: 2, 32: 2, 33: 2, 34: 2, 35: 2, 36: 2, 37: 2, 38: 2, 39: 2, 40: 2}
AltMap cost is -0.8412673009000788.
Ground Truth Cost L = -0.8412673009000788

{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 11: 2, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 2, 20: 2, 31: 2, 32: 2, 33: 2, 34: 2, 35: 2, 36: 2, 37: 2, 38: 2, 39: 2, 40: 2}
AltMap cost is -0.4716917484229587.
Mixed Communities Cost L = -0.4716917484229587

{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 21: 2, 22: 2, 23: 2, 24: 2, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 2, 11: 3, 12: 3, 13: 3, 14: 3, 15: 3, 16: 3, 17: 3, 18: 3, 19: 3, 20: 3, 31: 4, 32: 4, 33: 4, 34: 4, 35: 4, 36: 4, 37: 4, 38: 4, 39: 4, 40: 4}
AltMap cost is -1.285

In [63]:
epsilon = 0.01
N1 = 30
N2 = 15
N = N1 + N2
p_self = (1.0 - epsilon) / (N/2 - 1)
p_ext = 2.0 *  epsilon / N

# generate graph
G = nx.MultiGraph()
G.add_nodes_from(range(1, N+1))
for n1 in range(1, N1+1):
    for i in range(n1 + 1, N1+1):
        G.add_edge(n1, i, weight=p_self)
    
    for n2 in range(N1+1, N1+N2+1):
        G.add_edge(n1, n2, weight=p_ext)

for n2 in range(N1+1, N1+N2+1):
    for i in range(n2 + 1, N1+N2+1):
        G.add_edge(n2, i, weight=p_self)

# 2 communities
communities = {}
i = 1
for c, n in enumerate([N1, N2]):
    for a in range(1, n+1):
        communities[i] =  c + 1
        i = i + 1
        
print (communities)
cost = altmap_cost(G, communities)
print (f'Ground Truth 2 communities L = {cost}')
print (f'Analytical: {-1.0 - plogp(1.0-epsilon) - plogp(epsilon)}\n')

# each node a community
communities = {}
for n in range(1, N + 1):
        communities[n] = n
        
print (communities)
cost = altmap_cost(G, communities)
print (f'Ground Truth each node L = {cost}')
print (f'Analytical: {-np.log2(N/(N - 1))}\n')
                

{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 2, 32: 2, 33: 2, 34: 2, 35: 2, 36: 2, 37: 2, 38: 2, 39: 2, 40: 2, 41: 2, 42: 2, 43: 2, 44: 2, 45: 2}
AltMap cost is -0.7911272499706904.
Ground Truth 2 communities L = -0.7911272499706904
Analytical: -0.9192068641040888

{1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45}
AltMap cost is -0.03271301250772696.
Ground Truth each node L = -0.03271301250772696
Analytical: -0.03242147769237743



In [100]:
nx.write_pajek(G, workspace_path +  filename + '.net')
infomap(workspace_path +  filename + '.net', altmap=False)
# infomap(workspace_path +  filename + '.net', altmap=True)
# communities = create_initfile(G, randomized=True)
# altmap_cost(G, communities)
# infomap(workspace_path +  filename + '.net', altmap=True, additional_args=' --cluster-data ./workspace/init.tree')

# read results and generate networkx graph

#G = nx.read_pajek(net_path)
df = read_tree(workspace_path +  filename + '.tree')


communities = {}
for index, row in df.iterrows():
    node = int(row['node'])
    communities[node] = int(row['community'])

nx.set_node_attributes(G, name='community', values=communities) # seems to be faulty!!
#print (nx.get_node_attributes(G, name='community'))

print (communities)
num_communities = max(communities.values()) - min(communities.values()) + 1
print (f'We found {num_communities} communities.')


# print network

plt.close('all')
plt.figure()
ordered_communities = OrderedDict(sorted(communities.items()))
drawNetwork(G, ordered_communities)

altmap_cost(G, communities)

{23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 20: 2, 16: 2, 17: 2, 18: 2, 19: 2, 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 4, 7: 4, 8: 4, 9: 4, 10: 4, 11: 5, 12: 5, 13: 5, 14: 5, 15: 5, 29: 6, 30: 6, 31: 6, 32: 6, 33: 6, 34: 7, 35: 7, 36: 7, 37: 7, 38: 7, 21: 8, 22: 8, 39: 8, 40: 8}
We found 8 communities.
AltMap cost is -1.6318967055818243.


  'Non-string attribute'))


-1.6318967055818243