## Infomap + NetworkX
Generate networks with NetworkX, evaluate the AltMap cost of candidate partitions,
and draw a network with its nodes colored according to the community structure found by Infomap.


In [82]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from collections import OrderedDict
%pylab


Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [83]:
# Path of the Infomap executable that infomap() invokes (home-relative, note the '~' prefix).
infomap_path = '~/infomap/Infomap'

def infomap(net_path, altmap=False):
    """Run the external Infomap executable on a network file.

    The results are written by Infomap into the './output/' directory, which is
    created if it does not exist yet.

    Parameters
    ----------
    net_path : str
        Path of the network file handed to Infomap.
    altmap : bool, optional
        When True, the flags '--altmap --to-nodes -p0.05' are appended to the
        default flags '-2 -u -vvv'.

    Returns
    -------
    None
        Failures (executable not startable, non-zero exit status) are reported
        with a printed message instead of being silently ignored.
    """
    import subprocess

    out_path = './output/'
    args = ['-2', '-u', '-vvv']
    if altmap:
        args += ['--altmap', '--to-nodes', '-p0.05']

    # Infomap needs an existing output directory.
    os.makedirs(out_path, exist_ok=True)

    # An argument list (instead of one concatenated shell string) keeps paths
    # containing spaces or shell metacharacters intact. Without a shell the
    # leading '~' must be expanded explicitly.
    command = [os.path.expanduser(infomap_path), net_path, out_path] + args
    try:
        completed = subprocess.run(command)
    except OSError as err:
        print(f'Could not start Infomap ({command[0]}): {err}')
        return

    if completed.returncode != 0:
        print(f'Infomap exited with status {completed.returncode}.')

def read_tree(tree_path):
    """Load a space-separated tree file into a DataFrame.

    The second line of the file is used as the header row (``header=1``), so
    everything before it is skipped. The five parsed columns are renamed to
    'community', 'flow', 'name', 'node' and 'trash'; 'flow' and 'trash' are
    dropped afterwards.

    Parameters
    ----------
    tree_path : str
        Path of the tree file to read.

    Returns
    -------
    pandas.DataFrame
        Columns 'community', 'name' and 'node'. 'community' holds the part of
        the module path before the first ':' (as a string).
    """
    df = pd.read_csv(tree_path, sep=' ', header=1)
    df.columns = ['community', 'flow', 'name', 'node', 'trash']
    df = df.drop(columns=['flow', 'trash'])

    # Keep only the top-level module of each 'a:b:...' path. Vectorised string
    # ops replace the former per-row .iloc assignment.
    df['community'] = df['community'].astype(str).str.split(':').str[0]

    return df

def plogq(p, q):
    """Return ``p * log2(q)``.

    When ``q`` is below 1e-18 the logarithm is not evaluated: a diagnostic is
    printed and 0.0 is returned instead.
    """
    q_is_zero = q < 1e-18
    if not q_is_zero:
        return p*np.log2(q)

    print (f'Unexpected zero operand in plogq: p={p}, q={q}\n.')
    return 0.0

def plogp(p):
    """Return ``p * log2(p)``, or 0.0 when ``p`` is below 1e-18."""
    return 0.0 if p < 1e-18 else p*np.log2(p)

def drawNetwork(G, communities):
    """Draw ``G`` with a spring layout, coloring every node by its community.

    Parameters
    ----------
    G : networkx graph
        Graph to draw.
    communities : dict
        Mapping from node to community id. The ids are used as color values
        for the 'Set3' colormap.

    Notes
    -----
    Colors are looked up per node, so the order of ``communities`` does not
    have to match the node order of ``G``. Only if some node of ``G`` is not a
    key of ``communities`` are the dict values used positionally, as before.
    """
    # position map
    pos = nx.spring_layout(G)

    # community id per node, aligned with the order in which nodes are drawn
    nodes = list(G.nodes())
    if all(n in communities for n in nodes):
        node_colors = [communities[n] for n in nodes]
    else:
        # Keys do not match the node labels: fall back to positional values.
        node_colors = list(communities.values())

    # Draw edges
    nx.draw_networkx_edges(G, pos)

    # Draw nodes (explicit nodelist keeps nodes and colors in the same order)
    nx.draw_networkx_nodes(G,
        pos = pos,
        nodelist = nodes,
        node_color = node_colors,
        cmap = plt.get_cmap('Set3')
    )

    # Draw node labels centered on the node positions
    for n in nodes:
        plt.annotate(n,
            xy = pos[n],
            textcoords = 'offset points',
            horizontalalignment = 'center',
            verticalalignment = 'center',
            xytext = [0, 0],
            color = 'k'
        )

    plt.axis('off')
    plt.show()


In [84]:
def altmap_cost(G, communities):
    """Compute and print the AltMap cost of a partition of ``G``.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes are partitioned.
    communities : dict
        Mapping from every node of ``G`` to an integer community id. Ids may
        start at any integer (0-based and 1-based labelings both work) and do
        not have to be contiguous.

    Returns
    -------
    float
        Sum of the per-community cost terms. The value is also printed as
        'AltMap cost is ...'.

    Notes
    -----
    Node probabilities come from PageRank with ``alpha=0.95`` while the
    transition probabilities come from ``nx.google_matrix`` with ``alpha=1.0``.
    If the PageRank result is not trusted, the stationary distribution can be
    obtained from a high power of the transition matrix instead (the original
    note says this works for undirected networks).
    """
    # nx.pagerank_numpy was removed in NetworkX 3.0; use nx.pagerank there.
    pagerank_fn = getattr(nx, 'pagerank_numpy', nx.pagerank)
    pagerank = pagerank_fn(G, alpha=0.95)

    # Fix one node order and index nodes by position, so that arbitrary node
    # labels work (not only the integers 1..N).
    nodes = list(G.nodes)
    node_index = {node: idx for idx, node in enumerate(nodes)}

    # stationary distribution (1-D) and transposed transition matrix:
    # p_node_transitions[beta, alpha] is the probability of stepping alpha -> beta
    p_nodes = np.array([pagerank[node] for node in nodes], dtype=float)
    p_node_transitions = nx.google_matrix(G, nodelist=nodes, alpha=1.0).T

    # Community ids are shifted by the smallest id so they always index in range.
    community_ids = list(communities.values())
    min_comm = min(community_ids)
    num_communities = max(community_ids) - min_comm + 1

    # probability of being in a community, and of being in it and staying there
    p_comm = np.zeros(num_communities)
    p_comm_stay = np.zeros(num_communities)
    for alpha, node in enumerate(nodes):
        comm_idx = communities[node] - min_comm
        p_comm[comm_idx] += p_nodes[alpha]

        # dict.fromkeys de-duplicates while keeping order: on directed graphs
        # all_neighbors yields a node twice if it is predecessor and successor.
        for neighbor in dict.fromkeys(nx.all_neighbors(G, node)):
            if communities[node] == communities[neighbor]:
                beta = node_index[neighbor]
                p_comm_stay[comm_idx] += p_nodes[alpha] * p_node_transitions[beta, alpha]

    p_comm_leave = p_comm - p_comm_stay

    # compute altmap cost
    epsilon = 1e-18 # vicinity threshold for numerical stability
    cost_per_module = np.zeros(num_communities)
    for i in range(num_communities):

        # skip empty communities and a community holding all probability mass
        if (p_comm[i] <= epsilon) or (p_comm[i] + epsilon >= 1.0):
            continue

        cost_per_module[i] -= plogp(p_comm_stay[i])
        cost_per_module[i] += 2.0 * plogq(p_comm_stay[i], p_comm[i])
        cost_per_module[i] -= plogp(p_comm_leave[i])
        cost_per_module[i] += plogq(p_comm_leave[i], p_comm[i] * (1.0 - p_comm[i]))

    cost = np.sum(cost_per_module)
    print (f'AltMap cost is {cost}.')
    return cost
    

In [85]:

# params
# Directory (with trailing slash) and base filename used to build the
# '<out_path><filename>.net' and '<out_path><filename>.tree' paths in later cells.
out_path = './output/'
filename = 'test'


In [120]:
# Ring of cliques: `num_cliques` cliques of `clique_size` nodes each, relabeled to 1..N.
num_cliques = 3
clique_size = 8
N = num_cliques * clique_size # num nodes
G = nx.convert_node_labels_to_integers(
    nx.ring_of_cliques(num_cliques, clique_size),
    first_label=1,
)

# Reference partition: clique c (0-based id) owns nodes c*clique_size+1 .. (c+1)*clique_size.
communities = {
    c*clique_size + n: c
    for c in range(num_cliques)
    for n in range(1, clique_size+1)
}

cost = altmap_cost(G, communities)
print (f'Ground Truth L = {cost}\n')


AltMap cost is -1.334991225523328.
Ground Truth L = -1.334991225523328



In [125]:
# Barbell graph: two complete graphs with `nodes_graph1` nodes each, joined via
# `nodes_connection` path nodes; nodes are relabeled to 1..N.
nodes_graph1 = 3
nodes_connection = 0
# barbell_graph(m1, m2) has 2*m1 + m2 nodes (both complete graphs plus the path).
N = 2 * nodes_graph1 + nodes_connection # num nodes
G = nx.barbell_graph(nodes_graph1, nodes_connection)
G = nx.convert_node_labels_to_integers(G, first_label=1)

# Baseline: every node in its own community.
communities = dict(enumerate([0, 1, 2, 3, 4, 5], start=1))
initial_cost = altmap_cost(G, communities)
print (f'Initial cost  = {initial_cost}\n')

# Each trial puts the two nodes named in the label into one community and keeps
# all other nodes as singletons. The list gives the community id of nodes 1..6.
merge_trials = [
    ('1-3', [3, 1, 3, 2, 4, 5]),
    ('2-3', [1, 3, 3, 2, 4, 5]),
    ('3-4', [1, 2, 3, 3, 4, 5]),
    ('3-5', [1, 2, 3, 4, 3, 5]),
    ('3-6', [1, 2, 3, 4, 5, 3]),
    ('1-2', [1, 1, 2, 3, 4, 5]),
    ('1-4', [1, 2, 3, 1, 4, 5]),
    ('1-5', [1, 2, 3, 4, 1, 5]),
    ('1-6', [1, 2, 3, 4, 5, 1]),
]
for label, assignment in merge_trials:
    communities = dict(enumerate(assignment, start=1))
    cost = altmap_cost(G, communities)
    print (f'({label}) delta L = {cost - initial_cost}\n')

# Reference partition: nodes 1-3 in one community, nodes 4-6 in the other.
communities = dict(enumerate([1, 1, 1, 2, 2, 2], start=1))
cost = altmap_cost(G, communities)
print (f'Ground Truth L = {cost}\n')


AltMap cost is -0.2750715937227542.
Initial cost  = -0.2750715937227542

AltMap cost is -0.17197073121878087.
(1-3) delta L = 0.10310086250397332

AltMap cost is -0.17197073121878087.
(2-3) delta L = 0.10310086250397332

AltMap cost is -0.13966509926881077.
(3-4) delta L = 0.13540649445394343

AltMap cost is -0.3959614681818392.
(3-5) delta L = -0.120889874459085

AltMap cost is -0.3959614681818392.
(3-6) delta L = -0.120889874459085

AltMap cost is -0.25181271770101543.
(1-2) delta L = 0.02325887602173876

AltMap cost is -0.3959614681818389.
(1-4) delta L = -0.12088987445908472

AltMap cost is -0.3514632896157998.
(1-5) delta L = -0.07639169589304562

AltMap cost is -0.3514632896157998.
(1-6) delta L = -0.07639169589304562

AltMap cost is -0.41188057085498214.
Ground Truth L = -0.41188057085498214



In [117]:
# Path 1..N plus two closing edges (1 -- N/2 and N/2+1 -- N), all with weight 1.
N = 20 # num nodes
half = N // 2

G = nx.MultiGraph()
G.add_nodes_from(range(1, N+1))
G.add_edges_from(((n, n+1) for n in range(1, N)), weight=1)
G.add_edge(1, half, weight=1)
G.add_edge(half + 1, N, weight=1)

# Reference partition: first half of the nodes -> community 0, second half -> community 1.
communities = {n: (0 if n <= half else 1) for n in range(1, N+1)}

cost = altmap_cost(G, communities)
print (f'Ground Truth L = {cost}\n')



AltMap cost is -0.7307886207689032.
Ground Truth L = -0.7307886207689032



In [131]:

# Export the current graph as a Pajek file and run Infomap on it with the AltMap flags.
nx.write_pajek(G, f'{out_path}{filename}.net')
infomap(f'{out_path}{filename}.net', altmap=True)



In [123]:

# Read the tree file for the exported network; G is reused from the earlier cells.
#G = nx.read_pajek(net_path)
df = read_tree(f'{out_path}{filename}.tree')

# node -> community id, in the order the rows appear in the tree file
communities = {
    int(node): int(community)
    for node, community in zip(df['node'], df['community'])
}

nx.set_node_attributes(G, name='community', values=communities) # seems to be faulty!!
#print (nx.get_node_attributes(G, name='community'))

print (communities)
num_communities = max(communities.values()) - min(communities.values()) + 1
print (f'We found {num_communities} communities.')

# Draw the network on a fresh figure, passing the communities sorted by node id.
plt.close('all')
plt.figure()
ordered_communities = OrderedDict(sorted(communities.items()))
drawNetwork(G, ordered_communities)

# Last expression of the cell: the cost is printed and shown as the cell output.
altmap_cost(G, communities)




{1: 1, 2: 2, 9: 3, 10: 4, 17: 5, 18: 6, 3: 7, 4: 8, 5: 9, 6: 10, 7: 11, 8: 12, 11: 13, 12: 14, 13: 15, 14: 16, 15: 17, 16: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24}
We found 24 communities.
AltMap cost is -0.06160788496378283.


-0.06160788496378283