## LFR benchmark for Altmap vs Map Eq
### Compare Altmap to the map equation (Infomap) on LFR benchmark graphs generated with networkx


In [3]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict

# Global plot styling: large sans-serif fonts, with all text rendered through LaTeX.
plt.rcParams.update({
    'font.size': 20,
    'text.usetex': True,
    'font.family': 'sans-serif',
})
# The preamble is passed as ONE newline-joined string: recent Matplotlib releases
# only accept a string for this key, and older releases accept a string as well.
plt.rcParams['text.latex.preamble'] = '\n'.join([
    r'\usepackage{siunitx}',   # i need upright \micro symbols, but you need...
    r'\sisetup{detect-all}',   # ...this to force siunitx to actually use your fonts
    r'\usepackage{helvet}',    # set the normal font here
    r'\usepackage{sansmath}',  # load up the sansmath so that math -> helvet
    r'\sansmath',              # <- tricky! -- gotta actually tell tex to use!
])
%pylab

%run helpers.py
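# loads the following helper functions:
# infomap(net_path, altmap=False, additional_args='')
# read_tree(tree_path)
# plogq(p, q)
# plogp(p)
# drawNetwork(G, communities)
# altmap_cost(G, communities)
# create_initfile(G, N_partitions=None, randomized=True)
# generate_two_rings(n_ring=10)
# NOTE(review): run_benchmark() below also uses read_communities_from_tree_file(),
# workspace_path and filename, none of which are defined in this notebook --
# presumably they are provided by helpers.py as well; confirm.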


Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [23]:
from networkx.algorithms.community.community_generators import LFR_benchmark_graph
from sklearn.metrics import normalized_mutual_info_score as nmi_score

# generate LFR benchmark graph + extract ground truth communities
def generate_LFR_benchmark(N = 250, mu = 0.1):
    """Build one LFR benchmark graph and its ground-truth partition.

    Parameters
    ----------
    N : number of nodes handed to LFR_benchmark_graph.
    mu : mixing parameter handed to LFR_benchmark_graph.

    Returns
    -------
    (G, communities_true)
        G is the generated graph relabelled so that node ids start at 1.
        communities_true is an OrderedDict, sorted by node id, mapping each
        node id to a community number (1, 2, ... in order of first appearance
        while scanning nodes 1..N).
    """
    # Alternative parameter sets used for other network sizes:
    #   N=10000: max_degree=50, max_community=100, min_community=50, average_degree=20, tau1=3.0, tau2=1.5
    #   N<1000 : max_degree=50, max_community=50,  min_community=20, average_degree=10, tau1=3.0, tau2=1.5
    # Active set (N=5000):
    tau1 = 2.5  # Power law exponent for the degree distribution
    tau2 = 1.5  # Power law exponent for the community size distribution
    lfr_options = {
        'average_degree': 20,
        'max_degree': 50,
        'max_community': 100,
        'min_community': 50,
        'max_iters': 200,
    }

    graph = LFR_benchmark_graph(N, tau1, tau2, mu, **lfr_options)
    graph = nx.convert_node_labels_to_integers(graph, first_label=1)

    # Walk the nodes in id order; the first node seen from a not-yet-labelled
    # community assigns the next community number to all of its members.
    membership = {}
    community_count = 0
    for node in range(1, N + 1):
        if node not in membership:
            community_count += 1
            # The 'community' attribute holds member ids in the pre-relabelling
            # numbering; shift by one so that node labels are >= 1.
            members = np.asarray(list(graph.nodes[node]['community'])) + 1
            membership.update(dict.fromkeys(members, community_count))

    return graph, OrderedDict(sorted(membership.items()))

# compute normalized mutual information between two partitions
def compute_nmi(communities_true, communities_found):
    """Return the NMI (arithmetic normalisation) between two partitions.

    Parameters
    ----------
    communities_true : dict mapping node id -> ground-truth community label.
    communities_found : dict mapping node id -> detected community label.

    When both dicts cover exactly the same node ids, the detected labels are
    looked up per node id, so the score does not depend on the iteration
    order of ``communities_found``. Otherwise the labels are paired
    positionally via ``.values()``, exactly as before.
    """
    labels_true = list(communities_true.values())

    same_nodes = (len(communities_found) == len(communities_true)
                  and all(node in communities_found for node in communities_true))
    if same_nodes:
        # Pair labels by node id rather than by dict iteration order.
        labels_found = [communities_found[node] for node in communities_true]
    else:
        labels_found = list(communities_found.values())

    return nmi_score(labels_true, labels_found, average_method='arithmetic')

def _nmi_mean_std(nmi_samples):
    """Return (mean, sample standard deviation) of a list of NMI scores.

    Both values are NaN for an empty list. The standard deviation (ddof=1) is
    NaN for a single sample, which is what np.std would produce anyway, but
    without the degrees-of-freedom RuntimeWarning.
    """
    if not nmi_samples:
        return np.nan, np.nan
    mean = np.mean(nmi_samples)
    std = np.std(nmi_samples, ddof=1) if len(nmi_samples) > 1 else np.nan
    return mean, std


# LFR Benchmark
# num_realizations .. number of network realizations for each parameter pair (mu, N)
def run_benchmark(N_list, mu_list, num_realizations=10):
    """Run Infomap and Altmap on LFR graphs and collect NMI statistics.

    For every (mu, N) pair, ``num_realizations`` graphs are generated with
    generate_LFR_benchmark(). Each graph is written as a Pajek file to
    ``workspace_path + filename + '.net'``, clustered first with
    ``infomap(..., altmap=False)`` and then with ``infomap(..., altmap=True)``,
    and each result is scored against the ground truth with compute_nmi().
    Realizations whose graph generation raises nx.ExceededMaxIterations are
    reported and skipped.

    ``infomap``, ``read_communities_from_tree_file``, ``workspace_path`` and
    ``filename`` must exist in the enclosing namespace (presumably provided by
    helpers.py -- confirm).

    Returns
    -------
    (mean_nmi_infomap, std_infomap, mean_nmi_altmap, std_altmap)
        Four float arrays of shape (len(mu_list), len(N_list)). An entry is
        NaN when no realization succeeded for that pair; the standard
        deviation (ddof=1) is also NaN with fewer than two samples.
    """
    num_benchmarks = len(N_list) * len(mu_list) * num_realizations
    benchmark_id = 0

    # Start from NaN so that "no data" is explicit rather than a silent zero.
    grid_shape = (len(mu_list), len(N_list))
    mean_nmi_infomap = np.full(grid_shape, np.nan)
    std_infomap = np.full(grid_shape, np.nan)
    mean_nmi_altmap = np.full(grid_shape, np.nan)
    std_altmap = np.full(grid_shape, np.nan)

    for mu_idx, mu in enumerate(mu_list):
        for N_idx, N in enumerate(N_list):
            nmi_list_infomap = []
            nmi_list_altmap = []
            for _ in range(num_realizations):
                benchmark_id += 1
                print(f'Starting benchmark {benchmark_id}/{num_benchmarks} for (N,mu) = ({N},{mu})\n')
                try:
                    G, communities_true = generate_LFR_benchmark(N, mu)
                except nx.ExceededMaxIterations as err:
                    print(f'Failed to generate network for (N,mu) = ({N},{mu}): ', err)
                    continue

                net_path = workspace_path + filename + '.net'
                nx.write_pajek(G, net_path)

                # Infomap first, then Altmap; the tree file is read right after
                # each run, before the next run is started.
                for use_altmap, nmi_list in ((False, nmi_list_infomap),
                                             (True, nmi_list_altmap)):
                    infomap(net_path, altmap=use_altmap)
                    communities_found, _ = read_communities_from_tree_file()
                    nmi_list.append(compute_nmi(communities_true, communities_found))

            mean_nmi_infomap[mu_idx, N_idx], std_infomap[mu_idx, N_idx] = _nmi_mean_std(nmi_list_infomap)
            mean_nmi_altmap[mu_idx, N_idx], std_altmap[mu_idx, N_idx] = _nmi_mean_std(nmi_list_altmap)

    print('Finished benchmark successfully!\n')
    return mean_nmi_infomap, std_infomap, mean_nmi_altmap, std_altmap


In [22]:
# Stand-alone LFR graph built from hand-set parameters (kept as notebook globals).
N = 5000   # number of nodes
mu = 0.3   # mixing parameter

# degree constraints
average_degree = 20
max_degree = 50

# community size constraints
min_community = 50
max_community = 100

gamma = 2.5  # Power law exponent for the degree distribution
beta = 1.5   # Power law exponent for the community size distribution
max_iter = 100

# generate LFR benchmark graph
G = LFR_benchmark_graph(
    N, gamma, beta, mu,
    average_degree=average_degree,
    max_degree=max_degree,
    max_community=max_community,
    min_community=min_community,
    max_iters=max_iter,
)
    
    

In [24]:
# Benchmark sweep: a single network size, ten evenly spaced mixing parameters,
# ten network realizations per (mu, N) pair.
num_realizations = 10
Ns = [5000]
mus = np.linspace(0.15, 0.75, 10)
print(mus)

benchmark_results = run_benchmark(Ns, mus, num_realizations=num_realizations)
mean_nmi_infomap, std_infomap, mean_nmi_altmap, std_altmap = benchmark_results

[0.15       0.21666667 0.28333333 0.35       0.41666667 0.48333333
 0.55       0.61666667 0.68333333 0.75      ]
Starting benchmark 1/100 for (N,mu) = (5000,0.15)

Starting benchmark 2/100 for (N,mu) = (5000,0.15)

Starting benchmark 3/100 for (N,mu) = (5000,0.15)

Starting benchmark 4/100 for (N,mu) = (5000,0.15)

Starting benchmark 5/100 for (N,mu) = (5000,0.15)

Starting benchmark 6/100 for (N,mu) = (5000,0.15)

Starting benchmark 7/100 for (N,mu) = (5000,0.15)

Starting benchmark 8/100 for (N,mu) = (5000,0.15)

Starting benchmark 9/100 for (N,mu) = (5000,0.15)

Starting benchmark 10/100 for (N,mu) = (5000,0.15)

Starting benchmark 11/100 for (N,mu) = (5000,0.21666666666666667)

Failed to generate network for (N,mu) = (5000,0.21666666666666667):  Could not create power law sequence
Starting benchmark 12/100 for (N,mu) = (5000,0.21666666666666667)

Starting benchmark 13/100 for (N,mu) = (5000,0.21666666666666667)

Starting benchmark 14/100 for (N,mu) = (5000,0.21666666666666667)

Sta

  'Non-string attribute'))


In [26]:
# Plot mean NMI (top) and its standard deviation (bottom) against the mixing
# parameter for the first network size in Ns (column 0 of the result arrays).
print(mean_nmi_infomap)

plt.close('all')
fig, axs = plt.subplots(2, 1, sharex=True)
fig.suptitle(f'LFR benchmark, N = {Ns[0]} nodes')

axs[0].plot(mus, mean_nmi_infomap[:, 0], 'x--', linewidth=2, markersize=12, label='Infomap')
axs[0].plot(mus, mean_nmi_altmap[:, 0], '^--', linewidth=2, markersize=12, label='Altmap')
axs[0].plot([0.5, 0.5], [0, 1], 'r')  # vertical marker at mu = 0.5
axs[0].grid()
# raw string: '\m' is not a valid escape sequence in a normal string literal
axs[0].set_xlabel(r'Mixing parameter $\mu$')
axs[0].set_ylabel('NMI')
axs[0].legend()

axs[1].plot(mus, std_infomap[:, 0], 'x--', linewidth=2, markersize=12, label='Infomap')
axs[1].plot(mus, std_altmap[:, 0], '^--', linewidth=2, markersize=12, label='Altmap')
# Marker height spans both plotted curves; nanmax ignores NaN entries, which
# the result arrays hold wherever no statistic could be computed.
std_top = np.nanmax([std_infomap[:, 0], std_altmap[:, 0]])
axs[1].plot([0.5, 0.5], [0, std_top], 'r')
axs[1].grid()
axs[1].set_xlabel(r'Mixing parameter $\mu$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

[[9.99975247e-01]
 [9.99937706e-01]
 [9.99918940e-01]
 [9.98724228e-01]
 [9.84945259e-01]
 [9.10716997e-01]
 [5.97776101e-01]
 [3.88839234e-16]
 [3.93871302e-16]
 [4.01727172e-16]]


<matplotlib.legend.Legend at 0x7fd5f99322e8>

In [None]:
# Draw graph G coloured by its ground-truth communities in a fresh figure.
# NOTE(review): in the visible notebook `communities_true` is only ever assigned
# as a local inside generate_LFR_benchmark() / run_benchmark(), never at top
# level -- presumably `G, communities_true = generate_LFR_benchmark(...)` has to
# be run before this cell; confirm.
# NOTE(review): the helper list after `%run helpers.py` shows
# drawNetwork(G, communities) without a `labels` argument -- verify that
# helpers.py accepts labels=False.
plt.close('all')
plt.figure()
plt.title('Ground Truth Communities')
drawNetwork(G, communities_true, labels=False)

# Disabled: the same drawing for the detected partition (needs a top-level
# `communities_found`).
# plt.figure()
# plt.title('Infomap/Altmap Communities')
# drawNetwork(G, communities_found, labels=False)
