## LFR benchmark for Altmap vs Map Eq
### Compare altmap to map eq using networkx


In [45]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict

# Set the global matplotlib font size to 20 for all figures in this notebook.
plt.rcParams.update({'font.size': 20})
# IPython magic: selects the interactive matplotlib backend and populates the
# namespace from numpy and matplotlib (see the cell output below).
%pylab

# IPython magic: execute helpers.py inside the notebook namespace.
%run helpers.py
# loads the following helper functions:
# infomap(net_path, altmap=False, additional_args='')
# read_tree(tree_path)
# plogq(p, q)
# plogp(p)
# drawNetwork(G, communities)
# altmap_cost(G, communities)
# create_initfile(G, N_partitions=None, randomized=True)
# generate_two_rings(n_ring=10)
# NOTE(review): later cells also use workspace_path, filename and
# read_communities_from_tree_file, which are not in this list — presumably
# they come from helpers.py as well; confirm.


Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [139]:
from networkx.algorithms.community.community_generators import LFR_benchmark_graph
from sklearn.metrics import normalized_mutual_info_score as nmi_score

# generate LFR benchmark graph + extract ground truth communities
def generate_LFR_benchmark(N = 250, mu = 0.1):
    """Create one LFR benchmark graph together with its planted partition.

    Parameters
    ----------
    N : int
        Number of nodes.
    mu : float
        Mixing parameter passed to ``LFR_benchmark_graph``.

    Returns
    -------
    G : networkx graph
        The benchmark graph, relabelled so that node ids run from 1 to N.
    communities_true : OrderedDict
        Maps node id -> community label (labels start at 1), sorted by
        node id.

    Raises
    ------
    Whatever ``LFR_benchmark_graph`` raises when generation fails; this
    function does not catch anything.
    """
    # Size limits scale with the network size.
    degree_cap = int(0.1*N)
    largest_community = int(0.1*N)
    smallest_community = int(0.05*N)
    degree_exponent = 2.0     # power law exponent of the degree distribution
    community_exponent = 1.1  # power law exponent of the community sizes

    graph = LFR_benchmark_graph(
        N,
        degree_exponent,
        community_exponent,
        mu,
        average_degree=10,
        max_degree=degree_cap,
        max_community=largest_community,
        min_community=smallest_community,
        max_iters=200,
    )
    graph = nx.convert_node_labels_to_integers(graph, first_label=1)

    # Each node carries the member set of its community in the 'community'
    # attribute. Those sets still hold the pre-relabelling ids, hence the
    # shift by one below.
    membership = {}
    label = 0
    for node in range(1, N + 1):
        if node not in membership:
            label += 1
            members = np.asarray(list(graph.nodes[node]['community'])) + 1
            membership.update(dict.fromkeys(members, label))

    return graph, OrderedDict(sorted(membership.items()))

# compute normalized mutual information between two partitions
def compute_nmi(communities_true, communities_found):
    """Return the normalized mutual information between two partitions.

    Parameters
    ----------
    communities_true : dict
        Maps node id -> ground truth community label.
    communities_found : dict
        Maps node id -> detected community label.

    Returns
    -------
    float
        NMI score computed with arithmetic-mean normalization.

    Notes
    -----
    The score is only meaningful if the i-th label of both lists belongs to
    the same node. When both dicts contain exactly the same node ids, the
    found labels are therefore looked up in the iteration order of
    ``communities_true`` instead of relying on both dicts being ordered
    identically. If the key sets differ, the previous positional pairing is
    kept.
    """
    labels_true = list(communities_true.values())

    if communities_true.keys() == communities_found.keys():
        # Same node ids on both sides: pair labels node by node.
        labels_found = [communities_found[node] for node in communities_true]
    else:
        # Keys cannot be matched; fall back to pairing by position.
        labels_found = list(communities_found.values())

    return nmi_score(labels_true, labels_found, average_method='arithmetic')

# LFR Benchmark
# num_realizations .. number of benchmark realizations for each parameter pair (mu, N)
def run_benchmark(N_list, mu_list, cost_function = 'altmap', init='std', num_realizations=10):
    """Run the LFR benchmark over a grid of network sizes and mixing parameters.

    For every (mu, N) pair up to ``num_realizations`` LFR graphs are
    generated, written to a Pajek file, clustered with ``infomap`` and
    compared with the planted partition via normalized mutual information.
    Realizations whose graph generation exceeds the iteration limit are
    skipped.

    Parameters
    ----------
    N_list : sequence of int
        Network sizes to test.
    mu_list : sequence of float
        Mixing parameters to test.
    cost_function : str
        'altmap' or 'mapeq'; any other value falls back to 'altmap'.
    init : str
        'std' or 'random'; any other value falls back to 'std'. With
        'random' an init file is created and passed to infomap via
        ``--cluster-data``.
    num_realizations : int
        Number of benchmark graphs attempted per (mu, N) pair.

    Returns
    -------
    benchmark_mean_nmi, benchmark_variance : numpy.ndarray
        Arrays of shape ``(len(mu_list), len(N_list))`` holding the mean NMI
        and its sample variance (``ddof=1``). The mean is NaN if no
        realization succeeded; the variance is NaN if fewer than two did.

    Notes
    -----
    Uses ``workspace_path``, ``filename``, ``infomap``, ``create_initfile``
    and ``read_communities_from_tree_file``, which are not defined in this
    function and must exist in the enclosing namespace.
    """
    if cost_function not in {'altmap', 'mapeq'}:
        cost_function = 'altmap'
    altmap = (cost_function == 'altmap')

    if init not in {'std', 'random'}:
        init = 'std'

    benchmark_mean_nmi = np.zeros((len(mu_list), len(N_list)))
    benchmark_variance = np.zeros((len(mu_list), len(N_list)))
    for mu_idx, mu in enumerate(mu_list):
        for N_idx, N in enumerate(N_list):
            nmi_list = []
            for _ in range(num_realizations):
                print(f'Starting benchmark for (N,mu) = ({N},{mu})\n')
                try:
                    G, communities_true = generate_LFR_benchmark(N, mu)
                except nx.ExceededMaxIterations:
                    print(f'No benchmark for (N,mu) = ({N},{mu})\n')
                    continue
                num_communities_true = max(communities_true.values()) - min(communities_true.values()) + 1

                # Build the network file path once per realization.
                net_path = workspace_path + filename + '.net'
                nx.write_pajek(G, net_path)

                if init == 'random':
                    # Called for its side effect only; the returned
                    # partition is not needed here.
                    create_initfile(G, randomized=True)
                    infomap(net_path, altmap=altmap, additional_args=' --cluster-data ./workspace/init.tree')
                else:
                    infomap(net_path, altmap=altmap)

                communities_found, num_communities_found = read_communities_from_tree_file()
                print (f'We found {num_communities_found} communities vs. {num_communities_true} ground truth communities.\n')

                nmi_list.append(compute_nmi(communities_true, communities_found))

            # np.mean of an empty list and np.var(ddof=1) of fewer than two
            # samples are undefined and trigger RuntimeWarnings; store NaN
            # explicitly instead.
            benchmark_mean_nmi[mu_idx, N_idx] = np.mean(nmi_list) if nmi_list else np.nan
            benchmark_variance[mu_idx, N_idx] = (
                np.var(nmi_list, ddof=1) if len(nmi_list) > 1 else np.nan
            )

    return benchmark_mean_nmi, benchmark_variance


In [164]:
# Parameter grid of the benchmark: network sizes and mixing parameters.
# Alternative size grid: np.linspace(300,1000, 5, dtype=int)
Ns = [482, 1000]
mus = np.linspace(0.05, 0.75, 10)

print(Ns)
print(mus)

# Two realizations per (mu, N) pair, map equation cost, standard init.
benchmark_mean_nmi, benchmark_variance = run_benchmark(
    Ns,
    mus,
    cost_function='mapeq',
    init='std',
    num_realizations=2,
)

[482, 1000]
[0.05       0.12777778 0.20555556 0.28333333 0.36111111 0.43888889
 0.51666667 0.59444444 0.67222222 0.75      ]
Starting benchmark for (N,mu) = (482,0.05)

We found 13 communities vs. 13 ground truth communities.

Starting benchmark for (N,mu) = (482,0.05)

No benchmark for (N,mu) = (482,0.05)

Starting benchmark for (N,mu) = (1000,0.05)

We found 14 communities vs. 14 ground truth communities.

Starting benchmark for (N,mu) = (1000,0.05)

We found 14 communities vs. 14 ground truth communities.

Starting benchmark for (N,mu) = (482,0.12777777777777777)

We found 14 communities vs. 14 ground truth communities.

Starting benchmark for (N,mu) = (482,0.12777777777777777)

No benchmark for (N,mu) = (482,0.12777777777777777)

Starting benchmark for (N,mu) = (1000,0.12777777777777777)

We found 13 communities vs. 13 ground truth communities.

Starting benchmark for (N,mu) = (1000,0.12777777777777777)

We found 14 communities vs. 14 ground truth communities.

Starting benchmark f

  'Non-string attribute'))
  **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [None]:
# Plot the benchmark results: mean NMI (top) and its standard deviation
# (bottom) as a function of the mixing parameter, one curve per network size.
benchmark_std_dev = np.sqrt(benchmark_variance)
# Average of the per-size mean NMI values for each mixing parameter.
mean_benchmark_results = np.mean(benchmark_mean_nmi, axis=1)

plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle('Infomap')

# Column i of the result arrays belongs to Ns[i]; derive curves and legend
# labels from Ns instead of hard-coding the two sizes.
for size_idx, size in enumerate(Ns):
    axs[0].plot(mus, benchmark_mean_nmi[:, size_idx], label=f'N={size}')
axs[0].plot(mus, mean_benchmark_results, label='mean')
# Vertical marker at mu = 0.5.
axs[0].plot([0.5, 0.5], [0,1])
axs[0].grid()
# Raw string with mathtext: '\mu' in a normal string is an invalid escape.
axs[0].set_xlabel(r'Mixing parameter $\mu$')
axs[0].set_ylabel('NMI')
axs[0].legend()

for size_idx, size in enumerate(Ns):
    axs[1].plot(mus, benchmark_std_dev[:, size_idx], label=f'N={size}')
axs[1].grid()
axs[1].set_xlabel(r'Mixing parameter $\mu$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

In [None]:
# Draw a benchmark graph coloured by its ground truth communities.
# NOTE(review): G and communities_true are not assigned at notebook level in
# the cells shown here (they only appear as locals of the benchmark
# functions) — confirm they are defined, e.g. via generate_LFR_benchmark,
# before running this cell.
plt.close('all')
plt.figure()
plt.title('Ground Truth Communities')
drawNetwork(G, communities_true, labels=False)

# Disabled: the same plot for the detected partition.
# plt.figure()
# plt.title('Infomap/Altmap Communities')
# drawNetwork(G, communities_found, labels=False)
