## LFR benchmark for Altmap vs Map Eq
### Compare altmap to map eq using networkx

In [20]:
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator)
import numpy as np
from collections import OrderedDict

# show plots in separate window
%pylab
# load helpers and wrappers
%run helpers.py 

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [5]:
from networkx.generators.community import LFR_benchmark_graph
from sklearn.metrics import normalized_mutual_info_score as nmi_score

def generate_LFR_benchmark(N=250, mu=0.1):
    """Generate an LFR benchmark graph and its ground-truth partition.

    Parameters
    ----------
    N : int
        Number of nodes of the generated network.
    mu : float
        Value passed to ``LFR_benchmark_graph`` as its ``mu`` argument.

    Returns
    -------
    (G, communities_true)
        ``G`` is the generated graph with its nodes relabeled to the
        integers ``1..N``.  ``communities_true`` is an ``OrderedDict``,
        sorted by node label, that maps every node label to a community
        index.  Community indices start at 1 and are handed out in the order
        in which the communities are first met when walking the nodes
        ``1..N``.

    Notes
    -----
    Exceptions raised by ``LFR_benchmark_graph`` are not handled here and
    propagate to the caller.
    """
    # Parameter presets that were used for other network sizes:
    #   N=10000: max_degree=50, max_community=100, min_community=50,
    #            average_degree=20, tau1=3.0, tau2=1.5
    #   N=5000:  max_degree=50, max_community=100, min_community=50,
    #            average_degree=20, tau1=2.5, tau2=1.5
    #   N<1000:  max_degree=50, max_community=50, min_community=20,
    #            average_degree=10, tau1=3.0, tau2=1.5
    # Active preset: maximum degree and maximum community size scale with N.
    size_cap = int(0.1 * N)
    degree_exponent = 3.0      # tau1: power law exponent for the degree distribution
    community_exponent = 1.5   # tau2: power law exponent for the community size distribution

    graph = LFR_benchmark_graph(
        N, degree_exponent, community_exponent, mu,
        average_degree=15,
        max_degree=size_cap,
        max_community=size_cap,
        min_community=20,
        max_iters=200,
    )
    graph = nx.convert_node_labels_to_integers(graph, first_label=1)

    # Walk the nodes in label order; whenever a node has no label yet, its
    # whole community (stored as a node attribute) gets the next free index.
    membership = {}
    next_label = 0
    for node in range(1, N + 1):
        if node not in membership:
            next_label += 1
            # the 'community' attribute holds the pre-relabeling node IDs,
            # so shift them by one to match the labels >= 1 used in `graph`
            members = np.asarray(list(graph.nodes[node]['community'])) + 1
            for member in members:
                membership[member] = next_label

    return graph, OrderedDict(sorted(membership.items()))

def compute_nmi(communities_true, communities_found):
    """Compute the normalized mutual information between two partitions.

    Parameters
    ----------
    communities_true : mapping
        Maps node ID -> ground-truth community label.
    communities_found : mapping
        Maps node ID -> detected community label.

    Returns
    -------
    float
        Result of ``nmi_score(..., average_method='arithmetic')`` on the two
        label sequences.

    Notes
    -----
    When both mappings cover exactly the same node IDs, the labels are paired
    by node ID, so the result does not depend on the iteration order of
    either mapping.  If the key sets differ (e.g. the two sides use different
    key types), the labels are paired by iteration order, which is only
    meaningful when both mappings list the nodes in the same order.
    """
    labels_true = list(communities_true.values())

    if communities_true.keys() == communities_found.keys():
        # same node set on both sides: align the detected labels by node ID
        labels_found = [communities_found[node] for node in communities_true]
    else:
        # key sets differ: fall back to positional pairing
        labels_found = list(communities_found.values())

    return nmi_score(labels_true, labels_found, average_method='arithmetic')

def _sample_std(values):
    """Return the sample standard deviation (ddof=1) of `values`, NaN for fewer than two samples."""
    if len(values) < 2:
        # undefined for a single sample; return NaN directly instead of
        # letting numpy emit a degrees-of-freedom RuntimeWarning
        return np.nan
    return np.std(values, ddof=1)


def run_benchmark(N_list, mu_list, num_realizations=10):
    """Run the LFR benchmark for Infomap, Altmap and Altmap with 'sc' init.

    For every pair ``(mu, N)`` up to `num_realizations` networks are
    generated with ``generate_LFR_benchmark``.  On each network the three
    method variants are run (in the order Infomap, Altmap, Altmap with init)
    and two quantities are recorded per run:

    * the NMI between ground truth and detected partition, and
    * the relative error of the number of detected communities,
      ``num_found / num_true - 1``.

    Realizations for which network generation raises
    ``nx.ExceededMaxIterations`` are reported and skipped.

    Parameters
    ----------
    N_list : sequence of int
        Network sizes to benchmark.
    mu_list : sequence of float
        Values of ``mu`` to benchmark.
    num_realizations : int, optional
        Number of network realizations for each parameter pair (mu, N).

    Returns
    -------
    tuple of 12 ndarrays, each of shape ``(len(mu_list), len(N_list))``
        For each method, in the order Infomap, Altmap, Altmap with init, the
        four arrays ``mean_nmi, std_err, mean_err, std_nmi`` -- note that the
        two std arrays come in this (swapped) order.  Cells for which no
        network could be generated hold NaN; the std cells are also NaN when
        fewer than two realizations succeeded.
    """
    # (label used in the log output, keyword arguments passed to infomap())
    methods = (
        ('Infomap', {'altmap': False}),
        ('Altmap', {'altmap': True, 'update_inputfile': False}),
        ('Altmap with init', {'altmap': True, 'init': 'sc', 'update_inputfile': False}),
    )

    grid_shape = (len(mu_list), len(N_list))
    # one dict of result arrays per method; NaN marks "no data for this cell"
    results = [
        {stat: np.full(grid_shape, np.nan)
         for stat in ('mean_nmi', 'std_nmi', 'mean_err', 'std_err')}
        for _ in methods
    ]

    num_benchmarks = len(N_list) * len(mu_list) * num_realizations
    benchmark_id = 0

    for mu_idx, mu in enumerate(mu_list):
        for N_idx, N in enumerate(N_list):
            # per-method samples collected over the realizations of this cell
            nmi_samples = [[] for _ in methods]
            err_samples = [[] for _ in methods]

            for _ in range(num_realizations):
                benchmark_id += 1
                print(f'Starting benchmark {benchmark_id}/{num_benchmarks} for (N,mu) = ({N},{mu})\n')
                try:
                    G, communities_true = generate_LFR_benchmark(N, mu)
                except nx.ExceededMaxIterations as err:
                    print(f'Failed to generate network for (N,mu) = ({N},{mu}): ', err)
                    continue

                # assumes the ground-truth labels are consecutive integers,
                # so that max - min + 1 equals the number of communities
                true_labels = communities_true.values()
                num_communities_true = max(true_labels) - min(true_labels) + 1

                for (label, kwargs), nmis, errs in zip(methods, nmi_samples, err_samples):
                    communities_found, num_communities_found = infomap(G, **kwargs)
                    print(f'{label} found {num_communities_found} communities vs. '
                          f'{num_communities_true} ground truth communities.\n')

                    nmis.append(compute_nmi(communities_true, communities_found))
                    errs.append(num_communities_found / num_communities_true - 1.0)

            # cells without a single successful realization keep their NaN
            for stats, nmis, errs in zip(results, nmi_samples, err_samples):
                if not nmis:
                    continue
                stats['mean_nmi'][mu_idx, N_idx] = np.mean(nmis)
                stats['std_nmi'][mu_idx, N_idx] = _sample_std(nmis)
                stats['mean_err'][mu_idx, N_idx] = np.mean(errs)
                stats['std_err'][mu_idx, N_idx] = _sample_std(errs)

    print(f'Finished benchmark successfully!\n')

    # keep the established return order: mean_nmi, std_err, mean_err, std_nmi
    flat = []
    for stats in results:
        flat.extend((stats['mean_nmi'], stats['std_err'], stats['mean_err'], stats['std_nmi']))
    return tuple(flat)


In [127]:
# Settings for a single example LFR graph
N, mu = 500, 0.3                        # number of nodes, mixing parameter
average_degree, max_degree = 20, 50     # degree constraints
min_community, max_community = 50, 100  # community size constraints
gamma = 2.5  # power law exponent for the degree distribution
beta = 1.5   # power law exponent for the community size distribution
max_iter = 100

# build the example graph from the settings above
G = LFR_benchmark_graph(
    N, gamma, beta, mu,
    average_degree=average_degree,
    max_degree=max_degree,
    max_community=max_community,
    min_community=min_community,
    max_iters=max_iter,
)

# plt.close('all')
# plt.figure()
# plt.title('Ground Truth Communities')
# drawNetwork(G, communities_true, labels=False)

In [21]:
import warnings

# silence every warning for the rest of the session
warnings.filterwarnings('ignore')

# benchmark grid: network sizes and mu values to sweep
Ns = [400]
# mus = np.linspace(0.15, 0.75, 10)
mus = np.linspace(0.1, 0.75, 3)
num_realizations = 1
print(mus)

# run_benchmark returns, per method, (mean_nmi, std_err, mean_err, std_nmi)
(
    mean_nmi_infomap, std_err_infomap, mean_err_infomap, std_nmi_infomap,
    mean_nmi_altmap, std_err_altmap, mean_err_altmap, std_nmi_altmap,
    mean_nmi_altmap_init, std_err_altmap_init, mean_err_altmap_init, std_nmi_altmap_init,
) = run_benchmark(Ns, mus, num_realizations=num_realizations)

[0.1   0.425 0.75 ]
Starting benchmark 1/3 for (N,mu) = (400,0.1)

Infomap found 14 communities vs. 14 ground truth communities.

Altmap found 14 communities vs. 14 ground truth communities.

Altmap with init found 14 communities vs. 14 ground truth communities.

Starting benchmark 2/3 for (N,mu) = (400,0.42500000000000004)

Infomap found 1 communities vs. 14 ground truth communities.

Altmap found 29 communities vs. 14 ground truth communities.

Altmap with init found 24 communities vs. 14 ground truth communities.

Starting benchmark 3/3 for (N,mu) = (400,0.75)

Infomap found 1 communities vs. 16 ground truth communities.

Altmap found 51 communities vs. 16 ground truth communities.

Altmap with init found 29 communities vs. 16 ground truth communities.

Finished benchmark successfully!



In [7]:
# Plot mean and standard deviation of the NMI over mu for the first network
# size in Ns (column 0 of the result arrays).
plt.close('all')
fig, axs = plt.subplots(2, 1, sharex=True)
# title shows the network size that is actually plotted
fig.suptitle(f'LFR benchmark, N = {Ns[0]} nodes')

# upper panel: mean NMI per method
axs[0].plot(mus, mean_nmi_infomap[:, 0], 'x--', linewidth=2, markersize=12, label='Infomap')
axs[0].plot(mus, mean_nmi_altmap[:, 0], '^--', linewidth=2, markersize=12, label='Altmap')
axs[0].plot(mus, mean_nmi_altmap_init[:, 0], 'o--', linewidth=2, markersize=12, label='Altmap with sc init')
# red marker at mu = 0.5; also keeps the full NMI range [0, 1] in view
axs[0].plot([0.5, 0.5], [0, 1], 'r')
axs[0].grid()
# raw string: '\m' is not a valid escape sequence in a normal string literal
axs[0].set_xlabel(r'Mixing parameter $\mu$')
axs[0].set_ylabel('NMI')
axs[0].legend()

# lower panel: standard deviation of the NMI per method
axs[1].plot(mus, std_nmi_infomap[:, 0], 'x--', linewidth=2, markersize=12, label='Infomap')
axs[1].plot(mus, std_nmi_altmap[:, 0], '^--', linewidth=2, markersize=12, label='Altmap')
axs[1].plot(mus, std_nmi_altmap_init[:, 0], 'o--', linewidth=2, markersize=12, label='Altmap with sc init')
# axvline spans the whole axis height, so the marker does not depend on the
# data range (the std values are NaN when only one realization was run)
axs[1].axvline(0.5, color='r')
axs[1].grid()
axs[1].set_xlabel(r'Mixing parameter $\mu$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

<matplotlib.legend.Legend at 0x7ff8c0ca1fd0>

In [9]:
# Plot the mean relative error of the number of detected communities over mu
# for the first network size in Ns (column 0 of the result arrays).
plt.close('all')
plt.figure()
# title shows the network size that is actually plotted
plt.title(f'LFR benchmark, N = {Ns[0]} nodes')

# global value range over all methods; the nan-aware reductions ignore grid
# cells that hold NaN, which would otherwise turn the axis limits into NaN
all_mean_err = np.array([mean_err_infomap, mean_err_altmap, mean_err_altmap_init])
min_err = np.nanmin(all_mean_err)
max_err = np.nanmax(all_mean_err)

plt.plot(mus, mean_err_infomap[:, 0], 'x--', linewidth=2, markersize=12, label='Infomap')
plt.plot(mus, mean_err_altmap[:, 0], '^--', linewidth=2, markersize=12, label='Altmap')
plt.plot(mus, mean_err_altmap_init[:, 0], 'o--', linewidth=2, markersize=12, label='Altmap with sc init')
# red marker at mu = 0.5 spanning the padded value range
plt.plot([0.5, 0.5], [min_err - 0.2, max_err + 0.2], 'r')
plt.ylim([min_err - 0.2, max_err + 0.2])
plt.grid()
# raw string: '\m' is not a valid escape sequence in a normal string literal
plt.xlabel(r'Mixing parameter $\mu$')
plt.ylabel('Mean relative error')
plt.legend()

<matplotlib.legend.Legend at 0x7f14f0073d68>