## LFR benchmark for Altmap vs Map Eq
### Compare altmap to map eq using networkx

In [3]:
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator)
import numpy as np
from collections import OrderedDict

# show plots in separate window
%pylab
# load helpers and wrappers
%run helpers.py 

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
from networkx.generators.community import LFR_benchmark_graph
from sklearn.metrics import normalized_mutual_info_score as nmi_score

# generate LFR benchmark graph + extract ground truth communities
def generate_LFR_benchmark(N = 500, mu = 0.1):
    
    # LFR params N=10000
    # params = {'max_degree':50, 'max_community':100, 'min_community':50, 'average_degree':20, 'tau1':3.0, 'tau2':1.5}
    # LFR params N=5000
    # params = {'max_degree':50, 'max_community':100, 'min_community':50, 'average_degree':20, 'tau1':2.5, 'tau2':1.5}
    # LFR params N<1000
    # params = {'max_degree':50, 'max_community':50, 'min_community':20, 'average_degree':10, 'tau1':3.0, 'tau2':1.5}
    # params = {'max_degree':int(0.1*N), 'max_community':int(0.1*N), 'min_community':20, 'average_degree':15, 'tau1':3.0, 
    #           'tau2':1.5}
    # params = {'max_degree':int(0.1*N), 'max_community':int(0.1*N), 'min_community':50, 'average_degree':20, 'tau1':3.0, 
    #           'tau2':1.5}
    
    # LFR params generic
    max_community = int(0.1*N)
    min_community = int(max_community * 0.5)
    max_degree = int(max_community * 0.8)
    min_degree = int(min_community * 0.8)
    gamma = 3.5 # Power law exponent for the degree distribution 
    beta = 3.0 # Power law exponent for the community size distribution
    params = {'max_degree':max_degree, 'max_community':max_community, 'min_community':min_community,
              'min_degree':min_degree, 'tau1':gamma, 'tau2':beta}
    

    max_degree = params['max_degree']
    max_community = params['max_community']
    min_community = params['min_community']
    #average_degree = params['average_degree']
    min_degree = params['min_degree']
    tau1 = params['tau1'] # Power law exponent for the degree distribution 
    tau2 = params['tau2'] # Power law exponent for the community size distribution

    # generate LFR benchmark graph
    
    G = LFR_benchmark_graph(N, tau1, tau2, mu, min_degree=min_degree, max_degree=max_degree, 
                            max_community=max_community, min_community=min_community, max_iters=700, tol=1.1)
    G = nx.convert_node_labels_to_integers(G, first_label=1)
    
    # extract ground truth communities from networkx graph object
    communities_true = {}
    num_communities = 0
    for n in range(1,N+1):
        if n in communities_true:
            continue
            
        num_communities = num_communities + 1
        community = G.nodes[n]['community']
        node_ids = np.asarray(list(community))
        node_ids = node_ids + 1 # have node labels >= 1
        communities_true.update(dict.fromkeys(node_ids , num_communities))
        
    communities_true = OrderedDict(sorted(communities_true.items()))
    num_communities_true = max(communities_true.values()) - min(communities_true.values()) + 1
    
    return G, communities_true, num_communities_true

# compute normalized mutual information between two partitions
def compute_nmi(communities_true, communities_found):
    labels_true = list(communities_true.values())
    labels_found = list(communities_found.values())

    return nmi_score(labels_true,labels_found, average_method='arithmetic')

# LFR Benchmark
# num_realizations .. number of network realizations for each parameter pair (mu, N)
def run_benchmark(N_list, mu_list, num_realizations=10):
    num_benchmarks = len(N_list) * len(mu_list) * num_realizations
    benchmark_id = 0
    
    num_actual_realizations = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_infomap = np.zeros((len(mu_list), len(N_list)))
    std_nmi_infomap = np.zeros((len(mu_list), len(N_list)))
    mean_err_infomap = np.zeros((len(mu_list), len(N_list)))
    std_err_infomap = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_altmap = np.zeros((len(mu_list), len(N_list)))
    std_nmi_altmap = np.zeros((len(mu_list), len(N_list)))
    mean_err_altmap = np.zeros((len(mu_list), len(N_list)))
    std_err_altmap = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_altmap_init = np.zeros((len(mu_list), len(N_list)))
    std_nmi_altmap_init = np.zeros((len(mu_list), len(N_list)))
    mean_err_altmap_init = np.zeros((len(mu_list), len(N_list)))
    std_err_altmap_init = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_sc = np.zeros((len(mu_list), len(N_list)))
    std_nmi_sc = np.zeros((len(mu_list), len(N_list)))
    mean_err_sc = np.zeros((len(mu_list), len(N_list)))
    std_err_sc = np.zeros((len(mu_list), len(N_list)))
    
    for mu_idx, mu in enumerate(mu_list):
        for N_idx, N in enumerate(N_list):
            nmi_list_infomap = []
            err_list_infomap = []
            nmi_list_altmap = []
            err_list_altmap = []
            nmi_list_altmap_init = []
            err_list_altmap_init = []
            nmi_list_sc = []
            err_list_sc = []
            
            actual_realizations = 0
            for realization in range(0, num_realizations):
                benchmark_id = benchmark_id + 1
                print(f'Starting benchmark {benchmark_id}/{num_benchmarks} for (N,mu) = ({N},{mu})\n')
                try:
                    G, communities_true, num_communities_true = generate_LFR_benchmark(N, mu)
                except nx.ExceededMaxIterations as err:
                    print(f'Failed to generate network for (N,mu) = ({N},{mu}): ', err)
                    continue
                
                actual_realizations += 1
                
                # test infomap
                communities_found, num_communities_found, _, _ = infomap(G, altmap=False)
                print (f'Infomap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                       f'communities.\n')
                
                nmi = compute_nmi(communities_true, communities_found)
                nmi_list_infomap.append(nmi)
                err_list_infomap.append(num_communities_found/num_communities_true - 1.0)
                
                # test altmap
                communities_found, num_communities_found, _, _ = infomap(G, altmap=True, update_inputfile=False)
                print (f'Altmap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                       f'communities.\n')
                
                nmi = compute_nmi(communities_true, communities_found)
                nmi_list_altmap.append(nmi)
                err_list_altmap.append(num_communities_found/num_communities_true - 1.0)
                
                # test altmap with init
                communities_found, num_communities_found,\
                communities_init, num_communities_init = infomap(G, altmap=True, init='sc', update_inputfile=False)
                print (f'Altmap with SCI ({num_communities_init}) found {num_communities_found} communities vs. '
                       f'{num_communities_true} '
                       f'ground truth communities.\n')
                
                nmi = compute_nmi(communities_true, communities_found)
                nmi_list_altmap_init.append(nmi)
                err_list_altmap_init.append(num_communities_found/num_communities_true - 1.0)
            
                nmi = compute_nmi(communities_true, communities_init)
                nmi_list_sc.append(nmi)
                err_list_sc.append(num_communities_init/num_communities_true - 1.0)
        
            num_actual_realizations[mu_idx, N_idx] = actual_realizations
            
            # check if benchmark generation succeeded
            if nmi_list_infomap:
                mean_nmi_infomap[mu_idx, N_idx] = np.mean(nmi_list_infomap)
                std_nmi_infomap[mu_idx, N_idx] = np.std(nmi_list_infomap, ddof=1)
                mean_err_infomap[mu_idx, N_idx] = np.mean(err_list_infomap)
                std_err_infomap[mu_idx, N_idx] = np.std(err_list_infomap, ddof=1)
                
                mean_nmi_altmap[mu_idx, N_idx] = np.mean(nmi_list_altmap)
                std_nmi_altmap[mu_idx, N_idx] = np.std(nmi_list_altmap, ddof=1)
                mean_err_altmap[mu_idx, N_idx] = np.mean(err_list_altmap)
                std_err_altmap[mu_idx, N_idx] = np.std(err_list_altmap, ddof=1)
                
                mean_nmi_altmap_init[mu_idx, N_idx] = np.mean(nmi_list_altmap_init)
                std_nmi_altmap_init[mu_idx, N_idx] = np.std(nmi_list_altmap_init, ddof=1)
                mean_err_altmap_init[mu_idx, N_idx] = np.mean(err_list_altmap_init)
                std_err_altmap_init[mu_idx, N_idx] = np.std(err_list_altmap_init, ddof=1)
                
                mean_nmi_sc[mu_idx, N_idx] = np.mean(nmi_list_sc)
                std_nmi_sc[mu_idx, N_idx] = np.std(nmi_list_sc, ddof=1)
                mean_err_sc[mu_idx, N_idx] = np.mean(err_list_sc)
                std_err_sc[mu_idx, N_idx] = np.std(err_list_sc, ddof=1)
            else:
                mean_nmi_infomap[mu_idx, N_idx] = None
                std_nmi_infomap[mu_idx, N_idx] = None
                mean_err_infomap[mu_idx, N_idx] = None
                std_err_infomap[mu_idx, N_idx] = None
                
                mean_nmi_altmap[mu_idx, N_idx] = None
                std_nmi_altmap[mu_idx, N_idx] = None
                mean_err_altmap[mu_idx, N_idx] = None
                std_err_altmap[mu_idx, N_idx] = None
                
                mean_nmi_altmap_init[mu_idx, N_idx] = None
                std_nmi_altmap_init[mu_idx, N_idx] = None
                mean_err_altmap_init[mu_idx, N_idx] = None
                std_err_altmap_init[mu_idx, N_idx] = None
                
                mean_nmi_sc[mu_idx, N_idx] = None
                std_nmi_sc[mu_idx, N_idx] = None
                mean_err_sc[mu_idx, N_idx] = None
                std_err_sc[mu_idx, N_idx] = None

    
    print(f'Finished benchmark successfully!\n')
    return mean_nmi_infomap, std_err_infomap, mean_err_infomap, std_nmi_infomap, \
           mean_nmi_altmap, std_err_altmap, mean_err_altmap, std_nmi_altmap, \
           mean_nmi_altmap_init, std_err_altmap_init, mean_err_altmap_init, std_nmi_altmap_init, \
           mean_nmi_sc, std_err_sc, mean_err_sc, std_nmi_sc, \
           num_actual_realizations



In [4]:
import warnings
warnings.filterwarnings('ignore')

Ns = [500]
mus = np.linspace(0.15, 0.75, 10)
# Ns = np.linspace(500, 10000, 10, dtype=int)
# mus = [0.4]

num_realizations = 2  
print (mus)

mean_nmi_infomap, std_err_infomap, mean_err_infomap, std_nmi_infomap, \
mean_nmi_altmap, std_err_altmap, mean_err_altmap, std_nmi_altmap, \
mean_nmi_altmap_init, std_err_altmap_init, mean_err_altmap_init, std_nmi_altmap_init, \
mean_nmi_sc, std_err_sc, mean_err_sc, std_nmi_sc, \
num_actual_realizations = run_benchmark(Ns, mus, num_realizations=num_realizations)

print (num_actual_realizations)
avg_num_realizations = int(np.round(np.mean(num_actual_realizations)))
print (f'Average number of realizations is {avg_num_realizations}.')

[0.15       0.21666667 0.28333333 0.35       0.41666667 0.48333333
 0.55       0.61666667 0.68333333 0.75      ]
Starting benchmark 1/20 for (N,mu) = (500,0.15)

Infomap found 15 communities vs. 15 ground truth communities.

Altmap found 15 communities vs. 15 ground truth communities.

Spectral clustering finished in 0.47938800000000015 seconds.
Altmap with SCI (22) found 15 communities vs. 15 ground truth communities.

Starting benchmark 2/20 for (N,mu) = (500,0.15)

Infomap found 15 communities vs. 15 ground truth communities.

Altmap found 15 communities vs. 15 ground truth communities.

Spectral clustering finished in 0.9033739999999995 seconds.
Altmap with SCI (22) found 15 communities vs. 15 ground truth communities.

Starting benchmark 3/20 for (N,mu) = (500,0.21666666666666667)

Infomap found 16 communities vs. 16 ground truth communities.

Altmap found 16 communities vs. 16 ground truth communities.

Spectral clustering finished in 0.612171 seconds.
Altmap with SCI (22) found 

In [5]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, N = {Ns[0]} nodes')

axs[0].plot(mus, mean_nmi_infomap[:,0], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(mus, mean_nmi_sc[:,0], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(mus, mean_nmi_altmap[:,0], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(mus, mean_nmi_altmap_init[:,0], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].plot([0.5, 0.5], [0,1], 'r')
axs[0].grid()
axs[0].set_xlabel('Mixing parameter $\mu$')
axs[0].set_ylabel('NMI')
axs[0].legend()

axs[1].plot(mus, std_nmi_infomap[:,0], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(mus, std_nmi_sc[:,0], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(mus, std_nmi_altmap[:,0], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(mus, std_nmi_altmap_init[:,0], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].plot([0.5, 0.5], [0,np.max(np.max(std_nmi_infomap))], 'r')
axs[1].grid()
axs[1].set_xlabel('Mixing parameter $\mu$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

<matplotlib.legend.Legend at 0x7f5b44e81ed0>

In [34]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, N = {Ns[0]} nodes')

min_err = np.min(np.min([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))
max_err = np.max(np.max([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))

axs[0].plot(mus, mean_err_infomap[:,0], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(mus, mean_err_sc[:,0], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(mus, mean_err_altmap[:,0], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(mus, mean_err_altmap_init[:,0], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].plot([0.5, 0.5], [min_err-0.2,max_err+0.2], 'r')
axs[0].set_ylim([min_err-0.2,max_err+0.2])
axs[0].grid()
axs[0].set_xlabel('Mixing parameter $\mu$')
axs[0].set_ylabel(r'Mean relative error $\bar{e}_\theta$')
axs[0].legend()

axs[1].plot(mus, std_err_infomap[:,0], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(mus, std_err_sc[:,0], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(mus, std_err_altmap[:,0], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(mus, std_err_altmap_init[:,0], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].plot([0.5, 0.5], [0,np.max(np.max(std_err_altmap))], 'r')
axs[1].grid()
axs[1].set_xlabel('Mixing parameter $\mu$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

<matplotlib.legend.Legend at 0x7f0322488e10>

In [43]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, $\mu$ = {mus[0]}')

axs[0].plot(Ns, mean_nmi_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(Ns, mean_nmi_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(Ns, mean_nmi_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(Ns, mean_nmi_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].grid()
axs[0].set_xlabel('Number of nodes $N$')
axs[0].set_ylabel('NMI')
axs[0].legend()

axs[1].plot(Ns, std_nmi_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(Ns, std_nmi_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(Ns, std_nmi_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(Ns, std_nmi_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].grid()
axs[1].set_xlabel('Number of nodes $N$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

<matplotlib.legend.Legend at 0x7f02fcd18710>

In [45]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, $\mu$ = {mus[0]}')

min_err = np.min(np.min([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))
max_err = np.max(np.max([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))

axs[0].plot(Ns, mean_err_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(Ns, mean_err_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(Ns, mean_err_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(Ns, mean_err_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].set_ylim([min_err-0.2,max_err+0.2])
axs[0].grid()
axs[0].set_xlabel('Number of nodes $N$')
axs[0].set_ylabel(r'Mean relative error $\bar{e}_\theta$')
axs[0].legend()

axs[1].plot(Ns, std_err_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(Ns, std_err_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(Ns, std_err_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(Ns, std_err_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].grid()
axs[1].set_xlabel('Number of nodes $N$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

ValueError: Axis limits cannot be NaN or Inf

In [611]:
# LFR params
N = 5000
mu = 0.1
max_community = int(0.1*N)
min_community = int(max_community * 0.5)
max_degree = int(max_community * 0.8)
min_degree = int(min_community * 0.8)
gamma = 3.5 # Power law exponent for the degree distribution 
beta = 3.0 # Power law exponent for the community size distribution
max_iter = 500

# generate LFR benchmark graph
G = LFR_benchmark_graph(N, gamma, beta, mu, min_degree=min_degree, max_degree=max_degree, 
                        max_community=max_community, min_community=min_community, max_iters=max_iter, tol=1)


In [622]:
G, communities_true, num_communities_true = generate_LFR_benchmark(N=5000, mu=0.1)

communities_infomap, _, _, _ = infomap(G, altmap=False)
communities_altmap, _, _, _ = infomap(G, altmap=True, update_inputfile=False)
communities_altmap_sci, _, communities_sci, _ = infomap(G, altmap=True, init='sc', update_inputfile=False)
            

Spectral clustering finished in 73.5142820000001 seconds.


In [623]:
# plotting params
node_size = 150
edge_width = 0.2
font_size = 13
pos = nx.spring_layout(G)
# pos = nx.spectral_layout(G)

plt.close('all')
fig, axs = plt.subplots(2,2)

ax = axs[0,0]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Infomap')
communities = communities_infomap
color_idc = [v for v in communities.values()]
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[0,1]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Synthesizing Infomap')
communities = communities_altmap
color_idc = [v for v in communities.values()]
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[1,0]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Synthesizing Infomap with SCI')
communities = communities_altmap_sci
color_idc = [v for v in communities.values()]
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[1,1]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Ground truth')
communities = communities_true
color_idc = [v for v in communities.values()]
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)

