## LFR benchmark for Altmap vs Map Eq
### Compare altmap to map eq using networkx

In [70]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from collections import OrderedDict

# show plots in separate window
%pylab
# load helpers and wrappers
%run helpers.py 

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [97]:
from networkx.generators.community import LFR_benchmark_graph
from sklearn.metrics import normalized_mutual_info_score as nmi_score
from sklearn.metrics import adjusted_mutual_info_score as ami_score

# generate LFR benchmark graph + extract ground truth communities
def generate_LFR_benchmark(N = 500, mu = 0.1):
    
    # LFR params N=10000
    # params = {'max_degree':50, 'max_community':100, 'min_community':50, 'average_degree':20, 'tau1':3.0, 'tau2':1.5}
    
    # LFR params generic
    max_community = int(0.2*N)
    min_community = int(max_community * 0.25) #0.5
    max_degree = int(max_community * 0.3) # 0.6
    min_degree = int(min_community * 0.4)
    gamma = 3.5 # Power law exponent for the degree distribution 
    beta = 1.1 #3.0 # Power law exponent for the community size distribution
    params = {'max_degree':max_degree, 'max_community':max_community, 'min_community':min_community,
              'min_degree':min_degree, 'tau1':gamma, 'tau2':beta}
    

    max_degree = params['max_degree']
    max_community = params['max_community']
    min_community = params['min_community']
    #average_degree = params['average_degree']
    min_degree = params['min_degree']
    tau1 = params['tau1'] # Power law exponent for the degree distribution 
    tau2 = params['tau2'] # Power law exponent for the community size distribution

    # generate LFR benchmark graph
    
    G = LFR_benchmark_graph(N, tau1, tau2, mu, min_degree=min_degree, max_degree=max_degree, 
                            max_community=max_community, min_community=min_community)
    G = nx.convert_node_labels_to_integers(G, first_label=1)
    
    # extract ground truth communities from networkx graph object
    communities_true = {}
    num_communities = 0
    for n in range(1,N+1):
        if n in communities_true:
            continue
            
        num_communities = num_communities + 1
        community = G.nodes[n]['community']
        node_ids = np.asarray(list(community))
        node_ids = node_ids + 1 # have node labels >= 1
        communities_true.update(dict.fromkeys(node_ids , num_communities))
        
    communities_true = OrderedDict(sorted(communities_true.items()))
    num_communities_true = max(communities_true.values()) - min(communities_true.values()) + 1
    
    return G, communities_true, num_communities_true

# compute normalized mutual information between two partitions
def compute_score(communities_true, communities_found):
    labels_true = list(communities_true.values())
    labels_found = list(communities_found.values())

    # return nmi_score(labels_true,labels_found, average_method='arithmetic')
    return ami_score(labels_true,labels_found, average_method='arithmetic')

class BenchmarkResults:
    def __init__(self, var_list, num_realizations):
        self.var_list = var_list
        self.num_datapoints = len(var_list)
        self.num_realizations = num_realizations
        self.actual_realizations = np.zeros((self.num_datapoints,), dtype=int)
        self.scores = np.zeros((num_realizations, self.num_datapoints))
        self.errors = np.zeros((num_realizations, self.num_datapoints))
    
    def evaluate_results(self):
        self.mean_scores = [None for _ in range(self.num_datapoints)]
        self.std_scores = [None for _ in range(self.num_datapoints)]
        self.mean_errors = [None for _ in range(self.num_datapoints)]
        self.std_errors = [None for _ in range(self.num_datapoints)]
        
        for i in range(self.num_datapoints):
            nr = self.actual_realizations[i]
            if nr == 0:
                continue
            
            self.mean_scores[i] = np.mean(self.scores[:nr, i])
            self.std_scores[i] = np.std(self.scores[:nr, i], ddof=1)
            self.mean_errors[i] = np.mean(self.errors[:nr, i])
            self.std_errors[i] = np.std(self.errors[:nr, i], ddof=1)
    
    def write_csv(self, path):
        df = pd.DataFrame()
        
        df['var_list'] = self.var_list
        df['actual_realizations'] = self.actual_realizations
        df['mean_scores'] = self.mean_scores
        df['std_scores'] = self.std_scores
        df['mean_errors'] = self.mean_errors
        df['std_errors'] = self.std_errors
        
        df.to_csv(path, index_label='id')
            
            
        
# LFR Benchmark
# num_realizations .. number of network realizations for each parameter pair (mu, N)
def run_LFR_benchmark(N, mu_list, num_realizations=10):
    num_benchmarks = len(mu_list) * num_realizations
    benchmark_id = 0
           
    infomap_results = BenchmarkResults(mu_list, num_realizations)
    altmap_results = BenchmarkResults(mu_list, num_realizations)
    altmap_sci_results = BenchmarkResults(mu_list, num_realizations)
    sci_results = BenchmarkResults(mu_list, num_realizations)
    for mu_idx, mu in enumerate(mu_list):
        
        realization_idx = -1
        for _ in range(num_realizations):
            benchmark_id = benchmark_id + 1
            print(f'Starting benchmark {benchmark_id}/{num_benchmarks} for (N,mu) = ({N},{mu})\n')
            try:
                G, communities_true, num_communities_true = generate_LFR_benchmark(N, mu)
            except nx.ExceededMaxIterations as err:
                print(f'Failed to generate network for (N,mu) = ({N},{mu}): ', err)
                continue
            
            realization_idx += 1
            
            # test infomap
            communities_found, num_communities_found, _, _ = infomap(G, altmap=False)
            print (f'Infomap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                   f'communities.\n')
            
            score = compute_score(communities_true, communities_found)
            infomap_results.scores[realization_idx, mu_idx] = score
            error = num_communities_found/num_communities_true - 1.0
            infomap_results.errors[realization_idx, mu_idx] = error
            
            # test altmap
            communities_found, num_communities_found, _, _ = infomap(G, altmap=True, update_inputfile=False)
            print (f'Altmap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                   f'communities.\n')
            
            score = compute_score(communities_true, communities_found)
            altmap_results.scores[realization_idx, mu_idx] = score
            error = num_communities_found/num_communities_true - 1.0
            altmap_results.errors[realization_idx, mu_idx] = error
        
            # test altmap with SCI
            communities_found, num_communities_found,\
            communities_init, num_communities_init = infomap(G, altmap=True, init='sc', update_inputfile=False)
            print (f'Altmap with SCI ({num_communities_init}) found {num_communities_found} communities vs. '
                   f'{num_communities_true} '
                   f'ground truth communities.\n')
            
            score = compute_score(communities_true, communities_found)
            altmap_sci_results.scores[realization_idx, mu_idx] = score
            error = num_communities_found/num_communities_true - 1.0
            altmap_sci_results.errors[realization_idx, mu_idx] = error
            
            score = compute_score(communities_true, communities_init)
            sci_results.scores[realization_idx, mu_idx] = score
            error = num_communities_init/num_communities_true - 1.0
            sci_results.errors[realization_idx, mu_idx] = error
        
        # store actual number of realizations    
        infomap_results.actual_realizations[mu_idx] = realization_idx + 1
        altmap_results.actual_realizations[mu_idx] = realization_idx + 1
        altmap_sci_results.actual_realizations[mu_idx] = realization_idx + 1
        sci_results.actual_realizations[mu_idx] = realization_idx + 1

    print(f'Finished benchmark successfully!\n')
    return infomap_results, altmap_results, altmap_sci_results, sci_results

# LFR Benchmark
# num_realizations .. number of network realizations for each parameter pair (mu, N)
def run_benchmark(N_list, mu_list, num_realizations=10):
    num_benchmarks = len(N_list) * len(mu_list) * num_realizations
    benchmark_id = 0
    
    num_actual_realizations = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_infomap = np.zeros((len(mu_list), len(N_list)))
    std_nmi_infomap = np.zeros((len(mu_list), len(N_list)))
    mean_err_infomap = np.zeros((len(mu_list), len(N_list)))
    std_err_infomap = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_altmap = np.zeros((len(mu_list), len(N_list)))
    std_nmi_altmap = np.zeros((len(mu_list), len(N_list)))
    mean_err_altmap = np.zeros((len(mu_list), len(N_list)))
    std_err_altmap = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_altmap_init = np.zeros((len(mu_list), len(N_list)))
    std_nmi_altmap_init = np.zeros((len(mu_list), len(N_list)))
    mean_err_altmap_init = np.zeros((len(mu_list), len(N_list)))
    std_err_altmap_init = np.zeros((len(mu_list), len(N_list)))
    
    mean_nmi_sc = np.zeros((len(mu_list), len(N_list)))
    std_nmi_sc = np.zeros((len(mu_list), len(N_list)))
    mean_err_sc = np.zeros((len(mu_list), len(N_list)))
    std_err_sc = np.zeros((len(mu_list), len(N_list)))
    
    for mu_idx, mu in enumerate(mu_list):
        for N_idx, N in enumerate(N_list):
            nmi_list_infomap = []
            err_list_infomap = []
            nmi_list_altmap = []
            err_list_altmap = []
            nmi_list_altmap_init = []
            err_list_altmap_init = []
            nmi_list_sc = []
            err_list_sc = []
            
            actual_realizations = 0
            for realization in range(0, num_realizations):
                benchmark_id = benchmark_id + 1
                print(f'Starting benchmark {benchmark_id}/{num_benchmarks} for (N,mu) = ({N},{mu})\n')
                try:
                    G, communities_true, num_communities_true = generate_LFR_benchmark(N, mu)
                except nx.ExceededMaxIterations as err:
                    print(f'Failed to generate network for (N,mu) = ({N},{mu}): ', err)
                    continue
                
                actual_realizations += 1
                
                # test infomap
                communities_found, num_communities_found, _, _ = infomap(G, altmap=False)
                print (f'Infomap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                       f'communities.\n')
                
                nmi = compute_score(communities_true, communities_found)
                nmi_list_infomap.append(nmi)
                err_list_infomap.append(num_communities_found/num_communities_true - 1.0)
                
                # test altmap
                communities_found, num_communities_found, _, _ = infomap(G, altmap=True, update_inputfile=False)
                print (f'Altmap found {num_communities_found} communities vs. {num_communities_true} ground truth '
                       f'communities.\n')
                
                nmi = compute_score(communities_true, communities_found)
                nmi_list_altmap.append(nmi)
                err_list_altmap.append(num_communities_found/num_communities_true - 1.0)
                
                # test altmap with init
                communities_found, num_communities_found,\
                communities_init, num_communities_init = infomap(G, altmap=True, init='sc', update_inputfile=False)
                print (f'Altmap with SCI ({num_communities_init}) found {num_communities_found} communities vs. '
                       f'{num_communities_true} '
                       f'ground truth communities.\n')
                
                nmi = compute_score(communities_true, communities_found)
                nmi_list_altmap_init.append(nmi)
                err_list_altmap_init.append(num_communities_found/num_communities_true - 1.0)
            
                nmi = compute_score(communities_true, communities_init)
                nmi_list_sc.append(nmi)
                err_list_sc.append(num_communities_init/num_communities_true - 1.0)
        
            num_actual_realizations[mu_idx, N_idx] = actual_realizations
            
            # check if benchmark generation succeeded
            if nmi_list_infomap:
                mean_nmi_infomap[mu_idx, N_idx] = np.mean(nmi_list_infomap)
                std_nmi_infomap[mu_idx, N_idx] = np.std(nmi_list_infomap, ddof=1)
                mean_err_infomap[mu_idx, N_idx] = np.mean(err_list_infomap)
                std_err_infomap[mu_idx, N_idx] = np.std(err_list_infomap, ddof=1)
                
                mean_nmi_altmap[mu_idx, N_idx] = np.mean(nmi_list_altmap)
                std_nmi_altmap[mu_idx, N_idx] = np.std(nmi_list_altmap, ddof=1)
                mean_err_altmap[mu_idx, N_idx] = np.mean(err_list_altmap)
                std_err_altmap[mu_idx, N_idx] = np.std(err_list_altmap, ddof=1)
                
                mean_nmi_altmap_init[mu_idx, N_idx] = np.mean(nmi_list_altmap_init)
                std_nmi_altmap_init[mu_idx, N_idx] = np.std(nmi_list_altmap_init, ddof=1)
                mean_err_altmap_init[mu_idx, N_idx] = np.mean(err_list_altmap_init)
                std_err_altmap_init[mu_idx, N_idx] = np.std(err_list_altmap_init, ddof=1)
                
                mean_nmi_sc[mu_idx, N_idx] = np.mean(nmi_list_sc)
                std_nmi_sc[mu_idx, N_idx] = np.std(nmi_list_sc, ddof=1)
                mean_err_sc[mu_idx, N_idx] = np.mean(err_list_sc)
                std_err_sc[mu_idx, N_idx] = np.std(err_list_sc, ddof=1)
            else:
                mean_nmi_infomap[mu_idx, N_idx] = None
                std_nmi_infomap[mu_idx, N_idx] = None
                mean_err_infomap[mu_idx, N_idx] = None
                std_err_infomap[mu_idx, N_idx] = None
                
                mean_nmi_altmap[mu_idx, N_idx] = None
                std_nmi_altmap[mu_idx, N_idx] = None
                mean_err_altmap[mu_idx, N_idx] = None
                std_err_altmap[mu_idx, N_idx] = None
                
                mean_nmi_altmap_init[mu_idx, N_idx] = None
                std_nmi_altmap_init[mu_idx, N_idx] = None
                mean_err_altmap_init[mu_idx, N_idx] = None
                std_err_altmap_init[mu_idx, N_idx] = None
                
                mean_nmi_sc[mu_idx, N_idx] = None
                std_nmi_sc[mu_idx, N_idx] = None
                mean_err_sc[mu_idx, N_idx] = None
                std_err_sc[mu_idx, N_idx] = None

    
    print(f'Finished benchmark successfully!\n')
    return mean_nmi_infomap, std_err_infomap, mean_err_infomap, std_nmi_infomap, \
           mean_nmi_altmap, std_err_altmap, mean_err_altmap, std_nmi_altmap, \
           mean_nmi_altmap_init, std_err_altmap_init, mean_err_altmap_init, std_nmi_altmap_init, \
           mean_nmi_sc, std_err_sc, mean_err_sc, std_nmi_sc, \
           num_actual_realizations

def plot_benchmark_results(results: BenchmarkResults, type='scores', color='blue', marker=None, label=None, 
                           lower_bound=None, upper_bound=None):
    
    xdata = results.var_list
    data = results.mean_scores
    data_std = results.std_scores
    
    if type == 'errors':
        data = results.mean_errors
        data_std = results.std_errors
        
    lw = 2; ms = 10
    plt.plot(xdata, data, '--', marker = marker, color=color, linewidth=lw, markersize=ms, label=label)
    upper = np.array(data, dtype=float) + np.array(data_std, dtype=float)
    lower = np.array(data, dtype=float) - np.array(data_std, dtype=float)
    if lower_bound != None:
        lower[lower < lower_bound] = lower_bound
    if upper_bound != None:
        upper[upper > upper_bound] = upper_bound
        
    plt.fill_between(xdata, upper, lower, color=color, alpha=0.25)
    
def plot_benchmark_data(xdata, data_mean, data_std, color='blue', marker=None, label=None, lower_bound=None, 
                        upper_bound=None):
    lw = 2; ms = 10
    plt.plot(xdata, data_mean, '--', marker = marker, color=color, linewidth=lw, markersize=ms, label=label)
    upper = data_mean + data_std
    lower = data_mean - data_std
    if lower_bound != None:
        lower[lower < lower_bound] = lower_bound
    if upper_bound != None:
        upper[upper > upper_bound] = upper_bound
    plt.fill_between(xdata, upper, lower, color=color, alpha=0.25)
    

In [35]:
import warnings
warnings.filterwarnings('ignore')

Ns = [500]
mus = np.linspace(0.15, 0.75, 20)
# Ns = np.linspace(500, 10000, 10, dtype=int)
# mus = [0.4]

num_realizations = 100
print (mus)

mean_nmi_infomap, std_err_infomap, mean_err_infomap, std_nmi_infomap, \
mean_nmi_altmap, std_err_altmap, mean_err_altmap, std_nmi_altmap, \
mean_nmi_altmap_init, std_err_altmap_init, mean_err_altmap_init, std_nmi_altmap_init, \
mean_nmi_sc, std_err_sc, mean_err_sc, std_nmi_sc, \
num_actual_realizations = run_benchmark(Ns, mus, num_realizations=num_realizations)

print (num_actual_realizations)
avg_num_realizations = int(np.round(np.mean(num_actual_realizations)))
print (f'Average number of realizations is {avg_num_realizations}.')
print (f'Minimum number of realizations is {np.min(num_actual_realizations)}.')

[0.15       0.18157895 0.21315789 0.24473684 0.27631579 0.30789474
 0.33947368 0.37105263 0.40263158 0.43421053 0.46578947 0.49736842
 0.52894737 0.56052632 0.59210526 0.62368421 0.65526316 0.68684211
 0.71842105 0.75      ]
Starting benchmark 1/2000 for (N,mu) = (500,0.15)

Infomap found 9 communities vs. 9 ground truth communities.

Altmap found 9 communities vs. 9 ground truth communities.

Spectral clustering finished in 1.3951850000000263 seconds.
Altmap with SCI (22) found 9 communities vs. 9 ground truth communities.

Starting benchmark 2/2000 for (N,mu) = (500,0.15)

Infomap found 9 communities vs. 9 ground truth communities.

Altmap found 10 communities vs. 9 ground truth communities.

Spectral clustering finished in 0.33737599999994927 seconds.
Altmap with SCI (22) found 10 communities vs. 9 ground truth communities.

Starting benchmark 3/2000 for (N,mu) = (500,0.15)

Infomap found 9 communities vs. 9 ground truth communities.

Altmap found 9 communities vs. 9 ground truth co

In [93]:
import warnings
warnings.filterwarnings('ignore')

# benchmark params
N = 500
mu_list = np.linspace(0.15, 0.75, 5)
num_realizations = 3
print (mu_list)

infomap_results, altmap_results, altmap_sci_results, sci_results = run_LFR_benchmark(N, mu_list, num_realizations)

[0.15 0.3  0.45 0.6  0.75]
Starting benchmark 1/15 for (N,mu) = (500,0.15)

Infomap found 9 communities vs. 9 ground truth communities.

Altmap found 9 communities vs. 9 ground truth communities.

Spectral clustering finished in 0.727476999999908 seconds.
Altmap with SCI (22) found 9 communities vs. 9 ground truth communities.

Starting benchmark 2/15 for (N,mu) = (500,0.15)

Infomap found 10 communities vs. 10 ground truth communities.

Altmap found 10 communities vs. 10 ground truth communities.

Spectral clustering finished in 0.3407320000001164 seconds.
Altmap with SCI (22) found 10 communities vs. 10 ground truth communities.

Starting benchmark 3/15 for (N,mu) = (500,0.15)

Infomap found 9 communities vs. 9 ground truth communities.

Altmap found 9 communities vs. 9 ground truth communities.

Spectral clustering finished in 0.9700250000000779 seconds.
Altmap with SCI (22) found 9 communities vs. 9 ground truth communities.

Starting benchmark 4/15 for (N,mu) = (500,0.3)

Infomap 

In [94]:
infomap_results.evaluate_results()
altmap_results.evaluate_results()
altmap_sci_results.evaluate_results()
sci_results.evaluate_results()

print (f'Minimum number of realizations is {np.min(infomap_results.actual_realizations)}.')
print (f'Actual realizations are {infomap_results.actual_realizations}.')

Minimum number of realizations is 3.
Actual realizations are [3 3 3 3 3].


In [95]:
infomap_results.write_csv('./lfr_results/infomap_500n.csv')
altmap_results.write_csv('./lfr_results/altmap_500n.csv')
altmap_sci_results.write_csv('./lfr_results/altmap_sci_500n.csv')
sci_results.write_csv('./lfr_results/sci_500n.csv')


In [100]:
plt.close('all')
plt.figure()
# plt.title(f'LFR benchmark, N = {Ns[0]} nodes')
plt.plot([0.5, 0.5], [0,1], 'r')

plot_benchmark_results(infomap_results, color='royalblue', marker='x', label='Infomap', lower_bound=0.0, 
                       upper_bound=1.0)
plot_benchmark_results(sci_results, color='crimson', marker='s', label='SCI')
plot_benchmark_results(altmap_results, color='darkorange', marker='^', label='Synthesizing Infomap')
plot_benchmark_results(altmap_sci_results, color='seagreen', marker='o', label='Synthesizing Infomap with SCI')

plt.grid()
plt.xlabel('Mixing parameter $\mu$')
plt.ylabel(r'$\mathrm{AMI}(\mathcal{Y},\mathcal{Y}_{true})$')
plt.legend()

<matplotlib.legend.Legend at 0x7f2594c279d0>

In [43]:
plt.close('all')
plt.figure()
# plt.title(f'LFR benchmark, N = {Ns[0]} nodes')
plt.plot([0.5, 0.5], [0,1], 'r')

plot_benchmark_data(mus, mean_nmi_infomap[:,0], std_nmi_infomap[:,0], color='royalblue', marker='x', label='Infomap',
                    lower_bound=0.0, upper_bound=1.0)
# plot_benchmark_data(mus, mean_nmi_sc[:,0], std_nmi_sc[:,0], color='crimson', marker='s', label='SCI')
plot_benchmark_data(mus, mean_nmi_altmap[:,0], std_nmi_altmap[:,0], color='darkorange', marker='^', label='Synthesizing Infomap')
plot_benchmark_data(mus, mean_nmi_altmap_init[:,0], std_nmi_altmap_init[:,0], color='seagreen', marker='o', 
                    label='Synthesizing Infomap with SCI')

plt.grid()
plt.xlabel('Mixing parameter $\mu$')
plt.ylabel(r'$\mathrm{AMI}(\mathcal{Y},\mathcal{Y}_{true})$')
plt.legend()

<matplotlib.legend.Legend at 0x7f259a312890>

In [48]:
min_err = np.min(np.min([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))
max_err = np.max(np.max([mean_err_infomap + std_err_infomap, mean_err_altmap + std_err_altmap, mean_err_altmap_init +
 std_err_altmap_init]))

plt.close('all')
plt.figure()
# plt.title(f'LFR benchmark, N = {Ns[0]} nodes')
plt.plot([0.5, 0.5], [min_err-0.2,max_err+0.2], 'r')

plot_benchmark_data(mus, mean_err_infomap[:,0], std_err_infomap[:,0], color='royalblue', marker='x', label='Infomap',
                    lower_bound=-1.0)
# plot_benchmark_data(mus, mean_err_sc[:,0], std_err_sc[:,0], color='crimson', marker='s', label='SCI')
plot_benchmark_data(mus, mean_err_altmap[:,0], std_err_altmap[:,0], color='darkorange', marker='^', label='Synthesizing Infomap')
plot_benchmark_data(mus, mean_err_altmap_init[:,0], std_err_altmap_init[:,0], color='seagreen', marker='o', 
                    label='Synthesizing Infomap with SCI')


plt.grid()
plt.xlabel('Mixing parameter $\mu$')
plt.ylabel(r'Mean relative error $\bar{e}_\theta$')
plt.legend(loc='upper left')

<matplotlib.legend.Legend at 0x7f259504fd50>

In [10]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, $\mu$ = {mus[0]}')

axs[0].plot(Ns, mean_nmi_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(Ns, mean_nmi_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(Ns, mean_nmi_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(Ns, mean_nmi_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].grid()
axs[0].set_xlabel('Number of nodes $N$')
axs[0].set_ylabel('NMI')
axs[0].legend()

axs[1].plot(Ns, std_nmi_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(Ns, std_nmi_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(Ns, std_nmi_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(Ns, std_nmi_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].grid()
axs[1].set_xlabel('Number of nodes $N$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

<matplotlib.legend.Legend at 0x7f560175e450>

In [45]:
plt.close('all')
fig, axs = plt.subplots(2,1,sharex=True)
fig.suptitle(f'LFR benchmark, $\mu$ = {mus[0]}')

min_err = np.min(np.min([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))
max_err = np.max(np.max([mean_err_infomap, mean_err_altmap, mean_err_altmap_init]))

axs[0].plot(Ns, mean_err_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[0].plot(Ns, mean_err_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[0].plot(Ns, mean_err_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[0].plot(Ns, mean_err_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[0].set_ylim([min_err-0.2,max_err+0.2])
axs[0].grid()
axs[0].set_xlabel('Number of nodes $N$')
axs[0].set_ylabel(r'Mean relative error $\bar{e}_\theta$')
axs[0].legend()

axs[1].plot(Ns, std_err_infomap[0,:], 'x--', linewidth=2, markersize=10, label='Infomap')
axs[1].plot(Ns, std_err_sc[0,:], 's--', linewidth=2, markersize=10, label='SCI')
axs[1].plot(Ns, std_err_altmap[0,:], '^--', linewidth=2, markersize=10, label='Altmap')
axs[1].plot(Ns, std_err_altmap_init[0,:], 'o--', linewidth=2, markersize=10, label='Altmap with SCI')
axs[1].grid()
axs[1].set_xlabel('Number of nodes $N$')
axs[1].set_ylabel('Standard deviation')
axs[1].legend()

ValueError: Axis limits cannot be NaN or Inf

In [28]:
# LFR params
N = 5000
mu = 0.01
max_community = int(0.5*N)
min_community = int(max_community * 0.25)
max_degree = int(max_community * 0.3)
min_degree = int(min_community * 0.4)
gamma = 3.5 # Power law exponent for the degree distribution 
beta = 1.1 # Power law exponent for the community size distribution
max_iter = 500

# generate LFR benchmark graph
G = LFR_benchmark_graph(N, gamma, beta, mu, min_degree=min_degree, max_degree=max_degree, 
                        max_community=max_community, min_community=min_community, max_iters=max_iter, tol=1)


In [26]:
G, communities_true, num_communities_true = generate_LFR_benchmark(N=500, mu=0.35)

_, _, comm_sizes = nodes_per_community(communities_true)
print (comm_sizes)
# plt.close('all')
# drawNetwork(G, communities_true, labels=False)

communities_infomap, _, _, _ = infomap(G, altmap=False)
communities_altmap, _, _, _ = infomap(G, altmap=True, update_inputfile=False)
communities_altmap_sci, _, communities_sci, _ = infomap(G, altmap=True, init='sc', update_inputfile=False)
#             
# print(f'Altmap cost for true partition is {altmap_cost(G, communities_true)}')

[42 89 74 60 47 63 49 76]
Spectral clustering finished in 0.6941009999999892 seconds.


In [29]:
# plotting params
node_size = 150
edge_width = 0.2
font_size = 13
# pos = nx.spring_layout(G)
# pos = nx.spectral_layout(G)
pos = community_layout(communities_true)


plt.close('all')
fig, axs = plt.subplots(2,2)

ax = axs[0,0]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Infomap')
communities = communities_infomap
color_idc = [v for v in communities.values()]
# nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[0,1]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Synthesizing Infomap')
communities = communities_altmap
color_idc = [v for v in communities.values()]
# nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[1,0]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Synthesizing Infomap with SCI')
communities = communities_altmap_sci
color_idc = [v for v in communities.values()]
# nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)


ax = axs[1,1]
ax.set_frame_on(False)
ax.set_yticklabels([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_title('Ground truth')
communities = communities_true
color_idc = [v for v in communities.values()]
# nx.draw_networkx_edges(G, pos, ax=ax, width=edge_width)
nodeCollection = nx.draw_networkx_nodes(G, pos=pos, node_color=color_idc, cmap=plt.get_cmap('Set3'), ax=ax,
                                        node_size=node_size)
# nx.draw_networkx_labels(G, pos, ax=ax, labels=communities, font_weight='bold', font_size=font_size)

