# create a family of benchmark graphs
Generator documentation: https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.LFR_benchmark_graph.html

Reference: “Benchmark graphs for testing community detection algorithms”, Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, Phys. Rev. E 78, 046110 (2008).

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import igraph as ig
from networkx.generators.community import LFR_benchmark_graph
 

In [2]:
def add_true_labels(G):
    """Replace each node's ``"community"`` attribute with an integer label.

    On entry every node of ``G`` must carry a ``"community"`` attribute holding
    the collection of nodes that form its community. Distinct communities are
    numbered 1, 2, 3, ... in the order in which they are first met while
    iterating over the nodes of ``G``, and every member of a community that is
    a node of ``G`` gets that number written to its ``"community"`` attribute.

    Parameters
    ----------
    G : graph
        Graph whose nodes carry a ``"community"`` attribute. Modified in place.

    Returns
    -------
    graph
        The same object ``G`` that was passed in.

    Raises
    ------
    KeyError
        If a node of ``G`` has no ``"community"`` attribute.

    Notes
    -----
    * Communities are compared by their member set, so two nodes share a label
      exactly when their communities contain the same nodes.
    * If communities overlap, a node ends up with the label of the
      highest-numbered community that contains it.
    * Labels are stored as plain Python ``int`` objects rather than values
      pulled out of a pandas Series, so the stored type does not depend on
      pandas dtype inference.
    """
    # Pass 1: read every community *before* writing anything, because pass 2
    # overwrites the very attribute that is being read here.
    community_ids = {}
    for v in G:
        members = frozenset(G.nodes[v]["community"])
        if members not in community_ids:
            # Dicts keep insertion order, so ids follow first appearance.
            community_ids[members] = len(community_ids) + 1

    # Pass 2: write the integer label onto every member that is a node of G.
    for members, comm_id in community_ids.items():
        for node in members:
            if node in G:
                G.nodes[node]["community"] = comm_id

    return G

In [3]:

# Parameters for the LFR benchmark graphs; the cells below pass them as
# keyword arguments to LFR_benchmark_graph (see the generator documentation
# linked at the top of the notebook for their exact meaning).

# Reproducibility and size
seed = 123
n = 1000

# Exponents handed to the generator as tau1 / tau2
tau1 = 2
tau2 = 3

# Degree constraints
average_degree = 5
max_degree = 50

# Community-size bounds
min_community = 20
max_community = 50

# Iteration budget passed to the generator
max_iters = 10_000

# NOTE(review): the generation cells visible in this notebook pass mui/100 as
# the mixing parameter rather than this value; it is kept for any other use.
mu = 0.5

In [4]:
# Single trial run: mui is the mixing parameter in percent, so mu = 0.05 here.
mui = 5
G = LFR_benchmark_graph(
    n=n,
    tau1=tau1,
    tau2=tau2,
    mu=mui / 100,
    average_degree=average_degree,
    min_community=min_community,
    max_community=max_community,
    max_degree=max_degree,
    max_iters=max_iters,
    seed=seed,
    # NOTE(review): tol=2 overrides the generator's default tolerance;
    # confirm this looseness is intended.
    tol=2,
)

In [5]:
# Sweep the mixing parameter; mui is mu in percent (5, 10, ..., 55).
mui_range = range(5, 60, 5)

for mui in mui_range:
    # Build the benchmark graph for this mixing level (same seed every time).
    print("generating benchmark for mu = ", mui / 100)
    G = LFR_benchmark_graph(
        n=n,
        tau1=tau1,
        tau2=tau2,
        mu=mui / 100,
        average_degree=average_degree,
        min_community=min_community,
        max_community=max_community,
        max_degree=max_degree,
        max_iters=max_iters,
        seed=seed,
        tol=2,
    )

    # Turn each node's "community" attribute into an integer label.
    # add_true_labels modifies G in place and returns it, so gt is G.
    gt = add_true_labels(G)
    print(G)

    # One GML file per mixing level, written to the working directory.
    nx.write_gml(G, f"LFR_benchmark_{mui}.gml")
     



generating benchmark for mu =  0.05
Graph with 1000 nodes and 2040 edges
generating benchmark for mu =  0.1
Graph with 1000 nodes and 2050 edges
generating benchmark for mu =  0.15
Graph with 1000 nodes and 2019 edges
generating benchmark for mu =  0.2
Graph with 1000 nodes and 2018 edges
generating benchmark for mu =  0.25
Graph with 1000 nodes and 2025 edges
generating benchmark for mu =  0.3
Graph with 1000 nodes and 2012 edges
generating benchmark for mu =  0.35
Graph with 1000 nodes and 2013 edges
generating benchmark for mu =  0.4
Graph with 1000 nodes and 2011 edges
generating benchmark for mu =  0.45
Graph with 1000 nodes and 2040 edges
generating benchmark for mu =  0.5
Graph with 1000 nodes and 2057 edges
generating benchmark for mu =  0.55
Graph with 1000 nodes and 2042 edges
