## Imports and utils

In [1]:
import networkx as nx
import csv
import time
import pandas as pd
from cdlib import algorithms, readwrite, evaluation, NodeClustering
from cdlib.benchmark import LFR, SBM, GRP
import infomap
from collections import defaultdict
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from pathlib import Path
import pickle

In [2]:
# Directory (relative to the working directory) that receives every figure saved by this notebook.
# NOTE(review): no visible cell creates this directory — confirm it exists before saving figures.
figurePath = Path("Figures")

In [3]:
def datafileToGraph(fileName):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Every line of the file is read as a single text field and split on
    whitespace into a source and a target node label (labels stay strings).

    Parameters
    ----------
    fileName : str or path-like
        Path of the edge-list file.

    Returns
    -------
    networkx.Graph
        Graph built from the edges; its ``name`` is the file name without
        directory and without the final extension.
    """
    raw = pd.read_csv(fileName, header=None)
    raw = raw[0].str.split(n=2, expand=True)
    raw.columns = ['Source', 'Target']
    network = nx.from_pandas_edgelist(raw, source='Source', target='Target', edge_attr=None)
    # Path(...).stem works for str and Path inputs and is not confused by
    # directories or leading dots (e.g. "./emailNet.txt" -> "emailNet"); the
    # name is later embedded in figure file names.
    network.name = Path(fileName).stem
    return network

In [4]:
def averageDegree(networkx):
    """Return the mean node degree of a graph.

    Parameters
    ----------
    networkx : graph-like
        Object whose ``degree()`` yields ``(node, degree)`` pairs.

    Returns
    -------
    float
        Sum of all degrees divided by the number of nodes.

    Raises
    ------
    ZeroDivisionError
        If the graph has no nodes.
    """
    degrees = [degree for _, degree in networkx.degree()]
    # Use the built-in sum instead of shadowing it with a local accumulator.
    return sum(degrees) / len(degrees)

In [5]:
def new_infomap(g_original: object, flags: str = "") -> NodeClustering:
    """Run Infomap on a graph and wrap the partition as a cdlib NodeClustering.

    Parameters
    ----------
    g_original : networkx graph
        Graph to cluster. Edge ``weight`` attributes are forwarded when present.
    flags : str, optional
        Command-line style flags for Infomap; ``" --silent"`` is always appended.

    Returns
    -------
    NodeClustering
        Communities expressed with the node labels of ``g_original``.
    """
    # Work on an integer-relabelled copy; the original label of each node is
    # kept in the "name" attribute so results can be mapped back.
    g1 = nx.convert_node_labels_to_integers(g_original, label_attribute="name")
    name_map = nx.get_node_attributes(g1, "name")

    flags += " --silent"
    im = infomap.Infomap(flags)

    # Register the relabelled integer ids: the links added below and the
    # name_map lookup both use g1's ids, so adding g_original's labels here
    # would only be correct when those labels already are 0..n-1.
    im.add_nodes(g1.nodes)

    for source, target, data in g1.edges(data=True):
        if "weight" in data:
            im.add_link(source, target, data["weight"])
        else:
            im.add_link(source, target)
    im.run()

    # Group original node labels by the module Infomap assigned them to.
    coms_to_node = defaultdict(list)
    for node_id, module_id in im.modules:
        coms_to_node[module_id].append(name_map[node_id])

    coms_infomap = [list(c) for c in coms_to_node.values()]

    return NodeClustering(
        coms_infomap, g_original, "Infomap", method_parameters={"flags": flags}
    )

In [6]:
def network_info(network, graph=True):
    """Print basic statistics of a graph and optionally save its degree distribution.

    Prints the average degree, the average clustering coefficient and, for
    every connected component, the average shortest-path length.

    Parameters
    ----------
    network : networkx.Graph
        Graph to describe; ``network.name`` is used in the figure file name.
    graph : bool or str, optional
        Whether to save the log-log degree-distribution figure under
        ``figurePath``. The former default was the string ``"False"``, which
        is truthy, so the figure was always produced; the default is now
        ``True`` to keep that effective behaviour. String values are parsed,
        so ``"False"``, ``"0"``, ``"no"`` and ``""`` disable the figure.
    """
    print("Average degree:", averageDegree(network))
    print("Clustering coefficient:", nx.average_clustering(network))

    # Path length is only defined inside a connected component, so report it
    # component by component.
    for component_nodes in nx.connected_components(network):
        component = network.subgraph(component_nodes)
        print("Average Path Length:", nx.average_shortest_path_length(component))

    if isinstance(graph, str):
        graph = graph.strip().lower() not in ("", "0", "false", "no")
    if not graph:
        return

    # Plot the degree distribution P(k) on log-log axes.
    fig = plt.figure(figsize=(3*1.61803398875, 3))
    ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')
    d = np.array(nx.degree_histogram(network))
    y = d / len(network.nodes)
    x = np.arange(len(y))
    ax.plot(x, y, "go")
    ax.set_xlabel("k")
    ax.set_ylabel("Pk")
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_title("Degree distribution")
    # savefig does not create missing directories.
    figurePath.mkdir(parents=True, exist_ok=True)
    fig.savefig(figurePath/("DegreeDistribution_%s.png" % (network.name)))
    plt.close(fig)

In [7]:
def convertTextDatasetToCSV(txtFile, csvFile):
    """Convert a "node label" text file into a one-community-per-row CSV.

    Each non-blank input line holds a node id followed by its label, separated
    by whitespace. The output CSV has one row per distinct label (in order of
    first appearance) listing the node ids carrying that label.

    Parameters
    ----------
    txtFile : str or path-like
        Input file with one ``<node> <label>`` pair per line.
    csvFile : str or path-like
        Output CSV file; overwritten if it exists.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than two fields.
    """
    members_by_label = {}

    # The context manager closes the input file; iterating it avoids loading
    # the whole file into memory.
    with open(txtFile, 'r') as source:
        for line_number, line in enumerate(source, start=1):
            # split() strips the newline itself, so a final line without a
            # trailing newline no longer loses its last character.
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    "line %d of %s has no label: %r" % (line_number, txtFile, line)
                )
            node, label = fields[0], fields[1]
            members_by_label.setdefault(label, []).append(node)

    with open(csvFile, 'w', newline="") as csv_file:
        writer = csv.writer(csv_file)
        for members in members_by_label.values():
            writer.writerow(members)
        
# Regroup the email label file into one CSV row per label, then load those rows
# as a cdlib clustering (node ids are read as strings).
convertTextDatasetToCSV('emailLabels.txt', 'emailLabels.csv')
emailLabels = readwrite.read_community_csv("emailLabels.csv", ",", str)
# Sanity check of the loaded object's type.
print(type(emailLabels))

<class 'cdlib.classes.node_clustering.NodeClustering'>


In [11]:
# Load the DBLP communities (tab-separated, node ids read as strings) and
# report how many distinct nodes appear in them.
dblpLabels = readwrite.read_community_csv("dblpLabelsComplete.txt", "\t", str)
print(len(dblpLabels.to_node_community_map()))

317080


In [12]:
def dataToJSON(data, filename):
    """Serialise ``data`` to ``filename`` with pickle.

    Despite the name, the file written is a binary pickle, not JSON.
    """
    sink = open(filename, 'wb')
    try:
        pickle.dump(data, sink)
    finally:
        sink.close()

In [13]:
def JSONtoData(fileName):
    """Load and return the object pickled in ``fileName``.

    Despite the name, the file is expected to be a binary pickle, not JSON.
    """
    # NOTE: unpickling can execute arbitrary code; only load files produced by
    # this notebook or another trusted source.
    with open(fileName, 'rb') as source:
        return pickle.load(source)

In [14]:
def bestGN(network, levels):
    """Sweep Girvan-Newman levels, report the best modularity and plot the sweep.

    For every entry of ``levels`` the network is partitioned with
    ``algorithms.girvan_newman`` and scored with Newman-Girvan modularity.
    The best level/score is printed and the curve is saved as
    ``Girvan-Newman_mod_<network.name>.png`` under ``figurePath``.

    Parameters
    ----------
    network : networkx.Graph
        Graph to partition; its ``name`` labels the output.
    levels : iterable of int
        Girvan-Newman levels to evaluate.
    """
    print(network.name)
    levels = list(levels)
    modularity_vals = []

    for level in tqdm(levels):
        networkGN = algorithms.girvan_newman(network, level)
        modularity_vals.append(evaluation.newman_girvan_modularity(network, networkGN).score)

    # Choose the best among the levels actually evaluated. Seeding the search
    # with score 0 would report a never-tested "level 0" whenever every
    # modularity is <= 0. max() keeps the first maximum on ties.
    if modularity_vals:
        bestGNscore, bestGNcut = max(zip(modularity_vals, levels), key=lambda pair: pair[0])
    else:
        bestGNscore, bestGNcut = 0, 0

    print("best modularity with %d is: %f" % (bestGNcut, bestGNscore))

    fig = plt.figure(figsize=(3*1.61803398875, 3))
    ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

    ax.plot(levels, modularity_vals, "-ro")

    ax.set_xlabel("levels")
    ax.set_ylabel("Modularity")
    ax.set_xticks(levels)
    # Title follows the network being analysed instead of a fixed "Email".
    ax.set_title("Girvan-Newman Modularity %s" % (network.name))
    # No legend: the single unlabelled series only triggered a
    # "No handles with labels found" warning.
    figurePath.mkdir(parents=True, exist_ok=True)
    fig.savefig(figurePath/("Girvan-Newman_mod_%s.png" % (network.name)))
    plt.close(fig)

In [16]:
def bestInfomap(network, trials):
    """Sweep the Infomap trial count, report the best modularity and plot the sweep.

    For every entry of ``trials`` Infomap is run on ``network`` with
    ``--silent -N <trials>`` and the partition is scored with Newman-Girvan
    modularity. The best setting is printed and the curve is saved as
    ``Infomap_mod_<network.name>.png`` under ``figurePath``.

    Parameters
    ----------
    network : networkx.Graph
        Graph to partition; its ``name`` labels the output.
    trials : iterable of int
        Values passed to Infomap's ``-N`` option.
    """
    print(network.name)
    trials = list(trials)
    modularity_vals = []

    for t in tqdm(trials):
        # Build the flags afresh each time; appending to one string would
        # turn "-N 1", "-N 2" into "-N 1", "-N 12", ...
        flags = "--silent -N " + str(t)
        # Cluster the network passed in, not a notebook-level global.
        networkInfomap = algorithms.infomap(network, flags)
        modularity_vals.append(evaluation.newman_girvan_modularity(network, networkInfomap).score)

    # Choose the best among the settings actually evaluated; this also
    # guarantees the reported trial count is always bound.
    if modularity_vals:
        bestGNscore, bestGNtrial = max(zip(modularity_vals, trials), key=lambda pair: pair[0])
    else:
        bestGNscore, bestGNtrial = 0, 0

    print("best modularity with %d is: %f" % (bestGNtrial, bestGNscore))

    fig = plt.figure(figsize=(3*1.61803398875, 3))
    ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

    # x axis is the swept parameter of this function.
    ax.plot(trials, modularity_vals, "-ro")

    ax.set_xlabel("Trials")
    ax.set_ylabel("Modularity")
    ax.set_xticks(trials)
    ax.set_title("Infomap Modularity %s" % (network.name))
    figurePath.mkdir(parents=True, exist_ok=True)
    fig.savefig(figurePath/("Infomap_mod_%s.png" % (network.name)))
    plt.close(fig)

## Email network

In [17]:
# Load the email edge list as a graph and print its summary line.
emailNet = datafileToGraph('emailNet.txt')
print(emailNet)

Graph named 'emailNet' with 1005 nodes and 16706 edges


## Email network properties 

In [18]:
# Print the structural statistics of the email network; one "Average Path Length"
# line is printed per connected component.
network_info(emailNet)

Average degree: 33.245771144278606
Clustering coefficient: 0.3993549664221539
Average Path Length: 2.586933824816466
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0


In [23]:
# Girvan-Newman levels to sweep on the email network.
# NOTE(review): the saved progress bar below shows 11 iterations while this list
# has 9 entries, so the recorded output looks stale — re-run to confirm.
levels = [19, 21, 23, 25, 27, 29, 31, 33, 35]
bestGN(emailNet,levels)

100%|██████████| 11/11 [53:45<00:00, 293.19s/it]
No handles with labels found to put in legend.


best modularity with 21 is: 0.003584


## Community finding methods for the email network

In [19]:
# Time each community-detection algorithm on the email network.
# algs and timeArray are parallel lists consumed by the bar-chart cell.
algs = ["louvain", "leiden", "girvan_newman", "infomap"]
timeArray = []

# time.perf_counter() is the clock intended for measuring intervals. Times are
# printed with %.4f: the former "%.6s" cut the *string* form of the float, which
# misreports values rendered in scientific notation (e.g. 9.5e-05 -> "9.5367").
start_time = time.perf_counter()
emailLouvain = algorithms.louvain(emailNet)
t = time.perf_counter() - start_time
print("Execution time for Louvain in email net: %.4f seconds" % (t))
timeArray.append(round(t,6))

start_time = time.perf_counter()
emailLeiden = algorithms.leiden(emailNet)
t = time.perf_counter() - start_time
print("Execution time for Leiden in email net: %.4f seconds" % (t))
timeArray.append(round(t,6))

# level=21 is the best level reported by the Girvan-Newman sweep output above.
start_time = time.perf_counter()
emailGN = algorithms.girvan_newman(emailNet, level=21)
t = time.perf_counter() - start_time
print("Execution time for Girvan-Newman in email net: %.4f seconds" % (t))
timeArray.append(round(t,6))

start_time = time.perf_counter()
emailInfomap = algorithms.infomap(emailNet, "--silent -N 2")
t = time.perf_counter() - start_time
print("Execution time for Infomap in email net: %.4f seconds" % (t))
timeArray.append(round(t,6))

Execution time for Louvain in email net: 0.6200 seconds
Execution time for Leiden in email net: 0.1308 seconds
Execution time for Girvan-Newman in email net: 401.77 seconds
Execution time for Infomap in email net: 0.1830 seconds


In [20]:
# Bar chart (log-scaled y axis) of the execution time of each algorithm.
fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

# Bar width derived from the number of algorithms and of timings.
width = 8.2 / (len(algs)*len(timeArray))  # the width of the bars
step = width / len(algs)

for alg_id, alg_name in enumerate(algs):
    ax.bar(alg_name, timeArray[alg_id], step, label=alg_name)

ax.set_ylabel("Execution time")
ax.set_title("Execution time")
ax.set_yscale('log')

out_file = figurePath/("Execution_Time%s.png" % (emailNet.name))
fig.savefig(out_file)
plt.close(fig)

## Louvain properties

In [21]:
# Structural properties of the Louvain partition of the email network.
# The community count is read directly from the clustering: deriving it from
# the last entry of the node->community map depends on insertion order and is
# wrong when communities overlap.
nComs = len(emailLouvain.communities)
size = evaluation.size(emailNet, emailLouvain)
ad = evaluation.avg_distance(emailNet, emailLouvain)
aid = evaluation.average_internal_degree(emailNet, emailLouvain)
ae = evaluation.avg_embeddedness(emailNet, emailLouvain)
at = evaluation.avg_transitivity(emailNet, emailLouvain)

print("Louvain Number of communities:", nComs)
print("Louvain Average Community Size:", size)
print("Louvain Average Path Length:",  ad)
print("Louvain Average Internal Degree:",  aid)
print("Louvain Average Embeddedness:",  ae)
print("Louvain Average Transitivity:",  at)

Louvain Number of communities: 26
Louvain Average Community Size: FitnessResult(min=1, max=300, score=38.65384615384615, std=79.21778272958488)
Louvain Average Path Length: FitnessResult(min=0, max=2.4806172136849427, score=0.5858553058795464, std=0.9705906888685905)
Louvain Average Internal Degree: FitnessResult(min=2.0, max=25.517241379310345, score=6.690529195280717, std=8.375278276876205)
Louvain Average Embeddedness: FitnessResult(min=0.5871633279111377, max=1.0, score=0.931051977595627, std=0.11902625977134039)
Louvain Average Transitivity: FitnessResult(min=0.0, max=0.7705866949128367, score=0.14955667384623988, std=0.2531365394514653)


In [9]:
#louvainDict = louvain.to_node_community_map()
#print(louvainDict)
#louvain.to_json()

## Leiden 

In [22]:
# Structural properties of the Leiden partition of the email network.
# The community count is read directly from the clustering: deriving it from
# the last entry of the node->community map depends on insertion order and is
# wrong when communities overlap.
nComs = len(emailLeiden.communities)
size = evaluation.size(emailNet, emailLeiden)
ad = evaluation.avg_distance(emailNet, emailLeiden)
aid = evaluation.average_internal_degree(emailNet, emailLeiden)
ae = evaluation.avg_embeddedness(emailNet, emailLeiden)
at = evaluation.avg_transitivity(emailNet, emailLeiden)

print("Leiden Number of communities:", nComs)
print("Leiden Average Community Size:", size)
print("Leiden Average Path Length:",  ad)
print("Leiden Average Internal Degree:",  aid)
print("Leiden Average Embeddedness:",  ae)
print("Leiden Average Transitivity:",  at)

Leiden Number of communities: 31
Leiden Average Community Size: FitnessResult(min=1, max=293, score=32.41935483870968, std=67.29270400635562)
Leiden Average Path Length: FitnessResult(min=0, max=2.4556404230317272, score=0.5570684147289684, std=0.9490694575435203)
Leiden Average Internal Degree: FitnessResult(min=2.0, max=24.946428571428573, score=6.341576185459902, std=7.94560636688929)
Leiden Average Embeddedness: FitnessResult(min=0.5872473658876585, max=1.0, score=0.887815944724878, std=0.1460282320945185)
Leiden Average Transitivity: FitnessResult(min=0.0, max=0.7143330915508804, score=0.1438417275558572, std=0.24788021063725751)


## Girvan-Newman

In [23]:
# Structural properties of the Girvan-Newman partition of the email network.
# The community count is read directly from the clustering: deriving it from
# the last entry of the node->community map depends on insertion order and is
# wrong when communities overlap.
nComs = len(emailGN.communities)
size = evaluation.size(emailNet, emailGN)
ad = evaluation.avg_distance(emailNet, emailGN)
aid = evaluation.average_internal_degree(emailNet, emailGN)
ae = evaluation.avg_embeddedness(emailNet, emailGN)
at = evaluation.avg_transitivity(emailNet, emailGN)

print("Girvan-Newman Number of communities:", nComs)
print("Girvan-Newman Size:", size)
print("Girvan-Newman Average Path Length:",  ad)
print("Girvan-Newman Average Internal Degree:",  aid)
print("Girvan-Newman Average Embeddedness:",  ae)
print("Girvan-Newman Average Transitivity:",  at)

Girvan-Newman Number of communities: 41
Girvan-Newman Size: FitnessResult(min=1, max=960, score=24.51219512195122, std=147.91561898286426)
Girvan-Newman Average Path Length: FitnessResult(min=0, max=2.5465632603406325, score=0.10763975431725119, std=0.4812177109414415)
Girvan-Newman Average Internal Degree: FitnessResult(min=0.0, max=34.68333333333333, score=2.0247967479674798, std=5.255162801455319)
Girvan-Newman Average Embeddedness: FitnessResult(min=0.0, max=1.0, score=0.573140516458235, std=0.46856433534472747)
Girvan-Newman Average Transitivity: FitnessResult(min=0.0, max=0.41873271609026597, score=0.01021299307537234, std=0.06459263969140863)


In [None]:
# Debug dump of the full node -> community-ids map of the Infomap partition.
# NOTE(review): this prints one entry per node and the cell has no execution
# count; consider removing it or printing only a small sample.
print(emailInfomap.to_node_community_map())

## Infomap

In [24]:
# Structural properties of the Infomap partition of the email network.
# The community count is read directly from the clustering: deriving it from
# the last entry of the node->community map depends on insertion order and is
# wrong when communities overlap.
nComs = len(emailInfomap.communities)
size = evaluation.size(emailNet, emailInfomap)
ad = evaluation.avg_distance(emailNet, emailInfomap)
aid = evaluation.average_internal_degree(emailNet, emailInfomap)
ae = evaluation.avg_embeddedness(emailNet, emailInfomap)
at = evaluation.avg_transitivity(emailNet, emailInfomap)

print("Infomap Number of communities:", nComs)
print("Infomap Size:", size)
print("Infomap Average Path Length:",  ad)
print("Infomap Average Internal Degree:",  aid)
print("Infomap Average Embeddedness:",  ae)
print("Infomap Average Transitivity:",  at)

Infomap Number of communities: 34
Infomap Size: FitnessResult(min=1, max=513, score=29.558823529411764, std=89.11235181317313)
Infomap Average Path Length: FitnessResult(min=0, max=2.4256670321637426, score=0.6905898334658904, std=0.8372308904563356)
Infomap Average Internal Degree: FitnessResult(min=1.0, max=29.57504873294347, score=6.322285857617108, std=7.915326374424319)
Infomap Average Embeddedness: FitnessResult(min=0.41666666666666663, max=1.0, score=0.8548970779096592, std=0.18102253075576857)
Infomap Average Transitivity: FitnessResult(min=0.0, max=0.8140800887261141, score=0.17305192051126927, std=0.28530274277818735)


## Modularity

In [26]:
# Newman-Girvan modularity of each partition of the email network.
# Note: louvainMod, leidenMod, gnMod and infomapMod are reassigned by later cells
# (LFR and SBM sections), so their values depend on which cell ran last.
louvainMod = evaluation.newman_girvan_modularity(emailNet, emailLouvain)
leidenMod = evaluation.newman_girvan_modularity(emailNet, emailLeiden)
gnMod = evaluation.newman_girvan_modularity(emailNet, emailGN)
infomapMod = evaluation.newman_girvan_modularity(emailNet, emailInfomap)

print("Louvain modularity:", louvainMod)
print("Leiden modularity:", leidenMod)
print("Girvan-Newman modularity:", gnMod)
print("Infomap modularity:", infomapMod)

Louvain modularity: FitnessResult(min=None, max=None, score=0.43088719278076615, std=None)
Leiden modularity: FitnessResult(min=None, max=None, score=0.4325852373529564, std=None)
Girvan-Newman modularity: FitnessResult(min=None, max=None, score=0.0035840389743774913, std=None)
Infomap modularity: FitnessResult(min=None, max=None, score=0.36285668348735556, std=None)


## External Evaluation

In [27]:
# Pairwise normalized mutual information between the partitions found by the
# different algorithms on the email network.
# NOTE(review): the saved output of this cell is a NameError for 'emaiLeiden', a
# name that does not occur in the code below — the output looks stale; re-run.
nmi = evaluation.normalized_mutual_information(emailLouvain, emailLeiden)
nmi2 = evaluation.normalized_mutual_information(emailLouvain, emailGN)
nmi3 = evaluation.normalized_mutual_information(emailLouvain, emailInfomap)
nmi4 = evaluation.normalized_mutual_information(emailLeiden, emailInfomap)

print("Normalized Mutual Information between Louvain and Leiden:", nmi)
print("Normalized Mutual Information between Louvain and Girvan-Newman:", nmi2)
print("Normalized Mutual Information between Louvain and Infomap:", nmi3)
print("Normalized Mutual Information between Leiden and Infomap:", nmi4)

NameError: name 'emaiLeiden' is not defined

## Accuracy 

In [40]:
# NMI between each detected partition and the labels loaded into emailLabels.
# NOTE(review): the saved output also lists AMI, ARI and F1 lines and has no
# Infomap line, neither of which matches this code — the output looks stale.
nmiLouvainLabels = emailLouvain.normalized_mutual_information(emailLabels)
nmiLeidenLabels = emailLeiden.normalized_mutual_information(emailLabels)
nmiGirvanNewmanLabels = emailGN.normalized_mutual_information(emailLabels)
nmiInfomapLabels = emailInfomap.normalized_mutual_information(emailLabels)

print("NMI for Louvain and labels:", nmiLouvainLabels)
print("NMI for Leiden and labels:", nmiLeidenLabels)
print("NMI for Girvan-Newman and labels:", nmiGirvanNewmanLabels)
print("NMI for Infomap and labels:", nmiInfomapLabels)

NMI for Louvain and labels: MatchingResult(score=0.5882969131854163, std=None)
NMI for Leiden and labels: MatchingResult(score=0.5776771281141226, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=0.0449480988954229, std=None)

AMI for Louvain and labels: MatchingResult(score=0.5527517337989198, std=None)
AMI for Leiden and labels: MatchingResult(score=0.540992332601562, std=None)
AMI for Girvan-Newman and labels: MatchingResult(score=-0.0005067215292382119, std=None)

ARI for Louvain and labels: MatchingResult(score=0.3311391803457487, std=None)
ARI for Leiden and labels: MatchingResult(score=0.2985758824644375, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=-0.0012481583268392242, std=None)

F1 for Louvain and labels: MatchingResult(score=0.21592592592592594, std=0.2803999690819878)
F1 for Leiden and labels: MatchingResult(score=0.2082142857142857, std=0.27508509815053567)
F1 for Girvan-Newman and labels: MatchingResult(score=0.05695652173913045, std=

## DBLP network

In [19]:
# Build the DBLP graph from its edge list and cache it on disk so later
# sessions can reload it instead of re-parsing the text file.
dblpNet = datafileToGraph('dblpNet.txt')
print(dblpNet)
# Despite the helper's name, the cache file is a pickle.
dataToJSON(dblpNet, 'dblpNet.data')

Graph named 'dblpNet' with 317080 nodes and 1049866 edges


In [20]:
# Reload the cached DBLP graph written by the previous cell.
dblpNet = JSONtoData('dblpNet.data')

In [15]:
# Print the structural statistics of the DBLP network.
# NOTE(review): the saved output has no "Average Path Length" line, so the
# path-length step apparently did not complete for this graph — confirm.
network_info(dblpNet)

Average degree: 6.622089062697111
Clustering coefficient: 0.6324308280637396


In [None]:
# Average shortest-path length over the whole DBLP graph.
# NOTE(review): this cell has no execution count or output; computing all-pairs
# shortest paths on a graph with 317080 nodes is expensive, and the call is
# presumably only valid if the graph is connected — confirm before running.
apl = nx.average_shortest_path_length(dblpNet)
print(apl)

In [None]:
# Time each community-detection algorithm on the DBLP network and cache the
# resulting partitions on disk (pickle files, despite the helper's name).
algs = ["louvain", "leiden", "infomap"]
timeArray = []

# Only the algorithm itself is timed: the partition is written to disk after
# the clock is stopped so that serialisation does not inflate the measurement.
start_time = time.perf_counter()
dblpLouvain = algorithms.louvain(dblpNet)
t = time.perf_counter() - start_time
dataToJSON(dblpLouvain, 'dblp_louvain.data')
print("Execution time for Louvain in dblp net: %.4f seconds" % (t))
timeArray.append(round(t,6))

start_time = time.perf_counter()
dblpLeiden = algorithms.leiden(dblpNet)
t = time.perf_counter() - start_time
dataToJSON(dblpLeiden, 'dblp_leiden.data')
print("Execution time for Leiden in dblp net: %.4f seconds" % (t))
timeArray.append(round(t,6))

# Girvan-Newman is deliberately disabled for DBLP (it is not listed in algs);
# the call is kept here for reference.
# start_time = time.perf_counter()
# dblpGN = algorithms.girvan_newman(dblpNet, level=3)
# t = time.perf_counter() - start_time
# dataToJSON(dblpGN, 'dblp_gn.data')
# print("Execution time for Girvan-Newman in dblp net: %.4f seconds" % (t))
# timeArray.append(round(t,6))

start_time = time.perf_counter()
dblpInfomap = algorithms.infomap(dblpNet, "--silent -N 2")
t = time.perf_counter() - start_time
dataToJSON(dblpInfomap, 'dblp_infomap.data')
print("Execution time for Infomap in dblp net: %.4f seconds" % (t))
timeArray.append(round(t,6))

In [None]:
# Reload the cached DBLP partitions instead of recomputing them.
dblpLouvain = JSONtoData("dblp_louvain.data")
dblpLeiden = JSONtoData("dblp_leiden.data")
dblpInfomap = JSONtoData("dblp_infomap.data")

## Accuracy 

In [40]:
# NMI between each DBLP partition and the communities loaded into dblpLabels.
# Girvan-Newman is not run on DBLP, hence the commented-out lines.
# NOTE(review): the saved output is identical to the email "Accuracy" output and
# mentions metrics this code does not print — it looks stale; re-run.
nmiLouvainLabels = dblpLouvain.normalized_mutual_information(dblpLabels)
nmiLeidenLabels = dblpLeiden.normalized_mutual_information(dblpLabels)
#nmiGirvanNewmanLabels = emailGN.normalized_mutual_information(emailLabels)
nmiInfomapLabels = dblpInfomap.normalized_mutual_information(dblpLabels)

print("NMI for Louvain and labels:", nmiLouvainLabels)
print("NMI for Leiden and labels:", nmiLeidenLabels)
#print("NMI for Girvan-Newman and labels:", nmiGirvanNewmanLabels)
print("NMI for Infomap and labels:", nmiInfomapLabels)

NMI for Louvain and labels: MatchingResult(score=0.5882969131854163, std=None)
NMI for Leiden and labels: MatchingResult(score=0.5776771281141226, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=0.0449480988954229, std=None)

AMI for Louvain and labels: MatchingResult(score=0.5527517337989198, std=None)
AMI for Leiden and labels: MatchingResult(score=0.540992332601562, std=None)
AMI for Girvan-Newman and labels: MatchingResult(score=-0.0005067215292382119, std=None)

ARI for Louvain and labels: MatchingResult(score=0.3311391803457487, std=None)
ARI for Leiden and labels: MatchingResult(score=0.2985758824644375, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=-0.0012481583268392242, std=None)

F1 for Louvain and labels: MatchingResult(score=0.21592592592592594, std=0.2803999690819878)
F1 for Leiden and labels: MatchingResult(score=0.2082142857142857, std=0.27508509815053567)
F1 for Girvan-Newman and labels: MatchingResult(score=0.05695652173913045, std=

## Modularity

In [53]:
# Newman-Girvan modularity of each DBLP partition (Girvan-Newman is skipped).
# NOTE(review): the saved output shows differently labelled lines ("Erdos-Renyi
# mod", "Z-mod", "gn ...") that this code does not print — it looks stale.
louvainModD = evaluation.newman_girvan_modularity(dblpNet, dblpLouvain)
leidenModD = evaluation.newman_girvan_modularity(dblpNet, dblpLeiden)
#gnMod = evaluation.newman_girvan_modularity(emailNet, emailGN)
infomapModD = evaluation.newman_girvan_modularity(dblpNet, dblpInfomap)

print("Louvain modularity:", louvainModD)
print("Leiden modularity:", leidenModD)
#print("Girvan-Newman modularity:", gnMod)
print("Infomap modularity:", infomapModD)

Louvain Girvan-Newman mod: FitnessResult(min=None, max=None, score=0.4318206751448841, std=None)
Louvain Erdos-Renyi mod: FitnessResult(min=None, max=None, score=0.4549865127361685, std=None)
Louvain Z-mod: FitnessResult(min=None, max=None, score=1.1462260420651909, std=None)
gn Girvan-Newman mod: FitnessResult(min=None, max=None, score=0.0031087073868774446, std=None)


## LFR 

In [23]:
# Sweep Girvan-Newman levels on one LFR benchmark graph and plot the modularity
# reached at each level.
lfr, _ = LFR(500, 3, 1.5, 0.1, average_degree=5, min_community=20)
levels = [7, 9, 11, 13, 15, 17, 19, 21, 23, 25]
modularity_vals = []

for l in tqdm(levels):
    lfrGN = algorithms.girvan_newman(lfr, l)
    a = evaluation.newman_girvan_modularity(lfr,lfrGN)
    modularity_vals.append(a.score)

# Choose the best among the levels actually evaluated. Seeding the search with
# score 0 would report a never-tested "level 0" whenever every modularity is
# <= 0. max() keeps the first maximum on ties.
bestGNscore, bestGNcut = max(zip(modularity_vals, levels), key=lambda pair: pair[0])

print("best modularity with %d is: %f" % (bestGNcut, bestGNscore))

fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

ax.plot(levels, modularity_vals, "-ro")

ax.set_xlabel("levels")
ax.set_ylabel("Modularity")
ax.set_xticks(levels)
ax.set_title("Girvan-Newman Modularity LFR")
# No legend: the single unlabelled series only triggered a
# "No handles with labels found" warning (the SBM sweep has none either).
fig.savefig(figurePath/("Girvan-Newman_mod_lfr.png"))
plt.close(fig)


100%|██████████| 10/10 [04:43<00:00, 28.35s/it]
No handles with labels found to put in legend.


## LFR results

In [29]:
# Benchmark the four algorithms on LFR graphs of increasing mixing parameter mu
# and plot the NMI of each partition against the planted communities.
n = 500
tau1 = 3
tau2 = 1.5
#mu = 0.1
mus = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
b = "LFR"

resultsLouvain, resultsLeiden, resultsInfomap, resultsGN = [], [], [], []

for mu in tqdm(mus):
    lfrGraph, comsLFR = LFR(n, tau1, tau2, mu, average_degree=5, min_community=20)

    # One partition per algorithm on the same benchmark graph.
    lfrLouvain = algorithms.louvain(lfrGraph)
    lfrLeiden = algorithms.leiden(lfrGraph)
    lfrInfomap = new_infomap(lfrGraph, "-N 2")
    lfrGN = algorithms.girvan_newman(lfrGraph, level=15)

    # Record the NMI score (first field of the result) for each algorithm.
    nmi1 = evaluation.normalized_mutual_information(lfrLouvain, comsLFR)
    resultsLouvain.append(nmi1.score)
    nmi2 = evaluation.normalized_mutual_information(lfrLeiden, comsLFR)
    resultsLeiden.append(nmi2.score)
    nmi3 = evaluation.normalized_mutual_information(lfrInfomap, comsLFR)
    resultsInfomap.append(nmi3.score)
    nmi4 = evaluation.normalized_mutual_information(lfrGN, comsLFR)
    resultsGN.append(nmi4.score)


fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

for series, fmt, series_label in (
    (resultsLouvain, "-ro", "Louvain"),
    (resultsLeiden, "-go", "Leiden"),
    (resultsInfomap, "-bo", "Infomap"),
    (resultsGN, "-yo", "Girvan-Newman"),
):
    ax.plot(mus, series, fmt, label=series_label)

# chr(945+11) is the Greek letter mu.
ax.set_xlabel(chr(945+11))
ax.set_ylabel("Performance")
ax.set_title("nmi Accuracy")
ax.legend()
fig.savefig(figurePath/("Benchmark_Results_%s.png" % (b)))
plt.close(fig)

100%|██████████| 7/7 [09:30<00:00, 81.54s/it]


In [26]:
# Generate one LFR benchmark graph (mu = 0.1) with its planted communities and
# print its structural statistics.
lfrGraph, comsLFR = LFR(500, 3, 1.5, 0.1, average_degree=5, min_community=20)
network_info(lfrGraph)

Average degree: 4.304
Clustering coefficient: 0.18908052240157525
Average Path Length: 5.686468937875752


In [37]:
# Number of planted communities and number of edges of the LFR graph.
# The count is read directly from the clustering instead of being derived from
# the last entry of the node->community map, which depends on insertion order.
print(len(comsLFR.communities))
print(lfrGraph.number_of_edges())

16
1076


In [29]:
# Partition the LFR graph with each algorithm and compare Newman-Girvan modularity.
# Note: louvainMod, leidenMod, gnMod and infomapMod reuse the names of the email
# modularity cell and overwrite its values.
lfrLouvain = algorithms.louvain(lfrGraph)
lfrLeiden = algorithms.leiden(lfrGraph)
lfrGN = algorithms.girvan_newman(lfrGraph, level=15)
lfrInfomap = new_infomap(lfrGraph, "-N 2")

louvainMod = evaluation.newman_girvan_modularity(lfrGraph, lfrLouvain)
leidenMod = evaluation.newman_girvan_modularity(lfrGraph, lfrLeiden)
gnMod = evaluation.newman_girvan_modularity(lfrGraph, lfrGN)
infomapMod = evaluation.newman_girvan_modularity(lfrGraph, lfrInfomap)

print("Louvain modularity:", louvainMod)
print("Leiden modularity:", leidenMod)
print("Girvan-Newman modularity:", gnMod)
print("Infomap modularity:", infomapMod)

Louvain modularity: FitnessResult(min=None, max=None, score=0.8643990720139303, std=None)
Leiden modularity: FitnessResult(min=None, max=None, score=0.8682016210389576, std=None)
Girvan-Newman modularity: FitnessResult(min=None, max=None, score=0.8659123180995288, std=None)
Infomap modularity: FitnessResult(min=None, max=None, score=0.8620657191028316, std=None)


## SBM 

In [42]:
# Sweep Girvan-Newman levels on a 4-block SBM graph (intra-block probability
# 0.8, inter-block probability c) and plot the modularity reached at each level.
sizes = [32] * 4
c = 0.1
probs = [[0.8 if row == col else c for col in range(4)] for row in range(4)]
sbm, _ = SBM(sizes, probs, seed=10)

levels = list(range(2, 10))
modularity_vals = []
bestGNscore = 0
bestGNcut = 0

for l in tqdm(levels):
    sbmGN = algorithms.girvan_newman(sbm, l)
    a = evaluation.newman_girvan_modularity(sbm, sbmGN)
    modularity_vals.append(a.score)
    if a.score <= bestGNscore:
        continue
    # New best level: remember it and echo the improved score.
    bestGNscore = a.score
    bestGNcut = l
    print(bestGNscore)

print("best modularity with %d is: %f" % (bestGNcut, bestGNscore))


fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

ax.plot(levels, modularity_vals, "-ro")

ax.set_xlabel("levels")
ax.set_ylabel("Modularity")
ax.set_xticks(levels)
ax.set_title("Girvan-Newman Modularity SBM")

sweep_figure = figurePath/("Girvan-Newman_mod_sbm.png")
fig.savefig(sweep_figure)
plt.close(fig)

100%|██████████| 1/1 [00:52<00:00, 52.41s/it]

0.3948255783688625
best modularity with 2 is: 0.394826





'\nfig = plt.figure(figsize=(3*1.61803398875, 3))\nax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor=\'w\')\n\nax.plot(levels, modularity_vals, "-ro")\n\nax.set_xlabel("levels")\nax.set_ylabel("Modularity")\nplt.xticks(levels)\nax.set_title("Girvan-Newman Modularity SBM")\nfig.savefig(figurePath/("Girvan-Newman_mod_sbm.png"))\nplt.close(fig)\n'

## SBM results

In [30]:
# Benchmark the four algorithms on 4-block SBM graphs whose inter-block edge
# probability c sweeps the values in mus, and plot the NMI of each partition
# against the planted blocks.
resultsLouvain, resultsLeiden, resultsInfomap, resultsGN = [], [], [], []
b = "SBM"

mus = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

sizes = [32, 32, 32, 32]
for c in tqdm(mus):
    # 0.8 inside a block, c between any two different blocks.
    probs = [[0.8 if row == col else c for col in range(4)] for row in range(4)]
    sbmGraph, comsSBM = SBM(sizes, probs, seed=10)

    sbmLouvain = algorithms.louvain(sbmGraph)
    sbmLeiden = algorithms.leiden(sbmGraph)
    sbmInfomap = new_infomap(sbmGraph, "-N 2")
    sbmGN = algorithms.girvan_newman(sbmGraph, level=3)

    # Record the NMI score (first field of the result) for each algorithm.
    nmi1 = evaluation.normalized_mutual_information(sbmLouvain, comsSBM)
    resultsLouvain.append(nmi1.score)
    nmi2 = evaluation.normalized_mutual_information(sbmLeiden, comsSBM)
    resultsLeiden.append(nmi2.score)
    nmi3 = evaluation.normalized_mutual_information(sbmInfomap, comsSBM)
    resultsInfomap.append(nmi3.score)
    nmi4 = evaluation.normalized_mutual_information(sbmGN, comsSBM)
    resultsGN.append(nmi4.score)

fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

for series, fmt, series_label in (
    (resultsLouvain, "-ro", "Louvain"),
    (resultsLeiden, "-go", "Leiden"),
    (resultsInfomap, "-bo", "Infomap"),
    (resultsGN, "-yo", "GN"),
):
    ax.plot(mus, series, fmt, label=series_label)

# chr(945+11) is the Greek letter mu.
ax.set_xlabel(chr(945+11))
ax.set_ylabel("Performance")
ax.set_title("nmi Accuracy")
ax.legend()
fig.savefig(figurePath/("Benchmark_Results_%s.png" % (b)))
plt.close(fig)


100%|██████████| 11/11 [09:50<00:00, 53.71s/it]


In [38]:
# Generate a 3-block SBM graph (block sizes 75, 75, 300) from the given
# block-to-block edge probabilities and print its structural statistics.
sizes = [75, 75, 300]
probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
sbmGraph, comsSBM = SBM(sizes, probs, seed=10)
network_info(sbmGraph)

Average degree: 98.01777777777778
Clustering coefficient: 0.30217549105121533
Average Path Length: 1.79484286067805


In [40]:
# Number of planted blocks, nodes and edges of the SBM graph.
# The block count is read directly from the clustering instead of being derived
# from the last entry of the node->community map, which depends on insertion order.
print(len(comsSBM.communities))
print(sbmGraph.number_of_nodes())
print(sbmGraph.number_of_edges())

3
450
22054


In [32]:
# Partition the SBM graph with each algorithm and compare Newman-Girvan modularity.
# Note: louvainMod, leidenMod, gnMod and infomapMod reuse the names of the earlier
# modularity cells and overwrite their values.
sbmLouvain = algorithms.louvain(sbmGraph)
sbmLeiden = algorithms.leiden(sbmGraph)
sbmGN = algorithms.girvan_newman(sbmGraph, level=3)
sbmInfomap = new_infomap(sbmGraph, "-N 2")

louvainMod = evaluation.newman_girvan_modularity(sbmGraph, sbmLouvain)
leidenMod = evaluation.newman_girvan_modularity(sbmGraph, sbmLeiden)
gnMod = evaluation.newman_girvan_modularity(sbmGraph, sbmGN)
infomapMod = evaluation.newman_girvan_modularity(sbmGraph, sbmInfomap)

print("Louvain modularity:", louvainMod)
print("Leiden modularity:", leidenMod)
print("Girvan-Newman modularity:", gnMod)
print("Infomap modularity:", infomapMod)