## Imports and utils

In [2]:
import networkx as nx
import csv
import time
import pandas as pd
from cdlib import algorithms, readwrite, evaluation, NodeClustering
from cdlib.benchmark import LFR, SBM, GRP
import infomap
from collections import defaultdict
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from pathlib import Path
import pickle

In [3]:
# Directory that receives every figure saved by this notebook.
figurePath = Path("Figures")
# Create it up front: fig.savefig() does not create missing directories, so
# without this the plotting cells fail on a fresh checkout.
figurePath.mkdir(parents=True, exist_ok=True)
print(figurePath)

Figures


In [4]:
def datafileToGraph(fileName):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Every line of ``fileName`` is read as one text field and split on
    whitespace; the first two tokens become the source and target node labels
    (kept as strings).  The graph's ``name`` is the file name without its
    directory and extension.

    Parameters
    ----------
    fileName : str or path-like
        Path of the edge-list file.

    Returns
    -------
    networkx.Graph
        Graph with one edge per input line and no edge attributes.
    """
    raw = pd.read_csv(fileName, header=None)
    # Split on any run of whitespace and keep only the first two tokens.
    # (The previous ``split(n=2)`` produced a third column whenever a line
    # carried extra tokens, which made the column assignment below raise.)
    edges = raw[0].str.split(expand=True).iloc[:, :2]
    edges.columns = ['Source', 'Target']
    network = nx.from_pandas_edgelist(edges, source='Source', target='Target', edge_attr=None)
    # Path(...).stem also works for Path objects and for paths that contain
    # directories or dots, unlike ``fileName.split(".")[0]``.
    network.name = Path(fileName).stem
    return network

In [5]:
def averageDegree(networkx):
    """Return the mean node degree of a graph.

    Parameters
    ----------
    networkx : graph-like
        Object whose ``degree()`` call yields ``(node, degree)`` pairs.

    Returns
    -------
    float
        Sum of all degrees divided by the number of nodes, or ``0.0`` for a
        graph without nodes (instead of raising ``ZeroDivisionError``).
    """
    degrees = [val for (_node, val) in networkx.degree()]
    if not degrees:
        return 0.0
    # Use the builtin ``sum`` rather than shadowing it with a local accumulator.
    return sum(degrees) / len(degrees)

In [6]:
def new_infomap(g_original: object, flags: str = "") -> NodeClustering:
    """Run Infomap on ``g_original`` and wrap the result as a NodeClustering.

    The graph is first copied with its nodes relabelled to consecutive
    integers (the original label is stored in the ``name`` node attribute);
    Infomap is run on that integer graph and the detected modules are mapped
    back to the original labels.

    Parameters
    ----------
    g_original : networkx graph
        Graph to cluster.  An edge ``weight`` attribute, when present, is
        forwarded to Infomap as the link weight.
    flags : str, optional
        Command-line style flags passed to ``infomap.Infomap``;
        ``" --silent"`` is always appended.

    Returns
    -------
    NodeClustering
        One list of original node labels per Infomap module.  The recorded
        ``method_parameters["flags"]`` include the appended ``--silent``.
    """
    g1 = nx.convert_node_labels_to_integers(g_original, label_attribute="name")
    name_map = nx.get_node_attributes(g1, "name")
    coms_to_node = defaultdict(list)

    flags += " --silent"

    im = infomap.Infomap(flags)

    # Register the *relabelled* integer ids: the links below are expressed in
    # g1's ids, so adding g_original's labels here only worked when those
    # labels already happened to be 0..n-1.
    im.add_nodes(g1.nodes)

    for source, target, data in g1.edges(data=True):
        if "weight" in data:
            im.add_link(source, target, data["weight"])
        else:
            im.add_link(source, target)
    im.run()

    # Translate Infomap's integer node ids back to the original labels.
    for node_id, module_id in im.modules:
        coms_to_node[module_id].append(name_map[node_id])

    coms_infomap = [list(c) for c in coms_to_node.values()]

    return NodeClustering(
        coms_infomap, g_original, "Infomap", method_parameters={"flags": flags}
    )

In [7]:
def network_info(network, graph = "False"):
    """Print summary statistics of ``network`` and optionally plot its degree distribution.

    Prints the average degree and the average clustering coefficient.  When
    ``graph`` is enabled, the normalised degree histogram is drawn on log-log
    axes and saved as ``DegreeDistribution_<network.name>.png`` under
    ``figurePath``.

    Parameters
    ----------
    network : networkx graph
        Graph to describe; its ``name`` attribute is used in the file name.
    graph : bool or str, optional
        Whether to save the plot.  String values are interpreted by content
        ("", "false", "0", "no" and "none" disable it, case-insensitively), so
        the default ``"False"`` disables plotting instead of being treated as a
        truthy non-empty string.  Pass ``graph=True`` to save the figure.
    """
    # A non-empty string is always truthy, which made the "False" default (and
    # any explicit "False") enable the plot; normalise string flags first.
    if isinstance(graph, str):
        graph = graph.strip().lower() not in ("", "false", "0", "no", "none")

    print("Average degree:", averageDegree(network))
    print("Clustering coefficient:", nx.average_clustering(network))

    if graph:
        # savefig() does not create missing directories.
        figurePath.mkdir(parents=True, exist_ok=True)

        # Plot the degree distribution P(k): fraction of nodes with degree k.
        fig = plt.figure(figsize=(3*1.61803398875, 3))
        ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')
        d = np.array(nx.degree_histogram(network))
        y = d / len(network.nodes)
        x = np.arange(len(y))
        ax.plot(x,y,"go")
        ax.set_xlabel("k")
        ax.set_ylabel("Pk")
        ax.set_yscale('log')
        ax.set_xscale('log')
        ax.set_title("Degree distribution")
        fig.savefig(figurePath/("DegreeDistribution_%s.png" % (network.name)))
        # Close explicitly so repeated calls do not accumulate open figures.
        plt.close(fig)

In [16]:
def convertTextDatasetToCSV(txtFile, csvFile):
    """Convert a ``<node> <label>`` text file into a one-community-per-row CSV.

    Each input line is split on whitespace; the first token is the node and
    the second its community label.  Nodes sharing a label are written on the
    same CSV row, rows appearing in order of first occurrence of their label.
    Lines with fewer than two tokens (e.g. blank lines) are skipped.

    Parameters
    ----------
    txtFile : str or path-like
        Input file with one ``node label`` pair per line.
    csvFile : str or path-like
        Output CSV file (overwritten).
    """
    csvLines = {}

    # ``with`` closes the input file; the previous version leaked the handle.
    with open(txtFile, 'r') as txt_file:
        for line in txt_file:
            # split() ignores the trailing newline and tolerates tabs or
            # repeated spaces.  The old ``line[:-1].split(' ')`` chopped the
            # last character of a final line that had no newline.
            lineVec = line.split()
            if len(lineVec) < 2:
                continue
            csvLines.setdefault(lineVec[1], []).append(lineVec[0])

    with open(csvFile, 'w', newline="") as csv_file:
        writer = csv.writer(csv_file)
        for members in csvLines.values():
            writer.writerow(members)
        
# Regroup the per-node labels of emailLabels.txt into one CSV row per label,
# then load that CSV as a clustering. ``str`` is passed so node ids are read
# as strings (presumably to match the string node labels of the graph — confirm).
convertTextDatasetToCSV('emailLabels.txt', 'emailLabels.csv')
emailLabels = readwrite.read_community_csv("emailLabels.csv", ",", str)
# Sanity check: show the type of the loaded object.
print(type(emailLabels))

<class 'cdlib.classes.node_clustering.NodeClustering'>


In [35]:
def convertDblpLabelsToCSV(txtFile, csvFile):
    """Convert a ``<node> <label>`` text file into a one-community-per-row CSV.

    Each input line is split on whitespace; the first token is the node and
    the second its community label.  Nodes sharing a label are written on the
    same CSV row, rows appearing in order of first occurrence of their label.
    Lines with fewer than two tokens (e.g. blank lines) are skipped.

    Parameters
    ----------
    txtFile : str or path-like
        Input file with one ``node label`` pair per line.
    csvFile : str or path-like
        Output CSV file (overwritten).
    """
    csvLines = {}

    # ``with`` closes the input file; the previous version leaked the handle.
    with open(txtFile, 'r') as txt_file:
        for line in txt_file:
            # split() ignores the trailing newline and tolerates tabs or
            # repeated spaces.  The old ``line[:-1].split(' ')`` chopped the
            # last character of a final line that had no newline.
            # (The per-line debug print was removed: it flooded the output.)
            lineVec = line.split()
            if len(lineVec) < 2:
                continue
            csvLines.setdefault(lineVec[1], []).append(lineVec[0])

    with open(csvFile, 'w', newline="") as csv_file:
        writer = csv.writer(csv_file)
        for members in csvLines.values():
            writer.writerow(members)

# The conversion step is disabled: dblpLabels.txt is read directly below as a
# tab-separated file (presumably it already holds one community per line — confirm).
#convertDblpLabelsToCSV("dblpLabels.txt", "dblpLabels.csv")
dblpLabels = readwrite.read_community_csv("dblpLabels.txt", "\t", str)
# Print the first community id recorded for the last node in the node->communities map.
print(list(dblpLabels.to_node_community_map().values())[-1][0])

['61', '710', '1379', '1991', '2138', '2419', '2873', '3078', '3399', '3420', '3814', '3844', '4393', '4799', '4824', '4915', '4945', '5028', '5152', '5489', '5594', '5756', '5779', '5790', '6353', '6898', '6942', '6953', '7109', '7426', '7427', '7528', '7692', '7702', '7950', '8746', '8843', '8958', '9151', '9220', '9531', '9532', '9585', '10175', '10181', '10355', '10542', '10619', '10646', '10730', '11027', '11034', '11174', '11323', '11421', '11562', '11803', '11920', '12011', '12146', '12273', '12830', '13039', '13152', '13237', '13256', '13651', '13804', '13830', '13877', '13909', '13914', '14182', '14230', '14245', '14304', '14397', '14722', '14767', '14989', '15064', '15203', '15304', '15337', '15623', '15669', '15682', '15723', '15811', '15822', '16091', '16218', '16260', '16264', '16426', '16877', '17023', '17202', '17238', '17322', '17612', '17625', '17764', '17840', '17853', '18020', '18111', '18182', '18208', '18406', '18419', '18522', '18611', '18695', '18779', '18863', '

In [8]:
def dataToJSON(data, filename):
    """Serialise ``data`` into ``filename`` using pickle.

    Despite its name this helper writes a binary pickle, not JSON.

    Parameters
    ----------
    data : object
        Any picklable object.
    filename : str or path-like
        Destination file (opened in binary write mode, overwritten).
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)

In [9]:
def JSONtoData(fileName):
    """Load and return the object pickled in ``fileName``.

    Counterpart of ``dataToJSON``: the file is a binary pickle, not JSON.
    NOTE: unpickling can execute arbitrary code, so only load files that this
    notebook (or another trusted source) produced.
    """
    with open(fileName, 'rb') as source:
        return pickle.load(source)

## Email network

In [27]:
# Build the e-mail graph from its edge-list file and print its summary.
emailNet = datafileToGraph('emailNet.txt')
print(emailNet)

Graph named 'emailNet' with 1005 nodes and 16706 edges


## Email network properties 

In [33]:
# Request the degree-distribution figure explicitly instead of relying on the
# truthiness of network_info's string default.
network_info(emailNet, graph=True)

Average degree: 33.245771144278606
Clustering coefficient: 0.3993549664221539
Average Path Length: 2.586933824816466
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0
Average Path Length: 0


## Community finding methods for the email network

In [34]:
# Labels of the timed algorithms, in the same order as the entries of timeArray.
algs = ["louvain", "leiden", "girvan_newman"]
timeArray = []


def _timedDetection(label, detect, *args, **kwargs):
    """Run ``detect(*args, **kwargs)``, print and record its wall-clock time.

    The elapsed time (rounded to 6 decimals) is appended to ``timeArray`` and
    the detection result is returned.
    """
    # perf_counter is a monotonic clock meant for measuring intervals.
    start_time = time.perf_counter()
    result = detect(*args, **kwargs)
    t = time.perf_counter() - start_time
    # "%.4f" formats the number itself; the former "%.6s" truncated its string
    # form, so e.g. 1.2345e-05 was printed as "1.2345".
    print("Execution time for %s in email net: %.4f seconds" % (label, t))
    timeArray.append(round(t, 6))
    return result


emailLouvain = _timedDetection("Louvain", algorithms.louvain, emailNet)
emailLeiden = _timedDetection("Leiden", algorithms.leiden, emailNet)
emailGN = _timedDetection("Girvan-Newman", algorithms.girvan_newman, emailNet, level=3)

Execution time for Louvain in email net: 0.3183 seconds
Execution time for Leiden in email net: 0.0458 seconds
Execution time for Girvan-Newman in email net: 124.97 seconds


In [None]:
# Bar chart of the execution time measured for each algorithm on the email network.
fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = fig.add_axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

# Bar width derived from the number of algorithms and of recorded timings.
width = 3.2 / (len(algs)*len(timeArray))  # the width of the bars
step = width / len(algs)

# One bar per algorithm; timeArray is indexed in the same order as algs.
for alg_id, alg_name in enumerate(algs):
    ax.bar(alg_name, timeArray[alg_id], step, label=alg_name)

ax.set_title("Execution time")
ax.set_ylabel("Execution time")
fig.savefig(figurePath/("Execution_Time%s.png" % (emailNet.name)))
plt.close(fig)

## Louvain properties

In [34]:
# Structural properties of the Louvain communities on the email network.
# Count the communities directly; the previous expression inferred the count
# from the id stored for the last key of to_node_community_map(), which
# depends on dict insertion order and on ids being contiguous.
nComs = len(emailLouvain.communities)
size = evaluation.size(emailNet, emailLouvain)
ad = evaluation.avg_distance(emailNet, emailLouvain)
aid = evaluation.average_internal_degree(emailNet, emailLouvain)
ae = evaluation.avg_embeddedness(emailNet, emailLouvain)
at = evaluation.avg_transitivity(emailNet, emailLouvain)
hd = evaluation.hub_dominance(emailNet, emailLouvain)

print("Louvain Number of communities:", nComs)
print("Louvain Average Community Size:", size)
print("Louvain Average Path Length:",  ad)
print("Louvain Average Internal Degree:",  aid)
print("Louvain Average Embeddedness:",  ae)
print("Louvain Average Transitivity:",  at)
print("Louvain Hub Dominance:",  hd)

Louvain Number of communities: 28
Louvain Average Community Size: FitnessResult(min=1, max=242, score=35.892857142857146, std=61.7525819504822)
Louvain Average Path Length: FitnessResult(min=0, max=2.3587693798449614, score=0.6687511989004468, std=0.9814561549961194)
Louvain Average Internal Degree: FitnessResult(min=2.0, max=26.414414414414413, score=7.1414264044270235, std=8.145409774820516)
Louvain Average Embeddedness: FitnessResult(min=0.5262840971085652, max=1.0, score=0.9026454393768262, std=0.1511494864953937)
Louvain Average Transitivity: FitnessResult(min=0.0, max=0.7483573148327239, score=0.17693456966841095, std=0.26202897409459874)
Louvain Hub Dominance: FitnessResult(min=0.42323651452282157, max=1.1111111111111112, score=0.6302828175082392, std=0.1938634867901892)
Louvain Significance: FitnessResult(min=None, max=None, score=61545.919858859335, std=None)


In [9]:
#louvainDict = louvain.to_node_community_map()
#print(louvainDict)
#louvain.to_json()

## Leiden properties

In [36]:
# Structural properties of the Leiden communities on the email network.
# Count the communities directly; the previous expression inferred the count
# from the id stored for the last key of to_node_community_map(), which
# depends on dict insertion order and on ids being contiguous.
nComs = len(emailLeiden.communities)
size = evaluation.size(emailNet, emailLeiden)
ad = evaluation.avg_distance(emailNet, emailLeiden)
aid = evaluation.average_internal_degree(emailNet, emailLeiden)
ae = evaluation.avg_embeddedness(emailNet, emailLeiden)
at = evaluation.avg_transitivity(emailNet, emailLeiden)
hd = evaluation.hub_dominance(emailNet, emailLeiden)

print("Leiden Number of communities:", nComs)
print("Leiden Average Community Size:", size)
print("Leiden Average Path Length:",  ad)
print("Leiden Average Internal Degree:",  aid)
print("Leiden Average Embeddedness:",  ae)
print("Leiden Average Transitivity:",  at)
print("Leiden Hub Dominance:",  hd)

Leiden Number of communities: 27
Leiden Average Community Size: FitnessResult(min=1, max=245, score=37.22222222222222, std=62.93637782968597)
Leiden Average Path Length: FitnessResult(min=0, max=2.377818668450987, score=0.6413249932430524, std=0.9921364087509856)
Leiden Average Internal Degree: FitnessResult(min=2.0, max=25.0990990990991, score=7.1751057028643785, std=8.2048837020894)
Leiden Average Embeddedness: FitnessResult(min=0.5497627459803758, max=1.0, score=0.9175145392570943, std=0.13331845281395657)
Leiden Average Transitivity: FitnessResult(min=0.0, max=0.7730782353499883, score=0.15939350931645443, std=0.25158624800626506)
Leiden Hub Dominance: FitnessResult(min=0.4057377049180328, max=0.6363636363636364, score=0.5550471907298913, std=0.07580252489705826)


## Girvan-Newman properties

In [38]:
# Structural properties of the Girvan-Newman communities on the email network.
# Count the communities directly; the previous expression inferred the count
# from the id stored for the last key of to_node_community_map(), which
# depends on dict insertion order and on ids being contiguous.
nComs = len(emailGN.communities)
size = evaluation.size(emailNet, emailGN)
ad = evaluation.avg_distance(emailNet, emailGN)
aid = evaluation.average_internal_degree(emailNet, emailGN)
ae = evaluation.avg_embeddedness(emailNet, emailGN)
at = evaluation.avg_transitivity(emailNet, emailGN)
hd = evaluation.hub_dominance(emailNet, emailGN)

print("Girvan-Newman Number of communities:", nComs)
print("Girvan-Newman Size:", size)
print("Girvan-Newman Average Path Length:",  ad)
print("Girvan-Newman Average Internal Degree:",  aid)
print("Girvan-Newman Average Embeddedness:",  ae)
print("Girvan-Newman Average Transitivity:",  at)
print("Girvan-Newman Hub Dominance:",  hd)

Girvan-Newman Number of communities: 23
Girvan-Newman Size: FitnessResult(min=1, max=978, score=43.69565217391305, std=199.1969605981256)
Girvan-Newman Average Path Length: FitnessResult(min=0, max=2.5665982212565908, score=0.1927506473010112, std=0.6330518282579853)
Girvan-Newman Average Internal Degree: FitnessResult(min=0.0, max=34.08997955010225, score=3.235796212323286, std=6.602873143511623)
Girvan-Newman Average Embeddedness: FitnessResult(min=0.0, max=1.0, score=0.9057542629926452, std=0.2815706562689681)
Girvan-Newman Average Transitivity: FitnessResult(min=0.0, max=0.4108588094169628, score=0.017863426496389688, std=0.08378689716311363)
Girvan-Newman Hub Dominance: FitnessResult(min=0.35516888433981575, max=1.2, score=0.7775844421699079, std=0.4224155578300921)
Girvan-Newman Significance: FitnessResult(min=None, max=None, score=78331.79378130106, std=None)


## Modularity

In [11]:
# Three modularity scores of the Louvain partition of the email network.
# (louvainGN is the Newman-Girvan modularity, not a Girvan-Newman clustering.)
louvainGN = evaluation.newman_girvan_modularity(emailNet, emailLouvain)
louvainER = evaluation.erdos_renyi_modularity(emailNet, emailLouvain)
louvainZ = evaluation.z_modularity(emailNet, emailLouvain)

# Report the scores in the order they were computed.
for label, value in (
    ("Louvain Girvan-Newman mod:", louvainGN),
    ("Louvain Erdos-Renyi mod:", louvainER),
    ("Louvain Z-mod:", louvainZ),
):
    print(label, value)

Louvain Girvan-Newman mod: FitnessResult(min=None, max=None, score=0.4326447395710833, std=None)
Louvain Erdos-Renyi mod: FitnessResult(min=None, max=None, score=0.45046874723811403, std=None)
Louvain Z-mod: FitnessResult(min=None, max=None, score=1.1869576999709814, std=None)


## External Evaluation

In [12]:
# Agreement between the Louvain and Girvan-Newman partitions of the email network.
nmi = evaluation.normalized_mutual_information(emailLouvain, emailGN)
ami = evaluation.adjusted_mutual_information(emailLouvain, emailGN)
ari = evaluation.adjusted_rand_index(emailLouvain, emailGN)
f1 = evaluation.f1(emailLouvain, emailGN)
voi = evaluation.variation_of_information(emailLouvain, emailGN)

# Report every score in the order it was computed.
for label, value in (
    ("Normalized Mutual Information between Louvain and Girvan-Newman:", nmi),
    ("Adjusted Mutual Information between Louvain and Girvan-Newman:", ami),
    ("Adjusted Rand Index between Louvain and Girvan-Newman:", ari),
    ("F1 measure between Louvain and Girvan-Newman:", f1),
    ("Variation of information between Louvain and Girvan-Newman:", voi),
):
    print(label, value)

Normalized Mutual Information between Louvain and Girvan-Newman: MatchingResult(score=0.14375880880788014, std=None)
Adjusted Mutual Information between Louvain and Girvan-Newman: MatchingResult(score=0.10574826763710264, std=None)
Adjusted Rand Index between Louvain and Girvan-Newman: MatchingResult(score=0.01346971650042717, std=None)
F1 measure between Louvain and Girvan-Newman: MatchingResult(score=0.7685185185185185, std=0.3593585200789885)
Variation of information between Louvain and Girvan-Newman: MatchingResult(score=2.854934021362194, std=None)


## Comparison with the email ground-truth labels

In [40]:
# Compare each detected partition of the email network with the labels loaded
# in emailLabels, one block of three scores per metric.

# Normalized mutual information
nmiLouvainLabels = emailLouvain.normalized_mutual_information(emailLabels)
nmiLeidenLabels = emailLeiden.normalized_mutual_information(emailLabels)
nmiGirvanNewmanLabels = emailGN.normalized_mutual_information(emailLabels)

# Adjusted mutual information
amiLouvainLabels = emailLouvain.adjusted_mutual_information(emailLabels)
amiLeidenLabels = emailLeiden.adjusted_mutual_information(emailLabels)
amiGirvanNewmanLabels = emailGN.adjusted_mutual_information(emailLabels)

# Adjusted Rand index
ariLouvainLabels = emailLouvain.adjusted_rand_index(emailLabels)
ariLeidenLabels = emailLeiden.adjusted_rand_index(emailLabels)
ariGirvanNewmanLabels = emailGN.adjusted_rand_index(emailLabels)

# F1
f1LouvainLabels = emailLouvain.f1(emailLabels)
f1LeidenLabels = emailLeiden.f1(emailLabels)
f1GirvanNewmanLabels = emailGN.f1(emailLabels)

# Variation of information
voiLouvainLabels = emailLouvain.variation_of_information(emailLabels)
voiLeidenLabels = emailLeiden.variation_of_information(emailLabels)
voiGirvanNewmanLabels = emailGN.variation_of_information(emailLabels)

print("NMI for Louvain and labels:", nmiLouvainLabels)
print("NMI for Leiden and labels:", nmiLeidenLabels)
print("NMI for Girvan-Newman and labels:", nmiGirvanNewmanLabels)
print()
print("AMI for Louvain and labels:", amiLouvainLabels)
print("AMI for Leiden and labels:", amiLeidenLabels)
print("AMI for Girvan-Newman and labels:", amiGirvanNewmanLabels)
print()
print("ARI for Louvain and labels:", ariLouvainLabels)
print("ARI for Leiden and labels:", ariLeidenLabels)
# This line used to be labelled "NMI" although it prints the ARI score.
print("ARI for Girvan-Newman and labels:", ariGirvanNewmanLabels)
print()
print("F1 for Louvain and labels:", f1LouvainLabels)
print("F1 for Leiden and labels:", f1LeidenLabels)
print("F1 for Girvan-Newman and labels:", f1GirvanNewmanLabels)
print()
print("VOI for Louvain and labels:", voiLouvainLabels)
print("VOI for Leiden and labels:", voiLeidenLabels)
print("VOI for Girvan-Newman and labels:", voiGirvanNewmanLabels)

NMI for Louvain and labels: MatchingResult(score=0.5882969131854163, std=None)
NMI for Leiden and labels: MatchingResult(score=0.5776771281141226, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=0.0449480988954229, std=None)

AMI for Louvain and labels: MatchingResult(score=0.5527517337989198, std=None)
AMI for Leiden and labels: MatchingResult(score=0.540992332601562, std=None)
AMI for Girvan-Newman and labels: MatchingResult(score=-0.0005067215292382119, std=None)

ARI for Louvain and labels: MatchingResult(score=0.3311391803457487, std=None)
ARI for Leiden and labels: MatchingResult(score=0.2985758824644375, std=None)
NMI for Girvan-Newman and labels: MatchingResult(score=-0.0012481583268392242, std=None)

F1 for Louvain and labels: MatchingResult(score=0.21592592592592594, std=0.2803999690819878)
F1 for Leiden and labels: MatchingResult(score=0.2082142857142857, std=0.27508509815053567)
F1 for Girvan-Newman and labels: MatchingResult(score=0.05695652173913045, std=

## DBLP network

In [13]:
# Build the DBLP graph from its edge-list file, print its summary, and store
# it on disk. dataToJSON writes a pickle (not JSON) to 'dblpNet.data'.
dblpNet = datafileToGraph('dblpNet.txt')
print(dblpNet)
dataToJSON(dblpNet, 'dblpNet.data')

Graph named 'dblpNet' with 317080 nodes and 1049866 edges


In [14]:
# Reload the pickled DBLP graph from 'dblpNet.data' instead of re-parsing the text file.
dblpNet = JSONtoData('dblpNet.data')

In [15]:
# Request the degree-distribution figure explicitly instead of relying on the
# truthiness of network_info's string default.
network_info(dblpNet, graph=True)

Average degree: 6.622089062697111
Clustering coefficient: 0.6324308280637396


## LFR 

In [23]:
# Sweep the Girvan-Newman cut level on one LFR benchmark graph and record the
# Newman-Girvan modularity obtained at each level.
lfr, _ = LFR(500, 3, 1.5, 0.1, average_degree=5, min_community=20)
levels = [7, 9, 11, 13, 15, 17, 19, 21, 23, 25]
modularity_vals = []

for level in tqdm(levels):
    lfrGN = algorithms.girvan_newman(lfr, level)
    a = evaluation.newman_girvan_modularity(lfr, lfrGN)
    modularity_vals.append(a.score)

# Pick the first level with the highest modularity.  Selecting from the
# recorded values (instead of comparing against an initial best of 0) also
# reports a real level when no modularity is positive.
bestIdx = max(range(len(levels)), key=modularity_vals.__getitem__)
bestGNscore = modularity_vals[bestIdx]
bestGNcut = levels[bestIdx]

print("best modularity with %d is: %f" % (bestGNcut, bestGNscore))

fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

ax.plot(levels, modularity_vals, "-ro")

ax.set_xlabel("levels")
ax.set_ylabel("Modularity")
plt.xticks(levels)
ax.set_title("Girvan-Newman Modularity LFR")
# No legend: the single series is unlabelled, and ax.legend() only emitted
# "No handles with labels found to put in legend."
fig.savefig(figurePath/("Girvan-Newman_mod_lfr.png"))
plt.close(fig)


100%|██████████| 10/10 [04:43<00:00, 28.35s/it]
No handles with labels found to put in legend.


## LFR results

In [29]:
# LFR benchmark: NMI between each algorithm's partition and the planted
# communities, for increasing mixing parameter mu.
n = 500
tau1 = 3
tau2 = 1.5
mus = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
b = "LFR"

resultsLouvain, resultsLeiden, resultsInfomap, resultsGN = [], [], [], []

for mu in tqdm(mus):
    # Fresh benchmark graph and its planted communities for this mu.
    lfrGraph, comsLFR = LFR(n, tau1, tau2, mu, average_degree=5, min_community=20)

    # Run the four community detection methods on the same graph.
    lfrLouvain = algorithms.louvain(lfrGraph)
    lfrLeiden = algorithms.leiden(lfrGraph)
    lfrInfomap = new_infomap(lfrGraph, "-N 2")
    lfrGN = algorithms.girvan_newman(lfrGraph, level=3)

    # Keep only the NMI score of each comparison with the planted communities.
    nmi1 = evaluation.normalized_mutual_information(lfrLouvain, comsLFR)
    nmi2 = evaluation.normalized_mutual_information(lfrLeiden, comsLFR)
    nmi3 = evaluation.normalized_mutual_information(lfrInfomap, comsLFR)
    nmi4 = evaluation.normalized_mutual_information(lfrGN, comsLFR)
    resultsLouvain.append(nmi1.score)
    resultsLeiden.append(nmi2.score)
    resultsInfomap.append(nmi3.score)
    resultsGN.append(nmi4.score)


fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = fig.add_axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

# One curve per algorithm, drawn in the original order.
for series, fmt, name in (
    (resultsLouvain, "-ro", "Louvain"),
    (resultsLeiden, "-go", "Leiden"),
    (resultsInfomap, "-bo", "Infomap"),
    (resultsGN, "-yo", "Girvan-Newman"),
):
    ax.plot(mus, series, fmt, label=name)

ax.set_xlabel(chr(945+11))  # Greek small letter mu
ax.set_ylabel("Performance")
ax.set_title("nmi Accuracy")
ax.legend()
fig.savefig(figurePath/("Benchmark_Results_%s.png" % (b)))
plt.close(fig)

100%|██████████| 7/7 [09:30<00:00, 81.54s/it]


## SBM 

In [42]:
# Sweep the Girvan-Newman cut level on one SBM benchmark graph (four blocks of
# 32 nodes, intra-block probability 0.8, inter-block probability c) and record
# the Newman-Girvan modularity obtained at each level.
sizes = [32, 32, 32, 32]
c = 0.1
probs = [[0.8, c, c, c], [c, 0.8, c, c], [c, c, 0.8, c],[c, c, c, 0.8]]
sbm, _ = SBM(sizes, probs, seed=10)

levels = [2, 3, 4, 5, 6, 7, 8, 9]
modularity_vals = []

for level in tqdm(levels):
    sbmGN = algorithms.girvan_newman(sbm, level)
    a = evaluation.newman_girvan_modularity(sbm, sbmGN)
    modularity_vals.append(a.score)

# Pick the first level with the highest modularity.  Selecting from the
# recorded values (instead of comparing against an initial best of 0) also
# reports a real level when no modularity is positive.  The per-iteration
# debug print of the running best was dropped; the summary below reports it.
bestIdx = max(range(len(levels)), key=modularity_vals.__getitem__)
bestGNscore = modularity_vals[bestIdx]
bestGNcut = levels[bestIdx]

print("best modularity with %d is: %f" % (bestGNcut, bestGNscore))


fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

ax.plot(levels, modularity_vals, "-ro")

ax.set_xlabel("levels")
ax.set_ylabel("Modularity")
plt.xticks(levels)
ax.set_title("Girvan-Newman Modularity SBM")
fig.savefig(figurePath/("Girvan-Newman_mod_sbm.png"))
plt.close(fig)

100%|██████████| 1/1 [00:52<00:00, 52.41s/it]

0.3948255783688625
best modularity with 2 is: 0.394826





'\nfig = plt.figure(figsize=(3*1.61803398875, 3))\nax = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor=\'w\')\n\nax.plot(levels, modularity_vals, "-ro")\n\nax.set_xlabel("levels")\nax.set_ylabel("Modularity")\nplt.xticks(levels)\nax.set_title("Girvan-Newman Modularity SBM")\nfig.savefig(figurePath/("Girvan-Newman_mod_sbm.png"))\nplt.close(fig)\n'

## SBM results

In [30]:
# SBM benchmark: NMI between each algorithm's partition and the planted
# blocks, for increasing inter-block edge probability c.
resultsLouvain, resultsLeiden, resultsInfomap, resultsGN = [], [], [], []
b = "SBM"

# Values taken by the inter-block probability c.
mus = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

sizes = [32, 32, 32, 32]
for c in tqdm(mus):
    # 4x4 block matrix: 0.8 on the diagonal, c everywhere else.
    probs = [[0.8 if row == col else c for col in range(4)] for row in range(4)]
    sbmGraph, comsSBM = SBM(sizes, probs, seed=10)

    # Run the four community detection methods on the same graph.
    sbmLouvain = algorithms.louvain(sbmGraph)
    sbmLeiden = algorithms.leiden(sbmGraph)
    sbmInfomap = new_infomap(sbmGraph, "-N 2")
    sbmGN = algorithms.girvan_newman(sbmGraph, level=3)

    # Keep only the NMI score of each comparison with the planted blocks.
    nmi1 = evaluation.normalized_mutual_information(sbmLouvain, comsSBM)
    nmi2 = evaluation.normalized_mutual_information(sbmLeiden, comsSBM)
    nmi3 = evaluation.normalized_mutual_information(sbmInfomap, comsSBM)
    nmi4 = evaluation.normalized_mutual_information(sbmGN, comsSBM)
    resultsLouvain.append(nmi1.score)
    resultsLeiden.append(nmi2.score)
    resultsInfomap.append(nmi3.score)
    resultsGN.append(nmi4.score)

fig = plt.figure(figsize=(3*1.61803398875, 3))
ax = fig.add_axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

# One curve per algorithm, drawn in the original order.
for series, fmt, name in (
    (resultsLouvain, "-ro", "Louvain"),
    (resultsLeiden, "-go", "Leiden"),
    (resultsInfomap, "-bo", "Infomap"),
    (resultsGN, "-yo", "GN"),
):
    ax.plot(mus, series, fmt, label=name)

ax.set_xlabel(chr(945+11))  # Greek small letter mu
ax.set_ylabel("Performance")
ax.set_title("nmi Accuracy")
ax.legend()
fig.savefig(figurePath/("Benchmark_Results_%s.png" % (b)))
plt.close(fig)


100%|██████████| 11/11 [09:50<00:00, 53.71s/it]
