In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from cdlib import algorithms, readwrite, evaluation, NodeClustering
from cdlib.benchmark import LFR
import matplotlib.pyplot as plt
import time
import csv
from infomap import Infomap

In [2]:
def averageDegree(graph):
    """Return the mean node degree of `graph`.

    `graph` must expose a ``degree()`` call yielding ``(node, degree)`` pairs.
    An empty graph yields 0.0 instead of raising ZeroDivisionError.
    """
    degrees = [degree for _, degree in graph.degree()]
    if not degrees:
        return 0.0
    # Use the builtin sum rather than shadowing it with a local accumulator.
    return sum(degrees) / len(degrees)

In [3]:
def APL(graph):
    """Return the average shortest path length of the largest component.

    Directed graphs are split into weakly connected components, undirected
    graphs into connected components. The path length is computed once, on
    the component with the most nodes (the first one wins on ties).

    Returns 0 for a graph without nodes.
    """
    if len(graph) == 0:
        return 0
    if nx.is_directed(graph):
        components = nx.weakly_connected_components(graph)
    else:
        components = nx.connected_components(graph)
    # Select the largest component first so the expensive all-pairs
    # computation runs exactly once, and so the undirected case no longer
    # reports whichever component happens to be yielded first.
    largest = max(components, key=len)
    return nx.average_shortest_path_length(graph.subgraph(largest))

In [4]:
def kin(graph):
    """Return the mean in-degree over all nodes of a directed `graph`.

    Uses ``graph.nodes()`` and ``graph.in_degree(node)``. An empty graph
    yields 0.0 instead of raising ZeroDivisionError.
    """
    nodes = graph.nodes()
    if len(nodes) == 0:
        return 0.0
    # Builtin sum over a generator; no local variable shadows the builtin.
    return sum(graph.in_degree(n) for n in nodes) / len(nodes)

In [5]:
def kout(graph):
    """Return the mean out-degree over all nodes of a directed `graph`.

    Uses ``graph.nodes()`` and ``graph.out_degree(node)``. An empty graph
    yields 0.0 instead of raising ZeroDivisionError.
    """
    nodes = graph.nodes()
    if len(nodes) == 0:
        return 0.0
    # Builtin sum over a generator; no local variable shadows the builtin.
    return sum(graph.out_degree(n) for n in nodes) / len(nodes)

In [6]:
def networkInfo(graph):
    """Print summary statistics for `graph`, one labelled line per metric.

    Always prints the average degree, the average clustering coefficient and
    the value returned by APL(); for directed graphs the kin() and kout()
    averages are printed in between.
    """
    print("Average degree:", averageDegree(graph))

    # The in/out averages are only reported for directed graphs.
    if nx.is_directed(graph):
        directed_only = (
            ("Internal average degree:", kin),
            ("External average degree:", kout),
        )
    else:
        directed_only = ()

    always = (
        ("Clustering coefficient:", nx.average_clustering),
        ("Average Path Length (highest value):", APL),
    )

    # Each metric is evaluated right before it is printed, in report order.
    for label, metric in (*directed_only, *always):
        print(label, metric(graph))

In [7]:
def plotDegreeDistribution(graph):
    """Save a log-log plot of the degree distribution of `graph` as a PNG.

    The degree histogram is divided by the node count and drawn as green
    dots, with the x axis labelled "k" and the y axis labelled "Pk". The
    figure is written to Images/DegreeDistribution_<graph.name>.png and then
    closed; nothing is returned.
    """
    canvas = plt.figure(figsize=(6*1.61803398875, 6))
    panel = plt.axes((0.2, 0.2, 0.70, 0.70), facecolor='w')

    # Histogram counts divided by the node count give the plotted fractions.
    counts = np.array(nx.degree_histogram(graph))
    fraction = counts / len(graph.nodes)
    k_values = np.arange(len(fraction))
    panel.plot(k_values, fraction, "go")

    panel.set_title("Degree distribution")
    panel.set_xlabel("k")
    panel.set_ylabel("Pk")
    # Both axes are logarithmic.
    panel.set_xscale('log')
    panel.set_yscale('log')

    target = "Images/DegreeDistribution_%s.png" % (graph.name)
    canvas.savefig(target)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(canvas)

## Email network 

In [27]:
def directedEmailNet(fileName):
    """Load the edge list in `fileName` as a directed graph named "Email_directed".

    The file is read without a header row; the first column of every row is
    split on whitespace into a source and a target node label. Labels stay
    strings.
    """
    lines = pd.read_csv(fileName, header=None)
    # At most two splits per line; the two-name assignment below fails if a
    # line produced a third column.
    edges = lines[0].str.split(n=2, expand=True)
    edges.columns = ['Source', 'Target']
    digraph = nx.from_pandas_edgelist(
        edges,
        source='Source',
        target='Target',
        edge_attr=None,
        create_using=nx.DiGraph(),
    )
    digraph.name = "Email_directed"
    return digraph

# Build the directed e-mail graph from the benchmark edge list.
directedGraph = directedEmailNet("Networks/Benchmark/email.txt")
# Copy of the graph with integer node labels.
# NOTE(review): convert_node_labels_to_integers presumably numbers nodes by
# iteration order rather than by parsing the original label - confirm the new
# ids line up with any integer ground truth they are compared against.
directedGraph_int = nx.convert_node_labels_to_integers(directedGraph)
print(directedGraph)
# Optional diagnostics, disabled by default:
#plotDegreeDistribution(directedGraph)
#networkInfo(directedGraph)

DiGraph named 'Email_directed' with 1005 nodes and 25571 edges


In [28]:
def undirectedEmailNet(fileName):
    """Load the edge list in `fileName` as an undirected graph named "Email_undirected".

    The file is read without a header row; the first column of every row is
    split on whitespace into a source and a target node label. Labels stay
    strings.
    """
    lines = pd.read_csv(fileName, header=None)
    # At most two splits per line; the two-name assignment below fails if a
    # line produced a third column.
    edges = lines[0].str.split(n=2, expand=True)
    edges.columns = ['Source', 'Target']
    graph = nx.from_pandas_edgelist(
        edges,
        source='Source',
        target='Target',
        edge_attr=None,
    )
    graph.name = "Email_undirected"
    return graph

# Build the undirected e-mail graph from the same benchmark edge list.
undirectedGraph = undirectedEmailNet("Networks/Benchmark/email.txt")
# Copy of the graph with integer node labels.
# NOTE(review): convert_node_labels_to_integers presumably numbers nodes by
# iteration order rather than by parsing the original label - confirm the new
# ids line up with any integer ground truth they are compared against.
undirectedGraph_int = nx.convert_node_labels_to_integers(undirectedGraph)
print(undirectedGraph)
# Optional diagnostics, disabled by default:
#plotDegreeDistribution(undirectedGraph)
#networkInfo(undirectedGraph)

Graph named 'Email_undirected' with 1005 nodes and 16706 edges


## Email communities 

In [10]:
def convertTxtToCsv(txtPath='Networks/Benchmark/email_coms.txt',
                    csvPath='Networks/Benchmark/email_coms.csv'):
    """Convert a "node community" text file into a one-row-per-community CSV.

    Parameters
    ----------
    txtPath : str
        Input file with one whitespace-separated ``node community`` pair per
        line. Defaults to the e-mail benchmark file.
    csvPath : str
        Output CSV; each row lists the nodes of one community, with
        communities in order of first appearance. Defaults to the e-mail
        benchmark CSV.

    Blank lines are skipped. A non-blank line with fewer than two fields
    raises IndexError.
    """
    members = {}
    # The context manager closes the input file even if parsing fails.
    with open(txtPath, 'r') as txtFile:
        for line in txtFile:
            # split() without arguments drops the line ending whatever it is
            # (or if it is missing on the last line) and tolerates tabs or
            # repeated spaces.
            fields = line.split()
            if not fields:
                continue
            node, community = fields[0], fields[1]
            members.setdefault(community, []).append(node)

    with open(csvPath, 'w', newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(members.values())
# One-off conversion of the raw community file; left disabled (the CSV read below must already exist).
#convertTxtToCsv()
# Ground-truth communities, loaded twice from the same CSV: the last argument
# is str for one copy and int for the other (presumably the node type used
# for the ids - confirm against the cdlib reader).
emailComs = readwrite.read_community_csv('Networks/Benchmark/email_coms.csv', ",", str)
emailComs_int = readwrite.read_community_csv('Networks/Benchmark/email_coms.csv', ",", int)

## Accuracy

In [33]:
def accuracy():
    """Run the non-overlapping detection algorithms and print NMI, ARI and NF1.

    Every enabled algorithm is executed once on the module-level e-mail graph
    (``undirectedGraph``, or ``undirectedGraph_int`` for Edmot). Each
    resulting clustering is then scored against the ground truth
    (``emailComs`` or ``emailComs_int``) with three cdlib metrics. Output
    order: one "Running <name>..." line per algorithm, then one block per
    metric with one "<Name> <METRIC>: <score>" line per algorithm.

    Relies on the module-level helper ``infomap`` having been defined before
    this function is called.

    Changes compared with the previous copy-pasted version:
    - The overlapping algorithms (kclique, conga, walkscan, lpanni) are no
      longer executed here; their results were never used in this function.
    - Disabled algorithms no longer print a "Running ..." line.

    NOTE(review): Paris is run on the string-labelled graph but scored
    against ``emailComs_int`` - confirm that the labels it returns match the
    integer ground truth.
    """
    # (display name, zero-argument runner, ground truth to score against)
    candidates = [
        ("Greedy", lambda: algorithms.greedy_modularity(undirectedGraph), emailComs),
        # Disabled: ("Genetic", lambda: algorithms.ga(undirectedGraph), emailComs),
        ("Louvain", lambda: algorithms.louvain(undirectedGraph), emailComs),
        ("Leiden", lambda: algorithms.leiden(undirectedGraph), emailComs),
        ("Paris", lambda: algorithms.paris(undirectedGraph), emailComs_int),
        ("Edmot", lambda: algorithms.edmot(undirectedGraph_int), emailComs_int),
        # Disabled: ("Girvan-Newman", lambda: algorithms.girvan_newman(undirectedGraph, level=7), emailComs),
        ("Eigenvector", lambda: algorithms.eigenvector(undirectedGraph), emailComs),
        ("Markov", lambda: algorithms.markov_clustering(undirectedGraph), emailComs),
        ("Walktrap", lambda: algorithms.walktrap(undirectedGraph), emailComs),
        ("Label", lambda: algorithms.label_propagation(undirectedGraph), emailComs),
        ("Surprise", lambda: algorithms.surprise_communities(undirectedGraph), emailComs),
        ("Infomap", lambda: infomap(undirectedGraph), emailComs_int),
    ]

    # (metric tag used in the printed label, scoring function)
    metrics = [
        ("NMI", evaluation.normalized_mutual_information),
        ("ARI", evaluation.adjusted_rand_index),
        ("NF1", evaluation.nf1),
    ]

    # Run every algorithm exactly once, announcing it first.
    detected = []
    for name, run, truth in candidates:
        print("Running %s..." % name.lower())
        detected.append((name, truth, run()))

    # One blank-line-separated block per metric; element [0] of each result is printed as the score.
    for tag, score in metrics:
        print()
        for name, truth, found in detected:
            print("%s %s:" % (name, tag), score(truth, found)[0])
    
    
# NOTE(review): accuracy() calls infomap(), whose definition appears in a later
# cell of this notebook, so on a fresh kernel that cell must be executed first.
accuracy()

Running greedy...
Running genetic...
Running louvain...
Running leiden...
Running paris...
Running edmot...
Running girvan-newman...
Running eigenvector...
Running markov...
Running walktrap...
Running label...
Running surprise...
Running infomap...

Greedy NMI: 0.47804271515621377
Louvain NMI: 0.5626648416549228
Leiden NMI: 0.5807526699272471
Paris NMI: 0.03291866226028452
Edmot NMI: 0.4308017550093902
Eigenvector NMI: 0.513964159513045
Markov NMI: 0.4285244451150287
Walktrap NMI: 0.5804119999038778
Label NMI: 0.18029348991274805
Surprise NMI: 0.6486262967064623
Infomap NMI: 0.6447904774975328

Greedy ARI: 0.17230136932066062
Louvain ARI: 0.2855519295101255
Leiden ARI: 0.3471046847690125
Paris ARI: -0.0007319958862987065
Edmot ARI: 0.08785682249274863
Eigenvector ARI: 0.26111991499699994
Markov ARI: 0.05416631630128914
Walktrap ARI: 0.1974976307722098
Label ARI: 0.011373024191370956
Surprise ARI: 8.09799716293502e-05
Infomap ARI: 0.30285515191212864

Greedy NF1: 0.004670282369146006
L

In [46]:
def accuracy_overlapping():
    """Run the overlapping detection algorithms and print their NMI_LFK scores.

    Each enabled algorithm is executed once on the module-level
    ``undirectedGraph`` and scored against ``emailComs`` with cdlib's
    overlapping NMI (LFK variant). Output order: one "Running <name>..." line
    per algorithm, a blank line, then one "<Name> NMI_LFK: <score>" line per
    algorithm.

    Disabled algorithms (CFinder, CONGA) are kept as commented-out entries
    and no longer print a "Running ..." line.
    """
    # (display name, zero-argument runner)
    candidates = [
        # Disabled: ("CFinder", lambda: algorithms.kclique(undirectedGraph, k=3)),
        # Disabled: ("CONGA", lambda: algorithms.conga(undirectedGraph, number_communities=3)),
        ("Walkscan", lambda: algorithms.walkscan(undirectedGraph)),
        ("LPANNI", lambda: algorithms.lpanni(undirectedGraph)),
    ]

    # Run every algorithm exactly once, announcing it first.
    detected = []
    for name, run in candidates:
        print("Running %s..." % name.lower())
        detected.append((name, run()))

    print()

    for name, found in detected:
        result = evaluation.overlapping_normalized_mutual_information_LFK(emailComs, found)
        print("%s NMI_LFK:" % name, result[0])

    
# Run the overlapping-community evaluation; it reads undirectedGraph and emailComs defined in earlier cells.
accuracy_overlapping()

Running cfinder...
Running conga...
Running walkscan...
Running lpanni...

Walkscan NMI_LFK: 0.018419531686228785
LPANNI NMI_LFK: 0.005680634041260069


## Infomap

In [34]:
def infomap(graph):
    """Detect communities in `graph` with Infomap and wrap them as a NodeClustering.

    Every edge of `graph` is passed to Infomap with both endpoints converted
    through ``int()``, so node labels must be integer-convertible. Only edges
    are fed in, so a node without any edge is never handed to Infomap.

    Returns a NodeClustering (method name "infomap") built on `graph`, whose
    communities contain the integer node ids reported by Infomap, one list
    per top-level module.
    """
    infomapWrapper = Infomap("--two-level --silent")
    for source, target in graph.edges():
        infomapWrapper.addLink(int(source), int(target))
    infomapWrapper.run()

    # Build one independent list per top module. `[[]] * n` would repeat a
    # single shared list n times, which previously only worked because of an
    # is-it-empty check before every append.
    communities = [[] for _ in range(infomapWrapper.num_top_modules)]
    for node in infomapWrapper.tree:
        if node.is_leaf:
            # module_id is shifted by one to index the list (presumably ids
            # are 1-based - confirm against the Infomap API).
            communities[node.module_id - 1].append(node.node_id)
    return NodeClustering(communities, graph, method_name="infomap")