In [2]:
import community as community_louvain
import matplotlib.cm as cm
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import community
import pandas as pd
from scipy import stats
from collections import Counter
from networkx.algorithms.community.modularity_max import greedy_modularity_communities


In [5]:
def create_graph(data, min_component_size=4):
    """Build an undirected graph from an edge list and drop tiny components.

    Parameters
    ----------
    data : pandas.DataFrame
        Edge list with a ``source`` and a ``target`` column. No other column
        is attached to the edges (``edge_attr=None``).
    min_component_size : int, optional
        Connected components with fewer nodes than this are removed.
        The default of 4 discards components of three nodes or fewer.

    Returns
    -------
    networkx.Graph
        Undirected graph containing only the components that were kept.
    """
    graph = nx.from_pandas_edgelist(data, source='source', target='target', edge_attr=None)
    graph = graph.to_undirected()
    # Collect the small components first: connected_components is lazy and
    # must not be consumed while the graph is being mutated.
    small_components = [
        component
        for component in nx.connected_components(graph)
        if len(component) < min_component_size
    ]
    for component in small_components:
        graph.remove_nodes_from(component)
    return graph

In [6]:
def create_partition(graph, communities):
    """Map each graph node to the index of the first community containing it.

    Parameters
    ----------
    graph : object exposing an iterable ``nodes`` attribute
        Only nodes listed in ``graph.nodes`` appear in the result.
    communities : iterable of collections
        Communities in index order; members must be hashable.

    Returns
    -------
    dict
        ``{node: community_index}`` in ``graph.nodes`` order. Nodes that do
        not belong to any community are omitted.
    """
    # One pass over all members builds the lookup, instead of rescanning
    # every community for every node. setdefault keeps the lowest index when
    # a member is listed in more than one community.
    first_index = {}
    for index, members in enumerate(communities):
        for member in members:
            first_index.setdefault(member, index)
    return {node: first_index[node] for node in graph.nodes if node in first_index}

def create_communities(partition):
    """Group the nodes of a partition into one list per community.

    Parameters
    ----------
    partition : dict
        ``{node: community_id}`` mapping.

    Returns
    -------
    list of list
        One list of nodes per distinct community id, ordered by ascending id.
        For ids ``0..k-1`` the list index equals the community id.
    """
    # Map each distinct id to a slot instead of indexing by the raw id, so
    # ids with gaps (e.g. {0, 2}) no longer raise IndexError and negative ids
    # no longer land in the wrong list.
    slot_of = {cid: slot for slot, cid in enumerate(sorted(set(partition.values())))}
    communities = [[] for _ in slot_of]
    for node, cid in partition.items():
        communities[slot_of[cid]].append(node)
    return communities

In [7]:
def community_clustering_info(graph, partition):
    """Summarise a node-to-community partition as a two-column table.

    Parameters
    ----------
    graph : graph object accepted by ``community.modularity``
    partition : dict
        ``{node: community_id}`` mapping.

    Returns
    -------
    pandas.DataFrame
        Column 0 holds the metric labels, column 1 the values as strings.
    """
    sizes = Counter(partition.values())
    # most_common() is sorted by descending size: first is largest, last is smallest.
    ranked = sizes.most_common()
    largest_id, largest_size = ranked[0]
    smallest_id, smallest_size = ranked[-1]

    labels = pd.Series([
        "Number of communities detected",
        "Largest community (id)",
        "Size of largest community",
        "Smallest community (id)",
        "Size of smallest community",
        "Modularity of partition",
    ])
    values = pd.Series([
        str(len(sizes)),
        str(largest_id),
        str(largest_size),
        str(smallest_id),
        str(smallest_size),
        str(round(community.modularity(partition, graph), 6)),
    ])
    return pd.concat([labels, values], axis=1)

In [8]:
def graph_info(graph):
    """Summarise basic structural statistics of a graph as a two-column table.

    Parameters
    ----------
    graph : networkx graph

    Returns
    -------
    pandas.DataFrame
        Column 0 holds the metric labels, column 1 the values as strings.
    """
    degrees = [val for (node, val) in graph.degree()]
    # Most frequent degree from np.unique counts rather than
    # stats.mode(...)[0][0]: recent SciPy returns a scalar mode for 1-D
    # input, so the double index fails. np.unique sorts the values and
    # argmax takes the first maximum, so ties resolve to the smallest degree.
    degree_values, degree_counts = np.unique(degrees, return_counts=True)
    most_frequent_degree = degree_values[np.argmax(degree_counts)]

    listcol1 = ["Number of nodes ","Number of edges ","Maximum degree ","Minimum degree ","Average degree ",
                "Most frequent degree ","Density ","Number of selfloops ","Degree assortiativity coefficient "]
    column1 = pd.Series(listcol1)
    listcol2 = [str(len(graph.nodes)),str(len(graph.edges)), str(np.max(degrees)),str(np.min(degrees)),str(np.round(np.mean(degrees),6)),
                str(most_frequent_degree),str(round(nx.density(graph),6)),str(nx.number_of_selfloops(graph)),
                str(round(nx.degree_assortativity_coefficient(graph), 6))]
    column2 = pd.Series(listcol2)
    return pd.concat([column1, column2], axis=1)


In [9]:
def evaluate(graph, communities):
    """Print modularity, coverage and performance of a community partition.

    Parameters
    ----------
    graph : networkx graph
    communities : list of node collections
        The communities to score against ``graph``.

    Returns
    -------
    None
        The three metrics are printed, not returned.
    """
    quality = nx.algorithms.community.quality
    modularity = quality.modularity(graph, communities)
    # Newer NetworkX drops the separate coverage/performance functions in
    # favour of partition_quality, which returns (coverage, performance).
    # Fall back to the legacy functions where it is not available.
    if hasattr(quality, "partition_quality"):
        coverage, performance = quality.partition_quality(graph, communities)
    else:
        coverage = quality.coverage(graph, communities)
        performance = quality.performance(graph, communities)
    print("Modularity: ", modularity)
    print("Coverage: ", coverage)
    print("Performance: ", performance)

In [10]:
def louvain_community_detection(graph, random_state=None):
    """Run Louvain community detection on ``graph``.

    Parameters
    ----------
    graph : networkx graph
    random_state : int, RandomState instance or None, optional
        Forwarded to ``community_louvain.best_partition``. Pass a fixed value
        to get the same partition on every run; the default ``None`` leaves
        the run unseeded.

    Returns
    -------
    dict
        The partition returned by ``best_partition``.
    """
    return community_louvain.best_partition(graph, random_state=random_state)

def greedy_community_detection(graph):
    """Return the result of ``greedy_modularity_communities(graph)`` as a list."""
    return [*greedy_modularity_communities(graph)]

In [12]:
# Read the edge list; the path is relative to the working directory.
# create_graph reads the `source` and `target` columns of this frame.
data = pd.read_csv("dataset/colab.csv")
# Build the undirected graph; create_graph also removes connected
# components of three nodes or fewer.
graph = create_graph(data)

In [13]:
# Louvain partition (node -> community id), regrouped into per-community node lists.
partition = louvain_community_detection(graph)
communities = create_communities(partition)
# Mean community size: total number of members divided by the number of communities.
avg = sum(map(len, communities)) / len(communities)
print(avg)

177.07766990291262


In [None]:
# NOTE(review): disabled cell, never executed. The evaluate(graph, communities)
# helper defined in this notebook prints modularity and coverage (plus performance).
#modularity1 = nx.algorithms.community.quality.modularity(graph, communities)
#coverage1 = nx.algorithms.community.quality.coverage(graph, communities)

#print("Modularity: ", modularity1)
#print("Coverage: ", coverage1)

In [14]:
# Greedy modularity communities for the same graph.
communities2 = greedy_community_detection(graph)
# Mean community size; note this reuses (and overwrites) the name `avg`.
avg = sum(map(len, communities2)) / len(communities2)
print(avg)
#modularity2 = nx.algorithms.community.quality.modularity(graph, communities2)
#coverage2 = nx.algorithms.community.quality.coverage(graph, communities2)
#performance2 = nx.algorithms.community.quality.performance(graph, communities2)


84.05069124423963


In [16]:
# Count the Louvain communities that are larger than the mean community size.
# The threshold is computed from `communities` rather than a literal copied
# from an earlier printed output: the Louvain call in this notebook passes no
# seed, so a pasted number goes stale on re-run, and `avg` has already been
# overwritten by the greedy cell. Sizes are integers, so comparing against the
# exact mean selects the same communities as comparing against its floor.
louvain_mean_size = sum(len(c) for c in communities) / len(communities)
count = sum(1 for c in communities if len(c) > louvain_mean_size)
print(count)

22


In [17]:
count2 = 0
for c in communities2:
    if len(c)>84:
        count2+=1
print(count2) 

11
