In [1]:
import sys
sys.path.append('..')

import networkx as nx
import numpy as np

from collections import defaultdict

# Fixed integer seed. NOTE(review): presumably intended to be passed to the
# randomized algorithms in this notebook so results are repeatable — confirm usage.
r_seed = 123

%load_ext autoreload
%autoreload 2

### Read dataset

In [2]:
# Load the graph from its GML file, then report how many nodes and edges it has.
G = nx.read_gml("../data/raw/polblogs.gml")

node_count = G.number_of_nodes()
edge_count = G.number_of_edges()

print("Number of nodes:", node_count)
print("Number of edges:", edge_count)

Number of nodes: 1490
Number of edges: 19090


In [3]:
# Group node ids by the label stored in each node's "value" attribute:
# actual_com maps label -> set of nodes carrying that label.
actual_com = defaultdict(set)
for node, attrs in G.nodes(data=True):
    # A node without a "value" attribute raises KeyError here.
    actual_com[attrs["value"]].add(node)

# Displayed as the cell output: number of nodes labelled 0.
len(actual_com[0])


758

#### Define a helper to compute the max-intersection accuracy

In [4]:
def getMaxIntersectAccuracy(communities: dict, predictions: list) -> dict:
    """Score each reference community by its best-overlapping predicted group.

    For every reference community, the score is the size of the largest
    intersection with any single predicted group, divided by the size of the
    reference community.

    Args:
        communities: Mapping from a community key to the set of its members.
            (It is indexed by key, so it must be a mapping, not a list.)
        predictions: Iterable of predicted groups; each group is any iterable
            of members accepted by ``set.intersection``.

    Returns:
        A dict with the same keys as ``communities`` whose values are floats
        in ``[0.0, 1.0]``. A community with no members, or an empty
        ``predictions``, scores ``0.0``.
    """
    # Materialize once so a one-shot iterable can be scanned for every community.
    predicted_groups = list(predictions)

    accuracy = {}
    for key, members in communities.items():
        size = len(members)
        if size == 0:
            # Nothing can be recovered from an empty community; avoid dividing by zero.
            accuracy[key] = 0.0
            continue

        # default=0 covers the case where there are no predicted groups at all.
        best_overlap = max(
            (len(members.intersection(group)) for group in predicted_groups),
            default=0,
        )
        accuracy[key] = best_overlap / size

    return accuracy

## Test Performance

In [5]:
# Louvain is a randomized algorithm; pass the notebook-level seed so the
# partition (and the accuracies below) are the same on every Restart & Run All.
partition = nx.algorithms.community.louvain_communities(G, seed=r_seed)

# Score each ground-truth community by its best-overlapping detected community.
accs = getMaxIntersectAccuracy(communities=actual_com, predictions=partition)

print("Accuracy per actual communities:")
print("\t", accs)

# Unweighted mean over the ground-truth communities.
print("Average Accuracy:", np.mean(list(accs.values())))

Accuracy per actual communities:
	 {0: 0.633245382585752, 1: 0.5887978142076503}
Average Accuracy: 0.6110215983967011


We can see that the accuracy is better than random assignment, which is expected. Modularity scores a partition by how well it matches the community structure of the graph, and Louvain maximizes modularity. Therefore, it tends to find communities that closely follow that structure.