In [87]:
from collections import defaultdict
from itertools import count

import numpy as np

In [48]:
file = "/home/ubuntu/data/tmp/karate/zkcc-77/karate_edges_77.txt"

In [92]:
class CommunityNetwork():
    def __init__(self, A):
        self._A = A
        
        self.number_of_nodes = self._A.shape[0]
        self.communities = np.array([i for i in range(self.number_of_nodes)])
        self._m = np.sum(np.sum(A))
        self.modularity = self.compute_modularity()
        
        
    @staticmethod
    def create_adjacency_matrix(file):
        node_neighbors = defaultdict(list)

        with open(file, "r") as fin:
            for line in fin:
                node_1, node_2 = line.strip().split('\t')
                node_1 = int(node_1)-1 #offset to make the integers start from zero
                node_2 = int(node_2)-1
                node_neighbors[node_1].append(node_2)

        number_of_nodes = len(node_neighbors)
        A = np.zeros((number_of_nodes, number_of_nodes))

        for node, neighbors in node_neighbors.items():
            for neighbor in neighbors:
                A[node, neighbor] = 1

        return A
    
    def modularity_difference(self, nodei, communityi):
        #save community of node
        original_community = self.communities[nodei]
        #change community
        self.communities[nodei] = communityi
        #compute new modularity
        new_modularity = self.compute_modularity()
        #print(f"{nodei}, {communityi}, {self.modularity}, {new_modularity}")
        modularity_difference = new_modularity-self.modularity
        #restore community
        self.communities[nodei] = original_community
        return modularity_difference
    
    def find_best_community(self, nodei):
        unique_communities = np.unique(self.communities)
        max_modularity_difference = 0
        best_community = self.communities[nodei]
        for communityi in unique_communities:
            modularity_difference = self.modularity_difference(nodei, communityi)
            if modularity_difference > max_modularity_difference:
                max_modularity_difference = modularity_difference
                best_community = communityi
        return best_community, max_modularity_difference
    
    def reload_communities(self):
        counter = count()
        community_mapper = defaultdict(lambda: next(counter))
        reloaded_communities = np.zeros((self.number_of_nodes,))
        for nodei, community in enumerate(self.communities):
            reloaded_communities[nodei] = community_mapper[community]
        self.communities = reloaded_communities
    
    def phase1(self):
        is_modularity_increasing = True
        while is_modularity_increasing:
            is_modularity_increasing = False
            for nodei in range(self.number_of_nodes):
                best_community, modularity_difference = self.find_best_community(nodei)
                if modularity_difference > 0:
                    self.communities[nodei] = best_community
                    self.modularity += modularity_difference
                    #print(f"moving node {nodei} to community {best_community}, modularity diff {modularity_difference}, new modularity {self.modularity}")
                    #print("verification modularity {}".format(self.compute_modularity()))
                    is_modularity_increasing = True
                    
        self.reload_communities()
                    
        return self.communities
    
    def phase2(self):
        unique_communities = np.unique(self.communities)
        n_communities = len(unique_communities)
        A = np.zeros((n_communities, n_communities))
        community_mapping = np.zeros((n_communities,))
            
        for i, communityi in enumerate(unique_communities):
            community_mapping[i] = communityi
            for j, communityj in enumerate(unique_communities):
                nodes_i = self.communities[self.communities == communityi]
                nodes_j = self.communities[self.communities == communityj]
                A[i, j] = np.sum(np.sum(self._A[nodes_i,nodes_j]))
                
        return CommunityNetwork(A), community_mapping
    
    def detect_communities(self):
        community_hierarchy = []
        
        previous_communities = self.phase1()
        community_hierarchy.append(communities)
        while True:
            cn = self.phase2()
            new_communities = cn.phase1()
            expanded_communities = CommunityNetwork.expand_communities(new_communities, previous_communities)
            community_hierarchy.append(expanded_communities)
            previous_communities = expanded_communities
        
    
    def compute_modularity(self):
        nrows, ncols = self._A.shape
        assert nrows == ncols

        modularity = 0.0
        for nodei in range(self.number_of_nodes):
            for nodej in range(self.number_of_nodes):
                ki = np.sum(self._A[nodei,:])
                kj = np.sum(self._A[nodej,:])
                if self.communities[nodei] == self.communities[nodej]:
                    modularity += self._A[nodei, nodej] - (ki*kj)/(2*self._m)

        return modularity/(2*self._m)

In [78]:
A = CommunityNetwork.create_adjacency_matrix(file)
cn = CommunityNetwork(A)

In [79]:
cn.compute_modularity()

-0.01239669421487603

In [80]:
cn.phase1()

moving node 0 to community 11, modularity diff 0.006156181480856801, new modularity -0.006240512734019229
verification modularity -0.006240512734019229
moving node 1 to community 17, modularity diff 0.006114015854275597, new modularity -0.00012649687974363236
verification modularity -0.00012649687974363195
moving node 2 to community 9, modularity diff 0.006071850227694382, new modularity 0.005945353347950749
verification modularity 0.005945353347950749
moving node 3 to community 12, modularity diff 0.0062405127340192336, new modularity 0.012185866081969983
verification modularity 0.012185866081969983
moving node 4 to community 10, modularity diff 0.006303761173891042, new modularity 0.018489627255861025
verification modularity 0.018489627255861025
moving node 5 to community 16, modularity diff 0.006324843987181646, new modularity 0.02481447124304267
verification modularity 0.02481447124304267
moving node 6 to community 16, modularity diff 0.012481025468038452, new modularity 0.03729549

In [82]:
np.unique(cn.communities)

array([10, 16, 17, 25, 32])

In [86]:
cn._A[0:2,2:5]

array([[1., 1., 1.],
       [1., 1., 0.]])

In [91]:
next(cnt)

1