In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from scipy.io import mmread
import time

In [2]:
from collections import defaultdict
import random

def async_lpa(G, max_iter=100):
    labels = {node: i for i, node in enumerate(G.nodes())}
    
    for _ in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)  # 비동기식의 핵심: 무작위 순서
        
        for node in nodes:
            if not G[node]:  # 이웃 없는 노드 스킵
                continue
                
            # 현재 그래프의 실시간 레이블 사용 (비동기식 핵심)
            neighbor_labels = [labels[nbr] for nbr in G[node]]
            if not neighbor_labels:  # 이웃 레이블 없을 경우
                continue
                
            # 레이블 통계 계산
            label_counts = defaultdict(int)
            for lbl in neighbor_labels:
                label_counts[lbl] += 1
                
            max_count = max(label_counts.values())
            candidates = [lbl for lbl, cnt in label_counts.items() 
                         if cnt == max_count]
            
            # 변경 여부 판단
            new_label = random.choice(candidates) if candidates else labels[node]
            if new_label != labels[node]:
                labels[node] = new_label  # 즉시 업데이트 (비동기식)
                changed = True
                
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [3]:
from collections import defaultdict
import random

def sync_lpa(G, max_iter=100):
    labels = {node: i for i, node in enumerate(G.nodes())}
    
    for _ in range(max_iter):
        changed = False
        new_labels = {}  # 동기식의 핵심: 새로운 레이블을 임시 저장
        
        # 모든 노드에 대해 새로운 레이블 계산 (기존 레이블 기준)
        for node in G.nodes():
            if not G[node]:  # 이웃 없는 노드는 기존 레이블 유지
                new_labels[node] = labels[node]
                continue
                
            # 현재 iteration 시작 시점의 레이블 사용 (동기식 핵심)
            neighbor_labels = [labels[nbr] for nbr in G[node]]
            if not neighbor_labels:
                new_labels[node] = labels[node]
                continue
                
            # 레이블 통계 계산
            label_counts = defaultdict(int)
            for lbl in neighbor_labels:
                label_counts[lbl] += 1
                
            max_count = max(label_counts.values())
            candidates = [lbl for lbl, cnt in label_counts.items() 
                         if cnt == max_count]
            
            # 새로운 레이블 결정
            new_label = random.choice(candidates) if candidates else labels[node]
            new_labels[node] = new_label
            
            if new_label != labels[node]:
                changed = True
        
        # 모든 노드의 레이블을 동시에 업데이트 (동기식 핵심)
        labels = new_labels
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [4]:
# 최대 독립집합 동기 병령 lpa

from collections import defaultdict
import random
import networkx as nx

def find_maximal_independent_set(G, nodes):
    """탐욕적 방법으로 최대 독립집합 찾기"""
    independent_set = set()
    remaining_nodes = set(nodes)
    
    while remaining_nodes:
        # 남은 노드 중 차수가 가장 낮은 노드 선택 (탐욕적 전략)
        node = min(remaining_nodes, key=lambda n: len([nbr for nbr in G[n] if nbr in remaining_nodes]))
        
        # 독립집합에 추가
        independent_set.add(node)
        remaining_nodes.remove(node)
        
        # 이웃 노드들을 남은 노드에서 제거
        neighbors_to_remove = set(G[node]) & remaining_nodes
        remaining_nodes -= neighbors_to_remove
    
    return independent_set

def sync_lpa_with_mis(G, max_iter=100):
    labels = {node: i for i, node in enumerate(G.nodes())}
    iteration_count = 0
    all_nodes = list(G.nodes())
    
    while iteration_count < max_iter:
        changed = False
        remaining_nodes = set(all_nodes)  # 매 라운드마다 모든 노드로 초기화
        
        # 한 라운드: 모든 노드가 최소 1번씩 업데이트될 때까지
        while remaining_nodes:
            new_labels = labels.copy()
            
            # 남은 노드들에서 독립집합 찾기
            independent_set = find_maximal_independent_set(G, list(remaining_nodes))
            
            # 독립집합의 노드들 업데이트
            for node in independent_set:
                if not G[node]:
                    continue
                    
                neighbor_labels = [labels[nbr] for nbr in G[node]]
                if not neighbor_labels:
                    continue
                    
                label_counts = defaultdict(int)
                for lbl in neighbor_labels:
                    label_counts[lbl] += 1
                    
                max_count = max(label_counts.values())
                candidates = [lbl for lbl, cnt in label_counts.items() 
                             if cnt == max_count]
                
                new_label = random.choice(candidates) if candidates else labels[node]
                new_labels[node] = new_label
                
                if new_label != labels[node]:
                    changed = True
            
            # 업데이트된 노드들을 남은 노드에서 제거
            remaining_nodes -= set(independent_set)
            labels = new_labels
        
        iteration_count += 1
        
        # 변화가 없으면 수렴으로 간주하고 종료
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [5]:
from sklearn.metrics import normalized_mutual_info_score

def calculate_nmi(true_labels, graph, communities):
    """
    true_labels : list of int, 길이 == 노드 수
    graph       : networkx Graph 또는 __len__이 정의된 객체
    communities : List of List or Set, 각 서브리스트/서브셋이 하나의 커뮤니티를 구성하는 노드 ID들
    """
    # pred_labels 초기화: 노드 수만큼 0으로 채운 리스트 생성
    pred_labels = [0] * len(graph)

    # 커뮤니티별 인덱스를 pred_labels에 할당
    for i, com in enumerate(communities):
        for node in com:
            pred_labels[node] = i

    # NMI 계산 및 반환
    return normalized_mutual_info_score(true_labels, pred_labels)


In [6]:
graph = nx.karate_club_graph()
true_labels_karate = []
for node in graph.nodes:
    label = graph.nodes[node]['club']
    true_labels_karate.append(1 if label == 'Officer' else 0)

In [7]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = async_lpa(graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_karate, graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "karate    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa(graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_karate, graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "karate    sync LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_karate, graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "karate    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

karate    async LPA    mean :  0.6064299525704413  std :  0.19505698941582847 time :  0.46
karate    sync LPA    mean :  0.4854464036300077  std :  0.20808481189402434 time :  19.54
karate    sync_mis LPA    mean :  0.6592472852084993  std :  0.1252786585038043 time :  8.78


In [7]:
#돌고래
matrix = mmread("./soc-dolphins/soc-dolphins.mtx")
# scipy 희소 행렬을 NetworkX 그래프로 변환

dolphin_graph = nx.from_scipy_sparse_array(matrix)

true_labels_dolphins = [
    0,0,0,0,0,0,0,0,0,0,
    0,1,1,0,0,0,0,0,1,0,
    0,0,0,0,0,0,0,0,0,0,
    1,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,
    1,1
]

In [9]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = async_lpa(dolphin_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_dolphins, dolphin_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "dolphins    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa(dolphin_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_dolphins, dolphin_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "dolphins    sync LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(dolphin_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_dolphins, dolphin_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "dolphins    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

dolphins    async LPA    mean :  0.024975094530546676  std :  0.015478228741230777 time :  7.65
dolphins    sync LPA    mean :  0.02357895918678621  std :  0.011109600890087674 time :  33.63
dolphins    sync_mis LPA    mean :  0.03542398849612667  std :  0.025133252906735896 time :  47.6


In [8]:
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_networkx

dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
cora_graph = to_networkx(data)
true_labels_cora = data.y

In [11]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = async_lpa(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    async LPA    mean :  0.4287923552520222  std :  0.0049359924564645365 time :  2724.36
cora    sync LPA    mean :  0.41274767686855846  std :  0.0064953059922281285 time :  2607.29
cora    sync_mis LPA    mean :  0.427814400039295  std :  0.0038574877442530218 time :  613777.6


In [12]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.42782666459831775  std :  0.004237018498066012 time :  613359.0


In [13]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.42860557459540655  std :  0.00411965599503738 time :  612962.5


In [14]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.4287261822064224  std :  0.004066486199529049 time :  612758.7


In [15]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.42929331520798625  std :  0.004154880878382615 time :  612581.28


In [16]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.4293420243945339  std :  0.004127571721192989 time :  612759.25


In [17]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.42940803454884335  std :  0.004068424068396476 time :  615346.9285714285


In [18]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.4291765128539712  std :  0.003946268055472523 time :  616769.575


In [19]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.4291168396616611  std :  0.0038296921508026416 time :  616486.3333333334


In [20]:
for _ in range(10):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.42917784514353763  std :  0.00379867953882354 time :  617697.13


In [10]:
def optimized_sync_lpa_with_mis(G, max_iter=100):
    """최적화된 MIS 기반 동기 LPA"""
    labels = {node: i for i, node in enumerate(G.nodes())}
    iteration_count = 0
    all_nodes = list(G.nodes())
    
    # 노드 차수를 미리 계산하여 성능 향상
    node_degrees = dict(G.degree())
    
    while iteration_count < max_iter:
        changed = False
        remaining_nodes = set(all_nodes)
        
        while remaining_nodes:
            new_labels = labels.copy()
            
            # 최적화된 독립집합 찾기
            independent_set = find_optimized_maximal_independent_set(G, list(remaining_nodes), node_degrees)
            
            for node in independent_set:
                if not G[node]:
                    continue
                
                neighbor_labels = [labels[nbr] for nbr in G[node]]
                if not neighbor_labels:
                    continue
                
                label_counts = defaultdict(int)
                for lbl in neighbor_labels:
                    label_counts[lbl] += 1
                
                max_count = max(label_counts.values())
                candidates = [lbl for lbl, cnt in label_counts.items() 
                            if cnt == max_count]
                
                new_label = random.choice(candidates) if candidates else labels[node]
                new_labels[node] = new_label
                
                if new_label != labels[node]:
                    changed = True
            
            remaining_nodes -= set(independent_set)
            labels = new_labels
        
        iteration_count += 1
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]

def find_optimized_maximal_independent_set(G, nodes, node_degrees):
    """최적화된 최대 독립집합 찾기"""
    independent_set = set()
    remaining_nodes = set(nodes)
    
    while remaining_nodes:
        # 미리 계산된 차수 정보 사용
        node = min(remaining_nodes, 
                  key=lambda n: sum(1 for nbr in G[n] if nbr in remaining_nodes))
        
        independent_set.add(node)
        remaining_nodes.remove(node)
        
        # 이웃 노드들을 효율적으로 제거
        neighbors_to_remove = set(G[node]) & remaining_nodes
        remaining_nodes -= neighbors_to_remove
    
    return independent_set


In [13]:
# 최적화 버전
nmi=[]
elapsedtime=[]
for _ in range(1):
    start_ms = int(round(time.time() * 1000))
    com = optimized_sync_lpa_with_mis(cora_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_cora, cora_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "cora    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

cora    sync_mis LPA    mean :  0.4320407475900254  std :  0.0 time :  822144.0


In [None]:
def modularity_async_lpa(G, max_iter=100):
    """Modularity를 고려한 비동기 LPA"""
    labels = {node: i for i, node in enumerate(G.nodes())}
    total_edges = G.number_of_edges() * 2  # 무방향 그래프에서 총 엣지 수
    node_degrees = dict(G.degree())
    
    for iteration in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        for node in nodes:
            if not G[node]:
                continue
                
            current_label = labels[node]
            best_label = current_label
            best_modularity_gain = 0
            
            # 이웃 라벨들과 각각에 대한 modularity 증가량 계산
            neighbor_labels = set(labels[nbr] for nbr in G[node])
            
            for candidate_label in neighbor_labels:
                modularity_gain = calculate_modularity_gain(
                    G, labels, node, current_label, candidate_label, 
                    total_edges, node_degrees
                )
                
                if modularity_gain > best_modularity_gain:
                    best_modularity_gain = modularity_gain
                    best_label = candidate_label
            
            # Modularity 증가가 있는 경우에만 라벨 변경
            if best_label != current_label and best_modularity_gain > 0:
                labels[node] = best_label
                changed = True
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]

def calculate_modularity_gain(G, labels, node, old_label, new_label, total_edges, node_degrees):
    """노드의 라벨 변경으로 인한 modularity 증가량 계산"""
    if old_label == new_label:
        return 0
    
    # 현재 노드의 차수
    ki = node_degrees[node]
    
    # old_label 커뮤니티와의 연결 수
    ki_in_old = sum(1 for nbr in G[node] if labels[nbr] == old_label)
    
    # new_label 커뮤니티와의 연결 수  
    ki_in_new = sum(1 for nbr in G[node] if labels[nbr] == new_label)
    
    # old_label 커뮤니티의 총 차수
    sigma_old = sum(node_degrees[n] for n, label in labels.items() if label == old_label)
    
    # new_label 커뮤니티의 총 차수
    sigma_new = sum(node_degrees[n] for n, label in labels.items() if label == new_label)
    
    # Modularity 변화량 계산
    delta_q = (ki_in_new - ki_in_old) / total_edges - ki * (sigma_new - sigma_old) / (total_edges ** 2)
    
    return delta_q


In [None]:
def modularity_sync_lpa(G, max_iter=100):
    """Modularity를 고려한 동기 LPA"""
    labels = {node: i for i, node in enumerate(G.nodes())}
    total_edges = G.number_of_edges() * 2
    node_degrees = dict(G.degree())
    
    for iteration in range(max_iter):
        changed = False
        new_labels = {}
        
        for node in G.nodes():
            if not G[node]:
                new_labels[node] = labels[node]
                continue
            
            current_label = labels[node]
            best_label = current_label
            best_modularity_gain = 0
            
            # 이웃 라벨들 수집
            neighbor_labels = set(labels[nbr] for nbr in G[node])
            
            # 각 후보 라벨에 대해 modularity 증가량 계산
            for candidate_label in neighbor_labels:
                modularity_gain = calculate_modularity_gain(
                    G, labels, node, current_label, candidate_label,
                    total_edges, node_degrees
                )
                
                if modularity_gain > best_modularity_gain:
                    best_modularity_gain = modularity_gain
                    best_label = candidate_label
            
            new_labels[node] = best_label
            if best_label != current_label:
                changed = True
        
        # 모든 노드를 동시에 업데이트
        labels = new_labels
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [None]:
def modularity_sync_lpa_with_mis(G, max_iter=100):
    """Modularity를 고려한 MIS 기반 동기 LPA"""
    labels = {node: i for i, node in enumerate(G.nodes())}
    total_edges = G.number_of_edges() * 2
    node_degrees = dict(G.degree())
    iteration_count = 0
    all_nodes = list(G.nodes())
    
    while iteration_count < max_iter:
        changed = False
        remaining_nodes = set(all_nodes)
        
        while remaining_nodes:
            new_labels = labels.copy()
            
            # 독립집합 찾기
            independent_set = find_maximal_independent_set(G, list(remaining_nodes))
            
            # 독립집합의 노드들에 대해 modularity 기반 라벨 업데이트
            for node in independent_set:
                if not G[node]:
                    continue
                
                current_label = labels[node]
                best_label = current_label
                best_modularity_gain = 0
                
                # 이웃 라벨들 수집
                neighbor_labels = set(labels[nbr] for nbr in G[node])
                
                # Modularity 증가량이 가장 큰 라벨 선택
                for candidate_label in neighbor_labels:
                    modularity_gain = calculate_modularity_gain(
                        G, labels, node, current_label, candidate_label,
                        total_edges, node_degrees
                    )
                    
                    if modularity_gain > best_modularity_gain:
                        best_modularity_gain = modularity_gain
                        best_label = candidate_label
                
                new_labels[node] = best_label
                if best_label != current_label:
                    changed = True
            
            # 처리된 노드들 제거
            remaining_nodes -= set(independent_set)
            labels = new_labels
        
        iteration_count += 1
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [None]:
def calculate_modularity(G, communities):
    """커뮤니티 분할의 전체 modularity 계산"""
    # 노드-커뮤니티 매핑 생성
    node_to_comm = {}
    for i, comm in enumerate(communities):
        for node in comm:
            node_to_comm[node] = i
    
    total_edges = G.number_of_edges()
    if total_edges == 0:
        return 0
    
    modularity = 0
    node_degrees = dict(G.degree())
    
    for edge in G.edges():
        u, v = edge
        # 같은 커뮤니티에 속하는 엣지인지 확인
        if node_to_comm[u] == node_to_comm[v]:
            modularity += 1 - (node_degrees[u] * node_degrees[v]) / (2 * total_edges)
        else:
            modularity -= (node_degrees[u] * node_degrees[v]) / (2 * total_edges)
    
    return modularity / (2 * total_edges)


In [29]:
import networkx as nx

G = nx.read_edgelist('out.dimacs10-football', 
                     create_using=nx.Graph(), 
                     nodetype=int)

# 기본 정보 출력
print(f"노드 수: {G.number_of_nodes()}")
print(f"엣지 수: {G.number_of_edges()}")
print(f"평균 차수: {2 * G.number_of_edges() / G.number_of_nodes():.2f}")


노드 수: 115
엣지 수: 613
평균 차수: 10.66


In [45]:
# Football 네트워크의 12개 컨퍼런스 (커뮤니티)
FOOTBALL_CONFERENCES = {
    'Atlantic Coast': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    'Big East': [10, 11, 12, 13, 14, 15, 16, 17],
    'Big Ten': [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28],
    'Big Twelve': [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40],
    'Conference USA': [41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51],
    'Independents': [52, 53, 54, 55, 56],
    'Mid-American': [57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
    'Mountain West': [70, 71, 72, 73, 74, 75, 76, 77],
    'Pacific Ten': [78, 79, 80, 81, 82, 83, 84, 85, 86, 87],
    'Southeastern': [88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
    'Sun Belt': [100, 101, 102, 103, 104, 105, 106, 107, 108],
    'Western Athletic': [109, 110, 111, 112, 113, 114, 115]
}

print("=== 컨퍼런스 정보 ===")
for conf, teams in FOOTBALL_CONFERENCES.items():
    print(f"{conf}: {len(teams)}개 팀")

def relabel_graph_nodes(G):
    """그래프 노드를 0부터 시작하도록 재매핑"""
    mapping = {node: i for i, node in enumerate(sorted(G.nodes()))}
    return nx.relabel_nodes(G, mapping)

G = relabel_graph_nodes(G)

=== 컨퍼런스 정보 ===
Atlantic Coast: 9개 팀
Big East: 8개 팀
Big Ten: 11개 팀
Big Twelve: 12개 팀
Conference USA: 11개 팀
Independents: 5개 팀
Mid-American: 13개 팀
Mountain West: 8개 팀
Pacific Ten: 10개 팀
Southeastern: 12개 팀
Sun Belt: 9개 팀
Western Athletic: 7개 팀


In [46]:
true_football = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11]

In [52]:
calculate_nmi(true_football, G, async_lpa(G) )

0.463221840251306

In [56]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = async_lpa(G)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_football, G, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "FOOTBALL    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa(G)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_football, G, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "FOOTBALL    sync LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = sync_lpa_with_mis(G)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_football, G, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "FOOTBALL    sync_mis LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

FOOTBALL    async LPA    mean :  0.4769406244296301  std :  0.027218876575115767 time :  6.95
FOOTBALL    sync LPA    mean :  0.4778621403653957  std :  0.018788251174715748 time :  36.24
FOOTBALL    sync_mis LPA    mean :  0.4873249438544196  std :  0.02103313811994341 time :  64.74


In [55]:
def modularity_async_lpa(G, max_iter=100):
    """Modularity를 고려한 비동기 LPA"""
    labels = {node: i for i, node in enumerate(G.nodes())}
    total_edges = G.number_of_edges() * 2  # 무방향 그래프에서 총 엣지 수
    node_degrees = dict(G.degree())
    
    for iteration in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        for node in nodes:
            if not G[node]:
                continue
                
            current_label = labels[node]
            best_label = current_label
            best_modularity_gain = 0
            
            # 이웃 라벨들과 각각에 대한 modularity 증가량 계산
            neighbor_labels = set(labels[nbr] for nbr in G[node])
            
            for candidate_label in neighbor_labels:
                modularity_gain = calculate_modularity_gain(
                    G, labels, node, current_label, candidate_label, 
                    total_edges, node_degrees
                )
                
                if modularity_gain > best_modularity_gain:
                    best_modularity_gain = modularity_gain
                    best_label = candidate_label
            
            # Modularity 증가가 있는 경우에만 라벨 변경
            if best_label != current_label and best_modularity_gain > 0:
                labels[node] = best_label
                changed = True
        
        if not changed:
            break
    
    # 커뮤니티 구성
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]

def calculate_modularity_gain(G, labels, node, old_label, new_label, total_edges, node_degrees):
    """노드의 라벨 변경으로 인한 modularity 증가량 계산"""
    if old_label == new_label:
        return 0
    
    # 현재 노드의 차수
    ki = node_degrees[node]
    
    # old_label 커뮤니티와의 연결 수
    ki_in_old = sum(1 for nbr in G[node] if labels[nbr] == old_label)
    
    # new_label 커뮤니티와의 연결 수  
    ki_in_new = sum(1 for nbr in G[node] if labels[nbr] == new_label)
    
    # old_label 커뮤니티의 총 차수
    sigma_old = sum(node_degrees[n] for n, label in labels.items() if label == old_label)
    
    # new_label 커뮤니티의 총 차수
    sigma_new = sum(node_degrees[n] for n, label in labels.items() if label == new_label)
    
    # Modularity 변화량 계산
    delta_q = (ki_in_new - ki_in_old) / total_edges - ki * (sigma_new - sigma_old) / (total_edges ** 2)
    
    return delta_q


In [57]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = modularity_async_lpa(G)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_football, G, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "FOOTBALL    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

FOOTBALL    async LPA    mean :  0.49922862348706304  std :  0.009916094442067501 time :  77.01


In [59]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = modularity_async_lpa(graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_karate, graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "karate    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

karate    async LPA    mean :  0.5281422945594655  std :  0.0255884029890106 time :  12.46


In [61]:
nmi=[]
elapsedtime=[]
for _ in range(100):
    start_ms = int(round(time.time() * 1000))
    com = modularity_async_lpa(dolphin_graph)
    com
    end_ms = int(round(time.time() * 1000))
    nmi.append( calculate_nmi(true_labels_dolphins, dolphin_graph, com ) )
    elapsedtime.append( end_ms - start_ms )
print( "dolphin    async LPA    mean : ", np.mean(nmi), " std : " , np.var(nmi)**0.5, "time : ", np.mean(elapsedtime))

dolphin    async LPA    mean :  0.08980403116241964  std :  0.013453048492821027 time :  126.77
