In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from scipy.io import mmread

In [2]:
matrix = mmread("./soc-dolphins/soc-dolphins.mtx")

# scipy 희소 행렬을 NetworkX 그래프로 변환
graph = nx.from_scipy_sparse_array(matrix)

# 그래프 정보 출력
print(f"노드 개수: {graph.number_of_nodes()}")
print(f"간선 개수: {graph.number_of_edges()}")

노드 개수: 62
간선 개수: 159


In [3]:
import random
from collections import Counter

def sync_lpa(G, seed=None, max_iter=100 ):
    random.seed(seed)
    labels = {node: i for i, node in enumerate(G)} # {0:0, 1:1, 2:2 .... 다 n : n 고유하게 init
    
    for _ in range(max_iter):
        updated = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        for node in nodes:
            neighbors = list(G.neighbors(node))
            if not neighbors:
                continue
                
            # 이웃 라벨 degree 비교
            neighbor_labels = [labels[n] for n in neighbors]
            count = Counter(neighbor_labels)
            max_freq = max(count.values())
            candidates = [label for label, freq in count.items() if freq == max_freq]
            
            # 가장 많은걸로 업데이트
            new_label = random.choice(candidates) # 같은거 있으면 랜덤으로
            if labels[node] != new_label:
                labels[node] = new_label
                updated = True
                
        if not updated:
            break
            
    # 커뮤니티 그룹화
    communities = {}
    for node, label in labels.items():
        communities.setdefault(label, set()).add(node)
        
    return list(communities.values())


In [4]:
#최대 독립집합?

import random
from collections import Counter

def sync_lpa_indepentset(G, seed=None, max_iter=100):
    random.seed(seed)
    # 1) 초기 레이블 할당: 각 노드에 고유한 레이블 부여
    labels = {n: i for i, n in enumerate(G)}    

    for _ in range(max_iter):
        updated = False
        # 2) 무작위 순서로 노드 리스트 생성
        nodes = list(G.nodes())
        random.shuffle(nodes)

        # 3) 독립 집합 선택: 한 이터레이션 동안 동시에 업데이트할 노드 집합
        to_update = []
        blocked = set()
        for u in nodes:
            if u not in blocked:
                to_update.append(u)
                blocked.add(u)
                blocked.update(G.neighbors(u))

        # 4) 새 레이블 계산 (동시 적용 전)
        new_labels = {}
        for u in to_update:
            nbrs = list(G.neighbors(u))
            if not nbrs:
                continue
            freq = Counter(labels[v] for v in nbrs)
            max_freq = max(freq.values())
            candidates = [lab for lab, cnt in freq.items() if cnt == max_freq]
            new_labels[u] = random.choice(candidates)

        # 5) 레이블 동시 갱신
        for u, lab in new_labels.items():
            if labels[u] != lab:
                labels[u] = lab
                updated = True

        # 6) 더 이상 갱신이 없으면 종료
        if not updated:
            break

    # 7) 커뮤니티 그룹화
    communities = {}
    for u, lab in labels.items():
        communities.setdefault(lab, []).append(u)
    return list(communities.values())


In [5]:
import networkx as nx
from networkx.algorithms.community import asyn_lpa_communities

# 커뮤니티 탐지 실행
communities = list(asyn_lpa_communities(graph, seed=39))

# 결과 출력
for i, com in enumerate(communities):
    print(f"Community {i+1}: {sorted(com)}")


Community 1: [0, 2, 10, 28, 30, 42, 47]
Community 2: [1, 7, 19, 25, 26, 27]
Community 3: [3, 8, 59]
Community 4: [4, 11, 15, 18, 21, 23, 24, 29, 35, 45, 51, 55]
Community 5: [5, 6, 9, 13, 17, 22, 31, 32, 39, 41, 48, 54, 56, 57, 60]
Community 6: [12, 14, 16, 20, 33, 34, 36, 37, 38, 40, 43, 44, 46, 49, 50, 52, 58]
Community 7: [53, 61]


In [12]:
list(asyn_lpa_communities(graph, seed=39))

[{0, 2, 10, 28, 30, 42, 47},
 {1, 7, 19, 25, 26, 27},
 {3, 8, 59},
 {4, 11, 15, 18, 21, 23, 24, 29, 35, 45, 51, 55},
 {5, 6, 9, 13, 17, 22, 31, 32, 39, 41, 48, 54, 56, 57, 60},
 {12, 14, 16, 20, 33, 34, 36, 37, 38, 40, 43, 44, 46, 49, 50, 52, 58},
 {53, 61}]

In [None]:
# 성능 평가 (NMI 지표)
from sklearn.metrics import normalized_mutual_info_score

# 지상 진실 커뮤니티 (예시)
true_labels = [
    0,0,0,0,0,0,0,0,0,0,
    0,1,1,0,0,0,0,0,1,0,
    0,0,0,0,0,0,0,0,0,0,
    1,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,
    1,1
]


# 변환: 커뮤니티 -> 노드 레이블
pred_labels = [0]*len(graph)
for i, com in enumerate(communities):
    for node in com:
        pred_labels[node] = i

nmi_score = normalized_mutual_info_score(true_labels, pred_labels)
print(f"NMI Score: {nmi_score:.4f}")
print(true_labels)
print(pred_labels)

In [None]:
c = labelpropagation(graph,10)
print(c)
pred_labels = [0]*len(graph)
for i, com in enumerate(c):
    for node in com:
        pred_labels[node] = i

nmi_score = normalized_mutual_info_score(true_labels, pred_labels)
print(f"NMI Score: {nmi_score:.4f}")
print(true_labels)
print(pred_labels)


In [None]:
from sklearn.metrics import normalized_mutual_info_score

def calculate_nmi(true_labels, graph, communities):
    """
    true_labels : list of int, 길이 == 노드 수
    graph       : networkx Graph 또는 __len__이 정의된 객체
    communities : List of List or Set, 각 서브리스트/서브셋이 하나의 커뮤니티를 구성하는 노드 ID들
    """
    # pred_labels 초기화: 노드 수만큼 0으로 채운 리스트 생성
    pred_labels = [0] * len(graph)

    # 커뮤니티별 인덱스를 pred_labels에 할당
    for i, com in enumerate(communities):
        for node in com:
            pred_labels[node] = i

    # NMI 계산 및 반환
    return normalized_mutual_info_score(true_labels, pred_labels)


In [None]:
import random
import collections
import numpy as np

def custom_asyn_lpa_communities(G):
    # 1) 모든 노드에 고유 레이블 할당
    labels = {n: i for i, n in enumerate(G.nodes())}
    updated = True

    while updated:
        updated = False
        nodes = list(G.nodes())
        random.shuffle(nodes)

        # 2) 각 노드 레이블 갱신
        for u in nodes:
            nbrs = list(G.neighbors(u))
            if not nbrs:
                continue
            # 이웃 레이블 빈도 집계
            freq = collections.Counter([labels[v] for v in nbrs])
            max_freq = max(freq.values())
            best = [lab for lab, cnt in freq.items() if cnt == max_freq]
            new_label = random.choice(best)
            if labels[u] != new_label:
                labels[u] = new_label
                updated = True

    # 3) 레이블별 노드 그룹화
    comm = collections.defaultdict(list)
    for node, lab in labels.items():
        comm[lab].append(node)
    return list(comm.values())

# 사용 예
G = nx.karate_club_graph()
print(custom_asyn_lpa_communities(G))


In [None]:
calculate_nmi(true_labels,graph,async_label_propagation(graph))

In [None]:
calculate_nmi(true_labels,graph,async_label_propagation(graph))

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,graph,async_label_propagation(graph)) )

In [None]:
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,graph,labelpropagation(graph)) )
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,graph,custom_asyn_lpa_communities(graph)) )
np.mean(answer)

In [None]:
import networkx as nx
from collections import defaultdict
import random

def modularity(G, communities, resolution=1):
    m = G.number_of_edges()
    q = 0.0
    for comm in set(communities.values()):
        nodes = [n for n in communities if communities[n] == comm]
        subgraph = G.subgraph(nodes)
        lc = subgraph.number_of_edges()
        dc = sum(G.degree(n) for n in nodes)
        q += (lc / m) - resolution * (dc / (2 * m)) ** 2
    return q

def label_propagation_with_modularity(G, max_iter=1000):
    labels = {n: i for i, n in enumerate(G.nodes())}
    prev_modularity = modularity(G, labels)
    
    for _ in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        for node in nodes:
            neighbors = list(G.neighbors(node))
            if not neighbors:
                continue
                
            label_counts = defaultdict(float)
            for neighbor in neighbors:
                label = labels[neighbor]
                label_counts[label] += G[node][neighbor].get('weight', 1.0)
            
            max_count = max(label_counts.values())
            candidates = [label for label, count in label_counts.items() if count == max_count]
            new_label = random.choice(candidates)
            
            if labels[node] != new_label:
                old_label = labels[node]
                labels[node] = new_label
                
                current_modularity = modularity(G, labels)
                if current_modularity > prev_modularity:
                    prev_modularity = current_modularity
                    changed = True
                else:
                    labels[node] = old_label
        
        if not changed:
            break

    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [None]:
label_propagation_with_modularity(graph)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,graph,label_propagation_with_modularity(graph)) )
np.mean(answer)

In [None]:
import networkx as nx
from collections import defaultdict, Counter
import random

def modularity(G, communities):
    m = G.number_of_edges()
    q = 0.0
    for comm in set(communities.values()):
        nodes = [n for n in communities if communities[n] == comm]
        subgraph = G.subgraph(nodes)
        lc = subgraph.number_of_edges()
        dc = sum(G.degree(n) for n in nodes)
        q += (lc / m) - (dc / (2 * m)) ** 2
    return q

def mis_label_propagation(G, max_iter=1000):
    labels = {n: i for i, n in enumerate(G.nodes())}
    prev_modularity = modularity(G, labels)
    
    for _ in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        # 독립 집합 선택
        to_update = []
        blocked = set()
        for u in nodes:
            if u not in blocked:
                to_update.append(u)
                blocked.add(u)
                blocked.update(G.neighbors(u))
        
        # 후보 레이블 계산
        new_labels = {}
        for u in to_update:
            neighbors = list(G.neighbors(u))
            if not neighbors:
                continue
                
            label_weights = defaultdict(float)
            for v in neighbors:
                label = labels[v]
                label_weights[label] += G[u][v].get('weight', 1.0)
            
            if label_weights:
                max_weight = max(label_weights.values())
                candidates = [lab for lab, w in label_weights.items() if w == max_weight]
                new_labels[u] = random.choice(candidates)
        
        # 임시 레이블 적용 및 모듈러리티 검증
        temp_labels = labels.copy()
        for u, lab in new_labels.items():
            temp_labels[u] = lab
        
        current_modularity = modularity(G, temp_labels)
        if current_modularity > prev_modularity:
            labels = temp_labels
            prev_modularity = current_modularity
            changed = True
        
        if not changed:
            break
    
    # 커뮤니티 그룹화
    comm_dict = defaultdict(list)
    for node, label in labels.items():
        comm_dict[label].append(node)
    return [sorted(nodes) for nodes in comm_dict.values()]


In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,graph,mis_label_propagation(graph)) )
np.mean(answer)

In [None]:
G = nx.karate_club_graph()
labels = []
for node in G.nodes:
    label = G.nodes[node]['club']
    labels.append(1 if label == 'Officer' else 0)



In [None]:
answer = []
for _ in range(100):
    answer.append(calculate_nmi(labels, G, mis_label_propagation(G)))
print(np.mean(answer))

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(labels,G,label_propagation_with_modularity(G)) )
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(labels,G,labelpropagation(G)) )
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(labels,G,async_label_propagation(G)) )
np.mean(answer)

In [None]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
true_labels = data.y
# pip install torch torch-geometric

In [None]:
print(f"노드 수: {data.num_nodes}")
print(f"엣지 수: {data.num_edges}")
print(f"노드 특성 차원: {data.num_node_features}")
print(f"클래스 수: {dataset.num_classes}")


In [None]:
import networkx as nx
from torch_geometric.utils import to_networkx
import matplotlib.pyplot as plt

G = to_networkx(data)
plt.figure(figsize=(8,8))
nx.draw(G, node_size=10)
plt.show()

In [None]:
answer = []
for _ in range(100):
    answer.append(calculate_nmi(true_labels, G, mis_label_propagation(G)))
print(np.mean(answer))

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,G,label_propagation_with_modularity(G)) )
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,G,labelpropagation(G)) )
np.mean(answer)

In [None]:
answer = []
for _ in range(100):
    answer.append( calculate_nmi(true_labels,G,async_label_propagation(G)) )
np.mean(answer)

In [None]:
# 위에랑 아래랑 다름

In [13]:
import random
from collections import Counter

def async_lpa(G, max_iter=100):
    labels = {node: i for i, node in enumerate(G.nodes())}
    
    for _ in range(max_iter):
        changed = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        new_labels = {}
        for node in nodes:
            if not G[node]:
                continue
            # 이웃 노드의 라벨 집계
            neighbor_labels = Counter(labels[neighbor] for neighbor in G[node])
            max_freq = max(neighbor_labels.values())
            best_labels = [label for label, freq in neighbor_labels.items() if freq == max_freq]
            new_label = random.choice(best_labels)
            
            if new_label != labels[node]:
                changed = True
            new_labels[node] = new_label
        
        if not changed:
            break
        labels = new_labels
    
    return labels


In [15]:
async_lpa(graph)

{7: 54,
 38: 16,
 30: 16,
 61: 16,
 5: 54,
 10: 16,
 4: 16,
 48: 54,
 43: 16,
 49: 16,
 0: 16,
 29: 16,
 13: 54,
 8: 16,
 59: 16,
 1: 54,
 12: 16,
 41: 54,
 44: 16,
 37: 16,
 2: 16,
 36: 16,
 15: 16,
 26: 54,
 14: 16,
 45: 16,
 16: 16,
 32: 54,
 27: 54,
 51: 16,
 18: 16,
 21: 16,
 28: 16,
 54: 54,
 33: 16,
 35: 16,
 23: 16,
 19: 54,
 39: 54,
 17: 54,
 53: 16,
 60: 54,
 55: 16,
 6: 54,
 11: 16,
 20: 16,
 58: 16,
 40: 16,
 47: 16,
 25: 54,
 42: 16,
 52: 16,
 3: 16,
 24: 16,
 50: 16,
 46: 16,
 9: 54,
 34: 16,
 31: 54,
 22: 54,
 56: 54,
 57: 54}

In [16]:
def sync_lpa(G, max_iter=100):
    labels = {node: i for i, node in enumerate(G.nodes())}
    next_labels = labels.copy()
    
    for _ in range(max_iter):
        changed = False
        for node in G.nodes():
            if not G[node]:
                continue
            neighbor_labels = Counter(labels[neighbor] for neighbor in G[node])
            max_freq = max(neighbor_labels.values())
            best_labels = [label for label, freq in neighbor_labels.items() if freq == max_freq]
            next_labels[node] = random.choice(best_labels)
            
            if next_labels[node] != labels[node]:
                changed = True
        
        if not changed:
            break
        labels = next_labels.copy()
    
    return labels


In [17]:
sync_lpa(graph)

{0: 38,
 1: 9,
 2: 38,
 3: 38,
 4: 38,
 5: 9,
 6: 9,
 7: 9,
 8: 38,
 9: 9,
 10: 38,
 11: 38,
 12: 38,
 13: 9,
 14: 38,
 15: 38,
 16: 38,
 17: 9,
 18: 38,
 19: 9,
 20: 38,
 21: 38,
 22: 9,
 23: 38,
 24: 38,
 25: 9,
 26: 9,
 27: 9,
 28: 38,
 29: 38,
 30: 38,
 31: 9,
 32: 9,
 33: 38,
 34: 38,
 35: 38,
 36: 38,
 37: 38,
 38: 38,
 39: 38,
 40: 38,
 41: 9,
 42: 38,
 43: 38,
 44: 38,
 45: 38,
 46: 38,
 47: 38,
 48: 9,
 49: 38,
 50: 38,
 51: 38,
 52: 38,
 53: 38,
 54: 9,
 55: 38,
 56: 9,
 57: 9,
 58: 38,
 59: 38,
 60: 9,
 61: 38}

In [18]:
import networkx as nx
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

def update_label(args):
    node, labels, G = args
    neighbors = list(G.neighbors(node))
    if not neighbors:
        return node, labels[node]
    neighbor_labels = Counter(labels[n] for n in neighbors)
    max_freq = max(neighbor_labels.values())
    candidates = [label for label, freq in neighbor_labels.items() if freq == max_freq]
    return node, candidates[0]  # 동점일 경우 첫 번째 선택

def parallel_sync_lpa(G, max_iter=100, n_jobs=4):
    labels = {node: i for i, node in enumerate(G.nodes())}
    for _ in range(max_iter):
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            results = executor.map(
                update_label, 
                [(node, labels, G) for node in G.nodes()]
            )
        new_labels = dict(results)
        if new_labels == labels:
            break
        labels = new_labels
    return labels


In [19]:
parallel_sync_lpa(graph)

{0: 14,
 1: 1,
 2: 14,
 3: 14,
 4: 14,
 5: 5,
 6: 5,
 7: 1,
 8: 14,
 9: 9,
 10: 14,
 11: 14,
 12: 14,
 13: 9,
 14: 14,
 15: 14,
 16: 14,
 17: 17,
 18: 14,
 19: 17,
 20: 14,
 21: 14,
 22: 1,
 23: 14,
 24: 14,
 25: 1,
 26: 17,
 27: 17,
 28: 14,
 29: 14,
 30: 14,
 31: 1,
 32: 5,
 33: 14,
 34: 14,
 35: 14,
 36: 14,
 37: 14,
 38: 14,
 39: 14,
 40: 14,
 41: 5,
 42: 14,
 43: 14,
 44: 14,
 45: 14,
 46: 14,
 47: 14,
 48: 5,
 49: 14,
 50: 14,
 51: 14,
 52: 14,
 53: 14,
 54: 17,
 55: 14,
 56: 9,
 57: 1,
 58: 14,
 59: 14,
 60: 9,
 61: 14}

In [20]:
import random
from collections import defaultdict, Counter
from typing import List, Dict, Set
import networkx as nx

def sync_lpa_independent_set(
    graph: nx.Graph, 
    seed: int = None, 
    max_iter: int = 100
) -> List[List[int]]:
    """
    동기식 LPA를 최대 독립집합(MIS) 업데이트로 구현한 알고리즘
    
    Parameters:
        graph: networkx 그래프 객체
        seed: 랜덤 시드 값 (기본값: None)
        max_iter: 최대 반복 횟수 (기본값: 100)
        
    Returns:
        커뮤니티 리스트 (각 서브리스트가 커뮤니티)
    """
    random.seed(seed)
    nodes = list(graph.nodes())
    labels = {node: idx for idx, node in enumerate(nodes)}
    
    for _ in range(max_iter):
        updated = False
        mis = _find_maximal_independent_set(graph, nodes)
        new_labels = _calculate_new_labels(graph, labels, mis)
        
        # 레이블 업데이트 및 변경 여부 확인
        for node, new_label in new_labels.items():
            if labels[node] != new_label:
                labels[node] = new_label
                updated = True
                
        if not updated:
            break
    
    return _group_communities(labels)

def _find_maximal_independent_set(
    graph: nx.Graph, 
    nodes: List[int]
) -> Set[int]:
    """그래프에서 최대 독립집합(MIS)을 찾는 헬퍼 함수"""
    mis = set()
    blocked = set()
    shuffled_nodes = random.sample(nodes, k=len(nodes))  # 무작위 순서 생성
    
    for node in shuffled_nodes:
        if node not in blocked:
            mis.add(node)
            blocked.add(node)
            blocked.update(graph.neighbors(node))
            
    return mis

def _calculate_new_labels(
    graph: nx.Graph, 
    current_labels: Dict[int, int], 
    mis: Set[int]
) -> Dict[int, int]:
    """새로운 레이블 계산을 위한 헬퍼 함수"""
    new_labels = {}
    
    for node in mis:
        neighbors = list(graph.neighbors(node))
        if not neighbors:
            continue
            
        # 이웃 레이블 통계 계산
        neighbor_labels = Counter(current_labels[nbr] for nbr in neighbors)
        max_count = max(neighbor_labels.values())
        candidates = [label for label, count in neighbor_labels.items() if count == max_count]
        
        # 동점 처리: 후보군에서 무작위 선택
        new_labels[node] = random.choice(candidates)
        
    return new_labels

def _group_communities(labels: Dict[int, int]) -> List[List[int]]:
    """레이블을 기반으로 커뮤니티 그룹화"""
    communities = defaultdict(list)
    for node, label in labels.items():
        communities[label].append(node)
    return list(communities.values())


In [25]:
sync_lpa_independent_set(graph)

[[0, 2, 7, 10, 19, 28, 30, 42, 47],
 [1, 17, 22, 25, 26, 27, 31],
 [3, 8, 15, 18, 21, 24, 29, 35, 45, 55, 59],
 [4, 11, 23, 51],
 [5, 6, 9, 13, 32, 39, 41, 48, 54, 56, 57, 60],
 [12, 14, 16, 20, 33, 34, 36, 37, 38, 40, 43, 44, 46, 49, 50, 52, 53, 58, 61]]

In [26]:
import random
from collections import defaultdict, Counter
from typing import List, Dict, Set
import networkx as nx
from concurrent.futures import ThreadPoolExecutor, as_completed

def sync_lpa_independent_set_parallel(
    graph: nx.Graph, 
    seed: int = None, 
    max_iter: int = 100,
    n_jobs: int = 4
) -> List[List[int]]:
    """
    최대 독립집합 기반 동기식 LPA의 병렬 버전
    """
    random.seed(seed)
    nodes = list(graph.nodes())
    labels = {node: idx for idx, node in enumerate(nodes)}
    
    for _ in range(max_iter):
        updated = False
        mis = _find_maximal_independent_set(graph, nodes)
        # 병렬로 새 레이블 계산
        new_labels = {}
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            futures = {
                executor.submit(_calc_label, graph, labels, node): node
                for node in mis
            }
            for future in as_completed(futures):
                node, new_label = future.result()
                if new_label is not None:
                    new_labels[node] = new_label

        # 레이블 업데이트 및 변경 여부 확인
        for node, new_label in new_labels.items():
            if labels[node] != new_label:
                labels[node] = new_label
                updated = True
                
        if not updated:
            break
    
    return _group_communities(labels)

def _find_maximal_independent_set(graph: nx.Graph, nodes: List[int]) -> Set[int]:
    """최대 독립집합(MIS) 선택 (순차, 병렬화 가능)"""
    mis = set()
    blocked = set()
    shuffled_nodes = random.sample(nodes, k=len(nodes))
    for node in shuffled_nodes:
        if node not in blocked:
            mis.add(node)
            blocked.add(node)
            blocked.update(graph.neighbors(node))
    return mis

def _calc_label(graph: nx.Graph, labels: Dict[int, int], node: int):
    """(병렬 실행용) 단일 노드의 새 레이블 계산"""
    neighbors = list(graph.neighbors(node))
    if not neighbors:
        return node, None
    neighbor_labels = Counter(labels[nbr] for nbr in neighbors)
    max_count = max(neighbor_labels.values())
    candidates = [label for label, count in neighbor_labels.items() if count == max_count]
    return node, random.choice(candidates)

def _group_communities(labels: Dict[int, int]) -> List[List[int]]:
    """레이블을 기반으로 커뮤니티 그룹화"""
    communities = defaultdict(list)
    for node, label in labels.items():
        communities[label].append(node)
    return list(communities.values())


In [27]:
sync_lpa_independent_set_parallel(graph)

[[0, 2, 10, 42, 47],
 [1, 5, 6, 7, 9, 13, 17, 19, 22, 30, 31, 32, 39, 41, 48, 54, 56, 57, 60],
 [3,
  8,
  12,
  14,
  16,
  20,
  23,
  28,
  33,
  34,
  36,
  37,
  38,
  40,
  43,
  44,
  46,
  49,
  50,
  52,
  53,
  58,
  61],
 [4, 11, 15, 18, 21, 24, 29, 35, 51, 55, 59],
 [25, 26, 27],
 [45]]

In [30]:
import random
from collections import Counter, defaultdict
import networkx as nx

def async_lpa_with_modularity(G, max_iter=100, seed=None):
    random.seed(seed)
    labels = {node: node for node in G.nodes()}
    m = G.number_of_edges()
    if m == 0:
        return [list(G.nodes())]
    
    # 커뮤니티 내부 간선 수와 총 차수 추적
    communities = {
        node: {'sum_in': 0, 'sum_tot': G.degree(node)}
        for node in G.nodes()
    }
    
    for _ in range(max_iter):
        updated = False
        nodes = list(G.nodes())
        random.shuffle(nodes)
        
        for node in nodes:
            current_label = labels[node]
            neighbors = list(G.neighbors(node))
            if not neighbors:
                continue
            
            # 1) 이웃 라벨 빈도 계산
            neighbor_labels = [labels[n] for n in neighbors]
            label_counts = Counter(neighbor_labels)
            max_count = max(label_counts.values())
            candidates = [
                label for label, cnt 
                in label_counts.items() 
                if cnt == max_count
            ]
            
            if len(candidates) == 1:
                new_label = candidates[0]
            else:
                # 2) 모듈러리티 기반 라벨 선택
                best_delta = -float('inf')
                best_label = current_label
                current_degree = G.degree(node)
                
                for candidate in candidates:
                    if candidate == current_label:
                        continue
                        
                    # 현재/후보 커뮤니티 정보 추출
                    sum_in_C = communities[current_label]['sum_in']
                    sum_tot_C = communities[current_label]['sum_tot']
                    sum_in_D = communities[candidate]['sum_in']
                    sum_tot_D = communities[candidate]['sum_tot']
                    
                    # 이웃 수 계산
                    k_i_in_C = sum(1 for n in neighbors if labels[n] == current_label)
                    k_i_in_D = sum(1 for n in neighbors if labels[n] == candidate)
                    
                    # ΔQ 계산 (모듈러리티 변화량)
                    delta_Q = (
                        (sum_in_D + k_i_in_D - (sum_in_C - k_i_in_C)) / (2 * m)
                    ) - (
                        ((sum_tot_D + current_degree)**2 - (sum_tot_C - current_degree)**2)
                        / (4 * (m**2))
                    )
                    
                    if delta_Q > best_delta:
                        best_delta = delta_Q
                        best_label = candidate
                
                new_label = best_label if best_delta > 0 else current_label
            
            # 3) 라벨 업데이트 및 커뮤니티 정보 갱신
            if new_label != current_label:
                # 기존 커뮤니티에서 제거
                communities[current_label]['sum_in'] -= k_i_in_C
                communities[current_label]['sum_tot'] -= current_degree
                # 신규 커뮤니티에 추가
                communities[new_label]['sum_in'] += k_i_in_D
                communities[new_label]['sum_tot'] += current_degree
                labels[node] = new_label
                updated = True
        
        if not updated:
            break
    
    # 결과 그룹화
    community_dict = defaultdict(list)
    for node, label in labels.items():
        community_dict[label].append(node)
    return list(community_dict.values())


In [32]:
async_lpa_with_modularity(graph,100,10)

[[0, 1, 2, 7, 10, 19, 25, 26, 27, 28, 30, 42, 47],
 [3, 4, 8, 11, 15, 18, 21, 23, 24, 29, 35, 45, 51, 55, 59],
 [5, 6, 9, 13, 17, 22, 31, 32, 39, 41, 48, 54, 56, 57, 60],
 [12, 14, 16, 20, 33, 34, 36, 37, 38, 40, 43, 44, 46, 49, 50, 52, 53, 58, 61]]

In [33]:
import random
import networkx as nx
from collections import defaultdict, Counter

def compute_communities_metrics(G, labels):
    """커뮤니티 메트릭 계산 헬퍼 함수"""
    sum_in = defaultdict(int)
    sum_tot = defaultdict(int)
    communities = defaultdict(list)
    
    for node, label in labels.items():
        communities[label].append(node)
    
    # sum_tot 계산 (커뮤니티 내 노드 차수 합)
    for label, nodes in communities.items():
        sum_tot[label] = sum(G.degree(n) for n in nodes)
    
    # sum_in 계산 (커뮤니티 내부 간선 수)
    for u, v in G.edges():
        if labels[u] == labels[v]:
            sum_in[labels[u]] += 1
    
    return sum_in, sum_tot

def sync_lpa_modularity(G, max_iter=100, seed=None):
    random.seed(seed)
    labels = {n: i for i, n in enumerate(G.nodes())}
    m = G.number_of_edges()
    if m == 0:
        return [list(G.nodes())]
    
    prev_labels = labels.copy()
    prev_sum_in, prev_sum_tot = compute_communities_metrics(G, prev_labels)
    
    for _ in range(max_iter):
        new_labels = {}
        for node in G.nodes():
            neighbors = list(G.neighbors(node))
            if not neighbors:
                new_labels[node] = prev_labels[node]
                continue
                
            # 이웃 라벨 빈도 계산
            neighbor_labels = [prev_labels[n] for n in neighbors]
            label_counts = Counter(neighbor_labels)
            max_count = max(label_counts.values())
            candidates = [label for label, cnt in label_counts.items() if cnt == max_count]
            
            if len(candidates) == 1:
                new_label = candidates[0]
            else:
                # 모듈러리티 기반 선택
                current_label = prev_labels[node]
                best_delta = -float('inf')
                best_label = current_label
                k_i = G.degree(node)
                k_i_in_C = label_counts.get(current_label, 0)
                
                for candidate in candidates:
                    if candidate == current_label:
                        continue
                    k_i_in_D = label_counts.get(candidate, 0)
                    sum_in_D = prev_sum_in.get(candidate, 0)
                    sum_in_C = prev_sum_in.get(current_label, 0)
                    sum_tot_D = prev_sum_tot.get(candidate, 0)
                    sum_tot_C = prev_sum_tot.get(current_label, 0)
                    
                    # ΔQ 계산식
                    delta_Q = (
                        (sum_in_D + k_i_in_D - (sum_in_C - k_i_in_C)) / (2*m)
                    ) - (
                        ((sum_tot_D + k_i)**2 - (sum_tot_C - k_i)**2)
                        / (4*(m**2))
                    )
                    
                    if delta_Q > best_delta:
                        best_delta = delta_Q
                        best_label = candidate
                
                new_label = best_label if best_delta > 0 else current_label
            
            new_labels[node] = new_label
        
        # 수렴 확인
        if new_labels == prev_labels:
            break
        
        # 메트릭 갱신
        prev_labels = new_labels.copy()
        prev_sum_in, prev_sum_tot = compute_communities_metrics(G, prev_labels)
    
    # 결과 그룹화
    communities = defaultdict(list)
    for node, label in prev_labels.items():
        communities[label].append(node)
    return list(communities.values())


In [36]:
sync_lpa_modularity(graph)

[[0,
  2,
  3,
  4,
  8,
  10,
  11,
  12,
  14,
  15,
  16,
  18,
  20,
  21,
  23,
  24,
  28,
  29,
  30,
  33,
  34,
  35,
  36,
  37,
  38,
  40,
  42,
  43,
  44,
  45,
  47,
  50,
  51,
  52,
  53,
  55,
  58,
  59,
  61],
 [1,
  5,
  6,
  7,
  9,
  13,
  17,
  19,
  22,
  25,
  26,
  27,
  31,
  32,
  39,
  41,
  48,
  54,
  56,
  57,
  60],
 [46],
 [49]]

In [37]:
import random
import networkx as nx
from collections import defaultdict, Counter
from concurrent.futures import ThreadPoolExecutor

def compute_communities_metrics(G, labels):
    """커뮤니티 메트릭 계산 헬퍼 함수"""
    sum_in = defaultdict(int)
    sum_tot = defaultdict(int)
    communities = defaultdict(list)
    
    for node, label in labels.items():
        communities[label].append(node)
    
    for label, nodes in communities.items():
        sum_tot[label] = sum(G.degree(n) for n in nodes)
    
    for u, v in G.edges():
        if labels[u] == labels[v]:
            sum_in[labels[u]] += 1
    
    return sum_in, sum_tot

def node_label_update(args):
    node, G, prev_labels, prev_sum_in, prev_sum_tot, m = args
    neighbors = list(G.neighbors(node))
    if not neighbors:
        return node, prev_labels[node]
    neighbor_labels = [prev_labels[n] for n in neighbors]
    label_counts = Counter(neighbor_labels)
    max_count = max(label_counts.values())
    candidates = [label for label, cnt in label_counts.items() if cnt == max_count]
    if len(candidates) == 1:
        return node, candidates[0]
    else:
        current_label = prev_labels[node]
        best_delta = -float('inf')
        best_label = current_label
        k_i = G.degree(node)
        k_i_in_C = label_counts.get(current_label, 0)
        for candidate in candidates:
            if candidate == current_label:
                continue
            k_i_in_D = label_counts.get(candidate, 0)
            sum_in_D = prev_sum_in.get(candidate, 0)
            sum_in_C = prev_sum_in.get(current_label, 0)
            sum_tot_D = prev_sum_tot.get(candidate, 0)
            sum_tot_C = prev_sum_tot.get(current_label, 0)
            delta_Q = (
                (sum_in_D + k_i_in_D - (sum_in_C - k_i_in_C)) / (2*m)
            ) - (
                ((sum_tot_D + k_i)**2 - (sum_tot_C - k_i)**2)
                / (4*(m**2))
            )
            if delta_Q > best_delta:
                best_delta = delta_Q
                best_label = candidate
        return node, best_label if best_delta > 0 else current_label

def sync_lpa_modularity_parallel(G, max_iter=100, seed=None, n_jobs=4):
    random.seed(seed)
    labels = {n: i for i, n in enumerate(G.nodes())}
    m = G.number_of_edges()
    if m == 0:
        return [list(G.nodes())]
    prev_labels = labels.copy()
    prev_sum_in, prev_sum_tot = compute_communities_metrics(G, prev_labels)
    for _ in range(max_iter):
        args_list = [
            (node, G, prev_labels, prev_sum_in, prev_sum_tot, m)
            for node in G.nodes()
        ]
        new_labels = {}
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            for node, label in executor.map(node_label_update, args_list):
                new_labels[node] = label
        if new_labels == prev_labels:
            break
        prev_labels = new_labels.copy()
        prev_sum_in, prev_sum_tot = compute_communities_metrics(G, prev_labels)
    communities = defaultdict(list)
    for node, label in prev_labels.items():
        communities[label].append(node)
    return list(communities.values())


In [39]:
sync_lpa_modularity_parallel(graph)

[[0,
  2,
  3,
  4,
  8,
  10,
  11,
  12,
  14,
  15,
  16,
  18,
  20,
  21,
  23,
  24,
  28,
  29,
  30,
  33,
  34,
  35,
  36,
  37,
  38,
  40,
  42,
  43,
  44,
  45,
  47,
  50,
  51,
  52,
  53,
  55,
  58,
  59,
  61],
 [1,
  5,
  6,
  7,
  9,
  13,
  17,
  19,
  22,
  25,
  26,
  27,
  31,
  32,
  39,
  41,
  48,
  54,
  56,
  57,
  60],
 [46],
 [49]]

In [41]:
import random
import networkx as nx
from collections import defaultdict, Counter
from concurrent.futures import ThreadPoolExecutor

def parallel_mis(graph, nodes, n_jobs=4):
    """병렬 최대 독립집합(MIS) 선택 (Luby 알고리즘 스타일의 간단 병렬화)"""
    remaining = set(nodes)
    mis = set()
    while remaining:
        # 각 노드에 무작위 우선순위 할당 (병렬화 가능)
        priorities = {node: random.random() for node in remaining}
        selected = set()
        # 병렬로 각 노드의 선택 여부 판단
        def select(node):
            return all(priorities[node] > priorities.get(neigh, -1) for neigh in graph.neighbors(node) if neigh in remaining)
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            results = list(executor.map(select, remaining))
        for node, ok in zip(remaining, results):
            if ok:
                selected.add(node)
        mis.update(selected)
        # MIS 및 그 이웃 제거
        to_remove = set(selected)
        for node in selected:
            to_remove.update(graph.neighbors(node))
        remaining -= to_remove
    return mis

def modularity_delta(graph, labels, node, old_label, new_label, sum_in, sum_tot, m):
    """모듈러리티 변화량 계산"""
    k_i = graph.degree(node)
    sum_in_old = sum_in[old_label]
    sum_tot_old = sum_tot[old_label]
    sum_in_new = sum_in[new_label]
    sum_tot_new = sum_tot[new_label]
    delta = (sum_in_new - sum_in_old + k_i) / (2 * m)
    delta -= ((sum_tot_new + k_i)**2 - (sum_tot_old - k_i)**2) / (4 * m**2)
    return delta

def compute_communities_metrics(G, labels):
    sum_in = defaultdict(int)
    sum_tot = defaultdict(int)
    communities = defaultdict(list)
    for node, label in labels.items():
        communities[label].append(node)
    for label, nodes in communities.items():
        sum_tot[label] = sum(G.degree(n) for n in nodes)
    for u, v in G.edges():
        if labels[u] == labels[v]:
            sum_in[labels[u]] += 1
    return sum_in, sum_tot

def sync_lpa_mis_modularity_parallel(
    graph: nx.Graph,
    max_iter: int = 100,
    seed: int = None,
    n_jobs: int = 4
):
    random.seed(seed)
    nodes = list(graph.nodes())
    labels = {node: idx for idx, node in enumerate(nodes)}
    m = graph.number_of_edges()
    sum_in, sum_tot = compute_communities_metrics(graph, labels)
    for _ in range(max_iter):
        # 1. 병렬 MIS 선택
        mis = parallel_mis(graph, nodes, n_jobs)
        # 2. MIS 내 노드 병렬 라벨 업데이트
        def update_label(node):
            neighbors = list(graph.neighbors(node))
            if not neighbors:
                return node, labels[node]
            counts = Counter(labels[nbr] for nbr in neighbors)
            max_count = max(counts.values())
            candidates = [label for label, cnt in counts.items() if cnt == max_count]
            if len(candidates) == 1:
                return node, candidates[0]
            # 동률: 모듈러리티 최대화
            current_label = labels[node]
            best_label = current_label
            best_delta = -float('inf')
            for candidate in candidates:
                delta = modularity_delta(graph, labels, node, current_label, candidate, sum_in, sum_tot, m)
                if delta > best_delta:
                    best_delta = delta
                    best_label = candidate
            return node, best_label if best_delta > 0 else current_label
        new_labels = {}
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            for node, label in executor.map(update_label, mis):
                new_labels[node] = label
        # 3. 라벨 및 메트릭 동기 갱신
        changed = False
        for node, new_label in new_labels.items():
            if labels[node] != new_label:
                old_label = labels[node]
                for nbr in graph.neighbors(node):
                    if labels[nbr] == old_label:
                        sum_in[old_label] -= 1
                    if labels[nbr] == new_label:
                        sum_in[new_label] += 1
                sum_tot[old_label] -= graph.degree(node)
                sum_tot[new_label] += graph.degree(node)
                labels[node] = new_label
                changed = True
        if not changed:
            break
    # 결과 그룹화
    communities = defaultdict(list)
    for node, label in labels.items():
        communities[label].append(node)
    return list(communities.values())


In [42]:
sync_lpa_mis_modularity_parallel(graph)

[[0, 2, 10, 28, 30, 42, 47],
 [1, 5, 6, 7, 13, 17, 19, 22, 25, 26, 27, 31, 32, 39, 41, 48, 54, 56, 57, 60],
 [3, 4, 8, 11, 15, 16, 18, 20, 21, 23, 24, 36, 38, 44, 45, 50, 51, 55, 58, 59],
 [9],
 [12, 14, 33, 34, 37, 40, 43, 46, 49, 52, 53, 61],
 [29, 35]]