In [11]:
import os
from sklearn.metrics import normalized_mutual_info_score

def read_communities(file_path):
    """
    Build a node -> community lookup from a whitespace-delimited text file.

    Each usable line has the form ``<community_id> <node> <node> ...`` and
    every token is parsed as an integer. Lines with fewer than two tokens
    (blank lines, or an ID with no members) are ignored. When a node is listed
    more than once, the community from its last occurrence is kept.

    A progress message is printed before reading and the number of distinct
    nodes is printed afterwards.

    Args:
        file_path: Path of the community file to read.

    Returns:
        dict mapping each node (int) to its community ID (int).

    Raises:
        ValueError: If a token on a usable line is not a valid integer.
    """
    print(f"Reading communities from {file_path}")
    membership = {}
    with open(file_path) as handle:
        # str.split() with no arguments already discards surrounding whitespace.
        for tokens in map(str.split, handle):
            if len(tokens) >= 2:
                label, *members = tokens
                label = int(label)  # leading token is the community ID
                # Parse the whole line before merging it into the result.
                membership.update(dict.fromkeys(map(int, members), label))
    print(f"Total nodes read in {file_path}: {len(membership)}")
    return membership

# File paths for the two community output files
file_path_1 = '/lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/outputs/coPapersDBLP_with_weights.mtx.hipmcl64'
file_path_2 = '/lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/hipdpl_new/coPapersDBLP_new.txt'

# Read the community data from both files (node -> community ID mappings)
communities_1 = read_communities(file_path_1)
communities_2 = read_communities(file_path_2)

# Only nodes present in both files can be compared; nodes that appear in just
# one file are left out of the score. Dict key views support set intersection
# directly, so no intermediate sets are needed.
# NOTE(review): confirm that dropping the non-shared nodes is the intended
# treatment for this comparison.
common_nodes = communities_1.keys() & communities_2.keys()
print(f"Total common nodes: {len(common_nodes)}")

# Fix a single node ordering and reuse it for both label lists, so position i
# refers to the same node in each list (and the sort is done only once).
ordered_common_nodes = sorted(common_nodes)
communities_1_common = [communities_1[node] for node in ordered_common_nodes]
communities_2_common = [communities_2[node] for node in ordered_common_nodes]

# Both label lists have the same length as the shared ordering, so one
# emptiness check covers them.
if not ordered_common_nodes:
    print("No valid common nodes found; cannot calculate NMI.")
else:
    # Calculate NMI between the two labelings of the shared nodes
    print("Calculating NMI...")
    nmi_score = normalized_mutual_info_score(communities_1_common, communities_2_common)
    print(f"NMI Score: {nmi_score}")


Reading communities from /lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/outputs/coPapersDBLP_with_weights.mtx.hipmcl64
Total nodes read in /lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/outputs/coPapersDBLP_with_weights.mtx.hipmcl64: 521374
Reading communities from /lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/hipdpl_new/coPapersDBLP_new.txt
Total nodes read in /lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/hipdpl_new/coPapersDBLP_new.txt: 540486
Total common nodes: 521374
Calculating NMI...
NMI Score: 0.6024629850132851
