In [3]:
# import igraph as ig

# def read_graph_from_edgelist_igraph(file_path):
#     """
#     Reads an edge list from a Matrix Market format and returns an igraph Graph.
#     """
#     edges = []
#     with open(file_path, 'r') as file:
#         for line in file:
#             if line.startswith('%'):  # Skip comments
#                 continue
#             parts = line.split()
#             if len(parts) >= 2:
#                 u, v = int(parts[0]), int(parts[1])
#                 edges.append((u, v))
    
#     G = ig.Graph(edges=edges)
#     return G

# def read_communities_igraph(file_path, graph):
#     """
#     Reads the community file, skipping the empty '0' community, and returns a valid partition.
#     Adjusts for one-based community IDs and ensures all nodes are covered.
#     """
#     communities = []
#     assigned_nodes = set()
#     node_to_community = {}  # Track which community each node is in to detect duplicates

#     with open(file_path, 'r') as file:
#         for line in file:
#             nodes = list(map(int, line.split()))
#             if nodes:  # Ensure there are nodes
#                 community_nodes = nodes[1:]  # Skip the community ID (first element)
#                 if community_nodes:
#                     communities.append(community_nodes)  # Append only if the community is not empty
#                     for node in community_nodes:
#                         if node in node_to_community:
#                             print(f"Warning: Node {node} appears in multiple communities!")
#                         node_to_community[node] = nodes[0]  # Track community ID for each node
#                     assigned_nodes.update(community_nodes)
    
#     # Handle any missing nodes (nodes not assigned to any community)
#     all_nodes = set(range(graph.vcount()))
#     missing_nodes = all_nodes - assigned_nodes
    
#     # Print missing nodes
#     if missing_nodes:
#         print(f"Warning: The following nodes are missing from the community file: {sorted(missing_nodes)}")
    
#     # Low degree check for missing nodes
#     low_degree_nodes = [node for node in missing_nodes if graph.degree(node) <= 1]
#     if low_degree_nodes:
#         print(f"Low degree nodes (degree <= 1): {low_degree_nodes}")

#     # Uncomment the next lines if you want to assign missing nodes to singleton communities
#     # for node in missing_nodes:
#     #     communities.append([node])  # Each missing node gets its own community

#     return communities

# def calculate_modularity_igraph(graph, communities):
#     """
#     Calculate the modularity of the given community structure on the graph using igraph.
#     """
#     membership = [0] * graph.vcount()
#     for i, community in enumerate(communities):
#         for node in community:
#             membership[node] = i  # Assign each node its community ID

#     return graph.modularity(membership)

# # File paths
# edge_list_file = '/lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/alldegrees/road_central.mtx'
# community_file = '/lustre/orion/gen150/world-shared/abby-summer24/nawsdatasets/outputs/road_central_with_weights.mtx.hipmcl64'

# # Read the graph and communities
# G = read_graph_from_edgelist_igraph(edge_list_file)
# communities = read_communities_igraph(community_file, G)

# # Calculate modularity
# modularity_score = calculate_modularity_igraph(G, communities)
# print(f"Modularity of the given community structure: {modularity_score}")
import igraph as ig
import os
import csv

def read_graph_from_edgelist_igraph(file_path):
    edges = []
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('%'):
                continue
            parts = line.split()
            if len(parts) >= 2:
                u, v = int(parts[0]), int(parts[1])
                edges.append((u, v))
    G = ig.Graph(edges=edges)
    return G

def read_communities_igraph(file_path, graph):
    communities = []
    assigned_nodes = set()
    node_to_community = {}

    with open(file_path, 'r') as file:
        for line in file:
            nodes = list(map(int, line.split()))
            if nodes:
                community_nodes = nodes[1:]
                communities.append(community_nodes)
                for node in community_nodes:
                    if node in node_to_community:
                        print(f"Warning: Node {node} appears in multiple communities!")
                    node_to_community[node] = nodes[0]
                assigned_nodes.update(community_nodes)
    
    all_nodes = set(range(graph.vcount()))
    missing_nodes = all_nodes - assigned_nodes
    if missing_nodes:
        print(f"Warning: Missing nodes: {sorted(missing_nodes)}")
    
    low_degree_nodes = [node for node in missing_nodes if graph.degree(node) <= 1]
    if low_degree_nodes:
        print(f"Low degree nodes: {low_degree_nodes}")

    return communities

def calculate_modularity_igraph(graph, communities):
    membership = [0] * graph.vcount()
    for i, community in enumerate(communities):
        for node in community:
            membership[node] = i
    return graph.modularity(membership)

# Define file prefixes
prefixes = [
    'euk', 'virus', 'arch'
# prefixes = [
#     'europe_osm', 'road_usa', 'road_central', 'rgg_n_2_24_s0', 
#     'kron_g500-logn19', 'rgg_n_2_18_s0', 'delaunay_n24', 
#     'delaunay_n23', 'delaunay_n22'
]
# Directories for graphs and community files
graph_dir = '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/mtxfiles/'
community_dir = '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/proteinhipdplcommunities/new/'

# Output CSV file
output_csv = 'modularity_results_hipdplprotein.csv'

# Open CSV file for writing
with open(output_csv, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Prefix', 'Modularity'])  # Write header

    # Process each file prefix and write results to CSV
    for prefix in prefixes:
        edge_list_file = os.path.join(graph_dir, f'{prefix}.mtx')
        community_file = os.path.join(community_dir, f'{prefix}.bin_new.txt')

        print(f"Processing {prefix}...")
        
        try:
            G = read_graph_from_edgelist_igraph(edge_list_file)
            communities = read_communities_igraph(community_file, G)

            modularity_score = calculate_modularity_igraph(G, communities)
            print(f"Modularity for {prefix}: {modularity_score}")
            
            # Write result to CSV
            writer.writerow([prefix, modularity_score])
        except FileNotFoundError as e:
            print(f"Error: File not found for {prefix}: {e}")
        except Exception as e:
            print(f"Error processing {prefix}: {e}")

print(f"Results saved to {output_csv}")


Processing euk...
Error: File not found for euk: [Errno 2] No such file or directory: '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/mtxfiles/eukus.mtx'
Processing vir...
Modularity for vir: -5.629331461536659e-06
Processing arch...
Error: File not found for arch: [Errno 2] No such file or directory: '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/mtxfiles/archus.mtx'
Results saved to modularity_results_hipdplprotein.csv
