In [None]:
import os
import igraph as ig
import csv

def read_graph_from_mtx(file_path):
    """Build an undirected igraph graph from an edge-list / Matrix Market file.

    Lines starting with '%' are treated as comments. Every other line with at
    least two whitespace-separated tokens contributes one edge from its first
    two integer tokens; any further tokens (e.g. a weight) are ignored.

    If the file opens with the '%%MatrixMarket' banner, the first data-like
    line is the "rows cols entries" size line required by that format, not an
    edge, and is skipped. Files without the banner are read as plain edge
    lists, so every line counts as an edge.

    Args:
        file_path: Path of the file to read.

    Returns:
        An ``ig.Graph`` (undirected) containing the parsed edges.

    Raises:
        ValueError: If one of the first two tokens on an edge line is not an
            integer.
    """
    edges = []
    size_line_pending = False
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file):
            if line.startswith('%'):
                # Only a banner on the very first line marks a real Matrix
                # Market file, which is what guarantees a size line follows.
                if line_number == 0 and line.lower().startswith('%%matrixmarket'):
                    size_line_pending = True
                continue
            parts = line.split()
            if len(parts) < 2:
                continue  # blank or malformed line
            if size_line_pending:
                # Skip the "rows cols entries" line; parsing it as an edge
                # would add a bogus self-loop/edge and inflate a degree.
                size_line_pending = False
                continue
            edges.append((int(parts[0]), int(parts[1])))
    # NOTE(review): vertex ids are used verbatim. Matrix Market indices are
    # 1-based, so vertex 0 ends up as an isolated vertex -- confirm this
    # matches the numbering used by the community files.
    G = ig.Graph(edges=edges, directed=False)
    return G

def calculate_conductance_igraph(graph, community):
    """Return the conductance of ``community`` within ``graph``.

    Conductance is computed as ``cut / min(vol(S), vol(complement))`` where
    ``cut`` counts neighbor entries of community members that fall outside the
    community, ``vol(S)`` is the sum of the members' degrees and
    ``vol(complement)`` is the total degree of the graph minus ``vol(S)``.

    Args:
        graph: Object exposing ``neighbors(node)``, ``degree(node)`` and
            ``degree()`` (all degrees), as an igraph ``Graph`` does.
        community: Iterable of node ids; duplicates are collapsed.

    Returns:
        The conductance, or ``0`` when either volume is zero.
    """
    members = set(community)
    boundary_edges = 0
    community_volume = 0

    for member in members:
        adjacent = graph.neighbors(member)
        community_volume += graph.degree(member)
        # Every neighbor entry outside the community is one edge crossing the cut.
        boundary_edges += sum(1 for other in adjacent if other not in members)

    outside_volume = sum(graph.degree()) - community_volume

    # A side with zero volume makes the ratio undefined; report 0 instead.
    if community_volume == 0 or outside_volume == 0:
        return 0

    return boundary_edges / min(community_volume, outside_volume)

def read_communities_igraph(file_path):
    """Read communities from a text file with one community per line.

    Each non-blank line holds whitespace-separated integers: the first is the
    community id (discarded), the rest are the member node ids. Blank or
    whitespace-only lines are skipped so they do not show up as empty
    communities in the result.

    Args:
        file_path: Path of the community file.

    Returns:
        A list of communities in file order, each a list of integer node ids.
        A line holding only a community id yields an empty list.

    Raises:
        ValueError: If a token on a non-blank line is not an integer.
    """
    communities = []
    with open(file_path, 'r') as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # e.g. a trailing newline at the end of the file
                continue
            nodes = [int(token) for token in tokens]
            communities.append(nodes[1:])  # Skip the community ID, only add nodes
    return communities

# List of prefixes for the datasets; each prefix names a '<prefix>.mtx' graph
# file and a '<prefix>.bin_new2.txt' community file.
prefixes = [
    'arch', 'euk', 'virus'
]

# Directories for the files
degree_dir = '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/mtxfiles/'
community_dir = '/lustre/orion/gen150/world-shared/abby-summer24/hipmcldatasets/proteinhipdplcommunities/new/'
csv_file = 'conductance_hipdpl_proteins.csv'  # Combined CSV file shared by all datasets

# Process each dataset independently: compute per-community conductance plus
# average/min/max, append the rows to the shared CSV and print the summary.
for prefix in prefixes:
    try:
        print(f"Processing {prefix}...")

        # Construct file paths for the graph and community files
        mtx_file = os.path.join(degree_dir, f'{prefix}.mtx')
        community_file = os.path.join(community_dir, f'{prefix}.bin_new2.txt')

        # Check if the files exist before proceeding
        if not os.path.exists(mtx_file) or not os.path.exists(community_file):
            print(f"Files for {prefix} not found, skipping.")
            continue

        # Read the graph and the community structure
        G = read_graph_from_mtx(mtx_file)
        communities = read_communities_igraph(community_file)

        # Without communities there is nothing to average; say so explicitly
        # instead of failing later with a division by zero.
        if not communities:
            print(f"No communities found for {prefix}, skipping.")
            continue

        dataset_name = os.path.basename(mtx_file)  # Get the dataset file name

        # Conductance of every community, in file order
        conductances = [
            calculate_conductance_igraph(G, community)
            for community in communities
        ]

        # One CSV row per community, numbered from 1
        results = [
            [dataset_name, f'Community {index}', value]
            for index, value in enumerate(conductances, start=1)
        ]

        # Summary statistics over all communities of this dataset
        total_conductance = sum(conductances)
        min_conductance = min(conductances)
        max_conductance = max(conductances)
        average_conductance = total_conductance / len(conductances)

        results.append([dataset_name, 'Average conductance', average_conductance])
        results.append([dataset_name, 'Minimum conductance', min_conductance])
        results.append([dataset_name, 'Maximum conductance', max_conductance])

        # Write everything to the CSV file in one go
        with open(csv_file, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # In append mode the position is 0 only for a new/empty file, so
            # the header is written once across datasets and across runs.
            if csvfile.tell() == 0:
                writer.writerow(['Dataset', 'Community', 'Conductance'])

            # Write all accumulated results
            writer.writerows(results)

        # Print results
        print(f"Average conductance for {dataset_name}: {average_conductance}")
        print(f"Minimum conductance for {dataset_name}: {min_conductance}")
        print(f"Maximum conductance for {dataset_name}: {max_conductance}")
        print("Processing complete for", prefix)
        print("\n")

    except Exception as e:
        # Deliberately broad: one broken dataset must not stop the others.
        print(f"An error occurred while processing {prefix}: {e}")
        continue  # Skip to the next dataset if an error occurs


Processing arch...
Average conductance for arch.mtx: 0.9999858504794483
Minimum conductance for arch.mtx: 0.9927536231884058
Maximum conductance for arch.mtx: 1.0
Processing complete for arch


Processing euk...
