In [2]:
import sys; sys.path.append("..")

import numpy as np
import networkx as nx

from utils.network_loader import read_communities
from utils.dyetracing_classes import evaluate_clustering

from utils.dyetracing_classes import *
from utils.node_selection import find_low_centrality_nodes, iter_node_selection, find_low_betweenness_nodes
from utils.network_loader import read_communities, load_nx_graph, setup_dyetracing_graph
# from utils.boundary import detect_boundaries, is_distinct_boundary

from cdlib import algorithms
import csv
import os

from tqdm import tqdm

Note: to be able to use all crisp methods, you need to install some additional packages:  {'graph_tool'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'pyclustering'}


In [3]:
# Compare the precomputed directed-Louvain partitions and two cdlib baselines
# (Infomap, RB-Potts) against the ground-truth communities, writing one CSV
# of per-run scores for every mu value.

# Root folder of the benchmark data. The default is the original hard-coded
# location; set FLOWCOMMUNITIES_BENCHMARKS to run this cell on another machine.
BENCHMARK_ROOT = os.environ.get(
    'FLOWCOMMUNITIES_BENCHMARKS',
    '/Users/alissachavalithumrong/Documents/research/flowcommunities/benchmarks',
)
SIZE_DIR = '500_node'            # path component shared by both data folders
NETWORK_TAG = 'N500_k50_maxk75'  # parameter tag embedded in the .dat file names
N_RUNS = 100                     # runs are numbered 1..N_RUNS for each mu

# Define the mu values to loop over
mu_values = [10, 20, 30, 40, 50, 60, 70, 80, 90]

# Headers for the CSV files (identical for every mu, so built once)
headers = ['run', 'true # of comms', 'louvain # of pred comms', 'louvain accuracy', 'louvain ari', 'louvain nmi',
           'infomap # of pred comms', 'infomap accuracy', 'infomap ari', 'infomap nmi',
           'rbpots # of pred comms', 'rbpots accuracy', 'rbpots ari', 'rbpots nmi']


def _labels_from_communities(communities, nodes):
    """Return one integer community label per node, in the order of ``nodes``.

    Parameters
    ----------
    communities : iterable of node collections
        The label of a community is its position in this iterable.
    nodes : iterable
        Nodes to label; the output follows this order.

    Returns
    -------
    numpy.ndarray
        ``labels[k]`` is the index of the first community containing the
        k-th node (a node listed in several communities keeps the first).

    Raises
    ------
    ValueError
        If some node does not belong to any community.
    """
    # Build the node -> label index once instead of rescanning every
    # community for every node.
    first_label = {}
    for label, members in enumerate(communities):
        for member in members:
            first_label.setdefault(member, label)

    nodes = list(nodes)
    unassigned = [node for node in nodes if node not in first_label]
    if unassigned:
        raise ValueError(
            f'{len(unassigned)} node(s) are not in any community, e.g. {unassigned[:5]}'
        )
    return np.array([first_label[node] for node in nodes])


for mu in mu_values:
    # CSV file path (written to the current working directory)
    csv_file = f'{mu}mu_comparison.csv'

    louvain_dir = os.path.join(BENCHMARK_ROOT, 'directed_louvain', SIZE_DIR, str(mu))
    network_dir = os.path.join(BENCHMARK_ROOT, 'LF_created_networks', SIZE_DIR, str(mu))

    # Open the CSV file for writing
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(headers)

        # Loop over the runs with a progress bar
        for i in tqdm(range(1, N_RUNS + 1), desc=f'Processing mu={mu}'):
            louvain_community = os.path.join(louvain_dir, f'{i}_mu{mu}.tree')
            community_file = os.path.join(network_dir, f'{i}_community_{NETWORK_TAG}_mu{mu}.dat')
            network_file = os.path.join(network_dir, f'{i}_network_{NETWORK_TAG}_mu{mu}.dat')

            # NOTE(review): the ground-truth labels are taken in the order
            # read_communities returns them, the Louvain labels are sorted by
            # key, and the baseline labels follow G_directed.nodes(). The
            # scores are only meaningful if these three orders agree - confirm.
            group_membership = np.array(list(read_communities(community_file).values()))
            louvain_membership = dict(sorted(read_communities(louvain_community).items(), key=lambda x: x[0]))
            louvain_membership = np.array(list(louvain_membership.values()))

            accuracy, ari, nmi = evaluate_clustering(group_membership, louvain_membership)
            num_communities = len(np.unique(group_membership))
            num_louvain_communities = len(np.unique(louvain_membership))

            node_list, edge_list = setup_dyetracing_graph(network_file)
            G_directed = load_nx_graph(node_list, network_file)

            # Run baseline algorithms and get their evaluations
            infomap_comms = algorithms.infomap(G_directed)
            infomap_labels = _labels_from_communities(infomap_comms.communities, G_directed.nodes())
            infomap_accuracy, infomap_ari, infomap_nmi = evaluate_clustering(group_membership, infomap_labels)
            num_infomap_comms = len(infomap_comms.communities)

            rb_pots_comms = algorithms.rb_pots(G_directed)
            rb_pots_labels = _labels_from_communities(rb_pots_comms.communities, G_directed.nodes())
            rb_pots_accuracy, rb_pots_ari, rb_pots_nmi = evaluate_clustering(group_membership, rb_pots_labels)
            num_rb_pots_comms = len(rb_pots_comms.communities)

            # Write the results to the CSV file
            writer.writerow([i, num_communities, num_louvain_communities, accuracy, ari, nmi,
                             num_infomap_comms, infomap_accuracy, infomap_ari, infomap_nmi,
                             num_rb_pots_comms, rb_pots_accuracy, rb_pots_ari, rb_pots_nmi])

Processing mu=10: 100%|██████████| 100/100 [00:33<00:00,  3.01it/s]
Processing mu=20: 100%|██████████| 100/100 [00:33<00:00,  2.98it/s]
Processing mu=30: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Processing mu=40: 100%|██████████| 100/100 [00:33<00:00,  2.98it/s]
Processing mu=50: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Processing mu=60: 100%|██████████| 100/100 [00:33<00:00,  2.99it/s]
Processing mu=70: 100%|██████████| 100/100 [00:32<00:00,  3.03it/s]
Processing mu=80: 100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
Processing mu=90: 100%|██████████| 100/100 [00:35<00:00,  2.86it/s]
