In [1]:
import numpy as np
import pandas as pd
import random
import os
from collections import Counter
import time
import csv

In [2]:
def detect(G, max_iter=1000):
    """Detect communities in a graph with asynchronous label propagation.

    Every node starts with its own unique label. Nodes are then visited
    repeatedly, in the iteration order of ``G``, and each one adopts the
    label that occurs most often among its neighbours; label changes are
    visible immediately to the nodes visited later in the same sweep.
    When several labels are tied, the one returned first by
    ``Counter.most_common`` is used. Sweeps stop as soon as a full pass
    changes no label.

    Parameters
    ----------
    G : dict
        Adjacency mapping ``{node: neighbours}`` where ``neighbours`` is any
        iterable of node keys (only the keys are used when it is a dict).
        Every neighbour must itself be a key of ``G``.
    max_iter : int or None, optional
        Upper bound on the number of sweeps. Propagation is not guaranteed
        to converge (for example on an asymmetric adjacency such as the
        cycle 0 -> 1 -> 2 -> 0 the labels rotate forever), so the loop is
        capped. Pass ``None`` to sweep until convergence with no limit.

    Returns
    -------
    list of tuple
        ``(node, cluster)`` pairs. Cluster ids are consecutive integers
        starting at 0, assigned in ascending order of the surviving labels.
        Pairs are grouped by cluster, clusters appearing in the order in
        which their first member occurs in ``G``.

    Raises
    ------
    KeyError
        If a neighbour is not a key of ``G``.
    """
    # Step 1: Initialization - one distinct label per node.
    labels = {node: i for i, node in enumerate(G)}

    # Step 2: Label Propagation
    sweeps = 0
    while max_iter is None or sweeps < max_iter:
        sweeps += 1
        updated = False
        for node in G:
            neighbor_labels = [labels[neighbor] for neighbor in G[node]]
            if not neighbor_labels:
                # Isolated node: there is no label to adopt, so it keeps its
                # own (previously this raised IndexError on the empty Counter).
                continue
            new_label = Counter(neighbor_labels).most_common(1)[0][0]
            if labels[node] != new_label:
                labels[node] = new_label
                updated = True

        if not updated:
            break

    # Step 3: Post-processing - renumber the surviving labels as 0..k-1,
    # keeping their relative (sorted) order.
    label_order = {
        label: idx for idx, label in enumerate(sorted(set(labels.values())))
    }
    ordered_communities = {}
    for node, label in labels.items():
        ordered_communities.setdefault(label_order[label], []).append(node)

    # Step 4: Output Conversion - flatten to (node, cluster) pairs.
    return [
        (node, label)
        for label, nodes in ordered_communities.items()
        for node in nodes
    ]

In [3]:
# Locations of the six input CSVs (three datasets, each in a "K=<n>" and
# a "UNC" variant).
path1, path2, path3, path4, path5, path6 = (
    'data/D1-K=2.csv',
    "data/D1-UNC.csv",
    "data/D2-K=7.csv",
    "data/D2-UNC.csv",
    "data/D3-K=12.csv",
    "data/D3-UNC.csv",
)

# header=None: the files have no header row, so the first line is data.
df1, df2, df3, df4, df5, df6 = [
    pd.read_csv(csv_path, header=None)
    for csv_path in (path1, path2, path3, path4, path5, path6)
]

def convert_to_graph_df(df):
    """Convert an adjacency-matrix DataFrame into a dict-of-dicts graph.

    Row ``i`` of ``df`` describes node ``i``: every column position ``j``
    (with ``j < len(df)``) whose value equals 1 becomes an edge ``i -> j``.
    Nodes are identified by their integer position, not by the DataFrame's
    index or column labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix with at least as many columns as rows. Columns beyond
        ``len(df)`` are ignored. The matrix is not required to be symmetric.

    Returns
    -------
    dict
        ``{i: {j: 1, ...}, ...}`` with one entry per row (possibly an empty
        dict for a row without any 1), keys being plain Python ints.

    Raises
    ------
    IndexError
        If ``df`` has fewer columns than rows.
    """
    n_rows, n_cols = df.shape
    if n_cols < n_rows:
        raise IndexError(
            f"adjacency matrix has {n_rows} rows but only {n_cols} columns"
        )

    # Convert once to a NumPy array: per-cell df.iloc[i, j] lookups in a
    # nested Python loop are orders of magnitude slower for the same result.
    matrix = df.to_numpy()

    G = {}
    for i in range(n_rows):
        # .tolist() yields plain ints so keys match the integer node ids.
        neighbors = np.flatnonzero(matrix[i, :n_rows] == 1).tolist()
        G[i] = {j: 1 for j in neighbors}
    return G

# Build one adjacency-dict graph per loaded DataFrame, in dataset order.
G1, G2, G3, G4, G5, G6 = map(
    convert_to_graph_df,
    (df1, df2, df3, df4, df5, df6),
)

In [4]:
def detect_with_timing(G):
    """Run ``detect`` on ``G`` and measure how long it takes.

    Parameters
    ----------
    G : dict
        Graph passed through unchanged to ``detect``.

    Returns
    -------
    tuple
        ``(communities, execution_time)`` where ``communities`` is whatever
        ``detect(G)`` returns and ``execution_time`` is the elapsed time in
        seconds as a float.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock, which can jump (NTP/DST adjustments) and may be too coarse
    # for short runs.
    start_time = time.perf_counter()
    communities = detect(G)
    execution_time = time.perf_counter() - start_time
    return communities, execution_time

# Run the timed detection on every graph, in dataset order, and unpack each
# (communities, elapsed seconds) pair into its numbered variables.
(
    (communities1, execution_time1),
    (communities2, execution_time2),
    (communities3, execution_time3),
    (communities4, execution_time4),
    (communities5, execution_time5),
    (communities6, execution_time6),
) = [detect_with_timing(graph) for graph in (G1, G2, G3, G4, G5, G6)]

In [5]:
# Output directory for all result files. exist_ok=True creates it when
# missing and is a no-op otherwise, without the check-then-create race of
# a separate os.path.exists() test.
directory = 'results'
os.makedirs(directory, exist_ok=True)

# Write one "<dataset>.csv" per dataset, named like its input file: a
# "vertex, cluster" header line followed by one line per (vertex, cluster)
# pair, in the order the pairs appear in the communities list.
for dataset_name, communities in (
    ('D1-K=2', communities1),
    ('D1-UNC', communities2),
    ('D2-K=7', communities3),
    ('D2-UNC', communities4),
    ('D3-K=12', communities5),
    ('D3-UNC', communities6),
):
    with open(os.path.join(directory, f'{dataset_name}.csv'), 'w') as file:
        file.write("vertex, cluster\n")
        for vertex, cluster in communities:
            file.write(f"{vertex}, {cluster}\n")

In [6]:
# Pair every dataset name with the execution time measured for it
data = list(zip(
    ('D1-K=2', 'D1-UNC', 'D2-K=7', 'D2-UNC', 'D3-K=12', 'D3-UNC'),
    (
        execution_time1,
        execution_time2,
        execution_time3,
        execution_time4,
        execution_time5,
        execution_time6,
    ),
))

# Dump the header row followed by the (dataset, time) rows in CSV format.
# newline='' lets the csv module control the line endings itself.
with open('results/description.txt', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([['Dataset', 'Execution Time'], *data])