In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the directory tree rooted at /kaggle/input and print the full path of
# every file found. The middle element of each os.walk() triple (the list of
# sub-directory names) is not needed here, so it is bound to `_`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cse551/Youtube_communities.txt
/kaggle/input/cse551/Collin_CYC_Graph.txt
/kaggle/input/cse551/CYC2008_complexes.txt
/kaggle/input/cse551/roadNet_Texas.txt
/kaggle/input/cse551/Undirected_Youtube_network.txt
/kaggle/input/cse551/Email-Enron.txt


In [2]:
!pip install hypernetx matplotlib scikit-learn
!pip install networkx numpy scikit-learn matplotlib
!pip install python-louvain

Collecting hypernetx
  Downloading hypernetx-2.3.8-py3-none-any.whl.metadata (17 kB)
Collecting celluloid>=0.2.0 (from hypernetx)
  Downloading celluloid-0.2.0-py3-none-any.whl.metadata (4.8 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading hypernetx-2.3.8-py3-none-any.whl (583 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m583.8/583.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hDownloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m83.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading celluloid-0.2.0-py3-none-any.whl (5.4 kB)
Installing collected packages: scikit-learn, celluloid, hypernetx
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling sci

In [None]:
import networkx as nx
import hypernetx as hnx
import numpy as np
from collections import defaultdict
import community as community_louvain
from sklearn.metrics import (
    normalized_mutual_info_score,
    adjusted_rand_score,
    homogeneity_score,
    completeness_score,
    v_measure_score,
    precision_score,
    recall_score,
    f1_score,
)
import matplotlib.pyplot as plt


In [None]:
# Step 1: Load Graph and Hypergraph
def load_network_data(filename):
    """Read a whitespace-separated integer edge list into a graph and a hypergraph.

    Lines beginning with '#' are ignored. On every other line the first
    integer is joined by an undirected edge to each integer that follows it.
    In parallel, for each following integer a hyperedge keyed by that integer
    collects the first integer of every line it appeared on.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(G, H)`` tuple: the ``networkx.Graph`` of pairwise edges and the
        ``hypernetx.Hypergraph`` built from the collected hyperedge sets.
    """
    graph = nx.Graph()
    members_by_hyperedge = defaultdict(set)

    with open(filename, 'r') as handle:
        for raw_line in handle:
            if raw_line.startswith('#'):
                continue
            ids = [int(token) for token in raw_line.split()]
            if len(ids) < 2:
                # Blank or single-id lines contribute no edges.
                continue
            source = ids[0]
            for target in ids[1:]:
                graph.add_edge(source, target)
                members_by_hyperedge[target].add(source)

    # Convert the collected sets into a hypergraph
    hypergraph = hnx.Hypergraph(members_by_hyperedge)
    return graph, hypergraph


# Step 2: Convert Hypergraph to Weighted Graph
def hypergraph_to_weighted_graph(H):
    """Expand a hypergraph into a weighted pairwise graph.

    Every pair of nodes that share a hyperedge is connected; the edge's
    ``weight`` attribute counts how many hyperedges contain both nodes.

    Args:
        H: Object exposing ``incidence_dict`` (hyperedge id -> member nodes).

    Returns:
        A ``networkx.Graph`` whose edges carry an integer ``weight``.
    """
    weighted = nx.Graph()
    for members in H.incidence_dict.values():
        members = list(members)
        for position in range(len(members)):
            first = members[position]
            # Only pair with later members so each unordered pair is seen once.
            for second in members[position + 1:]:
                if not weighted.has_edge(first, second):
                    weighted.add_edge(first, second, weight=1)
                else:
                    weighted[first][second]['weight'] += 1
    return weighted


# Step 3: Graph Community Detection
def graph_community_detection(G, random_state=None):
    """Partition a graph into communities with the Louvain method.

    Args:
        G: ``networkx.Graph`` to partition.
        random_state: Optional seed (or ``numpy.random.RandomState``) forwarded
            to ``community_louvain.best_partition``. Louvain is randomised, so
            pass a fixed value to get the same partition on every run. The
            default ``None`` keeps the previous, unseeded behaviour.

    Returns:
        Dict mapping each node to its community id.
    """
    return community_louvain.best_partition(G, random_state=random_state)


# Step 4: Hypergraph Community Detection (Louvain)
def hypergraph_community_detection(H, random_state=None):
    """Detect communities in a hypergraph via Louvain on its weighted expansion.

    The hypergraph is first converted with ``hypergraph_to_weighted_graph``;
    Louvain is then run on that graph using the ``weight`` edge attribute.

    Args:
        H: Hypergraph to partition.
        random_state: Optional seed (or ``numpy.random.RandomState``) forwarded
            to ``community_louvain.best_partition`` for reproducible results.
            The default ``None`` keeps the previous, unseeded behaviour.

    Returns:
        Dict mapping each node to its community id, or an empty dict when the
        expanded graph has no edges.
    """
    G_weighted = hypergraph_to_weighted_graph(H)
    if G_weighted.number_of_edges() == 0:
        print("No edges in the weighted graph derived from hypergraph. Cannot detect communities.")
        return {}

    return community_louvain.best_partition(
        G_weighted, weight='weight', random_state=random_state
    )


# Step 5: Extract Communities
def extract_communities(partition):
    """Invert a node -> community mapping into community -> set of nodes.

    Args:
        partition: Mapping from node to community id.

    Returns:
        ``defaultdict(set)`` keyed by community id, in order of first appearance.
    """
    members_by_community = defaultdict(set)
    for vertex in partition:
        members_by_community[partition[vertex]].add(vertex)
    return members_by_community


# Step 6: Find Common Nodes
def find_common_nodes(graph_partition, hypergraph_partition):
    """Return the set of nodes that appear in both partitions.

    Args:
        graph_partition: Node -> community mapping for the graph.
        hypergraph_partition: Node -> community mapping for the hypergraph.

    Returns:
        Set of nodes from ``graph_partition`` that are also in ``hypergraph_partition``.
    """
    shared = set()
    for candidate in graph_partition:
        if candidate in hypergraph_partition:
            shared.add(candidate)
    return shared


# Step 7: Compute Clustering Metrics
def evaluate_clustering_metrics(graph_partition, hypergraph_partition):
    """Print agreement metrics between the graph and hypergraph partitions.

    Only nodes present in both partitions are compared; the graph partition is
    treated as the reference ("true") labelling and the hypergraph partition
    as the prediction. Nothing is returned - the metrics are printed.

    Args:
        graph_partition: Node -> community id mapping for the graph.
        hypergraph_partition: Node -> community id mapping for the hypergraph.
    """
    common_nodes = find_common_nodes(graph_partition, hypergraph_partition)
    if not common_nodes:
        print("No common nodes between graph and hypergraph.")
        return

    # The set is not modified between the two comprehensions, so both lists
    # are built in the same node order and stay aligned element by element.
    true_labels = [graph_partition[node] for node in common_nodes]
    pred_labels = [hypergraph_partition[node] for node in common_nodes]

    nmi = normalized_mutual_info_score(true_labels, pred_labels)
    ari = adjusted_rand_score(true_labels, pred_labels)
    # NOTE(review): precision/recall/F1 compare the raw community ids, so they
    # are only meaningful if both partitions use the same id for the same
    # community - confirm this is intended.
    precision = precision_score(true_labels, pred_labels, average='weighted', zero_division=0)
    recall = recall_score(true_labels, pred_labels, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, pred_labels, average='weighted', zero_division=0)
    homogeneity = homogeneity_score(true_labels, pred_labels)
    completeness = completeness_score(true_labels, pred_labels)
    v_measure = v_measure_score(true_labels, pred_labels)
    # Previously this was the Jaccard index of the two *sets of label ids*,
    # which says nothing about how nodes are grouped. Use the pair-counting
    # Jaccard index, which is invariant to how communities are numbered.
    jaccard = _pair_counting_jaccard(true_labels, pred_labels)
    # Previously np.bincount([true, pred]) was evaluated per node, which only
    # ever yields 1 or 2; purity needs the cluster-level contingency counts.
    purity = _purity(true_labels, pred_labels)

    print("\nEvaluation Metrics:")
    print(f"NMI: {nmi:.4f}")
    print(f"ARI: {ari:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Jaccard Similarity Index: {jaccard:.4f}")
    print(f"Purity: {purity:.4f}")
    print(f"Homogeneity: {homogeneity:.4f}")
    print(f"Completeness: {completeness:.4f}")
    print(f"V-measure: {v_measure:.4f}")


def _purity(true_labels, pred_labels):
    """Fraction of items lying in the most common true class of their predicted cluster."""
    from collections import Counter

    overlap = Counter(zip(true_labels, pred_labels))
    best_per_cluster = {}
    for (_, pred), count in overlap.items():
        if count > best_per_cluster.get(pred, 0):
            best_per_cluster[pred] = count
    return sum(best_per_cluster.values()) / len(true_labels)


def _pair_counting_jaccard(true_labels, pred_labels):
    """Jaccard index over item pairs: pairs grouped together in both labellings / in either."""
    from collections import Counter

    def pair_count(sizes):
        # Number of unordered pairs inside groups of the given sizes.
        return sum(size * (size - 1) // 2 for size in sizes)

    together_in_both = pair_count(Counter(zip(true_labels, pred_labels)).values())
    together_in_true = pair_count(Counter(true_labels).values())
    together_in_pred = pair_count(Counter(pred_labels).values())
    together_in_either = together_in_true + together_in_pred - together_in_both
    if together_in_either == 0:
        # No pair is co-clustered in either labelling (all singletons): identical.
        return 1.0
    return together_in_both / together_in_either


# Step 8: Print Communities
def print_communities(communities, label="Community"):
    """Print a header followed by the size of every community.

    Args:
        communities: Mapping from community id to a sized collection of nodes.
        label: Singular name used on each line; it is pluralised for the header.
    """
    # Pluralise "...<consonant>y" as "...ies" so the header reads
    # "Communities" rather than "Communitys"; otherwise just append "s".
    if len(label) > 1 and label[-1] == "y" and label[-2].lower() not in "aeiou":
        plural = label[:-1] + "ies"
    else:
        plural = label + "s"

    print(f"\n{plural}:")
    for community_id, nodes in communities.items():
        print(f"{label} {community_id}: size: {len(nodes)}")

In [3]:

# Main Function
def main(filename='/kaggle/input/cse551/Email-Enron.txt'):
    """Run the graph-vs-hypergraph community comparison on one edge-list file.

    Loads the network, detects communities on the graph and on the hypergraph,
    prints the community sizes and their pairwise overlaps, then prints the
    clustering agreement metrics.

    Args:
        filename: Path of the edge-list file to analyse. Defaults to the
            Email-Enron dataset so that calling ``main()`` behaves as before.
    """
    # Step 1: Load Network Data
    G, H = load_network_data(filename)
    print(f"Graph loaded with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
    print(f"Hypergraph loaded with {len(H.edges)} hyperedges.")

    # Step 2: Graph Community Detection
    graph_partition = graph_community_detection(G)
    graph_comm = extract_communities(graph_partition)
    print(f"\nTotal Graph Communities: {len(graph_comm)}")
    print_communities(graph_comm, label="Graph Community")

    # Step 3: Hypergraph Community Detection
    hypergraph_partition = hypergraph_community_detection(H)
    if not hypergraph_partition:
        # An empty partition means the hypergraph produced no pairwise edges.
        print("No hypergraph communities detected.")
        return

    hypergraph_comm = extract_communities(hypergraph_partition)
    print(f"\nTotal Hypergraph Communities: {len(hypergraph_comm)}")
    print_communities(hypergraph_comm, label="Hypergraph Community")

    # Step 4: Compare Communities
    print("\nComparing Graph and Hypergraph Communities:")
    for graph_id, graph_nodes in graph_comm.items():
        for hyper_id, hyper_nodes in hypergraph_comm.items():
            # Report only the community pairs that actually share nodes.
            overlap = len(graph_nodes & hyper_nodes)
            if overlap > 0:
                print(f"Graph Community {graph_id} overlaps with Hypergraph Community {hyper_id}: {overlap} shared nodes")

    # Step 5: Evaluate Clustering Metrics
    evaluate_clustering_metrics(graph_partition, hypergraph_partition)


if __name__ == "__main__":
    main()


Graph loaded with 36692 nodes and 183831 edges.
Hypergraph loaded with 36692 hyperedges.

Total Graph Communities: 1257

Graph Communitys:
Graph Community 18: size: 1925
Graph Community 1: size: 2752
Graph Community 2: size: 4460
Graph Community 3: size: 212
Graph Community 4: size: 4634
Graph Community 5: size: 3470
Graph Community 6: size: 1692
Graph Community 36: size: 448
Graph Community 8: size: 2081
Graph Community 9: size: 4411
Graph Community 10: size: 1111
Graph Community 11: size: 234
Graph Community 12: size: 246
Graph Community 13: size: 214
Graph Community 20: size: 226
Graph Community 17: size: 366
Graph Community 19: size: 305
Graph Community 21: size: 194
Graph Community 0: size: 178
Graph Community 24: size: 213
Graph Community 25: size: 618
Graph Community 26: size: 205
Graph Community 27: size: 6
Graph Community 28: size: 4
Graph Community 29: size: 220
Graph Community 31: size: 7
Graph Community 32: size: 5
Graph Community 33: size: 130
Graph Community 34: size: 77
