## Community Detection and Topological Metrics

This section builds user-user graphs from each edge class (and the complete network) and applies **Louvain community detection**.

For each snapshot:
- Communities are detected and saved for each edge class and for the complete network.
- Core topological metrics are computed:
  - Number of nodes and edges
  - Density and average degree
  - Average clustering coefficient (estimated on a random 20% sample of nodes)
  - Number of connected components
  - Number of communities
  - Modularity score



In [None]:
import pandas as pd
import random
import pickle
import os
from igraph import Graph, mean

# Ensure the output directory exists; class_topology() writes its community
# pickle files into "communities/".
os.makedirs("communities", exist_ok=True)

# Snapshot dates to analyse. Each date selects the input file
# "networks/Backbone-<date>.csv" read by the main loop.
target_dates = ['2022-11-01', '2023-01-08']

def generate_graphs(df, edge_class):
    """
    Builds an igraph.Graph for a given edge class or the complete network.

    Rows whose 'src' or 'trg' is missing are discarded before the vertex set
    is collected, so a missing endpoint never becomes a vertex of its own.
    Vertices are numbered in order of first appearance (all 'src' values
    first, then 'trg' values), which makes the numbering reproducible for a
    given input. The input DataFrame is never modified.

    Parameters:
    - df (pd.DataFrame): DataFrame with 'src', 'trg', and 'edge_class' columns.
      ('edge_class' is only read when edge_class is not "Complete".)
    - edge_class (int or "Complete"): The class of edges to include.
    Returns:
    - g (igraph.Graph): The constructed graph (one edge per remaining row).
    - vertices_to_index (dict): Mapping from original node IDs to graph indices.
    """
    if edge_class != "Complete":
        class_df = df.loc[df['edge_class'] == edge_class, ["src", "trg"]]
    else:
        class_df = df[["src", "trg"]]

    # dropna() returns a new frame, so no column is ever written onto a
    # selection of the caller's DataFrame (avoids SettingWithCopyWarning).
    class_df = class_df.dropna(subset=["src", "trg"])

    # tolist() yields plain Python scalars, which keeps the mapping keys
    # (later pickled by the caller) free of numpy scalar types.
    sources = class_df['src'].tolist()
    targets = class_df['trg'].tolist()

    # dict.fromkeys de-duplicates while preserving first-appearance order.
    vertices_to_index = {
        vertex: index
        for index, vertex in enumerate(dict.fromkeys(sources + targets))
    }

    g = Graph()
    g.add_vertices(len(vertices_to_index))
    g.add_edges(
        [
            (vertices_to_index[source], vertices_to_index[target])
            for source, target in zip(sources, targets)
        ]
    )

    return g, vertices_to_index

def class_topology(graph, vertices_to_index, edge_class, target_date):
    """
    Detects communities and computes topological metrics for the given graph.

    Communities come from graph.community_multilevel(). The average
    clustering coefficient is estimated on a random 20% sample of the
    vertices; when the graph is so small that 20% rounds down to zero
    vertices, every vertex is used instead so the value is never computed
    from an empty sample.

    Parameters:
    - graph (igraph.Graph): Graph to analyse.
    - vertices_to_index (dict): Mapping from original node IDs to graph indices,
      as returned by generate_graphs().
    - edge_class (int or "Complete"): Label used in the output file name and in
      the 'Network' column.
    - target_date (str): Snapshot label used in the output file name and in the
      'Snapshot' column.

    Returns:
    - None

    Saves:
    - community assignment as a pickle file
      (`communities/communities_<target_date>_class_<edge_class>.pkl`).
    - topological data into a CSV file (`topology.csv`), appending one row per
      call and writing the header only when the file is missing or empty.
    """
    vertex_count = graph.vcount()

    # Fall back to the full vertex set when 20% would round down to nothing.
    sample_size = int(0.2 * vertex_count) or vertex_count
    sampled_vertices = random.sample(range(vertex_count), sample_size)

    communities = graph.community_multilevel()
    membership = communities.membership

    # Map each original user ID straight to the community of its graph index
    community_mapping = {
        vertex: membership[index] for vertex, index in vertices_to_index.items()
    }

    # Save community assignments (create the directory so the function does
    # not depend on module-level setup having run first)
    os.makedirs("communities", exist_ok=True)
    with open(f"communities/communities_{target_date}_class_{edge_class}.pkl", "wb") as f:
        pickle.dump(community_mapping, f)

    # Topological summary
    class_metrics = {
        'Network': f'Class {edge_class}',
        'Snapshot': target_date,
        '# nodes': vertex_count,
        '# edges': graph.ecount(),
        'Density': graph.density(loops=False),
        'Avg. degree': mean(graph.degree()),
        'Avg. clustering': mean(graph.transitivity_local_undirected(sampled_vertices, mode='zero')),
        '# connected components': len(graph.connected_components()),
        '# communities': len(communities),
        'Modularity': graph.modularity(membership)
    }

    df_topology = pd.DataFrame([class_metrics])

    # Only a missing or empty file needs the header row
    has_rows = os.path.isfile('topology.csv') and os.path.getsize('topology.csv') > 0
    df_topology.to_csv('topology.csv', mode='a', index=False, header=not has_rows)

# ----------------------------------------
# Main loop for processing all snapshots
# ----------------------------------------

# For every snapshot: load its backbone edge list, then analyse the complete
# network followed by each edge class on its own. Every class_topology() call
# writes one community pickle and appends one row to topology.csv.
for target_date in target_dates:
    print(f"Processing: {target_date}")
    df = pd.read_csv(f"networks/Backbone-{target_date}.csv")
    # Keep only the columns that generate_graphs() reads
    df = df[["src", "trg", "edge_class"]]

    # Complete network
    full_graph, full_index_map = generate_graphs(df, "Complete")
    class_topology(full_graph, full_index_map, "Complete", target_date)

    # Individual edge classes, processed in ascending order of class label
    for edge_class in sorted(df['edge_class'].unique()):
        class_graph, class_index_map = generate_graphs(df, edge_class)
        class_topology(class_graph, class_index_map, edge_class, target_date)


Processing: 2022-11-01
Processing: 2023-01-08
