In [21]:
import sknetwork as skn
from sknetwork.clustering import Louvain, get_modularity
from sknetwork.linalg import normalize
from sknetwork.utils import get_membership
from sknetwork.visualization import visualize_graph
import networkx as nx  
import torch
from torch_geometric.utils import to_networkx
from torch_geometric.data import Data
import os
from tqdm import tqdm
import json
import numpy as np
import pandas as pd
from cdlib import evaluation, algorithms

In [22]:
# Additional metrics
def compute_internal_density(G, nodes):
    """Return the edge density of the subgraph of ``G`` induced by ``nodes``.

    The density is ``2 * m / (n * (n - 1))`` where ``n`` and ``m`` are the
    number of nodes and edges of the induced subgraph.

    Args:
        G: Graph object exposing ``subgraph(nodes)``; the returned subgraph
            must expose ``nodes()`` and ``edges()``.
        nodes: Iterable of node identifiers that make up the community.

    Returns:
        float: The density, or ``0.0`` when the induced subgraph has at most
        one node (the ratio is undefined in that case).
    """
    induced = G.subgraph(nodes)
    node_count = len(induced.nodes())
    # A single node (or an empty set) has no possible pairs to connect.
    if node_count <= 1:
        return 0.0
    edge_count = len(induced.edges())
    possible_pairs = node_count * (node_count - 1)
    return (2 * edge_count) / possible_pairs

def compute_edges_cut(G, nodes):
    """Count the edges inside a community and the edges crossing its boundary.

    Assumes ``G`` is an undirected graph in which ``G.neighbors(u)`` yields
    each neighbour of ``u`` once (a self-loop yields ``u`` itself once).

    Args:
        G: Graph object exposing ``neighbors(node)``.
        nodes: Iterable of node identifiers that make up the community. It is
            de-duplicated first, so repeated ids or a one-shot iterator are
            handled correctly.

    Returns:
        tuple[int, int]: ``(edges_inside, edges_cut)`` where ``edges_inside``
        is the number of edges with both endpoints in the community
        (self-loops included) and ``edges_cut`` is the number of edges with
        exactly one endpoint in the community.
    """
    members = set(nodes)
    internal_half_edges = 0
    self_loops = 0
    edges_cut = 0
    # Iterate over the de-duplicated set rather than ``nodes`` so that repeated
    # ids are not counted twice and an already-consumed iterator is not reused.
    for u in members:
        for v in G.neighbors(u):
            if v == u:
                # A self-loop is seen only once, so it must not be halved below.
                self_loops += 1
            elif v in members:
                internal_half_edges += 1
            else:
                edges_cut += 1
    # Every non-loop internal edge was seen from both of its endpoints.
    edges_inside = internal_half_edges // 2 + self_loops
    return edges_inside, edges_cut

In [23]:
# Load the raw edge list, the serialized graph, and run Infomap community detection.
# NOTE(review): edges_df is not referenced again in the visible cells — confirm it is still needed.
edges_path = os.path.join('..', '..', 'src', 'data', 'edges.csv')
edges_df = pd.read_csv(edges_path)

# Output directory for the Infomap results; created if it does not exist yet.
save_dir = os.path.join('..', '..', 'src', 'graph_dir', 'infomap_dir')
os.makedirs(save_dir, exist_ok=True)

# Despite its name, graph_dir is the path of the serialized graph FILE, not a directory.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects —
# only load files from a trusted source.
graph_dir = os.path.join('..','..', 'src', 'data', 'graph_data.pt')
data = torch.load(graph_dir, weights_only=False)
# The loaded payload is indexed by key: 'data' holds the graph object and
# 'idx2user' a lookup that later cells index with node ids
# (presumably node index -> user identifier; verify against the producer of the file).
graph = data['data']
mapping = data['idx2user']

# Convert the torch_geometric graph into an undirected NetworkX graph
G_nx = to_networkx(graph, to_undirected=True)

# Community detection with Infomap
communities = algorithms.infomap(G_nx)
print(f"Detected {len(communities.communities)} communities with Infomap")

# Conductance with summary=False: later cells zip the result with
# communities.communities, so it is expected to hold one value per community
# in the same order — TODO confirm against the cdlib documentation.
conductances = evaluation.conductance(G_nx, communities, summary=False)

Detected 41 communities with Infomap


In [24]:
def _min_max_normalize(values):
    """Rescale ``values`` with min-max normalisation.

    Args:
        values: One-dimensional sequence or array of numbers.

    Returns:
        numpy.ndarray: ``(values - min) / (max - min + 1e-9)`` as floats. The
        small epsilon avoids a division by zero when all values are equal (the
        result is then all zeros). An empty input yields an empty array
        instead of raising.
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return arr
    lowest = arr.min()
    return (arr - lowest) / (arr.max() - lowest + 1e-9)


# Relative weight of each normalised signal in the combined community score.
weights = {
    "density": 0.2,
    "conductance": 0.5,
    "size": 0.2,
    "modularity": 0.1
}

# Exclusive lower bounds for each quality label, checked from strongest to
# weakest; a score that clears none of them is labelled "Weak community".
score_thresholds = (
    (0.75, "Very Strong community"),
    (0.5, "Strong community"),
    (0.25, "Moderate community"),
)

# Overall modularity of the detected partition
modularity_score = evaluation.newman_girvan_modularity(G_nx, communities).score

# The scoring loop pairs communities with conductances positionally; zip()
# would silently drop communities if the two ever got out of step.
if len(conductances) != len(communities.communities):
    raise ValueError(
        f"Expected one conductance per community, got {len(conductances)} "
        f"values for {len(communities.communities)} communities"
    )

cluster_tree_base = {}
type_counts = {label: 0 for _, label in score_thresholds}
type_counts["Weak community"] = 0

# Collect the raw per-community metrics once; they are reused both for the
# normalisation and for the per-community report below.
densities = []
sizes = []
mod_contribs = []
for cluster in communities.communities:
    num_users = len(cluster)
    density = compute_internal_density(G_nx, cluster)
    # NOTE(review): this is a heuristic (density * size / global modularity),
    # not the community's actual term in the modularity sum, and its sign
    # flips if the global modularity is negative — confirm this is intended.
    modularity_contrib = (density * num_users) / (modularity_score + 1e-9)
    densities.append(density)
    sizes.append(num_users)
    mod_contribs.append(modularity_contrib)

densities = np.array(densities)
sizes = np.array(sizes)
mod_contribs = np.array(mod_contribs)

# Min-max normalisation so the signals are comparable before weighting
dens_norm = _min_max_normalize(densities)
sizes_norm = _min_max_normalize(sizes)
mod_norm = _min_max_normalize(mod_contribs)

# Score and classify every community
for k, (cluster, cond_score) in enumerate(zip(communities.communities, conductances)):
    num_users = len(cluster)
    density = densities[k]
    modularity_contrib = mod_contribs[k]
    edges_in, edges_cut = compute_edges_cut(G_nx, cluster)

    # Lower conductance is better, so it enters the score inverted.
    cluster_score = (weights["density"] * dens_norm[k] +
                     weights["conductance"] * (1 - cond_score) +
                     weights["size"] * sizes_norm[k] +
                     weights["modularity"] * mod_norm[k])

    # Classification based on the combined score
    comm_type = "Weak community"
    for threshold, label in score_thresholds:
        if cluster_score > threshold:
            comm_type = label
            break
    type_counts[comm_type] += 1

    users = [mapping[node_id] for node_id in cluster]
    cluster_tree_base[str(k)] = {
        "users": [str(u) for u in users],
        "type": comm_type,
        "conductance": float(cond_score),
        "internal_density": float(density),
        "edges_inside": edges_in,
        "edges_cut": edges_cut,
        "modularity_contribution": float(modularity_contrib),
        "num_users": num_users,
        "community_score": float(cluster_score)
    }

# Per-label totals, exposed under the names the following cells read.
very_strong_comm = type_counts["Very Strong community"]
strong_comm = type_counts["Strong community"]
moderate_comm = type_counts["Moderate community"]
weak_comm = type_counts["Weak community"]
# No rule assigns the "noisy" label; the name is kept (always 0) for compatibility.
noisy_comm = 0


In [25]:
# Write the per-community report to disk as pretty-printed UTF-8 JSON
cluster_json_path = os.path.join(save_dir, 'cluster_tree_base.json')
with open(cluster_json_path, 'w', encoding='utf-8') as out_file:
    json.dump(cluster_tree_base, out_file, indent=4, ensure_ascii=False)

# Short recap of how many communities fell into each quality label
summary_text = (
    f"Total communities: {len(communities.communities)}\n"
    f"Very strong: {very_strong_comm}\n"
    f"Strong: {strong_comm}\n"
    f"Moderate: {moderate_comm}\n"
    f"Weak: {weak_comm}"
)
print(summary_text)

Total communities: 41
Very strong: 1
Strong: 3
Moderate: 7
Weak: 30
