In [1]:
import pandas as pd

In [2]:
data = pd.read_pickle("community_detection_data.pkl")

In [3]:
df_followings = data['followings']

In [4]:
df_followings.head()

Unnamed: 0,Followed_Id,Follower_Id
0,njsinc4el,brendafranzo
1,njsinc4el,praguebob
2,zachhwilliams,hungheroic
3,zachhwilliams,ridwan2906
4,zachhwilliams,nickcodipietro


# Community detection model

In [5]:
import igraph as ig

In [6]:
edges = list(zip(df_followings['Follower_Id'], df_followings['Followed_Id']))

Model 1: Leiden Method

In [7]:
grph_leiden = ig.Graph.TupleList(edges, directed=True)

In [8]:
grph_leiden.simplify() 

<igraph.Graph at 0x2417ae29050>

In [9]:
print(f"Number of nodes (users): {grph_leiden.vcount()}")
print(f"Number of edges (connections): {grph_leiden.ecount()}")

Number of nodes (users): 484252
Number of edges (connections): 18477147


In [10]:
import leidenalg as la

In [11]:
partition_leiden = la.find_partition(
    grph_leiden, 
    la.RBConfigurationVertexPartition, 
    resolution_parameter=1,
    n_iterations=10,
    seed=42
)

In [12]:
modularity_leiden = grph_leiden.modularity(partition_leiden)

In [13]:
print(f"Leiden found {len(partition_leiden)} communities with modularity {modularity_leiden:.4f}")

Leiden found 1741 communities with modularity 0.4337


Model 2: Louvian Method

In [14]:
grph_louvain = ig.Graph.TupleList(edges, directed=False)

In [15]:
grph_louvain.simplify()

<igraph.Graph at 0x2417ae2a450>

In [16]:
partition_louvain = grph_louvain.community_multilevel()

In [17]:
modularity_louvain = grph_louvain.modularity(partition_louvain)

In [18]:
print(f"Louvain found {len(partition_louvain)} communities with modularity {modularity_louvain:.4f}")

Louvain found 2260 communities with modularity 0.4183


# Leiden model evaluation

Modularity

In [19]:
modularity_leiden = grph_leiden.modularity(partition_leiden)

In [20]:
print(f"Leiden found {len(partition_leiden)} communities with modularity {modularity_leiden:.4f}")

Leiden found 1741 communities with modularity 0.4337


In [21]:
adj_outgoing_edges_leiden = {v.index: set(grph_leiden.successors(v.index)) for v in grph_leiden.vs}    

In [22]:
leiden_community_metrics = []


for community, subgraph in enumerate(partition_leiden.subgraphs()):
    density = subgraph.density()
    avg_deg = sum(subgraph.degree()) / len(subgraph.vs)

    nodes = subgraph.vs.indices
    nodes_set = set(nodes)
    
    internal_edges = sum(len(adj_outgoing_edges_leiden[node] & nodes_set) for node in nodes)
    cut_edges = sum(len(adj_outgoing_edges_leiden[node] - nodes_set) for node in nodes)
    
    total_edges = internal_edges + cut_edges
    conductance = cut_edges / total_edges if total_edges > 0 else 0

    leiden_community_metrics.append({
        "community": community,
        "size": len(nodes),
        "density": density,
        "avg_degree": avg_deg,
        "internal_edges": internal_edges,
        "cut_edges": cut_edges,
        "conductance": conductance
    })

In [23]:
df_leiden_community_metrics = pd.DataFrame(leiden_community_metrics, columns=['community', 'size', 'density', 'avg_degree', 'internal_edges', 'cut_edges', 'conductance'])

In [24]:
df_leiden_community_metrics.describe()

Unnamed: 0,community,size,density,avg_degree,internal_edges,cut_edges,conductance
count,1741.0,1741.0,1741.0,1741.0,1741.0,1741.0,1741.0
mean,870.0,278.145893,0.44539,1.734895,30551.89,81419.7,0.996323
std,502.727726,4302.89821,0.156627,5.72924,422202.3,667841.2,0.030769
min,0.0,2.0,0.000138,1.0,2.0,1426.0,0.278166
25%,435.0,2.0,0.333333,1.0,2.0,1426.0,0.998599
50%,870.0,2.0,0.5,1.0,2.0,1426.0,0.998599
75%,1305.0,3.0,0.5,1.333333,6.0,12946.0,0.999537
max,1740.0,147071.0,1.0,191.79143,11996460.0,7901668.0,0.999773


# Louvain Model Evaluation

In [25]:
modularity_louvain = grph_louvain.modularity(partition_louvain)

In [26]:
print(f"Louvain found {len(partition_louvain)} communities with modularity {modularity_louvain:.4f}")

Louvain found 2260 communities with modularity 0.4183


In [27]:
adj_edges_louvian = {v.index: set(grph_louvain.neighbors(v)) for v in grph_louvain.vs}

In [28]:
louvain_community_metrics = []

for community, subgraph in enumerate(partition_louvain.subgraphs()):
    density = subgraph.density()
    avg_deg = sum(subgraph.degree()) / len(subgraph.vs)
    
    nodes = subgraph.vs.indices
    nodes_set = set(nodes)
    
    # Internal edges: edges fully inside the community
    internal_edges = sum(len(adj_edges_louvian[node] & nodes_set) for node in nodes) / 2
    
    # Cut edges: edges leaving the community
    cut_edges = sum(len(adj_edges_louvian[node] - nodes_set) for node in nodes) / 2
    
    total_edges = internal_edges + cut_edges
    conductance = cut_edges / total_edges if total_edges > 0 else 0
    
    louvain_community_metrics.append({
        "community": community,
        "size": len(nodes),
        "density": density,
        "avg_degree": avg_deg,
        "internal_edges": internal_edges,
        "cut_edges": cut_edges,
        "conductance": conductance
    })

In [29]:
df_louvain_community_metrics = pd.DataFrame(louvain_community_metrics, columns=['community', 'size', 'density', 'avg_degree', 'internal_edges', 'cut_edges', 'conductance'])

In [30]:
df_louvain_community_metrics.describe()

Unnamed: 0,community,size,density,avg_degree,internal_edges,cut_edges,conductance
count,2260.0,2260.0,2260.0,2260.0,2260.0,2260.0,2260.0
mean,1129.5,214.270796,0.854282,1.459513,19287.2,36074.92,0.996655
std,652.550126,4156.840457,0.248876,4.362514,329798.0,332105.3,0.031775
min,0.0,2.0,0.000225,1.0,1.0,779.5,0.177751
25%,564.75,2.0,0.666667,1.0,1.0,779.5,0.998719
50%,1129.5,2.0,1.0,1.0,1.0,779.5,0.998719
75%,1694.25,3.0,1.0,1.333333,3.0,7609.5,0.999583
max,2259.0,170051.0,1.0,178.322094,11536470.0,4638498.0,0.999738
