In [78]:
# Use the Louvain method to detect communities in the network.
# source: https://sites.google.com/site/findcommunities/home?authuser=0
# source: https://networkx.org/documentation/stable/reference/algorithms/community.html#module-networkx.algorithms.community.louvain

import networkx as nx
import pandas as pd
from networkx.algorithms import community
import csv
import matplotlib.pyplot as plt
import random
import numpy as np

In [61]:
# Load the graph inputs from CSV: actor names become nodes, and the
# (source, target) pairs become edges.
# NOTE(review): the column positions used below are assumptions carried over
# from the original code; the CSV schemas are not visible here -- confirm.
actor_names = []
edges = []

# newline='' is what the csv module asks for when it is handed a file object,
# so that quoted fields containing line breaks are parsed as a single field.
with open('data/nodes2.csv', 'r', encoding='utf-8', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    # Skip the first row (assumed to be a header); the default avoids a
    # StopIteration if the file is empty.
    next(csvreader, None)
    for row in csvreader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        actor_names.append(row[1])  # actor name assumed to be in the second column

with open('data/edges_weighted2.csv', 'r', encoding='utf-8', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # skip the first row (assumed to be a header)
    for row in csvreader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        # Source and target are assumed to be the first two columns.
        # NOTE(review): the file name suggests a weight column exists, but only
        # the endpoints are read, so the graph is effectively unweighted --
        # confirm this is intended.
        edges.append((row[0], row[1]))



In [65]:
# Build the undirected actor graph. Actors are added first so that an actor
# with no edges is still present as an isolated node; the (source, target)
# pairs are added afterwards.
# NOTE(review): nodes are keyed by the value read from the nodes file while
# edges use the values read from the edges file. If those are not the same
# identifiers, add_edges_from will silently create extra nodes -- confirm the
# two files share keys.
G = nx.Graph()
G.add_nodes_from(actor_names)
G.add_edges_from(edges)

In [76]:
# Detect communities with the Louvain method, then report how many were found
# and the modularity of the resulting partition.
LOUVAIN_SEED = 2

# Keep the global RNG seeded for any other code that draws from `random`.
random.seed(LOUVAIN_SEED)

# Pass the seed explicitly so the result does not depend on the state of the
# global `random` module at the time this cell happens to run.
communities = community.louvain_communities(G, seed=LOUVAIN_SEED)

# Map every node to the index of the community that contains it.
partitions = {node: idx for idx, com in enumerate(communities) for node in com}

# Number of communities
num_communities = len(communities)
print("Number of Communities:", num_communities)

# Modularity of the detected partition
modularity = community.modularity(G, communities)
print("Modularity:", modularity)


Number of Communities: 367
Modularity: 0.9620386773801757


In [80]:
# Print summary statistics for the largest detected communities.
TOP_N_COMMUNITIES = 7

# Size of every community, keyed by community id. The id is the community's
# index in `communities`, which is also the value stored in `partitions`.
community_sizes = {comm_id: len(members) for comm_id, members in enumerate(communities)}

# (community id, size) pairs for the largest communities, biggest first.
largest_communities = sorted(
    community_sizes.items(), key=lambda x: x[1], reverse=True
)[:TOP_N_COMMUNITIES]

for comm_id, num_nodes in largest_communities:
    # Members are read straight from `communities` instead of re-scanning
    # every node in `partitions` for each community.
    community_nodes = list(communities[comm_id])
    subgraph = G.subgraph(community_nodes)

    # Average degree, counting only edges inside the community
    avg_degree = np.mean([d for n, d in subgraph.degree()])

    # Density
    density = nx.density(subgraph)

    # Degree centrality. Its mean is avg_degree / (n - 1), which matches the
    # density for an undirected graph without self-loops, so the two printed
    # values are expected to agree up to floating-point rounding.
    degree_centrality = nx.degree_centrality(subgraph)
    avg_degree_centrality = np.mean(list(degree_centrality.values()))

    # Betweenness centrality, restricted to paths inside the community
    betweenness_centrality = nx.betweenness_centrality(subgraph)
    avg_betweenness_centrality = np.mean(list(betweenness_centrality.values()))

    print(f"Community {comm_id}:")
    print(f"Number of Nodes: {num_nodes}")
    print(f"Average Degree: {avg_degree}")
    print(f"Density: {density}")
    print(f"Average Degree Centrality: {avg_degree_centrality}")
    print(f"Average Betweenness Centrality: {avg_betweenness_centrality}")
    print()


Community 303:
Number of Nodes: 214
Average Degree: 8.80373831775701
Density: 0.04133210477820192
Average Degree Centrality: 0.041332104778201914
Average Betweenness Centrality: 0.015179775532049638

Community 349:
Number of Nodes: 137
Average Degree: 8.671532846715328
Density: 0.06376127093173035
Average Degree Centrality: 0.06376127093173035
Average Betweenness Centrality: 0.027100329182767998

Community 241:
Number of Nodes: 131
Average Degree: 8.916030534351146
Density: 0.06858485026423958
Average Degree Centrality: 0.06858485026423958
Average Betweenness Centrality: 0.019935635699882104

Community 5:
Number of Nodes: 129
Average Degree: 8.713178294573643
Density: 0.06807170542635659
Average Degree Centrality: 0.06807170542635659
Average Betweenness Centrality: 0.026529977720808152

Community 11:
Number of Nodes: 122
Average Degree: 8.672131147540984
Density: 0.07167050535157837
Average Degree Centrality: 0.07167050535157839
Average Betweenness Centrality: 0.027288533622363727

Com