In [None]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import community.community_louvain as community_louvain
from networkx.algorithms.community import girvan_newman
from networkx.algorithms.community.quality import modularity

In [None]:
# Load cleaned dataset and drop self-contacts (rows where both endpoints are the same person)
df = pd.read_csv('cleaned_dataset.csv')
df = df[df['person_A'] != df['person_B']]

# Aggregate contacts as weights: one row per *ordered* (person_A, person_B) pair
edge_df = df.groupby(['person_A', 'person_B']).size().reset_index(name='weight')

# G is undirected, so (A, B) and (B, A) are the same edge. The groupby above
# keeps them as two separate rows; calling add_edge twice would make the second
# count silently overwrite the first. Accumulate instead so the edge weight is
# the total number of contacts between the two people, whichever column each
# of them appeared in.
G = nx.Graph()
for person_a, person_b, weight in edge_df.itertuples(index=False, name=None):
    weight = int(weight)
    if G.has_edge(person_a, person_b):
        G[person_a][person_b]['weight'] += weight
    else:
        G.add_edge(person_a, person_b, weight=weight)

print("Nodes:", G.number_of_nodes(), "| Edges:", G.number_of_edges())

In [None]:
# Run Louvain community detection on the contact-weighted graph.
# Louvain visits nodes in a randomised order, so without a fixed random_state
# the community ids (and everything derived from them below) change between
# runs. The seed matches the one used for the spring layout.
partition = community_louvain.best_partition(G, weight='weight', random_state=42)

# Add community info to node attributes (stored under the 'community' key)
nx.set_node_attributes(G, partition, 'community')

# Convert the node -> community-id mapping to a two-column DataFrame
community_df = pd.DataFrame(list(partition.items()), columns=['Person', 'Community'])
print(community_df.head())

In [None]:
# Girvan-Newman returns an iterator of successively finer partitions.
# Only the first partition it yields is consumed here; each community is
# stored as a sorted list of its nodes.
comp = girvan_newman(G)
first_level = next(comp)
limited = tuple(sorted(members) for members in first_level)
print(f"Detected {len(limited)} communities using Girvan-Newman.")

In [None]:
# Invert the Louvain mapping (node -> community id) into
# community id -> set of member nodes. dict.fromkeys keeps the ids in
# first-seen order, so the communities are listed in the same order as
# they appear in `partition`.
communities = {comm_id: set() for comm_id in dict.fromkeys(partition.values())}
for node, comm_id in partition.items():
    communities[comm_id].add(node)

# Modularity of the Louvain partition on G
mod_score = modularity(G, communities.values())
print(f"Modularity Score: {mod_score:.3f}")


In [None]:
# One colour value per node (its Louvain community id), in G.nodes() order
communities_colors = [partition[node] for node in G.nodes()]

# Fixed seed so the spring layout is identical on every run
pos = nx.spring_layout(G, seed=42)

plt.figure(figsize=(10, 8))

# Drawing options gathered in one place; unpacked into draw_networkx below
draw_options = dict(
    node_color=communities_colors,
    cmap='tab10',
    with_labels=False,
    node_size=50,
    edge_color='gray',
    alpha=0.6,
)
nx.draw_networkx(G, pos, **draw_options)

plt.title("Community Detection (Louvain Method)", fontsize=13)
plt.show()


In [None]:
# Count members per community; value_counts orders the rows from largest
# community to smallest.
community_sizes = (
    community_df['Community']
    .value_counts()
    .rename_axis('Community')
    .reset_index(name='Size')
)
print(community_sizes)

# Bar chart of community sizes, one bar per community id
plt.figure(figsize=(6,4))
sns.barplot(data=community_sizes, x='Community', y='Size', palette='tab10')
plt.title("Community Size Distribution")
plt.show()
