In [33]:
# Re-import the config loader module so that edits made to it on disk are
# picked up without restarting the kernel.
import importlib
import src.utils.config_loader
importlib.reload(src.utils.config_loader)

from src.utils.config_loader import ConfigLoader

# Load all configuration once, then extract the three sections consumed by the
# later cells: "base" (data processing), "graph" (graph building) and
# "community" (community detection).
config_loader = ConfigLoader()
all_configs = config_loader.load_configs()
base_configs, graph_configs, community_cfg = (
    config_loader.get_section(all_configs, section_name)
    for section_name in ("base", "graph", "community")
)

In [2]:
# Report the installed PyTorch build and whether CUDA can be used, then pick
# the device that the processing cells will run on.
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}") # type: ignore

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

PyTorch version: 2.8.0+cu128
CUDA available: True
CUDA version: 12.8


In [3]:
# Preprocess data
# Reload the data processor module first so that local edits to it take effect
# without restarting the kernel.
import importlib
import src.modules.data_processor
importlib.reload(src.modules.data_processor)

from src.modules.data_processor import DataProcessor

# Show the "base" configuration section that drives preprocessing.
print(base_configs)

# Build the processor from that section and run it on the selected device.
# The result is kept in `processed_data`; the graph-building cell reads its
# `user_pairs` and `comments` attributes.
data_processor = DataProcessor(base_configs=base_configs)
processed_data = data_processor.run(
    device=device,
    summarize=False,
)

{'paths': {'raw': 'data/raw/deb_label.csv', 'processed': 'data/processed'}, 'labels': {0: 'disagree', 1: 'neutral', 2: 'agree'}, 'subreddits': {'brexit': 0, 'blacklivesmatter': 1, 'climate': 2, 'democrats': 3, 'republican': 4}, 'required_columns': ['label', 'msg_id_parent', 'msg_id_child', 'submission_id', 'body_parent', 'body_child', 'submission_text', 'subreddit', 'author_parent', 'author_child', 'datetime', 'agreement_fraction', 'individual_kappa'], 'cleaning': {'normalize_subreddits': True, 'rename_columns': {'author_child': 'src_author', 'author_parent': 'dst_author', 'msg_id_child': 'src_comment_id', 'msg_id_parent': 'dst_comment_id', 'body_child': 'src_comment_text', 'body_parent': 'dst_comment_text', 'datetime': 'timestamp'}, 'timestamp_parsing': {'primary_format': '%d/%m/%Y %H:%M', 'dayfirst': True, 'error_handling': 'coerce', 'fallback_formats': ['%Y-%m-%d %H:%M:%S', '%m/%d/%Y %H:%M', '%Y-%m-%d']}, 'remove_self_replies': True}, 'temporal': {'infer_parent_comment_time': {'enab

In [58]:
# Build graphs
# Reload the graph processor module first so that local edits to it take
# effect without restarting the kernel.
import importlib
import src.modules.graph_processor
importlib.reload(src.modules.graph_processor)

from src.modules.graph_processor import GraphProcessor

# Resolve the directory for processed artifacts. The base config printed by
# the preprocessing cell stores it under base_configs['paths']['processed'],
# not under a top-level 'processed_path' key, so looking up only
# 'processed_path' would silently ignore the configured value and always use
# the hard-coded default. Lookup order:
#   1. top-level 'processed_path' (kept for backward compatibility),
#   2. nested paths -> processed,
#   3. the literal default 'data/processed'.
paths_cfg = base_configs.get('paths') or {}
processed_path = base_configs.get(
    'processed_path',
    paths_cfg.get('processed', 'data/processed'),
)

# Inputs produced by the preprocessing cell.
pairs = processed_data.user_pairs
comments = processed_data.comments

# Build the graph data from the user pairs, using the comments as the source
# of embeddings for node features.
graph_processor = GraphProcessor(graph_configs=graph_configs, processed_path=processed_path)
graph_data = graph_processor.run(pairs=pairs, embeddings_source=comments)

Building node features with pooling: mean
    + Total unique authors in pairs: 35257
    + Total pooled vectors: 35212
    + Pooled vector dimension: 384
Building graph snapshots: directed=True, use_wcc=True, edge_attrs=['mean_confidence', 'net_vector']
    + [Subreddit 0, T0] 2 edges filtered by WCC (86 -> 84)
    + [Subreddit 0, T7] 1 edges filtered by WCC (177 -> 176)
    + [Subreddit 0, T11] 1 edges filtered by WCC (944 -> 943)
    + [Subreddit 0, T13] 4 edges filtered by WCC (701 -> 697)
    + [Subreddit 0, T15] 1 edges filtered by WCC (1195 -> 1194)
    + [Subreddit 0, T16] 2 edges filtered by WCC (608 -> 606)
    + [Subreddit 0, T18] 1 edges filtered by WCC (948 -> 947)
    + [Subreddit 0, T19] 1 edges filtered by WCC (893 -> 892)
    + [Subreddit 0, T21] 2 edges filtered by WCC (544 -> 542)
    + [Subreddit 0, T22] 4 edges filtered by WCC (421 -> 417)
    + [Subreddit 1, T0] 456 edges filtered by WCC (599 -> 143)
    + [Subreddit 1, T1] 281 edges filtered by WCC (323 -> 42)
   

In [62]:
# Run community detection
# Reload the community processor module first so that local edits to it take
# effect without restarting the kernel.
import importlib
import src.modules.community_processor
importlib.reload(src.modules.community_processor)

from src.modules.community_processor import LeidenCommunityProcessor

# Graphs produced by the graph-building cell.
graph_dict = graph_data.graph_dict

# Show the "community" configuration section and build the processor from it.
print(community_cfg)
leiden_processor = LeidenCommunityProcessor(community_configs=community_cfg)

# Run optimization analysis
# The call returns four values: metadata, the scan results, the best
# resolution per graph, and a per-resolution summary.
(opt_meta, scans_df, best_df, summary_df) = (
    leiden_processor.analyze_optimal_community_parameters(graph_dict)
)

print("Optimization meta:", opt_meta)

# Either table may be None; show the literal string "None" in that case.
print("\nBest resolutions per graph:")
display("None" if best_df is None else best_df)
print("Resolution summary:")
display("None" if summary_df is None else summary_df)

{'algorithm': 'leiden', 'seed': 42, 'weights': {'strategy': 'agreement_diff'}, 'optimization': {'mode': 'per_graph', 'metric': 'modularity', 'min_communities': 2, 'min_community_size': 3, 'max_communities_factor': 1.5, 'resolution': {'grid': [0.01, 0.05, 0.1, 0.25, 0.4, 0.6, 0.9, 1.0], 'early_stop': {'window': 3, 'delta': 0.001}}}, 'algorithms': {'leiden': {'partition_type': 'RBConfigurationVertexPartition'}}}
Optimization meta: {'mode': 'per_graph', 'algorithm': 'leiden', 'partition_type': 'RBConfigurationVertexPartition', 'weight_strategy': 'agreement_diff', 'uniform_choice': None, 'uniform_scores': [], 'res_grid': [0.01, 0.05, 0.1, 0.25, 0.4, 0.6, 0.9, 1.0]}

Best resolutions per graph:


Unnamed: 0,subreddit_id,timestep,resolution,num_nodes,num_edges,num_communities,min_size,max_size,mean_size,metric_value,modularity
0,0,2,0.9,31,59,5,4,9,6.2,0.403045,0.403045
1,0,10,1.0,214,555,9,15,30,23.777778,0.402388,0.402388
2,0,13,1.0,265,697,11,16,33,24.090909,0.414099,0.414099
3,0,16,1.0,206,606,9,16,37,22.888889,0.365495,0.365495
4,0,18,0.9,264,947,8,18,50,33.0,0.340207,0.340207
5,0,19,1.0,265,892,10,10,37,26.5,0.343115,0.343115
6,0,21,1.0,221,542,9,11,36,24.555556,0.432107,0.432107
7,0,22,0.9,186,417,8,14,32,23.25,0.449899,0.449899
8,1,0,0.9,144,143,13,4,26,11.076923,0.814588,0.814588
9,1,1,0.9,43,42,6,5,9,7.166667,0.706349,0.706349


Resolution summary:


Unnamed: 0,resolution,metric_mean,metric_std,modularity_mean,num_communities_mean,num_communities_min,num_communities_max,community_size_mean,community_size_min,community_size_max
0,0.01,0.074971,,0.074971,2.0,2,2,604.0,53,1155
1,0.05,0.317899,0.168212,0.317899,3.028571,2,8,129.067891,11,1053
2,0.1,0.531722,0.166008,0.531722,4.0,2,10,72.835229,6,906
3,0.25,0.591583,0.262751,0.591583,5.810345,2,13,44.453785,3,363
4,0.4,0.679227,0.205097,0.679227,8.240741,2,19,28.956283,3,206
5,0.6,0.694737,0.165395,0.694737,10.122807,2,25,24.089772,3,112
6,0.9,0.706966,0.157605,0.706966,13.178571,3,31,18.290026,3,90
7,1.0,0.712826,0.156541,0.712826,14.333333,4,32,17.754046,3,84


In [63]:
# Community detection results using best parameters
# NOTE(review): use_optimization=True presumably makes the processor apply the
# resolutions chosen by the optimization analysis - confirm in
# LeidenCommunityProcessor.run_community_detection.
community_result = leiden_processor.run_community_detection(graph_dict=graph_dict, use_optimization=True)

# Show the per-graph partition table, then the metadata describing the run.
print("Partitions:")
display(community_result.partitions)

print("Meta:")
print(community_result.meta)

Partitions:


Unnamed: 0,subreddit_id,timestep,resolution_used,num_nodes,num_edges,num_communities,modularity,community_sizes
0,0,0,1.0,29,84,3,0.249787,"[11, 9, 9]"
1,0,1,1.0,30,79,4,0.326791,"[9, 9, 6, 6]"
2,0,2,0.9,31,59,5,0.403045,"[9, 7, 6, 5, 4]"
3,0,3,1.0,43,107,6,0.312997,"[12, 10, 7, 7, 4, 3]"
4,0,4,1.0,53,134,6,0.337464,"[12, 11, 9, 8, 7, 6]"
...,...,...,...,...,...,...,...,...
72,4,4,0.9,975,1073,27,0.852925,"[90, 54, 51, 49, 45, 44, 42, 40, 38, 37, 37, 3..."
73,4,5,0.9,1208,1332,31,0.859669,"[59, 55, 53, 52, 52, 49, 47, 43, 43, 42, 42, 4..."
74,4,6,0.9,385,400,19,0.868500,"[31, 27, 25, 25, 24, 23, 23, 22, 21, 20, 20, 1..."
75,4,7,0.9,679,727,23,0.873055,"[51, 45, 44, 43, 41, 39, 38, 37, 30, 29, 28, 2..."


Meta:
{'algorithm': 'leiden', 'partition_type': 'RBConfigurationVertexPartition', 'weight_strategy': 'agreement_diff', 'use_optimization': True, 'force_resolution': None, 'optimization_meta': {'mode': 'per_graph', 'algorithm': 'leiden', 'partition_type': 'RBConfigurationVertexPartition', 'weight_strategy': 'agreement_diff', 'uniform_choice': None, 'uniform_scores': [], 'res_grid': [0.01, 0.05, 0.1, 0.25, 0.4, 0.6, 0.9, 1.0]}}
