In [None]:
import networkx as nx
import pandas as pd
from community import community_louvain
import random
import glob

In [None]:
# Glob pattern for the ratings TSV files; adjust the directory/pattern to your data source.
path_to_files = './ratings-*.tsv'  # Assuming the files are in the current directory

# Sort the matches: glob.glob returns paths in arbitrary (filesystem-dependent)
# order, and the concatenation order decides the node insertion order of the graph.
file_list = sorted(glob.glob(path_to_files))
if not file_list:
    # Fail early with an actionable message instead of pandas' generic
    # "No objects to concatenate" error raised by pd.concat([]).
    raise FileNotFoundError(f"No rating files match pattern {path_to_files!r}")

# Read only the two columns needed for the graph from each file, then stack them
df_list = [pd.read_csv(file, sep='\t', usecols=['noteId', 'raterParticipantId']) for file in file_list]
df = pd.concat(df_list, ignore_index=True)

# Build an undirected graph with an edge between each rater and every note they rated.
# nx.Graph keeps at most one edge per (rater, note) pair, so repeated rows collapse.
G = nx.from_pandas_edgelist(df, 'raterParticipantId', 'noteId', create_using=nx.Graph())

In [None]:
# Detect communities with the Louvain method.
# Louvain visits nodes in random order, so without a fixed seed every run can
# return a different partition (and different downstream metrics). Pin the seed
# so that Restart & Run All reproduces the same communities.
LOUVAIN_SEED = 42
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

In [None]:
# Score the detected partition against the graph it was computed on, then report it
modularity_score = community_louvain.modularity(partition, graph=G)
print(f"Modularity Score: {modularity_score}")

In [None]:
# Invert the node -> community-id mapping into community-id -> list of member nodes
communities = {}
for node, comm in partition.items():
    # setdefault creates the member list the first time a community id is seen
    communities.setdefault(comm, []).append(node)

In [None]:
# Report size, edge density, clustering coefficient and conductance for every community
for comm, nodes in communities.items():
    # Metrics internal to the community are computed on its induced subgraph
    subgraph = G.subgraph(nodes)
    edge_density = nx.density(subgraph)
    # NOTE(review): G only has rater-note edges, so if rater IDs and note IDs never
    # coincide the graph is bipartite, contains no triangles, and this value is
    # always 0. Consider nx.bipartite.average_clustering instead - TODO confirm.
    clustering_coefficient = nx.average_clustering(subgraph)
    # Conductance compares the community against the rest of the graph, so it is
    # computed on the full graph G rather than on the subgraph.
    try:
        conductance = nx.conductance(G, nodes)
    except ZeroDivisionError:
        # nx.conductance divides by the smaller of the two volumes; when the
        # community covers the whole graph the complement is empty (volume 0)
        # and conductance is undefined. Report NaN instead of aborting the loop.
        conductance = float("nan")

    print(f"Community {comm}:")
    print(f"  Nodes: {len(nodes)}")
    print(f"  Edge Density: {edge_density}")
    print(f"  Clustering Coefficient: {clustering_coefficient}")
    print(f"  Conductance: {conductance}")