# Week 6

### instructions
0) Treat your graph as undirected and unweighted. Delete loops and work on the resulting largest
connected component.
1) Implement the following three techniques for community detection:
a) Bridge removal (pick the partition with the highest modularity), b) Modularity optimization, c) Label
propagation.
In this case, you are allowed to use built-in functions from NetworkX.
2) Compare the results of each technique in terms of: a) number of detected clusters, b) cluster size
distribution, c) computational time, d) modularity, e) other aspects you consider relevant, if any. Results
should be presented in a table.
3) Interpret the differences and similarities between the three resulting partitions, and
discuss which one you think is the best and why.
4) Provide a visualization for the partition you decided to be the best using Gephi.
5) Optional: for each pair of partitions compute the NMI between them and discuss.

In [5]:
import networkx as nx
import csv
import matplotlib.pyplot as plt

# Load the graph from CSV files
def load_graph(nodes_file_path, edges_file_path):
    """Build a graph from a node CSV file and an edge CSV file.

    Args:
        nodes_file_path: Path to a CSV file with ``Id`` and ``Label`` columns;
            every row becomes a node keyed by ``Id`` with a ``label``
            attribute.
        edges_file_path: Path to a CSV file with ``Source`` and ``Target``
            columns; every row becomes a link between the two ids.

    Returns:
        An ``nx.DiGraph`` in which each link is stored as two opposite arcs
        of weight 1, so the graph is symmetric. Rows whose source equals
        their target (self-loops) are skipped.

    Raises:
        KeyError: If a required column is missing from a row.
        OSError: If either file cannot be opened.
    """
    G = nx.DiGraph()

    # The csv module asks for files opened with newline='' so that newlines
    # embedded in quoted fields are parsed correctly; the explicit encoding
    # keeps non-ASCII labels independent of the platform default.
    with open(nodes_file_path, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            G.add_node(row['Id'], label=row['Label'])

    with open(edges_file_path, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            source, target = row['Source'], row['Target']
            if source == target:  # Exclude self-loops
                continue
            # Add both directions so the directed graph behaves as undirected
            G.add_edge(source, target, weight=1)
            G.add_edge(target, source, weight=1)

    return G



In [10]:
from networkx.algorithms import community

def get_largest_connected_component(G):
    """Return the part of ``G`` induced by its largest connected component.

    Edge direction is ignored when deciding connectivity.

    Args:
        G: A NetworkX graph, directed or undirected.

    Returns:
        ``G.subgraph(nodes)`` for the component with the most nodes (the
        first one found on ties), or an empty ``nx.Graph`` when ``G`` has
        no nodes.
    """
    # Weak connectivity of a directed graph is connectivity of its undirected
    # version, so the components can be found without deep-copying the graph
    # through to_undirected().
    if G.is_directed():
        components = nx.weakly_connected_components(G)
    else:
        components = nx.connected_components(G)

    # default=None covers the empty graph, where there are no components
    largest_component = max(components, key=len, default=None)
    if largest_component is None:
        return nx.Graph()

    return G.subgraph(largest_component)

def community_detection_bridge_removal(G):
    """Detect communities by bridge removal (Girvan-Newman).

    Every partition produced by ``community.girvan_newman`` is scored with
    ``community.modularity`` on ``G`` and the highest-scoring one is kept
    (the first one on ties).

    Args:
        G: A NetworkX graph.

    Returns:
        The best partition, in the form yielded by ``girvan_newman``
        (a tuple of node sets).
    """
    def partition_modularity(candidate):
        return community.modularity(G, candidate)

    # girvan_newman is a generator; feeding it straight to max() keeps only
    # the current best partition in memory instead of a list of every level.
    return max(community.girvan_newman(G), key=partition_modularity)

def community_detection_modularity_optimization(G):
    """Detect communities by greedy modularity optimization.

    Returns:
        A dict mapping each node to the index of the community it belongs
        to, where indices follow the order in which
        ``community.greedy_modularity_communities`` returns the communities.
    """
    node_to_index = {}
    detected = community.greedy_modularity_communities(G)
    for index, members in enumerate(detected):
        for node in members:
            node_to_index[node] = index
    return node_to_index

def community_detection_label_propagation(G):
    """Detect communities with the label propagation algorithm.

    Args:
        G: A NetworkX graph. Directed input is treated as undirected.

    Returns:
        A list of node sets, one per detected community.
    """
    # NetworkX does not implement label_propagation_communities for directed
    # graphs, so a directed graph is converted to its undirected form first.
    if G.is_directed():
        G = G.to_undirected()

    # Materialise the result so callers can take len() of it and iterate it
    # more than once.
    return [set(nodes) for nodes in community.label_propagation_communities(G)]

# Read the node and edge tables into the module-level graph G
G = load_graph(
    '/Users/coding/Desktop/network_analysis/sna_titanic/Project/Graph/nodes.csv',
    '/Users/coding/Desktop/network_analysis/sna_titanic/Project/Graph/edges.csv',
)



In [11]:
import time
from collections import Counter

def evaluate_results(partition, ground_truth=None, G=None):
    """Summarise a detected partition.

    Args:
        partition: Either a mapping ``node -> community label`` or an
            iterable of node collections (one per community). In the second
            form the communities are labelled 0, 1, 2, ... in iteration
            order.
        ground_truth: Currently unused; kept for interface compatibility.
        G: The graph the partition was computed on. When omitted, the
            modularity cannot be computed and is reported as ``None``.

    Returns:
        A dict with the keys ``'Number of Clusters'`` (int),
        ``'Cluster Sizes'`` (``Counter`` mapping label to node count) and
        ``'Modularity'`` (float, or ``None`` when ``G`` is not given).
    """
    # Normalise both accepted input forms to {label: set of member nodes}
    if hasattr(partition, 'items'):
        members_by_label = {}
        for node, label in partition.items():
            members_by_label.setdefault(label, set()).add(node)
    else:
        members_by_label = {
            index: set(nodes) for index, nodes in enumerate(partition)
        }

    num_clusters = len(members_by_label)

    cluster_sizes = Counter(
        {label: len(nodes) for label, nodes in members_by_label.items()}
    )

    # Modularity is defined relative to a graph; it needs the communities as
    # a list of node sets.
    modularity = None
    if G is not None:
        modularity = community.modularity(G, list(members_by_label.values()))

    evaluation_metrics = {
        'Number of Clusters': num_clusters,
        'Cluster Sizes': cluster_sizes,
        'Modularity': modularity,
        # Add more metrics as needed
    }

    return evaluation_metrics

def compare_techniques(G):
    """Run the three community detection techniques on ``G`` and print results.

    For each technique (bridge removal, modularity optimization, label
    propagation) the detection step alone is timed, the resulting partition
    is evaluated with ``evaluate_results`` against ``G``, and one line with
    the metrics and the elapsed time in seconds is printed.

    Args:
        G: The graph to analyse.
    """
    techniques = (
        ("Bridge Removal", community_detection_bridge_removal),
        ("Modularity Optimization", community_detection_modularity_optimization),
        ("Label Propagation", community_detection_label_propagation),
    )

    results = []
    for name, detect in techniques:
        # Time only the detection itself so that the cost of computing the
        # metrics does not distort the reported computational time.
        start_time = time.perf_counter()
        partition = detect(G)
        elapsed = time.perf_counter() - start_time

        # The techniques return different shapes (node -> label dict or a
        # collection of node sets); hand evaluate_results a node -> label dict.
        if not hasattr(partition, 'items'):
            partition = {
                node: index
                for index, nodes in enumerate(partition)
                for node in nodes
            }

        # The graph is required for the modularity computation
        metrics = evaluate_results(partition, G=G)
        results.append((name, metrics, elapsed))

    # Print the results
    for name, metrics, elapsed in results:
        print(f"{name} Results:", metrics, "Time:", elapsed)