In [None]:
import networkx as nx
import random

In [2]:
#############################
# Utility Functions
#############################

def partition_to_communities(partition):
    """
    Turn a node -> label mapping into a list of node sets.

    Every distinct label found in ``partition`` yields one set holding all
    nodes that carry that label. Sets appear in the order in which their
    label is first encountered while iterating over ``partition``.
    """
    groups = {}
    for member, community_id in partition.items():
        # setdefault creates the bucket the first time a label shows up.
        groups.setdefault(community_id, set()).add(member)
    return list(groups.values())

def compute_modularity(G, partition):
    """
    Return the weighted modularity score of ``partition`` on graph ``G``.

    The node -> label dictionary is first regrouped into a list of node sets
    via ``partition_to_communities``; that list is then scored with
    NetworkX's ``modularity`` function, reading edge weights from the
    ``'weight'`` edge attribute.

    Note: G is assumed to be undirected here. Directed graphs would need the
    modularity computation to be adapted accordingly.
    """
    grouped_nodes = partition_to_communities(partition)
    return nx.algorithms.community.quality.modularity(
        G, grouped_nodes, weight='weight'
    )

In [3]:
#############################
# Genetic Algorithm Operators
#############################

def initialize_population(G, population_size):
    """
    Build the starting population for the genetic algorithm.

    Returns a list of ``population_size`` individuals. Each individual is a
    dictionary mapping every node of ``G`` to a community label drawn
    uniformly at random from the integers 0 .. N-1, where N is the number of
    nodes in ``G``.
    """
    node_list = list(G.nodes())
    top_label = len(node_list) - 1

    def random_partition():
        # Labels are arbitrary integers; only equality between them matters.
        return {vertex: random.randint(0, top_label) for vertex in node_list}

    return [random_partition() for _ in range(population_size)]

def tournament_selection(population, fitnesses, tournament_size=3):
    """
    Pick one individual from the population by tournament selection.

    A random subset of the population is drawn without replacement and the
    member with the highest fitness is returned. When several contenders
    share the top fitness, the one drawn first wins.

    Parameters:
      population       - Sequence of individuals.
      fitnesses        - Sequence of fitness values; fitnesses[i] belongs to
                         population[i].
      tournament_size  - Number of contenders to draw. If it exceeds the
                         population size, the whole population competes
                         instead of raising an error.

    Returns:
      The winning individual (the same object stored in ``population``).

    Raises:
      ValueError - if the population is empty or tournament_size < 1.
    """
    pool_size = len(population)
    if pool_size == 0:
        raise ValueError("cannot run a tournament on an empty population")
    if tournament_size < 1:
        raise ValueError("tournament_size must be at least 1")

    # random.sample refuses to draw more items than exist, so cap the
    # tournament at the population size for small populations.
    contenders = random.sample(range(pool_size), min(tournament_size, pool_size))
    # max() keeps the first maximal element, matching a strict '>' scan.
    winner = max(contenders, key=lambda idx: fitnesses[idx])
    return population[winner]

def uniform_crossover(parent1, parent2):
    """
    Breed a child partition from two parents by uniform crossover.

    For every node (gene) of ``parent1``, a fair coin flip decides whether
    the child inherits that node's community label from ``parent1`` or from
    ``parent2``. The child has the same keys, in the same order, as
    ``parent1``.
    """
    return {
        gene: (parent1 if random.random() < 0.5 else parent2)[gene]
        for gene in parent1
    }

def mutate(individual, mutation_rate, num_possible_communities):
    """
    Return a mutated copy of ``individual``; the input is left untouched.

    Each node (gene) is independently reassigned, with probability
    ``mutation_rate``, to a community label drawn uniformly from the
    integers 0 .. num_possible_communities - 1.
    """
    top_label = num_possible_communities - 1
    result = individual.copy()  # work on a copy so the caller's dict survives
    for gene in individual:
        roll = random.random()
        if roll < mutation_rate:
            result[gene] = random.randint(0, top_label)
    return result

In [None]:
def genetic_algorithm_community_detection(G,
                                          population_size=50,
                                          generations=100,
                                          mutation_rate=0.1,
                                          tournament_size=3):
    """
    Run the genetic algorithm to search for a partition that maximizes
    the weighted modularity in graph G.

    Each generation replaces the whole population: every child is produced
    by two tournament selections, a uniform crossover, and a mutation step.
    The best individual seen across the initial population and all
    generations is remembered and returned. Progress is printed once per
    generation.

    Parameters:
      G                  - A weighted graph (NetworkX graph)
      population_size    - Number of candidate partitions maintained in the population
      generations        - Number of iterations (generations) to run
      mutation_rate      - Probability of mutating a node's label in an individual
      tournament_size    - Number of individuals competing in tournament selection

    Returns:
      best_individual    - The partition (dict node -> community) with the highest modularity found.
      best_fitness       - The corresponding modularity score.

    Raises:
      ValueError         - if population_size is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    # Initialize the population with random partitions and score each one
    # (fitness = modularity value).
    population = initialize_population(G, population_size)
    fitnesses = [compute_modularity(G, individual) for individual in population]

    # Seed the best-so-far tracker with the fittest member of the initial
    # population, not just its first member, so a strong random start is
    # never lost (and generations=0 still returns the best candidate).
    best_idx = max(range(len(population)), key=lambda i: fitnesses[i])
    best_individual = population[best_idx]
    best_fitness = fitnesses[best_idx]

    # The label range used by mutation does not change between generations.
    num_labels = len(G.nodes())

    for gen in range(generations):
        new_population = []
        # Generate a new population by selection, crossover, and mutation.
        for _ in range(population_size):
            parent1 = tournament_selection(population, fitnesses, tournament_size)
            parent2 = tournament_selection(population, fitnesses, tournament_size)
            child = uniform_crossover(parent1, parent2)
            child = mutate(child, mutation_rate, num_labels)
            new_population.append(child)
        population = new_population

        # Re-score the new population and update the best-so-far tracker.
        fitnesses = [compute_modularity(G, individual) for individual in population]
        for individual, fitness in zip(population, fitnesses):
            if fitness > best_fitness:
                best_fitness = fitness
                best_individual = individual
        print("Generation", gen, "Best modularity:", best_fitness)

    return best_individual, best_fitness

In [5]:
#############################
# Example Test Case
#############################

# Build a small weighted demo graph consisting of two 4-node clusters.
G = nx.Graph()

# Register nodes 1 through 8 up front.
G.add_nodes_from(range(1, 9))

# Cluster 1 (nodes 1-4): (u, v, weight) triples with relatively high weights.
edges_cluster1 = [(1, 2, 3), (2, 3, 3), (3, 4, 3), (4, 1, 3), (2, 4, 2)]
# Cluster 2 (nodes 5-8): (u, v, weight) triples with relatively high weights.
edges_cluster2 = [(5, 6, 3), (6, 7, 3), (7, 8, 3), (8, 5, 3), (6, 8, 2)]
# A single weak bridge connecting the two clusters.
inter_cluster_edge = (4, 5, 1)

# Insert every edge in one call, keeping the original insertion order:
# cluster 1, then cluster 2, then the bridge.
G.add_weighted_edges_from(
    edges_cluster1 + edges_cluster2 + [inter_cluster_edge],
    weight='weight',
)

# Search for a high-modularity partition with the genetic algorithm.
ga_settings = dict(
    population_size=50,
    generations=100,
    mutation_rate=0.1,
    tournament_size=3,
)
best_partition, best_modularity = genetic_algorithm_community_detection(G, **ga_settings)

# Report the outcome, both as a raw mapping and grouped by community.
print("\nBest partition found with modularity", best_modularity)
print("Partition (node -> community):", best_partition)
communities = partition_to_communities(best_partition)
print("Detected communities:", communities)

Generation 0 Best modularity: 0.24435196195005954
Generation 1 Best modularity: 0.24435196195005954
Generation 2 Best modularity: 0.24435196195005954
Generation 3 Best modularity: 0.24435196195005954
Generation 4 Best modularity: 0.350178359096314
Generation 5 Best modularity: 0.350178359096314
Generation 6 Best modularity: 0.350178359096314
Generation 7 Best modularity: 0.46551724137931044
Generation 8 Best modularity: 0.46551724137931044
Generation 9 Best modularity: 0.46551724137931044
Generation 10 Best modularity: 0.46551724137931044
Generation 11 Best modularity: 0.46551724137931044
Generation 12 Best modularity: 0.46551724137931044
Generation 13 Best modularity: 0.46551724137931044
Generation 14 Best modularity: 0.46551724137931044
Generation 15 Best modularity: 0.46551724137931044
Generation 16 Best modularity: 0.46551724137931044
Generation 17 Best modularity: 0.46551724137931044
Generation 18 Best modularity: 0.46551724137931044
Generation 19 Best modularity: 0.46551724137931