In [1]:
import random
from collections import Counter
from multiprocessing import Pool, cpu_count
from typing import Dict, List, Set, Tuple

import networkx as nx
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from icm_diffusion import simulate_diffusion_ICM
from src.graph import (
    create_polarized_graph,
    create_polarized_graph_multiple,
    graph_loader,
    random_color_graph,
    spectral_bipartition_coloring,
    spectral_partition_coloring,
    k_means_partition_coloring
)
from src.seed import seed_degree, seed_mia, seed_polarized, seed_random


In [2]:
from edge_addition import (
    edge_addition_custom,
    edge_addition_adamic_adar,
    edge_addition_preferential_attachment,
    edge_addition_jaccard,
    edge_addition_degree,
    edge_addition_topk,
    edge_addition_prob,
    edge_addition_kkt,
    edge_addition_random,
)

In [3]:
# Function to evaluate and compare the graph modifications

def evaluate_graph_modifications(G, seeds, k, max_iter, budget):
    # Simulate diffusion on the original graph
    original_results_dict = simulate_diffusion_ICM(G, seeds, max_iter)

    # Extract original results
    original_results = pd.DataFrame(
        {
            "Metric": [
                "Avg Activated Nodes",
                "Activated Nodes Std Dev",
                "Avg Color Activation Count",
                "Color Activation Count Std Dev",
            ],
            "Original Graph": [
                round(original_results_dict["avg_activated_nodes"], 3),
                round(original_results_dict["std_dev_activated_nodes"], 3),
                round(original_results_dict["avg_color_activation_count"], 3),
                round(original_results_dict["std_dev_color_activation_count"], 3),
            ],
        }
    )

    # Get the number of nodes and edges for all graphs
    graph_info = {
        "Metric": ["Number of Nodes", "Number of Edges"],
        "Original Graph": [G.number_of_nodes(), G.number_of_edges()],
    }

    # Define a list of modification functions
    #"Prob": edge_addition_prob,
    modification_functions = {
        "PrefAtt": edge_addition_preferential_attachment,
        "Jaccard": edge_addition_jaccard,
        "Degree": edge_addition_degree,
        "TopK": edge_addition_topk,
        "KKT": edge_addition_kkt,
        "Random": edge_addition_random,
        "Custom": edge_addition_custom,
    }

    combined_results = original_results.copy()

    # Evaluate each graph modification
    for method_name, mod_func in modification_functions.items():
        modified_graph = mod_func(G, seeds, k, budget)
        modified_results_dict = simulate_diffusion_ICM(modified_graph, seeds, max_iter)

        adapted_results = pd.DataFrame(
            {
                "Metric": [
                    "Avg Activated Nodes",
                    "Activated Nodes Std Dev",
                    "Avg Color Activation Count",
                    "Color Activation Count Std Dev",
                ],
                f"Adapted Graph {method_name}": [
                    round(modified_results_dict["avg_activated_nodes"], 3),
                    round(modified_results_dict["std_dev_activated_nodes"], 3),
                    round(modified_results_dict["avg_color_activation_count"], 3),
                    round(modified_results_dict["std_dev_color_activation_count"], 3),
                ],
            }
        )

        combined_results = pd.merge(combined_results, adapted_results, on="Metric")
        graph_info[f"Adapted Graph {method_name}"] = [
            modified_graph.number_of_nodes(),
            modified_graph.number_of_edges(),
        ]

    graph_info_df = pd.DataFrame(graph_info)

    # Combine all results into one DataFrame
    final_results = pd.concat([graph_info_df, combined_results], ignore_index=True)

    # Transpose the DataFrame and set the first row as the header
    final_results = final_results.T
    final_results.columns = final_results.iloc[
        0
    ]  # Set the first row as the column names
    final_results = final_results.drop(final_results.index[0])  # Drop the first row

    return final_results


In [4]:
num_groups = 4

G = create_polarized_graph_multiple(1000, num_groups, 0.05, 0.0005)
#G = graph_loader("datasets/facebook/facebook_combined.txt")
#G = graph_loader('datasets/congress_network/congress.edgelist')
#color the graph
spectral_partition_coloring(G, num_groups)
colors = plt.cm.get_cmap('tab10', num_groups)  # Use 'tab10' colormap to get distinct colors
color_map = [colors(G.nodes[node]["color"]) for node in G.nodes()]
print(Counter(color_map))

  adjacency = check_symmetric(adjacency)


In [5]:
seed_size = 50
k = seed_size
max_iter = 1000
budget = 3 * seed_size

In [6]:
seed = seed_mia(G, seed_size)
final_results = evaluate_graph_modifications(G, seed, k, max_iter, budget)
print("Results for MIA")
final_results

100%|██████████| 1000/1000 [00:00<00:00, 1111.90it/s]
100%|██████████| 1000/1000 [00:01<00:00, 855.22it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1047.77it/s]
100%|██████████| 1000/1000 [00:01<00:00, 776.16it/s]
100%|██████████| 1000/1000 [00:01<00:00, 810.32it/s]
100%|██████████| 1000/1000 [00:01<00:00, 835.60it/s]
100%|██████████| 1000/1000 [00:01<00:00, 733.65it/s]
100%|██████████| 1000/1000 [00:01<00:00, 768.17it/s]

Results for MIA





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,1000.0,12754.0,155.648,81.782,6.867,5.72
Adapted Graph PrefAtt,1000.0,12904.0,188.863,109.448,15.193,11.674
Adapted Graph Jaccard,1000.0,12904.0,169.503,81.125,7.243,5.564
Adapted Graph Degree,1000.0,12904.0,205.578,119.746,16.802,12.719
Adapted Graph TopK,1000.0,12904.0,176.872,98.447,14.102,11.828
Adapted Graph KKT,1000.0,12904.0,184.22,109.79,13.911,11.773
Adapted Graph Random,1000.0,12904.0,201.693,115.525,18.428,13.782
Adapted Graph Custom,1000.0,12904.0,200.504,113.365,19.864,14.924


In [7]:
seed = seed_polarized(G, seed_size, 0)
final_results = evaluate_graph_modifications(G, seed, k, max_iter, budget)
print("Results for Polarized seed")
final_results

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:00<00:00, 1242.65it/s]
100%|██████████| 1000/1000 [00:01<00:00, 805.66it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1050.98it/s]
100%|██████████| 1000/1000 [00:01<00:00, 661.47it/s]
100%|██████████| 1000/1000 [00:01<00:00, 957.87it/s]
100%|██████████| 1000/1000 [00:01<00:00, 911.83it/s]
100%|██████████| 1000/1000 [00:01<00:00, 856.43it/s]
100%|██████████| 1000/1000 [00:01<00:00, 620.73it/s]

Results for Polarized seed





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,1000.0,12754.0,133.413,74.125,4.43,5.244
Adapted Graph PrefAtt,1000.0,12904.0,174.994,114.07,14.922,13.572
Adapted Graph Jaccard,1000.0,12904.0,151.808,86.255,5.131,5.975
Adapted Graph Degree,1000.0,12904.0,197.136,125.831,16.465,13.574
Adapted Graph TopK,1000.0,12904.0,150.743,93.354,10.239,10.99
Adapted Graph KKT,1000.0,12904.0,159.218,98.29,11.258,11.286
Adapted Graph Random,1000.0,12904.0,178.704,113.474,14.09,11.38
Adapted Graph Custom,1000.0,12904.0,210.835,103.192,25.264,15.765


In [8]:
seed = seed_random(G, seed_size)
final_results = evaluate_graph_modifications(G, seed, k, max_iter, budget)
print("Results for Random seed")
final_results

100%|██████████| 1000/1000 [00:01<00:00, 683.37it/s]
100%|██████████| 1000/1000 [00:01<00:00, 548.02it/s]
100%|██████████| 1000/1000 [00:01<00:00, 554.48it/s]
100%|██████████| 1000/1000 [00:01<00:00, 573.34it/s]
100%|██████████| 1000/1000 [00:01<00:00, 670.81it/s]
100%|██████████| 1000/1000 [00:01<00:00, 551.98it/s]
100%|██████████| 1000/1000 [00:01<00:00, 627.96it/s]
100%|██████████| 1000/1000 [00:02<00:00, 471.26it/s]

Results for Random seed





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,1000.0,12754.0,229.847,154.158,6.342,6.47
Adapted Graph PrefAtt,1000.0,12904.0,250.088,160.271,15.648,11.989
Adapted Graph Jaccard,1000.0,12904.0,268.198,165.941,16.411,11.927
Adapted Graph Degree,1000.0,12904.0,255.129,155.766,14.158,11.622
Adapted Graph TopK,1000.0,12904.0,233.272,153.627,11.603,10.045
Adapted Graph KKT,1000.0,12904.0,246.346,158.737,12.135,10.574
Adapted Graph Random,1000.0,12904.0,248.236,160.22,15.093,11.149
Adapted Graph Custom,1000.0,12904.0,272.293,152.893,21.425,12.386


In [9]:
seed = seed_degree(G, seed_size)
final_results = evaluate_graph_modifications(G, seed, k, max_iter, budget)
print("Results for Degree seed")
final_results

100%|██████████| 1000/1000 [00:01<00:00, 814.80it/s]
100%|██████████| 1000/1000 [00:01<00:00, 683.51it/s]
100%|██████████| 1000/1000 [00:01<00:00, 712.04it/s]
100%|██████████| 1000/1000 [00:01<00:00, 670.59it/s]
100%|██████████| 1000/1000 [00:01<00:00, 777.72it/s]
100%|██████████| 1000/1000 [00:01<00:00, 787.28it/s]
100%|██████████| 1000/1000 [00:01<00:00, 703.42it/s]
100%|██████████| 1000/1000 [00:01<00:00, 581.30it/s]

Results for Degree seed





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,1000.0,12754.0,185.564,141.508,4.709,5.693
Adapted Graph PrefAtt,1000.0,12904.0,211.061,149.817,12.562,10.778
Adapted Graph Jaccard,1000.0,12904.0,218.128,148.007,13.863,11.236
Adapted Graph Degree,1000.0,12904.0,221.821,153.011,12.517,10.703
Adapted Graph TopK,1000.0,12904.0,198.049,145.509,10.054,9.521
Adapted Graph KKT,1000.0,12904.0,186.009,141.47,4.743,5.751
Adapted Graph Random,1000.0,12904.0,219.065,155.99,14.098,12.133
Adapted Graph Custom,1000.0,12904.0,242.22,148.141,20.335,13.013


In [10]:
def draw_colored_graph(G, num_groups):
    """Draw a graph with nodes colored based on their group attribute and positioned accordingly."""
    
    # Generate a color map with different colors for each group
    colors = plt.cm.get_cmap('tab10', num_groups)  # Use 'tab10' colormap to get distinct colors
    color_map = [colors(G.nodes[node]["color"]) for node in G.nodes()]
    
    # Create a layout that encourages nodes of the same group to be closer together
    pos = nx.spring_layout(G, k=0.3, seed=42)  # Adjust 'k' to control the spacing between groups
    
    # Adjust positions manually to separate the groups
    group_offset = 2  # Define the offset distance between groups
    group_centers = {i: (group_offset * i, 0) for i in range(num_groups)}  # Center for each group
    
    for node, (x, y) in pos.items():
        group = G.nodes[node]["color"]
        group_center_x, group_center_y = group_centers[group]
        pos[node] = (x + group_center_x, y + group_center_y)
    
    # Draw the graph
    plt.figure(figsize=(10, 10))
    nx.draw(G, pos, node_color=color_map, with_labels=True, node_size=500, font_color='white', font_size=10)
    
    # Display the graph
    plt.show()


draw_colored_graph(G, 4)

NameError: name 'plt' is not defined

In [None]:
"""
                "Edge-based Polarization",
                "Modularity-based Polarization",
                "Homophily-based Polarization",
                round(original_results_dict["polarization_scores"]["edge_based_polarization"], 3),
                round(original_results_dict["polarization_scores"]["modularity_based_polarization"], 3),
                round(original_results_dict["polarization_scores"]["homophily_based_polarization"], 3),
                
                
                
                    "Edge-based Polarization",
                    "Modularity-based Polarization",
                    "Homophily-based Polarization",
                    round(modified_results_dict["polarization_scores"]["edge_based_polarization"], 3),
                    round(modified_results_dict["polarization_scores"]["modularity_based_polarization"], 3),
                    round(modified_results_dict["polarization_scores"]["homophily_based_polarization"], 3),"""

'\n                "Edge-based Polarization",\n                "Modularity-based Polarization",\n                "Homophily-based Polarization",\n                round(original_results_dict["polarization_scores"]["edge_based_polarization"], 3),\n                round(original_results_dict["polarization_scores"]["modularity_based_polarization"], 3),\n                round(original_results_dict["polarization_scores"]["homophily_based_polarization"], 3),\n                \n                \n                \n                    "Edge-based Polarization",\n                    "Modularity-based Polarization",\n                    "Homophily-based Polarization",\n                    round(modified_results_dict["polarization_scores"]["edge_based_polarization"], 3),\n                    round(modified_results_dict["polarization_scores"]["modularity_based_polarization"], 3),\n                    round(modified_results_dict["polarization_scores"]["homophily_based_polarization"], 3),'