In [1]:
import random
from collections import Counter
from multiprocessing import Pool, cpu_count
from typing import Dict, List, Set, Tuple

import networkx as nx
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from icm_diffusion import simulate_diffusion_ICM
from src.graph import (
    create_polarized_graph,
    create_polarized_graph_multiple,
    graph_loader,
    random_color_graph,
    spectral_bipartition_coloring,
    spectral_partition_coloring,
    k_means_partition_coloring
)
from src.seed import seed_degree, seed_mia, seed_polarized, seed_random


In [2]:
import warnings

# Suppress the specific UserWarning from sklearn
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.manifold._spectral_embedding")

In [3]:
from edge_addition import (
    edge_addition_custom,
    edge_addition_adamic_adar,
    edge_addition_preferential_attachment,
    edge_addition_jaccard,
    edge_addition_degree,
    edge_addition_topk,
    edge_addition_prob,
    edge_addition_kkt,
    edge_addition_random,
)

In [4]:
# Compare edge-addition strategies by simulating ICM diffusion on each modified graph.

# (key in the simulation result dict, label shown in the report), in report order.
_DIFFUSION_METRICS = (
    ("avg_activated_nodes", "Avg Activated Nodes"),
    ("std_dev_activated_nodes", "Activated Nodes Std Dev"),
    ("avg_color_activation_count", "Avg Color Activation Count"),
    ("std_dev_color_activation_count", "Color Activation Count Std Dev"),
)


def _diffusion_metric_values(results_dict):
    """Return the reported diffusion metrics, rounded to 3 decimals, in report order."""
    return [round(results_dict[key], 3) for key, _ in _DIFFUSION_METRICS]


def evaluate_graph_modifications(G, seeds, k, max_iter, budget, modification_functions=None):
    """Simulate diffusion on ``G`` and on each edge-augmented variant, and tabulate the results.

    Args:
        G: Graph to evaluate. Must provide ``number_of_nodes()`` and ``number_of_edges()``.
        seeds: Seed nodes, passed unchanged to the simulation and to every strategy.
        k: Passed unchanged as the third argument of every strategy.
        max_iter: Passed unchanged as the third argument of ``simulate_diffusion_ICM``.
        budget: Passed unchanged as the fourth argument of every strategy.
        modification_functions: Optional mapping ``name -> callable(G, seeds, k, budget)``
            returning the modified graph. When ``None``, the notebook's default set of
            strategies is used.

    Returns:
        A DataFrame with one row per graph ("Original Graph", then
        "Adapted Graph <name>" for each strategy) and one column per metric:
        node count, edge count, and the four diffusion statistics.
    """
    if modification_functions is None:
        # edge_addition_prob ("Prob") was disabled in the original comparison;
        # pass a custom mapping to include it.
        modification_functions = {
            "PrefAtt": edge_addition_preferential_attachment,
            "Jaccard": edge_addition_jaccard,
            "Degree": edge_addition_degree,
            "TopK": edge_addition_topk,
            "KKT": edge_addition_kkt,
            "Random": edge_addition_random,
            "Custom": edge_addition_custom,
        }

    # Baseline is simulated first so the sequence of simulation calls stays
    # original graph -> strategy 1 -> strategy 2 -> ...
    diffusion_columns = {
        "Metric": [label for _, label in _DIFFUSION_METRICS],
        "Original Graph": _diffusion_metric_values(
            simulate_diffusion_ICM(G, seeds, max_iter)
        ),
    }
    graph_columns = {
        "Metric": ["Number of Nodes", "Number of Edges"],
        "Original Graph": [G.number_of_nodes(), G.number_of_edges()],
    }

    # Collect one column per strategy; the frames are built once after the loop
    # instead of being merged on every iteration.
    for method_name, mod_func in modification_functions.items():
        column = f"Adapted Graph {method_name}"
        modified_graph = mod_func(G, seeds, k, budget)
        diffusion_columns[column] = _diffusion_metric_values(
            simulate_diffusion_ICM(modified_graph, seeds, max_iter)
        )
        graph_columns[column] = [
            modified_graph.number_of_nodes(),
            modified_graph.number_of_edges(),
        ]

    # Stack the graph-size rows on top of the diffusion rows.
    final_results = pd.concat(
        [pd.DataFrame(graph_columns), pd.DataFrame(diffusion_columns)],
        ignore_index=True,
    )

    # Transpose so each graph is a row, then promote the "Metric" row to the header.
    final_results = final_results.T
    final_results.columns = final_results.iloc[0]
    final_results = final_results.drop(final_results.index[0])

    return final_results


In [5]:
num_groups = 4

# Alternative inputs, kept for quick switching:
#G = create_polarized_graph_multiple(1000, num_groups, 0.05, 0.0005)
#G = graph_loader("datasets/facebook/facebook_combined.txt")
G = graph_loader('datasets/congress_network/congress.edgelist')

# The cells below read G.nodes[node]["color"], so this call is expected to set that
# attribute on every node (presumably an integer group id in [0, num_groups) — confirm).
spectral_partition_coloring(G, num_groups)

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
colors = plt.get_cmap('tab10', num_groups)  # Use 'tab10' colormap to get distinct colors
color_map = [colors(G.nodes[node]["color"]) for node in G.nodes()]

# Count nodes per group id and report in group order, so "Color i" always refers to
# group i-1 rather than to whichever colour happened to appear i-th in node order.
group_sizes = Counter(G.nodes[node]["color"] for node in G.nodes())
print(', '.join(f"Color {group + 1}: {size}" for group, size in sorted(group_sizes.items())))

Number of Nodes: 475
Number of Edges: 13289
Color 1: 118, Color 2: 190, Color 3: 25, Color 4: 142


In [6]:
def draw_colored_graph(G, num_groups):
    """Draw ``G`` with nodes coloured by group and each group shifted to its own x-offset.

    Args:
        G: Graph whose nodes carry a ``"color"`` attribute. The attribute is used both
            as a colormap index and as a key in ``range(num_groups)``.
        num_groups: Number of groups; sets the colormap size and the number of offsets.

    Raises:
        KeyError: If a node's ``"color"`` is not in ``range(num_groups)``.
    """
    # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
    palette = plt.get_cmap('tab10', num_groups)  # 'tab10' resampled to num_groups colours
    color_map = [palette(G.nodes[node]["color"]) for node in G.nodes()]

    # Base layout is group-agnostic; the fixed seed keeps it stable across runs.
    base_pos = nx.spring_layout(G, k=0.3, seed=42)  # 'k' controls node spacing

    # Separation between groups comes from shifting each group along the x-axis.
    group_offset = 2  # Distance between neighbouring group centres
    group_centers = {i: (group_offset * i, 0) for i in range(num_groups)}

    pos = {}
    for node, (x, y) in base_pos.items():
        center_x, center_y = group_centers[G.nodes[node]["color"]]
        pos[node] = (x + center_x, y + center_y)

    # Draw the graph
    plt.figure(figsize=(10, 10))
    nx.draw(G, pos, node_color=color_map, with_labels=True, node_size=500, font_color='white', font_size=10)

    # Display the graph
    plt.show()


# Use the same group count the graph was coloured with instead of a hard-coded 4.
draw_colored_graph(G, num_groups)

KeyboardInterrupt: 

In [7]:
# Experiment configuration shared by every seeding strategy below.
RANDOM_SEED = 42  # fixed so a Restart & Run All can reproduce the numbers
BUDGET_PER_SEED = 3  # edge-addition budget expressed as a multiple of the seed-set size

# Seed both RNGs the notebook imports. This only covers cells executed after this one.
# NOTE(review): if the project helpers use their own generators or worker processes,
# they need seeding separately — confirm.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

seed_size = 50
k = seed_size
max_iter = 1000
budget = BUDGET_PER_SEED * seed_size

In [8]:
# Seed set chosen by seed_mia; every edge-addition strategy is then evaluated on it.
seed = seed_mia(G, seed_size)
final_results = evaluate_graph_modifications(
    G, seed, k=k, max_iter=max_iter, budget=budget
)
print("Results for MIA")
final_results

100%|██████████| 1000/1000 [00:02<00:00, 391.48it/s]
100%|██████████| 1000/1000 [00:02<00:00, 409.62it/s]
100%|██████████| 1000/1000 [00:02<00:00, 391.00it/s]
100%|██████████| 1000/1000 [00:02<00:00, 421.85it/s]
100%|██████████| 1000/1000 [00:02<00:00, 464.30it/s]
100%|██████████| 1000/1000 [00:02<00:00, 456.22it/s]
100%|██████████| 1000/1000 [00:02<00:00, 432.19it/s]
100%|██████████| 1000/1000 [00:02<00:00, 399.60it/s]


Results for MIA


Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,475.0,13289.0,175.037,53.168,56.169,31.362
Adapted Graph PrefAtt,475.0,13439.0,187.885,54.88,66.904,35.841
Adapted Graph Jaccard,475.0,13439.0,216.301,46.58,73.482,31.99
Adapted Graph Degree,475.0,13439.0,182.485,52.844,63.45,33.7
Adapted Graph TopK,475.0,13439.0,176.601,53.602,57.778,33.071
Adapted Graph KKT,475.0,13439.0,175.63,52.919,57.835,32.894
Adapted Graph Random,475.0,13439.0,190.567,50.96,69.487,32.607
Adapted Graph Custom,475.0,13439.0,202.804,49.1,80.38,33.376


In [9]:
# Seed set chosen by seed_polarized.
# NOTE(review): the trailing 0 is presumably the group/colour to seed from — confirm.
seed = seed_polarized(G, seed_size, 0)
final_results = evaluate_graph_modifications(
    G, seed, k=k, max_iter=max_iter, budget=budget
)
print("Results for Polarized seed")
final_results

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:01<00:00, 531.30it/s]
100%|██████████| 1000/1000 [00:02<00:00, 449.82it/s]
100%|██████████| 1000/1000 [00:02<00:00, 381.00it/s]
100%|██████████| 1000/1000 [00:02<00:00, 485.81it/s]
100%|██████████| 1000/1000 [00:01<00:00, 538.48it/s]
100%|██████████| 1000/1000 [00:01<00:00, 529.32it/s]
100%|██████████| 1000/1000 [00:02<00:00, 363.18it/s]
100%|██████████| 1000/1000 [00:03<00:00, 332.38it/s]


Results for Polarized seed


Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,475.0,13289.0,149.202,49.868,47.901,32.81
Adapted Graph PrefAtt,475.0,13439.0,169.658,51.123,59.881,33.407
Adapted Graph Jaccard,475.0,13439.0,188.633,48.189,78.963,36.826
Adapted Graph Degree,475.0,13439.0,164.055,51.985,55.762,33.538
Adapted Graph TopK,475.0,13439.0,150.228,50.883,48.614,32.593
Adapted Graph KKT,475.0,13439.0,148.351,49.246,47.181,31.697
Adapted Graph Random,475.0,13439.0,221.454,45.562,106.605,38.149
Adapted Graph Custom,475.0,13439.0,213.782,45.177,123.178,42.819


In [10]:
# Seed set chosen by seed_random; every edge-addition strategy is then evaluated on it.
seed = seed_random(G, seed_size)
final_results = evaluate_graph_modifications(
    G, seed, k=k, max_iter=max_iter, budget=budget
)
print("Results for Random seed")
final_results

100%|██████████| 1000/1000 [00:02<00:00, 422.07it/s]
100%|██████████| 1000/1000 [00:02<00:00, 388.66it/s]
100%|██████████| 1000/1000 [00:02<00:00, 355.51it/s]
100%|██████████| 1000/1000 [00:02<00:00, 386.80it/s]
100%|██████████| 1000/1000 [00:02<00:00, 409.09it/s]
100%|██████████| 1000/1000 [00:02<00:00, 405.68it/s]
100%|██████████| 1000/1000 [00:02<00:00, 385.93it/s]
100%|██████████| 1000/1000 [00:02<00:00, 345.27it/s]

Results for Random seed





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,475.0,13289.0,202.744,53.34,65.828,31.445
Adapted Graph PrefAtt,475.0,13439.0,210.538,52.595,71.452,32.017
Adapted Graph Jaccard,475.0,13439.0,243.161,46.913,97.988,32.633
Adapted Graph Degree,475.0,13439.0,211.528,52.807,72.676,32.772
Adapted Graph TopK,475.0,13439.0,203.986,52.738,67.032,31.092
Adapted Graph KKT,475.0,13439.0,207.514,52.705,69.325,31.339
Adapted Graph Random,475.0,13439.0,225.145,54.143,80.299,33.109
Adapted Graph Custom,475.0,13439.0,237.076,46.576,98.351,32.265


In [11]:
# Seed set chosen by seed_degree; every edge-addition strategy is then evaluated on it.
seed = seed_degree(G, seed_size)
final_results = evaluate_graph_modifications(
    G, seed, k=k, max_iter=max_iter, budget=budget
)
print("Results for Degree seed")
final_results

100%|██████████| 1000/1000 [00:01<00:00, 593.36it/s]
100%|██████████| 1000/1000 [00:01<00:00, 545.53it/s]
100%|██████████| 1000/1000 [00:01<00:00, 521.44it/s]
100%|██████████| 1000/1000 [00:02<00:00, 494.53it/s]
100%|██████████| 1000/1000 [00:01<00:00, 619.28it/s]
100%|██████████| 1000/1000 [00:01<00:00, 529.87it/s]
100%|██████████| 1000/1000 [00:02<00:00, 419.88it/s]
100%|██████████| 1000/1000 [00:02<00:00, 458.15it/s]

Results for Degree seed





Metric,Number of Nodes,Number of Edges,Avg Activated Nodes,Activated Nodes Std Dev,Avg Color Activation Count,Color Activation Count Std Dev
Original Graph,475.0,13289.0,122.558,52.473,29.288,25.726
Adapted Graph PrefAtt,475.0,13439.0,137.41,52.435,38.599,27.848
Adapted Graph Jaccard,475.0,13439.0,148.031,51.459,41.621,26.925
Adapted Graph Degree,475.0,13439.0,145.084,52.056,43.218,28.85
Adapted Graph TopK,475.0,13439.0,121.652,51.705,29.416,25.978
Adapted Graph KKT,475.0,13439.0,126.293,52.791,31.51,26.697
Adapted Graph Random,475.0,13439.0,161.346,51.894,53.528,29.495
Adapted Graph Custom,475.0,13439.0,158.203,50.392,64.976,34.025
