In [24]:
from src.graph import graph_loader, create_polarized_graph, spectral_bipartition_coloring, random_color_graph
from src.seed import seed_degree, seed_random 
import networkx as nx
from icm_diffusion import simulate_diffusion_ICM
import pandas as pd
import random

In [25]:
G = create_polarized_graph(500, 0.2, 0.01)
#color the graph
spectral_bipartition_coloring(G)

  adjacency = check_symmetric(adjacency)


In [26]:
def edge_addition_adamic_adar(G, seeds, k):
    graph = G.copy()

    # Convert to undirected graph for Adamic-Adar calculation
    undirected_graph = graph.to_undirected()

    for seed in seeds:
        # Compute Adamic-Adar index for pairs involving the seed node
        adamic_adar_scores = list(nx.adamic_adar_index(undirected_graph, [(seed, n) for n in undirected_graph.nodes if n != seed]))

        # Sort the scores in descending order
        adamic_adar_scores.sort(key=lambda x: x[2], reverse=True)

        # Add edges to the graph as a connection from the seed to the top k nodes
        for i in range(min(k, len(adamic_adar_scores))):
            target_node = adamic_adar_scores[i][1]
            graph.add_edge(seed, target_node)

    return graph

def edge_addition_preferential_attachment(G, seeds, k):
    graph = G.copy()
    
    for seed in seeds:
        # Calculate the degree of all nodes in the graph
        node_degrees = dict(graph.degree())
        
        # Generate the cumulative distribution of degrees for random selection
        nodes, degrees = zip(*node_degrees.items())
        total_degree = sum(degrees)
        cumulative_distribution = [sum(degrees[:i+1]) / total_degree for i in range(len(degrees))]

        # Add edges from the seed node to k nodes chosen by the preferential attachment rule
        for _ in range(k):
            random_value = random.random()
            for i, cum_dist in enumerate(cumulative_distribution):
                if random_value <= cum_dist:
                    target_node = nodes[i]
                    # Prevent self-loops and duplicate edges
                    if target_node != seed and not graph.has_edge(seed, target_node):
                        graph.add_edge(seed, target_node)
                        break
    
    return graph

# Jaccard Coefficient
def edge_addition_jaccard(G, seeds, k):
    graph = G.copy()
    undirected_graph = graph.to_undirected()

    for seed in seeds:
        jaccard_scores = list(nx.jaccard_coefficient(undirected_graph, [(seed, n) for n in undirected_graph.nodes if n != seed]))
        jaccard_scores.sort(key=lambda x: x[2], reverse=True)

        for i in range(min(k, len(jaccard_scores))):
            target_node = jaccard_scores[i][1]
            graph.add_edge(seed, target_node)

    return graph

# Degree
def edge_addition_degree(G, seeds, k):
    graph = G.copy()

    for seed in seeds:
        nodes_sorted_by_degree = sorted(graph.nodes, key=lambda n: graph.out_degree(n), reverse=True)
        for target_node in nodes_sorted_by_degree[:k]:
            if target_node != seed:
                graph.add_edge(seed, target_node)

    return graph

# Harmonic Centrality (Topk)
def edge_addition_topk(G, seeds, k):
    graph = G.copy()
    harmonic_centralities = nx.harmonic_centrality(graph)

    for seed in seeds:
        nodes_sorted_by_centrality = sorted(harmonic_centralities.items(), key=lambda x: x[1], reverse=True)
        for i in range(min(k, len(nodes_sorted_by_centrality))):
            target_node = nodes_sorted_by_centrality[i][0]
            if target_node != seed:
                graph.add_edge(seed, target_node)

    return graph

# Probabilistic Edge Addition (Prob)
def edge_addition_prob(G, seeds, k):
    graph = G.copy()

    for seed in seeds:
        all_possible_edges = [(seed, n) for n in graph.nodes if n != seed and not graph.has_edge(seed, n)]
        if len(all_possible_edges) == 0:
            continue
        random.shuffle(all_possible_edges)
        selected_edges = random.sample(all_possible_edges, min(k, len(all_possible_edges)))

        for edge in selected_edges:
            graph.add_edge(*edge)

    return graph

# Kempe et al. Seed Selection (KKT)
def edge_addition_kkt(G, seeds, k):
    graph = G.copy()

    for seed in seeds:
        candidates = sorted(graph.nodes, key=lambda n: nx.degree_centrality(graph)[n], reverse=True)
        for target_node in candidates[:k]:
            if target_node != seed:
                graph.add_edge(seed, target_node)

    return graph

# Random
def edge_addition_random(G, seeds, k):
    graph = G.copy()

    for seed in seeds:
        available_nodes = [n for n in graph.nodes if n != seed]
        selected_nodes = random.sample(available_nodes, min(k, len(available_nodes)))

        for target_node in selected_nodes:
            graph.add_edge(seed, target_node)

    return graph

In [27]:
# Function to evaluate and compare the graph modifications
def evaluate_graph_modifications(G, seeds, k):
    # Simulate diffusion on the original graph
    count, count_std, color_count, color_count_std = simulate_diffusion_ICM(G, seeds, 1, 1000)
    
    # Results for the original graph
    original_results = pd.DataFrame({
        'Metric': ['Count', 'Count Standard Deviation', 'Color Count', 'Color Count Standard Deviation'],
        'Original Graph': [round(count, 3), round(count_std, 3), round(color_count, 3), round(color_count_std, 3)]
    })

    # Define a list of modification functions
    modification_functions = {
        'Adamic Adar': edge_addition_adamic_adar,
        'PrefAtt': edge_addition_preferential_attachment,
        'Jaccard': edge_addition_jaccard,
        'Degree': edge_addition_degree,
        'TopK': edge_addition_topk,
        'Prob': edge_addition_prob,
        'KKT': edge_addition_kkt,
        'Random': edge_addition_random
    }

    combined_results = original_results.copy()

    # Evaluate each graph modification
    for method_name, mod_func in modification_functions.items():
        modified_graph = mod_func(G, seeds, k)
        count, count_std, color_count, color_count_std = simulate_diffusion_ICM(modified_graph, seeds, 1, 1000)

        adapted_results = pd.DataFrame({
            'Metric': ['Count', 'Count Standard Deviation', 'Color Count', 'Color Count Standard Deviation'],
            f'Adapted Graph {method_name}': [round(count, 3), round(count_std, 3), round(color_count, 3), round(color_count_std, 3)]
        })

        combined_results = pd.merge(combined_results, adapted_results, on='Metric')

    # Get the number of nodes and edges for all graphs
    graph_info = {
        'Metric': ['Number of Nodes', 'Number of Edges'],
        'Original Graph': [G.number_of_nodes(), G.number_of_edges()],
    }
    
    for method_name, mod_func in modification_functions.items():
        modified_graph = mod_func(G, seeds, k)
        graph_info[f'Adapted Graph {method_name}'] = [modified_graph.number_of_nodes(), modified_graph.number_of_edges()]
    
    graph_info_df = pd.DataFrame(graph_info)

    # Combine all results into one DataFrame
    final_results = pd.concat([graph_info_df, combined_results], ignore_index=True)

    # Transpose the DataFrame and set the first row as the header
    final_results = final_results.T
    final_results.columns = final_results.iloc[0]  # Set the first row as the column names
    final_results = final_results.drop(final_results.index[0])  # Drop the first row

    return final_results

In [28]:
seed = seed_degree(G, 50)
k = 15

In [29]:
final_results = evaluate_graph_modifications(G, seed, k)
final_results

100%|██████████| 1000/1000 [00:02<00:00, 359.63it/s]
100%|██████████| 1000/1000 [00:02<00:00, 348.04it/s]
100%|██████████| 1000/1000 [00:02<00:00, 356.34it/s]
100%|██████████| 1000/1000 [00:02<00:00, 340.94it/s]
100%|██████████| 1000/1000 [00:02<00:00, 339.04it/s]
100%|██████████| 1000/1000 [00:02<00:00, 343.16it/s]
100%|██████████| 1000/1000 [00:03<00:00, 308.82it/s]
100%|██████████| 1000/1000 [00:02<00:00, 388.58it/s]
100%|██████████| 1000/1000 [00:03<00:00, 321.68it/s]


Metric,Number of Nodes,Number of Edges,Count,Count Standard Deviation,Color Count,Color Count Standard Deviation
Original Graph,500.0,25655.0,139.809,78.336,3.559,5.074
Adapted Graph Adamic Adar,500.0,26220.0,146.586,81.399,3.49,4.765
Adapted Graph PrefAtt,500.0,26405.0,145.2,77.848,9.42,10.939
Adapted Graph Jaccard,500.0,26247.0,144.938,78.022,3.396,4.569
Adapted Graph Degree,500.0,26311.0,138.477,78.318,3.884,4.826
Adapted Graph TopK,500.0,26283.0,142.412,79.136,5.536,6.86
Adapted Graph Prob,500.0,26405.0,150.901,80.155,10.997,12.903
Adapted Graph KKT,500.0,26284.0,136.781,79.807,3.443,4.637
Adapted Graph Random,500.0,26300.0,149.777,80.547,9.985,12.02
