In [1]:

import os
from collections import Counter
from multiprocessing import Pool, cpu_count
from typing import Dict, List, Set

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

from evaluation_fixed_seed import evaluate_all_seeds
from src.graph import (
    create_polarized_graph,
    create_polarized_graph_multiple,
    graph_loader,
    random_color_graph,
    spectral_bipartition_coloring,
    spectral_partition_coloring,
    color_from_file
)
from src.seed import (seed_degree, 
    seed_polarized, 
    seed_random, 
    seed_polarized_centrality_mixed, 
    seed_polarized_centrality, 
    seed_polarized_degree, 
    seed_centrality_mixed, 
    seed_centrality, 
    seed_influence_maximization
)

#from src.seed_ima import 

In [2]:
import warnings

# Suppress the specific UserWarning from sklearn
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.manifold._spectral_embedding")

In [None]:
# Number of discrete colours requested from the colormap (one per node group).
num_groups = 2

# Load the graph from the Twitter dataset file.
# NOTE(review): the "cirlce" spelling is kept because it must match the on-disk directory name.
G = graph_loader("datasets/twitter_social_cirlce/twitter_combined.txt")

# Presumably sets the per-node "color" attribute that is read below — confirm in src.graph.
spectral_bipartition_coloring(G)

# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap accepts the same (name, lut) arguments and works on old and new versions.
colors = plt.get_cmap('tab10', num_groups)  # 'tab10' gives visually distinct colours
color_map = [colors(G.nodes[node]["color"]) for node in G.nodes()]

# Count nodes per colour attribute and report them in sorted attribute order, so
# "Color 1" always refers to the smallest colour value rather than to whichever
# colour happened to be encountered first while iterating over the nodes.
group_sizes = Counter(G.nodes[node]["color"] for node in G.nodes())
print(', '.join(f"Color {i+1}: {size}" for i, (_, size) in enumerate(sorted(group_sizes.items()))))

In [None]:
# Passed as the max_iter argument to evaluate_all_seeds in the evaluation cell.
max_iter = 1000

# The seed set size is one twentieth of the node count (integer division).
total_nodes = G.number_of_nodes()
seed_size = total_nodes // 20
print(f"Seed size: {seed_size}")

In [None]:
# Values of k to sweep; an earlier sweep used [1, 5, 10, 15, 20, 25, 30].
k_values = [1, 3, 5, 7, 10, 13, 15]

# Despite the name, each entry holds the *result* of calling a seeding strategy
# once, up front, so every k below is evaluated against the same fixed seeds.
seed_functions = {
    "Random": seed_random(G, seed_size),
    "MIA": seed_influence_maximization(G, seed_size, 1000, 8, 0),
    "Degree": seed_degree(G, seed_size),
    "Centrality": seed_centrality(G, seed_size),
    "Centrality Mixed": seed_centrality_mixed(G, seed_size),
    "Polarized": seed_polarized(G, seed_size, 0),
    "Polarized Degree": seed_polarized_degree(G, seed_size, 0),
    "Polarized Centrality": seed_polarized_centrality(G, seed_size, 0),
    "Polarized Centrality Mixed": seed_polarized_centrality_mixed(G, seed_size, 0),
}

# Write into the directory that the results-loading cell reads from, and make
# sure it exists so to_csv does not fail after an expensive evaluation.
results_dir = "results/test_twitter"
os.makedirs(results_dir, exist_ok=True)
num_nodes = G.number_of_nodes()

for k in k_values:
    print(f"K: {k}")
    budget = 2 * k

    # NOTE(review): the "netin_09_test" label is kept from the original call even
    # though the graph is the Twitter dataset — confirm what evaluate_all_seeds
    # uses this argument for.
    all_results_df = evaluate_all_seeds(G, seed_functions, k, max_iter, budget, "netin_09_test", verbose=False)

    # The previous filename referenced N, h and d, none of which are defined in
    # this notebook (NameError). Only values available here are used; the
    # "_{k}_k_" token is what the loading cell parses to recover k.
    output_path = os.path.join(
        results_dir,
        f"results_twitter_{num_nodes}_nodes_{k}_k_{budget}_budget.csv",
    )
    all_results_df.to_csv(output_path, index=False)

In [None]:
directory = 'results/test_twitter/'  # Adjust the path if needed

# Maps each k value (parsed from the filename) to the selected result columns.
k_dict = {}

# Sorted so that iteration order, and which file wins if two files share the
# same k, is deterministic; os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".csv"):  # only CSV result files are considered
        continue

    # k is the token immediately before "_k_", e.g. "..._5_k_10_budget.csv" -> 5.
    # str.split never returns an empty list, so only the int() conversion can fail.
    try:
        k_value = int(filename.split("_k_")[0].split("_")[-1])
    except ValueError:
        print(f"Filename {filename} does not match the expected pattern.")
        continue

    # Read the CSV file and keep only the columns needed downstream.
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)
    k_dict[k_value] = df[["Seed Function", "Graph Modification", "Avg Activated Nodes"]]