In [1]:
import random
import numpy as np
from tqdm import tqdm

# Seed both random sources used in this notebook: the stdlib `random` module
# (seed-status draws, candidate shuffling, probability choice) and NumPy's
# global generator (exponential edge probabilities).
random.seed(123)
np.random.seed(123)

# Loading social network graph data

In [2]:
import networkx as nx
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ids

# Load social network graph
# The edge list is parsed with integer node ids into an undirected nx.Graph.
# NOTE(review): the recorded summary output below reports "Type: DiGraph",
# which does not match create_using = nx.Graph() — either the output is stale
# or this line was changed after the last run. Confirm which type is intended.
g = nx.read_edgelist("data/Cit-HepPh.txt", create_using = nx.Graph(), nodetype = int)

In [3]:
# Quick snapshot of the network (name, type, node/edge counts, degrees).
# NOTE(review): nx.info is not available in recent NetworkX releases, where
# print(g) is the suggested replacement — confirm the pinned NetworkX version.
print(nx.info(g))

Name: 
Type: DiGraph
Number of nodes: 34546
Number of edges: 421578
Average in degree:  12.2034
Average out degree:  12.2034


# Defining experiment functions

In [4]:
# propagation probability functions
def propagation_probability_1():
    """Return the constant propagation probability of scheme 1 (always 0.01)."""
    fixed_probability = 0.01
    return fixed_probability

def propagation_probability_2():
    """Draw a propagation probability for scheme 2.

    The value is sampled from an exponential distribution with scale 0.01
    using NumPy's global random generator. The draw is returned as is; it is
    not clipped to the [0, 1] interval.
    """
    exponential_scale = 0.01
    return np.random.exponential(scale=exponential_scale)

def propagation_probability_3():
    """Pick a propagation probability for scheme 3.

    One of 0.1, 0.01 or 0.001 is chosen uniformly with the stdlib ``random``
    module.
    """
    candidate_probabilities = [0.1, 0.01, 0.001]
    return random.choice(candidate_probabilities)

In [5]:
# functions for greedy algorithm and Dynamic Independent Cascade

def generate_seed_status(activation_probability=1):
    """Decide whether a selected seed node actually becomes active.

    A single uniform draw from ``random.random()`` is compared against
    ``activation_probability``.

    Args:
        activation_probability: Chance that the seed activates. With the
            default of 1 the seed always activates.

    Returns:
        1 when the draw is less than or equal to ``activation_probability``,
        otherwise 0.
    """
    draw = random.random()
    return 1 if draw <= activation_probability else 0

def evaluated_expected_activated_nodes(model, seeds=[], iteration_num=10, simulation_num=3):
    """Estimate how many nodes end up activated if ``seeds`` are seeded now.

    The model's current status is snapshotted, then ``simulation_num``
    simulations are run on the model itself. The first simulation starts from
    the model exactly as it was passed in; every later one first calls
    ``model.reset()`` and restores the snapshot. After the last simulation the
    model is reset and the snapshot restored once more.

    Args:
        model: Diffusion model exposing ``status``, ``reset()`` and
            ``iteration_bunch(n)``.
        seeds: Node ids to activate (via ``generate_seed_status``) before each
            simulation.
        iteration_num: Number of iterations requested per simulation.
        simulation_num: Number of simulations to average over.

    Returns:
        The mean, over all simulations, of the status-1 plus status-2 node
        counts reported by the last iteration.
    """
    status_snapshot = model.status.copy()
    activated_per_run = []

    for run_number in range(simulation_num):
        # The very first run reuses the state the caller handed us.
        if run_number > 0:
            model.reset()
            model.status = status_snapshot.copy()

        # activating seeds
        for seed_node in seeds:
            model.status[seed_node] = generate_seed_status()

        # propagation simulation; only the final iteration's counts matter
        final_node_count = model.iteration_bunch(iteration_num)[-1]['node_count']
        activated_per_run.append(final_node_count[1] + final_node_count[2])

    expected_activated_nodes = sum(activated_per_run) / simulation_num

    # Hand the model back with the status it had on entry.
    model.reset()
    model.status = status_snapshot.copy()

    return expected_activated_nodes


def select_seed_node_with_a_greedy_algorithm(model, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):
    """Pick the inactive node whose seeding yields the largest estimated spread.

    Every node whose entry in ``model.status`` is 0 is a candidate. When
    ``node_sample_proportion`` is not 1.0 the candidates are shuffled with the
    stdlib ``random`` module and only the leading share is evaluated. Each
    remaining candidate is scored with ``evaluated_expected_activated_nodes``;
    the first candidate with the strictly highest score wins.

    Args:
        model: Diffusion model in its current (mid-experiment) state.
        node_sample_proportion: Share of the candidates to evaluate.
        iteration_num: Iterations per simulation, forwarded to the scorer.
        simulation_num: Simulations per candidate, forwarded to the scorer.

    Returns:
        The id of the best candidate, or ``None`` when there is nothing to
        evaluate (no inactive nodes, or a non-positive sample proportion) or
        no candidate scores above 0.

    Side effects:
        Calls ``model.reset()`` and restores ``model.status`` to the value it
        had on entry before returning.
    """
    current_status = model.status.copy()
    candidate_nodes = [
        node_id for node_id, node_status in model.status.items() if node_status == 0
    ]

    if node_sample_proportion != 1.0:
        random.shuffle(candidate_nodes)
        sample_count = int(len(candidate_nodes) * node_sample_proportion)
        # int() truncates, so a small pool (e.g. 50 candidates at 1%) used to
        # produce an empty sample and a None result, which callers then used
        # as a node id. Keep at least one candidate whenever a positive share
        # of a non-empty pool was requested.
        if candidate_nodes and node_sample_proportion > 0:
            sample_count = max(1, sample_count)
        candidate_nodes = candidate_nodes[:sample_count]

    max_expected_activated_nodes = 0
    node_top_choice = None
    for node_id in tqdm(candidate_nodes):
        expected_activated_nodes = evaluated_expected_activated_nodes(
            model,
            seeds=[node_id],
            iteration_num=iteration_num,
            simulation_num=simulation_num,
        )
        # Strict comparison: on ties the earlier candidate is kept.
        if max_expected_activated_nodes < expected_activated_nodes:
            max_expected_activated_nodes = expected_activated_nodes
            node_top_choice = node_id

    model.reset()
    model.status = current_status.copy()

    return node_top_choice




def evaluated_expected_activated_nodes_for_first_seed(graph, config, seeds=[], iteration_num=10, simulation_num=3):
    """Estimate the spread obtained when ``seeds`` are the initial infected set.

    For every simulation a new ``IndependentCascadesModel`` is built on
    ``graph``, initialised from ``config`` with ``seeds`` as the 'Infected'
    nodes, and run for ``iteration_num`` iterations.

    Note that ``config`` is modified: its 'percentage_infected' parameter and
    its 'Infected' initial configuration are set on every simulation.

    Args:
        graph: Graph handed to ``ids.IndependentCascadesModel``.
        config: Model configuration that already carries the edge settings.
        seeds: Node ids to mark as initially infected.
        iteration_num: Number of iterations requested per simulation.
        simulation_num: Number of simulations to average over.

    Returns:
        The mean, over all simulations, of the status-1 plus status-2 node
        counts reported by the last iteration.
    """
    activated_total = 0

    for _ in range(simulation_num):
        # A fresh model per simulation, so no state carries over between runs.
        fresh_model = ids.IndependentCascadesModel(graph)

        # Set all nodes to inactive, then mark only the seed(s) as infected.
        config.add_model_parameter('percentage_infected', 0.0)
        config.add_model_initial_configuration('Infected', list(seeds))
        fresh_model.set_initial_status(config)

        last_node_count = fresh_model.iteration_bunch(iteration_num)[-1]['node_count']
        activated_total += last_node_count[1] + last_node_count[2]

    return activated_total / simulation_num



def select_first_seed_node_with_a_greedy_algorithm(model, config, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):
    """Pick the first seed: the node with the largest estimated spread.

    All nodes of ``model.graph`` are candidates. When
    ``node_sample_proportion`` is not 1.0 the candidates are shuffled with the
    stdlib ``random`` module and only the leading share is evaluated. Each
    remaining candidate is scored with
    ``evaluated_expected_activated_nodes_for_first_seed``; the first candidate
    with the strictly highest score wins.

    Args:
        model: Diffusion model; only its graph is used here.
        config: Model configuration forwarded to the scorer (which modifies it).
        node_sample_proportion: Share of the nodes to evaluate.
        iteration_num: Iterations per simulation, forwarded to the scorer.
        simulation_num: Simulations per candidate, forwarded to the scorer.

    Returns:
        The id of the best candidate, or ``None`` when there is nothing to
        evaluate (empty graph, or a non-positive sample proportion) or no
        candidate scores above 0.
    """
    candidate_nodes = list(model.graph.nodes())

    if node_sample_proportion != 1.0:
        random.shuffle(candidate_nodes)
        sample_count = int(len(candidate_nodes) * node_sample_proportion)
        # int() truncates, so a small graph (e.g. 50 nodes at 1%) used to
        # produce an empty sample and a None result, which the caller then
        # used as the initial infected node. Keep at least one candidate
        # whenever a positive share of a non-empty pool was requested.
        if candidate_nodes and node_sample_proportion > 0:
            sample_count = max(1, sample_count)
        candidate_nodes = candidate_nodes[:sample_count]

    max_expected_activated_nodes = 0
    node_top_choice = None
    for node_id in tqdm(candidate_nodes):
        expected_activated_nodes = evaluated_expected_activated_nodes_for_first_seed(
            model.graph.graph,
            config,
            seeds=[node_id],
            iteration_num=iteration_num,
            simulation_num=simulation_num,
        )
        # Strict comparison: on ties the earlier candidate is kept.
        if max_expected_activated_nodes < expected_activated_nodes:
            max_expected_activated_nodes = expected_activated_nodes
            node_top_choice = node_id

    return node_top_choice


In [6]:
# function to execute one run of experiment
def run_experiment(model, budget=10, decision_interval_period=5, propagation_probability_function=1, max_propagation_after_last_seed=1000, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):
    """Run one seeding experiment on ``model`` and return the final spread.

    Steps:
      1. Draw a "threshold" for every edge of the notebook-level graph ``g``
         with the selected propagation-probability function.
      2. Greedily choose the first seed, initialise ``model`` with it and run
         ``decision_interval_period + 1`` iterations.
      3. For each of the remaining ``budget - 1`` decisions, greedily choose
         an inactive node, activate it and run ``decision_interval_period``
         iterations.
      4. Run ``max_propagation_after_last_seed - decision_interval_period``
         further iterations.

    Args:
        model: Diffusion model to run the experiment on; it is modified.
        budget: Total number of seeding decisions. The first seed is always
            selected, so a budget below 1 behaves like a budget of 1.
        decision_interval_period: Iterations run between seeding decisions.
        propagation_probability_function: 1, 2 or 3, selecting
            ``propagation_probability_1`` / ``_2`` / ``_3``.
        max_propagation_after_last_seed: Used to size the final propagation
            phase (see step 4).
        node_sample_proportion: Share of candidates scored per decision.
        iteration_num: Iterations per simulation while scoring a candidate.
        simulation_num: Simulations per candidate while scoring.

    Returns:
        ``(total_activated_node_count, iterations)``: the status-1 plus
        status-2 node count of the last iteration, and the concatenated list
        of all iteration results.

    Raises:
        ValueError: If ``propagation_probability_function`` is not 1, 2 or 3,
            or if no first seed could be selected.
    """
    # Resolve the sampler up front so an unknown id fails here with a clear
    # message instead of as an unbound local inside the edge loop below.
    probability_functions = {
        1: propagation_probability_1,
        2: propagation_probability_2,
        3: propagation_probability_3,
    }
    try:
        propagation_probability = probability_functions[propagation_probability_function]
    except (KeyError, TypeError):
        raise ValueError(
            "propagation_probability_function must be 1, 2 or 3, got %r"
            % (propagation_probability_function,)
        ) from None

    # Model Configuration
    config = mc.Configuration()

    # Setting the edge parameters
    # NOTE(review): edges are read from the notebook-level `g`, not from
    # `model`; presumably both refer to the same graph — verify against caller.
    for e in g.edges():
        threshold = propagation_probability()
        config.add_edge_configuration("threshold", e, threshold)

    print("decision # ", 1)
    first_seed = select_first_seed_node_with_a_greedy_algorithm(model, config, node_sample_proportion=node_sample_proportion, iteration_num=iteration_num, simulation_num=simulation_num)
    print("selected node: ", first_seed)

    # Without a first seed the run would be initialised with None as a node id.
    if first_seed is None:
        raise ValueError(
            "no first seed could be selected; check that the graph is not "
            "empty and that node_sample_proportion is large enough"
        )

    # Set all nodes to inactive at the beginning
    config.add_model_parameter('percentage_infected', 0.0)
    # Set first seed(s) to initialize the model
    config.add_model_initial_configuration('Infected', [first_seed])
    model.set_initial_status(config)

    # Run propagation steps after seeding the first node
    iterations = model.iteration_bunch(decision_interval_period + 1) # +1 as the first iteration only initializes the model, with no propagation

    # Propagation steps for subsequent seed nodes (decision 1 was made above)
    for decision_index in range(1, budget):
        print("decision # ", decision_index + 1)
        selected_seed_node_id = select_seed_node_with_a_greedy_algorithm(model, node_sample_proportion=node_sample_proportion, iteration_num=iteration_num, simulation_num=simulation_num)
        print("selected node: ", selected_seed_node_id)

        # The selector returns None when no inactive node is left to pick.
        # Writing status[None] would add a phantom node to the model, so in
        # that case only the propagation steps are run.
        if selected_seed_node_id is not None:
            model.status[selected_seed_node_id] = generate_seed_status()

        current_iterations = model.iteration_bunch(decision_interval_period)
        iterations += current_iterations

    propagation_iterations = model.iteration_bunch(max_propagation_after_last_seed-decision_interval_period)

    iterations += propagation_iterations

    final_node_count = iterations[-1]['node_count']
    total_activated_node_count = final_node_count[1] + final_node_count[2]

    return total_activated_node_count, iterations


In [7]:
# Propagation Model selection
# A single Independent Cascades model built on `g`; this one instance is the
# `model` argument of every run_experiment call in the experiment loop.
model = ids.IndependentCascadesModel(g)

In [8]:
# code in run_experiment function
# This cell is disabled on purpose: the `if False:` guard means nothing below
# ever executes. It mirrors the configuration steps performed inside
# run_experiment (with node 1 hard-coded as the infected seed).
if False:
    # Model Configuration
    config = mc.Configuration()
    # Set all nodes to inactive at the beginning
    config.add_model_parameter('percentage_infected', 0.0)
    # Set first seed(s) to initialize the model
    config.add_model_initial_configuration('Infected', [1])
    # config.add_model_initial_configuration('Infected', [1,2,3,4])

    # Setting the edge parameters
    for index, e in enumerate(g.edges()):
        threshold = propagation_probability_1()
    #     threshold = propagation_probability_2()
    #     threshold = propagation_probability_3()
        config.add_edge_configuration("threshold", e, threshold)

In [9]:
# Experiment settings; each one is passed to run_experiment by the loop below.
BUDGET = 10 # how many seeds are allowed to be activated
DECISION_INTERVAL_PERIOD = 10 # how many propagation steps before the next decision to pick a seed node
PROPAGATION_FUNCTION = 1 # F1, F2 or F3 to model the node propagation
PROP_AFTER_LAST_SEED = 200 # propagation steps to simulate after the budget is used up
NODE_SAMPLE_RATE = 0.01 # proportion of available nodes to run Monte Carlo simulations while deciding which node to select
# NODE_SAMPLE_RATE = 1.0 # proportion of available nodes to run Monte Carlo simulations while deciding which node to select
ITER_NUM = 50 # propagation steps to simulate while evaluating the reward of seeding a node
SIM_NUM = 3 # number of Monte Carlo simulations per node

In [10]:
# Number of run_experiment calls; their activated-node totals are averaged.
NUM_EXPERIMENTS_TO_RUN = 1

In [11]:
# Run the configured number of experiments and collect each run's total of
# activated nodes. `iterations` keeps the iteration results of the most recent
# run only (it is overwritten on every pass).
total_activated_node_count_list = []

for exp_index in range(NUM_EXPERIMENTS_TO_RUN):
    print("Running experiment # ", exp_index+1)
    total_activated_node_count, iterations = run_experiment(
                                                    model, 
                                                    budget=BUDGET, 
                                                    decision_interval_period=DECISION_INTERVAL_PERIOD, 
                                                    propagation_probability_function=PROPAGATION_FUNCTION, 
                                                    max_propagation_after_last_seed=PROP_AFTER_LAST_SEED, 
                                                    node_sample_proportion=NODE_SAMPLE_RATE, 
                                                    iteration_num=ITER_NUM, 
                                                    simulation_num=SIM_NUM
                                                )
    total_activated_node_count_list.append(total_activated_node_count)

# Average over all completed experiments.
mean_activated_node_count = np.mean(total_activated_node_count_list)

Running experiment #  1
decision #  1


 12%|██████████                                                                       | 43/345 [02:45<19:20,  3.84s/it]

KeyboardInterrupt: 

In [None]:
# mean_activated_node_count is assigned on the last line of the experiment
# cell. In the recorded session that cell stopped with KeyboardInterrupt, so
# the name only exists once the experiment loop has run to completion.
print("mean_activated_node_count: ", mean_activated_node_count)

In [None]:
# Per-experiment activated-node totals collected by the experiment loop.
total_activated_node_count_list

In [None]:
# model.status

In [None]:
# Debug inspection: call model.status_delta on the model's own current status
# and print the three values it returns.
delta, node_count, status_delta = model.status_delta(model.status)
print('delta: ', delta)
print('node_count: ', node_count)
print('status_delta: ', status_delta)

In [None]:
# iterations

In [None]:
# trends = model.build_trends(iterations)

# Visualization

In [None]:
# from bokeh.io import output_notebook, show
# from ndlib.viz.bokeh.DiffusionTrend import DiffusionTrend

# viz = DiffusionTrend(model, trends)
# p = viz.plot(width=400, height=400)
# #show(p)

In [None]:
# from ndlib.viz.bokeh.DiffusionPrevalence import DiffusionPrevalence

# viz2 = DiffusionPrevalence(model, trends)
# p2 = viz2.plot(width=400, height=400)
# show(p2)

In [None]:
# from ndlib.viz.bokeh.MultiPlot import MultiPlot
# vm = MultiPlot()
# vm.add_plot(p)
# vm.add_plot(p2)
# m = vm.plot()
# show(m)

In [None]:
# model.status