In [1]:
import random
import numpy as np
from tqdm import tqdm

random.seed(123)
np.random.seed(123)

# Loading social network graph data

In [2]:
import networkx as nx
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ids

# Load social network graph
g = nx.read_edgelist("data/facebook_combined.txt", create_using = nx.Graph(), nodetype = int)

In [3]:
#Quick snapshot of the Network
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 4039
Number of edges: 88234
Average degree:  43.6910


# Defining experiment functions

In [4]:
# propagation probability functions
def propagation_probability_1():
    return 0.01

def propagation_probability_2():
    return np.random.exponential(scale=0.01)

def propagation_probability_3():
    sampled_prob = random.choice([0.1, 0.01, 0.001])
    return sampled_prob

In [5]:
# functions for greedy algorithm and Dynamic Independent Cascade

def generate_seed_status(activation_probability=1):
    if random.random() <= activation_probability:
        return 1
    else:
        return 0

def evaluated_expected_activated_nodes(model, seeds=[], iteration_num=10, simulation_num=3):
    current_status = model.status.copy()
    total_activated_node_count = 0

    for simulation_index in range(simulation_num):
        if simulation_index != 0:
            model.reset()
            model.status = current_status.copy()

        # activating seeds
        for s in seeds:
            model.status[s] = generate_seed_status()

        # starting propagation simulation
        iterations = model.iteration_bunch(iteration_num)
        
        current_simulation_activated_node_count = iterations[-1]['node_count'][1] + iterations[-1]['node_count'][2]

        total_activated_node_count += current_simulation_activated_node_count


    expected_activated_nodes = total_activated_node_count / simulation_num

    model.reset()
    model.status = current_status.copy()

    return expected_activated_nodes


def select_seed_node_with_a_greedy_algorithm(model, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):
    current_status = model.status.copy()
    candidate_nodes = []
    # node_score = {}
    max_expected_activated_nodes = 0
    node_top_choice = None
    for node_id, node_status in model.status.items():
        if node_status == 0:
            candidate_nodes.append(node_id)

    if node_sample_proportion != 1.0:            
        random.shuffle(candidate_nodes)
        sample_count = int(len(candidate_nodes) * node_sample_proportion)
        candidate_nodes = candidate_nodes[:sample_count]

    for node_id in tqdm(candidate_nodes):
        expected_activated_nodes = evaluated_expected_activated_nodes(model, seeds=[node_id], iteration_num=iteration_num, simulation_num=simulation_num)
        # node_score[node_id] = expected_activated_nodes
        if max_expected_activated_nodes < expected_activated_nodes:
            max_expected_activated_nodes = expected_activated_nodes
            node_top_choice = node_id

    model.reset()
    model.status = current_status.copy()

    return node_top_choice




def evaluated_expected_activated_nodes_for_first_seed(graph, config, seeds=[], iteration_num=10, simulation_num=3):
    # current_status = model.status.copy()
    total_activated_node_count = 0

    for simulation_index in range(simulation_num):

        # Propagation Model selection
        simulated_model = ids.IndependentCascadesModel(graph)
        # # Model Configuration
        # config = mc.Configuration()
        # Set all nodes to inactive at the beginning
        config.add_model_parameter('percentage_infected', 0.0)
        # Set first seed(s) to initialize the model
        activated_seeds = []
        for s in seeds:
            activated_seeds.append(s)
        config.add_model_initial_configuration('Infected', activated_seeds)
        # config.add_model_initial_configuration('Infected', [1,2,3,4])

        simulated_model.set_initial_status(config)

        iterations = simulated_model.iteration_bunch(iteration_num)
        
        current_simulation_activated_node_count = iterations[-1]['node_count'][1] + iterations[-1]['node_count'][2]

        total_activated_node_count += current_simulation_activated_node_count


    expected_activated_nodes = total_activated_node_count / simulation_num

    # simulated_model.reset()

    return expected_activated_nodes



def select_first_seed_node_with_a_greedy_algorithm(model, config, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):
    candidate_nodes = list(model.graph.nodes())
    # node_score = {}
    max_expected_activated_nodes = 0
    node_top_choice = None

    if node_sample_proportion != 1.0:            
        random.shuffle(candidate_nodes)
        sample_count = int(len(candidate_nodes) * node_sample_proportion)
        candidate_nodes = candidate_nodes[:sample_count]

    for node_id in tqdm(candidate_nodes):
        expected_activated_nodes = evaluated_expected_activated_nodes_for_first_seed(model.graph.graph, config, seeds=[node_id], iteration_num=iteration_num, simulation_num=simulation_num)
        # node_score[node_id] = expected_activated_nodes
        if max_expected_activated_nodes < expected_activated_nodes:
            max_expected_activated_nodes = expected_activated_nodes
            node_top_choice = node_id

    return node_top_choice


In [6]:
# function to execute one run of experiment
def run_experiment(model, budget=10, decision_interval_period=5, propagation_probability_function=1, max_propagation_after_last_seed=1000, node_sample_proportion=1.0, iteration_num=10, simulation_num=3):

    # Set propagation_probability_function
    if propagation_probability_function == 1:
        propagation_probability = propagation_probability_1
    elif propagation_probability_function == 2:
        propagation_probability = propagation_probability_2
    elif propagation_probability_function == 3:
        propagation_probability = propagation_probability_3

    # Model Configuration
    config = mc.Configuration()

    # Setting the edge parameters
    for index, e in enumerate(g.edges()):
        threshold = propagation_probability()
        config.add_edge_configuration("threshold", e, threshold)

    
    print("decision # ", 1)
    first_seed = select_first_seed_node_with_a_greedy_algorithm(model, config, node_sample_proportion=node_sample_proportion, iteration_num=iteration_num, simulation_num=simulation_num)
    print("selected node: ", first_seed)
    
    # Set all nodes to inactive at the beginning
    config.add_model_parameter('percentage_infected', 0.0)
    # Set first seed(s) to initialize the model
    config.add_model_initial_configuration('Infected', [first_seed])
    # config.add_model_initial_configuration('Infected', [1,2,3,4])
    model.set_initial_status(config)

    # Run propagation steps after seeding the first node
    iterations = model.iteration_bunch(decision_interval_period + 1) # +1 as the first iteration only initializes the model, with no propagation

    # Propagation steps for subsequent seed nodes
    for decision_index in range(budget):
        if decision_index != 0:
            print("decision # ", decision_index + 1)
            # propagation for non-first seed
            selected_seed_node_id = select_seed_node_with_a_greedy_algorithm(model, node_sample_proportion=node_sample_proportion, iteration_num=iteration_num, simulation_num=simulation_num)
            print("selected node: ", selected_seed_node_id)
            # activate selected node
            model.status[selected_seed_node_id] = generate_seed_status()
            current_iterations = model.iteration_bunch(decision_interval_period)

            iterations += current_iterations

    propagation_iterations = model.iteration_bunch(max_propagation_after_last_seed-decision_interval_period)

    iterations += propagation_iterations

    total_activated_node_count = iterations[-1]['node_count'][1] + iterations[-1]['node_count'][2]

    return total_activated_node_count, iterations


In [7]:
# Propagation Model selection
model = ids.IndependentCascadesModel(g)

In [8]:
# code in run_experiment function
if False:
    # Model Configuration
    config = mc.Configuration()
    # Set all nodes to inactive at the beginning
    config.add_model_parameter('percentage_infected', 0.0)
    # Set first seed(s) to initialize the model
    config.add_model_initial_configuration('Infected', [1])
    # config.add_model_initial_configuration('Infected', [1,2,3,4])

    # Setting the edge parameters
    for index, e in enumerate(g.edges()):
        threshold = propagation_probability_1()
    #     threshold = propagation_probability_2()
    #     threshold = propagation_probability_3()
        config.add_edge_configuration("threshold", e, threshold)

In [9]:
BUDGET = 15 # how many seeds are allowed to be activated
DECISION_INTERVAL_PERIOD = 10 # how many propagation steps before the next decision to pick a seed node
PROPAGATION_FUNCTION = 1 # F1, F2 or F3 to model the node propagation
PROP_AFTER_LAST_SEED = 200 # propagation steps to simulate after the budget is used up
NODE_SAMPLE_RATE = 0.01 # proportion of available nodes to run Monte Carlo simulations while deciding which node to select
# NODE_SAMPLE_RATE = 1.0 # proportion of available nodes to run Monte Carlo simulations while deciding which node to select
ITER_NUM = 50 # propagation steps to simulate while evaluating the reward of seeding a node
SIM_NUM = 3 # number of Monte Carlo simluations per node

In [10]:
NUM_EXPERIMENTS_TO_RUN = 3

In [11]:
total_activated_node_count_list = []

for exp_index in range(NUM_EXPERIMENTS_TO_RUN):
    print("Running experiment # ", exp_index+1)
    total_activated_node_count, iterations = run_experiment(
                                                    model, 
                                                    budget=BUDGET, 
                                                    decision_interval_period=DECISION_INTERVAL_PERIOD, 
                                                    propagation_probability_function=PROPAGATION_FUNCTION, 
                                                    max_propagation_after_last_seed=PROP_AFTER_LAST_SEED, 
                                                    node_sample_proportion=NODE_SAMPLE_RATE, 
                                                    iteration_num=ITER_NUM, 
                                                    simulation_num=SIM_NUM
                                                )
    total_activated_node_count_list.append(total_activated_node_count)

mean_activated_node_count = np.mean(total_activated_node_count_list)

Running experiment #  1
decision #  1


100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [00:13<00:00,  2.94it/s]


selected node:  1912
decision #  2


100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [00:10<00:00,  3.72it/s]


selected node:  1516
decision #  3


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [00:11<00:00,  3.55it/s]


selected node:  2395
decision #  4


100%|██████████████████████████████████████████████████████████████████████████████████| 38/38 [00:11<00:00,  3.41it/s]


selected node:  3979
decision #  5


100%|██████████████████████████████████████████████████████████████████████████████████| 38/38 [00:10<00:00,  3.67it/s]


selected node:  1736
decision #  6


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.67it/s]


selected node:  2928
decision #  7


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.69it/s]


selected node:  3400
decision #  8


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.68it/s]


selected node:  1959
decision #  9


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.67it/s]


selected node:  1584
decision #  10


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  404
decision #  11


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.64it/s]


selected node:  3590
decision #  12


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.64it/s]


selected node:  2203
decision #  13


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  2467
decision #  14


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  2681
decision #  15


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  3378
Running experiment #  2
decision #  1


100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [00:13<00:00,  3.00it/s]


selected node:  1962
decision #  2


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.44it/s]


selected node:  1966
decision #  3


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.16it/s]


selected node:  1686
decision #  4


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.61it/s]


selected node:  3173
decision #  5


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.59it/s]


selected node:  2135
decision #  6


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.68it/s]


selected node:  942
decision #  7


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  1839
decision #  8


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.62it/s]


selected node:  3146
decision #  9


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.64it/s]


selected node:  1204
decision #  10


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.63it/s]


selected node:  1315
decision #  11


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.65it/s]


selected node:  815
decision #  12


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.67it/s]


selected node:  1752
decision #  13


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.51it/s]


selected node:  1065
decision #  14


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.64it/s]


selected node:  1879
decision #  15


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.66it/s]


selected node:  1616
Running experiment #  3
decision #  1


100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [00:13<00:00,  3.07it/s]


selected node:  2485
decision #  2


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.67it/s]


selected node:  2518
decision #  3


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.62it/s]


selected node:  1684
decision #  4


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.65it/s]


selected node:  3552
decision #  5


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.67it/s]


selected node:  1378
decision #  6


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.65it/s]


selected node:  2223
decision #  7


100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:10<00:00,  3.65it/s]


selected node:  1292
decision #  8


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:09<00:00,  3.64it/s]


selected node:  1953
decision #  9


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:09<00:00,  3.67it/s]


selected node:  1078
decision #  10


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:09<00:00,  3.65it/s]


selected node:  2294
decision #  11


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:09<00:00,  3.66it/s]


selected node:  2179
decision #  12


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:10<00:00,  3.64it/s]


selected node:  2664
decision #  13


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:10<00:00,  3.65it/s]


selected node:  2939
decision #  14


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:09<00:00,  3.65it/s]


selected node:  503
decision #  15


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:10<00:00,  3.58it/s]


selected node:  1680


In [12]:
print("mean_activated_node_count: ", mean_activated_node_count)

mean_activated_node_count:  315.3333333333333


In [13]:
total_activated_node_count_list

[269, 303, 374]

In [14]:
model.status

{0: 0,
 1: 0,
 2: 0,
 3: 0,
 4: 0,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 0,
 10: 0,
 11: 0,
 12: 0,
 13: 0,
 14: 0,
 15: 0,
 16: 0,
 17: 0,
 18: 0,
 19: 0,
 20: 0,
 21: 0,
 22: 0,
 23: 0,
 24: 0,
 25: 0,
 26: 0,
 27: 0,
 28: 0,
 29: 0,
 30: 0,
 31: 0,
 32: 0,
 33: 0,
 34: 0,
 35: 0,
 36: 0,
 37: 0,
 38: 0,
 39: 0,
 40: 0,
 41: 0,
 42: 0,
 43: 0,
 44: 0,
 45: 0,
 46: 0,
 47: 0,
 48: 0,
 49: 0,
 50: 0,
 51: 0,
 52: 0,
 53: 0,
 54: 0,
 55: 0,
 56: 0,
 57: 0,
 58: 0,
 59: 0,
 60: 0,
 61: 0,
 62: 0,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 0,
 68: 0,
 69: 0,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 0,
 75: 0,
 76: 0,
 77: 0,
 78: 0,
 79: 0,
 80: 0,
 81: 0,
 82: 0,
 83: 0,
 84: 0,
 85: 0,
 86: 0,
 87: 0,
 88: 0,
 89: 0,
 90: 0,
 91: 0,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 0,
 97: 0,
 98: 0,
 99: 0,
 100: 0,
 101: 0,
 102: 0,
 103: 0,
 104: 0,
 105: 0,
 106: 0,
 107: 0,
 108: 0,
 109: 0,
 110: 0,
 111: 0,
 112: 0,
 113: 0,
 114: 0,
 115: 0,
 116: 0,
 117: 0,
 118: 0,
 119: 0,
 120: 0,
 121: 0,
 122: 0,
 12

In [15]:
delta, node_count, status_delta = model.status_delta(model.status)
print('delta: ', delta)
print('node_count: ', node_count)
print('status_delta: ', status_delta)

delta:  {}
node_count:  {0: 3665, 1: 0, 2: 374}
status_delta:  {0: 0, 1: 0, 2: 0}


In [16]:
iterations

[{'iteration': 200,
  'status': {2485: 2},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: -1, 2: 1}},
 {'iteration': 201,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 202,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 203,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 204,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 205,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 206,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 207,
  'status': {},
  'node_count': {0: 3735, 1: 0, 2: 304},
  'status_delta': {0: 0, 1: 0, 2: 0}},
 {'iteration': 208,
  'status': {},
  'node_count': {0: 3735, 1:

In [17]:
trends = model.build_trends(iterations)

# Visualization

In [21]:
from bokeh.io import output_notebook, show
from ndlib.viz.bokeh.DiffusionTrend import DiffusionTrend

viz = DiffusionTrend(model, trends)
p = viz.plot(width=400, height=400)
show(p)

In [22]:
from ndlib.viz.bokeh.DiffusionPrevalence import DiffusionPrevalence

viz2 = DiffusionPrevalence(model, trends)
p2 = viz2.plot(width=400, height=400)
show(p2)

In [20]:
from ndlib.viz.bokeh.MultiPlot import MultiPlot
vm = MultiPlot()
vm.add_plot(p)
vm.add_plot(p2)
m = vm.plot()
show(m)

RuntimeError: Models must be owned by only a single document, BoxAnnotation(id='1215', ...) is already in a doc

In [None]:
model.status