In [11]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import figure
import random as rd
import copy
import time

In [3]:
class Independent_Cascade():
    def __init__(self):
        self.g = nx.DiGraph()

    def fit(self, g):
        # fit graph with probability
        out_degree = g.out_degree(weight='None')
        max_out_degree = max(out_degree, key=lambda item:item[1])
        for e in g.edges():
            if(out_degree[e[0]] >= 10):
                # g[e[0]][e[1]]['probability'] = 1 - out_degree[e[0]]/max_out_degree[1]
                g[e[0]][e[1]]['probability'] = 1 / int(np.log(out_degree[e[0]]))
            else:
                g[e[0]][e[1]]['probability'] = 1
        self.g = g
        return g
        
    # diffusion to all possible nodes
    def diffusion_all(self, seed_nodes):
        if(seed_nodes == []):
            return [], []
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        while(True):
            new_activated_nodes = []
            for node in old_activated_nodes:
                for predecessors  in self.g.predecessors(node):
                    if( predecessors in activated_nodes):
                        continue
                    if self.g[predecessors][node]['probability'] >= rd.random():
                        new_activated_nodes.append(predecessors)
                activated_nodes.extend(new_activated_nodes)
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

    # diffusion to max step
    def diffusion_step(self, seed_nodes, max_step=1):
        if(seed_nodes == []):
            return [], []
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        for _ in range(max_step):
            new_activated_nodes = []
            for node in old_activated_nodes:
                for predecessors  in self.g.predecessors(node):
                    if( predecessors in activated_nodes):
                        continue
                    if self.g[predecessors][node]['probability'] >= rd.random():
                        new_activated_nodes.append(predecessors)
                activated_nodes.extend(new_activated_nodes)
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

In [19]:
class Decreasing_Cascade():
    def __init__(self):
        self.g = nx.DiGraph()
        self.num_nodes = 0
        self.node_label = []
        self.label2id = {}
        self.max_out_degree = 0
        self.probability = None

    def fit(self, g):
        # fit graph with probability
        self.g = g
        self.num_nodes = g.number_of_nodes()
        self.node_label = [i for i in g.nodes()]
        self.label2id = {self.node_label[i]: i for i in range(self.num_nodes)}
        self.max_out_degree = max(j for _, j in g.out_degree(weight='None'))
        out_degree = g.out_degree(weight='None')
        self.probability = np.zeros((self.num_nodes, self.num_nodes), dtype=float)
        for e in g.edges():
            if(out_degree[e[0]] >= 10):
                self.probability[self.label2id[e[0]], self.label2id[e[1]]] = 1 / int(np.log(out_degree[e[0]]))
            else:
                self.probability[self.label2id[e[0]], self.label2id[e[1]]] = 1
        return g
        
    # diffusion to all possible nodes
    def diffusion_all(self, seed_nodes):
        if(seed_nodes == []):
            return [], []
        activated_nodes = [self.label2id[name] for name in seed_nodes]
        old_activated_nodes = copy.deepcopy(activated_nodes)
        activate_nums = [len(activated_nodes)]
        inform_times = np.zeros(self.num_nodes)
        while(True):
            new_activated_nodes = []
            new_inform_times = np.zeros(self.num_nodes)
            for node in old_activated_nodes:
                predecessors = self.probability[:, node].nonzero()[0]
                if(len(predecessors) == 0):
                    continue
                for predecessor in predecessors:
                    if(predecessor in activated_nodes):
                        continue
                    new_inform_times[predecessor] += 1
                    if (self.probability[predecessor][node] >= (rd.random() + inform_times[predecessor] / self.max_out_degree)):
                        new_activated_nodes.append(predecessor)
                activated_nodes.extend(new_activated_nodes)
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activate_nums.append(len(new_activated_nodes))
            inform_times = inform_times + new_inform_times
        return activated_nodes, activate_nums

    # diffusion to max step
    def diffusion_step(self, seed_nodes, max_step=1):
        if(seed_nodes == []):
            return [], []
        activated_nodes = [self.label2id[name] for name in seed_nodes]
        old_activated_nodes = copy.deepcopy(activated_nodes)
        activate_nums = [len(activated_nodes)]
        inform_times = np.zeros(self.num_nodes)
        for step in range(max_step):
            new_activated_nodes = []
            new_inform_times = np.zeros(self.num_nodes)
            for node in old_activated_nodes:
                predecessors = self.probability[:, node].nonzero()[0]
                if(len(predecessors) == 0):
                    continue
                for predecessor in predecessors:
                    if(predecessor in activated_nodes):
                        continue
                    new_inform_times[predecessor] += 1
                    if (self.probability[predecessor][node] >= (rd.random() + inform_times[predecessor] / self.max_out_degree)):
                        new_activated_nodes.append(predecessor)
                activated_nodes.extend(new_activated_nodes)
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activate_nums.append(len(new_activated_nodes))
            inform_times = inform_times + new_inform_times
        return activated_nodes, activate_nums

In [20]:
def Naive_Greedy(g, k, model_type):
    if(model_type == 'IC'):
        model = Independent_Cascade()
    elif(model_type == 'DC'):
        model = Decreasing_Cascade()
    model.fit(g)
    max_nodes = []
    start_time = time.time()
    for i in range(k):
        max_num = 0
        for node in g.nodes() - set(max_nodes):
            _, activate_nums = model.diffusion_step(max_nodes + [node], 1)
            if(len(activate_nums) > 1 and activate_nums[1] > max_num):
                max_num = activate_nums[1]
                max_node = node
        max_nodes.append(max_node)
        print('Greedy: ', i, ' time: ', time.time() - start_time)
    return max_nodes

In [7]:
G = nx.read_gml('PB2020.gml')

In [13]:
max_nodes = Naive_Greedy(G, 7, 'IC')
print(max_nodes)

Greedy:  0  time:  0.056549787521362305
Greedy:  1  time:  6.117049694061279
Greedy:  2  time:  46.887306213378906
Greedy:  3  time:  157.17537879943848
Greedy:  4  time:  335.29198837280273
Greedy:  5  time:  573.6669981479645
Greedy:  6  time:  879.822151184082
['principe_giovan', 'Premises187', 'MoralDK', 'proudboy_', 'enrique_tarrio', 'GavinM_ProudBoy', 'proudboy2012']


In [21]:
max_nodes = Naive_Greedy(G, 7, 'DC')
print(max_nodes)

Greedy:  0  time:  19.21763038635254
Greedy:  1  time:  45.43269228935242
Greedy:  2  time:  132.1605098247528
Greedy:  3  time:  304.2506899833679
Greedy:  4  time:  586.5289032459259
Greedy:  5  time:  950.7929458618164
Greedy:  6  time:  1412.717613697052
['principe_giovan', 'Premises187', 'MoralDK', 'proudboy_', 'enrique_tarrio', 'GavinM_ProudBoy', 'proudboy2012']
