In [8]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import figure
import random as rd
import copy

In [3]:
class Independent_Cascade():
    def __init__(self):
        self.g = nx.DiGraph()

    def fit(self, g):
        # fit graph with probability
        out_degree = g.out_degree(weight='None')
        max_out_degree = max(out_degree, key=lambda item:item[1])
        for e in g.edges():
            if(out_degree[e[0]] >= 10):
                # g[e[0]][e[1]]['probability'] = 1 - out_degree[e[0]]/max_out_degree[1]
                g[e[0]][e[1]]['probability'] = 1 / int(np.log(out_degree[e[0]]))
            else:
                g[e[0]][e[1]]['probability'] = 1
        self.g = g
        return g
        
    # diffusion to all possible nodes
    def diffusion_all(self, seed_nodes):
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        while(True):
            new_activated_nodes = []
            for node in self.g.nodes():
                if node in activated_nodes:
                    continue
                for neighbor in self.g.neighbors(node):
                    if neighbor in old_activated_nodes and self.g[node][neighbor]['probability'] >= rd.random():
                        new_activated_nodes.append(node)
                        break
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activated_nodes.extend(new_activated_nodes)
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

    # diffusion to max step
    def diffusion_step(self, seed_nodes, max_step=1):
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        for _ in range(max_step):
            new_activated_nodes = []
            for node in self.g.nodes():
                if node in activated_nodes:
                    continue
                for neighbor in self.g.neighbors(node):
                    if neighbor in old_activated_nodes and self.g[node][neighbor]['probability'] >= rd.random():
                        new_activated_nodes.append(node)
                        break
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activated_nodes.extend(new_activated_nodes)
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

In [2]:
class Decreasing_Cascade():
    def __init__(self):
        self.g = nx.DiGraph()

    def fit(self, g):
        # fit graph with probability
        out_degree = g.out_degree(weight='None')
        nx.set_node_attributes(g, 0, "inform_times")
        for e in g.edges():
            if(out_degree[e[0]] >= 10):
                g[e[0]][e[1]]['probability'] = 1 / int(np.log(out_degree[e[0]]))
            else:
                g[e[0]][e[1]]['probability'] = 1
        self.g = g
        return g
        
    # diffusion to all possible nodes
    def diffusion_all(self, seed_nodes):
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        # sum_out_degree = sum(j for i, j in self.g.out_degree(weight='None'))
        max_out_degree = max(j for i, j in self.g.out_degree(weight='None'))
        while(True):
            new_activated_nodes = []
            for node in self.g.nodes():
                if node in activated_nodes:
                    continue
                for neighbor in self.g.neighbors(node):
                    if neighbor in old_activated_nodes:
                        if self.g[node][neighbor]['probability'] >= (rd.random() + self.g.nodes()[node]['inform_times'] / max_out_degree):
                            new_activated_nodes.append(node)
                            break
                        self.g.nodes()[node]['inform_times'] += 1
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activated_nodes.extend(new_activated_nodes)
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

    # diffusion to max step
    def diffusion_step(self, seed_nodes, max_step=1):
        activated_nodes = copy.deepcopy(seed_nodes)
        old_activated_nodes = seed_nodes
        activate_nums = [len(activated_nodes)]
        max_out_degree = max(j for i, j in self.g.out_degree(weight='None'))
        for _ in range(max_step):
            new_activated_nodes = []
            for node in self.g.nodes():
                if node in activated_nodes:
                    continue
                for neighbor in self.g.neighbors(node):
                    if neighbor in old_activated_nodes:
                        if self.g[node][neighbor]['probability'] >= (rd.random() + self.g.nodes()[node]['inform_times'] / max_out_degree):
                            new_activated_nodes.append(node)
                            break
                        self.g.nodes()[node]['inform_times'] += 1
            if len(new_activated_nodes) == 0:
                break
            old_activated_nodes = new_activated_nodes
            activated_nodes.extend(new_activated_nodes)
            activate_nums.append(len(new_activated_nodes))
        return activated_nodes, activate_nums

In [4]:
def Naive_Greedy(g, k, model_type):
    if(model_type == 'IC'):
        model = Independent_Cascade()
    elif(model_type == 'DC'):
        model = Decreasing_Cascade()
    model.fit(g)
    max_nodes = []
    for _ in range(k):
        max_num = 0
        for node in g.nodes() - set(max_nodes):
            activated_nodes, activate_nums = model.diffusion_step([max_nodes, node], 1)
            if(len(activate_nums) > 1 and activate_nums[1] > max_num):
                max_num = activate_nums[1]
                max_node = node
        max_nodes.append(max_node)
    return max_nodes

In [5]:
G = nx.read_gml('PB2020.gml')

In [43]:
max_nodes = Naive_Greedy(G, 10, 'IC')
print(max_nodes)

['principe_giovan', 'Premises187', 'MoralDK', 'proudboy_', 'enrique_tarrio', 'GavinM_ProudBoy', 'proudboy2012', 'ProudBoysGBG', 'ProudBoysUS', 'ProudBoyzLondon']


In [6]:
max_nodes = Naive_Greedy(G, 10, 'DC')
print(max_nodes)

['principe_giovan', 'Premises187', 'MoralDK', 'proudboy_', 'enrique_tarrio', 'GavinM_ProudBoy', 'proudboy2012', 'ProudBoysGBG', 'ProudBoysUS', 'ProudBoyzLondon']
