In [62]:
import argparse
import sys
import random
import heapq

sys.path.insert(0,'..')
from utils.digraph import DiGraph
from utils.esitimater import Estimater

class InfluenceNetwork(DiGraph):
    '''
    DiGraph subclass that can be populated from an edge-list text file.

    Adds a ``spec`` dict holding the node and edge counts declared in the
    file header; both stay at -1 until load_from_file() has run.
    '''
    def __init__(self):
        DiGraph.__init__(self)
        self.spec = {'nodes':-1, 'edges':-1}

    def load_from_file(self, filed):
        '''
        Populate the graph from an open text file.

        The first line is the header: its first two whitespace-separated
        tokens are stored as spec['nodes'] and spec['edges']. Every later
        line with exactly three tokens "<a> <b> <weight>" is passed to
        add_weighted_edge((int(a), int(b)), float(weight)); lines with any
        other token count are skipped.

        filed: readable file-like object supporting readlines().
        Raises IndexError if the file is empty or the header has fewer than
        two tokens, ValueError if a token is not numeric.
        '''
        lines = filed.readlines()
        # Split into tokens first: indexing the raw string would read single
        # characters, so a header such as "62 159" would be parsed as 6 and 2.
        header = lines[0].split()
        self.spec['nodes'] = int(header[0])
        self.spec['edges'] = int(header[1])
        for line in lines[1:]:
            data = line.split()
            if len(data) == 3:
                self.add_weighted_edge((int(data[0]), int(data[1])), float(data[2]))

def loadseeds(filed):
    '''
    Read seed node ids from an open text file.

    Only lines made of exactly one whitespace-separated token are used; each
    such token is converted with int(). Returns the ids as a set.
    '''
    tokenized = (row.split() for row in filed.readlines())
    return set(int(tokens[0]) for tokens in tokenized if len(tokens) == 1)

In [67]:
# Context managers close both files even if parsing raises part-way through.
with open('../network.txt') as network, open('../seeds.txt') as seed_f:
    graph = InfluenceNetwork()
    graph.load_from_file(network)
    seeds = loadseeds(seed_f)
print(seeds)
print(len(graph.vertices()))

set([56, 58, 53, 62])
62


In [57]:
# Inspect the attribute dict stored at graph[9][4] (presumably the edge 9 -> 4; confirm against DiGraph).
graph[9][4]

{'weight': 0.333333}

In [4]:
# Inspect what graph.inverse stores under node 4 (presumably its incoming edges; confirm against DiGraph).
graph.inverse[4]

{9: {'weight': 0.333333}, 15: {'weight': 0.333333}, 60: {'weight': 0.333333}}

In [5]:
import random
from __future__ import division

In [83]:
def ic_simulate(seed_set=None, network=None, verbose=True):
    '''
    Run one Independent Cascade (IC) diffusion and return the activated set.

    Starting from the seeds, every newly activated node gets exactly one round
    in which it tries to activate each not-yet-active entry of network[node];
    an attempt succeeds when a fresh random.random() draw is below that
    edge's 'weight'.

    seed_set: iterable of initially active nodes; defaults to the global `seeds`.
    network:  mapping node -> {neighbour: {'weight': p}}; defaults to the
              global `graph`.
    verbose:  when true (the default), print the activated set once per round.
    Returns the set of all activated nodes, seeds included.
    '''
    if seed_set is None:
        seed_set = seeds
    if network is None:
        network = graph
    frontier = set(seed_set)
    # The frontier counts as active while it is being processed. Otherwise a
    # frontier node could be re-queued by one of its peers and would get a
    # second round of activation attempts, inflating the spread.
    activated = set(frontier)
    while frontier:
        new_layer = set()
        for node in frontier:
            # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
            for linked_node, value in network[node].items():
                if linked_node in activated:
                    continue
                if random.random() < value['weight']:
                    new_layer.add(linked_node)
        if verbose:
            print(activated)
        activated |= new_layer
        frontier = new_layer
    return activated

def lt_simulate(seed_set=None, network=None):
    '''
    Run one Linear Threshold (LT) diffusion and return the activated set.

    Every vertex returned by network.vertices() draws a threshold from
    random.random(). An inactive vertex becomes active once the summed
    'weight' of its already-active entries in network.inverse[vertex] is
    strictly greater than its threshold.

    seed_set: iterable of initially active nodes; defaults to the global `seeds`.
    network:  graph object offering vertices(), network[v] and
              network.inverse[v]; defaults to the global `graph`.
    Returns the set of all activated nodes, seeds included.
    '''
    if seed_set is None:
        seed_set = seeds
    if network is None:
        network = graph
    activated = set(seed_set)
    threshold = dict()
    for node in network.vertices():
        threshold[node] = random.random()

    def get_nextround(changed_vertices):
        '''Collect the still-inactive nodes listed under the changed vertices.'''
        next_round = set()
        for vertex in changed_vertices:
            next_round.update(network[vertex].keys())
        # Drop vertices that are already active: re-examining them would put
        # them back into changed_vertices, and a cycle of active vertices
        # would then keep the outer loop running forever.
        return next_round - activated

    next_round = get_nextround(activated)
    while next_round:
        changed_vertices = set()
        for node in next_round:
            indicator = 0
            # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
            for linked_node, value in network.inverse[node].items():
                if linked_node in activated:
                    indicator += value['weight']
            if indicator > threshold[node]:
                changed_vertices.add(node)
                activated.add(node)
        next_round = get_nextround(changed_vertices)
    return activated

In [11]:
# Run a single IC cascade; the call prints the activated set once per round and the cell shows the returned set.
ic_simulate()

set([1, 12, 14, 15])
set([1, 6, 7, 12, 14, 15])


{1, 6, 7, 12, 14, 15}

In [68]:
# Display the seed set currently bound to the global `seeds`.
seeds

{53, 56, 58, 62}

In [84]:
import time
# One name for the run count so the loop bound and the divisor of the average
# cannot drift apart.
num_runs = 10000
start = time.time()
cnt = 0
for _ in range(num_runs):
    cnt += len(lt_simulate())
# float() keeps the average exact on Python 2 even when the cell with
# `from __future__ import division` has not been executed in this kernel.
print(float(cnt)/num_runs, time.time() - start)

(31.8443, 2.0672390460968018)


In [48]:
from __future__ import division
def ic_evaluate(seeds, rounds=10000, network=None):
    '''
    Monte Carlo estimate of the Independent Cascade spread of `seeds`.

    Runs `rounds` independent cascades and returns the mean number of
    activated nodes (seeds included) as a float. Within a cascade every newly
    activated node gets exactly one round of attempts on the not-yet-active
    entries of network[node]; an attempt succeeds when a random.random() draw
    is below the edge's 'weight'.

    seeds:   iterable of initially active nodes.
    rounds:  number of simulated cascades (default 10000).
    network: mapping node -> {neighbour: {'weight': p}}; defaults to the
             global `graph`.
    Raises ValueError if rounds is not positive.
    '''
    if rounds <= 0:
        raise ValueError('rounds must be a positive integer')
    if network is None:
        network = graph
    seed_nodes = set(seeds)
    cnt = 0
    for _ in range(rounds):
        # Seeds are active from the start, so a seed cannot be re-queued by
        # another seed and spend a second round of activation attempts.
        activated = set(seed_nodes)
        frontier = seed_nodes
        while frontier:
            new_layer = set()
            for node in frontier:
                # .items() works on both Python 2 and 3.
                for linked_node, value in network[node].items():
                    if linked_node not in activated and random.random() < value['weight']:
                        new_layer.add(linked_node)
            activated |= new_layer
            frontier = new_layer
        cnt += len(activated)
    # float() so the mean does not depend on `from __future__ import division`.
    return cnt / float(rounds)

In [42]:
# Seed-set size: ic_celf() and degree_discount() both read this global to decide how many nodes to select.
num_k = 10

def ic_celf(k=None, evaluate=None, candidates=None):
    '''
    Greedy seed selection with CELF-style lazy re-evaluation.

    Each candidate sits in a min-heap as (-gain, node, evaluated_at), where
    `gain` is the spread it added when last evaluated and `evaluated_at` is
    the size the seed set had at that moment. The heap top is selected only
    if its gain was computed against the current seed set; otherwise it is
    re-evaluated and pushed back.

    k:          number of seeds to pick; defaults to the global `num_k`.
    evaluate:   callable mapping a set of nodes to its spread; defaults to
                `ic_evaluate`.
    candidates: iterable of selectable nodes; defaults to graph.vertices().
    Returns (seed_set, spread), where spread is the sum of the gains of the
    selected nodes. Fewer than k seeds are returned when there are fewer
    candidates; k <= 0 returns (set(), 0).
    '''
    if k is None:
        k = num_k
    if evaluate is None:
        evaluate = ic_evaluate
    if k <= 0:
        return set(), 0
    if candidates is None:
        candidates = graph.vertices()

    cur_set = set()
    cur_spread = 0
    # heapq is a min-heap, so gains are stored negated.
    state_list = [(-evaluate({nodeid}), nodeid, 0) for nodeid in candidates]
    heapq.heapify(state_list)
    # Stop when the heap runs dry so k > number of candidates cannot raise.
    while len(cur_set) < k and state_list:
        neg_gain, nodeid, evaluated_at = heapq.heappop(state_list)
        if evaluated_at == len(cur_set):
            # The gain is fresh for the current seed set and it is the heap
            # minimum, so no other candidate can beat it.
            cur_set.add(nodeid)
            cur_spread += -neg_gain
        else:
            # Stale gain (computed against a smaller seed set): refresh it.
            # The explicit stamp replaces a sentinel comparison, which could
            # pick a stale entry on ties or when noisy gains fall below -1.
            new_spread = evaluate(set.union(cur_set, {nodeid}))
            diff = new_spread - cur_spread
            heapq.heappush(state_list, (-diff, nodeid, len(cur_set)))
    return cur_set, cur_spread

In [43]:
import time
# Time one full ic_celf() run: prints its return value, then the elapsed wall-clock seconds.
st = time.time()
print(ic_celf())
print(time.time()-st)

(set([60, 48, 50, 52, 53, 56, 58, 28, 61, 62]), 43.9829)
22.2927880287


In [33]:
# Estimate the IC spread of a hand-picked four-node seed set.
ic_evaluate({56, 48, 50, 53})

23.3224

In [31]:
x = {1:2,3:3,5:7,0:10}
# Sort (key, value) pairs by value, largest first. The key function indexes
# the pair rather than using a tuple parameter (`lambda (k,v): v`), which
# Python 3 rejects as a syntax error; .items() likewise exists on both versions.
sorted(x.items(), key=lambda kv: kv[1], reverse=True)

[(0, 10), (5, 7), (3, 3), (1, 2)]

In [82]:
from collections import defaultdict

def degree_discount(k=None, network=None, p=0.75, verbose=True):
    '''
    Pick seeds with a degree-discount heuristic.

    Every vertex starts with a score equal to len(network[vertex]). The
    highest-scoring unselected vertex is chosen repeatedly; after each choice,
    every still-unselected vertex v listed in network[chosen] has its
    selected-neighbour count t incremented and its score reset to
        d - 2*t - (d - t)*t*p
    where d is v's initial score.

    k:       number of seeds to pick; defaults to the global `num_k`. At most
             one seed per vertex is returned.
    network: graph object offering vertices() and network[v]; defaults to
             the global `graph`.
    p:       propagation probability used in the discount (default 0.75).
             NOTE(review): whether 0.75 suits this graph is an open question.
    verbose: when true (the default), print (vertex, new_score) for every
             score update.
    Returns the set of selected vertices.
    '''
    if k is None:
        k = num_k
    if network is None:
        network = graph
    seed_set = set()
    # Plain dicts: every vertex is initialised explicitly below, so no default
    # factory is needed.
    degree = {}
    discount_degree = {}
    t_selected = {}
    for vertex in network.vertices():
        degree[vertex] = len(network[vertex])
        discount_degree[vertex] = degree[vertex]
        t_selected[vertex] = 0
    # Cap at the number of vertices so an oversized k cannot run out of
    # candidates.
    for _ in range(min(k, len(discount_degree))):
        # max() finds the best score in one pass instead of sorting all scores.
        max_vertex = max(discount_degree, key=discount_degree.get)
        del discount_degree[max_vertex]
        seed_set.add(max_vertex)
        for vertex in network[max_vertex].keys():
            # discount_degree holds exactly the vertices not yet selected.
            if vertex in discount_degree:
                t_selected[vertex] += 1
                discount_degree[vertex] = degree[vertex] - 2*t_selected[vertex] - \
                    (degree[vertex] - t_selected[vertex])*t_selected[vertex]*p
                if verbose:
                    print((vertex, discount_degree[vertex]))
    return seed_set

In [83]:
# Run the degree-discount heuristic; the call prints each (vertex, score) update and the cell shows the returned seed set.
degree_discount()

(51, 0.25)
(5, -1.25)
(12, -1.25)
(46, 0.75)
(19, -1.0)
(22, -1.0)
(24, -1.25)
(25, -0.5)
(30, -0.25)
(6, -1.25)
(7, -1.25)
(40, -1.0)
(10, -0.75)
(14, -0.5)
(49, -1.25)
(18, -0.25)
(55, 0.25)
(42, -0.5)
(34, -0.25)
(35, -0.75)
(37, -0.5)
(9, -1.0)
(15, -0.75)
(17, -1.0)
(22, -1.5)
(1, -1.25)
(34, -3.0)
(37, -2.5)
(8, -1.25)
(15, -2.0)
(16, -1.0)


{38, 41, 52, 58}

In [75]:
# Estimate the IC spread of the seed set shown as the result of the degree_discount() cell.
ic_evaluate({38, 41, 52, 58})

23.0307

In [68]:
# Sum the 'weight' of every entry under graph[vertex], then divide by the
# number of vertices.
total_weight = 0
for vertex in graph.vertices():
    # Only the attribute dicts are needed, so iterate values; .values() also
    # exists on Python 3, unlike .iteritems().
    for attrs in graph[vertex].values():
        total_weight += attrs['weight']
print(total_weight/len(graph.vertices()))

0.758064612903
