In [1]:
import networkx as nx # version 2.2
import matplotlib.pyplot as plt
import re
import random
import operator #to sort elements in a list of tuples
import itertools
import math
import numpy as np
import os
import sys
    
import Cascade_generation_functions as C_gen
import Init_NetInf

In [2]:
# Global variables shared by the cells of this notebook.

EPS = 1e-64  # probability floor returned by TransProb for impossible transmissions
ALPHA = 1.0  # incubation parameter (used by both the exponential and the power law)
MODEL = 0  # 0 = exponential law, 1 = power law (power law is not fully implemented yet)
ITER = 5  # number of iterations of the greedy algorithm
MAX = sys.float_info.max  # largest finite float
# Most negative finite float. GetBestEdge uses MIN as its "-infinity" starting
# value, so it must be negative: sys.float_info.min is the smallest *positive*
# normalized float (~2.2e-308) and would never be beaten by a gain of 0.
MIN = -sys.float_info.max

graph_txt_file_path = "./Graph_files/"
cascade_txt_file_path = "./Cascade_files/"

In [None]:
def Create_ground_truth_from_file(file):
    """Build a directed graph from a ground-truth network text file.

    Expected layout of the file:
      * one ``<id>,<name>`` line per node,
      * a blank line,
      * one ``<source id>,<target id>`` line per directed edge.

    Parameters
    ----------
    file : path of the text file to read.

    Returns
    -------
    networkx.DiGraph whose nodes carry a ``name`` attribute and whose edges
    carry ``number_of_cascade_edge_is_in`` initialised to 0.

    Raises
    ------
    ValueError if a node or edge id is not an integer.
    """
    G = nx.DiGraph()
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(file, "r") as f:
        for nodes in f:
            if not nodes.strip():  # the first blank line ends the node section
                print("stop")
                break
            node = re.split(',|\n', nodes)  # the format of the input file is <id>,<name>
            G.add_node(int(node[0]), name=node[1])
        # Same iterator: reading resumes right after the blank separator line.
        for edges in f:
            if not edges.strip():  # tolerate blank/trailing lines in the edge section
                continue
            edge = re.split(',|\n', edges)
            vertex_i = edge[0]  # initial vertex of the directed edge
            vertex_f = edge[1]  # final vertex of the edge
            G.add_edge(int(vertex_i), int(vertex_f), number_of_cascade_edge_is_in=0)
    return G

def Save_graph_to_file(G,file_name):
    """Write a graph to a text file in the ground-truth format.

    The file contains one ``<id>,<id>`` line per node (the node id doubles as
    its name), a blank separator line, then one ``<source>,<target>`` line per
    edge.

    Parameters
    ----------
    G : graph object exposing ``nodes()`` and ``edges()``.
    file_name : path of the file to create or overwrite.
    """
    # "with" closes the file even when a write fails part-way through.
    with open(file_name, "w") as f:
        for node in G.nodes():
            f.write(str(node) + "," + str(node) + "\n")
        f.write("\n")  # blank line separating the node and edge sections
        for v1, v2 in G.edges():
            f.write(str(v1) + "," + str(v2) + "\n")

In [None]:
# NOTE: the original author was not convinced by this transmission model; it is
# kept as is because it mirrors the function used in the reference C++ code.
def TransProb(DAG, v1,v2):
    """Return the transmission likelihood from node v1 to node v2 in a cascade.

    Parameters
    ----------
    DAG : cascade graph whose nodes carry a ``"time"`` attribute.
    v1, v2 : candidate parent and child node ids.

    Returns
    -------
    EPS when either node is absent from the cascade or when v1 is not strictly
    earlier than v2; otherwise the value of the law selected by the global
    MODEL (0 = exponential, 1 = power law) with parameter ALPHA.

    Raises
    ------
    ValueError if MODEL is neither 0 nor 1 (previously this surfaced as an
    UnboundLocalError on the return statement).
    """
    if v1 not in DAG.nodes() or v2 not in DAG.nodes():
        return EPS
    t1 = DAG.nodes[v1]["time"]
    t2 = DAG.nodes[v2]["time"]
    if t1 >= t2:  # transmission can only go forward in time
        return EPS
    if MODEL == 0:
        return ALPHA*math.exp(-ALPHA*(t2-t1))
    if MODEL == 1:
        # Caution: this is <= 0 when ALPHA <= 1, which callers taking the log
        # of the result cannot handle.
        return (ALPHA-1)*math.pow((t2-t1),-ALPHA)
    raise ValueError("Unsupported MODEL value: {!r} (expected 0 or 1)".format(MODEL))


def GetAllCascProb(v1,v2,DAG_Tree_c_dic,cascades_per_edge_dic):
    """Score an edge over the cascades it belongs to.

    With the sentinel pair ``(-1, -1)`` the function sums the value returned by
    ``UpdateProb`` for every cascade entry of ``DAG_Tree_c_dic`` (initial
    log-likelihood of all trees).

    For a real edge it looks up the cascade keys stored under ``(v1, v2)`` in
    ``cascades_per_edge_dic`` and sums, over those cascades, the difference
    between the value ``UpdateProb`` would give and the value currently stored
    in the entry (index 2), i.e. the marginal gain of adding the edge.

    ``UpdateProb`` is always called with its update flag set to False.
    """
    if v1 == -1 and v2 == -1:
        total = 0
        for entry in DAG_Tree_c_dic.values():
            total += UpdateProb(entry, v1, v2, False)[1]
        return total

    gain = 0
    for c_key in cascades_per_edge_dic[(v1, v2)]:
        entry = DAG_Tree_c_dic[c_key]
        candidate_prob = UpdateProb(entry, v1, v2, False)[1]
        gain += (candidate_prob - entry[2])  # marginal gain of adding edge (v1,v2)
    return gain
        
def UpdateProb(DAG_Tree_c_prob,v1,v2,updateProb_bool):
    """Evaluate (and optionally apply) making v1 the parent of v2 in a cascade tree.

    Parameters
    ----------
    DAG_Tree_c_prob : tuple ``(DAG_c, Tree_c, current_prob_Tc)`` for one cascade.
    v1, v2 : candidate parent and child node ids.
    updateProb_bool : when True, Tree_c is rewired in place if the candidate
        edge is better; when False, Tree_c is left untouched.

    Returns
    -------
    ``(Tree_c, prob)`` where ``prob`` is ``current_prob_Tc`` unchanged when a
    node is missing from Tree_c, when v1 is not strictly earlier than v2, or
    when the candidate edge is not strictly better than v2's current parent;
    otherwise it is the value with the old parent's log term swapped for the
    new one.
    """
    DAG_c, Tree_c, current_prob_Tc = DAG_Tree_c_prob

    tree_nodes = Tree_c.nodes()
    if not (v1 in tree_nodes and v2 in tree_nodes):
        return (Tree_c, current_prob_Tc)
    if DAG_c.nodes[v1]["time"] >= DAG_c.nodes[v2]["time"]:
        return (Tree_c, current_prob_Tc)

    # Current parent of v2 in the tree; -1 is an impossible node id used when
    # v2 has no parent yet.
    parents = list(Tree_c.predecessors(v2))
    parent_v2 = parents[0] if parents else -1

    old_log = math.log(TransProb(DAG_c, parent_v2, v2))
    new_log = math.log(TransProb(DAG_c, v1, v2))
    if old_log < new_log:
        if updateProb_bool:
            if (parent_v2, v2) in Tree_c.edges():
                Tree_c.remove_edge(parent_v2, v2)
            Tree_c.add_edge(v1, v2)
        current_prob_Tc = current_prob_Tc - old_log + new_log
    return (Tree_c, current_prob_Tc)


In [None]:
def GetBestEdge(current_prob,last_gain,msort,MIN,dic_of_gain_per_edge,G_star,DAG_Tree_c_dic,cascades_per_edge_dic):
    """Select the candidate edge with the largest marginal gain (lazy evaluation).

    Parameters
    ----------
    current_prob : current log-likelihood; the best gain is added to it.
    last_gain : gain of the previously selected edge (returned updated).
    msort : when True, the candidates are first re-sorted by decreasing cached gain.
    MIN : starting value of the best gain (intended as "-infinity").
    dic_of_gain_per_edge : dict ``edge -> cached marginal gain``. Mutated in
        place unless ``msort`` is True, in which case a sorted copy is mutated
        and returned instead.
    G_star : graph inferred so far; edges already in it are skipped.
    DAG_Tree_c_dic, cascades_per_edge_dic : forwarded to ``GetAllCascProb``.

    Returns
    -------
    ``(best_edge, current_prob, msort, last_gain, dic_of_gain_per_edge)``.
    ``best_edge`` is ``(-1, -1)`` when no candidate could be evaluated or when
    the best gain is 0; in that case ``msort`` and ``last_gain`` are returned
    unchanged.

    Fixes relative to the first port of the C++ routine:
      * zero-gain edges are removed by their own key. The ``i-1`` index shift
        of the C++ vector version does not apply here because ``key_list`` is
        never mutated, and it removed the wrong edge or raised KeyError;
      * a result is always returned, even when the candidate dict is empty or
        the scan ends on an edge that is already in ``G_star``;
      * ``best_edge`` is always bound.
    """
    no_edge = (-1, -1)
    best_gain = MIN  # starting value; any evaluated gain above it becomes the best
    best_edge = no_edge
    best_gain_index = -1
    zero_edge_list = []
    if msort:
        sorted_gain_per_edge_list = sorted(dic_of_gain_per_edge.items(),
                                           key=operator.itemgetter(1), reverse=True)
        dic_of_gain_per_edge = dict(sorted_gain_per_edge_list)

    key_list = list(dic_of_gain_per_edge.keys())
    attempts = 0
    for index, edge in enumerate(key_list):
        if edge in G_star.edges():  # the edge is already in the network
            continue
        # Compute the marginal gain of adding the edge and refresh the cache.
        edge_marginal_gain = GetAllCascProb(edge[0], edge[1], DAG_Tree_c_dic, cascades_per_edge_dic)
        dic_of_gain_per_edge[edge] = edge_marginal_gain
        if best_gain < edge_marginal_gain:
            best_gain = edge_marginal_gain
            best_edge = edge
            best_gain_index = index
        attempts += 1  # number of re-evaluations; decides whether to re-sort next time

        # Remember edges that bring no improvement so they can be pruned.
        if edge_marginal_gain == 0 and G_star.number_of_edges() > 1:
            zero_edge_list.append(index)

        # Lazy evaluation: stop once the best gain found so far is at least the
        # cached gain of the next candidate (or there is no next candidate).
        is_last = (index + 1 == len(key_list))
        if is_last or best_gain >= dic_of_gain_per_edge[key_list[index + 1]]:
            break

    if best_gain_index == -1:
        # Nothing beat the starting value: no edge can be proposed.
        return (no_edge, current_prob, msort, last_gain, dic_of_gain_per_edge)

    current_prob += best_gain
    if best_gain == 0:
        return (no_edge, current_prob, msort, last_gain, dic_of_gain_per_edge)

    del dic_of_gain_per_edge[key_list[best_gain_index]]
    # The selected edge has a non-zero gain, so it is never in zero_edge_list.
    for i in zero_edge_list:
        del dic_of_gain_per_edge[key_list[i]]

    if len(zero_edge_list) > 2:
        attempts = attempts - (len(zero_edge_list) - 1)
    msort = (attempts > 1)
    last_gain = best_gain

    return (best_edge, current_prob, msort, last_gain, dic_of_gain_per_edge)

In [None]:
def GreedyOpt(max_edges,DAG_Tree_c_dic,cascades_per_edge_dic,dic_of_gain_per_edge,G_star,MAX,MIN) :
    """Greedily add edges to G_star, one best edge per iteration.

    Parameters
    ----------
    max_edges : maximum number of edges to add.
    DAG_Tree_c_dic : dict ``cascade key -> (DAG, tree, prob)``; the entries of
        the cascades touched by a selected edge are replaced in place.
    cascades_per_edge_dic : dict ``edge -> iterable of cascade keys``.
    dic_of_gain_per_edge : dict ``edge -> cached marginal gain`` of the
        remaining candidate edges.
    G_star : graph to extend (mutated in place).
    MAX : initial value of the "last gain".
    MIN : starting value handed to ``GetBestEdge``.

    Returns
    -------
    G_star, the same object that was passed in.

    The loop stops when ``max_edges`` edges were added, when no candidate is
    left, or when ``GetBestEdge`` answers ``(-1, -1)``.
    """
    current_log_likelihood = GetAllCascProb(-1,-1,DAG_Tree_c_dic,cascades_per_edge_dic)
    last_gain = MAX
    msort = False
    k = 0
    # The candidate dict is the *parameter* (rebound on every iteration), not
    # the notebook-level ``edge_gain_dic``: GetBestEdge may hand back a new,
    # re-sorted dict, which would leave the global one stale.
    while k < max_edges and len(dic_of_gain_per_edge) > 0:
        print("itteration : ",k)
        (best_edge,current_log_likelihood,msort,last_gain,dic_of_gain_per_edge) = GetBestEdge(
            current_log_likelihood,
            last_gain,
            msort,
            MIN,
            dic_of_gain_per_edge,
            G_star,
            DAG_Tree_c_dic,
            cascades_per_edge_dic)
        print("Best edge is ",best_edge)
        if best_edge == (-1,-1): # no more edges can be added to G_star
            break
        # TODO: compare against the ground truth
        G_star.add_edge(best_edge[0],best_edge[1])
        k += 1
        # TODO: bound computation

        # Localized update: only the cascades containing the new edge change.
        for c in cascades_per_edge_dic[best_edge]:
            Tree_c,current_prob_Tc = UpdateProb(DAG_Tree_c_dic[c],best_edge[0],best_edge[1],True)
            DAG_Tree_c_dic[c] = (DAG_Tree_c_dic[c][0],Tree_c,current_prob_Tc)
    return G_star

In [None]:
# Load the ground-truth network that the inferred graph is later compared against.
G_true = Create_ground_truth_from_file("example-network.txt")
# Optional visual check of the ground truth (disabled):
# nx.draw(G_true, with_labels=True, node_size=1500, node_color="skyblue", pos=nx.spring_layout(G_true))
# plt.title("spring")
# plt.show()
# nx.write_gexf(G_true,"Test.gexf") # Save the graph in a file that is going to be used in the Gephi software (for visualization)


In [None]:
# Build the initial inference state from the example cascades, then report how
# many entries the edge->cascades index and the cascade dictionary contain.
(G_star,
 DAG_Tree_c_dic,
 cascades_per_edge_dic,
 edge_gain_dic) = Init_NetInf.Init("example-cascades.txt", EPS, MAX)
print(len(cascades_per_edge_dic), len(DAG_Tree_c_dic), sep="\n")

In [None]:
# Run the greedy optimisation for at most 50 edges. GreedyOpt returns the very
# graph it was given, so G_max and G_star refer to the same object afterwards.
G_max = GreedyOpt(
    max_edges=50,
    DAG_Tree_c_dic=DAG_Tree_c_dic,
    cascades_per_edge_dic=cascades_per_edge_dic,
    dic_of_gain_per_edge=edge_gain_dic,
    G_star=G_star,
    MAX=MAX,
    MIN=MIN,
)


In [None]:
# Diagnostic snapshot of the current state of the inference.
current_log_likelihood = GetAllCascProb(-1, -1, DAG_Tree_c_dic, cascades_per_edge_dic)
last_gain = MAX
msort = False
for _label, _value in (
    ("Current log likelihood of the graph :", current_log_likelihood),
    ("Last gain ", last_gain),
    ("Sort is : ", msort),
    ("Min is : ", MIN),
    ("Number of edges in the dic of edge :", len(edge_gain_dic)),
    ("Number of edges in G star : ", G_star.number_of_edges()),
    ("Number of edges in G true : ", G_true.number_of_edges()),
):
    print(_label, _value)

In [None]:
# Share (in percent) of the inferred edges that also exist in the ground truth.
# The inferred graph is G_max, produced by the GreedyOpt cell; the previous
# name G_max_5 is not defined anywhere in this notebook.
correct = 0
for edge in G_max.edges():
    if edge in G_true.edges():
        correct += 1
    else:
        print("Wrong edge. ", edge)
inferred_edge_count = G_max.number_of_edges()
# Guard against an empty inferred graph (would otherwise divide by zero).
correct_ratio = correct / inferred_edge_count * 100 if inferred_edge_count else 0.0
print(correct_ratio)

In [None]:
# Save the generated graph under the graph directory.
# NOTE(review): G_gen is only created by the graph-generation cell further
# down; that cell must have been run first for this one to work.
# NOTE(review): the arguments are passed as (path, graph) whereas the
# Save_graph_to_file defined in this notebook takes (graph, path) - confirm the
# signature of C_gen.Save_graph_to_file.
G_txt = "Test_graph.txt"
# os.path.join needs the components as separate arguments; with a single,
# already concatenated string it does nothing.
C_gen.Save_graph_to_file(os.path.join(graph_txt_file_path, G_txt), G_gen)

In [3]:
# Generate a random graph and a set of cascades over it, then save the
# cascades to disk. The parameters below are forwarded unchanged to
# C_gen.Generate_all_cascades.
G_gen = C_gen.Generate_random_graph(1024,1446)
ratio = 95
beta = 0.5
alpha = 1
window = 100
model = 0
cascade_dic = C_gen.Generate_all_cascades(G_gen,ratio,beta,alpha,window,model)
# Pass the directory and the file name as separate components so that
# os.path.join actually builds the path.
C_gen.Save_cascade_to_file(os.path.join(cascade_txt_file_path, "big_cascade.txt"),cascade_dic,G_gen)

In [4]:
# Reload the saved cascades (directory and file name joined as separate components).
G,cascade_list = Init_NetInf.load_cascade_from_file(os.path.join(cascade_txt_file_path, "big_cascade.txt"))

All nodes were read


In [None]:
# Scratch check: print the last element of the list as is and "plop" for
# every element that differs from it.
bla_list = [1,10,156,3021]
for bla in bla_list:
    print(bla if bla == bla_list[-1] else "plop")

In [None]:
# Rebuild the inference state from the large generated cascade file. This
# overwrites G_star, DAG_Tree_c_dic, cascades_per_edge_dic and edge_gain_dic
# from the example run above.
G_star,DAG_Tree_c_dic,cascades_per_edge_dic,edge_gain_dic = Init_NetInf.Init(
    os.path.join(cascade_txt_file_path, "big_cascade.txt"), EPS, MAX)
print(len(cascades_per_edge_dic))
print(len(DAG_Tree_c_dic))

All nodes were read
