In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import re
import random
import operator #to sort elements in a list of tuples
import itertools
import math
import numpy as np
import os

import Cascade_generation_functions as C_gen
import Init_NetInf

In [2]:
'''
Global variables
'''

EPS = 1e-64 #zero machine
ALPHA = 1.0 #Incubation parameter (for exp and power law)
MODEL = 0 # 0 = exp law, 1 = power law (power law is not fully implemented yet)
ITER = 5 #Number of interation of the greedy algo

In [None]:
def Create_ground_truth_from_file(file):
    f = open(file,"r")
    G = nx.DiGraph()
    for nodes in f:
        if not nodes.strip(): ## Stop at the first blank line
            print("stop")
            break
        node = re.split(',|\n',nodes) # the format of the input file is <id>,<name>  
        vertex = node[0]
        names = node[1]
        G.add_node(int(vertex),name = names)
    for edges in f :
        edge = re.split(',|\n',edges)
        vertex_i = edge[0] # initial vertex of the directed edge
        vertex_f = edge[1] # final vertex of the edge
        G.add_edge(int(vertex_i),int(vertex_f),number_of_cascade_edge_is_in =0)
    return G

def Save_graph_to_file(G,file_name):
    f = open(file_name,"w")
    for nodes in G.nodes() :
        f.write(str(nodes)+","+str(nodes)+"\n")
    f.write("\n")
    for edges in G.edges():
        v1,v2 = edges
        f.write(str(v1)+","+str(v2)+"\n")
    f.close()

In [47]:
# This does not make sense for me. But for now I implement the same function they used in their c++ code
def TransProb(DAG, v1,v2):
    global MODEL
    global ALPHA
    if( v1 not in DAG.nodes() or v2 not in DAG.nodes()) :
        return EPS
    t1 = DAG.nodes[v1]["time"]
    t2 = DAG.nodes[v2]["time"]
    if t1>=t2 :
        return EPS
    if MODEL == 0:
        prob = ALPHA*math.exp(-ALPHA*(t2-t1))
    elif MODEL ==1 :
        prob = (ALPHA-1)*math.pow((t2-t1),-ALPHA)
    return prob

def GetProb(DAG) :
    p = 0
    Tree = nx.DiGraph()
    Tree.clear()
    for i in DAG.nodes():
        destination_node = i
        destination_time = DAG.nodes[i]["time"]
        if(destination_node not in Tree.nodes()):
            Tree.add_node(destination_node, time = destination_time)
        maxProb = math.log(EPS)
        bestParent = -1
        parents = list(DAG.predecessors(destination_node))
        for source_node in parents :
            prob = math.log(TransProb(DAG,source_node,destination_node))
            if(prob>=maxProb) :
                maxProb = prob
                bestParent = source_node
        parent_time = DAG.nodes[bestParent]["time"]
        if(bestParent not in Tree.nodes()):
            Tree.add_node(bestParent, time = parent_time)
        Tree.add_edge(bestParent,destination_node)
        p += maxProb
    return Tree, p

def GetAllCascProb(v1,v2,DAG_Tree_c_dic):
    p = 0
    if(v1==-1 and v2 ==-1):
        for c_key in DAG_Tree_c_dic :
        # I STOPPED HERE !!!
        
def UpdateProb(DAG_Tree_c_prob,v1,v2,updateProb_bool): 
    DAG_c,Tree_c,current_prob_Tc = DAG_Tree_c_prob
    if(v1 not in Tree_c.nodes() or v2 not in Tree_c.nodes()):
        return (Tree_c,current_prob_Tc)
    if DAG_c.nodes[v1]["time"]>=DAG_c.nodes[v2]["time"] :
        return (Tree_c,current_prob_Tc)
    parent_v2_list = list(Tree_c.predecessors(v2))
    if len(parent_v2_list) == 0:
        parent_v2 = -1 #set an impossible node
    else :
        parent_v2 = parent_v2_list[0]
    p1 = math.log(TransProb(DAG_c, parent_v2,v2))
    p2 = math.log(TransProb(DAG_c,v1,v2))
    if (p1<p2) :
        if(updateProb_bool) :
            if (parent_v2,v2) in Tree_c.edge():
                Tree_c.remove_edge(parent_v2,v2)
            Tree_c.add_edge(v1,v2)
        current_prob_Tc = current_prob_Tc-p1+p2
        return(Tree_c,current_prob_Tc)


IndentationError: expected an indented block (<ipython-input-47-1a205128f99d>, line 46)

In [None]:

def Compute_marginal_gain(G,DAG_Tree_c_dic):
    poss_edge = itertools.combinations(G.nodes,2) #Compute all possible edges
    dic_of_gains = {}
    dic_of_cascades_per_edge = {}
    for edge in list(poss_edge) :
        if edge not in G.edges : #Considers only the edges that where not already added to G
            marginal_improve = 0
            list_of_cascade = []
            for i in range(0,len(DAG_Tree_c_dic)):
                if edge in DAG_Tree_c_dic[i][0].edges : #Consider only the cascades in which the edge is actually present
                    u,v = edge
                    time_u = DAG_Tree_c_dic[i][0].node[u]["time"]
                    time_v = DAG_Tree_c_dic[i][0].node[v]["time"]
                    parent_v = list(DAG_Tree_c_dic[i][1].predecessors(v))[0]
                    current_weight = DAG_Tree_c_dic[i][1].edges[(parent_v,v)]["weight"]
                    weight_uv = Get_edge_weight(1.0,time_u,time_v)
                    if weight_uv >= current_weight :
                        marginal_improve = marginal_improve + weight_uv-current_weight
                        list_of_cascade.append(i)
            dic_of_gains[edge] = marginal_improve
            dic_of_cascades_per_edge[edge] = list_of_cascade
    return dic_of_gains,dic_of_cascades_per_edge

def Find_best_edge(dic_of_gains,G) :
    maxi = 0
    for key in dic_of_gains.keys() :
        current = dic_of_gains[key]
        if current>maxi :
            maxi = current
            max_key = key      
    G.add_edge(max_key[0],max_key[1])
    return G,max_key
    
def Update_trees(best_edge,dic_of_cascades_per_edge,DAG_Tree_c_dic,alpha):
    for i in dic_of_cascades_per_edge[best_edge] :
        u,v = best_edge
        Tree = DAG_Tree_c_dic[i][1]
        DAG = DAG_Tree_c_dic[i][0]
        parent_v = list(Tree.predecessors(v))[0]
        Tree.remove_edge(parent_v,v)
        t_u = DAG.nodes[u]["time"]
        t_v = DAG.nodes[v]["time"]
        best_edge_weight = Get_edge_weight(alpha,t_u,t_v) 
        Tree.add_edge(u,v,weight = best_edge_weight)
        DAG_Tree_c_dic[i] = (DAG,Tree)
    return DAG_Tree_c_dic


In [3]:
G,DAG_Tree_c_dic,cascades_per_edge_dic = Init_NetInf.Init("example-cascades.txt",EPS)

All nodes were read


In [None]:
G_true = Create_ground_truth_from_file("example-network.txt")
nx.draw(G_true, with_labels=True, node_size=1500, node_color="skyblue", pos=nx.spring_layout(G_true))
plt.title("spring")
plt.show()
nx.write_gexf(G_true,"Test.gexf") # Save the graph in a file that is going to be used in the Gephi software (for visualization)


In [46]:
tree_test = DAG_Tree_c_dic[0][1]
DAG_test = DAG_Tree_c_dic[0][0]
tree_test.nodes()
parent = list(tree_test.predecessors(5))
if len(parent)==0:
    v1 = -5
if v1 not in tree_test.nodes():
    print("yes")

yes


NetworkXError: The edge 1-2 not in graph.

In [None]:
'''
Old functions
'''
# def Find_max_weight_spanning_tree_in_DAG(DAG):
#     Spanning_tree = nx.DiGraph()
#     Spanning_tree.add_nodes_from(DAG.nodes)
#     for vertex in DAG.nodes:
#         if DAG.in_degree[vertex]!=0 :
#             max_weight = 0
#             for parents in list(DAG.predecessors(vertex)):
#                 weight = DAG.edges[parents,vertex]['weight']
#                 if weight>=max_weight:
#                     max_weight = weight
#                     edge_to_add = (parents,vertex)
#             Spanning_tree.add_edge(edge_to_add[0],edge_to_add[1], weight = max_weight)
#     return Spanning_tree
