In [2]:
import networkx as nx # version 2.2
import matplotlib.pyplot as plt
import re
import random
import operator #to sort elements in a list of tuples
import itertools
import math
import numpy as np
import os
import sys
    
import Cascade_generation_functions as C_gen
import Init_NetInf
import Greedy_NetInf as Greed

# Some variables

In [None]:
'''
Global variables
'''

# Configuration cell: every name below is read by later cells, and the two
# tuples (greedy_global_param, model_param) are unpacked positionally by their
# consumers, so the element order must not change.

EPS = 1e-64 # very small positive constant (originally noted as "zero machine"); passed to Init_NetInf.Init and the greedy step
ALPHA = 1.0 # incubation parameter (for exp and power law)
MODEL = 0 # 0 = exp law, 1 = power law (power law is not fully implemented yet)
MAX = sys.float_info.max # largest finite float in Python
# sys.float_info.min is the smallest POSITIVE normalized float, not the most
# negative one, hence MIN is defined as -MAX instead:
#MIN = sys.float_info.min #Min value of a float in python
MIN = -MAX

# The two flags below only make sense when a ground-truth graph is available.
# Setting them to True (boundOn in particular) greatly slows down the computation.
compare_groud_truth = True # if True, additional information is output (precision and recall of the algorithm)
boundOn = True # presumably enables the upper-bound computation in the greedy step - TODO confirm in Greedy_NetInf


greedy_global_param = (ALPHA,MODEL,MAX,MIN,EPS,compare_groud_truth,boundOn)
'''
Model generation parameter
'''
ratio = 99 # NOTE(review): the previous comment said "95% of the true edges present at least once" but the value is 99 - confirm which is intended
beta = 0.5 # probability that an edge propagates the infection
alpha = 1.0 # incubation time parameter (for exp law and power law); separate from ALPHA above, same value
window = 100 # maximum time duration of a cascade
model = 0 # 0 = exp law, 1 = power law
model_param = (ratio,beta,alpha,window,model)

# Directory in which the txt files of the ground-truth graph and of the
# associated cascades are stored.
gen_path = "./Generation_files/"

# Some functions 

In [None]:
def Generation_of_Ground_truth_and_corresponding_cascades(model_param,nb_vertex,nb_edges,file_name,dir_path) :
    """Generate a random graph and its cascades, and save both as text files.

    Parameters
    ----------
    model_param : tuple
        ``(ratio, beta, alpha, window, model)``, forwarded to
        ``C_gen.Generate_all_cascades``.
    nb_vertex, nb_edges :
        Forwarded to ``C_gen.Generate_random_graph``.
    file_name : str
        Suffix used for the output names: directory ``Gen_<file_name>``,
        files ``G_<file_name>.txt`` and ``C_<file_name>.txt``.
    dir_path : str
        Parent directory of the output directory. A trailing separator is
        optional, and missing directories are created.

    Returns
    -------
    str
        Path of the ``Gen_<file_name>`` directory holding the two files.
    """
    ratio,beta,alpha,window,model = model_param

    G_true = C_gen.Generate_random_graph(nb_vertex,nb_edges)
    cascade_dic = C_gen.Generate_all_cascades(G_true,ratio,beta,alpha,window,model)

    # A real two-argument join: the former `os.path.join(dir_path)+dir_name`
    # silently produced "<parent>Gen_x" when dir_path had no trailing separator.
    out_dir = os.path.join(dir_path, "Gen_"+file_name)
    # makedirs + exist_ok: works when the parent directory is missing and when
    # the notebook is re-run with the same file_name (files are overwritten).
    os.makedirs(out_dir, exist_ok=True)

    G_file = os.path.join(out_dir, "G_"+file_name+".txt")
    C_file = os.path.join(out_dir, "C_"+file_name+".txt")
    C_gen.Save_graph_to_file(G_file,G_true)
    C_gen.Save_cascade_to_file(C_file,cascade_dic,G_true)
    return out_dir

def Correct_guess_ratio(G_true,G_star) :
    """Print the percentage of edges of G_star that are in G_true.

    Parameters
    ----------
    G_true :
        Reference graph; only its ``edges()`` method is used.
    G_star :
        Inferred graph; its ``edges()`` and ``number_of_edges()`` are used.

    Returns
    -------
    list
        The edges of ``G_star`` that are not edges of ``G_true``.

    Notes
    -----
    The percentage is printed, not returned. When ``G_star`` has no edge the
    printed value is ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    # Fetch the reference edges once instead of on every loop iteration.
    true_edges = G_true.edges()
    correct = 0
    wrong_edge_list = []
    for edge in G_star.edges() :
        if edge in true_edges :
            correct += 1
        else :
            wrong_edge_list.append(edge)

    nb_guessed_edges = G_star.number_of_edges()
    if nb_guessed_edges == 0 :
        # Nothing was guessed: avoid dividing by zero.
        correct_ratio = 0.0
    else :
        correct_ratio = correct/nb_guessed_edges * 100
    print(correct_ratio)
    return wrong_edge_list

# NetInf

## Generation of Ground truth and associated cascades

In [None]:
# Generate a ground-truth graph with its cascades, then build the G_true
# object from the graph file that was just written.
file_name = "16"
nb_vertex = 1024
nb_edges = 1446
dir_name = Generation_of_Ground_truth_and_corresponding_cascades(
    model_param, nb_vertex, nb_edges, file_name, gen_path
)
G_true_file = "{}/G_{}.txt".format(dir_name, file_name)
G_true = Init_NetInf.Create_ground_truth_from_file(G_true_file)

## Initialization of G_star and the different dictionaries

In [None]:
# This path assumes the generation cell was run beforehand. If the files
# already exist elsewhere, just replace this variable with the correct path.
cascade_file = "{}/C_{}.txt".format(dir_name, file_name)
init_result = Init_NetInf.Init(cascade_file, EPS, MAX)
G_star, DAG_Tree_c_dic, cascades_per_edge_dic, edge_gain_dic = init_result
print("number of edges is : ", len(edge_gain_dic))


## Computation of G* by the greedy algo

In [None]:
# Number of edges the greedy search is asked to recover, expressed as a
# fraction of the ground-truth edge count (the factor 1 means 100%).
nb_max_edge = int(1 * G_true.number_of_edges())
# Without ground-truth comparison an empty directed graph is passed instead.
ground_truth = G_true if compare_groud_truth else nx.DiGraph()
greedy_result = Greed.GreedyOpt(
    nb_max_edge,
    DAG_Tree_c_dic,
    cascades_per_edge_dic,
    edge_gain_dic,
    G_star,
    ground_truth,
    greedy_global_param
)
G_max, precision, recall, edge_info = greedy_result
# Save the inferred graph next to the generated files.
G_approx_file_name = "{}/G_max_{}.txt".format(dir_name, file_name)
C_gen.Save_graph_to_file(G_approx_file_name, G_max)

## Plots and stats

In [None]:
# Correct_guess_ratio prints the percentage of G_max edges that are also in
# G_true and returns the G_max edges that are not.
wrong_edge_list = Correct_guess_ratio(G_true,G_max)
print("There are %i wrong edges in G_max" % len(wrong_edge_list))


In [None]:
# Count the ground-truth edges that are not a key of cascades_per_edge_dic.
nb_edge_of_G_true_not_in_Cascades = sum(
    1 for true_edge in G_true.edges() if true_edge not in cascades_per_edge_dic
)
# Fraction of ground-truth edges that do appear as a key.
f_fraction = 1 - nb_edge_of_G_true_not_in_Cascades / G_true.number_of_edges()

In [None]:
# Sum, over every edge key, the size of its entry in cascades_per_edge_dic.
total_number_of_edge_transmission = sum(
    len(cascades_per_edge_dic[edge_key]) for edge_key in cascades_per_edge_dic
)


In [None]:
# General information about the run: printed in the notebook, then written
# to General_info.txt in the current working directory.

print("Genral Infos\n")
print("Ground truth has %i vertices and %i edges" %(G_true.number_of_nodes(),G_true.number_of_edges()))
print("f fraction of edges that participated in at least 1 cascade is : %i " % int(f_fraction*100) + "%")
print("Number of cascades is : ",len(DAG_Tree_c_dic))
print("Number of different edges is : ",len(cascades_per_edge_dic))
print("Accuracy of the final G_k :", precision[-1]*100)
print(" r : number of edge transmission is : ", total_number_of_edge_transmission)
print("Average size of a cascade is : ",total_number_of_edge_transmission/len(DAG_Tree_c_dic))
print("Average number of cascade an edge belongs to ", total_number_of_edge_transmission/len(cascades_per_edge_dic))
# NOTE(review): 0.97 is hard-coded here and below, not computed from this run - confirm.
print ("Break even point is : 0.97 ")

# `with` closes the file even if one of the writes raises.
with open("General_info.txt","w") as f :
    f.write("Genral Infos\n")
    f.write("Ground truth has " + str(G_true.number_of_nodes())+ " vertices and "+str(G_true.number_of_edges())+ " edges\n")
    f.write("f fraction of edges that participated in at least 1 cascade is : " + str(int(f_fraction*100)) + "%\n")
    f.write("Number of cascades is : " + str(len(DAG_Tree_c_dic)) + "\n")
    f.write("Number of different edges is : " + str(len(cascades_per_edge_dic)) + "\n")
    # The trailing newline was missing, which glued this line to the next one in the report.
    f.write("Accuracy of the final G_k : " + str(precision[-1]*100) + "\n")
    f.write(" r : number of edge transmission is : "+ str(total_number_of_edge_transmission) + "\n")
    f.write("Average size of a cascade is : " + str(total_number_of_edge_transmission/len(DAG_Tree_c_dic)) + "\n")
    f.write("Average number of cascade an edge belongs to " + str(total_number_of_edge_transmission/len(cascades_per_edge_dic)) + "\n")
    f.write("Break even point is : 0.97\n")
    f.write("alpha = " + str(ALPHA) + "\n")
    f.write("beta = " + str(beta) )

In [None]:
# Precision and recall of the greedy run.

# Figure 1: both curves against the number of edges of G_k (1-based).
nb_edge_itt = range(1, len(precision) + 1)
for curve_values, curve_label in ((precision, "Precision"), (recall, "Recall")) :
    plt.plot(nb_edge_itt, curve_values, label = curve_label)
plt.legend()
plt.xlabel("Number of edges of G_k")
plt.ylabel("Percentage")
plt.savefig("precision_vs_nb_edges.png")
plt.show()

# Figure 2: precision as a function of recall.
plt.plot(recall, precision)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.savefig("precision_vs_recall.png")
plt.show()

In [None]:
# Plot of the objective function and of its upper bound, one point per entry
# of edge_info (taken in iteration order).
# Presumably edge_info[edge] holds (marginal gain, marginal bound, ...) - TODO confirm in Greedy_NetInf.

nb_edge = list(range(1, len(edge_info) + 1))
gains = []
bound = []
for edge in edge_info :
    step_gain = edge_info[edge][0]
    step_bound = edge_info[edge][1]
    if not gains :
        # First entry: nothing accumulated yet.
        # NOTE(review): here the bound is gain + bound, whereas later entries
        # use bound + previous total without the current gain - confirm this is intended.
        marginal_gain = step_gain
        theoretical_bound = step_gain + step_bound
    else :
        accumulated = gains[-1]
        marginal_gain = step_gain + accumulated
        theoretical_bound = step_bound + accumulated
    gains.append(marginal_gain)
    bound.append(theoretical_bound)

for series, series_label in ((gains, "objectif function"), (bound, "Upper Bound")) :
    plt.plot(nb_edge, series, label = series_label)
plt.legend()
plt.xlabel("Number of edges")
plt.ylabel("Value of the objective function")
plt.savefig("Objective_fct_and_UpperBound.png")
plt.show()

In [None]:
# Bar plot of the number of cascades per edge: for each entry size found in
# cascades_per_edge_dic, how many edges have an entry of that size.

tmp_dic = {}
for edge in cascades_per_edge_dic :
    nb_cascade_edge_is_in = len(cascades_per_edge_dic[edge])
    tmp_dic[nb_cascade_edge_is_in] = tmp_dic.get(nb_cascade_edge_is_in, 0) + 1

# Order the (size, count) pairs by size before splitting them into two axes.
sorted_nb_edge_cascade_list = sorted(tmp_dic.items(), key=operator.itemgetter(0))
x_data = [size_and_count[0] for size_and_count in sorted_nb_edge_cascade_list]
y_data = [size_and_count[1] for size_and_count in sorted_nb_edge_cascade_list]

plt.bar(x_data, y_data)
plt.xlabel("Number of cascades per edge")
plt.ylabel("Number of edges")
plt.savefig("cascade_per_edge.png")
plt.show()


In [None]:
# Scatter plot of the number of edges per cascade: for each edge count found
# among the first elements of the DAG_Tree_c_dic entries, how many cascades have it.
tmp_2_dic = {}
for c in DAG_Tree_c_dic :
    nb_edge_c = DAG_Tree_c_dic[c][0].number_of_edges()
    tmp_2_dic[nb_edge_c] = tmp_2_dic.get(nb_edge_c, 0) + 1

# (edge count, number of cascades) pairs ordered by edge count.
tmp_list = sorted(tmp_2_dic.items(), key = operator.itemgetter(0))
nb_edge_per_cascade = [size_and_count[0] for size_and_count in tmp_list]
nb_cascades = [size_and_count[1] for size_and_count in tmp_list]

plt.scatter(nb_edge_per_cascade, nb_cascades)
plt.xlabel("Number of edges per cascade")
plt.ylabel("Number of cascades")
plt.savefig("cascade_size.png")
plt.show()

