In [1]:
import networkx as nx # version 2.2
import matplotlib.pyplot as plt
import re
import cvxpy as cp
import operator #to sort elements in a list of tuples
import itertools
import math
import numpy as np
import os
import sys
import time
import random

import Init_NetRate as Init
import cvxpy as cp
import Cascade_generation_functions_NetRate as Gen
import CVX_functions as cvx

In [2]:
'''
Create the matrix Mi for the ADMM method

Each cell k,l in Mi is one if in cascade k node l infected node i, 0 otherwise

Input :
    cascade_dic : a dictionnary of all cascade (in a graph object, DAG)
    node : an int describing which node we are considering in the ADMM iteration
    number_of_nodes : number of nodes in the underlying network (we assume that rhe union of all cascades cover all the nodes)
'''
def Create_matrix_Mi_and_Ti(cascade_dic,node,number_of_nodes,window) : 
    M_i = np.zeros((len(cascade_dic),number_of_nodes))
    T_i = np.zeros((len(cascade_dic),number_of_nodes))
    for cascade in cascade_dic :
        c = cascade_dic[cascade] # graph object
        if node in c.nodes():
            t_i = c.node[node]["time"]
            parent_list = list(c.predecessors(node)) # create a list of all nodes (int) that where infected before node i in the cascade
            for parent in parent_list :
                t_parent = c.node[parent]["time"]
                M_i[cascade,parent] = 1
                T_i[cascade,parent] = -(t_i-t_parent)
                if (t_i-t_parent)<=0:
                    print("Time error, the flow of time is reversed the world'send is near")
        else :
            for j in c.nodes() :
                t_j = c.node[j]["time"]
                T_i[cascade,j] = -(window-t_j) # check if + or -
                
    return M_i,T_i

In [3]:
network_file_name = "./Graph_test_SG.txt"
cascade_file_name = "./Cascade_test_SG.txt"
window = 10
model=0 # not important but needed. When constructing the underlying network specify that we use the exponential law
beta = 1 # used for the construction of the cascades
eps = 0.0005
alpha_max = 10
iter_ADMM = 10 #number of itteration for 1 node in the ADMM method. This is a parameter to tune
iter_GD = 1000
gamma = 0.0005 # Learning rate of the GD for alpha

In [7]:
# G_true = Init.Load_ground_truth(network_file_name)
# G_star, DAG_C_dic = Init.Init(cascade_file_name)
# N = G_true.number_of_nodes()

G_true = Gen.Generate_random_graph(10,20)
Cascades = Gen.Generate_all_cascades(G_true,-100,window,model,beta)
Gen.Save_cascade_to_file("Cascade_test_ADMM.txt",Cascades,G_true)
Gen.Save_graph_to_file("Graph_test_ADMM.txt",G_true)
G_star,DAG_C_dic = Init.Init("Cascade_test_ADMM.txt")
N = G_true.number_of_nodes()

All nodes were read


In [8]:
# (A_pot,A_bad),num_casc_per_node = cvx.Create_matrices_ADMM(G_true,DAG_C_dic,window)
A_hat = np.zeros((N,N))
u = 3 # used for the gradient descent of rho and as a penalizer and the constrain


In [9]:
t_start_global = time.time()
dic_of_obj_per_node_per_iter = {}
obj_per_node = []
for i in G_true.nodes :
    print("Node : ",i)
    t_start_node_i = time.time()
    dic_of_obj_per_node_per_iter[i] = []
#     try :
#         cascade_list = num_casc_per_node[i]
#     except KeyError : # node i was present in no cascade hence all there should be no edge connection to it
#         A_hat[:,i] =0
#         continue
    
    
    '''
    initialization
    '''
    a_k = np.random.rand(N,1)
    z = np.random.rand(N,1)
    rho = np.zeros((len(DAG_C_dic),1)) 
    M_i,T_i = Create_matrix_Mi_and_Ti(DAG_C_dic,i,N,window) # TO do : consider to use sparse matrix
    S_i = np.matmul(M_i,z)  
    grad_i = (np.sum(T_i,axis=0).T)
    
    
    #Start iteration of ADMM
    for k in range(0,iter_ADMM) :
        '''
        Update alpha using gradient descent
        '''
        grad = grad_i + np.matmul(rho.T,M_i) # check if + or -
        for j in range(0,iter_GD):

            grad_j = grad - u*(np.matmul(M_i.T,(S_i-np.matmul(M_i,a_k))).T) # check sign
            a_k = a_k + gamma*grad_j.T
            a_k = np.maximum(a_k,eps)
            a_k = np.minimum(a_k,alpha_max)
        '''
        update S_i and rho for each cascades via the closed form formula and the gradient descent respectively
        '''
        for cascs in DAG_C_dic :
            c = DAG_C_dic[cascs]
            Malpha = 0
            if i in c.nodes :
                parent_i_c = list(c.predecessors(i))
                for papa in parent_i_c :
                    Malpha += a_k[papa]
            sqrt_delta = math.sqrt((rho[cascs]+Malpha)**2 + 4*u)
            S_i[cascs] = ((rho[cascs]+Malpha)+sqrt_delta)/(2*u)
            if S_i[cascs]<0 :
                print("Huston Huston we have a probleme")
            rho[cascs] = rho[cascs]-u*(S_i[cascs]-Malpha)
            
#         '''compute the objective function for node i in iteration k'''
#         obj_i_k = 0
#         for c in DAG_C_dic :
#             cascade = DAG_C_dic[c]
#             sum_tmp = 0
#             for j in cascade.nodes():
#                 t_j = cascade.nodes[j]["time"]
#                 if a_k[j]<eps:
#                         a_k[j]=0
#                 if (j,i) in cascade.edges():
#                     t_i = cascade.node[i]["time"]
#                     obj_i_k += -a_k[j]*(t_i-t_j)
# #                     if a_k[j]<eps:
# #                         sum_tmp += eps
# #                     else:
# #                         sum_tmp += a_k[j]
#                     sum_tmp += a_k[j]
#                 if j in cascade.nodes() and i not in cascade.nodes():
#                     obj_i_k += -a_k[j]*(window-t_j)
#             if sum_tmp >0:
#                 obj_i_k += math.log(sum_tmp)
#         dic_of_obj_per_node_per_iter[i].append(obj_i_k)
    A_hat[:,i] = a_k.flatten()
#     obj_per_node.append(obj_i_k)
#     print("Total objective function value is : ",sum(obj_per_node))
    t_end_node_i = time.time()
    print("computation time for node i : ", t_end_node_i-t_start_node_i)
t_end_global = time.time()
print("total computation time : ",t_end_global-t_start_global) 

Node :  0
computation time for node i :  0.09430384635925293
Node :  1
computation time for node i :  0.06861472129821777
Node :  2
computation time for node i :  0.09805488586425781
Node :  3
computation time for node i :  0.07810664176940918
Node :  4
computation time for node i :  0.07810664176940918
Node :  5
computation time for node i :  0.10137820243835449
Node :  6
computation time for node i :  0.11170268058776855
Node :  7
computation time for node i :  0.12366819381713867
Node :  8
computation time for node i :  0.13463973999023438
Node :  9
computation time for node i :  0.1226813793182373
total computation time :  1.01423978805542


In [11]:
'''
Precision and recall computation
'''
correct = 0
edge_G_star = list(G_star.edges())
G_star.remove_edges_from(edge_G_star)
for i in range(0,G_true.number_of_nodes()):
    for j in range(0,G_true.number_of_nodes()):
        if A_hat[i,j] >eps:
            G_star.add_edge(i,j)
            if (i,j) in G_true.edges():
                correct +=1
#                 print("edge ",(i,j))
#                 print("alpha is ",A_hat[i,j])
print("Precision :",correct/G_star.number_of_edges())
print("Recall :",correct/G_true.number_of_edges())
print("Number of correct infered edges : ",correct)
print("Number of edges in G_hat :",G_star.number_of_edges())
print("Number of edges in the true network ",G_true.number_of_edges())
        

Precision : 0.0
Recall : 0.0
Number of correct infered edges :  0
Number of edges in G_hat : 10
Number of edges in the true network  20


In [10]:
A_hat

array([[3.63206378e-01, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04],
       [5.00000000e-04, 2.42439789e-01, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04],
       [5.00000000e-04, 5.00000000e-04, 4.84592102e-01, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04],
       [5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 8.18937043e-02,
        5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04],
       [5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        3.51180949e-01, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
        5.00000000e-04, 5.00000000e-04],
       [5.00000000e-04, 5.00000000e-04, 5.00000000e-04, 5.00000000e-04,
   