In [66]:
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from tqdm.notebook import tnrange,tqdm
import pandas as pd
import os

import warnings
warnings.filterwarnings("ignore")

In [2]:
from diffusion_simulation import diffusion_simulation
from network import network

## Generate synthetic network instances 

### __Example 1__: Generate one ER graph with size 100, density 0.1

In [34]:
n = 100 # Network size, choose from [20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
pn = '1' # Network density, choose from ['1_n13', '1_n11', '1_n', 'sqrtlogn_n', 'logn_n', 'logn2_n', '1']
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.

for i in range(1,rep+1):
    sim_id = f'ER_{pn}/n{n}_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
              'network_type': 'ER',
              'is_network_given':False, 'network_size': n, 'ER_prob':0.1, 'network_dir':'',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4, 'value_dir':''
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 100 nodes.


In [36]:
# Figure 3, illustration for one of the replications of a specific (n,p(n)) ER random graph

sim_id = f'ER_{pn}/n{n}_{1}'

df = pd.read_csv(f'../instances/{sim_id}/results.csv')
params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
          'noise_dist': 'logistic', 'noise_dist_param': 1,
          'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
          'is_network_given':True, 'network_dir':f'../instances/{sim_id}/edge.csv',  
          'is_value_given':True, 'value_dir':f'../instances/{sim_id}/v.csv'
         }
G = network(params)

is_not_0 = G.in_degree > 0
if G.n<len(df):
    df = df[:G.n]
df_1 = df[is_not_0]
mape = np.mean(np.abs(df_1['sim']-df_1['fp'])/df_1['sim'])
    
G.cal_mean_inv_indeg()

print('MAPE:', mape*100) # Figure 3(a)
print('Inverse in-degree density:', np.mean(1/G.in_degree[is_not_0])) # Figure 3(b)
print('Inverse of average in-degree:', 1/np.mean(G.in_degree)) # Figure 3(c)

Instance generated with 100 nodes.
MAPE: 3.6896278293831886
Inverse in-degree density: 0.11699688
Inverse of average in-degree: 0.1027749277540214


### __Example 2__: Generate one ER graph with size 1000, density $\log{n}/n$

In [7]:
n = 1000 # Network size, choose from [20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
pn = 'logn_n' # Network density, choose from ['1_n13', '1_n11', '1_n', 'sqrtlogn_n', 'logn_n', 'logn2_n', '1']
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.

for i in range(1,rep+1):
    sim_id = f'ER_{pn}/n{n}_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
              'network_type': 'ER',
              'is_network_given':False, 'network_size': n, 'ER_prob':np.log(n)/n, 'network_dir':'',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4, 'value_dir':''
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 1000 nodes.


In [8]:
# Figure 4, illustration for one of the replications of a specific (n,p(n)) ER random graph

sim_id = f'ER_{pn}/n{n}_{1}'

df = pd.read_csv(f'../instances/{sim_id}/results.csv')
params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
          'noise_dist': 'logistic', 'noise_dist_param': 1,
          'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
          'is_network_given':True, 'network_dir':f'../instances/{sim_id}/edge.csv',  
          'is_value_given':True, 'value_dir':f'../instances/{sim_id}/v.csv'
         }
G = network(params)

is_not_0 = G.in_degree > 0
if G.n<len(df):
    df = df[:G.n]
df_1 = df[is_not_0]
mape = np.mean(np.abs(df_1['sim']-df_1['fp'])/df_1['sim'])

print('MAPE:', mape*100) # Figure 4(a)
print('Inverse in-degree density:', np.mean(1/G.in_degree[is_not_0])) # Figure 4(b)
print('Inverse of average in-degree:', 1/np.mean(G.in_degree)) # Figure 4(c)

Instance generated with 1000 nodes.
MAPE: 6.60778471532297
Inverse in-degree density 0.17600192
Inverse of average in-degree 0.1440299615549603


### __Example 3__: Generate one PL graph with size 100, exponent 2.5, degree correlation 0

In [29]:
n = 100 # Network size, choose from [20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
pn = 'alpha25_theta0' # Network density, choose from ['1_n13', '1_n11', '1_n', 'sqrtlogn_n', 'logn_n', 'logn2_n', '1']
expon, corr, xmin = 3, 0, 2
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.

for i in range(1,rep+1):
    sim_id = f'PL_alpha{expon}_theta{corr}/n{n}_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
              'network_type': 'PL',
              'is_network_given':False, 'network_size': n, 'PL_exponent':expon, 'PL_corr':corr, 'PL_xmin':xmin, 'network_dir':'',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4, 'value_dir':''
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 100 nodes.


In [31]:
# Figure 5, illustration for one of the replications of a specific (n,p(n)) ER random graph

sim_id = f'PL_alpha{expon}_theta{corr}/n{n}_{1}'

df = pd.read_csv(f'../instances/{sim_id}/results.csv')
params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
          'noise_dist': 'logistic', 'noise_dist_param': 1,
          'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
          'is_network_given':True, 'network_dir':f'../instances/{sim_id}/edge.csv',  
          'is_value_given':True, 'value_dir':f'../instances/{sim_id}/v.csv'
         }
G = network(params)

mape = np.mean(np.abs(df['sim']-df['fp'])/df['sim'])

print('MAPE:', mape*100) # Figure 5
print('Inverse in-degree density:', np.mean(1/G.in_degree)) # Figure 5

Instance generated with 100 nodes.
MAPE: 14.989297687283692
Inverse in-degree density: 0.40200797


## Generate real-world network instances

### __Example 1__: Caltech36 instance

In [13]:
sim, folder = 'Caltech36', 'caltech'
os.makedirs(f'../instances/{folder}', exist_ok=True)
df = pd.read_csv(f'../instances/{sim}.txt',sep=' ', names=['from_edge', 'to_edge'])
df_1 = df.copy()
df_1['from_edge'], df_1['to_edge'] = df['to_edge'], df['from_edge']
# build a undirected graph
pd.concat([df,df_1], ignore_index=True).to_csv(f'../instances/{folder}/edge.csv', index=None)

In [15]:
folder = 'caltech'
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.
for i in range(1,rep+1):
    sim_id = f'{folder}/sim_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
              'is_network_given':True, 'network_dir':f'../instances/{folder}/edge.csv',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 770 nodes.


### __Example 2__: Amherst41 instance

In [48]:
sim, folder = 'Amherst41', 'amherst'
os.makedirs(f'../instances/{folder}', exist_ok=True)
df = pd.read_csv(f'../instances/{sim}.txt',sep=' ', names=['from_edge', 'to_edge'])
df_1 = df.copy()
df_1['from_edge'], df_1['to_edge'] = df['to_edge'], df['from_edge']
# build a undirected graph
pd.concat([df,df_1], ignore_index=True).to_csv(f'../instances/{folder}/edge.csv', index=None)

In [50]:
folder = 'amherst'
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.
for i in range(1,rep+1):
    sim_id = f'{folder}/sim_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
              'is_network_given':True, 'network_dir':f'../instances/{folder}/edge.csv',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 2236 nodes.


## Calculate the descriptive statistics for the instances

In [58]:
# Table 2
def print_metrics(sim_id):
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
          'noise_dist': 'logistic', 'noise_dist_param': 1,
          'cnt_iter': 100000, 't_warmup':1000, 'is_traj':False, 
          'is_network_given':True, 'network_dir':f'../instances/{sim_id}/edge.csv',  
          'is_value_given':True, 'value_dir':f'../instances/{sim_id}/v.csv'
         }
    G = network(params)
    df = pd.read_csv(f'../instances/{sim_id}/results.csv')
    mape = np.mean(np.abs(df['sim']-df['fp'])/df['sim'])
    G.cal_mean_inv_indeg()
    pd.DataFrame({'n':[G.n], 'd_min':[np.min(G.in_degree_adj)], 'd_max':[np.max(G.in_degree)], 
                 'd_mean':[np.mean(G.in_degree)], 'out_in_ratio': [np.max(G.out_degree)/max(np.min(G.in_degree),1)], 
                 'deg_corr':[np.corrcoef(G.in_degree, G.out_degree)[0,1]],
                 'inv_ind_density':[G.ave_inv_indeg], 'mape':[mape]}
                ).to_csv(f'../instances/{sim_id}/metrics.csv', index=None)

In [62]:
sim_id = f'caltech/sim_1'
print_metrics(sim_id)
df = pd.read_csv(f'../instances/{sim_id}/metrics.csv')
df

Instance generated with 770 nodes.


Unnamed: 0,n,d_min,d_max,d_mean,out_in_ratio,deg_corr,inv_ind_density,mape
0,770,1.0,248.0,43.262337,248.0,1.0,0.11078,0.034243


In [60]:
sim_id = f'amherst/sim_1'
print_metrics(sim_id)
df = pd.read_csv(f'../instances/{sim_id}/metrics.csv')
df

Instance generated with 2236 nodes.


Unnamed: 0,n,d_min,d_max,d_mean,out_in_ratio,deg_corr,inv_ind_density,mape
0,2236,1.0,467.0,81.3542,467.0,1.0,0.048817,0.016332


## Test the efficiency

In [82]:
n = 100 # Network size, choose from [20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
pn = '1' # Network density
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.

for i in range(1,rep+1):
    sim_id = f'ER_{pn}_keep_traj/n{n}_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':True, 
              'network_type': 'ER',
              'is_network_given':False, 'network_size': n, 'ER_prob':0.1, 'network_dir':'',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4, 'value_dir':''
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 100 nodes.


In [88]:
sim_id = f'ER_1_keep_traj/n1000_1'
df_traj = pd.read_csv(f'../instances/{sim_id}/results_traj.csv')
df = pd.read_csv(f'../instances/{sim_id}/results.csv')

In [92]:
# The runtime can vary largily based on different computers.

print(np.mean(np.abs(df['sim']-df['fp'])/df['sim']))
for t in range(200):
    t1 = t*500+1
    print(t1,np.mean(np.abs(df['sim']-df_traj[f'sim_{t1}'])/df['sim']),np.mean(np.abs(df['sim']-df_traj[f'p_{t1}'])/df['sim']),df_traj[f't_{t1}'][0])

0.0058812922511805
1 1.2669325000342155 0.07549875400730773 1.2360527515411377
501 0.057008477619694395 0.005280058906231603 1.8544256687164309
1001 0.04101410506786104 0.004706231185011217 2.471374034881592
1501 0.032565657819428946 0.004513070665492216 3.0879712104797363
2001 0.028398252894247335 0.004418485422058362 3.702304363250733
2501 0.025415198377386428 0.004363983213068175 4.311804533004761
3001 0.02340471250509389 0.004219052769316904 4.936525583267212
3501 0.02104762395323073 0.00417052449784165 5.553074836730957
4001 0.019761860862416363 0.00415534943728373 6.165019512176514
4501 0.018285349319837194 0.0041633954538311145 6.779497623443604
5001 0.017561930224665134 0.004127417062220962 7.3971734046936035
5501 0.016722077615942087 0.004119977902562848 8.011270523071289
6001 0.015892963725421587 0.004128839044724167 8.635516166687012
6501 0.01544689213570758 0.004138114852338778 9.25640034675598
7001 0.01442145832500727 0.0040995422951556545 9.876288652420044
7501 0.01416290

In [95]:
folder = 'caltech'
rep = 1 # For illustration, generate one repetition. To fully replicate the result in the paper, change this to 50.
for i in range(1,rep+1):
    sim_id = f'{folder}_keep_traj/sim_{i}'
    os.makedirs(f'../instances/{sim_id}', exist_ok=True)
    params = {'sim_id':sim_id, 'alpha':0.1, 'beta':3.5,
              'noise_dist': 'logistic', 'noise_dist_param':1,
              'cnt_iter': 100000, 't_warmup':1000, 'is_traj':True, 
              'is_network_given':True, 'network_dir':f'../instances/{folder}/edge.csv',
              'is_value_given':False, 'v_dist':'uniform_neg', 'v_dist_param': 4
             }
    
    G = network(params)
    sample = diffusion_simulation(G,params)
    sample.start_diffusion()
    sample.run_fixed_point()
    sample.output()

Instance generated with 770 nodes.


In [96]:
sim_id = f'caltech/sim_1'
df_traj = pd.read_csv(f'../instances/{sim_id}/results_traj.csv')
df = pd.read_csv(f'../instances/{sim_id}/results.csv')

FileNotFoundError: [Errno 2] No such file or directory: '../instances/caltech/sim_1/results_traj.csv'

In [None]:
# The runtime can vary largily based on different computers.

print(np.mean(np.abs(df['sim']-df['fp'])/df['sim']))
for t in range(200):
    t1 = t*500+1
    print(t1,np.mean(np.abs(df['sim']-df_traj[f'sim_{t1}'])/df['sim']),np.mean(np.abs(df['sim']-df_traj[f'p_{t1}'])/df['sim']),df_traj[f't_{t1}'][0])