In [0]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random as rm
import copy as cp
import scipy.stats as stats
import csv
from spreading_CR import SpreadingProcess
from zipfile import ZipFile
import os

In [0]:
# Model and experiment parameters
Gamma = 1.0        # third argument handed to SpreadingProcess by sim_sir
Beta = 0.95        # second argument handed to SpreadingProcess by sim_sir
FracImNodes = 0.1  # fraction of nodes kept in the top / immunization lists
# Largest seed value drawn in sim_sir (2**32 - 1 == 4294967295)
max_cplusplus_int = 2**32 - 1

In [0]:
#Function for evaluating centrality measures
def list_meas_centr(Graph):
    """Tabulate four centrality measures for every node of ``Graph``.

    Returns a DataFrame with one row per node and the columns 'node',
    'Dgr' (degree centrality), 'Btw' (betweenness, not normalized),
    'Ev' (eigenvector centrality) and 'Pr' (PageRank with alpha=0.9),
    rounded to 6 decimals.
    """
    degree = nx.degree_centrality(Graph)
    betweenness = nx.betweenness_centrality(Graph, normalized=False)
    eigenvector = nx.eigenvector_centrality(Graph)
    pagerank = nx.pagerank(Graph, alpha=0.9)

    # The value lists are combined positionally, so this relies on the four
    # result dicts iterating over the nodes in the same order.
    columns = {'node': list(degree.keys())}
    for label, scores in (('Dgr', degree), ('Btw', betweenness),
                          ('Ev', eigenvector), ('Pr', pagerank)):
        columns[label] = list(scores.values())

    table = pd.DataFrame(columns)
    return table.round(6)

In [0]:
#Function for creating a network from the node and edge files stored in a zip archive
def create_netw(FileZip,i):
    """Rebuild network number ``i`` from the CSV members of an open archive.

    Parameters
    ----------
    FileZip : object with an ``open(name)`` method returning a readable
        file usable as a context manager (e.g. ``zipfile.ZipFile``).
    i : index used in the member names 'nodes <i>.csv' and 'edges <i>.csv'.

    Returns
    -------
    networkx.Graph containing the nodes of column '0' of the node file and
    the edges given by columns '0' and '1' of the edge file.
    """
    # The node file is now the one that belongs to network i (the index was
    # previously hard-coded to 1), and both member handles are closed as soon
    # as they have been parsed.
    with FileZip.open('nodes ' + str(i) + '.csv') as node_file:
        node_table = pd.read_csv(node_file, delimiter=",")
    with FileZip.open('edges ' + str(i) + '.csv') as edge_file:
        # Column 0 of the file is the index written by DataFrame.to_csv
        edge_table = pd.read_csv(edge_file, delimiter=",", usecols=[1,2])
    # Create network
    G = nx.Graph()
    G.add_nodes_from(node_table['0'])
    G.add_edges_from(edge_table[['0','1']].values.tolist())
    return G

In [0]:
#Function for loading a data frame of centrality measure values from a zip archive
def meas_list(FileZip,path):
    """Load a stored centrality table from an open archive.

    Parameters
    ----------
    FileZip : object with an ``open(name)`` method returning a readable
        file usable as a context manager (e.g. ``zipfile.ZipFile``).
    path : name of the CSV member to read.

    Returns
    -------
    DataFrame built from file columns 1-5 (column 0, the saved index, is
    skipped), relabelled 'node', 'Dgr', 'Btw', 'Ev', 'Pr'.
    """
    # Close the member handle once parsed instead of leaving it open
    with FileZip.open(path) as fl:
        fd = pd.read_csv(fl, delimiter=",", usecols=[1,2,3,4,5])
    fd.columns = ['node','Dgr','Btw','Ev','Pr']
    return fd

In [0]:
#Function for evaluating traditional robustness (Kendall tau) and listing the top-ranked nodes
def nodes_list(df1,df2,x,NumbNod):
    """Compare the ranking of measure ``x`` between two centrality tables.

    Parameters
    ----------
    df1 : DataFrame with a 'node' column and a column ``x``; its ranks are
        looked up for the nodes of ``df2``.
    df2 : DataFrame with the same columns; defines which nodes are compared
        and supplies the top list.
    x : name of the centrality column to use.
    NumbNod : number of top rows of ``df2`` (by ``x``, descending) to return.

    Returns
    -------
    (TauVal, OvSrList) : Kendall correlation between the ranks of ``x`` in
        ``df2`` and the ranks of the same nodes in ``df1`` (rounded to 6
        decimals), and the list of 'node' values of the ``NumbNod`` highest
        rows of ``df2``.

    Neither input frame is modified: the ranks are kept in local objects
    instead of being written into a 'rank' column of the callers' frames.
    """
    # Rank of each node in its own table (ties receive the highest rank)
    ref_rank = df1[['node']].assign(rank_ref=df1[x].rank(method='max'))
    obs_rank = df2[x].rank(method='max')
    # A left join keeps the row order of df2, so both rank vectors line up
    aligned = pd.merge(df2[['node']], ref_rank, how='left', on='node')
    # Compute the Kendall correlation
    TauVal = stats.kendalltau(obs_rank, aligned['rank_ref']).correlation
    # np.round also accepts a plain Python float, unlike the .round method
    TauVal = np.round(TauVal, 6)
    # Highest values of x first, then keep the requested number of rows
    top_rows = df2.sort_values(x, ascending=False).head(n=NumbNod)
    OvSrList = top_rows['node'].values.tolist()
    return (TauVal, OvSrList)

In [0]:
#Function for loading an immunization node list and returning the edges of G that touch those nodes
def list_nodes_im(FileZip,i,j,cm,G):
    """Load a stored node list and return the edges of ``G`` at those nodes.

    Parameters
    ----------
    FileZip : object with an ``open(name)`` method returning a readable
        file usable as a context manager (e.g. ``zipfile.ZipFile``).
    i, j, cm : parts of the member name
        'netw_<i>_mis_<j>_<cm>.csv'.
    G : graph-like object whose ``edges(nbunch)`` is queried with the nodes.

    Returns
    -------
    list of the items produced by ``G.edges(<node column>)``.
    """
    member = 'netw_' + str(i) +'_mis_'+ str(j) +'_'+ cm +'.csv'
    # Close the member handle once parsed instead of leaving it open
    with FileZip.open(member) as fl1:
        # Column 0 of the file is the saved index; column 1 holds the nodes
        node_table = pd.read_csv(fl1, delimiter=",", usecols=[1])
    node_table.columns = ['node']
    # NOTE(review): G.edges(nbunch) yields the edges incident to the listed
    # nodes, not the edges left after removing them. Confirm this is the edge
    # list the spreading simulation is meant to receive.
    edge_list = list(G.edges(node_table['node']))
    return edge_list

In [0]:
#Function for running the SIR spreading process on an edge list and returning the rounded mean and standard deviation given by estimate_R0
def sim_sir(Beta,Gamma,cList):
    """Run ``SpreadingProcess.estimate_R0`` on an edge list.

    ``cList``, ``Beta``, ``Gamma`` and a trailing 0 are passed, in that
    order, to ``SpreadingProcess``; ``estimate_R0`` is then called with
    2000 and a random seed drawn from ``[0, max_cplusplus_int]``.
    (Presumably 2000 is the number of sampled runs and the trailing 0 a rate
    that is switched off; confirm against the spreading_CR documentation.)

    Returns
    -------
    (mean, std) as returned by ``estimate_R0``, each rounded to 4 decimals.
    """
    process = SpreadingProcess(cList, Beta, Gamma, 0)
    # randint excludes the upper bound, hence the +1
    run_seed = np.random.randint(max_cplusplus_int + 1)
    mean_val, std_val = process.estimate_R0(2000, run_seed)
    process.reset()
    return (np.round(mean_val, 4), np.round(std_val, 4))

In [0]:
# Load the edge list from the CSV file; its first row holds the column names
fd = pd.read_csv(
    "edgelist.truecolsprings.csv",
    sep=",",
    header=0,
)

In [0]:
# Undirected graph whose edges are the ('V1', 'V2') pairs of the edge list
ColSp = nx.Graph()
ColSp.add_edges_from(fd.loc[:, ['V1', 'V2']].values.tolist())
# Degree of every node of the Colorado Spring network, in node order
degree_sequence = [degree for _node, degree in ColSp.degree()]

In [0]:
# Plot the Colorado Spring network using a spring layout for the positions
nx.draw(
    ColSp,
    pos=nx.spring_layout(ColSp),
    node_size=60,
    font_size=8,
)
plt.show()

In [0]:
# Histogram of the node degrees of the Colorado Spring network,
# normalized so that the bars integrate to one
plt.hist(x=degree_sequence, density=True)
plt.show()

In [0]:
# Generate 5000 configuration-model networks from the empirical degree
# sequence and store their node and edge tables in 'true_netw.zip'.
# The context manager closes the archive even if an iteration raises.
with ZipFile('true_netw.zip','w') as newzip:
    for i in range(1,5001):
        # File save paths (temporary files, removed once archived)
        path_1 = 'nodes ' + str(i) +'.csv'
        path_2 = 'edges ' + str(i) +'.csv'
        # random.seed() returns None: it only re-seeds Python's global
        # generator. The former `seed=rm.seed()` therefore always passed
        # seed=None, which is now spelled out explicitly.
        rm.seed()
        # Create graph using configuration model
        G = nx.configuration_model(degree_sequence, seed=None)
        # Remove parallel edges
        G = nx.Graph(G)
        # Remove self-loops
        G.remove_edges_from(nx.selfloop_edges(G))
        try:
            # Save graph in files, then add them to the archive
            pd.DataFrame(G.edges).to_csv(path_2)
            pd.DataFrame(G.nodes).to_csv(path_1)
            newzip.write(path_1)
            newzip.write(path_2)
        finally:
            # Never leave the temporary CSV files behind
            for tmp_path in (path_1, path_2):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

In [0]:
# Read every true network from 'true_netw.zip', compute its centrality
# measures and store the table as 'netw_<i>.csv' in 'G_cm.zip'.
# Both archives are closed by the context manager, even on error.
with ZipFile('true_netw.zip','r') as FileZip, ZipFile('G_cm.zip','w') as newzip:
    for i in range(1,5001):
        G = create_netw(FileZip,i)
        path_f = 'netw_' + str(i) +'.csv'
        df2 = list_meas_centr(G)
        try:
            # Written to a temporary file first because ZipFile.write takes a path
            df2.to_csv(path_f)
            newzip.write(path_f)
        finally:
            if os.path.exists(path_f):
                os.remove(path_f)

In [0]:
# Generate networks with missing data, compute their centrality measures and
# draw the random immunization node lists; everything goes to 'G_imstr.zip'.
# Both archives are closed by the context manager, even on error.
with ZipFile('true_netw.zip','r') as FileZip, ZipFile('G_imstr.zip','w') as newzip:
    for i in range(1,5001):
        # Read from FileZip, the archive opened by this cell
        G_true = create_netw(FileZip,i)
        for j in range(1,9):
            # Work on a copy of the true network so that every level removes
            # j*10% of the *true* nodes; removing from the same graph object
            # would make the losses accumulate from one level to the next.
            G = G_true.copy()
            # Number of random nodes to remove at this level
            num = int(len(G.nodes())*(j/10))
            # Choose random nodes from true network
            rm.seed()
            rnodes = rm.sample(list(G.nodes()),num)
            # Remove selected random nodes from the copy of the true network
            G.remove_nodes_from(rnodes)
            # Number of nodes to be immunized
            numNo = int(len(G.nodes)*FracImNodes)
            # Random immunization list nodes
            rm.seed()
            rdm = rm.sample(list(G.nodes),numNo)
            path_r = 'netw_' + str(i) +'_mis_'+str(j) +'_rdm'+'.csv'
            pd.DataFrame(rdm).to_csv(path_r)
            newzip.write(path_r)
            os.remove(path_r)
            # Centrality measures of the network with missing data
            df1 = list_meas_centr(G)
            # The output path must be set before the table is written to it
            path_f = 'netw_' + str(i) +'_mis_'+str(j) +'.csv'
            df1.to_csv(path_f)
            newzip.write(path_f)
            os.remove(path_f)

In [0]:
# Result accumulators: R collects the traditional robustness rows, while
# Rim and Rim_std collect the rows of the SIR means and standard deviations
R, Rim, Rim_std = [], [], []

In [0]:
# Compute the traditional robustness (Kendall correlation per centrality
# measure) and store the top node lists of every immunization strategy in
# 'list_im.zip'.
# Rows are appended to R, so re-running this cell without first re-running
# the cell that resets R duplicates them.
# (centrality column, file-name suffix) pairs, in the column order of R
strategies = (('Dgr','d'), ('Btw','b'), ('Ev','e'), ('Pr','p'))
# All three archives are closed by the context manager, even on error
with ZipFile('G_cm.zip','r') as FileZip1, \
     ZipFile('G_imstr.zip','r') as FileZip2, \
     ZipFile('list_im.zip','w') as newzip:
    for j in range(1,9):
        for i in range(1,5001):
            # Centrality measures of true network
            fd1 = meas_list(FileZip1, 'netw_' + str(i) +'.csv')
            # Centrality measures of network with missing data
            fd2 = meas_list(FileZip2, 'netw_' + str(i) +'_mis_' + str(j) +'.csv')
            # Number of nodes in the top list
            numNo = int(len(fd2)*FracImNodes)
            taus = []
            for column, suffix in strategies:
                # Robustness and top node list of this strategy
                tau, top_nodes = nodes_list(fd1,fd2,column,numNo)
                taus.append(tau)
                # Save the top node list through a temporary CSV file
                path_out = 'netw_' + str(i) +'_mis_'+str(j) +'_'+ suffix +'.csv'
                pd.DataFrame(top_nodes).to_csv(path_out)
                newzip.write(path_out)
                os.remove(path_out)
            # One row per (level, network): four correlations + missing percent
            R.append(taus + [j*10])

path_r = 'Robust_tr.csv'
pd.DataFrame(R).to_csv(path_r)

In [0]:
# Fresh accumulators for the SIR results (means and standard deviations)
Rim, Rim_std = [], []

In [0]:
# SIR model calculation for every immunization strategy and level of
# missing data; results are appended to Rim / Rim_std and saved as CSV.
# NOTE(review): 'list_im_rdm.zip' is not written by any cell visible in this
# notebook. Presumably it merges 'list_im.zip' with the '_rdm' lists stored
# in 'G_imstr.zip'; confirm how it is produced.
# NOTE(review): only networks 2501-3000 are processed here, and the CSV
# files are overwritten on every run; confirm this batch range is intended.
# Strategy codes in the column order of the result rows
strategy_codes = ('rdm','d','b','e','p')
# Both archives are closed by the context manager, even on error
with ZipFile('true_netw.zip','r') as FileZip1, ZipFile('list_im_rdm.zip','r') as FileZip2:
    for i in range(2501,3001):
        G = create_netw(FileZip1,i)
        for j in range(1,9):
            # Upload the edge list of every strategy
            edge_lists = [list_nodes_im(FileZip2,i,j,code,G) for code in strategy_codes]
            # One (mean, std) pair per strategy, simulated in the order above
            outcomes = [sim_sir(Beta,Gamma,edges) for edges in edge_lists]
            # Save the results for each level of missing data
            Rim.append([mean_val for mean_val, _ in outcomes] + [j*10])
            Rim_std.append([std_val for _, std_val in outcomes] + [j*10])

path_r = 'Robust_im.csv'
pd.DataFrame(Rim).to_csv(path_r)
path_r = 'Robust_im_std.csv'
pd.DataFrame(Rim_std).to_csv(path_r)

In [0]:
# Mean and standard deviation of the traditional robustness per level of
# missing data.
# 'Robust_tr.csv' is written by DataFrame.to_csv with its default comma
# separator, so it is read back with "," (reading it with ";" yields a single
# column and makes usecols fail).
df1 = pd.read_csv('Robust_tr.csv', delimiter=",", usecols=[1,2,3,4,5])
# Column 0 of the file is the saved index; the rest follow the order of R
df1.columns=['Dgr','Btw','Eig','PR','p%']
MV_R = df1.groupby(['p%']).mean()
Std_R = df1.groupby(['p%']).std()

In [0]:
# Mean and standard deviation of the SIR results per level of missing data,
# including the difference of every strategy to random immunization
df2 = pd.read_csv('Robust_im.csv', sep=",", usecols=[1,2,3,4,5,6])
df2.columns = ['Rdm','Dgr','Btw','Eig','PR','p%']
# Add one '<strategy>-Rdm' column per targeted strategy
for strategy in ('Dgr','Btw','Eig','PR'):
    df2[strategy + '-Rdm'] = df2[strategy] - df2['Rdm']
MV_Rim = df2.groupby(['p%']).mean()
Std_Rim = df2.groupby(['p%']).std()

In [0]:
# Plot of results
# Degree centrality: difference to random immunization (left axis, blue)
# against the robustness of the ranking (right axis, red)
fig, ax1 = plt.subplots()
ax1.errorbar(
    MV_R.index,
    MV_Rim['Dgr-Rdm'],
    yerr=Std_Rim['Dgr-Rdm'],
    fmt='-o', ecolor='blue', c='blue', ms=3,
)
ax1.set_xlabel('Percent of nodes missing for sample')
ax1.set_ylabel('delta in Outbreak size vs. Random immunization')
# Second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.errorbar(
    MV_R.index,
    MV_R['Dgr'],
    yerr=Std_R['Dgr'],
    fmt='-o', ecolor='red', c='red', ms=3,
)
ax2.set_ylabel('Robustness of centrality')
plt.title('Degree centrality')
plt.show()

In [0]:
# Betweenness centrality: difference to random immunization (left axis,
# blue) against the robustness of the ranking (right axis, red)
fig, ax1 = plt.subplots()
ax1.errorbar(
    MV_R.index,
    MV_Rim['Btw-Rdm'],
    yerr=Std_Rim['Btw-Rdm'],
    fmt='-o', ecolor='blue', c='blue', ms=3,
)
ax1.set_xlabel('Percent of nodes missing for sample')
ax1.set_ylabel('delta in Outbreak size vs. Random immunization')
# Second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.errorbar(
    MV_R.index,
    MV_R['Btw'],
    yerr=Std_R['Btw'],
    fmt='-o', ecolor='red', c='red', ms=3,
)
ax2.set_ylabel('Robustness of centrality')
plt.title('Betweenness centrality')
plt.show()

In [0]:
# Eigenvector centrality: difference to random immunization (left axis,
# blue) against the robustness of the ranking (right axis, red)
fig, ax1 = plt.subplots()
ax1.errorbar(
    MV_R.index,
    MV_Rim['Eig-Rdm'],
    yerr=Std_Rim['Eig-Rdm'],
    fmt='-o', ecolor='blue', c='blue', ms=3,
)
ax1.set_xlabel('Percent of nodes missing for sample')
ax1.set_ylabel('delta in Outbreak size vs. Random immunization')
# Second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.errorbar(
    MV_R.index,
    MV_R['Eig'],
    yerr=Std_R['Eig'],
    fmt='-o', ecolor='red', c='red', ms=3,
)
ax2.set_ylabel('Robustness of centrality')
plt.title('Eigenvector centrality')
plt.show()

In [0]:
# Pagerank centrality: difference to random immunization (left axis, blue)
# against the robustness of the ranking (right axis, red)
fig, ax1 = plt.subplots()
ax1.errorbar(
    MV_R.index,
    MV_Rim['PR-Rdm'],
    yerr=Std_Rim['PR-Rdm'],
    fmt='-o', ecolor='blue', c='blue', ms=3,
)
ax1.set_xlabel('Percent of nodes missing for sample')
ax1.set_ylabel('delta in Outbreak size vs. Random immunization')
# Second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.errorbar(
    MV_R.index,
    MV_R['PR'],
    yerr=Std_R['PR'],
    fmt='-o', ecolor='red', c='red', ms=3,
)
ax2.set_ylabel('Robustness of centrality')
plt.title('Pagerank centrality')
plt.show()