In [1]:
import os
import math
import time
import random
import numpy as np
import pandas as pd
import networkx as nx
import scipy.stats as stats
from Robustness import *

### Global and local graph measure 

In [2]:
def BA_Graph(n,m,seed):
    '''
    Thin wrapper around networkx's Barabasi-Albert generator.
    Inputs: n: number of nodes (int), m: number of edges each new node attaches with (int),
    seed: value forwarded to networkx as the random seed (int);
    Output: the graph returned by nx.barabasi_albert_graph
    '''
    return nx.barabasi_albert_graph(n, m, seed=seed)

def ER_Graph(n,p,seed):
    '''
    Thin wrapper around networkx's Erdos-Renyi generator.
    Inputs: n: number of nodes (int), p: probability of connecting any two nodes (0<=p<=1),
    seed: value forwarded to networkx as the random seed (int);
    Output: the graph returned by nx.erdos_renyi_graph
    '''
    return nx.erdos_renyi_graph(n, p, seed=seed)

def WS_Graph(n,k,p,seed):
    '''
    Thin wrapper around networkx's Watts-Strogatz generator.
    Inputs: n: number of nodes (int), k: number of nearest neighbours each node is joined with (int),
    p: probability of rewiring an edge (0<=p<=1), seed: value forwarded to networkx as the random seed (int);
    Output: the graph returned by nx.watts_strogatz_graph
    '''
    return nx.watts_strogatz_graph(n, k, p, seed=seed)

#Function to find the distance between two nodes
def Distance(x1, y1, x2, y2):
    '''
    Inputs: coordinates of two nodes (x1,y1) and (x2,y2); every value is converted with float(),
    so numbers and numeric strings are both accepted;
    Outputs: Euclidean distance between the two points (float)
    '''
    # math.hypot computes sqrt(dx**2 + dy**2) without the OverflowError that
    # math.pow(d, 2) raises when a coordinate difference is very large.
    return math.hypot(float(x1) - float(x2), float(y1) - float(y2))

def HyperbolicGraphGenerator(n,k,seed):
    '''
    Split an existing 'HGG_seed_{seed}.hg' file into a node list and an edge list.

    Inputs: n : number of nodes (int), k = targeted average degree, seed: indicator (int);
    n and k only select the directory '../Data/Model_Networks/HGG/{n}_{k}/' and seed selects
    the file inside it. The directory and the .hg file must already exist.
    Output: returns None. Writes 'HGG_node_seed_{seed}.txt' (every tab-separated line with
    three fields, copied unchanged) and 'HGG_edge_seed_{seed}.txt' (every line with two fields,
    followed by the Distance() between the coordinates of its two end nodes).
    Lines with any other number of fields are ignored.
    Raises: FileNotFoundError if the directory or the .hg file is missing; KeyError if an edge
    refers to a node whose coordinate line has not been read before it.
    '''
    pathh = f'../Data/Model_Networks/HGG/{n}_{k}/'
    # Coordinates of each node, keyed by node id (kept as the strings read from the file)
    node_coords = {}
    # Distance of each edge, keyed by the stripped 'source<TAB>target' line
    edge_dist = {}
    # The input is opened first so a missing .hg file does not leave empty output files behind;
    # the context managers close all three handles even if parsing fails.
    with open(pathh + f'HGG_seed_{seed}.hg', 'r') as hg_file, \
            open(pathh + f'HGG_node_seed_{seed}.txt','w') as NF, \
            open(pathh + f'HGG_edge_seed_{seed}.txt','w') as EF:
        for line in hg_file:
            fields = line.strip().split('\t')
            if len(fields) == 3:
                # Node line: copy it as-is (it still carries its own newline)
                NF.write(line)
                node_coords[fields[0]] = fields[1:]
            elif len(fields) == 2:
                source, target = fields
                edge_dist[line.strip()] = Distance(node_coords[source][0], node_coords[source][1],
                                                   node_coords[target][0], node_coords[target][1])
        # A distinct loop name is used so the parameter k is no longer shadowed
        for edge, dist in edge_dist.items():
            EF.write("%s\t%f\n"%(edge, dist))
    print('node and edgelist created')
    

def measure(G,seed,path,model):
    '''
    Compute node-level and global measures of G and write them to tab-separated text files.

    Inputs: G: networkx graph,
    seed: indicator (int), only used in the output file names,
    path: directory of the output files (used as a plain string prefix, so it must end with a separator),
    model: Name of the model network, used as the file-name prefix
    Outputs: returns None. Creates text files of Degree, ClusteringCoefficient, BetweennessCentrality,
    EigenVectorCentrality, ClosenessCentrality (two columns: node, value; no header) and a
    '{model}_Global_seed_{seed}.txt' file with a header line and one line of global measures.
    Note: betweenness_centrality is always called with seed = 1, independent of the seed argument.
    '''

    ## Node measures

    dfD = pd.DataFrame(nx.degree(G))
    dfD.to_csv(path + f'{model}_Degree_seed_{seed}.txt', sep = '\t', index = None,header = None)

    # Each measure is computed lazily and written right away, so the files appear in this order
    # and an exception in a later measure does not discard the earlier ones.
    node_measures = [
        ('ClusteringCoefficient', lambda: nx.clustering(G)),
        ('BetweennessCentrality', lambda: nx.betweenness_centrality(G, seed = 1)),
        ('EigenVectorCentrality', lambda: nx.eigenvector_centrality(G, max_iter=5000)),
        ('ClosenessCentrality', lambda: nx.closeness_centrality(G)),
    ]
    for name, compute in node_measures:
        values = compute()
        df_measure = pd.DataFrame(list(values.items()))
        df_measure.to_csv(path + f'{model}_{name}_seed_{seed}.txt', sep = '\t', index = None,header = None)

    print('Node measures are done!')

    ## Global Measure
    NN = G.number_of_nodes()
    EE = G.number_of_edges()

    largest_cc = max(nx.connected_components(G), key=len)
    G_lcc = G.subgraph(largest_cc)
    # Only the size of the component is needed, so node labels are not converted to int
    fr_lcc = len(largest_cc)/NN
    ave_deg = np.mean(dfD[1])
    edge_density = 2*EE/(NN*(NN-1))
    mean_spl = nx.average_shortest_path_length(G_lcc)

    # The file is opened only after every value is available and is closed by the context manager
    with open(path + f'{model}_Global_seed_{seed}.txt','w') as file:
        file.write('Number_of_nodes\tNumber_of_edges\tFraction_of_nodes_inlcc\tAverage_degree\tEdge_density\tMean_shortest_path_length')
        file.write('\n')
        file.write(f'{NN}\t{EE}\t{fr_lcc}\t{ave_deg}\t{edge_density}\t{mean_spl}')
    print('Global measures are done!')
    
    
## To create edgelist, nodelist and graph measures for model networks

def input_files(model,n,m,k,k1,p,seed):
    '''
    Generate (or, for HGG, load) the model networks for seeds 1..seed and store their measures.

    Inputs: model: name of the model network, one of 'BA', 'ER', 'WS', 'HGG',
    n: Number of nodes,
    m: Number of edges to attach from a new node to existing nodes (for BA model) (1 <= m < n),
    k: Each node is joined with its k nearest neighbors (for WS model),
    k1: targeted average degree (for HGG model), selects the '{n}_{k1}' directory,
    p: Probability of rewiring each edge (for WS model) or Probability for edge creation (for ER model),
    seed: number of realizations (int); graphs are produced for every i in 1..seed,
    Outputs: returns None. Saves the nodelist, edgelist and other graph measures for the model network
    under '../Data/Model_Networks/{model}/<parameters>/'.
    For 'HGG' the files 'HGG_seed_{i}.hg' must already exist in that directory; they are only
    converted here, not generated.
    Raises: ValueError if model is not one of the supported names.
    '''
    if model not in ('BA', 'ER', 'WS', 'HGG'):
        # Previously an unknown name only failed later with an unbound local variable
        raise ValueError(f"model should be one of ['BA','ER','WS','HGG'], got {model!r}")

    path = f'../Data/Model_Networks/{model}/'
    # makedirs also creates missing parent directories and tolerates an existing directory
    os.makedirs(path, exist_ok=True)
    for i in range(1,seed+1):
        if model == 'HGG':
            path1 = f'{n}_{k1}'
            HyperbolicGraphGenerator(n,k1,i)
            fe = pd.read_csv(path + path1 + f'/HGG_edge_seed_{i}.txt', sep = '\t',header = None)
            fn = pd.read_csv(path + path1 + f'/HGG_node_seed_{i}.txt', sep = '\t',header = None)
            G = nx.from_pandas_edgelist(fe,0,1)
            # Nodes without any edge are missing from the edgelist, so add them from the nodelist
            G.add_nodes_from(fn[0])

        else:
            if model == 'BA':
                G = BA_Graph(n,m,i)
                path1 = f'{n}_{m}'
            elif model == 'ER':
                G = ER_Graph(n,p,i)
                path1 = f'{n}_{p}'
            else:
                # model == 'WS'
                G = WS_Graph(n,k,p,i)
                path1 = f'{n}_{k}_{p}'
            os.makedirs(path + path1, exist_ok=True)

            ## To write edgelist
            nx.write_edgelist(G,path + f'{path1}/{model}_edge_seed_{i}.txt', data=False, delimiter='\t')

            ## To write nodelist
            N = pd.DataFrame(G.nodes())
            N.to_csv(path + f'{path1}/{model}_node_seed_{i}.txt', sep = '\t', header = None, index = None)
            print('node and edgelist created')
        ## Calls the measure function for graph measures
        measure(G,i,path+path1+'/',model)
        print(f'Done for seed {i}')

## Generate Model Network and store its Graph measures

In [3]:
#------------------------------------------------------------------------
# Parameter values that generate all the model networks (kept for reference;
# the single-run settings below are the ones actually executed):
# Models = ['BA','ER','WS','HGG']
# no_of_nodes = [1000,2000,5000]
# M = [2,3,4,5]
# K = [2,4,6,8,10]
# KHGG = [3,5,7,9,10]
# prob = [0.003,0.005,0.007,0.01]
# seed = 100
#--------------------------------------------------------------------------

# Single-run settings: one value per parameter list.
# M is swept for 'BA', prob for 'ER', K for 'WS' and KHGG for 'HGG'.
no_of_nodes = [1000]
M = [3]
K = [6]
KHGG = [3]
prob = [0.005]
# input_files iterates over range(1, seed+1), so this is the number of realizations
seed = 1
model = 'BA'

# For each network size, sweep only the parameter list that belongs to the chosen model;
# the arguments the model does not use are filled with placeholder values.
for n in no_of_nodes:
    if model == 'BA':
        k,k1,p = 0,0,0
        for m in M:
            a = input_files(model,n,m,k,k1,p,seed)
    elif model == 'ER':
        k,k1,m = 0,0,0
        for p in prob:
            a = input_files(model,n,m,k,k1,p,seed)
    elif model == 'WS':
        # The rewiring probability is fixed at 0.5 here; the prob list is not used for WS
        m,k1,p = 0,0,0.5
        for k in K:
            a = input_files(model,n,m,k,k1,p,seed)
    elif model == 'HGG':
        m,k,p = 0,0,0
        for k1 in KHGG:
            a = input_files(model,n,m,k,k1,p,seed)
    # NOTE(review): this message is also printed when model matches none of the branches above
    print(f'Done for {n}.')  
print(f'------------------------------- Done for {model}.-------------------------------------------')

node and edgelist created
Node measures are done!
Global measures are done!
Done for seed 1
Done for 1000.
------------------------------- Done for BA.-------------------------------------------


In [3]:
def Node_minimum(nodefile,edgefile):
    '''
    Aggregate edge values per node.
    Inputs: nodefile: iterable of node names (e.g. a dataframe column),
    edgefile: dataframe of edges whose column 0 holds a node name and column 2 the edge value,
    Outputs: dataframe with four columns; 'nodes': node name, 'minimum': minimum of the values,
    'sum': sum of the values, 'average': mean of the values.
    Only rows whose column 0 equals the node are used; a node without such rows raises ValueError.
    '''
    node_names = list(nodefile)
    minima, totals, means = [], [], []
    for name in node_names:
        # Values of all rows that list this node in column 0
        values = edgefile.loc[edgefile[0] == name, 2].tolist()
        minima.append(min(values))
        totals.append(sum(values))
        means.append(np.mean(values))
    return pd.DataFrame({'nodes': node_names, 'minimum': minima, 'sum': totals, 'average': means})

def LCC(edgelist):
    '''
    Input: Pandas edgelist (column 0: source node, column 1: target node)
    Output: List of the nodes in the largest connected component, each converted to int
    '''
    graph = nx.from_pandas_edgelist(edgelist,0,1)
    # Pick the component with the most nodes
    biggest = max(nx.connected_components(graph), key=len)
    return [int(node) for node in biggest]

def nodes_measure_lcc(seed,model,parameter,measure,lcc):
    '''
    Load one node measure from disk and keep only the nodes of the largest connected component.

    Inputs: seed: integer,
    model: Model network,
    parameter: parameter values of the model network (name of the sub-directory, e.g. '1000_3'),
    measure: Name of the graph measure,
    lcc: List of nodes present in the Largest connected component,
    Output: Pandas dataframe with nodes name in first column and corresponding measure's value in the
    second column. For 'Forman' and 'AugForman' the dataframe returned by Node_minimum is given instead
    (columns 'nodes', 'minimum', 'sum', 'average').
    Raises: ValueError if measure is not a supported name. Previously an error string was returned,
    which callers then tried to use as a dataframe.
    Note: reads the module-level list Graph_measure, which must be defined before the call.
    '''
    path1 = '../Data/Model_Networks/'

    if measure in Graph_measure:
        df1 = pd.read_csv(path1+ f'{model}/{parameter}/{model}_{measure}_seed_{seed}.txt', sep = '\t', header = None)
        return df1[df1[0].isin(lcc)]

    if measure in ['BakryEmery', 'Ollivier', 'Forman', 'AugForman']:
        # All curvature measures share the same node file layout
        df1 = pd.read_csv(path1+ f'{model}/{parameter}/{measure}/{model}_node_seed_{seed}.txt', sep = '\t', header = None)
        df1[0] = df1[0].astype(int)
        dfn = df1[df1[0].isin(lcc)]
        if measure in ['BakryEmery', 'Ollivier']:
            return dfn
        # Forman-type measures are stored per edge and are aggregated per node here
        dfe = pd.read_csv(path1 + f'{model}/{parameter}/{measure}/{model}_edge_seed_{seed}.txt',sep = '\t', header = None)
        return Node_minimum(dfn[0],dfe)

    raise ValueError("Measure should be from ['BakryEmery','Forman','AugForman','Ollivier', 'Degree', 'ClusteringCoefficient','BetweennessCentrality','EigenVectorCentrality','ClosenessCentrality']")
    

In [5]:
# Names of the model networks (presumably kept for looping over all models; not read elsewhere in this notebook)
Models = ['BA','ER','WS','HGG']

# Node measures written by measure(); nodes_measure_lcc and correlation read this list as a global
Graph_measure = ['Degree', 'ClusteringCoefficient','BetweennessCentrality','EigenVectorCentrality','ClosenessCentrality']
# Curvature measures; their files are read from a sub-directory named after the measure
Curv_measure = ['BakryEmery','Forman','AugForman','Ollivier']
# Every measure, curvature measures first
all_measures = Curv_measure + Graph_measure

In [6]:
#-----------------------------------------------------------
## Creates text files of the node measures restricted to the nodes present in the lcc
#----------------------------------------------------------

# Number of realizations to process (seeds 1..seed)
seed = 1
path1 = '../Data/Model_Networks/'
model = 'BA'
# Name of the parameter sub-directory
p = '1000_3'
for i in range(1,seed+1):
    # The largest connected component is taken from the edgelist of this realization
    df = pd.read_csv(path1 + f'{model}/{p}/{model}_edge_seed_{i}.txt', sep = '\t', header = None)
    lcc = LCC(df)
    for measure in all_measures:
        # Call the function nodes_measure_lcc to consider the node present in the Largest connected components
        node_lcc = nodes_measure_lcc(i,model,p,measure,lcc)
        # The output location depends on the kind of measure: graph measures are written next to
        # the inputs, curvature measures into the sub-directory named after the measure.
        if measure in Graph_measure:
            node_lcc.to_csv(path1+ f'{model}/{p}/{model}_{measure}lcc_seed_{i}.txt', sep = '\t', header = None, index = None)
        elif measure == 'BakryEmery':    
            node_lcc.to_csv(path1+ f'{model}/{p}/{measure}/{model}_nodelcc_seed_{i}.txt',sep = '\t', header = None, index = None)
        else:    
            # Forman, AugForman and Ollivier all use the '_nodelcc_min_' file name
            node_lcc.to_csv(path1+ f'{model}/{p}/{measure}/{model}_nodelcc_min_seed_{i}.txt',sep = '\t', header = None, index = None)
print(f'Measure Done for {p}') 
print(f'------------------- Measure Done for {model} {p} model----------------------------')    

Measure Done for 1000_3
------------------- Measure Done for BA 1000_3 model----------------------------


### Calculates Correlation between two vertex measures

In [7]:
#-------------------------------------------------------------------------------------------------------
## Calculates Spearman and Pearson correlation between two measures
#-------------------------------------------------------------------------------------------------------

def _load_lcc_measure(model,measure,seed_index,path):
    '''
    Read the per-node file of one measure restricted to the largest connected component.
    The file name depends on the kind of measure: graph measures sit directly in path, 'BakryEmery'
    uses '<measure>/<model>_nodelcc_seed_<i>.txt' and every other measure uses the '_nodelcc_min_' name.
    Returns the dataframe (column 0: node, column 1: value).
    '''
    if measure in Graph_measure:
        filename = f'{model}_{measure}lcc_seed_{seed_index}.txt'
    elif measure == 'BakryEmery':
        filename = f'{measure}/{model}_nodelcc_seed_{seed_index}.txt'
    else:
        filename = f'{measure}/{model}_nodelcc_min_seed_{seed_index}.txt'
    return pd.read_csv(path + filename, sep = '\t', header = None)


def correlation(model,measure1,measure2,seed,path):
    '''
    Inputs: model: model's name, measure1: first vertex measure , measure2: second vertex measure, seed: total number to sample, path: directory;
    Outputs: dictionary with Spearman and Pearson correlation between measure1 and measure2 corresponding to each seed and creates text files of correlations
    in '<path>/Correlation_Example/'.
    If the two measure files of some seed have a different number of rows, nothing is written and a
    tuple (measure1, measure2, message, length1, length2) is returned instead of the dictionary.
    Raises: KeyError if a node of the first file is missing from the second file.
    Note: reads the module-level list Graph_measure, which must be defined before the call.
    '''
    corr = {'seed':[],'Spearman':[],'Pearson':[]}
    for i in range(1,seed+1):
        df1 = _load_lcc_measure(model,measure1,i,path)
        df2 = _load_lcc_measure(model,measure2,i,path)

        k1 = list(df1[0])
        k2 = list(df2[0])
        if len(k1) != len(k2):
            return measure1,measure2,'\nLength of the two input lists are not same',  str(len(k1)), str(len(k2))

        data_dict1 = dict(zip(k1, df1[1]))
        data_dict2 = dict(zip(k2, df2[1]))
        # Align the second measure to the node order of the first one before correlating
        set1 = list(data_dict1.values())
        set2 = [data_dict2[key] for key in data_dict1]
        # The p-values are not used
        spear, _ = stats.spearmanr(set1, set2)
        pear, _ = stats.pearsonr(set1, set2)

        corr['seed'].append(i)
        corr['Spearman'].append(spear)
        corr['Pearson'].append(pear)
    df = pd.DataFrame(corr)
    os.makedirs(path + '/Correlation_Example', exist_ok=True)
    df.to_csv(path + f'/Correlation_Example/{model}_corr_{measure1}&{measure2}.txt',sep = '\t', index = None)
    #print(f'Done for {measure1} & {measure2}.')
    return corr
    

In [8]:
# Settings for the example: BA model, parameter directory '1000_3', reference measure Bakry-Emery
model = 'BA'
p = '1000_3'        
m1 = 'BakryEmery'
path1 = '../Data/Model_Networks/'

# NOTE(review): 'seed' and 'all_measures' are not defined in this cell; they come from earlier cells.
## Calculates Correlation between Bakry-Emery curvature and other vertex measures
# Iterating over a set means the order of the measures (and of the printed lines) is not fixed
for m2 in list(set(all_measures)-{'BakryEmery'}):
    cor = correlation(model,m1,m2,seed,path1 +f'{model}/{p}/')
    print(f'Done for {m1} & {m2}.')

Done for BakryEmery & ClusteringCoefficient.
Done for BakryEmery & Forman.
Done for BakryEmery & BetweennessCentrality.
Done for BakryEmery & AugForman.
Done for BakryEmery & ClosenessCentrality.
Done for BakryEmery & Degree.
Done for BakryEmery & EigenVectorCentrality.
Done for BakryEmery & Ollivier.


### Robustness of the vertices

In [None]:
# Same lists as defined earlier in the notebook, repeated here
Graph_measure = ['Degree', 'ClusteringCoefficient','BetweennessCentrality','EigenVectorCentrality','ClosenessCentrality']
Curv_measure = ['BakryEmery','Forman','AugForman','Ollivier']


# Measure whose values rank the vertices; only used in the final message below
name = 'BakryEmery'
path = f'../Data/Model_Networks/BA/1000_3/'
if not os.path.exists(path + 'Robustness_Example'):
    os.mkdir(path + 'Robustness_Example')
    
# Build the graph from the edgelist and work on an independent copy of its largest connected component
edgefile = pd.read_csv(path + 'BA_edge_seed_1.txt', sep = '\t', header = None)
edgefile = edgefile.astype(int)
G = nx.from_pandas_edgelist(edgefile,0,1)
lcc = LCC(edgefile)
subgraph = G.subgraph(lcc)
SGraph =  subgraph.copy()
        # General form of the file read below:
        #df = pd.read_csv(path + f'{measure}/{model}_nodelcc_seed_{seed}.txt',sep = '\t', header = None)


# Map every node (column 0) to its measure value (column 1)
df = pd.read_csv(path + 'BakryEmery/BA_nodelcc_seed_1.txt',sep = '\t', header = None)
measure_value = {df[0][i]:df[1][i] for i in range(len(df))}
sort_order = False

#------------------------------------------------------------------------------------------
# Call the function 'Robustness_node' to calculate the robustness
# (not defined in this notebook; presumably provided by 'from Robustness import *')
#
# If name in Graph_measure: sort_order = True
# If name in Curv_measure:  sort_order = False   
#
# For random vertex removal, call the function 'Robustness_random'
#------------------------------------------------------------------------------------------
Robust = Robustness_node(measure_value,SGraph,sort_order)
    

# The result is transposed so that it can be labelled with the two columns below
df2 = pd.DataFrame(Robust)
Robustfile = df2.T
Robustfile.columns = ['Fraction_of_nodes','Efficiency']
Robustfile.to_csv(path + f'Robustness_Example/Robustness_BakryEmery.txt', sep = '\t',index = None)
print(' --------------------- Done for',name,'----------------------------------------------')