In [1]:
import sys, pickle
sys.path.insert(0, "libs")

import os, pickle, csv # import packages for file I/O
import time # package to help keep track of calculation time

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

import scipy
import scipy.stats as sst
from scipy.special import comb
from scipy.integrate import simpson
from scipy.signal import argrelextrema
from random import choice

from libs.utils import *
from libs.finiteTheory import *
from visualizations import *
from libs.utils import *
from robustnessSimulations import *
from performanceMeasures import *
from infiniteTheory import *
from finiteTheory import *
from fnmatch import fnmatch

In [18]:
def read_from_adj(filename):
    """
    Read an edge-list file and return it as an undirected networkx graph.

    Every line that consists of exactly two tokens separated by a single
    space is parsed as one edge between two integer node labels; all other
    lines are ignored.

    ARGS:
        filename (str): path of the edge-list file

    RETURNS:
        networkx.Graph built from a dense adjacency matrix. An empty graph
        is returned when the file is empty or holds no usable edge line.

    RAISES:
        ValueError: if a two-token line holds a non-integer token
    """

    edge_list = []

    # the context manager closes the file on every exit path
    with open(filename, "r") as file:
        for line in file:
            tokens = line.strip().split(" ")
            if len(tokens) == 2:
                edge_list.append((int(tokens[0]), int(tokens[1])))

    # empty file, or no line that could be parsed as an edge
    if not edge_list:
        return nx.Graph()

    node_labels = {label for edge in edge_list for label in edge}

    if 0 in node_labels:
        # labels are used directly as matrix indices
        n = max(node_labels) + 1
        offset = 0
    else:
        # labels are shifted so that the smallest one maps to index 0;
        # the matrix keeps max(label) rows, so when the smallest label is
        # larger than 1 the trailing rows stay as isolated nodes
        n = max(node_labels)
        offset = min(node_labels)

    adj = np.zeros((n, n))

    # symmetric fill: the graph is undirected
    for u, v in edge_list:
        adj[u - offset, v - offset] = 1
        adj[v - offset, u - offset] = 1

    return nx.from_numpy_array(adj)

    
def random_removal(G0):
    """
    Track the relative size of the largest connected component (LCC)
    while nodes are removed one at a time, uniformly at random.

    ARGS:
        G0 (networkx.Graph): input graph (left unmodified, a copy is used)

    RETURNS:
        array of length n; entry i is the LCC size divided by (n - i),
        measured after i nodes have been removed
    """

    # work on a copy so that the caller's graph stays intact
    graph = G0.copy()
    total = graph.number_of_nodes()

    rel_lcc = np.zeros(total, dtype=float)

    for removed in range(total):
        # relative LCC size of the current graph
        largest = max(nx.connected_components(graph), key=len)
        rel_lcc[removed] = len(largest) / (total - removed)

        # nothing left to remove
        if graph.number_of_nodes() == 0:
            continue

        # pick a random node and delete it
        victim = choice(list(graph.nodes()))
        graph.remove_node(victim)

    return rel_lcc

            
def targeted_removal(G0):
    """
    Track the relative size of the largest connected component (LCC)
    while the currently highest-degree node is removed at every step.

    ARGS:
        G0 (networkx.Graph): input graph (left unmodified, a copy is used)

    RETURNS:
        array of length n; entry i is the LCC size divided by (n - i),
        measured after i nodes have been removed
    """

    # work on a copy so that the caller's graph stays intact
    graph = G0.copy()
    total = graph.number_of_nodes()

    rel_lcc = np.zeros(total, dtype=float)

    for removed in range(total):
        # relative LCC size of the current graph
        largest = max(nx.connected_components(graph), key=len)
        rel_lcc[removed] = len(largest) / (total - removed)

        # nothing left to remove
        if graph.number_of_nodes() == 0:
            continue

        # first node (in degree-view order) that has the maximum degree
        hub, _ = max(graph.degree, key=lambda pair: pair[1])
        graph.remove_node(hub)

    return rel_lcc

def r_pow(x, n, d):
    """
    Compute x to the power of n/d (not reduced to lowest
    expression) with the correct function real domains.

    x is first raised to the power n. For an even d the d-th root is only
    defined for non-negative values, negative ones yield NaN. For an odd d
    the root is taken on the absolute value and the sign is restored.

    ARGS:
        x (int,float,list,tuple,array): base
        n (int)            : exponent numerator (Python or NumPy integer)
        d (int)            : exponent denominator (Python or NumPy integer)

    RETURNS:
        x to the power of n/d (NumPy float scalar or array)

    RAISES:
        TypeError : if n or d is not an integer
        ValueError: if d is 0
    """

    def _is_integer(value):
        # bool is a subclass of int but is not a meaningful exponent here
        return isinstance(value, (int, np.integer)) and not isinstance(value, bool)

    # sequence to array
    if isinstance(x, (list, tuple)):
        x = np.array(x)
    # check inputs
    if not (_is_integer(n) and _is_integer(d)):
        raise TypeError("Exponent numerator and denominator must be integers")
    # if denominator is zero
    if d == 0:
        raise ValueError("Exponent denominator cannot be 0")

    # plain Python ints from here on (a NumPy negative integer exponent
    # applied to an integer base raises)
    n, d = int(n), int(d)

    is_array = isinstance(x, np.ndarray)
    # integer/bool/object arrays can neither store NaN nor be raised to a
    # negative integer power, so they are converted to float first
    if is_array and not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    # raise x to power of n (always a new object, the input is not modified)
    X = x**n

    # even denominator: domain is X >= 0 only
    if d % 2 == 0:
        if is_array:
            X[X < 0] = np.nan
        elif X < 0:
            X = np.nan
        return np.power(X, 1./d)

    # odd denominator: domain is all R
    return np.sign(X) * np.power(np.abs(X), 1./d)
    

def check_space(string):
    '''Return the number of space characters (" ") contained in a string.

    Used to help identify edge list files.
    '''

    # one hit per position that holds a blank
    return sum(1 for position in range(len(string)) if string[position] == " ")

In [4]:
# Collect the paths of all network files (*.adj and *.arc) below the data root

root = r'pholme_networks'
pattern = "*.adj"
pattern2 = "*.arc"
nwks_list = []

for path, subdirs, files in os.walk(root):
    for name in files:
        # skip everything that has neither of the two accepted extensions
        if not (fnmatch(name, pattern) or fnmatch(name, pattern2)):
            continue
        nwks_list.append(os.path.join(path, name))

# keep a record of the collected paths on disk
df = pd.DataFrame(nwks_list)
df.to_csv("paths_to_networks.csv")

In [11]:
len(nwks_list)

2317

In [38]:
# Select the networks that have at least 2 and at most `max_size` nodes.
# With compute=False only the selection (name, path, number of nodes) is
# pickled; with compute=True the removal curves are additionally computed
# and written to one text file per network.

compute=False
min_counter=0   # with compute=True, selected networks below this index are not recomputed
max_size=100

# columns: 0 name, 1 path, 2 nodes, 3 edges, 4-7 relative LCC curves
table = np.zeros((len(nwks_list),8), dtype=object)

counter = 0

if compute:
    # the per-network result files are written into this directory
    os.makedirs('data', exist_ok=True)

for i, nwpath in enumerate(nwks_list):

    # extract file name from file path
    nwname = os.path.basename(nwpath)

    # provisional row: it is overwritten by the next network when this one
    # is omitted, because `counter` only advances for selected networks
    table[counter,0] =  str(nwname)
    table[counter,1] =  str(nwpath)

    # read graph from edge-list file
    print('{} {}'.format(i, nwname), end='')
    G = read_from_adj(nwpath)

    n = G.number_of_nodes()
    m = G.number_of_edges()
    print(' has (n,m) = ({}, {})'.format(n, m), end='')

    # check if network meets size limitation
    if n > max_size or n < 2:
        print(' --- omit')
        continue
    print(' --- compute', end='')

    # add number of nodes and edges to info table (also when nothing is
    # computed, so that the pickled selection carries the network size)
    table[counter,2] = n
    table[counter,3] = m

    # set p for G(n,p) graph: edge density of the network
    p = m / scipy.special.comb(n, 2)

    if compute and counter >= min_counter:
        t0 = time.time()

        # get data for random and targeted node removal
        # NOTE(review): `num_tries` is not defined in the visible cells --
        # confirm it is set elsewhere before running with compute=True
        nw_r = np.nanmean([random_removal(G) for _ in range(num_tries)], axis=0)
        nw_t = targeted_removal(G)

        # finite-theory results for random and targeted node removal
        theory_r = relSCurve(p, n, attack=False, reverse=True, lcc_method_relS="pmult")
        theory_t = relSCurve(p, n, attack=True, reverse=True, lcc_method_relS="pmult")

        # rel LCC arrays, stored in info table columns 4-7
        # (separate index name: the outer loop index `i` is not shadowed)
        results = [nw_r, nw_t, theory_r, theory_t]
        for j, array in enumerate(results):
            table[counter,4+j] = array

        with open('data/fulldata-{}.txt'.format(counter), 'w') as file:
            # Write five lines to the file: header plus the four curves
            file.write("{} {} {}\n".format(nwname, n, m))
            file.write(' '.join(map(str, nw_r))+"\n")
            file.write(' '.join(map(str, nw_t))+"\n")
            file.write(' '.join(map(str, theory_r))+"\n")
            file.write(' '.join(map(str, theory_t))+"\n")

        print(' in {} s'.format(time.time()-t0))
    else:
        # end the progress line of this network
        print()

    counter+=1


if compute:

    if min_counter==0:
        # remove empty rows from table
        table2 = table[:counter]

        # convert to data frame and name its columns (one name per table column)
        df = pd.DataFrame(table2)
        df.columns = ["network", "path", "nodes", "edges", "real rand rLCC", "real attack rLCC",
                        "fin theory rand rLCC", "fin theory attack rLCC"]

else:
    table = table[:counter]
    filename = 'networks2-100.p'
    # the context manager flushes and closes the pickle file
    with open(filename, 'wb') as pickle_file:
        pickle.dump(table[:,:3], pickle_file)
    print('Identified {} networks with sizes 2 to {} and saved their paths to {}.'.format(counter, max_size, filename))

0 10_19.adj has (n,m) = (10, 20) --- compute1 american_revolution.adj has (n,m) = (136, 3420) --- omit
2 birdtrade_a.adj has (n,m) = (42, 68) --- compute3 birdtrade_b.adj has (n,m) = (39, 53) --- compute4 birdtrade_c.adj has (n,m) = (34, 46) --- compute5 celegans_neural.adj has (n,m) = (280, 1973) --- omit
6 ce_nn.arc has (n,m) = (280, 1973) --- omit
7 chile_powergrid.adj has (n,m) = (466, 543) --- omit
8 dolphin.adj has (n,m) = (62, 159) --- compute9 dolphin.arc has (n,m) = (62, 159) --- compute10 drugspider_caffeine.adj has (n,m) = (119, 190) --- omit
11 drugspider_chloralhydrate.adj has (n,m) = (52, 64) --- compute12 earth.adj has (n,m) = (249, 1197) --- omit
13 earth.arc has (n,m) = (249, 1197) --- omit
14 earth_moon_mars.adj has (n,m) = (16, 57) --- compute15 east_europe_rr.adj has (n,m) = (45, 91) --- compute16 farmer.adj has (n,m) = (80, 77) --- compute17 flights.adj has (n,m) = (456, 2799) --- omit
18 flights.arc has (n,m) = (456, 2799) --- omit
19 football.adj has (n,m) = (115

In [46]:
# compute randomness index for networks with n in [2,100]

def bayesian(theory = False, removal = "random", adj_list = ["taro.txt"], oneplot = False, path=''):
    """
    Compute a randomness index for every edge-list file in `adj_list`.

    For each file the graph is built with n = (largest node label) + 1 nodes
    and p = (number of edge lines) / C(n, 2). The index is the n-th root of

        n! / prod_k(f_k!) * prod_i [ C(n-1, d_i) * p**d_i * (1-p)**(n-1-d_i) ]

    where d_i is the degree of node i and f_k is the number of nodes that
    have degree k.

    ARGS:
        theory (bool)   : unused, kept for interface compatibility
        removal (str)   : unused, kept for interface compatibility
        adj_list (list) : file names of the edge lists; each non-blank line
                          must start with two space-separated integers
        oneplot (bool)  : unused, kept for interface compatibility
        path (str)      : prefix that is prepended to every file name

    RETURNS:
        (products, nums_nodes, probs): three lists with one entry per file,
        in the order of `adj_list`. A file without any non-blank line
        contributes (nan, 0, nan) so that the lists stay aligned with
        `adj_list`.
    """
    # local import: `math` is not among the notebook's visible top-level imports
    import math

    products, nums_nodes, probs = [], [], []

    for file_name in adj_list:
        # read all non-blank lines; the context manager closes the file
        with open(path+file_name, "r") as file:
            stripped = (line.rstrip() for line in file)
            content = [line for line in stripped if line]

        if len(content) == 0:
            # empty file: keep a placeholder entry for it
            print("None detected")
            products.append(np.nan)
            nums_nodes.append(0)
            probs.append(np.nan)
            continue

        # the first two tokens of every line are the end points of an edge
        edge_list = []
        for line in content:
            tokens = line.strip().split(" ")
            edge_list.append((int(tokens[0]), int(tokens[1])))

        # NOTE(review): this assumes 0-based node labels; read_from_adj shifts
        # labels when 0 is absent, so the two can disagree on n -- confirm
        n = max(max(edge) for edge in edge_list) + 1
        adj = np.zeros((n, n))

        # symmetric fill: the graph is undirected
        for u, v in edge_list:
            adj[u, v] = 1
            adj[v, u] = 1
        G_0 = nx.from_numpy_array(adj)

        # NOTE(review): every line counts as one edge, repeated lines included
        p = len(edge_list) / scipy.special.comb(n, 2)

        # product over all nodes of the binomial(n-1, p) probability of its degree
        product = 1
        for _, d in G_0.degree():
            product *= scipy.special.comb(n - 1, d) * (p ** d) * (1 - p) ** (n - 1 - d)

        # multinomial factor n! / prod_k(f_k!) over the degree frequencies
        for f in nx.degree_histogram(G_0):
            product /= math.factorial(f)
        product = product * math.factorial(n)

        # n-th root
        product = r_pow(product, 1, n)

        products.append(product)
        nums_nodes.append(n)
        probs.append(p)

    return products,nums_nodes,probs

In [48]:
# randomness index, node count and edge density of every selected network
# (column 1 of `table` holds the file paths)
randomness_indices, nums_nodes, probs = bayesian(
    theory=False,
    removal="random",
    adj_list=table[:,1],
    oneplot=False,
    path='',
)

In [50]:
# find the top 20% and the bottom 20% of the networks based on randomness index

# the percentile thresholds are computed once instead of once per network
top_threshold = np.percentile(randomness_indices, 80)
bottom_threshold = np.percentile(randomness_indices, 20)

# indices into `table`, and the matching 2 x k arrays [number of nodes, randomness index]
top20_i = np.array([i for i in range(len(table)) if randomness_indices[i]>=top_threshold]).T
top20 = np.array([[nums_nodes[i], randomness_indices[i]] for i in range(len(table)) if randomness_indices[i]>=top_threshold]).T

bottom20_i = np.array([i for i in range(len(table)) if randomness_indices[i]<bottom_threshold]).T
bottom20 = np.array([[nums_nodes[i], randomness_indices[i]] for i in range(len(table)) if randomness_indices[i]<bottom_threshold]).T



In [53]:
print(len(top20_i), len(top20[0]), len(bottom20_i), len(bottom20[0]))
print(top20_i)

324 324 324 324
[   8   17   25   26   27   28   29   30   37   59   62   63   89  112
  145  166  238  291  295  296  310  323  347  348  356  357  360  361
  362  364  372  374  375  380  382  383  386  387  388  389  391  392
  393  394  395  396  397  398  400  403  405  406  407  408  409  410
  412  413  414  415  416  417  418  420  423  424  425  426  427  428
  429  431  432  433  434  444  446  449  450  451  452  455  456  458
  460  462  463  465  467  469  473  474  475  479  490  493  496  497
  498  499  500  503  504  508  512  514  517  521  522  524  526  529
  531  533  571  573  583  647  648  649  652  654  655  656  657  659
  661  662  663  664  666  667  668  670  671  672  673  682  683  685
  686  687  691  693  694  695  696  697  698  700  702  704  705  706
  707  715  716  717  718  720  721  722  723  724  725  733  739  740
  742  743  747  749  751  752  753  759  760  761  765  771  772  773
  774  776  777  778  779  780  781  791  792  794  795  796 

In [57]:
# graph sizes of the networks in the top 20% by randomness index
np.array([nums_nodes[i] for i in top20_i])

array([ 45,  28,  40,  62,  67,  49,  50,  50,  49,  71,  75,  74,   4,
        40,  25,   2,   2,  32,  30,  36,   5,   5,  37,  48,  76,  91,
       100,  91,  88,  91,  71,  77,  69,  72,  52,  52,  49,  50,  53,
        48,  94, 100,  98,  95,  92,  93,  95,  93,  83,  80,  78,  77,
        78,  77,  55,  57,  55,  52,  56,  53,  55,  54,  55,  99,  96,
       100,  87,  80,  82,  87,  86,  74,  81,  82,  79,  90,  61,  67,
        65,  64,  61,  65,  49,  47,  51,  51,  46,  50,  33,  33,  31,
        35,  34,  80,  61,  61, 100,  88,  84,  84,  86,  79,  80,  35,
        36,  24,  30,  30,  28,  29,  25,  29,  29,  53,  32,  32,  23,
        27,  28,  59,  23,  30,  40,  53,  43,  31,  38,  61,  83,  89,
        74,  77,  70,  70,  82,  83,  59,  73,  64,  67,  56,  47,  50,
        51,  54,  76,  83,  76,  45,  42,  30,  26,  40,  31,  41,  62,
        77,  85,  60,  59,  51,  96,  58,  85,  71,  42,  58,  75,  46,
        37,  40,  22,  34,  55,  52,  28,  49,  49,  30,  48,  3