# Percolation analysis

* read a network
* drop the links
* add links based on some measure (for example, link weight)

* we measure on scale (0-1) how quickly they make a one complete component
* Percolation = |N_LCC|/|N|


In [1]:
import pandas as pd 
import networkx as nx
import os
from DA import pls_da1
datasets = ["\dolphins\dolphins.gml",
            "\polbooks\out2.txt",
            "\word_adjacencies.gml\word_adjacencies.gml",
            "\\arenas-email\\out2.txt",
            "Karate",
            "Erdos Renyi",
            "\\USAir97\\USAir97.mtx", 
            "\\circuits\s208_st.txt",
            "\\circuits\s420_st.txt",
            "\\circuits\s838_st.txt",
            "\\E. Coli\E. Coli.txt",
            "Barabasi_albert_graph",
            "\\facebook\\0.edges",
            "\\facebook\\107.edges",
            "\\facebook\\348.edges",
            "\\facebook\\414.edges",
            "\\facebook\\686.edges",
            "\\facebook\\1684.edges",
            "\\bio-celegans\\bio-celegans.mtx",
            "\\bn-macaque-rhesus_brain_2\\bn-macaque-rhesus_brain_2.txt",
            '\\soc-tribes\\soc-tribes.txt',
            '\\fb-pages-food\\fb-pages-food.txt',
            '\\bn-cat-mixed-species_brain_1\\bn-cat-mixed-species_brain_1.txt',
            '\\ca-sandi_auths\\ca-sandi_auths.mtx',
            '\\soc-firm-hi-tech\\soc-firm-hi-tech.txt']

def read_graph2(g):
    file_name = 'standard networks dataset' + datasets[int(g)]
    print(file_name)
    G = nx.Graph()
    if g==4:
        G = nx.karate_club_graph()
    elif g==5:
        # nodes = int(input("enter number of nodes?"))
        # edges= int(input("enter number of edges?"))
        G = nx.gnm_random_graph(500, 1500)
    elif g==11:
        # nodes = int(input("enter number of nodes?"))
        # edges= int(input("enter number of edges?"))
        # p = int(input("enter P value?"))
        G = nx.barabasi_albert_graph(500, 3)
    else:
        ext = os.path.splitext(file_name)[1]
        if ext=='.edges':
            G = nx.read_adjlist(file_name, create_using = nx.Graph(), nodetype = int)
        elif ext=='.gml':
            G = nx.read_gml(file_name)
        elif ext=='.mtx':
            G = None
            #matrix = scipy.io.mmread(file_name)
            #G = nx.from_scipy_sparse_matrix(matrix)
        elif ext=='.txt':
            file = open(file_name, 'r')
            lines=  file.readlines()
            G = nx.Graph()
            for line in lines:
                if " " in line:
                    N = line.split(" ")
                else:
                    N = line.split("\t")
                G.add_edge(N[0], N[1])
    return G

# read the networks
networks = {}
for net in range(len(datasets)):
    g = read_graph2(net)
    if g:
        networks[datasets[net]] = g


standard networks dataset\dolphins\dolphins.gml
standard networks dataset\polbooks\out2.txt
standard networks dataset\word_adjacencies.gml\word_adjacencies.gml
standard networks dataset\arenas-email\out2.txt
standard networks datasetKarate
standard networks datasetErdos Renyi
standard networks dataset\USAir97\USAir97.mtx
standard networks dataset\circuits\s208_st.txt
standard networks dataset\circuits\s420_st.txt
standard networks dataset\circuits\s838_st.txt
standard networks dataset\E. Coli\E. Coli.txt
standard networks datasetBarabasi_albert_graph
standard networks dataset\facebook\0.edges
standard networks dataset\facebook\107.edges
standard networks dataset\facebook\348.edges
standard networks dataset\facebook\414.edges
standard networks dataset\facebook\686.edges
standard networks dataset\facebook\1684.edges
standard networks dataset\bio-celegans\bio-celegans.mtx
standard networks dataset\bn-macaque-rhesus_brain_2\bn-macaque-rhesus_brain_2.txt
standard networks dataset\soc-tribes

In [None]:
name = ['dolphins',
 'polbooks',
 'word_adjacencies',
 'arenas-email',
 'Karate',
 'Erdos Renyi',
 'circuits s208_st',
 'circuits s420_st',
 'circuits s838_st',
 'E. Coli',
 'Barabasi_albert_graph',
 'facebook0',
 'facebook107',
 'facebook348',
 'facebook414',
 'facebook686',
 'facebook1684',
 'bn-macaque-rhesus_brain_2',
 'soc-tribes',
 'fb-pages-food',
 'bn-cat-mixed-species_brain_1',
 'soc-firm-hi-tech']

In [None]:
def properties(G):
    GCC = nx.transitivity(G)
    ACC = nx.average_clustering(G)
    d = nx.density(G)
    r = nx.degree_assortativity_coefficient(G)    
    lcg = sorted(nx.connected_components(G), key=len, reverse=True)
    LCG = G.subgraph(lcg[0])    
    ASP = nx.average_shortest_path_length(LCG)
    diam = nx.diameter(LCG)
    return  GCC, ACC, d, r, ASP, diam



In [None]:
def weighted_edges(G, C):
    '''return a weighted edges'''
    W = []
    for u,v in G.edges():
        W.append([u, v, C[u]*C[v]])
    return sorted(W, key=lambda x: x[2])

def batch_list(lst):
    """
    Divide a list into batches of an equal number of items (as close to 50 as possible).
    """
    batch_size = (len(lst) + 49) // 50  # Calculate the batch size
    num_batches = (len(lst) + batch_size - 1) // batch_size
    batches = [lst[i*batch_size:(i+1)*batch_size] for i in range(num_batches)]
    return batches

def simulation(centr):
    results = []
    for network in networks.keys():
        print(network)
        G0 = networks[network]
        bc_G0 = centr(G0)
        W = weighted_edges(G0, bc_G0)
        G = nx.Graph()
        G.add_nodes_from(G0.nodes())
        batches = batch_list(W)

        result = []
        for b in range(len(batches)):
            for u,v,_ in batches[b]:
                G.add_edge(u, v)
            largest_component = G.subgraph(max(nx.connected_components(G), key=len))
            result.append([b, len(largest_component)/len(G)])
        results.append(result)
    return results


In [38]:
def plot(results, title):
    import matplotlib.pyplot as plt
    fig = plt.figure(dpi=600)
    fig, ax = plt.subplots()
    markers = ['+', 'x', 'o', 's', 'd', 'D', '*'] # Add your desired markers here
    for d in range(len(results)):
        data = results[d]
        x = [item[0] for item in data]
        y = [item[1] for item in data]
        marker_idx = d % len(markers) # Choose marker based on index of the result
        ax.plot(x, y, marker=markers[marker_idx], linewidth=0.5, markersize=3, label=name[d])

    ax.set_xlabel('edges')
    ax.set_ylabel(r'$|N_{LCC}| / |N| $')
    ax.set_title(f'{title}-based weighted edges')
    plt.legend()
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()


In [None]:
centralities = [nx.degree_centrality, nx.betweenness_centrality, nx.closeness_centrality, nx.clustering]
centr        = ['Degree'            ,   'Betweenness'          ,   'Closeness'          ,   'Clustering']
sims = {}
for i in range(4):
    cent = centr[i]
    sims[cent] = simulation(centralities[i])
    plot(sims[cent], cent)


# Predicting robustness of networks

In [None]:
R = {}
R['Networks'] = name
for c in centr:
    Rs = []
    for i in range(22):
        S = sims[c][i]
        T, V = 0, 0
        for t, v in S:
            if v > V:
                T = t
                V = v
        Rs.append(T)
    R[c] = Rs
pd.DataFrame(R)

In [None]:
def random_sim():
    results = []
    for network in networks.keys():
        G0 = networks[network]
        W = batch_list(list(G0.edges()))
        G = nx.Graph()
        G.add_nodes_from(G0.nodes())
        batches = batch_list(W)

        result = []
        for b in range(len(batches)):
            for (u,v) in batches[b][0]:
                G.add_edge(u, v)
            largest_component = G.subgraph(max(nx.connected_components(G), key=len))
            result.append([b, len(largest_component)/len(G)])
        results.append(result)
    return results

plot(random_sim(), 'Random')
sims_rnd = random_sim()

Rs = []
for i in range(22):
    S = sims_rnd[i]
    T, V = 0, 0
    for t, v in S:
        if v > V:
            T = t
            V = v
    Rs.append(T)
R['Random'] = Rs
pd.DataFrame(R)

In [None]:
def simulation2():
    '''Implementing reverse preferential attachment'''
    results = []
    k = 0.01
    for network in networks.keys():
        print(network)
        G0 = networks[network]
        d = nx.degree_centrality(G0)
        W = [[u, v, (1/((d[u]+k)*(d[v]+k)))] for u,v in G0.edges()]
        W = sorted(W, key=lambda x: x[2])
        G = nx.Graph()
        G.add_nodes_from(G0.nodes())
        batches = batch_list(W)

        result = []
        for b in range(len(batches)):
            for u,v,_ in batches[b]:
                G.add_edge(u, v)
            largest_component = G.subgraph(max(nx.connected_components(G), key=len))
            result.append([b, len(largest_component)/len(G)])
        results.append(result)
    return results


In [None]:
plot(simulation2(), 'Inverted PA')

In [None]:
sims_rnd = simulation2()

Rs = []
for i in range(22):
    S = sims_rnd[i]
    T, V = 0, 0
    for t, v in S:
        if v > V:
            T = t
            V = v
    Rs.append(T)
R['Inverted PA'] = Rs


In [None]:
network_properties = [properties(networks[g]) for g in networks]

In [None]:
GCCs = [i for i, _,_,_,_,_ in network_properties]
ACCs = [i for _, i,_,_,_,_ in network_properties]
ds   = [i for _, _,i,_,_,_ in network_properties]
rs   = [i for _, _,_,i,_,_ in network_properties]
ASPs = [i for _, _,_,_,i,_ in network_properties]
diam = [i for _, _,_,_,_,i in network_properties]

df2 = pd.DataFrame({'Networks': name, 'GCC': GCCs, 'ACC': ACCs, 'Density': ds, 'r': rs, 'ASP': ASPs, 'Diameter': diam}) 
df2.to_csv('Data/networks properties.csv', index=False)


numeric_cols = df2.select_dtypes(include='number').columns
df_quartiles = df2[numeric_cols].apply(lambda x: pd.qcut(x.dropna(), q=[0, 0.25, 0.5, 0.75, 1.0], labels=[0.25,0.5,0.75,1]) if x.dtype != object else x)
df_quartiles['Networks'] = df2['Networks']

In [None]:
df1 = pd.DataFrame(R)

medians = df1.median()
print(medians)
for column in df1.columns :
    if column!= 'Networks':
        median = medians[column]  # Retrieve the median for the column
        df1[column] = (df1[column] < median).astype(int)


In [None]:
Dataset = pd.merge(df_quartiles, df1, on='Networks')
Dataset = Dataset.reindex(columns = ['Networks', 'GCC', 'ACC', 'Density', 'r', 'ASP', 'Diameter', 'Degree',
       'Betweenness', 'Closeness', 'Clustering', 'Random', 'Inverted PA'])
Dataset.to_csv('Data/velnerability output.csv', index=False)
Dataset

In [2]:
from scipy.signal import savgol_filter
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold, cross_val_predict, train_test_split
from sklearn.metrics import accuracy_score

data = pd.read_csv('Data/velnerability output.csv')
oldR = {}
X = data.loc[:, ['GCC', 'ACC', 'Density', 'r', 'ASP', 'Diameter']]
for c in [ 'Degree', 'Betweenness', 'Closeness', 'Clustering', 'Random', 'Inverted PA']:
    Y = data[c]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=19)
    oldR[c] = pls_da1(X_train, y_train, X_test).tolist()

oldR


{'Degree': [[0.051596228416095784],
  [0.028769530557538423],
  [0.13368620135899473],
  [-0.04513237612424448],
  [-0.15557727792904194],
  [-0.1081763931154136]],
 'Betweenness': [[-0.0549023376778545],
  [-0.031102568213353877],
  [-0.005048550742868397],
  [-0.12076364934178835],
  [-0.10942195475126394],
  [-0.13762176619504451]],
 'Closeness': [[0.22810638836118072],
  [0.20442391971557453],
  [0.08364138884258178],
  [-0.013502421632581547],
  [0.004621882589407485],
  [0.011787688499881438]],
 'Clustering': [[0.017796285664701407],
  [0.05117195642483444],
  [0.05398307933296709],
  [-0.11578336574465273],
  [-0.06939954360897169],
  [-0.04647790023406552]],
 'Random': [[0.006858683168286773],
  [0.04543108025056324],
  [-0.02547456977560801],
  [-0.06195294527120876],
  [-0.06945532594653794],
  [-0.09703895094773181]],
 'Inverted PA': [[0.1258261921783094],
  [0.20338553059341027],
  [-0.041482604988132964],
  [-0.2918435837437639],
  [0.019395419045435295],
  [-0.15441403298

In [3]:
R = {c: [i[0] for i in oldR[c]] for c in oldR}
pd.DataFrame(R)

Unnamed: 0,Degree,Betweenness,Closeness,Clustering,Random,Inverted PA
0,0.051596,-0.054902,0.228106,0.017796,0.006859,0.125826
1,0.02877,-0.031103,0.204424,0.051172,0.045431,0.203386
2,0.133686,-0.005049,0.083641,0.053983,-0.025475,-0.041483
3,-0.045132,-0.120764,-0.013502,-0.115783,-0.061953,-0.291844
4,-0.155577,-0.109422,0.004622,-0.0694,-0.069455,0.019395
5,-0.108176,-0.137622,0.011788,-0.046478,-0.097039,-0.154414
