<a href="https://colab.research.google.com/github/AliAkbarBadri/graph-centrality/blob/main/centrality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [61]:
import glob
import json
import os
import time
from multiprocessing import Pool

import networkx as nx
import numpy as np
import pandas as pd

# Functions

In [62]:
def count_subgraph(G, v):
  """Count sub-structures of ``G`` anchored at ``v`` by deletion-contraction.

  One edge ``e`` incident to ``v`` (the first one reported by ``G.edges(v)``)
  is branched on, and the result is the sum of:

  * the count for a copy of ``G`` with ``e`` removed, still anchored at ``v``;
  * ``2**m - 1`` times the count for ``G`` with ``e`` contracted, anchored at
    ``e[0]``, where ``m = G.number_of_edges(*e)``.

  A vertex with no edges reported by ``G.edges(v)`` counts as 1. The edge
  removal is done on a copy, so the caller's ``G`` is not edited by it.

  NOTE(review): on graph types that cannot hold parallel edges,
  ``number_of_edges(u, v)`` is at most 1, so the ``2**m - 1`` factor only has
  an effect when ``G`` is a multigraph -- confirm which graph type callers are
  expected to pass.
  """
  incident = G.edges(v)
  if len(incident) == 0:
    return 1

  # Only a single incident edge is branched on per call.
  edge = next(iter(incident))

  # Branch 1: the edge is absent.
  without_edge = G.copy()
  without_edge.remove_edge(*edge)
  deleted_count = count_subgraph(without_edge, v)

  # Branch 2: the edge is present, so its endpoints are merged.
  factor = 2**G.number_of_edges(*edge) - 1
  merged = nx.contracted_edge(G, edge, self_loops=False)
  contracted_count = count_subgraph(merged, edge[0])

  return deleted_count + factor * contracted_count

In [97]:
def count_tree(T, v):
  """Recursively count structures of ``T`` anchored at ``v`` by edge removal.

  If ``T.edges(v)`` is empty the result is 1. Otherwise the first reported
  edge ``e`` is removed from a copy of ``T`` and the result is::

      count_tree(T - e, v) * (count_tree(T - e, e[1]) + 1)

  In this notebook's checks this yields 4 for an end vertex of a 4-node path
  and 16 for the centre of a 4-leaf star. ``T`` itself is never modified;
  every removal happens on a copy.
  """
  incident = T.edges(v)
  if len(incident) == 0:
    return 1

  # Only a single incident edge is split off per call.
  edge = next(iter(incident))
  pruned = T.copy()
  pruned.remove_edge(*edge)

  at_vertex = count_tree(pruned, v)
  beyond_edge = count_tree(pruned, edge[1])
  return at_vertex * (beyond_edge + 1)

In [64]:
line = nx.path_graph(4)
print(line.edges())

# Sanity check on a 4-node path, anchored at end vertex 0: both counters
# must return 4. The expectation is asserted so a regression fails loudly.
line_subgraph_count = count_subgraph(line, 0)
line_tree_count = count_tree(line, 0)
print(line_subgraph_count)
print(line_tree_count)
assert line_subgraph_count == 4, line_subgraph_count
assert line_tree_count == 4, line_tree_count

[(0, 1), (1, 2), (2, 3)]
4
4


In [65]:
star = nx.star_graph(4)
print(star.edges())

# Sanity check on a 4-leaf star, anchored at centre 0: both counters must
# return 16. The expectation is asserted so a regression fails loudly.
star_subgraph_count = count_subgraph(star, 0)
star_tree_count = count_tree(star, 0)
print(star_subgraph_count)
print(star_tree_count)
assert star_subgraph_count == 16, star_subgraph_count
assert star_tree_count == 16, star_tree_count

[(0, 1), (0, 2), (0, 3), (0, 4)]
16
16


# Get Datasets

In [66]:
# Download a tarball of the repository from the GitHub API (-L follows redirects).
! curl -L https://api.github.com/repos/AliAkbarBadri/graph-centrality/tarball --output repo.tar
# Extract only the CSV files under data/, dropping the archive's top-level directory.
! tar xf repo.tar --wildcards "*/data/*.csv" --strip-components=1
# Remove the downloaded archive once the CSVs are extracted.
! rm -rf repo.tar

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  440k    0  440k    0     0   977k      0 --:--:-- --:--:-- --:--:--  977k


In [67]:
# List the available datasets: file name without directory or extension.
# basename/splitext avoid depending on "/" as separator or on path depth.
for file_name in sorted(glob.glob('data/*.csv')):
  print(os.path.splitext(os.path.basename(file_name))[0])

ENZYMES_g297
aves-songbird-social
bn-mouse_visual-cortex_1
bn-mouse_visual-cortex_2
ca-sandi_auths
email-dnc-corecipient
email-enron-only
facebook-socfb-Amherst41
insecta-ant-colony1-day01
reptilia-tortoise-network-lm
road-chesapeake
rt-retweet
soc-dolphins
soc-wiki-Vote
web-edu
web-polblogs
web-spam


# Test on Datasets

In [110]:
def create_graph_from_csv(dir_name:str) -> dict:
  """Load every ``*.csv`` edge list in ``dir_name`` as a directed graph.

  Each file is read without a header row as two columns (source, destination)
  and converted to an ``nx.DiGraph``. The dataset name, node count and edge
  count are printed for every file as it is loaded.

  Args:
    dir_name: Directory holding the CSV files. May be nested
      (e.g. ``"repo/data"``) and may end with a path separator.

  Returns:
    Dict mapping dataset name (file name without directory and extension)
    to its graph, inserted in sorted file-name order.
  """
  graphs = {}
  for file_name in sorted(glob.glob(os.path.join(dir_name, '*.csv'))):
    df = pd.read_csv(file_name,header=None,names=['src', 'dest'])
    G = nx.from_pandas_edgelist(df,source="src",target="dest", create_using=nx.DiGraph())
    # Take the name from the file's base name: indexing the "/"-split path at
    # a fixed position gives the wrong key for nested or slash-terminated
    # directories, collapsing every dataset onto a single dict entry.
    dataset_name = os.path.splitext(os.path.basename(file_name))[0]
    print(dataset_name, G.number_of_nodes(), G.number_of_edges())
    graphs[dataset_name] = G
  return graphs

In [111]:
# Load every CSV edge list under data/ into a {dataset name: graph} dict.
graphs = create_graph_from_csv("data")

ENZYMES_g297 121 298
aves-songbird-social 110 1027
bn-mouse_visual-cortex_1 29 44
bn-mouse_visual-cortex_2 193 214
ca-sandi_auths 86 124
email-dnc-corecipient 906 12085
email-enron-only 143 623
facebook-socfb-Amherst41 2235 90953
insecta-ant-colony1-day01 113 4550
reptilia-tortoise-network-lm 45 106
road-chesapeake 39 170
rt-retweet 96 117
soc-dolphins 62 159
soc-wiki-Vote 889 2914
web-edu 3031 6474
web-polblogs 643 2280
web-spam 4767 37375


In [112]:
# Datasets on which the centrality measures below are computed.
graph_names = [
    "bn-mouse_visual-cortex_1",
    "ca-sandi_auths",
    "reptilia-tortoise-network-lm",
    "road-chesapeake",
    "rt-retweet",
]

# Tree Centrality

In [118]:
# Result containers, filled per dataset by the loop below:
# {dataset: {node: count_tree value}} and {dataset: elapsed seconds}.
tree_centrality, tree_time = {}, {}

In [119]:
# Run count_tree from every node of each selected dataset and record the
# wall-clock time taken per dataset.
for key in graph_names:
  print("Graph:",key)
  node_scores = tree_centrality[key] = {}
  G = graphs[key]
  start_time_all = time.time()
  for node in G.nodes():
    node_scores[node] = count_tree(G, node)
  all_time = time.time() - start_time_all
  print(" time:", all_time)
  tree_time[key] = all_time

Graph: bn-mouse_visual-cortex_1
 time: 0.013124704360961914
Graph: ca-sandi_auths
 time: 0.25470709800720215
Graph: reptilia-tortoise-network-lm
 time: 1.0627782344818115
Graph: road-chesapeake
 time: 1.2863714694976807
Graph: rt-retweet
 time: 0.18354249000549316


In [120]:
# Persist the tree-centrality values and timings as JSON.
# open() does not create missing directories, and only data/ is extracted
# from the repository tarball, so make sure results/ exists first.
os.makedirs('results', exist_ok=True)
with open('results/tree_centrality.json', 'w') as fp:
    json.dump(tree_centrality, fp, indent=4)
with open('results/tree_time.json', 'w') as fp:
    json.dump(tree_time, fp, indent=4)

# Graph Centrality

In [122]:
# Result containers for the count_subgraph runs below, keyed by dataset name
# (per-dataset results and elapsed seconds respectively).
graph_centrality, graph_time = {}, {}

In [137]:
def fun(G, v):
    """Evaluate ``count_subgraph(G, v)`` for one node and report it.

    The value is printed as a ``"<node>": <value>,`` line and returned
    together with the node, so results can be matched back to their node
    when this is mapped over many ``(G, v)`` argument pairs.

    Returns:
        Tuple ``(v, count_subgraph(G, v))``.
    """
    result = (v, count_subgraph(G, v))
    print('"{0}": {1},'.format(*result))
    return result

In [143]:
# Run count_subgraph from every node of each selected dataset, spreading the
# per-node work over 4 worker processes, and record the elapsed wall-clock time.
for key in graph_names:
    print("Graph:",key)
    graph_centrality[key] = {}
    G = graphs[key]
    start_time_all = time.time()
    with Pool(processes=4) as pool:
        # fun returns (node, value) pairs. Store them as {node: value} so the
        # entry keeps the dict shape it was initialised with and matches the
        # layout of tree_centrality, instead of becoming a list of tuples.
        graph_centrality[key] = dict(pool.starmap(fun, [(G, node) for node in G.nodes()]))
    all_time = time.time() - start_time_all
    print(" time:", all_time)
    print()
    graph_time[key] = all_time


Graph: bn-mouse_visual-cortex_1
"1": 2,
"18": 1,
"19": 1,
"28": 1,
"20": 1,
"14": 1,
"16": 1,
"3": 8,
"21": 1,
"13": 1,
"15": 1,
"6": 2,
"22": 1,
"8": 2,
"5": 48,
"2": 64,
"17": 1,
"23": 1,
"25": 1,
"24": 1,
"26": 1,
"7": 72,
"4": 1,
"10": 64,
"27": 1,
"11": 8,
"12": 32,
"9": 512,
"0": 1,
 time: 0.44628357887268066

Graph: ca-sandi_auths
"35": 2,
"1": 1,
"32": 5,
"10": 2,
"16": 2,
"37": 5,
"18": 2,
"33": 20,
"67": 5,
"2": 1,
"75": 2,
"26": 2,
"5": 1,
"30": 20,
"60": 16,
"15": 2,
"6": 1,
"66": 75,
"40": 80,
"76": 200,
"7": 2,
"3": 1,
"20": 2,
"8": 1,
"64": 5,
"23": 2,
"9": 1,
"46": 2,
"11": 1,
"12": 1,
"36": 837,
"65": 2,
"25": 2,
"17": 1,
"31": 2,
"84": 738,
"19": 1,
"22": 3,
"56": 3,
"81": 6,
"21": 1,
"44": 2337,
"52": 2,
"57": 2,
"54": 162,
"86": 31,
"27": 1,
"41": 32,
"48": 105,
"28": 1,
"55": 32,
"74": 10,
"29": 1,
"61": 105,
"62": 21,
"82": 21,
"83": 62,
"34": 21,
"80": 3278,
"43": 1398,
"24": 1,
"68": 838,
"73": 4291,
"85": 838,
"39": 2,
"38": 1,
"45": 81,
"53": 13990,
"4": 1,
"

# Betweenness Centrality


In [29]:
# {dataset name: {node: betweenness score}}, filled by the loop below.
betweenness_centrality = dict()

In [30]:
# Compute networkx's betweenness centrality for each selected dataset.
for key in graph_names:
  print("Graph:",key)
  G = graphs[key]
  betweenness_scores = nx.betweenness_centrality(G)
  betweenness_centrality[key] = betweenness_scores

Graph: ca-sandi_auths
Graph: reptilia-tortoise-network-lm
Graph: bn-mouse_visual-cortex_1
Graph: road-chesapeake
Graph: rt-retweet


In [31]:
# Persist the betweenness results as JSON. open() does not create missing
# directories, so make sure results/ exists first.
os.makedirs('results', exist_ok=True)
with open('results/betweenness_centrality.json', 'w') as fp:
    json.dump(betweenness_centrality, fp, indent=4)

In [50]:
# Display the collected betweenness results (dataset -> node -> score).
betweenness_centrality

{'bn-mouse_visual-cortex_1': {0: 0.0,
  1: 0.0013227513227513227,
  2: 0.0,
  3: 0.003968253968253968,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 0.0,
  17: 0.0,
  18: 0.0,
  19: 0.0,
  20: 0.0,
  21: 0.0,
  22: 0.0,
  23: 0.0,
  24: 0.0,
  25: 0.0,
  26: 0.0,
  27: 0.0,
  28: 0.0},
 'ca-sandi_auths': {1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0012605042016806721,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 0.0,
  17: 0.0,
  18: 0.0,
  19: 0.0,
  20: 0.0011204481792717086,
  21: 0.0,
  22: 0.0,
  23: 0.001680672268907563,
  24: 0.0,
  25: 0.0,
  26: 0.0007469654528478058,
  27: 0.0,
  28: 0.0,
  29: 0.0,
  30: 0.004901960784313725,
  31: 0.0,
  32: 0.0,
  33: 0.007096171802054154,
  34: 0.0,
  35: 0.0018207282913165264,
  36: 0.01360877684407096,
  37: 0.0,
  38: 0.0,
  39: 0.0,
  40: 0.003361344537815126,
  41: 0.0,
  42: 0.0,


# Eigenvector Centrality

In [45]:
# {dataset name: {node: eigenvector score}}, filled by the loop below.
eigenvector_centrality = dict()

In [46]:
# Compute networkx's eigenvector centrality for each selected dataset,
# allowing up to 1000 iterations.
for key in graph_names:
  print("Graph:",key)
  G = graphs[key]
  eigenvector_scores = nx.eigenvector_centrality(G, max_iter=1000)
  eigenvector_centrality[key] = eigenvector_scores

Graph: ca-sandi_auths
Graph: reptilia-tortoise-network-lm
Graph: bn-mouse_visual-cortex_1
Graph: road-chesapeake
Graph: rt-retweet


In [47]:
# Persist the eigenvector results as JSON. open() does not create missing
# directories, so make sure results/ exists first.
os.makedirs('results', exist_ok=True)
with open('results/eigenvector_centrality.json', 'w') as fp:
    json.dump(eigenvector_centrality, fp, indent=4)

In [49]:
# Display the collected eigenvector results (dataset -> node -> score).
eigenvector_centrality

{'bn-mouse_visual-cortex_1': {0: 0.0022636303068946212,
  1: 0.0015093383019313936,
  2: 7.542920049632151e-07,
  3: 0.0007550462969681777,
  4: 0.0007550462969681777,
  5: 7.542920049632151e-07,
  6: 7.542920049632151e-07,
  7: 7.542920049632151e-07,
  8: 7.542920049632151e-07,
  9: 7.542920049632151e-07,
  10: 7.542920049632151e-07,
  11: 7.542920049632151e-07,
  12: 7.542920049632151e-07,
  13: 0.0015093383019313936,
  14: 0.7550470512601831,
  15: 0.0015093383019313936,
  16: 0.3790324867860209,
  17: 0.37827819478105756,
  18: 0.37827819478105756,
  19: 0.0015093383019313936,
  20: 0.0015093383019313936,
  21: 0.0015093383019313936,
  22: 0.0030179223118578465,
  23: 0.0015093383019313936,
  24: 0.0015093383019313936,
  25: 0.0015093383019313936,
  26: 0.0015093383019313936,
  27: 0.0015093383019313936,
  28: 0.0030179223118578465},
 'ca-sandi_auths': {1: 0.009111888998493202,
  2: 0.9997066975926328,
  3: 0.009256247260909567,
  4: 2.5205658167049112e-09,
  5: 1.9873296496854944e

# Degree Centrality

In [35]:
# {dataset name: {node: degree score}}, filled by the loop below.
degree_centrality = dict()

In [36]:
# Compute networkx's degree centrality for each selected dataset.
for key in graph_names:
  print("Graph:",key)
  G = graphs[key]
  degree_scores = nx.degree_centrality(G)
  degree_centrality[key] = degree_scores

Graph: ca-sandi_auths
Graph: reptilia-tortoise-network-lm
Graph: bn-mouse_visual-cortex_1
Graph: road-chesapeake
Graph: rt-retweet


In [37]:
# Persist the degree results as JSON. open() does not create missing
# directories, so make sure results/ exists first.
os.makedirs('results', exist_ok=True)
with open('results/degree_centrality.json', 'w') as fp:
    json.dump(degree_centrality, fp, indent=4)

In [38]:
# Display the collected degree results (dataset -> node -> score).
degree_centrality

{'bn-mouse_visual-cortex_1': {0: 0.10714285714285714,
  1: 0.10714285714285714,
  2: 0.21428571428571427,
  3: 0.14285714285714285,
  4: 0.03571428571428571,
  5: 0.17857142857142855,
  6: 0.03571428571428571,
  7: 0.14285714285714285,
  8: 0.03571428571428571,
  9: 0.3214285714285714,
  10: 0.21428571428571427,
  11: 0.10714285714285714,
  12: 0.17857142857142855,
  13: 0.07142857142857142,
  14: 0.07142857142857142,
  15: 0.07142857142857142,
  16: 0.10714285714285714,
  17: 0.07142857142857142,
  18: 0.07142857142857142,
  19: 0.07142857142857142,
  20: 0.07142857142857142,
  21: 0.07142857142857142,
  22: 0.14285714285714285,
  23: 0.07142857142857142,
  24: 0.07142857142857142,
  25: 0.07142857142857142,
  26: 0.07142857142857142,
  27: 0.07142857142857142,
  28: 0.14285714285714285},
 'ca-sandi_auths': {1: 0.023529411764705882,
  2: 0.07058823529411765,
  3: 0.11764705882352941,
  4: 0.011764705882352941,
  5: 0.023529411764705882,
  6: 0.023529411764705882,
  7: 0.02352941176470