<a href="https://colab.research.google.com/github/AliAkbarBadri/graph-centrality/blob/main/All_subgraphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [74]:
import glob
import json
import os
import time
from multiprocessing import Pool

import networkx as nx
import numpy as np
import pandas as pd

# Functions

In [71]:
def count_subgraph(G, v):
  """Recursively count subgraphs around node ``v`` by deletion/contraction.

  The first edge ``e`` reported by ``G.edges(v)`` is picked and two
  sub-problems are combined:

  * deletion: the count for ``v`` in a copy of ``G`` with ``e`` removed;
  * contraction: the count for ``e[0]`` in ``nx.contracted_edge(G, e,
    self_loops=False)``, weighted by ``2**G.number_of_edges(*e) - 1``.

  A node with no incident edges (as reported by ``G.edges(v)``) counts as 1.

  NOTE(review): presumably this counts the connected subgraphs containing
  ``v`` (the notebook's sanity cells expect 4 on ``path_graph(4)`` and 16 on
  ``star_graph(4)``). On a non-multi graph ``number_of_edges(u, w)`` should
  never exceed 1, which would make the weight always 1 -- confirm whether a
  MultiGraph was intended.

  Args:
    G: graph object; must provide ``edges``, ``copy``, ``remove_edge`` and
      ``number_of_edges`` and be accepted by ``nx.contracted_edge``.
    v: node of ``G`` to count around.

  Returns:
    int: the count computed by the recursion above.
  """
  incident = G.edges(v)
  if len(incident) == 0:
    return 1

  # Only the first incident edge drives the recursion.
  edge = next(iter(incident))

  # Deletion branch: same graph without `edge`.
  without_edge = G.copy()
  without_edge.remove_edge(*edge)
  deleted_count = count_subgraph(without_edge, v)

  # Contraction branch: merge the edge's endpoints and recurse on edge[0].
  weight = 2 ** G.number_of_edges(*edge) - 1
  merged = nx.contracted_edge(G, edge, self_loops=False)
  contracted_count = count_subgraph(merged, edge[0])

  return deleted_count + weight * contracted_count

In [38]:
def count_tree(T, v):
  """Recursively count around node ``v`` using a product rule on one edge.

  The first edge ``e = (v, w)`` reported by ``T.edges(v)`` is removed from a
  copy of ``T``; the result is
  ``count_tree(T - e, v) * (count_tree(T - e, w) + 1)``.
  A node with no incident edges counts as 1.

  NOTE(review): presumably intended for trees, where this gives the number
  of subtrees containing ``v`` (the notebook's sanity cells expect 4 on
  ``path_graph(4)`` and 16 on ``star_graph(4)``) -- confirm before relying
  on the value for graphs with cycles.

  Args:
    T: graph object providing ``edges(v)``, ``copy()`` and
      ``remove_edge(u, w)``.
    v: node of ``T`` to count around.

  Returns:
    int: the count computed by the recursion above.
  """
  incident = T.edges(v)
  if len(incident) == 0:
    return 1

  # Only the first incident edge drives the recursion.
  first_edge = next(iter(incident))
  pruned = T.copy()
  pruned.remove_edge(*first_edge)

  rest_at_v = count_tree(pruned, v)
  branch_at_other_end = count_tree(pruned, first_edge[1])
  return rest_at_v * (branch_at_other_end + 1)

In [4]:
# Sanity check on a 4-node path graph, counting from end node 0.
line = nx.path_graph(4)
print(line.edges())

# result must be equal to 4
line_subgraph_count = count_subgraph(line, 0)
line_tree_count = count_tree(line, 0)
print(line_subgraph_count)
print(line_tree_count)

# Enforce the expectation instead of relying on a reader comparing the output.
assert line_subgraph_count == 4, f"count_subgraph on path_graph(4): expected 4, got {line_subgraph_count}"
assert line_tree_count == 4, f"count_tree on path_graph(4): expected 4, got {line_tree_count}"

[(0, 1), (1, 2), (2, 3)]
4
4


In [5]:
# Sanity check on a star graph with 4 leaves, counting from the centre node 0.
star = nx.star_graph(4)
print(star.edges())
# result must be equal to 16
star_subgraph_count = count_subgraph(star, 0)
star_tree_count = count_tree(star, 0)
print(star_subgraph_count)
print(star_tree_count)

# Enforce the expectation instead of relying on a reader comparing the output.
assert star_subgraph_count == 16, f"count_subgraph on star_graph(4): expected 16, got {star_subgraph_count}"
assert star_tree_count == 16, f"count_tree on star_graph(4): expected 16, got {star_tree_count}"

[(0, 1), (0, 2), (0, 3), (0, 4)]
16
16


# Get Datasets

In [6]:
! curl -L https://api.github.com/repos/AliAkbarBadri/graph-centrality/tarball --output repo.tar
! tar xf repo.tar --wildcards "*/data/*.csv" --strip-components=1
! rm -rf repo.tar

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  442k    0  442k    0     0   856k      0 --:--:-- --:--:-- --:--:--  856k


In [7]:
# List the available datasets: CSV file names without directory or extension.
# os.path is used so the result does not depend on the path separator or on
# how many directory levels precede the file name.
for file_name in sorted(glob.glob('data/*.csv')):
  print(os.path.splitext(os.path.basename(file_name))[0])

ENZYMES_g297
aves-songbird-social
bn-mouse_visual-cortex_2
ca-sandi_auths
email-dnc-corecipient
email-enron-only
facebook-socfb-Amherst41
insecta-ant-colony1-day01
reptilia-tortoise-network-lm
road-chesapeake
rt-retweet
soc-dolphins
soc-wiki-Vote
web-edu
web-polblogs
web-spam


# Test on datasets

In [41]:
def create_graph_from_csv(dir_name:str) -> dict:
  """Load every ``*.csv`` edge list in ``dir_name`` as a directed graph.

  Each file is read as a header-less two-column CSV (source, destination)
  and turned into an ``nx.DiGraph``. One summary line per dataset
  (name, node count, edge count) is printed while loading.

  Args:
    dir_name: directory containing the CSV edge lists.

  Returns:
    dict mapping dataset name (file name without directory and extension)
    to its ``nx.DiGraph``, filled in sorted file-name order. Empty if the
    directory holds no CSV files.
  """
  graphs = {}
  for file_name in sorted(glob.glob(dir_name+'/*.csv')):
    df = pd.read_csv(file_name,header=None,names=['src', 'dest'])
    G = nx.from_pandas_edgelist(df,source="src",target="dest", create_using=nx.DiGraph())
    # Derive the name from the last path component only: splitting on "/" and
    # taking index 1 returned a directory name whenever dir_name itself
    # contained a separator (e.g. "./data"), collapsing all datasets onto one key.
    dataset_name = os.path.splitext(os.path.basename(file_name))[0]
    print(dataset_name, G.number_of_nodes(), G.number_of_edges())
    graphs[dataset_name] = G
  return graphs

In [42]:
# Load all CSV edge lists under ./data; `graphs` maps dataset name -> directed graph.
graphs = create_graph_from_csv(dir_name= "data")

ENZYMES_g297 121 298
aves-songbird-social 110 1027
bn-mouse_visual-cortex_2 193 214
ca-sandi_auths 86 124
email-dnc-corecipient 906 12085
email-enron-only 143 623
facebook-socfb-Amherst41 2235 90953
insecta-ant-colony1-day01 113 4550
reptilia-tortoise-network-lm 45 106
road-chesapeake 39 170
rt-retweet 96 117
soc-dolphins 62 159
soc-wiki-Vote 889 2914
web-edu 3031 6474
web-polblogs 643 2280
web-spam 4767 37375


In [83]:
# Datasets the centrality cells iterate over; each entry is used as a key into `graphs`.
# The two commented-out datasets are currently excluded from processing.
graph_names = [
              #  "road-chesapeake",
              #  "reptilia-tortoise-network-lm",
               "rt-retweet",
               "ca-sandi_auths",
               "email-enron-only"
               ]

# Tree Centrality

In [59]:
# Result containers for the tree-centrality run, keyed by dataset name:
# tree_centrality[name] -> {node: count_tree value}, tree_time[name] -> elapsed seconds.
tree_centrality = {}
tree_time = {}

In [60]:
# Compute count_tree for every node of every selected dataset and record how
# long each dataset takes.
for key in graph_names:
  print("Graph:",key)
  tree_centrality[key] = {}
  G = graphs[key]
  # perf_counter is monotonic, so the measured duration cannot be skewed by
  # system clock adjustments during a long run.
  start_time_all = time.perf_counter()
  for node in G.nodes():
    # Stored node by node so partial results survive an interrupted run.
    tree_centrality[key][node] = count_tree(G, node)
  all_time = time.perf_counter() - start_time_all
  print(" time:", all_time)
  tree_time[key] = all_time

Graph: road-chesapeake
 time: 0.580406665802002
Graph: reptilia-tortoise-network-lm
 time: 1.0569086074829102
Graph: rt-retweet
 time: 0.17590045928955078
Graph: ca-sandi_auths
 time: 0.3396949768066406
Graph: email-enron-only
 time: 1239.881894826889


In [66]:
# Persist the tree-centrality values and timings as JSON.
# The output directory is created first: nothing else in the notebook creates
# it, so on a fresh checkout open(..., 'w') would raise FileNotFoundError.
os.makedirs('result', exist_ok=True)
with open('result/tree_centrality.json', 'w') as fp:
    json.dump(tree_centrality, fp, indent=4)
with open('result/tree_time.json', 'w') as fp:
    json.dump(tree_time, fp, indent=4)

# Graph Centrality

In [78]:
# Result containers for the subgraph-centrality run, keyed by dataset name:
# graph_centrality[name] -> per-node count_subgraph results, graph_time[name] -> elapsed seconds.
graph_centrality = {}
graph_time = {}

In [81]:
def fun(G, v):
    """Worker wrapper: compute ``count_subgraph(G, v)`` with progress output.

    Prints ``v -`` before the computation starts and ``v <count>`` once it
    has finished.

    Args:
        G: graph passed through to ``count_subgraph``.
        v: node to evaluate.

    Returns:
        tuple: ``(v, count)`` where ``count`` is ``count_subgraph(G, v)``.
    """
    print(v,"-")  # start marker
    subgraph_count = count_subgraph(G, v)
    print(v,subgraph_count)  # completion marker with the result
    result = (v, subgraph_count)
    return result

In [None]:
# Compute count_subgraph for every node using 4 worker processes and record
# the elapsed time per dataset.
for key in graph_names:
    print("Graph:",key)
    graph_centrality[key] = {}
    G = graphs[key]
    # perf_counter is monotonic, so clock adjustments cannot skew the timing.
    start_time_all = time.perf_counter()
    with Pool(processes=4) as pool:
        # starmap returns a list of (node, count) pairs; convert it to a
        # {node: count} dict so the entry keeps the mapping shape it is
        # initialised with (and that tree_centrality uses).
        graph_centrality[key] = dict(pool.starmap(fun, [(G, node) for node in G.nodes()]))
    all_time = time.perf_counter() - start_time_all
    print(" time:", all_time)
    graph_time[key] = all_time
    # NOTE(review): only the first dataset in graph_names is processed because
    # of this break -- presumably a temporary limit; confirm before removing.
    break

Graph: rt-retweet
16 -
56 -
45 -
6 -
16 2
6 2
5 -
1 -
5 1
1 1
17 -
8 -
17 2
8 5
20 -
54 -
20 2
45 48
89 -
40 -
40 2
93 -
56 888
2 -
2 1
75 -
75 784
3 -
3 1
19 -
19 2
4 -
4 1
39 -
39 6
42 -
42 6
72 -
54 23616
76 -
76 2
85 -
85 2
11 -
11 1
94 -
94 2
15 -
15 2
12 -
12 1
24 -
24 2
13 -
13 1
41 -
41 2
14 -
14 1
50 -
50 14
21 -
21 3
33 -
33 3
84 -
84 2
18 -
18 1
96 -
96 4
53 -
53 4
22 -
22 1
65 -
65 2
68 -
68 4
51 -
51 2
23 -
23 1
25 -
25 1
26 -
26 1
27 -
27 1
32 -
32 13
78 -
78 13
29 -
29 1
64 -
64 10
30 -
30 1
31 -
31 1
55 -
55 2
58 -
58 74
34 -
34 1
35 -
35 1
36 -
36 1
37 -
37 1
59 -
59 3
66 -
66 3
81 -
81 33
88 -
88 3
43 -
43 1
74 -
74 444
44 -
44 1
79 -
79 98
83 -
83 49
46 -
46 1
47 -
47 1
73 -
73 2
48 -
48 1
87 -
87 2
70 -
70 10
49 -
49 1
77 -
77 15
52 -
52 1
57 -
57 5
67 -
67 23617
92 -
92 23617
80 -
80 889
90 -
90 889
60 -
60 1
62 -
62 1
63 -
63 1
91 -
91 3
69 -
69 1
71 -
71 1
82 -
82 1
86 -
86 1


In [None]:
# Disabled single-process variant of the subgraph-centrality computation
# (stores a {node: count} dict per dataset). Kept commented out for reference.
# for key in graph_names:
#   print("Graph:",key)
#   graph_centrality[key] = {}
#   G = graphs[key]
#   start_time_all = time.time()
#   for node in G.nodes():
#     graph_centrality[key][node] = count_subgraph(G, node)
#   all_time = time.time() - start_time_all
#   print(" time:", all_time)
#   graph_time[key] = all_time

In [None]:
# Persist the subgraph-centrality values and timings as JSON.
# The output directory is created first: nothing else in the notebook creates
# it, so on a fresh checkout open(..., 'w') would raise FileNotFoundError.
os.makedirs('result', exist_ok=True)
with open('result/graph_centrality.json', 'w') as fp:
    json.dump(graph_centrality, fp, indent=4)
with open('result/graph_time.json', 'w') as fp:
    json.dump(graph_time, fp, indent=4)

# Betweenness Centrality

In [None]:
# Betweenness centrality of the graph currently bound to the global `G`.
# NOTE(review): `G` is only assigned as a loop variable in the tree/subgraph
# centrality cells, so the dataset used here depends on which of those cells
# ran last -- confirm the intended graph and bind it explicitly.
betweenness_centrality = nx.betweenness_centrality(G)

In [None]:
# Persist the betweenness centrality values as JSON.
# The output directory is created first so a fresh checkout does not fail
# with FileNotFoundError.
os.makedirs('result', exist_ok=True)
with open('result/betweenness_centrality.json', 'w') as fp:
    json.dump(betweenness_centrality, fp, indent=4)

# Eigenvector Centrality

In [None]:
# Eigenvector centrality of the graph currently bound to the global `G`.
# NOTE(review): `G` is only assigned as a loop variable in the tree/subgraph
# centrality cells, so the dataset used here depends on which of those cells
# ran last -- confirm the intended graph and bind it explicitly.
eigenvector_centrality = nx.eigenvector_centrality(G)

In [None]:
# Persist the eigenvector centrality values as JSON.
# The output directory is created first so a fresh checkout does not fail
# with FileNotFoundError.
os.makedirs('result', exist_ok=True)
with open('result/eigenvector_centrality.json', 'w') as fp:
    json.dump(eigenvector_centrality, fp, indent=4)

# Degree Centrality

In [None]:
# Degree centrality of the graph currently bound to the global `G`.
# NOTE(review): `G` is only assigned as a loop variable in the tree/subgraph
# centrality cells, so the dataset used here depends on which of those cells
# ran last -- confirm the intended graph and bind it explicitly.
degree_centrality = nx.degree_centrality(G)

In [None]:
# Persist the degree centrality values as JSON.
# The output directory is created first so a fresh checkout does not fail
# with FileNotFoundError.
os.makedirs('result', exist_ok=True)
with open('result/degree_centrality.json', 'w') as fp:
    json.dump(degree_centrality, fp, indent=4)