In [10]:
import time

start = time.time()

import networkit as nk
import numpy as np
import csv

path_data = '../data/' # path to the data

end = time.time()
print('Importing libraries and setting up parameters takes %.4f s' % (end-start))

Importing libraries and setting up parameters takes 0.0001 s


In [7]:
# ====== reading training set ====== 
start = time.time()

training = np.genfromtxt(path_data + 'training_set.txt', dtype=str)

end = time.time()
print('Reading training set takes %.4f s' % (end-start))

Reading training set takes 2.6858 s


In [11]:
# ====== reading node information ====== #
start = time.time()

with open(path_data + 'node_information.csv', 'r') as f:
    reader = csv.reader(f)
    node_info = list(reader)

end = time.time()
print('Reading node information takes %.4f s' % (end-start))

Reading node information takes 0.2916 s


In [13]:
# ====== create an index list, to facilite access to a node by its id ====== #
ID = [element[0] for element in node_info]

In [39]:
# ====== building graph ====== #
start = time.time()

g = nk.Graph(len(ID)) # adding nodes

#edges = [(ID.index(element[0]), ID.index(element[1])) for element in training if element[2] == '1']
for edge in edges:
    if not g.hasEdge(edge[0], edge[1]):
        g.addEdge(edge[0], edge[1])

end = time.time()
print('Building graph takes %.4f s' % (end-start))

Building graph takes 0.0048 s


In [58]:
# compute betweenness
btw = nk.centrality.EstimateBetweenness(g, nSamples=1000, parallel=True)

In [59]:
start = time.time()

btw.run()

end = time.time()
print('EstimateBetwenness takes %.4f s to finish' % (end-start))

EstimateBetwenness takes 13.2222 s to finish


In [49]:
btw_nodes = zip(ID, btw.scores())

In [53]:
i = 0
for node, score_btw in btw_nodes:
    print('Node %s : %.4f' % (node, score_btw))
    i += 1
    if i > 10:
        break

Node 1001 : 138976.5513
Node 1002 : 61363.5440
Node 1003 : 0.6686
Node 1004 : 0.0000
Node 1005 : 2870.6310
Node 1006 : 4401.8777
Node 1007 : 8186.2597
Node 1008 : 23044.7818
Node 1009 : 0.0000
Node 1010 : 5507.4508
Node 1011 : 12638.7077


In [55]:
# use approx
approx_btw = nk.centrality.ApproxBetweenness(g)

In [56]:
approx_btw.run()

<networkit._NetworKit.ApproxBetweenness at 0x7fef468ee400>

In [61]:
# use original betweenness
orig_btw = nk.centrality.Betweenness(g)

In [62]:
start = time.time()

orig_btw.run()

end = time.time()
print('Computing original betweenness takes %.4f s' % (end-start))

Computing original betweenness takes 372.9846 s


In [63]:
orig_btw.scores()

[142972.14091230067,
 89741.74012003807,
 75.45992261594868,
 0.0,
 4830.750744932249,
 3647.4366915819987,
 23042.90178060701,
 34049.930781199735,
 0.0,
 6435.498948041684,
 13360.236020365195,
 259966.09734728606,
 2733.2932572180193,
 13306.36177731125,
 0.0,
 72874.8673244538,
 164307.49995985362,
 12250.520878592157,
 29071.241110411294,
 0.0,
 109110.63774368656,
 3731.8651607646807,
 49504.67428969852,
 85899.42467122611,
 103401.09872602132,
 7275.62246887611,
 120173.1249228744,
 432.68149443612486,
 39506.35716274241,
 5045.212366894684,
 14938.727119535613,
 13298.14886834452,
 128643.76959158908,
 37805.259383735734,
 177973.88659276976,
 8153.623818831593,
 30046.34665865611,
 27281.0,
 51208.56711507794,
 54566.000000000015,
 331998.01517995336,
 167180.89775489212,
 1052.3880812018515,
 0.0,
 84258.50635268574,
 76959.91447055798,
 6.333333333333333,
 18052.106129600535,
 16555.66353257792,
 6041.416047931941,
 905.5146058491148,
 447936.819048382,
 123172.29339367914,


In [71]:
btw_feature = zip(ID, orig_btw.scores())
with open(path_data + 'betweenness_feature.csv', 'w') as f:
    csv_out = csv.writer(f)
    csv_out.writerow(['id', 'betweenness'])
    for row in btw_feature:
        csv_out.writerow(row)