In [80]:
import json

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

In [81]:
# Load the edge list (one row per link, with `source` and `target` endpoint ids).
# Force both ID columns to stay strings: read_json's default dtype inference can
# turn an all-digit column (e.g. numeric-looking ids) into integers, which would
# then no longer match the string ids used for the graph's nodes.
links = pd.read_json(
    'investigation/test_links.json',
    dtype={'source': str, 'target': str},
)
links.head()

Unnamed: 0,source,target
0,191129565003782,65acb153c62cbe7e
1,191129565003782,916c42063aa907ef
2,191129565003782,605496a5d2507a6a
3,190802000002131,916c42063aa907ef
4,190802000009180,605496a5d2507a6a


In [83]:
# Load the node table (one row per node id, plus layout and category columns)
# and preview its first five rows.
nodes = pd.read_json('investigation/test_nodes.json')
nodes.head()

Unnamed: 0,id,group,expanded,x,y,name,category,type
0,191129565003782,0.0,True,942.85553,264.085083,,,
1,65acb153c62cbe7e,,True,921.062805,273.498901,65acb153c62cbe7e,3.0,graph0_device
2,916c42063aa907ef,,True,943.221375,271.347504,916c42063aa907ef,3.0,graph0_device
3,605496a5d2507a6a,,True,983.648926,274.26593,605496a5d2507a6a,3.0,graph0_device
4,190802000002131,,True,970.544617,308.52121,190802000002131,0.0,graph0_user


In [93]:
# Start from an empty undirected graph; nodes and edges are added in the next cells.
G = nx.Graph()

In [94]:
# Register every id from the node table as a graph node, so that nodes without
# any link still appear in G.
G.add_nodes_from(list(nodes['id']))

In [95]:
# Add one edge per row of the links table. The endpoint columns are selected by
# name so this keeps working if the table ever carries extra columns (networkx
# would interpret a 3-tuple as (u, v, attribute-dict)).
G.add_edges_from(
    links[['source', 'target']].itertuples(index=False, name=None)
)

In [106]:
# Report the size of the assembled graph.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("Nodes: ", node_count)
print("Edges: ", edge_count)

Nodes:  15396
Edges:  10029


## Betweenness Centrality
- Betweenness centrality is a way of detecting the amount of influence a node has over the flow of information in a graph.
- It is often used to find nodes that serve as a bridge from one part of a graph to another.

In [107]:
print("Betweenness")
# Compute betweenness centrality for every node of G and store the scores in `b`
# ordered from highest to lowest. The ascending sort is flipped afterwards, which
# also reverses the relative order of nodes with equal scores.
b = dict(
    sorted(
        nx.betweenness_centrality(G).items(),
        key=lambda pair: pair[1],
    )[::-1]
)

Betweenness


In [125]:
# Show the rows of the node table that have no `type` value.
nodes.loc[nodes['type'].isna()]

Unnamed: 0,id,group,expanded,x,y,name,category,type
0,191129565003782,0.0,True,942.85553,264.085083,,,


In [187]:
# Print the 20 highest-ranked entries of `b`. This relies on `b` already being
# ordered from highest to lowest score, so the first items are the top ones.
top_n = 20
for node_id, score in list(b.items())[:top_n]:
    print(node_id, score)

ea313a5dc143332d 0.13471669929756327
a6b4bb08e2f80e37 0.12603459372765163
4fc9899a2795a0ce 0.12326970218502747
da2b790abccac8df 0.08100584740750268
200314000004343 0.0778395078320185
e5dda88a-1fdf-4aa1-be06-bd1f46e6d3d9 0.07745964109866198
92ff9530442ba3af 0.07214581903932223
191213715020400 0.06932450443963749
190212000014723 0.06607457766839449
190709000000074 0.05718037209355251
7ca0328b5e66b382 0.05680627120152387
170111000002386 0.05166201043476031
190902000000711 0.0512428771660684
190924000011186 0.05049378809970899
312c214618d25506 0.050242087630215586
190221000014512 0.04986366032642312
547e7aa35af55fb7 0.04936371418636245
180307000002585 0.04539532683273065
06db97b336d9dfda 0.0444715157393316
181212000021447 0.04335689471114787


In [None]:
!conda install -c conda-forge graph-tool -y

In [179]:
# Fetch the `type` value of the first row whose id matches this node.
nodes.loc[nodes['id'] == 'ea313a5dc143332d', 'type'].iloc[0]

str

In [183]:
# Show the `type` column for the row(s) whose id matches this node.
nodes.loc[nodes['id'] == '200314000004343', 'type']

254    graph0_user
Name: type, dtype: object

## Degree centrality
- Degree centrality measures the number of incoming and outgoing relationships from a node.
- The Degree Centrality algorithm can help us find popular nodes in a graph.

In [50]:
print("Degree Centrality")
# Score every node of G and keep the result in `d`, ordered from highest to
# lowest. The ascending sort is flipped (instead of using reverse=True) so that
# nodes with equal scores keep the same relative order as before.
d = dict(sorted(nx.degree_centrality(G).items(), key=lambda item: item[1])[::-1])

# Print the ten highest-scoring nodes.
for node_id, score in list(d.items())[:10]:
    print(node_id, score)

Degree Centrality
a6b4bb08e2f80e37 0.0740144810941271
190802000002131 0.05068382944489139
015d71f2d6b26266 0.035398230088495575
49e4c7de-9839-4a38-927f-02b488745207 0.03218020917135961
191012000006172 0.02654867256637168
181212000021447 0.02252614641995173
180308000001765 0.02011263073209976
467dd16a-acf9-4350-881e-7c60c9c56730 0.02011263073209976
a870e94fb41c5a1e 0.019308125502815767
190902000000711 0.016090104585679804


## Closeness centrality
- Closeness centrality is a way of detecting nodes that are able to spread information very efficiently through a graph.
- The closeness centrality of a node is the inverse of its average farness (shortest-path distance) to all other nodes. Nodes with a high closeness score have the shortest distances to all other nodes.

In [49]:
print("Closeness centrality")
# Score every node of G and keep the result in `c`, ordered from highest to
# lowest. The ascending sort is flipped (instead of using reverse=True) so that
# nodes with equal scores keep the same relative order as before.
c = dict(sorted(nx.closeness_centrality(G).items(), key=lambda item: item[1])[::-1])

# Print the ten highest-scoring nodes.
for node_id, score in list(c.items())[:10]:
    print(node_id, score)

Closeness centrality
da2b790abccac8df 0.13486524747397285
200314000004343 0.13383126194630252
e5dda88a-1fdf-4aa1-be06-bd1f46e6d3d9 0.13248608618520125
200412000049490 0.12927735591530343
200329000037797 0.12860805857285065
191213715020400 0.12580242893511026
a6b4bb08e2f80e37 0.12328109756687458
49e4c7de-9839-4a38-927f-02b488745207 0.12262572055303891
6fcc9caacd15d6b7 0.12074587215420751
200504000007605 0.1187260423075103


## Eigenvector Centrality
- Eigenvector Centrality is an algorithm that measures the transitive influence or connectivity of nodes.
- Relationships to high-scoring nodes contribute more to the score of a node than connections to low-scoring nodes. A high score means that a node is connected to other nodes that have high scores.
- Eigenvector Centrality can be used in many of the same use cases as the PageRank algorithm.

In [51]:
# Score every node of G and keep the result in `ec`, ordered from highest to
# lowest. The ascending sort is flipped (instead of using reverse=True) so that
# nodes with equal scores keep the same relative order as before.
# NOTE: networkx computes this by power iteration and raises
# PowerIterationFailedConvergence if it does not converge within max_iter.
ec = dict(sorted(nx.eigenvector_centrality(G).items(), key=lambda item: item[1])[::-1])

# Print the ten highest-scoring nodes.
for node_id, score in list(ec.items())[:10]:
    print(node_id, score)

a6b4bb08e2f80e37 0.6009760766102973
467dd16a-acf9-4350-881e-7c60c9c56730 0.1973775368638648
200511000025336 0.12290045652570007
190619000001425 0.10284186676876034
200318000006953 0.1009127643292695
200401000010646 0.09889349449357754
200321000003911 0.0981280202062336
200331000012366 0.09682629086919071
200405000031110 0.09538577408700905
200331000012317 0.09454507071757762
