In [1]:
import networkx as nx

# Toy undirected graph with two routes from A to D:
# A-B-D costs 4 + 2, A-C-D costs 3 + 4.
G = nx.Graph()
G.add_weighted_edges_from([
    ('A', 'B', 4),
    ('B', 'D', 2),
    ('A', 'C', 3),
    ('C', 'D', 4),
])

# Minimum-weight route; left as the final expression so the cell displays it.
nx.shortest_path(G, source='A', target='D', weight='weight')

['A', 'B', 'D']

In [3]:
import pandas as pd
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community

In [11]:
# Read the accounts CSV and keep only the underlying 2-D array of row values.
accounts = pd.read_csv(
    '../results/networks/20200403_20200405_accounts.csv'
).values

# Number of rows loaded.
print(accounts.shape[0])

42177


In [12]:
# Read the comment-thread edge list; later cells access its `source` and `target` columns.
edges = pd.read_csv(
    '../results/networks/20200403_20200405_comment_thread_edges.csv'
)

# (rows, columns) of the edge table.
print(edges.shape)

(64963, 2)


## Grab some accounts for testing

In [15]:
# Take the first 10 rows of the edge table as (source, target) tuples.
# itertuples(index=False, name=None) yields plain tuples directly and avoids
# building a Series per row the way iterrows() does.
es = list(
    edges.iloc[:10][['source', 'target']].itertuples(index=False, name=None)
)

# Unique endpoints in first-seen order. list(set(...)) orders strings by their
# per-process randomized hash, so `ac[0]` and the node insertion order would
# change between kernel restarts; dict.fromkeys de-duplicates deterministically.
ac = list(dict.fromkeys(endpoint for pair in es for endpoint in pair))

# Distinct accounts vs. sampled edge rows.
print(len(ac), len(es))

5 10


In [23]:
# Show the de-duplicated account labels gathered from the sampled edge pairs.
# NOTE(review): the rendered output contains account identifiers and names;
# consider clearing it before sharing the notebook.
ac

['100018738009002|Tsaw Zean Pan',
 '100046672568467|ရမ္ဆားရ္ ေခါ့ရမ္',
 '100047717700701|Julia Sengpan',
 '100037954417036|Leng Sy Fa',
 '100040671977347|Rose Gril']

## Graph Test

In [17]:
# Build an undirected test graph from the sampled accounts and edge pairs.
# Note this rebinds `G`, replacing any graph previously held under that name.
G = nx.Graph()
# Nodes are added first, so node order follows `ac` rather than edge order.
G.add_nodes_from(ac)
# nx.Graph keeps at most one edge per node pair, so repeated pairs in `es`
# collapse into a single edge.
G.add_edges_from(es)

In [18]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so build the
# same summary text from accessors that exist in every NetworkX release.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
summary_lines = [
    f"Name: {G.name}",
    f"Type: {type(G).__name__}",
    f"Number of nodes: {node_count}",
    f"Number of edges: {edge_count}",
]
if node_count > 0:
    # G is undirected: each edge adds 2 to the degree sum (a self-loop also
    # counts twice), so the mean degree is 2 * edges / nodes.
    summary_lines.append(f"Average degree: {2 * edge_count / node_count:8.4f}")
print("\n".join(summary_lines))

Name: 
Type: Graph
Number of nodes: 5
Number of edges: 7
Average degree:   2.8000


In [19]:
# Density: ratio of edges present to the maximum possible between the nodes.
# NOTE(review): NetworkX counts self-loops in the edge total, which inflates
# this figure if G contains any; confirm whether self-loops should be kept.
density = nx.density(G)
print("Network density:", density)

Network density: 0.7


In [20]:
# nx.diameter() raises NetworkXError when the graph is disconnected (path
# lengths are infinite), which is likely once more than a small sample of
# edges is loaded. Fall back to the largest connected component in that case
# and say so in the printed label.
if nx.is_connected(G):
    diameter = nx.diameter(G)
    print('Network diameter:', diameter)
else:
    largest_component = max(nx.connected_components(G), key=len)
    diameter = nx.diameter(G.subgraph(largest_component))
    print('Network diameter of largest component:', diameter)

Network diameter: 2


In [21]:
# Transitivity: the fraction of connected triples of nodes that close into triangles.
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

Triadic closure: 0.5454545454545454


### Centrality

In [22]:
# Map every node to its degree, then store that value on the node itself
# under the 'degree' attribute.
degree_dict = dict(G.degree())
nx.set_node_attributes(G, values=degree_dict, name='degree')

In [28]:
# (node, degree) pairs ordered from highest to lowest degree; ties keep
# their original relative order because the sort is stable.
sorted_degree = sorted(
    degree_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
print(sorted_degree)

[('100037954417036|Leng Sy Fa', 6), ('100046672568467|ရမ္ဆားရ္ ေခါ့ရမ္', 3), ('100018738009002|Tsaw Zean Pan', 2), ('100047717700701|Julia Sengpan', 2), ('100040671977347|Rose Gril', 1)]


In [29]:
# Betweenness centrality: compute it, then attach it to each node.
betweenness_dict = nx.betweenness_centrality(G)
nx.set_node_attributes(G, values=betweenness_dict, name='betweenness')

# Eigenvector centrality: compute it, then attach it to each node.
eigenvector_dict = nx.eigenvector_centrality(G)
nx.set_node_attributes(G, values=eigenvector_dict, name='eigenvector')

In [30]:
# Rank (node, betweenness) pairs from most to least central.
sorted_betweenness = sorted(
    betweenness_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# Report at most the first 20 entries of the ranking, one pair per line.
print("Top 20 nodes by betweenness centrality:")
for b in sorted_betweenness[:20]:
    print(b)

Top 20 nodes by betweenness centrality:
('100037954417036|Leng Sy Fa', 0.5833333333333333)
('100046672568467|ရမ္ဆားရ္ ေခါ့ရမ္', 0.08333333333333333)
('100018738009002|Tsaw Zean Pan', 0.0)
('100047717700701|Julia Sengpan', 0.0)
('100040671977347|Rose Gril', 0.0)


In [31]:
# Keep the (up to) 20 highest-betweenness entries.
top_betweenness = sorted_betweenness[:20]

# Print each of them alongside its degree, looked up by node label.
for tb in top_betweenness:
    node_label, centrality = tb
    degree = degree_dict[node_label]
    print("Name:", node_label, "| Betweenness Centrality:", centrality, "| Degree:", degree)

Name: 100037954417036|Leng Sy Fa | Betweenness Centrality: 0.5833333333333333 | Degree: 6
Name: 100046672568467|ရမ္ဆားရ္ ေခါ့ရမ္ | Betweenness Centrality: 0.08333333333333333 | Degree: 3
Name: 100018738009002|Tsaw Zean Pan | Betweenness Centrality: 0.0 | Degree: 2
Name: 100047717700701|Julia Sengpan | Betweenness Centrality: 0.0 | Degree: 2
Name: 100040671977347|Rose Gril | Betweenness Centrality: 0.0 | Degree: 1


### Communities

In [33]:
# Detect communities by greedy modularity maximisation.
communities = community.greedy_modularity_communities(G)

# Map each node to the index of the community that contains it.
modularity_dict = {
    member: group_index
    for group_index, members in enumerate(communities)
    for member in members
}

# Store the community index on every node, like the other metrics.
nx.set_node_attributes(G, values=modularity_dict, name='modularity')

In [34]:
# Nodes assigned to modularity class 0.
class0 = [n for n, attrs in G.nodes(data=True) if attrs['modularity'] == 0]

# Eigenvector centrality of each of those nodes.
class0_eigenvector = {n: G.nodes[n]['eigenvector'] for n in class0}

# Rank them from highest to lowest eigenvector centrality.
class0_sorted_by_eigenvector = sorted(
    class0_eigenvector.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# Show at most the five highest-ranked members.
print("Modularity Class 0 Sorted by Eigenvector Centrality:")
for node in class0_sorted_by_eigenvector[:5]:
    member, score = node
    print("Name:", member, "| Eigenvector Centrality:", score)

Modularity Class 0 Sorted by Eigenvector Centrality:
Name: 100046672568467|ရမ္ဆားရ္ ေခါ့ရမ္ | Eigenvector Centrality: 0.460817522175768
Name: 100018738009002|Tsaw Zean Pan | Eigenvector Centrality: 0.37021480336418733
Name: 100047717700701|Julia Sengpan | Eigenvector Centrality: 0.37021480336418733


In [38]:
# Inspect every attribute stored on the first sampled account node
# (degree, betweenness, eigenvector and modularity were attached in earlier cells).
G.nodes[ac[0]]

{'degree': 2,
 'betweenness': 0.0,
 'eigenvector': 0.37021480336418733,
 'modularity': 0}