# Network Centrality Measures

In [8]:
import os, string, warnings, glob, gensim, re, itertools, math, json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from operator import itemgetter
from collections import Counter

# Network libraries.
import networkx as nx
from networkx.algorithms import community
from networkx.readwrite import json_graph

# Visualization libraries.
import seaborn as sns
import matplotlib.pyplot as plt

# Ignore warnings.
# NOTE: this silences every warning category for the rest of the session,
# including deprecation notices raised by the libraries imported above.
warnings.simplefilter("ignore")

# Declare directory.
# The project root can be overridden with the DH_ABS_DIR environment variable so
# the notebook is runnable on other machines; the original local path stays the
# default. os.path.join(..., "") guarantees a trailing separator, which matters
# because paths are built from abs_dir by plain string concatenation.
abs_dir = os.path.join(
    os.environ.get("DH_ABS_DIR", "/Users/williamquinn/Documents/DH/"), ""
)

## Import Graph

In [6]:
%%time

with open(abs_dir + "GitHub/DH2022_SemanticEmergence/Data_Outputs/network-vectors.json", 'r') as f:
    G = json_graph.node_link_graph( json.load(f) )

print (nx.info(G))

Graph with 375 nodes and 8480 edges
CPU times: user 57.9 ms, sys: 16.3 ms, total: 74.2 ms
Wall time: 75.8 ms


## Centralities

In [9]:
%%time

# Find centrality measures.
betweenness_dict = nx.betweenness_centrality(G)
print ('...betweenness measured')
eigenvector_dict = nx.eigenvector_centrality(G)
print ('...eigenvector measured')
degree_cent_dict = nx.degree_centrality(G)
print ('...centrality measured')

# Construct dataframe from list of dictionaries.
centrality = [betweenness_dict, eigenvector_dict, degree_cent_dict]

centrality = {k: [d[k] for d in centrality] for k in centrality[0]}

centrality = pd.DataFrame(centrality.items(), columns = ['source', 'centrality'])

centrality[['betweenness', 
            'eigenvector', 
            'degree_cent']] = pd.DataFrame(centrality['centrality'].tolist(), index = centrality.index)

centrality = centrality.drop(columns = ['centrality'])

# Join with degree info.
sorted_degree = sorted( dict( G.degree(G.nodes()) ).items(), key = itemgetter(1), reverse = True)
sorted_degree_df = pd.DataFrame(sorted_degree, columns = ['source', 'degree'])

centrality = centrality.merge(sorted_degree_df, on = 'source', how = 'inner')

centrality.describe()

...betweenness measured
...eigenvector measured
...centrality measured
CPU times: user 2.77 s, sys: 82.5 ms, total: 2.85 s
Wall time: 3.22 s


Unnamed: 0,betweenness,eigenvector,degree_cent,degree
count,375.0,375.0,375.0,375.0
mean,0.004942,0.02999722,0.120927,45.226667
std,0.0164,0.04208988,0.107043,40.034268
min,0.0,3.395292e-60,0.0,0.0
25%,3e-06,1.376129e-05,0.016043,6.0
50%,0.000392,0.0007454512,0.101604,38.0
75%,0.004189,0.06391197,0.208556,78.0
max,0.23464,0.1225844,0.382353,143.0


## Betweenness Ranking

In [14]:
%%time

# Rank betweenness.
betweenness_ranking = centrality.sort_values(by = ['betweenness'], ascending = False).reset_index()
betweenness_ranking['rank'] = betweenness_ranking.index + 1

betweenness_ranking

CPU times: user 3.9 ms, sys: 1.35 ms, total: 5.25 ms
Wall time: 4.48 ms


Unnamed: 0,index,source,betweenness,eigenvector,degree_cent,degree,rank
0,106,2044_man,0.234640,3.728502e-02,0.090909,34,1
1,280,2810_woman,0.115630,4.397605e-04,0.072193,27,2
2,281,6098_woman,0.115630,4.397605e-04,0.072193,27,3
3,223,8315_woman,0.054550,2.691020e-04,0.042781,16,4
4,294,2730_man,0.054199,1.608527e-02,0.045455,17,5
...,...,...,...,...,...,...,...
370,302,2787_man,0.000000,4.596180e-03,0.010695,4,371
371,216,709_man,0.000000,1.330225e-03,0.002674,1,372
372,217,713_woman,0.000000,3.973017e-08,0.002674,1,373
373,299,2910_woman,0.000000,2.510918e-07,0.013369,5,374
