# Network visualization top 30

Last updated: 2023-09-20  
By: Lauren Liao  
Purpose: visualize and analyze the network derived from the word similarity matrix, keeping each word's 30 most related words

In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import seaborn as sns
import graph_tool.all as gt

%matplotlib inline

**Data loading**  
The data comes from the preprocessed text, stored as a pickled file.  
The similarity matrix was reduced to its upper-right triangle and thresholded at the 50th percentile (the median).

In [2]:
# Load the pickled top-30 results. Later cells unpack every element as a
# (word, {related_word: value}) pair; the values are presumably similarity
# scores, since the graph uses 1 - value as the edge weight.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by this project.
top30_lst =  pd.read_pickle("../results/top30_mean_all.pkl")

In [3]:
# Number of entries loaded (one (word, related-words) pair per entry).
len(top30_lst)

9242

## Graph creation and analysis

In [4]:
# Every head word first, followed by every related word of every head word
# (duplicates are kept on purpose so that they can be counted below).
all_nodes30 = [head for head, _ in top30_lst] + [
    related_word for _, related in top30_lst for related_word in related
]

# Words that show up more than once across head words and related words.
node_counts30 = Counter(all_nodes30)
duplicated_words30 = [word for word in node_counts30 if node_counts30[word] > 1]

# Word -> position in all_nodes30; for repeated words the last position wins.
node_labels = dict(zip(all_nodes30, range(len(all_nodes30))))

In [5]:
# Number of distinct words that appear as a head word or as a related word.
len(set(all_nodes30))

9242

In [6]:
# Number of words that occur more than once in all_nodes30.
len(duplicated_words30)

7897

In [7]:
np.random.seed(1234567891)

# Undirected word network: one node per word, one edge per (word, related
# word) pair. The stored value is turned into a distance (1 - value), so
# larger values give shorter edges.
G = nx.Graph()
G.add_nodes_from(all_nodes30)
G.add_weighted_edges_from(
    (head_word, related_word, 1 - value)
    for head_word, related in top30_lst
    for related_word, value in related.items()
)

In [8]:
# Size of the full network: node count on the first line, edge count on the second.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
print(num_nodes, num_edges, sep="\n")

9242
233374


In [9]:
def print_distance(source_node, target_node, graph=None):
    """Print the weighted shortest path between two words and its length.

    Edge weights are read from the ``"weight"`` edge attribute, so the
    reported distance is the sum of those weights along the path, rounded to
    three decimals.

    Parameters
    ----------
    source_node, target_node
        Node identifiers (words) to connect.
    graph : networkx graph, optional
        Graph to search. Defaults to the notebook-level graph ``G``.

    Returns
    -------
    None
        The result is printed. A message is printed instead of raising when
        either word is missing from the graph or no path connects them.
    """
    if graph is None:
        graph = G
    try:
        # A single search returns both the length and the path, so the two
        # printed values always describe the same route and Dijkstra is not
        # run twice.
        weighted_distance, shortest_path = nx.bidirectional_dijkstra(
            graph, source_node, target_node, weight="weight"
        )
    except nx.NodeNotFound:
        # Out-of-vocabulary words should not abort the whole cell.
        print(f"{source_node} or {target_node} is not in the graph.")
        return
    except nx.NetworkXNoPath:
        print(f"there is no path from {source_node} to {target_node}.")
        return
    print(f"shortest path from {source_node} to {target_node}: {shortest_path}")
    print(f"weighted distance: {round(weighted_distance, 3)}")

In [10]:
# Distance from "health" to the two racism-related terms.
for target_word in ("racism", "racist"):
    print_distance("health", target_word)

shortest path from health to racism: ['health', 'eliminate', 'tackling', 'injustice', 'racism']
weighted distance: 0.71
shortest path from health to racist: ['health', 'criminal_justice', 'promise', 'arrangement', 'bring', 'struggle', 'matter', 'racist']
weighted distance: 0.643


In [11]:
# Distance from each source word to "racism".
for source_word in ("poverty", "environment", "environmental", "disparity"):
    print_distance(source_word, "racism")

shortest path from poverty to racism: ['poverty', 'physical', 'stigma', 'interpersonal', 'discrimination', 'racism']
weighted distance: 0.407
shortest path from environment to racism: ['environment', 'influence', 'interpersonal', 'discrimination', 'racism']
weighted distance: 0.378
shortest path from environmental to racism: ['environmental', 'structural', 'form', 'historical', 'racism']
weighted distance: 0.379
shortest path from disparity to racism: ['disparity', 'racial', 'discrimination', 'racism']
weighted distance: 0.432


In [12]:
# Distance from each source word to "racism".
for source_word in ("deprivation", "economic", "psychosocial", "stress"):
    print_distance(source_word, "racism")

shortest path from deprivation to racism: ['deprivation', 'negative', 'interpersonal', 'discrimination', 'racism']
weighted distance: 0.349
shortest path from economic to racism: ['economic', 'environmental', 'structural', 'form', 'historical', 'racism']
weighted distance: 0.433
shortest path from psychosocial to racism: ['psychosocial', 'interpersonal', 'discrimination', 'racism']
weighted distance: 0.294
shortest path from stress to racism: ['stress', 'psychological', 'stigma', 'interpersonal', 'discrimination', 'racism']
weighted distance: 0.349


In [13]:
# Pairwise distances among the intermediate words seen in the paths above.
word_pairs = [
    ("structural", "interpersonal"),
    ("structural", "form"),
    ("form", "discrimination"),
]
for source_word, target_word in word_pairs:
    print_distance(source_word, target_word)

shortest path from structural to interpersonal: ['structural', 'interpersonal']
weighted distance: 0.132
shortest path from structural to form: ['structural', 'form']
weighted distance: 0.075
shortest path from form to discrimination: ['form', 'interpersonal', 'discrimination']
weighted distance: 0.238


In [14]:
# Distances from inequality/harassment to discrimination and racism.
word_pairs = [
    ("inequality", "discrimination"),
    ("harassment", "discrimination"),
    ("harassment", "racism"),
]
for source_word, target_word in word_pairs:
    print_distance(source_word, target_word)

shortest path from inequality to discrimination: ['inequality', 'gender', 'discrimination']
weighted distance: 0.29
shortest path from harassment to discrimination: ['harassment', 'prejudice', 'discrimination']
weighted distance: 0.176
shortest path from harassment to racism: ['harassment', 'prejudice', 'racism']
weighted distance: 0.216


### Graph visualization with graph tool

In [74]:
# Head words whose top-30 related words seed the sub-network drawn below.
KEYTERMS = [
    "health", "doctor", "resource", "class",
    "race", "black", "white", "bame", "minority",
    "racism", "disadvantaged",
    "discrimination", "wealth", "poverty", "power", "bias", "structural",
    "possibly",
    "perhaps", "probably", "likely",
]

In [59]:
# NOTE(review): the execution counts show this cell was last run before the
# KEYTERMS cell above it; re-run the notebook top to bottom so both agree.
# Candidate words: every key term, plus the related words of each key term.
sub_nodes30 = list(KEYTERMS)
for head_word, related in top30_lst:
    if head_word in KEYTERMS:
        sub_nodes30.extend(related.keys())

# Words reached more than once in that candidate list (this replaces the
# full-network duplicated_words30 computed earlier).
sub_counts30 = Counter(sub_nodes30)
duplicated_words30 = [word for word in sub_counts30 if sub_counts30[word] > 1]

In [76]:
# Words kept in the sub-network: key terms plus the repeatedly reached words.
# dict.fromkeys removes duplicates while keeping first-seen order, so the
# word -> index mapping is identical on every run. Enumerating a set of
# strings is not, because string hashing is randomized per process.
selected_words30 = list(dict.fromkeys(KEYTERMS + duplicated_words30))
node_labels = {word: i for i, word in enumerate(selected_words30)}

# Entries of top30_lst whose head word is kept. Testing membership against
# node_labels avoids rebuilding a set for every element of top30_lst.
sub_30lst = [(x, y) for x, y in top30_lst if x in node_labels]

In [79]:
seed = 1234567891
np.random.seed(seed)
# graph-tool draws from its own random number generator, which
# np.random.seed does not touch; seed it as well so the later block-model
# fit and layout can be reproduced.
gt.seed_rng(seed)

# One vertex per distinct selected word. node_labels has one entry per
# distinct word, whereas len(KEYTERMS+duplicated_words30) would count words
# present in both lists twice.
num_vertices = len(node_labels)

g = gt.Graph(directed=False)
vertices = [g.add_vertex() for _ in range(num_vertices)]

# Keep the word on each vertex so the graph can be labelled or inspected later.
word_prop = g.new_vertex_property("string")
for word, index in node_labels.items():
    word_prop[vertices[index]] = word
g.vp["word"] = word_prop

# adding edges (unweighted): connect each kept head word to those of its
# related words that are also kept.
# NOTE(review): if A lists B and B lists A, two parallel edges are added;
# confirm that this multiplicity is intended for the block-model fit.
for word, word_related_dict in sub_30lst:
    for related_word in word_related_dict:
        if related_word in node_labels:
            g.add_edge(vertices[node_labels[word]], vertices[node_labels[related_word]])

In [80]:
# Fit a nested block model to the sub-network with graph-tool's
# description-length minimization.
# NOTE(review): the fit is presumably stochastic; confirm graph-tool's RNG is
# seeded before this cell if the partition must be reproducible.
state = gt.minimize_nested_blockmodel_dl(g)

In [81]:
# Render the fitted hierarchy to a PDF file. The value returned by draw() is
# echoed as the cell output below (a tuple of property maps and a graph view).
state.draw(output="../results/network_graph30.pdf")

(<VertexPropertyMap object with value type 'vector<double>', for Graph 0x7f0a5f964a50, at 0x7f0a5f80b190>,
 <GraphView object, directed, with 162 vertices and 161 edges, edges filtered by (<EdgePropertyMap object with value type 'bool', for Graph 0x7f0a5f8cb810, at 0x7f0a5f8cb450>, False), vertices filtered by (<VertexPropertyMap object with value type 'bool', for Graph 0x7f0a5f8cb810, at 0x7f0a5f8c8650>, False), at 0x7f0a5f8cb810>,
 <VertexPropertyMap object with value type 'vector<double>', for Graph 0x7f0a5f8cb810, at 0x7f0a5f8cab10>)