In [1]:
import re
import numpy as np
import pandas as pd
import networkx as nx
import community
import markov_clustering as mc
import matplotlib.pyplot as plt
from scipy import sparse

%matplotlib inline

In [2]:
# Load words dataset table.
# Column dtypes are spelled with Python builtins: the NumPy aliases `np.int` and
# `np.float` were removed in NumPy 1.24 and `np.unicode_` in NumPy 2.0, so
# referencing them raises AttributeError on current NumPy. The builtins are
# what those aliases resolved to, so the parsed frame is unchanged.
words = pd.read_csv('data/database/words.csv', dtype={
    'tweet': str,
    'index': int,
    'text': str,
    'pos': str,
    'conf': float
})

# Remove words dataframe entries with text = 'woman' and renumber the rows
# so the index is contiguous again.
words = words.loc[words['text'] != 'woman'].reset_index(drop=True)

In [3]:
# Load dictionaries
#
# Each .npy file is unpacked into two objects (presumably a word -> index and
# an index -> word mapping - TODO confirm against the notebook that wrote them).
# They are stored as a pickled object array, which np.load refuses to read
# unless allow_pickle=True is passed (the default became False in NumPy 1.16.3).
# NOTE: unpickling can execute arbitrary code - only load files you produced
# yourself or otherwise trust.
w2i_2017, i2w_2017 = np.load('data/dict2017.npy', allow_pickle=True)
w2i_2018, i2w_2018 = np.load('data/dict2018.npy', allow_pickle=True)

# Per-year lists: position 0 is 2017, position 1 is 2018
w2i = [w2i_2017, w2i_2018]
i2w = [i2w_2017, i2w_2018]

# Load edges dataframes
edges_2017 = pd.read_csv('data/database/edges2017')
edges_2018 = pd.read_csv('data/database/edges2018')

# Load adjacency matrices
X_2017 = np.load('data/adj_matrix_2017.npy')
X_2018 = np.load('data/adj_matrix_2018.npy')

In [4]:
# Create an undirected multilink graph object for each year.
# `nx.from_numpy_matrix` was removed in NetworkX 3.0; `nx.from_numpy_array` is
# its replacement and accepts the same arguments. With parallel_edges=True and
# a MultiGraph, an integer entry (i, j) is expanded into that many parallel
# edges (assumes the stored matrices have an integer dtype - TODO confirm).
net_2017 = nx.from_numpy_array(X_2017, parallel_edges=True, create_using=nx.MultiGraph)
net_2018 = nx.from_numpy_array(X_2018, parallel_edges=True, create_using=nx.MultiGraph)

In [5]:
# Set pos-tag and text as attribute of each node
#
# The values of each i2w dictionary are laid out as rows of a DataFrame:
# column 0 is read as the word text and column 1 as its pos-tag. Row number i
# is attached to graph node i, so this assumes the dictionary's value order
# matches the adjacency-matrix row order - TODO confirm.

# relation index - pos
i2p_2017 = pd.DataFrame(i2w_2017.values()).iloc[:, 1]
i2p_2018 = pd.DataFrame(i2w_2018.values()).iloc[:, 1]

# add the pos attribute through a dict {node: pos}
attr_pos = dict(zip(i2p_2017.index, i2p_2017.values))
nx.set_node_attributes(net_2017, attr_pos, 'pos')

attr_pos = dict(zip(i2p_2018.index, i2p_2018.values))
nx.set_node_attributes(net_2018, attr_pos, 'pos')

# relation index - text
i2t_2017 = pd.DataFrame(i2w_2017.values()).iloc[:, 0]
i2t_2018 = pd.DataFrame(i2w_2018.values()).iloc[:, 0]

# add the text attribute through a dict {node: text}
# The 2017 graph must be labelled from the 2017 dictionary; it was previously
# built from i2t_2018, which attached 2018 words to the 2017 nodes.
attr_text = dict(zip(i2t_2017.index, i2t_2017.values))
nx.set_node_attributes(net_2017, attr_text, 'text')

attr_text = dict(zip(i2t_2018.index, i2t_2018.values))
nx.set_node_attributes(net_2018, attr_text, 'text')

## Degree assortativity

In [14]:
# Degree assortativity of the whole network, one value per year
degree_assort_2017 = nx.degree_assortativity_coefficient(net_2017)
degree_assort_2018 = nx.degree_assortativity_coefficient(net_2018)

print('Assortativity coefficient 2017:', degree_assort_2017)
print('Assortativity coefficient 2018:', degree_assort_2018)

Assortativity coefficient 2017: -0.03360969002667204
Assortativity coefficient 2018: -0.002170278602065764


## Node assortativity by attribute

In [7]:
# Collect, for each year, the nodes whose 'pos' attribute equals 'V'
verbs_2017 = [
    node
    for node, pos_tag in net_2017.nodes(data='pos')
    if pos_tag == 'V'
]
verbs_2018 = [
    node
    for node, pos_tag in net_2018.nodes(data='pos')
    if pos_tag == 'V'
]

In [8]:
# Degree assortativity restricted to the subgraph induced by the verb nodes
verb_net_2017 = net_2017.subgraph(verbs_2017)
verb_net_2018 = net_2018.subgraph(verbs_2018)

verb_assort_2017 = nx.degree_assortativity_coefficient(verb_net_2017)
verb_assort_2018 = nx.degree_assortativity_coefficient(verb_net_2018)

print('Assortativity coefficient 2017:', verb_assort_2017)
print('Assortativity coefficient 2018:', verb_assort_2018)

Assortativity coefficient 2017: 0.032036909658333716
Assortativity coefficient 2018: 0.02476446466691135


In [17]:
# Assortativity with respect to the 'pos' node attribute, one value per year
pos_assort_2017 = nx.attribute_assortativity_coefficient(net_2017, 'pos')
pos_assort_2018 = nx.attribute_assortativity_coefficient(net_2018, 'pos')

print('Assortativity coefficient 2017 wrt pos attribute:', pos_assort_2017)
print('Assortativity coefficient 2018 wrt pos attribute:', pos_assort_2018)

Assortativity coefficient 2017 wrt pos attribute: -0.048484302452582805
Assortativity coefficient 2018 wrt pos attribute: -0.013328685955001137
