Negative network — measures computed in this notebook:
- Jaccard coefficient
- Adamic-Adar index
- closeness vitality
- PageRank

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Let pandas render up to 999 rows before it truncates a displayed frame.
pd.set_option('display.max_rows', 999)

In [2]:
# Load the multigraph M from its GML export.
M = nx.read_gml('../output/network/u_Gc_negative2.gml')


# Collapse M into a simple weighted graph G: parallel edges between the same
# pair of nodes are merged into a single edge whose weight is the sum of the
# parallel edges' weights. An edge without a 'weight' attribute counts as 1.0.
G = nx.Graph()
# edges(data=True) is used rather than edges_iter(), which exists only in
# NetworkX 1.x; edges(data=True) yields the same (u, v, data) triples.
for u, v, data in M.edges(data=True):
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)

# Single-argument print(...) emits the same text under Python 2 and Python 3;
# the ' \n' suffix reproduces the separator that `print x, '\n'` used to write.
print(nx.info(M) + ' \n')
print(nx.info(G))

Name: undirected Gc
Type: MultiGraph
Number of nodes: 1140
Number of edges: 1783
Average degree:   3.1281 

Name: 
Type: Graph
Number of nodes: 1140
Number of edges: 1661
Average degree:   2.9140


In [3]:
## jaccard coefficient

# nx.jaccard_coefficient returns an iterator of (u, v, score) triples; called
# without an explicit list of pairs it scores the node pairs of G that are not
# already connected by an edge. It is materialised into a list so the scores
# can be reused after this cell.
jc = list(nx.jaccard_coefficient(G))

In [4]:
# Tabulate the scored pairs: one row per (u, v) pair with its Jaccard score.
df = pd.DataFrame(jc, columns=['u', 'v', 'jaccard'])
# Keep only pairs with a non-zero score. .loc with a boolean mask selects the
# same rows as the deprecated .ix indexer did.
subset_df = df.loc[df['jaccard'] != 0, :]
# Display highest score first; subset_df itself keeps its original row order.
subset_df.sort_values('jaccard', ascending=False)

Unnamed: 0,u,v,jaccard
647499,cancer-causing,lung cancer,1.000000
342436,convulsions,gene products,1.000000
340885,U.S. measles mortality,global population,1.000000
340888,U.S. measles mortality,outrageous,1.000000
341027,U.S. measles mortality,death from suicide,1.000000
341069,U.S. measles mortality,death from congenital malformation,1.000000
341084,U.S. measles mortality,death from murder,1.000000
341153,U.S. measles mortality,death from heart disease,1.000000
341164,U.S. measles mortality,death from congenital disease,1.000000
341322,U.S. measles mortality,death from car accident,1.000000


In [5]:
# save jaccard
#subset_df.to_csv('jaccard_negative.csv')

In [6]:
## adamic adar index

# nx.adamic_adar_index returns an iterator of (u, v, score) triples; called
# without an explicit list of pairs it scores the node pairs of G that are not
# already connected by an edge. It is materialised into a list so the scores
# can be reused after this cell.
aa = list(nx.adamic_adar_index(G))

In [7]:
# Tabulate the scored pairs: one row per (u, v) pair with its Adamic-Adar score.
df = pd.DataFrame(aa, columns=['u', 'v', 'adamic_adar'])
# Keep only pairs with a non-zero score. .loc with a boolean mask selects the
# same rows as the deprecated .ix indexer did.
subset_df = df.loc[df['adamic_adar'] != 0, :]
# Display highest score first; subset_df itself keeps its original row order.
subset_df.sort_values('adamic_adar', ascending=False)

Unnamed: 0,u,v,adamic_adar
203451,sanctions,harassment,5.947526
386930,prejudice,discrimination,5.034327
88273,sick children,disabled children,4.547174
529439,vaccines,pharmaceutical companies,4.507343
254211,vaccine decisions,young doctors,3.795629
51785,vaccine additives,vaccines,3.792387
466766,flu shots,vaccines,3.766455
469803,formaldehyde,monosodium glutamate,3.744558
417885,doctors,pharmaceutical companies,3.666743
204164,science,mainstream media,3.605236


In [8]:
# save adamic adar
#subset_df.to_csv('adamic_negative.csv')

In [None]:
# Closeness vitality of a node: the change in the sum of distances between
# all node pairs when that node is excluded from the graph.
# Computed on the multigraph M.
cv = nx.closeness_vitality(M)

# Turn the result into a one-column table indexed by the keys of cv.
cv_column = 'closeness vitality'
cv_df = pd.DataFrame.from_dict(cv, orient='index')
cv_df.columns = [cv_column]

# Display in descending order; cv_df itself is not reordered.
cv_df.sort_values(by=cv_column, ascending=False)

In [None]:
# Write the closeness-vitality table to a CSV file in the working directory.
cv_output_path = 'cv_negative.csv'
cv_df.to_csv(cv_output_path)

In [None]:
# Link analysis: PageRank.
# PageRank ranks nodes based on the structure of their incoming links.
# Computed on the multigraph M.
# NOTE(review): pagerank_numpy is not available in recent NetworkX releases —
# confirm the installed version before running this cell.
pr = nx.pagerank_numpy(M)

# Turn the result into a one-column table indexed by the keys of pr.
pr_column = 'page rank'
pr_df = pd.DataFrame.from_dict(pr, orient='index')
pr_df.columns = [pr_column]

# Display in descending order; pr_df itself is not reordered.
pr_df.sort_values(by=pr_column, ascending=False)

In [None]:
# Write the PageRank table to a CSV file in the working directory.
pr_output_path = 'pr_negative.csv'
pr_df.to_csv(pr_output_path)