Positive network: measures computed in this notebook
- Jaccard coefficient (link prediction)
- Adamic-Adar index (link prediction)
- closeness vitality
- PageRank

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Let pandas render up to 999 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 999)

In [2]:
# Read multigraph M from GML (path is relative to the notebook's working
# directory).
M = nx.read_gml('../output/network/u_Gc_positive2.gml')


# Convert multigraph M to simple graph G: parallel edges between the same
# pair of nodes are merged into a single edge whose weight is the sum of the
# merged edges' weights. An edge without a 'weight' attribute counts as 1.0.
#
# `edges(data=True)` yields (u, v, data) triples on NetworkX 1.x and 2.x
# alike; `edges_iter` only exists on 1.x.
#
# NOTE(review): G only receives nodes that appear in at least one edge, so an
# isolated node in M would be dropped -- the summaries printed below let you
# confirm that both graphs report the same number of nodes.
G = nx.Graph()
for u, v, data in M.edges(data=True):
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)

# Print both graph summaries with a blank line between them. A single
# parenthesised argument prints the same text under Python 2's print
# statement and Python 3's print function.
print(nx.info(M) + ' \n')
print(nx.info(G))

Name: undirected Gc
Type: MultiGraph
Number of nodes: 585
Number of edges: 1042
Average degree:   3.5624 

Name: 
Type: Graph
Number of nodes: 585
Number of edges: 935
Average degree:   3.1966


In [3]:
## jaccard coefficient
# Called without an explicit list of pairs, nx.jaccard_coefficient yields a
# (u, v, score) triple for the node pairs of G it scores (per the NetworkX
# docs: every pair not already joined by an edge). The generator is
# materialised into a list so it can be reused when building the table.
jc = list(nx.jaccard_coefficient(G))

In [4]:
# One row per scored node pair.
df = pd.DataFrame(jc, columns=['u', 'v', 'jaccard'])
# Keep only pairs with a non-zero score. `.loc` with a boolean mask selects
# the matching rows and works on every pandas version (`.ix` does not exist
# in pandas >= 1.0).
subset_df = df.loc[df['jaccard'] != 0, :]
# Highest-scoring pairs first; as the cell's last expression this is what
# the notebook displays.
subset_df.sort_values('jaccard', ascending=False)

Unnamed: 0,u,v,jaccard
80,neighbors,social media,1.000000
101325,Prabhupada Village,bad news,1.000000
16639,mild,daily activities,1.000000
16687,mild,pain,1.000000
16708,mild,swelling,1.000000
16755,mild,arm,1.000000
16810,mild,hardness of the skin,1.000000
16891,mild,run down feeling,1.000000
101473,Prabhupada Village,models of disease,1.000000
101395,Prabhupada Village,ear infections,1.000000


In [5]:
# save jaccard
#subset_df.to_csv('jaccard_positive.csv')

In [6]:
## adamic adar index
# Called without an explicit list of pairs, nx.adamic_adar_index yields a
# (u, v, score) triple for the node pairs of G it scores (per the NetworkX
# docs: every pair not already joined by an edge). The generator is
# materialised into a list so it can be reused when building the table.
aa = list(nx.adamic_adar_index(G))

In [7]:
# One row per scored node pair.
df = pd.DataFrame(aa, columns=['u', 'v', 'adamic_adar'])
# Keep only pairs with a non-zero score. `.loc` with a boolean mask selects
# the matching rows and works on every pandas version (`.ix` does not exist
# in pandas >= 1.0).
subset_df = df.loc[df['adamic_adar'] != 0, :]
# Highest-scoring pairs first; as the cell's last expression this is what
# the notebook displays.
subset_df.sort_values('adamic_adar', ascending=False)

Unnamed: 0,u,v,adamic_adar
51539,vaccination coverage,measles vaccination,6.205075
130168,autism risk,autism,4.418644
103384,genital warts,cervical dysplasia,4.335491
69421,Jain study,autism,3.583447
141283,vaccines,vaccine-autism link,3.266084
141310,vaccines,vaccination,3.264423
81799,meningococcal vaccine,vaccines,3.188803
167967,HPV vaccine,sexually active,3.034899
108527,2014-2015 FLULAVAL QUADRIVALENT flu vaccine,flu vaccine recall,2.885390
52923,states,herd immunity,2.866833


In [8]:
# save adamic adar
#subset_df.to_csv('adamic_positive.csv')

In [9]:
# Closeness vitality of a node: the change in the sum of distances between
# all node pairs when that node is excluded from the graph.
# The result maps each node of M to its vitality value.
cv = nx.closeness_vitality(M)

# Turn the {node: value} mapping into a one-column frame indexed by node
# (from_dict with scalar values produces a single column labelled 0), then
# display it with the largest values first.
vitality_label = 'closeness vitality'
cv_df = pd.DataFrame.from_dict(cv, orient='index').rename(columns={0: vitality_label})
cv_df.sort_values(vitality_label, ascending=False)

Unnamed: 0,closeness vitality
meningococcal disease,79948.0
vaccination,77396.0
polio vaccine opposition,74438.0
Wakefield study,64018.0
HPV vaccine,63748.0
vaccines,61934.0
autism,61016.0
orthodox Hasidic Jews,55846.0
measles,47038.0
hepatitis A vaccine,44804.0


In [10]:
# save closeness vitality
#cv_df.to_csv('cv_positive.csv')

In [11]:
# Link analysis: PageRank.
# PageRank ranks nodes by the structure of the links pointing at them. M is
# loaded as an undirected MultiGraph, so each edge counts as a link in both
# directions.
# NOTE(review): pagerank_numpy is not available in NetworkX >= 3.0; its
# replacement nx.pagerank uses a different solver, so confirm the scores
# still match before switching.
pr = nx.pagerank_numpy(M)

# Turn the {node: score} mapping into a one-column frame indexed by node
# (from_dict with scalar values produces a single column labelled 0), then
# display it with the highest-ranked nodes first.
rank_label = 'page rank'
pr_df = pd.DataFrame.from_dict(pr, orient='index').rename(columns={0: rank_label})
pr_df.sort_values(rank_label, ascending=False)

Unnamed: 0,page rank
vaccines,0.024877
parents,0.022525
measles,0.022306
vaccination,0.019813
meningococcal disease,0.015175
HPV vaccine,0.013251
autism,0.01268
vaccine-autism link,0.012336
anti-vaccination,0.011657
measles vaccine,0.009861


In [12]:
# save page rank
#pr_df.to_csv('pr_positive.csv')