# node_df
0. node-level data (all articles)
1. network data + centrality data = combined_df
2. remove NaN values
3. out = node_df.csv

In [8]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob
pd.set_option('display.mpl_style', 'default') 
# display all the columns
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)


gml_files = glob('../output/network/*/*.gml')


def calculate_graph_inf(graph):
    graph.name = filename
    info = nx.info(graph)
    print info
    ## plot spring layout
    #plt.figure(figsize=(10,10))
    #nx.draw_spring(graph, arrows=True, with_labels=True)

def highest_centrality(cent_dict):
    """Returns a tuple (node,value) with the node
    with largest value from centrality dictionary."""
    # create ordered tuple of centrality data
    cent_items = [(b,a) for (a,b) in cent_dict.iteritems()]
    # sort in descending order
    cent_items.sort()
    cent_items.reverse()
    return tuple(reversed(cent_items[0]))

In [9]:
# create empty dataframe with columns

data_columns = ['name',
                'sentiment'
                ]
data = pd.DataFrame(columns = data_columns)
combined_df = pd.DataFrame()

In [10]:
# graph = directed, ugraph = undirected

for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)
    ugraph = graph.to_undirected()
    # adding missing edges back
    U = graph.to_undirected(reciprocal=True)
    e = U.edges()
    ugraph.add_edges_from(e)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 40)
    print(gml_graph)
    #calculate_graph_inf(graph)
    #calculate_graph_inf(ugraph)

    # calculate variables and save into list
    sent = filepath.split('/')[-1]
    deg_cent = nx.degree_centrality(graph)
    bet_cent = nx.betweenness_centrality(graph)
    clo_cent = nx.closeness_centrality(graph)
    
    graph_values = {'name':filename,
                    'sentiment':sent,
                    }

    data = data.append(graph_values, ignore_index=True)
    
    #
    degree = nx.degree(graph)
    deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
    deg_df.columns = ['degree']
    # degree centrality
    deg_cent = nx.degree_centrality(graph)
    dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
    dc_df.columns = ['degree centrality']
    # betweenness centrality
    bet_cent = nx.betweenness_centrality(graph)
    bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
    bc_df.columns = ['betweenness centrality']
    # closeness centrality
    clo_cent = nx.closeness_centrality(graph)
    cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
    cc_df.columns = ['closeness centrality']
    
    # concat node frames into node_df
    frames = [deg_df, dc_df, bc_df, cc_df]
    node_df = pd.concat(frames, axis = 1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()
    
    values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])
    
    # df = merges graph_values with node_df for single graph and fill NaNs
    df = pd.concat([values, node_df], axis = 1)
    df = df.fillna(method='ffill')
    
    # append to combined_df
    combined_df = combined_df.append(df)
    
#    if graph_num == 2:
#        break

----------------------------------------
../output/network/negative/article03.gml
----------------------------------------
../output/network/negative/article05.gml
----------------------------------------
../output/network/negative/article06.gml
----------------------------------------
../output/network/negative/article07.gml
----------------------------------------
../output/network/negative/article1.gml
----------------------------------------
../output/network/negative/article1001.gml
----------------------------------------
../output/network/negative/article1021.gml
----------------------------------------
../output/network/negative/article152.gml
----------------------------------------
../output/network/negative/article2308.gml
----------------------------------------
../output/network/negative/article3335.gml
----------------------------------------
../output/network/negative/article4106.gml
----------------------------------------
../output/network/negative/article432.gml
-----

In [11]:
combined_df

Unnamed: 0,name,sentiment,node,degree,degree centrality,betweenness centrality,closeness centrality
0,article03.gml,negative,parent,1,0.058824,0.000000,0.058824
1,article03.gml,negative,vaccine dangers,2,0.117647,0.003676,0.058824
2,article03.gml,negative,unvaccinated children,2,0.117647,0.011029,0.088235
3,article03.gml,negative,vaccines are not safe,1,0.058824,0.000000,0.000000
4,article03.gml,negative,safe,1,0.058824,0.000000,0.000000
5,article03.gml,negative,parents who know that vaccines are not safe,1,0.058824,0.000000,0.058824
6,article03.gml,negative,vaccine-injured children,1,0.058824,0.000000,0.000000
7,article03.gml,negative,vaccinated children,2,0.117647,0.000000,0.133690
8,article03.gml,negative,vaccines,2,0.117647,0.011029,0.058824
9,article03.gml,negative,doctors,1,0.058824,0.000000,0.058824


In [12]:
# save dataframe to csv
combined_df.to_csv('node_df', encoding = 'utf-8')

- - -

In [13]:
node_df = pd.read_csv('node_df')
node_df

Unnamed: 0.1,Unnamed: 0,name,sentiment,node,degree,degree centrality,betweenness centrality,closeness centrality
0,0,article03.gml,negative,parent,1,0.058824,0.000000,0.058824
1,1,article03.gml,negative,vaccine dangers,2,0.117647,0.003676,0.058824
2,2,article03.gml,negative,unvaccinated children,2,0.117647,0.011029,0.088235
3,3,article03.gml,negative,vaccines are not safe,1,0.058824,0.000000,0.000000
4,4,article03.gml,negative,safe,1,0.058824,0.000000,0.000000
5,5,article03.gml,negative,parents who know that vaccines are not safe,1,0.058824,0.000000,0.058824
6,6,article03.gml,negative,vaccine-injured children,1,0.058824,0.000000,0.000000
7,7,article03.gml,negative,vaccinated children,2,0.117647,0.000000,0.133690
8,8,article03.gml,negative,vaccines,2,0.117647,0.011029,0.058824
9,9,article03.gml,negative,doctors,1,0.058824,0.000000,0.058824
