In [31]:
import warnings
# Suppress every warning category for the rest of the session. This also hides
# DeprecationWarning / FutureWarning messages raised by any library used below,
# so re-enable warnings when upgrading dependencies.
warnings.filterwarnings('ignore')

In [32]:
import pickle

import networkx as nx
import numpy as np
import pandas as pd

# 1. Excel Dataset Processing

- Nodes

In [33]:
# Load the node table. The columns read by later cells are `id`, `size`
# and `type`.
node_df = pd.read_excel('Data/Nodes.xlsx')

In [34]:
# Inspect the loaded node table (long frames are truncated in the display).
node_df

Unnamed: 0,id,size,type,subcategory of type 1
0,1,233,1,1.0
1,2,9714,1,2.0
2,3,1474,1,3.0
3,4,92807,1,2.0
4,5,9618,1,4.0
...,...,...,...,...
1351,1336,561,3,
1352,1338,12896,3,
1353,1346,1261,3,
1354,1347,174197,3,


In [35]:
# Number of nodes carrying each type code (1, 2, 3), in that order.
type_codes = np.array(list(node_df['type']))
tuple(np.sum(type_codes == code) for code in (1, 2, 3))

(644, 501, 211)

- Edges

In [36]:
# Load the Nov 2019 edge list (Fig. 2(c)) and display it.
edge_df1 = pd.read_excel('Data/Edges at Nov 2019 for Fig. 2(c).xlsx')
edge_df1

Unnamed: 0,source,target,Column1
0,997,227,
1,297,1326,
2,702,344,
3,702,334,
4,702,274,
...,...,...,...
7382,1263,1029,
7383,1263,836,
7384,1263,802,
7385,1263,750,


In [37]:
# Load the Dec 2020 edge list (Fig. 2(d)) and display it.
edge_df2 = pd.read_excel('Data/Edges at Dec 2020 for Fig. 2(d).xlsx')
edge_df2

Unnamed: 0,source,target
0,1356,1185
1,1356,839
2,654,1231
3,1356,922
4,1356,1128
...,...,...
7149,978,1254
7150,978,544
7151,978,405
7152,978,1300


# 2. Create Graphs

In [77]:
# Two empty directed graphs: G1 receives the Nov 2019 edges (edge_df1),
# G2 the Dec 2020 edges (edge_df2).
G1 = nx.DiGraph()
G2 = nx.DiGraph()

In [78]:
# One numeric type code per node, in node_df row order.
# Reading the column as a Series yields the flat list directly; the previous
# `sum(list_of_single_item_lists, [])` flatten copied the accumulator on every
# step, which is quadratic in the number of nodes.
node_polarity_num = node_df["type"].tolist()

In [79]:
# Translate numeric type codes into polarity labels.
# Codes 1 and 2 are mapped explicitly; every other value falls back to
# 'pro_vaccines'.
polarity_by_code = {1: 'neutral', 2: 'anti_vaccines'}
node_polarity = [polarity_by_code.get(num, 'pro_vaccines') for num in node_polarity_num]

In [80]:
# Add nodes
# Pull each column out as a flat list directly (the previous `sum(..., [])`
# flatten was quadratic in the number of nodes).
node_name = node_df["id"].tolist()
fan_size = node_df["size"].tolist()

# Guard against hidden notebook state: `node_polarity` comes from an earlier
# cell and must describe exactly the rows of the current `node_df`.
assert len(node_polarity) == len(node_name), "node_polarity is out of sync with node_df"

# Both graphs share the same node set and attributes; only their edges differ.
for name, polarity, size in zip(node_name, node_polarity, fan_size):
    for graph in (G1, G2):
        graph.add_node(name, polarity=polarity, fan_size=size)

In [81]:
# Add edges
edge1 = edge_df1[["source", "target"]].values.tolist()
edge2 = edge_df2[["source", "target"]].values.tolist()

# Build the id -> size lookup once. The previous version filtered `node_df`
# with a boolean mask twice per edge, i.e. a full table scan for every lookup.
# `drop_duplicates` keeps the first row per id, matching the old `.values[0][0]`.
unique_nodes = node_df.drop_duplicates(subset="id")
size_by_id = dict(zip(unique_nodes["id"].tolist(), unique_nodes["size"].tolist()))

for graph, edge_list in ((G1, edge1), (G2, edge2)):
    for source, target in edge_list:
        # Every endpoint must exist in the node table (the old code failed with
        # an opaque IndexError here; fail with the offending edge instead).
        if source not in size_by_id or target not in size_by_id:
            raise KeyError(f"edge ({source}, {target}) references an id missing from node_df")
        # NOTE(review): the weight is the SOURCE NODE'S ID times the target's
        # size, exactly as in the original code (which read column 'id' for the
        # source). Multiplying by an identifier looks unintended -- possibly
        # size * size was meant -- but it is preserved so saved results do not
        # change silently. Confirm the intended formula.
        graph.add_edge(source, target, weight=source * size_by_id[target])

# 3. Save Graphs

## 3.1. gpickle

In [83]:
# `nx.write_gpickle` was deprecated in networkx 2.6 and removed in 3.0.
# It was a thin wrapper around pickle.dump with the highest protocol, so the
# files written here remain loadable with pickle.load (or nx.read_gpickle on
# older networkx versions).
for graph, path in ((G1, "Data/Graphs/G1.gpickle"), (G2, "Data/Graphs/G2.gpickle")):
    with open(path, "wb") as handle:
        pickle.dump(graph, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [84]:
# `nx.info` was removed in networkx 3.0; build the same one-line summary
# ("<Class> with N nodes and M edges") from the public graph API so the cell
# works on any networkx version.
tuple(
    f"{type(graph).__name__} with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges"
    for graph in (G1, G2)
)

('DiGraph with 1356 nodes and 7387 edges',
 'DiGraph with 1356 nodes and 7154 edges')

In [85]:
# Spot-check the attributes stored on node 1 of G1 and node 2 of G2.
G1.nodes[1], G2.nodes[2]

({'polarity': 'neutral', 'fan_size': 233},
 {'polarity': 'neutral', 'fan_size': 9714})

In [90]:
# Spot-check the edge 2 -> 650 in both graphs. The weight is built as
# (source node id) * (target node size), so it is identical in G1 and G2.
G1.edges[(2, 650)], G2.edges[(2, 650)]

({'weight': 64972}, {'weight': 64972})

## 3.2. CSV

In [91]:
import csv

In [92]:
def write_nodes_csv(G, nodes, attributes, address):
    """Write one CSV row per node: its identifier plus the selected attributes.

    Parameters
    ----------
    G : graph-like
        Object whose ``G.nodes[node]`` returns a mapping of attribute name to
        value (for example a networkx graph).
    nodes : iterable
        Node identifiers to export, in output row order.
    attributes : sequence of str
        Attribute names to export, in column order (after the ``node`` column).
        An attribute a node does not carry is written as an empty field;
        attributes not listed here are left out of the file.
    address : str or path-like
        Destination file; created or overwritten.

    Notes
    -----
    The graph is not modified. The previous implementation wrote the ``node``
    key into the graph's own attribute dict, which polluted every exported
    node and made a later export with a different attribute list raise
    ``ValueError`` from ``csv.DictWriter``.
    """
    header = ['node', *attributes]

    # Open inside the `with` so the handle is closed even if writing fails.
    with open(address, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()

        for node in nodes:
            stored = G.nodes[node]
            # Build a fresh row limited to the requested columns instead of
            # mutating (or dumping all of) the graph's attribute dict.
            row = {name: stored[name] for name in attributes if name in stored}
            row['node'] = node
            writer.writerow(row)

In [94]:
# G1 and G2 were given the same nodes and node attributes, so a single node
# file (written from G1) covers both graphs.
write_nodes_csv(
    G1,
    nodes=list(G1.nodes),
    attributes=['polarity', 'fan_size'],
    address="Data/Graphs/G_nodes.csv",
)

In [95]:
def write_edges_csv(G, edges, attributes, address):
    """Write one CSV row per edge: source, target and the selected attributes.

    Parameters
    ----------
    G : graph-like
        Object whose ``G.edges[edge]`` returns a mapping of attribute name to
        value (for example a networkx graph).
    edges : iterable
        Edge keys to export, in output row order. Each key is indexable, with
        ``edge[0]`` the source node and ``edge[1]`` the target node.
    attributes : sequence of str
        Attribute names to export, in column order (after ``source_node`` and
        ``target_node``). An attribute an edge does not carry is written as an
        empty field; attributes not listed here are left out of the file.
    address : str or path-like
        Destination file; created or overwritten.

    Notes
    -----
    The graph is not modified. The previous implementation wrote
    ``source_node`` / ``target_node`` into the graph's own edge attribute
    dicts, which polluted every exported edge and made a later export with a
    different attribute list raise ``ValueError`` from ``csv.DictWriter``.
    """
    header = ['source_node', 'target_node', *attributes]

    # Open inside the `with` so the handle is closed even if writing fails.
    with open(address, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()

        for edge in edges:
            stored = G.edges[edge]
            # Build a fresh row limited to the requested columns instead of
            # mutating (or dumping all of) the graph's attribute dict.
            row = {name: stored[name] for name in attributes if name in stored}
            row['source_node'] = edge[0]
            row['target_node'] = edge[1]
            writer.writerow(row)

In [96]:
# Export the edges of G1 (built from the Nov 2019 edge list) with their weights.
write_edges_csv(
    G1,
    edges=list(G1.edges),
    attributes=['weight'],
    address="Data/Graphs/G1_edges.csv",
)

In [97]:
# Export the edges of G2 (built from the Dec 2020 edge list) with their weights.
write_edges_csv(
    G2,
    edges=list(G2.edges),
    attributes=['weight'],
    address="Data/Graphs/G2_edges.csv",
)