In [None]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random as rn
from scipy import stats
from scipy.stats import expon
import plotly.express as px
import plotly.graph_objects as go
from urllib.request import urlopen
import json

In [None]:
# County-level hesitancy table; later cells read its 'Year', 'FIPS' and
# 'Hesitancy Level' columns. index_col=None: no column is designated as the index.
df_hesitancy = pd.read_csv(
    'Data_sets/US_County_hesitancy.csv',
    index_col=None,
)

In [None]:
# County-to-county Facebook table; later cells read its 'sci',
# 'county_origin' and 'county_destination' columns.
df_facebook_friendship = pd.read_csv(
    'Data_sets/Facebook_county_network.csv',
    index_col=None,
)

In [None]:
# County proximity edge list; later cells read its 'county_origin'
# and 'county_destination' columns.
df_proximity = pd.read_csv(
    'Data_sets/Proximity_county_network.csv',
    index_col=None,
)

In [None]:
# County traits table; later cells read its 'year', 'FIPS',
# 'Household Size' and 'High Income Percentage' columns.
df_traits = pd.read_csv(
    'Data_sets/US_County_traits.csv',
    index_col=None,
)

######################################################################################################################
######################################################################################################################
######################################################################################################################
######################################################################################################################

In [None]:
# Comparison window: hesitancy is read for both years, county traits for the first one only.
year_before, year_after = 2015, 2018

## TRAITS

In [None]:
# County traits for the baseline year, restricted to the two attributes used later;
# 'High Income Percentage' is shortened to 'High Income'.
traits_in_baseline_year = df_traits['year'] == year_before
df_selection = (
    df_traits
    .loc[traits_in_baseline_year, ['FIPS', 'Household Size', 'High Income Percentage']]
    .rename(columns={'High Income Percentage': 'High Income'})
)

## HESITANCY DATA

## 2015

In [None]:
# Hesitancy level per county (FIPS) for the baseline year, with the value column
# relabelled so it stays distinguishable after the merge with the later year.
rows_for_year_before = df_hesitancy['Year'] == year_before
df_hesitancy_before = (
    df_hesitancy
    .loc[rows_for_year_before, ['FIPS', 'Hesitancy Level']]
    .rename(columns={'Hesitancy Level': 'Hesitancy level before'})
)

## 2018

In [None]:
# Hesitancy level per county (FIPS) for the later year, with the value column
# relabelled so it stays distinguishable after the merge with the baseline year.
rows_for_year_after = df_hesitancy['Year'] == year_after
df_hesitancy_after = (
    df_hesitancy
    .loc[rows_for_year_after, ['FIPS', 'Hesitancy Level']]
    .rename(columns={'Hesitancy Level': 'Hesitancy level after'})
)

## 2015 - 2018

In [None]:
# Inner join on FIPS (the pandas default), so only counties with a row in both years remain.
df_merge = df_hesitancy_before.merge(df_hesitancy_after, on="FIPS")
# Dividing the FIPS code by 1000 and truncating to int drops its last three digits;
# the result is stored in the 'state' column.
df_merge['state'] = (df_merge['FIPS'] / 1000).astype(int)

In [None]:
# FIPS codes of every county that has a hesitancy level in both years.
nodes_hesitancy = df_merge['FIPS'].tolist()
# Cut-off for the binary opinion: a level at or above it maps to -1, anything else to +1.
hesitancy_threshold = 0.05
# FIPS -> hesitancy level, one dictionary per year.
hesitancy_before = dict(zip(nodes_hesitancy, df_merge['Hesitancy level before'].tolist()))
hesitancy_after = dict(zip(nodes_hesitancy, df_merge['Hesitancy level after'].tolist()))
# Each opinion is derived from its own (FIPS, level) pair, so a key can never be paired
# with another county's value, even if a FIPS code were repeated in df_merge.
# A NaN level compares False against the threshold and therefore maps to +1.
opinion_before = {
    fips: (-1 if level >= hesitancy_threshold else 1)
    for fips, level in hesitancy_before.items()
}
opinion_after = {
    fips: (-1 if level >= hesitancy_threshold else 1)
    for fips, level in hesitancy_after.items()
}

## AGGREGATED NETWORK : FACEBOOK + HESITANCY

In [None]:
"""Merge Facebook with hesitancy data."""

# Minimum 'sci' value a county pair needs in order to become an edge of the Facebook graph.
sci_value = 400
strong_ties = df_facebook_friendship['sci'] >= sci_value
df_facebook = df_facebook_friendship.loc[strong_ties, ['county_origin', 'county_destination']]
graph_facebook = nx.from_pandas_edgelist(df_facebook, source='county_origin', target='county_destination')
# Drop self-loops; they are materialised into a list first because the graph is mutated while removing.
graph_facebook.remove_edges_from(list(nx.selfloop_edges(graph_facebook)))
# Mean degree of the graph: 2 * (number of edges) / (number of nodes).
degree = 2 * graph_facebook.number_of_edges() / graph_facebook.number_of_nodes()

In [None]:
"""Remove missing nodes."""

# Set copy of the hesitancy counties: constant-time membership tests instead of
# scanning the whole list once per graph node.
hesitancy_node_set = set(nodes_hesitancy)
nodes_in_data_not_in_facebook = [each_node for each_node in nodes_hesitancy if each_node not in graph_facebook.nodes]
node_in_facebook_not_in_data = [each_node for each_node in graph_facebook.nodes if each_node not in hesitancy_node_set]
missing_nodes = list(set().union(nodes_in_data_not_in_facebook, node_in_facebook_not_in_data))
# remove_nodes_from silently ignores nodes that are not in the graph, so in effect this
# drops from the Facebook graph every county that has no hesitancy data.
graph_facebook.remove_nodes_from(missing_nodes)
# Mean degree after pruning: 2 * (number of edges) / (number of nodes).
degree = 2 * graph_facebook.number_of_edges() / graph_facebook.number_of_nodes()

# AGGREGATED NETWORK : 
## PROXIMITY + FACEBOOK + HESITANCY


In [None]:
# Proximity graph built from the county_origin / county_destination pairs.
graph_spatial = nx.from_pandas_edgelist(
    df_proximity,
    source='county_origin',
    target='county_destination',
)
# Self-loops are collected into a list first, then removed from the graph.
spatial_self_loops = list(nx.selfloop_edges(graph_spatial))
graph_spatial.remove_edges_from(spatial_self_loops)
# Mean degree: 2 * (number of edges) / (number of nodes).
spatial_edge_count = graph_spatial.number_of_edges()
spatial_node_count = graph_spatial.number_of_nodes()
degree = 2 * spatial_edge_count / spatial_node_count

In [None]:
# Drop from the proximity graph every county that is not a node of the Facebook graph.
# NOTE(review): the reverse pruning (Facebook nodes absent from the proximity graph)
# is not done here - confirm whether both graphs are meant to share one node set.
node_in_spatial_not_in_aggregated = [
    county
    for county in graph_spatial.nodes()
    if county not in graph_facebook.nodes()
]
graph_spatial.remove_nodes_from(node_in_spatial_not_in_aggregated)

""" We now have the aggregated dataset, i.e. the Facebook and proximity networks restricted to the same counties, according to the hesitancy levels available for the years 2015 and 2018. Next, we export the aggregated files. """

# EXPORT FILES

In [None]:
# Write the proximity graph as a two-column (source, target) edge list;
# the file name carries the comparison period.
spatial_edges_path = f'Data_sets/Aggregated_spatial_network_period_{year_before}-{year_after}.csv'
df_graph_spatial = nx.to_pandas_edgelist(graph_spatial, nodelist=graph_spatial.nodes())
df_graph_spatial = df_graph_spatial.loc[:, ['source', 'target']]
df_graph_spatial.to_csv(spatial_edges_path, index=False)

In [None]:
# Write the Facebook graph as a two-column (source, target) edge list;
# the file name carries the comparison period.
# NOTE(review): nodelist is taken from graph_spatial, not graph_facebook - confirm how
# edges touching counties that are absent from the proximity graph should be treated.
facebook_edges_path = f'Data_sets/Aggregated_facebook_network_period_{year_before}-{year_after}.csv'
df_graph_facebook = nx.to_pandas_edgelist(graph_facebook, nodelist=graph_spatial.nodes())
df_graph_facebook = df_graph_facebook.loc[:, ['source', 'target']]
df_graph_facebook.to_csv(facebook_edges_path, index=False)

In [None]:
# Hesitancy rows for the counties that are nodes of the Facebook graph.
# The filter is applied first and the copy taken afterwards, so the frame is independent
# of df_merge and the column assignments below do not trigger SettingWithCopyWarning.
in_facebook_graph = df_merge['FIPS'].isin(list(graph_facebook.nodes()))
df_hesitancy_levels = df_merge.loc[in_facebook_graph].copy()
# Label each year: 'Vulnerable' when the level is at least 0.05, otherwise 'Protected'.
df_hesitancy_levels['Opinion before'] = np.where(df_hesitancy_levels['Hesitancy level before'] >= 0.05, 'Vulnerable', 'Protected')
df_hesitancy_levels['Opinion after'] = np.where(df_hesitancy_levels['Hesitancy level after'] >= 0.05, 'Vulnerable', 'Protected')
df_hesitancy_levels.to_csv('Data_sets/Aggregated_network_hesitancy_level_period_'+str(year_before)+'-'+str(year_after)+'.csv', index=False)

In [None]:
# Baseline-year county traits for the counties that are nodes of the Facebook graph;
# the file name carries the baseline year only.
facebook_counties = list(graph_facebook.nodes())
df_attributes = df_selection.copy()
df_attributes = df_attributes.loc[df_attributes['FIPS'].isin(facebook_counties)]
attributes_path = f'Data_sets/Aggregated_network_attributes_period_{year_before}.csv'
df_attributes.to_csv(attributes_path, index=False)