In [None]:
import pandas as pd
import networkx as nx
from src.transform_data import get_city_name

In [None]:
# Read the infectious-disease CSV into a DataFrame (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='data/agg_data/infectious_disease.csv')

In [None]:
# Replace both municipality columns with the output of get_city_name.
# NOTE(review): the columns are overwritten, so re-running this cell passes the
# already-converted values through get_city_name again — confirm that is safe.
df = df.assign(
    MUNIC_RES=get_city_name(df['MUNIC_RES']),
    MUNIC_MOV=get_city_name(df['MUNIC_MOV']),
)

In [None]:
# Creating a directed graph from the DataFrame, with weight (HOSPITALIZACOES):
# one edge MUNIC_RES -> MUNIC_MOV per pair of municipalities.
#
# A DiGraph stores a single edge per ordered (source, target) pair, so if the same
# pair appears in more than one row the later row silently overwrites the earlier
# one and its HOSPITALIZACOES is lost. Sum the weight per pair first so nothing is
# dropped; when every pair is already unique this leaves the edges unchanged.
# NOTE(review): repeated pairs could arise if get_city_name maps distinct codes to
# the same name — confirm against the data.
edge_weights = (
    df.groupby(['MUNIC_RES', 'MUNIC_MOV'], as_index=False, sort=False, dropna=False)['HOSPITALIZACOES']
    .sum(min_count=1)  # min_count=1 keeps an all-missing weight as NaN instead of turning it into 0
)
G = nx.from_pandas_edgelist(
    edge_weights,
    source='MUNIC_RES',
    target='MUNIC_MOV',
    edge_attr='HOSPITALIZACOES',
    create_using=nx.DiGraph(),
)

In [None]:
# Weighted degrees: for each node, the sum of HOSPITALIZACOES over incoming edges
# (in_degree) and over outgoing edges (out_degree).
in_degree = dict(G.in_degree(weight='HOSPITALIZACOES'))
out_degree = dict(G.out_degree(weight='HOSPITALIZACOES'))

# A self-loop (MUNIC_RES == MUNIC_MOV) contributes to both weighted degrees.
# Remove it from the out-degree only, so out_degree holds the weight going to
# *other* nodes while in_degree still includes the node's own self-loop.
for node in G.nodes():
    if G.has_edge(node, node):
        out_degree[node] -= G[node][node].get('HOSPITALIZACOES', 0)

# Share of incoming weight: in / (in + out-to-others), a value in [0, 1].
# A node whose total weight is zero has no defined share; use NaN rather than
# letting the division raise ZeroDivisionError and abort the whole cell.
in_share = {}
for node in G.nodes():
    total = in_degree[node] + out_degree[node]
    in_share[node] = in_degree[node] / total if total else float('nan')

degree_diff = pd.Series(in_share, name='DEGREE_DIFF')

degree_diff.sort_values(ascending=False).round(3)

In [None]:
# plotting the degree difference, highest values first
degree_diff_ranked = degree_diff.sort_values(ascending=False)
# Place a tick roughly every tenth of the bars. The step is clamped to at least 1
# because len // 10 is 0 for fewer than 10 entries and range() rejects a zero step.
degree_diff_tick_step = max(1, len(degree_diff_ranked) // 10)
degree_diff_ranked.plot(
    kind='bar',
    figsize=(12, 6),
    title='Diferença percentual entre hospitalizações no município e moradores do município hospitalizados',
    ylabel='Degree Difference',
    xticks=range(0, len(degree_diff_ranked), degree_diff_tick_step),
)

In [None]:
# Counts of nodes: below 0.5, at or above 0.5, and below 0.5 excluding exact zeros.
(
    degree_diff.lt(0.5).sum(),
    degree_diff.ge(0.5).sum(),
    (degree_diff.ne(0) & degree_diff.lt(0.5)).sum(),
)

In [None]:
# Number of nodes whose value is exactly 0 (no incoming weight).
# The stray trailing comma was removed so the cell displays the count itself
# rather than a one-element tuple.
(degree_diff == 0).sum()

In [None]:
# Shape of the array of distinct MUNIC_RES values, i.e. (number of distinct values,).
df.MUNIC_RES.unique().shape

In [None]:
# Keep only nodes with a value strictly between 0 and 0.5, then plot them highest first.
d = degree_diff[(degree_diff > 0.0) & (degree_diff < 0.5)]
d_ranked = d.sort_values(ascending=False)
# Place a tick roughly every tenth of the bars. The step is clamped to at least 1
# because len // 10 is 0 for fewer than 10 entries and range() rejects a zero step.
d_tick_step = max(1, len(d_ranked) // 10)
d_ranked.plot(
    kind='bar',
    figsize=(12, 6),
    title='Degree Difference by City',
    ylabel='Degree Difference',
    xticks=range(0, len(d_ranked), d_tick_step),
)