## **Preparation of data for plotting**

Here, we consider that all the networks of interest were already generated and saved. This way, we will augment the networks with extra information
and perform extra calculations to generate data ready for plotting.

Let us consider the two types of networks:<br>
1. **Cities flux networks**<br>
2. **Cities to Hospitals flux networks**<br>

## **-1. Lib**

In [2]:
# -- Bib
import os
import sys
sys.path.append("..")

import networkx as nx
import geopandas as gpd
import pandas as pd
import glob
import pickle
from shapely.geometry import Point
import geopy
from geopy.distance import distance, geodesic
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
import fluxsus.fluxnets.fnets_utils as futils
from tqdm import tqdm

from fluxsus.preprocessing.processnet import NetProperties

PyTables is not installed. No support for HDF output.


In [3]:
idx = pd.IndexSlice

## **0. Load base data**

In [4]:
# -- base paths
basepath = os.path.join(os.environ["HOMEPATH"], "Documents", "data")
#basepath = os.path.join(os.environ["HOME"], "Documents", "data")
cnespath = os.path.join(basepath, "opendatasus", "cnes")
geopath = os.path.join(basepath, "shapefilesceqgis")
gmlpath = os.path.join(basepath, "redes_aih")

In [5]:
# -- load geo
geodata_df = gpd.read_parquet(os.path.join(geopath, "ce_geodata.parquet"))
geodata_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   GEOCOD7            184 non-null    object  
 1   NM_MUNICIP         184 non-null    object  
 2   GEOCOD6            184 non-null    object  
 3   MACRO_ID           184 non-null    int64   
 4   CRES_ID            184 non-null    int64   
 5   geometry_municip   184 non-null    geometry
 6   MACRO_NOME         184 non-null    object  
 7   geometry_macro     184 non-null    geometry
 8   geometry_cres      184 non-null    geometry
 9   MACRO_ID_PROPOSAL  184 non-null    int64   
 10  centroid_municip   184 non-null    geometry
 11  municip_lon        184 non-null    float64 
 12  municip_lat        184 non-null    float64 
 13  2010               184 non-null    float64 
 14  2011               184 non-null    float64 
 15  2012               184 non-null    float64 
 16  

## **1. Maps of proposal**

**Current Macro**

In [5]:
# no preprocessing

**Proposal**

In [6]:
# no preprocessing

**Separate macros**

In [7]:
# no preprocessing

## **2. Population, Hospitals**

**2.1 City population 2010 and 2022**

In [8]:
# -- no processing

**2.2 Hospitals and hospital beds**

In [9]:
# -- no 

In [10]:
cnes_df = pd.read_parquet(os.path.join(cnespath, "cnes_st_0801_2312.parquet"))
cnes_df = cnes_df.merge(geodata_df[["GEOCOD6", "MACRO_ID", "MACRO_ID_PROPOSAL"]], left_on="CODUFMUN", right_on="GEOCOD6", how="left")

leitos_df = pd.read_parquet(os.path.join(cnespath, "cnes_leitos_timeserie_0801_2312.parquet"))

graph = nx.read_gml(os.path.join(gmlpath, "novo_completo", "citytohospitalnet_agg_1801_2306.gml"))

In [11]:
valid_cnes = []
for v in graph.nodes():
    if graph.nodes[v]['type']=='hospital' and graph.degree(v)>0:
        valid_cnes.append(graph.nodes[v]['code'])

valid_cnes_df = cnes_df[cnes_df["CNES"].isin(valid_cnes)]

**2.3 Geolocation of health units**

In [12]:
# -- geolocation of hospitals
cols = ["CNES", "CODUFMUN", "MACRO_ID", "MACRO_ID_PROPOSAL", "latitude", "longitude", "COMPETEN_MAX", "COMPETEN_MIN"]
valid_cnes_df[cols].to_parquet(os.path.join(gmlpath, "dados_for_plot", "cnes_com_aih_1801_2306.parquet"))

In [13]:
valid_cnes_df

Unnamed: 0,CNES,CODUFMUN,COD_CEP,CPF_CNPJ,PF_PJ,NIV_DEP,CNPJ_MAN,COD_IR,REGSAUDE,MICR_REG,...,AP05CV07,AP06CV07,AP07CV07,COMPETEN_MIN,COMPETEN_MAX,latitude,longitude,GEOCOD6,MACRO_ID,MACRO_ID_PROPOSAL
0,9999310,231130,63900085,00064176983353,1,1,00000000000000,,,,...,0,0,0,201912,202312,-4.970753,-39.014854,231130,2,5
611,9675787,230440,60822131,32159518000169,3,1,00000000000000,,,,...,0,0,0,201812,202312,-3.795548,-38.492201,230440,1,1
613,9672427,230760,62930000,00000000000000,3,3,07954571000104,,010,,...,0,0,0,201811,202312,-5.089599,-38.121126,230760,3,6
635,9658815,230440,60115081,31924394000106,3,1,00000000000000,,,,...,0,0,0,201811,202312,-3.732426,-38.511244,230440,1,1
908,9526293,230440,60812030,00000000000000,3,3,04885197000144,,0001,,...,0,0,0,201806,202312,-3.771279,-38.474536,230440,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16457,0156647,230280,62700000,00000000000000,3,3,07963259000187,,,,...,0,0,0,202005,202107,-4.361886,-39.313771,230280,2,5
16458,0153087,230640,62502612,00000000000000,3,3,07623077000167,,0006,,...,0,0,0,202005,202312,-3.496534,-39.597289,230640,1,2
16524,0104477,230440,60020061,00000000000000,3,3,04885197000144,,0001,000001,...,0,0,0,202003,202012,-3.742621,-38.536761,230440,1,1
16525,0100358,231290,62011250,00000000000000,3,3,07598634000137,,,,...,0,0,0,202003,202203,-3.685556,-40.345703,231290,4,8


In [14]:
def get_leitos_municipio(municip_code, leitos_df, cnes_df, valid_cnes):
    cnes_of_municipio = cnes_df[(cnes_df["CODUFMUN"]==municip_code) & (cnes_df["CNES"].isin(valid_cnes))]["CNES"].tolist()
    if len(cnes_of_municipio)==0:
        return 0
    else:
        n_leitos_municipio = max(leitos_df.loc[:, idx[cnes_of_municipio, "NUMLEITOS_PRINC"]].sum(axis=1).tolist()[-5:])
        return n_leitos_municipio
    
def get_hospitais_municipio(municip_code, cnes_df, valid_cnes):
    cnes_of_municipio = cnes_df[(cnes_df["CODUFMUN"]==municip_code) & (cnes_df["CNES"].isin(valid_cnes))]["CNES"].tolist()
    return len(cnes_of_municipio)


muni_leitos, muni_hospitais = {}, {}
for muni in geodata_df["GEOCOD6"].tolist():
    n_leitos_municipio = get_leitos_municipio(muni, leitos_df, cnes_df, valid_cnes)
    n_hospitais_municipio = get_hospitais_municipio(muni, cnes_df, valid_cnes)
    muni_leitos[muni] = n_leitos_municipio
    muni_hospitais[muni] = n_hospitais_municipio

geodata_df["NUMLEITOS"] = geodata_df["GEOCOD6"].map(muni_leitos)
geodata_df["NUMHOSPITAIS"] = geodata_df["GEOCOD6"].map(muni_hospitais)
geodata_df[["GEOCOD6", "NUMLEITOS", "NUMHOSPITAIS"]].to_parquet(os.path.join(gmlpath, "dados_for_plot", "leitos_hospitais_por_municipio_1801_2306.parquet"))
geodata_df.head(3)

Unnamed: 0,GEOCOD7,NM_MUNICIP,GEOCOD6,MACRO_ID,CRES_ID,geometry_municip,MACRO_NOME,geometry_macro,geometry_cres,MACRO_ID_PROPOSAL,...,2016,2017,2018,2019,2020,2021,2022,MACRO_COLOR,NUMLEITOS,NUMHOSPITAIS
0,2300101,ABAIARA,230010,5,19,"POLYGON ((-39.08246 -7.29577, -39.08347 -7.292...",Superintendência Regional de Saúde Cariri,"POLYGON ((-38.67306 -6.70700, -38.67306 -6.707...","POLYGON ((-4354692.823 -865196.973, -4354812.2...",3,...,11380.0,11498.0,11619.0,11737.0,11853.0,11965.0,10038.0,#073b4c,0.0,0
1,2300150,ACARAPE,230015,1,3,"POLYGON ((-38.67268 -4.27393, -38.67268 -4.273...",Superintendência Regional de Saúde Fortaleza,"MULTIPOLYGON (((-39.69667 -2.99902, -39.69216 ...","POLYGON ((-4325426.833 -465433.632, -4325426.8...",1,...,14598.0,14707.0,14820.0,14929.0,15036.0,15140.0,14027.0,#ef476f,0.0,0
2,2300200,ACARAÚ,230020,4,12,"POLYGON ((-39.99113 -3.09797, -39.99117 -3.098...",Superintendência Regional de Saúde Norte,"MULTIPOLYGON (((-39.90892 -3.27414, -39.90892 ...","POLYGON ((-4503430.806 -312287.468, -4503430.7...",2,...,61208.0,61679.0,62165.0,62641.0,63104.0,63556.0,64806.0,#118ab2,38.0,1


**Stats per macro (Original)**

In [15]:
# -- no processing

**Stats per macro (Proposta)**

In [16]:
# -- no processing

## **3. Geodesic distance**

In [17]:
# -- load geo
geodata_df = gpd.read_parquet(os.path.join(geopath, "ce_geodata.parquet"))

# -- create the two datasets
# -- one: geolocation of health units who genereated an AIH during 2018-2023 (this period can be flexible)
# -- two: geolocation of census units with the number of population contained in each unit
cnes_df = pd.read_parquet(os.path.join(cnespath, "cnes_st_0801_2312.parquet"))
cnes_df = cnes_df[["CNES", "CODUFMUN", "latitude", "longitude"]]
cnes_df = cnes_df.merge(geodata_df[["GEOCOD6", "MACRO_ID"]], left_on="CODUFMUN", right_on="GEOCOD6", how="left").drop("GEOCOD6", axis=1)

pop_census_df = gpd.read_parquet(os.path.join(geopath, "censo2010_pop_setores.parquet"))
pop_census_df1 = pop_census_df[["CD_GEOCODI", "CD_GEOCODM", "geometry", "Pop_setor_censo2010"]].copy()
pop_census_df1["GEOCOD6"] = pop_census_df1["CD_GEOCODM"].apply(lambda x: x[:6])
pop_census_df1 = pop_census_df1.merge(geodata_df[["GEOCOD6", "MACRO_ID"]], on="GEOCOD6", how="left")
pop_census_df1['centroid'] = pop_census_df1['geometry'].centroid
pop_census_df1 = pop_census_df1.drop('geometry', axis=1).rename({'centroid': 'geometry'}, axis=1).set_geometry('geometry')
pop_census_df1 = pop_census_df1.to_crs(epsg=29194).copy()

# -- the city-hospital bipartite network can provide which health units are actually relevant for analysis (generated at least one AIH during the period chosen)
graph = nx.read_gml(os.path.join(gmlpath, "novo_completo", "citytohospitalnet_agg_1801_2306.gml"))

# -- filter only the relevant health units
valid_cnes = [ graph.nodes[v]['code'] for v in graph.nodes() if graph.nodes[v]['type']=='hospital' and graph.degree(v)>0 ]
valid_cnes_df = cnes_df[cnes_df["CNES"].isin(valid_cnes)]

# -- define the geometry of the health units
valid_cnes_df["geometry"] = gpd.points_from_xy(valid_cnes_df.longitude, valid_cnes_df.latitude)
valid_cnes_df = gpd.GeoDataFrame(valid_cnes_df, geometry='geometry', crs="EPSG:4674")
# -- health units in projection for meters
valid_cnes_df1 = valid_cnes_df.to_crs(epsg=29194).copy()
#valid_cnes_df1 = valid_cnes_df.copy()


  pop_census_df1['centroid'] = pop_census_df1['geometry'].centroid
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_cnes_df["geometry"] = gpd.points_from_xy(valid_cnes_df.longitude, valid_cnes_df.latitude)


In [18]:
# -- distance from sectors to hospitals

macro_id = 2
subset_cnes = valid_cnes_df1[valid_cnes_df1["MACRO_ID"]==macro_id]
subset_pop_setor = pop_census_df1[pop_census_df1["MACRO_ID"]==macro_id]

for index in range(subset_cnes.shape[0]):
    current_point_cnes = subset_cnes.geometry.iloc[index]

macro_distances = subset_cnes.geometry.apply(lambda g: subset_pop_setor.distance(g)/1000).values.flatten() # km

In [19]:
# -- distance from sectors to hospitals (weighted by population)

macro_id = 2
subset_cnes = valid_cnes_df1[valid_cnes_df1["MACRO_ID"]==macro_id]
subset_pop_setor = pop_census_df1[pop_census_df1["MACRO_ID"]==macro_id]

weighted_distances = []
for index in range(subset_cnes.shape[0]):
    current_point_cnes = subset_cnes.geometry.iloc[index]

    res = (subset_pop_setor.distance(current_point_cnes)/1000).apply(lambda x: [x] if pd.notna(x) else np.nan)*subset_pop_setor["Pop_setor_censo2010"].fillna(0).astype(int)
    weighted_distances.append(res)

weighted_distances = pd.concat(weighted_distances)

In [21]:
base_dist = []
for lst in weighted_distances.values:
    base_dist += lst

In [22]:
base_dist

[82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,
 82.27692298734185,


In [23]:
bins = np.arange(0,305,10)
va1, _ = np.histogram(base_dist, bins)

**Macro distances (original)**

In [10]:
# no processing

**Macro distances (proposta)**

In [11]:
# no processing

## **4. CCA for hospitals**

**Plotting for CCA diagram**

In [5]:
# -- no processing

**Algorithm**

In [29]:
cnes_df = pd.read_parquet(os.path.join(cnespath, "cnes_st_0801_2312.parquet"))[["CNES", "latitude", "longitude"]]

# ---- geolocation of census tract units (brazil)
pop_census_df = gpd.read_parquet(os.path.join(geopath, "censo2010_pop_setores.parquet"))
pop_census_df1 = pop_census_df[["CD_GEOCODI", "geometry"]].copy()
pop_census_df1['centroid'] = pop_census_df1['geometry'].centroid
pop_census_df1 = pop_census_df1.drop('geometry', axis=1).rename({'centroid': 'geometry'}, axis=1).set_geometry('geometry')

sihpath = os.path.join(basepath, "opendatasus", "sihsus", "PARQUET")
list_of_cnes = futils.list_of_cnes_with_aih(sihpath, "RDCE1801", "RDCE2306")
valid_cnes_df = cnes_df[cnes_df["CNES"].isin(list_of_cnes)]
valid_cnes_df["geometry"] = gpd.points_from_xy(valid_cnes_df.longitude, valid_cnes_df.latitude)
valid_cnes_df = gpd.GeoDataFrame(valid_cnes_df.drop(["latitude", "longitude"], axis=1), geometry='geometry', crs="EPSG:4674")

# -- standard identifier column
pop_census_df1 = pop_census_df1.rename({"CD_GEOCODI": "IDENT"}, axis=1)
valid_cnes_df = valid_cnes_df.rename({"CNES": "IDENT"}, axis=1)


  pop_census_df1['centroid'] = pop_census_df1['geometry'].centroid
100%|██████████| 66/66 [00:02<00:00, 29.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_cnes_df["geometry"] = gpd.points_from_xy(valid_cnes_df.longitude, valid_cnes_df.latitude)


In [31]:
for lhos in [4, 6, 8, 10, 12, 14, 16, 18, 20, 22]:
    print(lhos)
    sector_dfs = []
    for lsector in tqdm(np.arange(lhos, 38, 2)):
        sector_to_module, hospital_to_module = futils.cca_health(pop_census_df1, valid_cnes_df, lsector=lsector, lhos=lhos, ident_col="IDENT")
        curr_df = pd.DataFrame.from_dict(sector_to_module, orient='index').reset_index().rename({'index': "CD_GEOCODI", 0: f"module_lsec{lsector}_lhos{lhos}"}, axis=1).set_index("CD_GEOCODI").sort_index()
        sector_dfs.append(curr_df)
    sector_dfs = pd.concat(sector_dfs, axis=1)
    sector_dfs.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"sector_modules_cca_lhosp{lhos}.parquet"))

4


100%|██████████| 17/17 [00:59<00:00,  3.51s/it]


6


100%|██████████| 16/16 [00:59<00:00,  3.70s/it]


8


100%|██████████| 15/15 [00:56<00:00,  3.77s/it]


10


100%|██████████| 14/14 [00:54<00:00,  3.89s/it]


12


100%|██████████| 13/13 [00:52<00:00,  4.02s/it]


14


100%|██████████| 12/12 [00:48<00:00,  4.05s/it]


16


100%|██████████| 11/11 [00:44<00:00,  4.07s/it]


18


100%|██████████| 10/10 [00:41<00:00,  4.17s/it]


20


100%|██████████| 9/9 [00:37<00:00,  4.13s/it]


22


100%|██████████| 8/8 [00:33<00:00,  4.24s/it]


array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])

## **5. Flux networks (city2city)**

**Infomap regarding to fluxes (normalized)**

In [68]:
suffix = 'agg_1801_2212'
fname = f'cityfluxnet_{suffix}.gml'
graph = nx.read_gml(os.path.join(gmlpath, 'novo_completo', fname))

# -- include 2022 population to nodes
pop_2022 = dict(zip(geodata_df['GEOCOD6'], geodata_df['2022']))

for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    graph.nodes[v]['pop_2022'] = pop_2022[muncode]

# -- normalize flow based on the population
factor = 1000
for src, tgt in graph.edges():
    pop_src = graph.nodes[src]['pop_2022']
    outflow = graph.edges[src, tgt]['admission_count']
    outflow_cost = graph.edges[src, tgt]['total_cost']
    graph.edges[src, tgt]['admission_count_norm_pop'] = (outflow/pop_src)*factor
    graph.edges[src, tgt]['total_cost_norm_pop'] = (outflow_cost/pop_src)

# -- calculate in and out flow for each node
graph_prop = NetProperties(graph)
graph_prop.calculate_in_flow(weight_people_col='admission_count', weight_cost_col='total_cost').calculate_out_flow(weight_people_col='admission_count', weight_cost_col='total_cost')
graph_prop.process_infomap_graph(weight_people_col='admission_count_norm_pop', weight_cost_col='total_cost_norm_pop', people_property_name='infomap_count_norm_pop_mod_id', cost_property_name='infomap_cost_norm_pop_mod_id')

# -- create dict for infomap modules
infomap_code_to_mod_count = dict()
infomap_code_to_mod_cost = dict()
for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    module_id1 = graph.nodes[v]['infomap_count_norm_pop_mod_id']
    module_id2 = graph.nodes[v]['infomap_cost_norm_pop_mod_id']
    infomap_code_to_mod_count.update({muncode:module_id1})
    infomap_code_to_mod_cost.update({muncode:module_id2})

geodata_df_mod = geodata_df.copy()
geodata_df_mod["infomap_modules_count_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_count)
geodata_df_mod["infomap_modules_cost_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_cost)

cols = ["GEOCOD6", "infomap_modules_count_norm_pop", "infomap_modules_cost_norm_pop"]
geodata_df_mod[cols].to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_infomap_modules_{suffix}.parquet"))

found 10 modules with codelength: 2.5835095577379503
found 4 modules with codelength: 2.547002819397024


In [64]:
geodata_df_mod[cols]

Unnamed: 0,GEOCOD6,infomap_modules_count_norm_pop,infomap_modules_cost_norm_pop
0,230010,,
1,230015,3.0,3.0
2,230020,,
3,230030,,
4,230040,,
...,...,...,...
179,231380,,
180,231390,,
181,231395,,
182,231400,,


{'municipio_code': '230010', 'municipio_name': 'ABAIARA', 'macro_id': 5, 'macro_new_id': 3, 'macro_name': 'Superintendência Regional de Saúde Cariri', 'cres_id': 19, 'lat': -7.35990681440754, 'lon': -39.03753839591732, 'pop_2022': 10038.0, 'incoming_people': 0, 'incoming_cost': 0, 'out_people': 2665, 'out_cost': 4265590.140000001, 'infomap_count_norm_pop_mod_id': 10, 'infomap_cost_norm_pop_mod_id': 5}


In [30]:
for u, v in graph.edges():
    print(graph.edges[u,v])
    break

{'admission_count': 31, 'total_cost': 19061.94, 'source_macro': 5.0, 'target_macro': 5, 'source_micro': 19.0, 'target_micro': 19, 'same_macro': 5.0, 'same_micro': 19.0, 'admission_count_ch1': 0.0, 'total_cost_ch1': 0.0, 'admission_count_ch2': 0.0, 'total_cost_ch2': 0.0, 'admission_count_ch3': 0.0, 'total_cost_ch3': 0.0, 'admission_count_ch4': 0.0, 'total_cost_ch4': 0.0, 'admission_count_ch5': 0.0, 'total_cost_ch5': 0.0, 'admission_count_ch6': 0.0, 'total_cost_ch6': 0.0, 'admission_count_ch7': 0.0, 'total_cost_ch7': 0.0, 'admission_count_ch8': 0.0, 'total_cost_ch8': 0.0, 'admission_count_ch9': 0.0, 'total_cost_ch9': 0.0, 'admission_count_ch10': 0.0, 'total_cost_ch10': 0.0, 'admission_count_ch11': 7.0, 'total_cost_ch11': 3900.98, 'admission_count_ch12': 0.0, 'total_cost_ch12': 0.0, 'admission_count_ch13': 1.0, 'total_cost_ch13': 948.13, 'admission_count_ch14': 7.0, 'total_cost_ch14': 3875.07, 'admission_count_ch15': 16.0, 'total_cost_ch15': 10337.76, 'admission_count_ch16': 0.0, 'total_c

**Infomap to isolated macros (original)**

In [44]:
macro_id = 5

suffix = 'agg_1801_2212'
fname = f'cityfluxnet_{suffix}.gml'
graph = nx.read_gml(os.path.join(gmlpath, 'novo_completo', fname))

# -- filter graph
nodes_to_remove = []
for v in graph.nodes():
    cur_macro = graph.nodes[v]['macro_id']
    if cur_macro!=macro_id:
        nodes_to_remove.append(v)
graph.remove_nodes_from(nodes_to_remove)

# -- include 2022 population to nodes
pop_2022 = dict(zip(geodata_df['GEOCOD6'], geodata_df['2022']))

for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    graph.nodes[v]['pop_2022'] = pop_2022[muncode]

# -- normalize flow based on the population
factor = 1000
for src, tgt in graph.edges():
    pop_src = graph.nodes[src]['pop_2022']
    outflow = graph.edges[src, tgt]['admission_count']
    outflow_cost = graph.edges[src, tgt]['total_cost']
    graph.edges[src, tgt]['admission_count_norm_pop'] = (outflow/pop_src)*factor
    graph.edges[src, tgt]['total_cost_norm_pop'] = (outflow_cost/pop_src)

# -- calculate in and out flow for each node
graph_prop = NetProperties(graph)
graph_prop.calculate_in_flow(weight_people_col='admission_count', weight_cost_col='total_cost').calculate_out_flow(weight_people_col='admission_count', weight_cost_col='total_cost')
graph_prop.process_infomap_graph(weight_people_col='admission_count_norm_pop', weight_cost_col='total_cost_norm_pop', people_property_name='infomap_count_norm_pop_mod_id', cost_property_name='infomap_cost_norm_pop_mod_id')

# -- create dict for infomap modules
infomap_code_to_mod_count = dict()
infomap_code_to_mod_cost = dict()
for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    module_id1 = graph.nodes[v]['infomap_count_norm_pop_mod_id']
    module_id2 = graph.nodes[v]['infomap_cost_norm_pop_mod_id']
    infomap_code_to_mod_count.update({muncode:module_id1})
    infomap_code_to_mod_cost.update({muncode:module_id2})

geodata_df_mod = geodata_df.copy()
geodata_df_mod["infomap_modules_count_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_count)
geodata_df_mod["infomap_modules_cost_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_count)

cols = ["GEOCOD6", "infomap_modules_count_norm_pop", "infomap_modules_cost_norm_pop"]
geodata_df_mod[cols].to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_infomap_modules_{suffix}_macro{macro_id}.parquet"))

found 33 modules with codelength: 2.026331136465984
found 35 modules with codelength: 1.6318107450203745


**Infomap to isolated macros (proposal)**

In [62]:
macro_id = 1

suffix = 'agg_1801_2306'
fname = f'cityfluxnet_{suffix}.gml'
graph = nx.read_gml(os.path.join(gmlpath, 'novo_completo', fname))

# -- filter graph
nodes_to_remove = []
for v in graph.nodes():
    cur_macro = graph.nodes[v]['macro_new_id']
    if cur_macro!=macro_id:
        nodes_to_remove.append(v)
graph.remove_nodes_from(nodes_to_remove)

# -- include 2022 population to nodes
pop_2022 = dict(zip(geodata_df['GEOCOD6'], geodata_df['2022']))

for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    graph.nodes[v]['pop_2022'] = pop_2022[muncode]

# -- normalize flow based on the population
factor = 1000
for src, tgt in graph.edges():
    pop_src = graph.nodes[src]['pop_2022']
    outflow = graph.edges[src, tgt]['admission_count']
    outflow_cost = graph.edges[src, tgt]['total_cost']
    graph.edges[src, tgt]['admission_count_norm_pop'] = (outflow/pop_src)*factor
    graph.edges[src, tgt]['total_cost_norm_pop'] = (outflow_cost/pop_src)

# -- calculate in and out flow for each node
graph_prop = NetProperties(graph)
graph_prop.calculate_in_flow(weight_people_col='admission_count', weight_cost_col='total_cost').calculate_out_flow(weight_people_col='admission_count', weight_cost_col='total_cost')
graph_prop.process_infomap_graph(weight_people_col='admission_count_norm_pop', weight_cost_col='total_cost_norm_pop', people_property_name='infomap_count_norm_pop_mod_id', cost_property_name='infomap_cost_norm_pop_mod_id')

# -- create dict for infomap modules
infomap_code_to_mod_count = dict()
infomap_code_to_mod_cost = dict()
for v in graph.nodes():
    muncode = graph.nodes[v]['municipio_code']
    module_id1 = graph.nodes[v]['infomap_count_norm_pop_mod_id']
    module_id2 = graph.nodes[v]['infomap_cost_norm_pop_mod_id']
    infomap_code_to_mod_count.update({muncode:module_id1})
    infomap_code_to_mod_cost.update({muncode:module_id2})

geodata_df_mod = geodata_df.copy()
geodata_df_mod["infomap_modules_count_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_count)
geodata_df_mod["infomap_modules_cost_norm_pop"] = geodata_df_mod["GEOCOD6"].map(infomap_code_to_mod_count)

cols = ["GEOCOD6", "infomap_modules_count_norm_pop", "infomap_modules_cost_norm_pop"]
geodata_df_mod[cols].to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_infomap_modules_{suffix}_macronew{macro_id}.parquet"))

found 24 modules with codelength: 1.8560111748816774
found 23 modules with codelength: 1.7807361666002661


## **6. Balance flux per city (city2city)**

In [31]:
def calculate_balance_flow(suffix_net='agg_1801_2212', geodata_df=None,
                           admission_col='admission_count', cost_col="total_cost", suffix=''):
    fname = f'cityfluxnet_{suffix_net}.gml'
    graph = nx.read_gml(os.path.join(gmlpath, 'novo_completo', fname))

    # -- include 2022 population to nodes
    pop_2022 = dict(zip(geodata_df['GEOCOD6'], geodata_df['2022']))

    for v in graph.nodes():
        muncode = graph.nodes[v]['municipio_code']
        graph.nodes[v]['pop_2022'] = pop_2022[muncode]

    # -- normalize flow based on the population
    factor = 1000
    for src, tgt in graph.edges():
        pop_src = graph.nodes[src]['pop_2022']
        outflow = graph.edges[src, tgt]['admission_count']
        outflow_cost = graph.edges[src, tgt]['total_cost']
        graph.edges[src, tgt]['admission_count_norm_pop'] = (outflow/pop_src)*factor
        graph.edges[src, tgt]['total_cost_norm_pop'] = (outflow_cost/pop_src)

    # -- calculate in and out flow for each node
    graph_prop = NetProperties(graph)
    graph_prop.calculate_in_flow(weight_people_col=admission_col, weight_cost_col=cost_col).calculate_out_flow(weight_people_col=admission_col, weight_cost_col=cost_col)

    # -- calculate balance of flow of people and flow of costs
    for v in graph_prop.graph.nodes():
        inpeople, outpeople = graph_prop.graph.nodes[v]['incoming_people'], graph_prop.graph.nodes[v]['out_people']
        incost, outcost = graph_prop.graph.nodes[v]['incoming_cost'], graph_prop.graph.nodes[v]['out_cost']
        graph_prop.graph.nodes[v]['balance_people'] = outpeople - inpeople
        graph_prop.graph.nodes[v]['balance_cost'] = outcost - incost
        graph_prop.graph.nodes[v]['balance_people_norm'] = (outpeople - inpeople)/(outpeople+inpeople)
        graph_prop.graph.nodes[v]['balance_cost_norm'] = (outcost - incost)/(outcost+incost)

    # -- metadata on balance
    balance_people, balance_cost = dict(), dict()
    balance_people_norm, balance_cost_norm = dict(), dict()
    for v in graph_prop.graph.nodes():
        muncode = graph_prop.graph.nodes[v]['municipio_code']
        bal_people, bal_cost = graph_prop.graph.nodes[v]['balance_people'], graph_prop.graph.nodes[v]['balance_cost']
        bal_people_norm, bal_cost_norm = graph_prop.graph.nodes[v]['balance_people_norm'], graph_prop.graph.nodes[v]['balance_cost_norm']
        balance_people.update({muncode:bal_people})
        balance_cost.update({muncode:bal_cost})
        balance_people_norm.update({muncode:bal_people_norm})
        balance_cost_norm.update({muncode:bal_cost_norm})

    geodata_df_mod = geodata_df.copy()
    geodata_df_mod["balance_people"] = geodata_df_mod["GEOCOD6"].map(balance_people)
    geodata_df_mod["balance_cost"] = geodata_df_mod["GEOCOD6"].map(balance_cost)
    geodata_df_mod["balance_people_norm"] = geodata_df_mod["GEOCOD6"].map(balance_people_norm)
    geodata_df_mod["balance_cost_norm"] = geodata_df_mod["GEOCOD6"].map(balance_cost_norm)
    cols = ["GEOCOD6", "balance_people", "balance_cost", "balance_people_norm", "balance_cost_norm"]
    return geodata_df_mod[cols]

In [38]:
suffix = 'agg_1801_2212'
geodata_df_mod = calculate_balance_flow(suffix_net=suffix, geodata_df=geodata_df,
                                        admission_col="admission_count", cost_col='total_cost')

geodata_df_mod2 = calculate_balance_flow(suffix_net=suffix, geodata_df=geodata_df,
                                        admission_col="admission_count_ch2", cost_col='total_cost_ch2')

geodata_df_mod9 = calculate_balance_flow(suffix_net=suffix, geodata_df=geodata_df,
                                        admission_col="admission_count_ch9", cost_col='total_cost_ch9')

geodata_df_mod10 = calculate_balance_flow(suffix_net=suffix, geodata_df=geodata_df,
                                        admission_col="admission_count_ch10", cost_col='total_cost_ch10')


geodata_df_mod.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_balance_flow_{suffix}.parquet"))
geodata_df_mod2.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_balance_flow_{suffix}_ch2.parquet"))
geodata_df_mod9.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_balance_flow_{suffix}_ch9.parquet"))
geodata_df_mod10.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geocod_to_balance_flow_{suffix}_ch10.parquet"))

In [19]:
graph_prop.graph.nodes[v]

{'municipio_code': '231410',
 'municipio_name': 'VIÇOSA DO CEARÁ',
 'macro_id': 4,
 'macro_new_id': 8,
 'macro_name': 'Superintendência Regional de Saúde Norte',
 'cres_id': 13,
 'lat': -3.5450734133203823,
 'lon': -41.13613634135282,
 'pop_2022': 59712.0,
 'incoming_people': 201,
 'incoming_cost': 102609.48,
 'out_people': 6699,
 'out_cost': 13601667.34,
 'balance_people': -6498,
 'balance_cost': -13499057.86}

## **0. Time series of hospitals and hospital beds per macro**

In [None]:
cnes_df = pd.read_parquet(os.path.join(cnespath, "cnes_st_0801_2312.parquet"))
leitos_df = pd.read_parquet(os.path.join(cnespath, "cnes_leitos_timeserie_0801_2312.parquet")).fillna(0)

# -- get info on macro for each CNES
cnes_macro = cnes_df.merge(geodata_df[["MACRO_ID", "MACRO_ID_PROPOSAL", "GEOCOD6"]], left_on="CODUFMUN", right_on="GEOCOD6", how="left")[["CNES", "CODUFMUN", "MACRO_ID", "MACRO_ID_PROPOSAL"]]

In [None]:
# -- objective: for each macro, sum the number of hospital beds of the hospitals inside the macro
def count_hospitalbed_per_macro(leitos_df, macro_id, cnes_macro, macro_col="MACRO_ID"):
    ''' 
        Return the number of hospital beds (main and all) for the macrorregion for each time
        point.
    '''
    cnes_list = cnes_macro[cnes_macro[macro_col]==macro_id]["CNES"].tolist()
    subset_pri =  leitos_df.loc[:, idx[cnes_list, 'NUMLEITOS_PRINC']].sum(axis=1)
    subset_todos =  leitos_df.loc[:, idx[cnes_list, 'NUMLEITOS_TODOS']].sum(axis=1)
    macro_res = pd.DataFrame({
        "period": leitos_df.index, "macro_leitos_princ": subset_pri, "macro_leitos_todos": subset_todos
    })
    return macro_res

macro_original = { n : count_hospitalbed_per_macro(leitos_df, n, cnes_macro, macro_col="MACRO_ID") for n in range(1,5+1)  }
macro_proposal = { n : count_hospitalbed_per_macro(leitos_df, n, cnes_macro, macro_col="MACRO_ID_PROPOSAL") for n in range(1,8+1)  }

In [None]:
macro_original = pd.concat([ macro_original[key] for key in macro_original.keys() ], axis=1, keys=macro_original.keys())
macro_proposal = pd.concat([ macro_proposal[key] for key in macro_proposal.keys() ], axis=1, keys=macro_proposal.keys())


In [None]:
macro_original.to_parquet(os.path.join(gmlpath, "dados_for_plot", "timeserie_macro_original_beds.parquet"))
macro_proposal.to_parquet(os.path.join(gmlpath, "dados_for_plot", "timeserie_macro_proposal_beds.parquet"))


In [None]:
macro_original.head()

Unnamed: 0_level_0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5
Unnamed: 0_level_1,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos
801,801,6406.0,9918.0,801,499.0,882.0,801,546.0,901.0,801,1389.0,2414.0,801,2180.0,3362.0
802,802,6450.0,9962.0,802,499.0,882.0,802,546.0,901.0,802,1389.0,2412.0,802,2186.0,3368.0
803,803,6450.0,9971.0,803,499.0,884.0,803,546.0,908.0,803,1389.0,2418.0,803,2192.0,3383.0
804,804,6611.0,10148.0,804,499.0,884.0,804,552.0,914.0,804,1362.0,2388.0,804,2189.0,3371.0
805,805,6632.0,10192.0,805,499.0,884.0,805,552.0,914.0,805,1365.0,2391.0,805,2184.0,3367.0


In [None]:
macro_proposal.head()

Unnamed: 0_level_0,1,1,1,2,2,2,3,3,3,4,...,5,6,6,6,7,7,7,8,8,8
Unnamed: 0_level_1,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,...,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos,period,macro_leitos_princ,macro_leitos_todos
801,801,6111.0,9337.0,801,520.0,1031.0,801,1606.0,2325.0,801,...,721.0,801,558.0,929.0,801,386.0,623.0,801,855.0,1474.0
802,802,6155.0,9381.0,802,520.0,1031.0,802,1607.0,2326.0,802,...,721.0,802,558.0,929.0,802,386.0,622.0,802,855.0,1473.0
803,803,6155.0,9390.0,803,520.0,1037.0,803,1613.0,2337.0,803,...,723.0,803,558.0,936.0,803,386.0,622.0,803,855.0,1473.0
804,804,6315.0,9565.0,804,497.0,1005.0,804,1613.0,2330.0,804,...,723.0,804,564.0,942.0,804,386.0,629.0,804,852.0,1470.0
805,805,6352.0,9633.0,805,480.0,980.0,805,1613.0,2330.0,805,...,723.0,805,564.0,942.0,805,390.0,633.0,805,852.0,1470.0


## **1. Between cities flux networks**

### **1.1 Mapping and macros**

In [None]:
netsuffix = "agg_2001_2012"

# -- load and perform calculations
fname = f"cityfluxnet_{netsuffix}.gml"
graph = nx.read_gml(os.path.join(gmlpath, "novo_completo", fname))
netprop = NetProperties(graph).calculate_in_flow(weight_people_col='admission_count', weight_cost_col='total_cost').calculate_out_flow(weight_people_col='admission_count', weight_cost_col='total_cost')
netprop.process_infomap_graph().process_louvain_graph()
graph = netprop.graph

# -- complementary info on graph (macro proposal)
#temp_ = dict(zip(geodata_df["GEOCOD6"], geodata_df["MACRO_ID_PROPOSAL"]))
#for v in graph.nodes():
#    graph.nodes[v]['macro_id_proposal'] = temp_[graph.nodes[v]['municipio_code']]

# -- add modules info on the geodataframe
geodata_df1 = geodata_df.copy()
geodata_df1["infomap_count_module"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['infomap_count_module_id'] for u in graph.nodes()})
geodata_df1["infomap_cost_module"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['infomap_cost_module_id'] for u in graph.nodes()})
geodata_df1["louvain_count_module"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['louvain_count_module_id'] for u in graph.nodes()})
geodata_df1["louvain_cost_module"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['louvain_cost_module_id'] for u in graph.nodes()})
geodata_df1["inflow_people"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['incoming_people'] for u in graph.nodes()})
geodata_df1["inflow_cost"] = geodata_df1["GEOCOD6"].map({graph.nodes[u]['municipio_code']: graph.nodes[u]['incoming_cost'] for u in graph.nodes()})

found 9 modules with codelength: 2.5192578014211713
found 3 modules with codelength: 2.4760044338086376


In [None]:
# -- specify base colors
# ---- original colors based on specific cities
# ---- new modules will be colored with the original colors if one of the cities below is included

cmap_macro_original = {1: "#ef476f", 2: "#ffb300", 3: "#04ae81", 4: "#118ab2", 5: "#073b4c"}

citycode_colors = {"230440": "#ef476f", "230730": "#073b4c", "230410": "#118ab2", "231140": "#ffb300", "231180": "#04ae81"}
citylabel_colors = {"58": "#ef476f", "98": "#073b4c", "49": "#118ab2", "150": "#ffb300", "154": "#04ae81"}
extra_colors = ["#00756a", "#bcb20f", "#ff8239", "#c6881c", "#00a2a3", "#4d4d4d", "#f0e2e7", "#dbfe87", "#1c448e"]

# -- create new cmap for new modules
#cmap_infomap_count, cmap_infomap_cost = {}, {}
#cmap_louvain_count, cmap_louvain_cost = {}, {}

def get_cmap(graph, citycode_colors, extra_colors, property_name):
    cmap = {}
    n_modules = len(set( [ graph.nodes[v][property_name] for v in graph.nodes() ] ))
    for v in graph.nodes():
        if graph.nodes[v]['municipio_code'] in citycode_colors.keys():
            if graph.nodes[v][property_name] not in cmap.keys():
                cmap.update({graph.nodes[v][property_name] : citycode_colors[graph.nodes[v]['municipio_code']]})
    extra_colors_ = extra_colors[:] + [ x for x in citycode_colors.values() if x not in cmap.values() ]
    dummy_index = 0
    for n in range(n_modules):
        if n+1 not in cmap.keys():
            cmap.update({ n+1: extra_colors_[dummy_index]})
            dummy_index+=1
    return cmap

cmap_11 = get_cmap(graph, citycode_colors, extra_colors, 'infomap_count_module_id')
cmap_13 = get_cmap(graph, citycode_colors, extra_colors, 'infomap_cost_module_id')
cmap_21 = get_cmap(graph, citycode_colors, extra_colors, 'louvain_count_module_id')
cmap_23 = get_cmap(graph, citycode_colors, extra_colors, 'louvain_cost_module_id')
#cmap_prop = get_cmap(graph, citycode_colors, extra_colors, 'macro_id_proposal')


geodata_df1["INFOMAP_COUNT_COLOR"] = geodata_df1["infomap_count_module"].map(cmap_11)
geodata_df1["INFOMAP_COST_COLOR"] = geodata_df1["infomap_cost_module"].map(cmap_13)
geodata_df1["LOUVAIN_COUNT_COLOR"] = geodata_df1["louvain_count_module"].map(cmap_21)
geodata_df1["LOUVAIN_COST_COLOR"] = geodata_df1["louvain_cost_module"].map(cmap_23)
#geodata_df1["MACRO_PROPOSAL_COLOR"] = geodata_df1["MACRO_ID_PROPOSAL"].map(cmap_prop)

In [None]:
cmap_11

{7: '#118ab2',
 1: '#ef476f',
 2: '#073b4c',
 9: '#04ae81',
 3: '#00756a',
 4: '#bcb20f',
 5: '#ff8239',
 6: '#c6881c',
 8: '#00a2a3'}

In [None]:
geodata_df1 = geodata_df1.drop(["centroid_municip"], axis=1)
geodata_df1.to_parquet(os.path.join(gmlpath, "dados_for_plot", f"geoforplot_{netsuffix}.parquet"))

In [None]:
geodata_df1.columns

Index(['GEOCOD7', 'NM_MUNICIP', 'GEOCOD6', 'MACRO_ID', 'CRES_ID',
       'geometry_municip', 'MACRO_NOME', 'geometry_macro', 'geometry_cres',
       'MACRO_ID_PROPOSAL', 'municip_lon', 'municip_lat', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', '2022', 'MACRO_COLOR', 'infomap_count_module',
       'infomap_cost_module', 'louvain_count_module', 'louvain_cost_module',
       'inflow_people', 'inflow_cost', 'INFOMAP_COUNT_COLOR',
       'INFOMAP_COST_COLOR', 'LOUVAIN_COUNT_COLOR', 'LOUVAIN_COST_COLOR'],
      dtype='object')

### **1.1 time series networks - Fluxes over time**

In [None]:
fnames = sorted(glob.glob(os.path.join(gmlpath, "novo_completo", "*noverlap*")))
list_of_networks = [ nx.read_gml(fname) for fname in fnames ]

**Define utility functions**

Calculate the incoming, outcoming and internal flows for each macrorregion.

In [None]:
def get_fluxes_macro(graph, macro_indices=["1", "2", "3", "4", "5"]):
    '''
        Calculate the incoming and outgoing fluxes for each macro. Also calculate the internal flux. 
    '''
    # -- initialize flows dict.
    macro_in_out = {
        ind: {
            "INCOMING": 0,
            "OUTGOING": 0,
            "INTERNAL": 0
        } for ind in macro_indices
    }

    # -- calculation
    for src, tgt in graph.edges():
        cur_action = graph.edges[(src, tgt)]
        
        src_macro, tgt_macro = str(int(cur_action['source_macro'])), str(int(cur_action['target_macro']))
        flux_count = cur_action['admission_count']

        if src_macro==tgt_macro:
            macro_in_out[src_macro]["INTERNAL"] += flux_count
        else:
            macro_in_out[src_macro]["OUTGOING"] += flux_count
            macro_in_out[tgt_macro]["INCOMING"] += flux_count
    return macro_in_out

def macro_flows_time(list_of_graphs, macro_indices=["1", "2", "3", "4", "5"]):
    ''' 
    
    '''
    # -- initialize flows dict.
    macro_in_out = {
        ind: {
            "INCOMING": [],
            "OUTGOING": [],
            "INTERNAL": []
        } for ind in macro_indices
    }
    for graph in list_of_graphs:
        res = get_fluxes_macro(graph)
        for key in res.keys():
            macro_in_out[key]["INCOMING"].append(res[key]["INCOMING"])
            macro_in_out[key]["OUTGOING"].append(res[key]["OUTGOING"])
            macro_in_out[key]["INTERNAL"].append(res[key]["INTERNAL"])
    return macro_in_out

In [None]:
macro_flows = macro_flows_time(list_of_networks)
with open(os.path.join(gmlpath, 'dados_for_plot', 'macro_original_fluxes.pickle'), 'wb') as handle:
    pickle.dump(macro_flows, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import graph_tool as gt
from graph_tool.inference import minimize_blockmodel_dl

In [None]:
fname = "cityfluxnet_agg_1801_2212.gml"
graph = gt.load_graph(os.path.join(gmlpath, "novo_completo", fname))

In [None]:
state = minimize_blockmodel_dl(graph)
modules_v = state.get_blocks()

In [None]:
# YYMM
fname = "cityfluxnet_agg_1801_2212.gml"
graph = nx.read_gml(os.path.join(gmlpath, "novo_completo", fname))

In [None]:
for v in graph.nodes():
    break
graph.nodes[v]

{'municipio_code': '230010',
 'municipio_name': 'ABAIARA',
 'macro_id': 5,
 'macro_new_id': 3,
 'macro_name': 'Superintendência Regional de Saúde Cariri',
 'cres_id': 19,
 'lat': -7.35990681440754,
 'lon': -39.03753839591732}

In [None]:
for u, v in graph.edges():
    break
graph.edges[(u,v)]

{'admission_count': 27,
 'total_cost': 16354.65,
 'source_macro': 5.0,
 'target_macro': 5,
 'source_micro': 19.0,
 'target_micro': 19,
 'same_macro': 5.0,
 'same_micro': 19.0,
 'admission_count_ch1': 0.0,
 'total_cost_ch1': 0.0,
 'admission_count_ch2': 0.0,
 'total_cost_ch2': 0.0,
 'admission_count_ch3': 0.0,
 'total_cost_ch3': 0.0,
 'admission_count_ch4': 0.0,
 'total_cost_ch4': 0.0,
 'admission_count_ch5': 0.0,
 'total_cost_ch5': 0.0,
 'admission_count_ch6': 0.0,
 'total_cost_ch6': 0.0,
 'admission_count_ch7': 0.0,
 'total_cost_ch7': 0.0,
 'admission_count_ch8': 0.0,
 'total_cost_ch8': 0.0,
 'admission_count_ch9': 0.0,
 'total_cost_ch9': 0.0,
 'admission_count_ch10': 0.0,
 'total_cost_ch10': 0.0,
 'admission_count_ch11': 6.0,
 'total_cost_ch11': 3420.04,
 'admission_count_ch12': 0.0,
 'total_cost_ch12': 0.0,
 'admission_count_ch13': 1.0,
 'total_cost_ch13': 948.13,
 'admission_count_ch14': 6.0,
 'total_cost_ch14': 2934.94,
 'admission_count_ch15': 14.0,
 'total_cost_ch15': 9051.54,
 

## **2. Cities to Hospitals flux networks**