# **Tests for the Networks**

In [26]:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import text, inspect, MetaData

from tqdm import tqdm
import numpy as np
from pathlib import Path
import pandas as pd
from itertools import product
import datetime as dt

from dateutil.relativedelta import relativedelta

import nxviz as nv
import networkx as nx
from nxviz import layouts, plots, lines
from nxviz import nodes, edges, annotate, highlights
from nxviz.plots import despine, rescale, respine, aspect_equal

from nxviz.utils import edge_table, node_table
from nxviz import encodings as aes

import matplotlib
import matplotlib.pyplot as plt

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["font.sans-serif"] = ["Tahoma"]

## **Definitions**

### **Database query**

In [2]:
# -- test
def query_metadata(engine):
    '''
        Describe every table visible through 'engine'.

        Returns a dict whose keys are the table names reported by the SQLAlchemy
        inspector and whose values are the corresponding column descriptions
        (whatever 'inspector.get_columns' returns for that table).
    '''
    db_inspector = inspect(engine)
    columns_by_table = {}
    for name in db_inspector.get_table_names():
        columns_by_table[name] = db_inspector.get_columns(name)
    return columns_by_table


def perform_query(query_str, engine, batchsize=1000):
    '''
        Execute a raw SQL statement and return the complete result as a DataFrame.

        Args:
        -----
            query_str:
                String. SQL statement, wrapped with sqlalchemy.text before execution.
            engine:
                SQLAlchemy engine used to open the connection.
            batchsize:
                Integer. Number of rows requested from the cursor per 'fetchmany' call.

        Returns:
        --------
            pandas.DataFrame with one column per key of the result set.
    '''
    statement = text(query_str)
    fetched_rows = []

    with engine.connect() as conn:
        result = conn.execute(statement)
        column_names = list(result.keys())

        # Drain the cursor chunk by chunk; an empty chunk marks the end of the result set.
        while batch := result.fetchmany(batchsize):
            fetched_rows.extend(batch)

    return pd.DataFrame(fetched_rows, columns=column_names)

### **Network definitions - SIH fluxes**

**Database query definitions**

In [3]:
def select_period_aih(engine, start_date, final_date, diag_level=0):
    '''
        Select the AIH (hospital admission) records whose admission date falls in
        the closed interval ['start_date', 'final_date'], truncating the main
        diagnosis to the requested level of detail.

        Args:
        -----
            engine:
                SQLAlchemy engine connected to the database holding 'aih_reduzida'.
            start_date:
                datetime.datetime. First admission day included.
            final_date:
                datetime.datetime. Last admission day included.
            diag_level:
                Integer. Number of leading chars of the ICD-10 main diagnosis
                (DIAG_PRINC) kept in DIAG_CATEG. Values outside [0, 4] are clamped.
                With 0 every record gets an empty DIAG_CATEG.

        Returns:
        --------
            pandas.DataFrame with columns
            N_AIH, CNES, MUNIC_RES, MUNIC_MOV, VAL_TOT, DIAG_CATEG.
    '''
    # Coerce to int before interpolating into the SQL text, then clamp to [0, 4].
    diag_level = max(0, min(4, int(diag_level)))

    # The date filter is a plain text comparison. Other date columns in this warehouse
    # are stored as 'YYYY-MM-DD 00:00:00.000000' (presumably DT_INTER too - TODO confirm),
    # and such a value sorts AFTER the bare 'YYYY-MM-DD' string, so "<= final_date" would
    # silently drop the admissions of the final day. An exclusive bound on the next day
    # is correct for both storage formats.
    lower_bound = start_date.strftime("%Y-%m-%d")
    upper_bound = (final_date + dt.timedelta(days=1)).strftime("%Y-%m-%d")

    query = f'''
        SELECT 
            N_AIH, CNES, MUNIC_RES, MUNIC_MOV, VAL_TOT,
            SUBSTR(DIAG_PRINC,1,{diag_level}) as DIAG_CATEG 
        FROM aih_reduzida
        WHERE DT_INTER >= '{lower_bound}' AND DT_INTER < '{upper_bound}'
    '''
    df = perform_query(query, engine)
    return df

def select_period_aih_services(engine, start_date, final_date):
    '''
        Select the AIH (hospital admission) records whose admission date falls in
        the closed interval ['start_date', 'final_date'], together with the medical
        services (SP_ATOPROF) performed for each admission.

        Admissions are matched to 'servicos_profissionais' by AIH number and by
        competence year/month. Only one row per (N_AIH, SP_ATOPROF) pair is kept.

        Args:
        -----
            engine:
                SQLAlchemy engine connected to the database holding 'aih_reduzida'
                and 'servicos_profissionais'.
            start_date:
                datetime.datetime. First admission day included.
            final_date:
                datetime.datetime. Last admission day included.

        Returns:
        --------
            pandas.DataFrame with columns
            N_AIH, CNES, MUNIC_RES, MUNIC_MOV, ANO_CMPT, MES_CMPT, SP_ATOPROF, SP_QTD_ATO.
    '''
    # The date filter is a plain text comparison. Other date columns in this warehouse
    # are stored as 'YYYY-MM-DD 00:00:00.000000' (presumably DT_INTER too - TODO confirm),
    # and such a value sorts AFTER the bare 'YYYY-MM-DD' string, so "<= final_date" would
    # silently drop the admissions of the final day. An exclusive bound on the next day
    # is correct for both storage formats.
    lower_bound = start_date.strftime("%Y-%m-%d")
    upper_bound = (final_date + dt.timedelta(days=1)).strftime("%Y-%m-%d")

    # NOTE(review): the join condition lives in WHERE rather than ON, so admissions
    # without any matching service row are filtered out (the LEFT JOIN behaves as an
    # inner join). Kept as is to preserve the current results.
    query = f'''
        SELECT 
            a.*, b.SP_ATOPROF , b.SP_QTD_ATO
        FROM (
            SELECT 
                N_AIH, CNES, MUNIC_RES, MUNIC_MOV, ANO_CMPT, MES_CMPT
            FROM aih_reduzida
            WHERE DT_INTER >= '{lower_bound}' AND DT_INTER < '{upper_bound}'
        ) a
        LEFT JOIN servicos_profissionais b
        WHERE a.N_AIH = b.SP_NAIH AND a.ANO_CMPT = b.SP_AA AND a.MES_CMPT = b.SP_MM
    '''
    df = perform_query(query, engine)
    # A service code may appear on several rows of the same admission; keep the first one.
    df = df.drop_duplicates(subset=["N_AIH", "SP_ATOPROF"], keep='first')
    return df

def select_cnes_equip_data(engine, reference_date):
    '''
        Select the rows of 'equipamentos_mes' whose competence (COMPET) equals
        'reference_date' at midnight, enriched with facility attributes from 'cnes'
        (CODUFMUN, VINC_SUS, TPGESTAO, ESFERA_A, NATUREZA, TP_UNID).

        An extra column EQUIP_KEY is appended, built as "<TIPEQUIP>-<CODEQUIP>".

        Args:
        -----
            engine:
                SQLAlchemy engine connected to the database holding both tables.
            reference_date:
                datetime.datetime. Only its year, month and day are used.

        Returns:
        --------
            pandas.DataFrame.
    '''
    # COMPET is matched as text, so the timestamp has to be spelled out in full.
    competence_stamp = reference_date.strftime("%Y-%m-%d 00:00:00.000000")
    query = f'''
        SELECT
            a.*, b.CODUFMUN, b.VINC_SUS, b.TPGESTAO,
            b.ESFERA_A, b.NATUREZA, b.TP_UNID
        FROM (
            SELECT
                *
            FROM equipamentos_mes
            WHERE COMPET = '{competence_stamp}'
        ) a
        LEFT JOIN cnes b
        WHERE a.CNES = b.CNES
    '''
    equip_df = perform_query(query, engine)
    equip_key = equip_df["TIPEQUIP"] + '-' + equip_df["CODEQUIP"]
    return equip_df.assign(EQUIP_KEY=equip_key)

In [4]:
# NOTE: 'cnes_engine' is only created by the engine setup cell further down in this
# notebook. On a fresh kernel this cell raises NameError until that cell has been run.
ex = select_cnes_equip_data(cnes_engine, dt.datetime(2012, 1, 1))
ex

NameError: name 'cnes_engine' is not defined

**Edgelist definitions**

In [5]:
# -- City2City Networks (C2C)

def edgelist_c2c(engine, start_date, final_date, diag_level, mode='people'):
    r'''
        Create the edgelist for the City2City networks.

        Given a period between 'start_date' and 'final_date', the function retrieves hospital admission
        data for this period and creates the edgelist with pairs of cities (residence, admission) and
        their corresponding weights.
        For this network, the weight can be regarded as a vector $\vec{w}$ of m entries. Each entry corresponds
        to a group of diseases. The coarsening of the groups depends on 'diag_level'. For instance, if
        'diag_level'=1, then each weight $w_i$ corresponds to the group of diseases given by the first letter
        of the ICD-10 code.
        If 'mode'='people', the value of a weight is the number of admissions carried by the edge.
        If 'mode'='money', the value of a weight is the sum of VAL_TOT carried by the edge.

        Args:
        -----
            engine:
                sqlalchemy.engine.base.Engine. SQLAlchemy engine responsible for the connection to the database
                of fluxes.
            start_date:
                datetime.datetime.
            final_date:
                datetime.datetime.
            diag_level:
                Integer. Coarsening level of the ICD-10 code associated to the hospital admission. For instance,
                if 'diag_level'=1, any flux of admissions due to disease 'B34.2' (Covid-19) will be grouped into
                the disease group 'B'. If 'diag_level'=2, the example will be grouped as 'B3'. If 'diag_level'=0,
                then the weight vector will have only one entry corresponding to the total sum of fluxes with no
                discrimination of disease.
            mode:
                String.
                Options=['people', 'money']. See description.

        Returns:
        --------
            edgelist:
                pandas.DataFrame indexed by (MUNIC_RES, MUNIC_MOV), one column per disease group plus
                'SOMA', the row total over all groups.

        Raises:
        -------
            ValueError: if 'mode' is not one of the available options.
    '''
    # Fail fast: an unknown mode used to surface as an UnboundLocalError only after
    # the (slow) database query had already run.
    if mode not in ('people', 'money'):
        raise ValueError("'people' or 'money' are the only available options.")

    df = select_period_aih(engine, start_date, final_date, diag_level=diag_level)
    edge_keys = ["MUNIC_RES", "MUNIC_MOV"]
    if mode == 'people':
        edgelist = df.groupby(edge_keys)["DIAG_CATEG"].value_counts().reset_index()
        weight_col = "count"
    else:
        edgelist = df.groupby(edge_keys + ["DIAG_CATEG"])["VAL_TOT"].sum().reset_index()
        weight_col = "VAL_TOT"

    # Each (edge, disease group) pair occurs once, so the pivot only reshapes the data;
    # pairs that never occur become 0.
    edgelist = pd.pivot_table(edgelist, index=edge_keys, columns="DIAG_CATEG", values=weight_col).fillna(0)
    # Vectorised row total (replaces a Python-level sum applied row by row).
    edgelist["SOMA"] = edgelist.sum(axis=1)
    return edgelist

def edgelist_services_c2c(engine, start_date, final_date):
    '''
        Create the edgelist for the City2City networks where the weight vector is divided into
        the medical services performed for each hospital admission.

        The expression "city i has a set of directed edges towards city j" in this case refers to all medical
        services that were exported from i to j through the flux of people.

        Returns:
        --------
            edgelist:
                pandas.DataFrame indexed by (MUNIC_RES, MUNIC_MOV), one column per service code
                (SP_ATOPROF) holding the number of rows with that service, plus 'SOMA', the row total.
    '''
    df = select_period_aih_services(engine, start_date, final_date)
    edge_keys = ["MUNIC_RES", "MUNIC_MOV"]
    edgelist = df.groupby(edge_keys)["SP_ATOPROF"].value_counts().reset_index()
    edgelist = pd.pivot_table(edgelist, index=edge_keys, columns="SP_ATOPROF", values="count").fillna(0)
    # Vectorised row total (replaces a Python-level sum applied row by row over
    # a couple of thousand service columns).
    edgelist["SOMA"] = edgelist.sum(axis=1)
    return edgelist

# -- City2Hospital Networks (C2H)

def edgelist_c2h(engine, start_date, final_date, diag_level, mode='people'):
    '''
        Create the edgelist for the City2Hospital networks.

        Each edge links a municipality of residence (MUNIC_RES) to a hospital (CNES). The weight of
        an edge is a vector with one entry per disease group, where the groups are given by the
        first 'diag_level' chars of the ICD-10 main diagnosis.

        Args:
        -----
            engine:
                SQLAlchemy engine connected to the database of fluxes.
            start_date:
                datetime.datetime.
            final_date:
                datetime.datetime.
            diag_level:
                Integer. Number of leading ICD-10 chars used to group the diseases.
            mode:
                String. Options=['people', 'money']. With 'people' each entry counts admissions;
                with 'money' each entry sums VAL_TOT.

        Returns:
        --------
            edgelist:
                pandas.DataFrame indexed by (MUNIC_RES, CNES), one column per disease group plus
                'SOMA', the row total over all groups.

        Raises:
        -------
            ValueError: if 'mode' is not one of the available options.
    '''
    # Fail fast: an unknown mode used to surface as an UnboundLocalError only after
    # the (slow) database query had already run.
    if mode not in ('people', 'money'):
        raise ValueError("'people' or 'money' are the only available options.")

    df = select_period_aih(engine, start_date, final_date, diag_level=diag_level)
    edge_keys = ["MUNIC_RES", "CNES"]
    if mode == 'people':
        edgelist = df.groupby(edge_keys)["DIAG_CATEG"].value_counts().reset_index()
        weight_col = "count"
    else:
        edgelist = df.groupby(edge_keys + ["DIAG_CATEG"])["VAL_TOT"].sum().reset_index()
        weight_col = "VAL_TOT"

    # Each (edge, disease group) pair occurs once, so the pivot only reshapes the data;
    # pairs that never occur become 0.
    edgelist = pd.pivot_table(edgelist, index=edge_keys, columns="DIAG_CATEG", values=weight_col).fillna(0)
    # Vectorised row total (replaces a Python-level sum applied row by row).
    edgelist["SOMA"] = edgelist.sum(axis=1)
    return edgelist



def edgelist_services_c2h(engine, start_date, final_date):
    '''
        Create the edgelist for the City2Hospital networks where the weight vector is divided into
        the medical services performed for each hospital admission.

        Returns:
        --------
            edgelist:
                pandas.DataFrame indexed by (MUNIC_RES, CNES), one column per service code
                (SP_ATOPROF) holding the number of rows with that service, plus 'SOMA', the row total.
    '''
    df = select_period_aih_services(engine, start_date, final_date)
    edge_keys = ["MUNIC_RES", "CNES"]
    edgelist = df.groupby(edge_keys)["SP_ATOPROF"].value_counts().reset_index()
    edgelist = pd.pivot_table(edgelist, index=edge_keys, columns="SP_ATOPROF", values="count").fillna(0)
    # Vectorised row total (replaces a Python-level sum applied row by row over
    # a couple of thousand service columns).
    edgelist["SOMA"] = edgelist.sum(axis=1)
    return edgelist

# -- Hospital2Diseases Networks (H2D)
def edgelist_h2d(engine, start_date, final_date):
    '''
        Create the edgelist for the Hospital2Diseases networks.

        Diseases are grouped by the first 3 chars of the ICD-10 main diagnosis. Each row of the
        result links a hospital (CNES) to a disease group (DIAG_CATEG) and carries two weights:
        TOTAL (number of admissions) and VALOR_TOTAL (sum of VAL_TOT).
    '''
    admissions = select_period_aih(engine, start_date, final_date, diag_level=3)

    # Number of admissions per hospital and disease group.
    admission_counts = admissions.groupby(["CNES"])["DIAG_CATEG"].value_counts().reset_index()
    admission_counts = admission_counts.rename(columns={"count": "TOTAL"})

    # Total amount per hospital and disease group.
    admission_costs = admissions.groupby(["CNES", "DIAG_CATEG"])["VAL_TOT"].sum().reset_index()
    admission_costs = admission_costs.rename(columns={"VAL_TOT": "VALOR_TOTAL"})

    return admission_counts.merge(admission_costs, on=["CNES", "DIAG_CATEG"], how="inner")

# -- Hospital2HealthServices (H2HS)
def edgelist_h2hs(engine, start_date, final_date):
    '''
        Create the edgelist for the Hospital2HealthServices networks.

        Each row of the result links a hospital (CNES) to a service code (SP_ATOPROF);
        TOTAL is the number of selected rows carrying that service in that hospital.
    '''
    services = select_period_aih_services(engine, start_date, final_date)
    service_counts = services.groupby(["CNES"])["SP_ATOPROF"].value_counts().reset_index()
    return service_counts.rename(columns={"count": "TOTAL"})

# -- Health equipment (maybe also include physicians) to Hospital/Municipality (E2H and E2M)
def edgelist_equip2_h_m(engine, reference_date, mode="hospital"):
    '''
        Create the edgelist linking each type of health equipment (EQUIP_KEY) either to
        hospitals (CNES) or to municipalities (CODUFMUN), for a single reference month.

        Args:
        -----
            engine:
                SQLAlchemy engine connected to the CNES database.
            reference_date:
                datetime.datetime. Date for extracting year and month of the data point to create the network.
            mode:
                String. Options = ['hospital', 'municip']

        Returns:
        --------
            edgelist:
                pandas.DataFrame with columns EQUIP_KEY, the target column (CNES or CODUFMUN),
                QT_EXIST and QT_USO, the last two summed per (equipment, target) pair.

        Raises:
        -------
            ValueError: if 'mode' is not one of the available options.
    '''
    # Validate the mode before querying, so a typo does not cost a database round trip.
    if mode == 'hospital':
        target_col = "CNES"
    elif mode == "municip":
        target_col = "CODUFMUN"
    else:
        raise ValueError("'hospital' or 'municip' are the only available options.")

    cnes_df = select_cnes_equip_data(engine, reference_date)
    quantity_cols = ["QT_EXIST", "QT_USO"]
    # Cast the quantities to int so they are summed as numbers.
    cnes_df[quantity_cols] = cnes_df[quantity_cols].astype(int)

    edgelist = cnes_df.groupby(["EQUIP_KEY", target_col])[quantity_cols].sum().reset_index()
    return edgelist

## **Test Runs - Networks**

In [6]:
# Root folder of the local data, relative to the user's home directory.
basepath = Path.home() / "Documents" / "data"

# SQLite warehouse with the SIH (hospital admission) tables.
sih_location = basepath / "opendatasus"
sih_name = "SIH_CNES_WAREHOUSE.db"

# SQLite warehouse with the CNES (facilities and equipment) tables.
cnes_location = basepath / "opendatasus" / "cnes"
cnes_name = "CNES_WAREHOUSE.db"

sih_engine_url = f"sqlite:///{sih_location / sih_name}"
cnes_engine_url = f"sqlite:///{cnes_location / cnes_name}"

sih_engine = create_engine(sih_engine_url)
cnes_engine = create_engine(cnes_engine_url)

### **Basic interaction with the database**

In [13]:
# Peek at the admissions table. The name 'engine' is never defined in this notebook;
# 'aih_reduzida' is the table queried through 'sih_engine' by the functions above.
q = '''
    SELECT * FROM aih_reduzida LIMIT 5
'''

df = perform_query(q, sih_engine)
df.head(4)

Unnamed: 0,N_AIH,UF_ZI,ANO_CMPT,MES_CMPT,ESPEC,IDENT,MUNIC_RES,NASC,SEXO,UTI_MES_IN,...,GESTOR_COD,GESTOR_TP,CNES,INFEHOSP,CID_ASSO,CID_MORTE,COMPLEX,FINANC,RACA_COR,FONTE
0,2314100004788,230000,2014,1,7,1,231180,2014-01-17 00:00:00.000000,3,0,...,0,0,2785900,,,,2,6,99,RDCE1401
1,2314100004854,230000,2014,1,7,1,230945,2011-09-12 00:00:00.000000,3,0,...,0,0,2785900,,,,2,6,99,RDCE1401
2,2314100004865,230000,2014,1,7,1,230440,2013-05-19 00:00:00.000000,3,0,...,0,0,2785900,,,,2,6,99,RDCE1401
3,2314100004876,230000,2014,1,7,1,230440,2013-05-30 00:00:00.000000,1,0,...,0,0,2785900,,,,2,6,99,RDCE1401


In [14]:
# Peek at the professional services table. The name 'engine' is never defined in this
# notebook; 'servicos_profissionais' is joined with 'aih_reduzida' through 'sih_engine'
# by the functions above.
q = '''
    SELECT * FROM servicos_profissionais LIMIT 5
'''

df = perform_query(q, sih_engine)
df.head(4)

Unnamed: 0,SP_NAIH,SP_GESTOR,SP_AA,SP_MM,SP_CNES,SP_PROCREA,SP_ATOPROF,SP_TP_ATO,SP_QTD_ATO,SP_PTSP,...,SP_M_PAC,SP_COMPLEX,SP_FINANC,SP_CO_FAEC,SP_PF_CBO,SP_CIDPRI,SP_CIDSEC,SP_QT_PROC,SP_U_AIH,FONTE
0,2314100116548,230000,2014,1,6779522,303060107,303060107,,1,0,...,230730,2,6,,0,I110,,1,0,
1,2314100116548,230000,2014,1,6779522,303060107,303060107,,1,50,...,230730,2,6,,225125,I110,,0,1,
2,2314100116548,230000,2014,1,6779522,303060107,301010170,,6,120,...,230730,2,6,,225125,I110,,6,0,
3,2314100116548,230000,2014,1,6779522,303060107,202010180,,1,0,...,230730,2,6,,0,I110,,1,0,


In [15]:
# Peek at the rejected admissions table. The name 'engine' is never defined in this
# notebook. NOTE(review): 'aih_rejeitada' is assumed to live in the SIH warehouse
# together with the other AIH tables - confirm.
q = '''
    SELECT * FROM aih_rejeitada LIMIT 5
'''

df = perform_query(q, sih_engine)
df.head(4)

Unnamed: 0,N_AIH,UF_ZI,ANO_CMPT,MES_CMPT,MUNIC_RES,NASC,SEXO,UTI_MES_IN,UTI_MES_AN,UTI_MES_AL,...,MORTE,NACIONAL,NUM_PROC,CAR_INT,TOT_PT_SP,CNES,RACA_COR,ST_SITUAC,ST_BLOQ,ST_MOT_BLO
0,,230000,2014,1,230440,1944-08-30 00:00:00.000000,3,0,0,0,...,0,10,,2,0,2785900,99,1,5,0
1,,230000,2014,1,230440,1944-08-30 00:00:00.000000,3,0,0,0,...,1,10,,2,0,2785900,99,1,5,0
2,,230000,2014,1,230440,1963-05-04 00:00:00.000000,1,0,0,0,...,0,10,,2,0,2785900,99,1,5,0
3,,230000,2014,1,230440,1945-09-09 00:00:00.000000,3,0,0,0,...,0,10,,2,0,2785900,99,1,5,0


### **Test edgelist functions**

In [13]:
# Test period: the whole year of 2008.
start_date = dt.datetime(2008, 1, 1)
final_date = dt.datetime(2008, 12, 31)

In [14]:
# Raw admissions of the test period, diseases grouped by 3-char ICD-10 category.
aih_df = select_period_aih(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
    diag_level=3,
)
aih_df.head(4)

Unnamed: 0,N_AIH,CNES,MUNIC_RES,MUNIC_MOV,VAL_TOT,DIAG_CATEG


In [8]:
# -- c2c (diseases)
# City -> city edgelist, one weight per 3-char disease group (admission counts).
c2c_disease = edgelist_c2c(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
    diag_level=3,
    mode="people",
)
print(c2c_disease.shape)
c2c_disease.head(4)

(2022, 1320)


Unnamed: 0_level_0,DIAG_CATEG,A00,A01,A02,A03,A04,A05,A06,A07,A08,A09,...,Z47,Z48,Z52,Z53,Z73,Z80,Z87,Z93,Z98,SOMA
MUNIC_RES,MUNIC_MOV,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
110002,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
110012,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
110018,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
110020,230260,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [78]:
# -- c2c (service)
# City -> city edgelist, one weight per medical service code.
c2c_service = edgelist_services_c2c(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
)
print(c2c_service.shape)
c2c_service.head(4)

(1176, 1998)


Unnamed: 0_level_0,SP_ATOPROF,0201010020,0201010046,0201010062,0201010135,0201010143,0201010160,0201010194,0201010208,0201010216,0201010224,...,0802010067,0802010083,0802010105,0802010121,0802010148,0802010156,0802010199,0802010237,0802020011,SOMA
MUNIC_RES,MUNIC_MOV,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
110002,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0
110012,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0
110020,230260,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
110020,230440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0


In [79]:
# -- c2h (diseases)
# City -> hospital edgelist, one weight per 3-char disease group (admission counts).
c2h_disease = edgelist_c2h(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
    diag_level=3,
    mode="people",
)
print(c2h_disease.shape)
c2h_disease.head(4)

(4015, 1096)


Unnamed: 0_level_0,DIAG_CATEG,A00,A01,A02,A03,A04,A05,A06,A07,A08,A09,...,Z43,Z47,Z48,Z52,Z53,Z80,Z87,Z93,Z98,SOMA
MUNIC_RES,CNES,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
110002,2561492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
110012,2561492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
110020,2327945,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
110020,2373971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0


In [84]:
# -- c2h (services)
# City -> hospital edgelist, one weight per medical service code.
c2h_services = edgelist_services_c2h(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
)
print(c2h_services.shape)
c2h_services.head(4)

(4015, 1998)


Unnamed: 0_level_0,SP_ATOPROF,0201010020,0201010046,0201010062,0201010135,0201010143,0201010160,0201010194,0201010208,0201010216,0201010224,...,0802010067,0802010083,0802010105,0802010121,0802010148,0802010156,0802010199,0802010237,0802020011,SOMA
MUNIC_RES,CNES,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
110002,2561492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0
110012,2561492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0
110020,2327945,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
110020,2373971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [102]:
# -- h2d
# Hospital -> disease group edgelist (admission counts and total amounts).
h2d = edgelist_h2d(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
)
print(h2d.shape)
h2d.head(4)

(12507, 4)


Unnamed: 0,CNES,DIAG_CATEG,TOTAL,VALOR_TOTAL
0,2327945,O80,212,96505.58
1,2327945,A05,99,34515.56
2,2327945,O82,91,56864.87
3,2327945,J15,36,21945.8


In [118]:
# Hospital -> health service edgelist.
h2hs_df = edgelist_h2hs(
    engine=sih_engine,
    start_date=start_date,
    final_date=final_date,
)
print(h2hs_df.shape)
h2hs_df.head(4)

(19772, 3)


Unnamed: 0,CNES,SP_ATOPROF,TOTAL
0,2327945,202031179,323
1,2327945,802010032,304
2,2327945,310010039,212
3,2327945,202020380,197


In [None]:
# Equipment -> hospital edgelist for a single reference month.
reference_date = dt.datetime(2015, 7, 1)
equip2hospital = edgelist_equip2_h_m(
    engine=cnes_engine,
    reference_date=reference_date,
    mode="hospital",
)
print(equip2hospital.shape)
equip2hospital.head(4)

In [107]:
# Equipment -> municipality edgelist for a single reference month.
reference_date = dt.datetime(2015, 7, 1)
equip2mun = edgelist_equip2_h_m(
    engine=cnes_engine,
    reference_date=reference_date,
    mode="municip",
)
print(equip2mun.shape)
equip2mun.head(4)

(3695, 4)


Unnamed: 0,EQUIP_KEY,CODUFMUN,QT_EXIST,QT_USO
0,1-01,230190,1,1
1,1-01,230250,1,1
2,1-01,230440,20,20
3,1-01,230730,1,1


### **Generate edgelist data**

**For C2C (Diag and Services), C2H (Diag and Services):**

Period: 2008-2024

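(i) 1-year aggregated networks (each year on its own), sliding one year at a time;<br>
(ii) 3-year aggregated networks (mid year ± 1 year), sliding one year at a time;<br>
(iii) 5-year aggregated networks (mid year ± 2 years), sliding one year at a time.<br>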


In [8]:
# Root folder where the generated edgelists are written.
output = basepath / "regulacao" / "edgelists"

In [15]:
# Given the mid year y, each value of the list is the +/- delta (in years) around it.
# Ex: if the mid year is 2012 and the delta is 1, the aggregated network covers the
# 2011, 2012 and 2013 period.
plusminus_agg_year = [0, 1, 2]
years = np.arange(2010, 2024+1, 1)
# Hoisted out of the loops; plain ints keep the window bounds free of numpy scalars.
first_year, last_year = int(years.min()), int(years.max())

# 'to_parquet' does not create missing folders, so make sure every target exists.
for subdir in ("c2c", "c2h", "h2d", "h2hs"):
    output.joinpath(subdir).mkdir(parents=True, exist_ok=True)

for n_agg in plusminus_agg_year:
    for mid_year in tqdm(years):
        left_yy = int(mid_year) - n_agg
        right_yy = int(mid_year) + n_agg
        # Skip the windows that would reach outside the available years.
        if left_yy < first_year or right_yy > last_year:
            continue

        start_date, final_date = dt.datetime(left_yy, 1, 1), dt.datetime(right_yy, 12, 31)
        # -- c2c
        c2c_diag_people = edgelist_c2c(sih_engine, start_date, final_date, diag_level=3, mode="people")
        c2c_diag_money = edgelist_c2c(sih_engine, start_date, final_date, diag_level=3, mode="money")
        c2c_diag_people.to_parquet(output.joinpath("c2c", f"c2c_diag_people_{left_yy}_{right_yy}.parquet"))
        c2c_diag_money.to_parquet(output.joinpath("c2c", f"c2c_diag_money_{left_yy}_{right_yy}.parquet"))

        # -- c2h
        c2h_diag_people = edgelist_c2h(sih_engine, start_date, final_date, diag_level=3, mode="people")
        c2h_diag_money = edgelist_c2h(sih_engine, start_date, final_date, diag_level=3, mode="money")
        c2h_diag_people.to_parquet(output.joinpath("c2h", f"c2h_diag_people_{left_yy}_{right_yy}.parquet"))
        c2h_diag_money.to_parquet(output.joinpath("c2h", f"c2h_diag_money_{left_yy}_{right_yy}.parquet"))

        # -- c2c service
        c2c_service = edgelist_services_c2c(sih_engine, start_date, final_date)
        c2c_service.to_parquet(output.joinpath("c2c", f"c2c_service_{left_yy}_{right_yy}.parquet"))
        # -- c2h service
        c2h_service = edgelist_services_c2h(sih_engine, start_date, final_date)
        c2h_service.to_parquet(output.joinpath("c2h", f"c2h_service_{left_yy}_{right_yy}.parquet"))

        # -- h2d
        h2d_df = edgelist_h2d(sih_engine, start_date, final_date)
        h2d_df.to_parquet(output.joinpath("h2d", f"h2d_{left_yy}_{right_yy}.parquet"))

        # -- h2hs
        h2hs_df = edgelist_h2hs(sih_engine, start_date, final_date)
        h2hs_df.to_parquet(output.joinpath("h2hs", f"h2hs_{left_yy}_{right_yy}.parquet"))

100%|██████████| 15/15 [51:34<00:00, 206.33s/it]
100%|██████████| 15/15 [1:21:09<00:00, 324.66s/it]
100%|██████████| 15/15 [1:33:35<00:00, 374.39s/it]


In [30]:
# Monthly equipment edgelists, from January 2011 up to (and including) June 2024.
# NOTE: the loop advances 'start_date' itself, so the global 'start_date' used by
# other cells is overwritten once this cell has run.
end_date = dt.datetime(2024, 6, 1)
start_date = dt.datetime(2011, 1, 1)

# 'to_parquet' does not create missing folders, so make sure the target exists.
output.joinpath("equip").mkdir(parents=True, exist_ok=True)

while start_date<=end_date:
    equip2hospital = edgelist_equip2_h_m(cnes_engine, start_date, mode="hospital")
    equip2mun = edgelist_equip2_h_m(cnes_engine, start_date, mode="municip")

    equip2hospital.to_parquet(output.joinpath("equip", f"equip2hospital_{start_date.month}_{start_date.year}.parquet"))
    equip2mun.to_parquet(output.joinpath("equip", f"equip2mun_{start_date.month}_{start_date.year}.parquet"))

    # Move to the first day of the next month.
    start_date += relativedelta(months=+1)

In [28]:
# Interactive IPython help lookup left over from development; it has no effect on the
# results and can be dropped before sharing the notebook.
?relativedelta

[1;31mInit signature:[0m
[0mrelativedelta[0m[1;33m([0m[1;33m
[0m    [0mdt1[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdt2[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0myears[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mmonths[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mdays[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mleapdays[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mweeks[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mhours[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mminutes[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mseconds[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mmicroseconds[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0myear[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmonth[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mday[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
