# Introdução

## Importação das bibliotecas necessarias

In [1]:
%matplotlib inline
import requests
import pandas as pd
import os
import json
import sys
from decimal import *

## Funções auxiliares

In [2]:
def return_sql_query(query_string):
    ret = {
        "token": {
            "token": token
        },
        "sql": {
            "sql": {
                "query":query_string
            }
        }
    }
    return ret

def json_to_df(resp):
    columns = [col['name'] for col in resp.json()['columns']]
    values = resp.json()['rows']  
    return pd.DataFrame(values, columns=columns)

def rows_and_columns_to_df(columns, rows):
    columns_array = [col['name'] for col in columns]
    return pd.DataFrame(rows, columns=columns_array)

def fetch_all_data_from_response(res, initial_records_rows):
    while 'cursor' in res.json():
        res_next = {
            "token": {
                "token": token
            },
            "sql": {
                "sql": {
                    "cursor": res.json()['cursor']
                }
            }
        }
        res = requests.post(os.path.join(url_base,'sql_query'), json=res_next)
        initial_records_rows+=res.json()['rows']
        sys.stdout.write('.')
        sys.stdout.flush()
    print('(y)')
    return initial_records_rows

## Dados de acesso a api

In [3]:
from IPython.display import clear_output
import getpass

token = getpass.getpass('Digite o seu token para acesso a API:')

clear_output()

In [5]:
url_base = "https://bigdata-api.fiocruz.br"

## Bases de dados disponiveis na API atualizadas

In [6]:
params = {
          "token": token
        }
resp = requests.post(url_base+ '/show_tables', json=params)
print(resp.json())

{'databases': ['datasus-sim', 'datasus-sinasc', 'datasus-sih', 'datasus-cnes-lt-2008-2017', 'datasus-cnes-eq-2008-2017', 'datasus-cnes-pf-2008-2017', 'datasus-cnes-sr-2008-2017']}


## Busca dos campos existentes na base SIM

In [7]:
%time
sql = {
          "token": {
            "token": token
          },
          "sql": {
            "sql": {"query":'describe "datasus-sim"'}
          }
        }
resp = requests.post(os.path.join(url_base,'sql_query'), json=sql)
#resp.content
fields = [x[0] for x in resp.json()['rows']]
print(fields)

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 8.34 µs
['ACIDTRAB', 'ALTCAUSA', 'ASSISTMED', 'ATESTADO', 'ATESTANTE', 'CAUSABAS', 'CAUSABAS_O', 'CAUSAMAT', 'CB_PRE', 'CIRCOBITO', 'CIRURGIA', 'CODBAIOCOR', 'CODBAIRES', 'CODCART', 'CODESTAB', 'CODIFICADO', 'CODINST', 'CODMUNCART', 'CODMUNNATU', 'CODMUNOCOR', 'CODMUNRES', 'COMUNSVOIM', 'CONTADOR', 'CRM', 'DIFDATA', 'DTATESTADO', 'DTCADASTRO', 'DTCADINF', 'DTCADINV', 'DTCONCASO', 'DTCONINV', 'DTINVESTIG', 'DTNASC', 'DTOBITO', 'DTRECEBIM', 'DTRECORIG', 'DTRECORIGA', 'DTREGCART', 'ESC', 'ESC2010', 'ESCFALAGR1', 'ESCMAE', 'ESCMAE2010', 'ESCMAEAGR1', 'ESTABDESCR', 'ESTCIV', 'EXAME', 'EXPDIFDATA', 'FONTE', 'FONTEINV', 'FONTES', 'FONTESINF', 'GESTACAO', 'GRAVIDEZ', 'HORAOBITO', 'IDADE', 'IDADEMAE', 'LINHAA', 'LINHAB', 'LINHAC', 'LINHAD', 'LINHAII', 'LOCOCOR', 'MORTEPARTO', 'NATURAL', 'NECROPSIA', 'NUDIASINF', 'NUDIASOBCO', 'NUDIASOBIN', 'NUMERODN', 'NUMERODO', 'NUMERODV', 'NUMEROLOTE', 'NUMREGCART', 'NUMSUS', 'OBITOGRAV', 'OBITOPARTO', 

## Busca dos campos existentes na base SINASC

In [8]:
%time
sql = {
          "token": {
            "token": token
          },
          "sql": {
            "sql": {"query":'describe "datasus-sinasc"'}
          }
        }
resp = requests.post(os.path.join(url_base,'sql_query'), json=sql)
#resp.content
fields = [x[0] for x in resp.json()['rows']]
print(fields)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 8.34 µs
['APGAR1', 'APGAR5', 'CODANOMAL', 'CODBAINASC', 'CODBAIRES', 'CODCART', 'CODESTAB', 'CODINST', 'CODMUNCART', 'CODMUNNASC', 'CODMUNNATU', 'CODMUNRES', 'CODOCUPMAE', 'CODPAISRES', 'CODUFNATU', 'CONSPRENAT', 'CONSULTAS', 'CONTADOR', 'DIFDATA', 'DTCADASTRO', 'DTDECLARAC', 'DTNASC', 'DTNASCMAE', 'DTRECEBIM', 'DTRECORIG', 'DTRECORIGA', 'DTREGCART', 'DTULTMENST', 'ESCMAE', 'ESCMAE2010', 'ESCMAEAGR1', 'ESTCIVMAE', 'GESTACAO', 'GRAVIDEZ', 'HORANASC', 'IDADEMAE', 'IDADEPAI', 'IDANOMAL', 'KOTELCHUCK', 'LOCNASC', 'MESPRENAT', 'NATURALMAE', 'NOVO', 'NUMERODN', 'NUMERODV', 'NUMEROLOTE', 'NUMREGCART', 'ORIGEM', 'PARIDADE', 'PARTO', 'PESO', 'PREFIXODN', 'QTDFILMORT', 'QTDFILVIVO', 'QTDGESTANT', 'QTDPARTCES', 'QTDPARTNOR', 'RACACOR', 'RACACORMAE', 'RACACORN', 'RACACOR_RN', 'SEMAGESTAC', 'SERIESCMAE', 'SEXO', 'STCESPARTO', 'STDNEPIDEM', 'STDNNOVA', 'STTRABPART', 'TPAPRESENT', 'TPDOCRESP', 'TPFUNCRESP', 'TPMETESTIM', 'TPNASCASSI', 'TPROBSON'

# Proporção de nascidos vivos do grupo 2 de Robson (SINASC)

A ideia inicial era explorar os dados de todo o Brasil do período de 2012 a 2019.
Como a tabela SIH-RD tratada e filtrada que temos é do período de 2018 até 2019 e apenas do Estado de São Paulo, iremos utilizar o mesmo subconjunto.

In [9]:
%time
sql_t_nasc_vivos = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Total  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 \
                    GROUP BY CODMUNRES, ano_nasc")
resp_sql_t_nasc_vivos = requests.post(os.path.join(url_base,'sql_query'), json=sql_t_nasc_vivos)
record_columns=resp_sql_t_nasc_vivos.json()['columns']
record_rows=resp_sql_t_nasc_vivos.json()['rows']

fetch_all_data_from_response(resp_sql_t_nasc_vivos, record_rows)
print("Record rows length : "+str(len(record_rows)))

df_t_nasc_vivos = rows_and_columns_to_df(record_columns, record_rows)

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 8.58 µs
.............................................(y)
Record rows length : 44683


## Consulta de nascimentos por cesariana

In [10]:
%time

sql_cesariana = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_cesariana  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_cesariana = requests.post(os.path.join(url_base,'sql_query'), json=sql_cesariana)
record_columns_cesariana = resp_sql_cesariana.json()['columns']
record_rows_cesariana = resp_sql_cesariana.json()['rows']

fetch_all_data_from_response(resp_sql_cesariana, record_rows_cesariana)

df_cesariana =  rows_and_columns_to_df(record_columns_cesariana, record_rows_cesariana)

df_cesariana.head()

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 7.87 µs
.............................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_cesariana
0,110000,2012,1
1,110000,2013,6
2,110000,2014,4
3,110000,2015,4
4,110000,2018,1


## Consultas de nascimentos pertencentes a cada grupo de Robson que também foram cesarianas

In [11]:
%time

sql_robson11 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson11  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=11 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson11 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson11)
record_columns_11=resp_sql_robson11.json()['columns']
record_rows_11=resp_sql_robson11.json()['rows']

fetch_all_data_from_response(resp_sql_robson11, record_rows_11)

df_robson11 = rows_and_columns_to_df(record_columns_11, record_rows_11)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 7.63 µs
.........................(y)


In [12]:
df_teste=df_robson11[df_robson11['CODMUNRES']=='330455']
df_teste

Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson11
16363,330455,2012,6034
16364,330455,2014,4515
16365,330455,2015,1881
16366,330455,2016,1282
16367,330455,2017,960
16368,330455,2018,1357
16369,330455,2019,1130


In [13]:
%time

sql_robson1 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson1  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=1 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson1 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson1)
record_columns_1=resp_sql_robson1.json()['columns']
record_rows_1=resp_sql_robson1.json()['rows']

fetch_all_data_from_response(resp_sql_robson1, record_rows_1)

df_robson1 = rows_and_columns_to_df(record_columns_1, record_rows_1)

df_robson1.head()

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 9.06 µs
......................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson1
0,110000,2015,1
1,110001,2012,72
2,110001,2014,66
3,110001,2015,84
4,110001,2016,61


In [14]:
%time


sql_robson2 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson2  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=2 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson2 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson2)
record_columns_2=resp_sql_robson2.json()['columns']
record_rows_2=resp_sql_robson2.json()['rows']

fetch_all_data_from_response(resp_sql_robson2, record_rows_2)

df_robson2 = rows_and_columns_to_df(record_columns_2, record_rows_2)

df_robson2.head()

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.34 µs
......................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson2
0,110000,2015,1
1,110001,2012,21
2,110001,2014,12
3,110001,2015,20
4,110001,2016,14


In [15]:
%time


sql_robson3 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson3  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=3 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson3 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson3)
record_columns_3=resp_sql_robson3.json()['columns']
record_rows_3=resp_sql_robson3.json()['rows']

fetch_all_data_from_response(resp_sql_robson3, record_rows_3)

df_robson3 = rows_and_columns_to_df(record_columns_3, record_rows_3)

df_robson3.head()

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.11 µs
...................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson3
0,110001,2012,28
1,110001,2014,22
2,110001,2015,20
3,110001,2016,34
4,110001,2017,38


In [16]:
%time

sql_robson4 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson4  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=4 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson4 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson4)
record_columns_4=resp_sql_robson4.json()['columns']
record_rows_4=resp_sql_robson4.json()['rows']

fetch_all_data_from_response(resp_sql_robson4, record_rows_4)

df_robson4 = rows_and_columns_to_df(record_columns_4, record_rows_4)

df_robson4.head()

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.34 µs
...................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson4
0,110000,2014,1
1,110001,2012,2
2,110001,2014,6
3,110001,2015,7
4,110001,2016,4


In [17]:
%time

sql_robson5 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson5  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=5 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson5 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson5)
record_columns_5=resp_sql_robson5.json()['columns']
record_rows_5=resp_sql_robson5.json()['rows']

fetch_all_data_from_response(resp_sql_robson5, record_rows_5)

df_robson5 = rows_and_columns_to_df(record_columns_5, record_rows_5)

df_robson5.head()

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 7.87 µs
.......................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson5
0,110000,2012,1
1,110000,2014,3
2,110000,2015,1
3,110001,2012,97
4,110001,2014,102


In [20]:
%time

sql_robson6_ao_9 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson6_ao_9  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=6 OR\
                    TPROBSON=7 OR\
                    TPROBSON=8 OR\
                    TPROBSON=9 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson6_ao_9 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson6_ao_9)
record_columns_6_ao_9=resp_sql_robson6_ao_9.json()['columns']
record_rows_6_ao_9=resp_sql_robson6_ao_9.json()['rows']

fetch_all_data_from_response(resp_sql_robson6_ao_9, record_rows_6_ao_9)

df_robson6_ao_9 = rows_and_columns_to_df(record_columns_6_ao_9, record_rows_6_ao_9)

df_robson6_ao_9.head()

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.11 µs
...........................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson6_ao_9
0,110001,2011,3
1,110001,2012,21
2,110001,2014,21
3,110001,2015,17
4,110001,2016,13


In [21]:
%time

sql_robson10 = return_sql_query("SELECT CODMUNRES, ano_nasc, count(*) as Qtd_Robson10  \
                    FROM \"datasus-sinasc\" \
                    WHERE ano_nasc>=2012 AND \
                    ano_nasc<=2019 AND \
                    TPROBSON=10 AND\
                    PARTO=2 \
                    GROUP BY CODMUNRES, ano_nasc")

resp_sql_robson10 = requests.post(os.path.join(url_base,'sql_query'), json=sql_robson10)
record_columns_10=resp_sql_robson10.json()['columns']
record_rows_10=resp_sql_robson1.json()['rows']

fetch_all_data_from_response(resp_sql_robson10, record_rows_10)

df_robson10 = rows_and_columns_to_df(record_columns_10, record_rows_10)

df_robson10.head()

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.11 µs
.....................................(y)


Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson10
0,110000,2015,1
1,110001,2012,72
2,110001,2014,66
3,110001,2015,84
4,110001,2016,61


In [22]:
df_robson6_ao_9

Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson6_ao_9
0,110001,2011,3
1,110001,2012,21
2,110001,2014,21
3,110001,2015,17
4,110001,2016,13
...,...,...,...
42161,530010,2015,2644
42162,530010,2016,2359
42163,530010,2017,2561
42164,530010,2018,2725


In [23]:
df_merge = pd.merge(df_cesariana, df_t_nasc_vivos, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson1, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson2, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson3, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson4, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson5, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson6_ao_9, df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson10,df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = pd.merge(df_robson11,df_merge, how='right', on=['CODMUNRES','ano_nasc'])
df_merge = df_merge.fillna(0)

df_merge.head()

Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson11,Qtd_Robson10,Qtd_Robson6_ao_9,Qtd_Robson5,Qtd_Robson4,Qtd_Robson3,Qtd_Robson2,Qtd_Robson1,Qtd_cesariana,Qtd_Total
0,110000,2012,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2
1,110000,2013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,16
2,110000,2014,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,4.0,4
3,110000,2015,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,4.0,4
4,110000,2016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [137]:
df_merge['Qtd_Robson1'] = df_merge['Qtd_Robson1'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson2'] = df_merge['Qtd_Robson2'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson3'] = df_merge['Qtd_Robson3'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson4'] = df_merge['Qtd_Robson4'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson5'] = df_merge['Qtd_Robson5'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson6'] = df_merge['Qtd_Robson6'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson7'] = df_merge['Qtd_Robson7'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson8'] = df_merge['Qtd_Robson8'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson9'] = df_merge['Qtd_Robson9'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson10'] = df_merge['Qtd_Robson10'] / df_merge['Qtd_cesariana']
df_merge['Qtd_Robson11'] = df_merge['Qtd_Robson11'] / df_merge['Qtd_cesariana']
df_merge = df_merge.fillna(0)
df_merge = df_merge.round(decimals=6)
df_merge.head()

Unnamed: 0,CODMUNRES,ano_nasc,Qtd_Robson11,Qtd_Robson10,Qtd_Robson9,Qtd_Robson8,Qtd_Robson7,Qtd_Robson6,Qtd_Robson5,Qtd_Robson4,Qtd_Robson3,Qtd_Robson2,Qtd_Robson1,Qtd_cesariana,Qtd_Total
0,110000,2012,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2
1,110000,2013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,16
2,110000,2014,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.25,0.0,0.0,0.0,4.0,4
3,110000,2015,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.25,4.0,4
4,110000,2016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [24]:
%time
df_merge.to_csv('../csv_files/Q-20.1-contribuicao-relativa-grupos-robson-na-taxa-global-de-cesariana.csv',index=False)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.58 µs


In [142]:
df = df_merge['Qtd_Robson10']/df_merge['Qtd_cesariana']
df = df.fillna(0)
df

0        0.000000
1        0.000000
2        0.000000
3        0.250000
4        0.000000
           ...   
44678    0.083146
44679    0.092010
44680    0.088017
44681    0.094671
44682    0.095672
Length: 44683, dtype: float64