# Juntando as Peças 🧩

## (Fontes de Dados 🚰 ➕ Manipulação 🪡 ➕ Análise de Dados 🕵🏽) <sup>Alto Desempenho 🚀 🧞‍♂️</sup>

### Funções Auxiliares

In [None]:
!pip install --upgrade pandas --quiet

In [None]:
import os, json, sqlite3, pandas as pd

In [None]:
def build_path(subfolder = 'raw'):
    """Return the absolute path of a data subfolder, creating it if missing.

    The folder is resolved as ``<cwd>/../project/data/<subfolder>`` and
    normalized with ``os.path.abspath``.

    Args:
        subfolder: Name of the folder under ``project/data``.

    Returns:
        The absolute folder path as a string. The folder exists on return.
    """
    folderpath = os.path.abspath(
        os.path.join(os.getcwd(), os.pardir, 'project', 'data', subfolder))
    # exist_ok=True replaces a separate exists() check, which could race
    # with another process creating the same folder between check and create.
    os.makedirs(folderpath, exist_ok=True)
    return folderpath

#### csv

In [None]:
def get_estados_georreferenciamento(
        filename='estados_georreferenciamento.csv'):
    """Read the states georeferencing CSV into a DataFrame.

    Args:
        filename: CSV file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with its
        default options.
    """
    csv_location = os.path.join(build_path(), filename)
    states_geo = pd.read_csv(csv_location)
    return states_geo

def get_municipios_georreferenciamento(
        filename='municipios_georreferenciamento.csv'):
    """Read the municipalities georeferencing CSV into a DataFrame.

    Args:
        filename: CSV file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with its
        default options.
    """
    csv_location = os.path.join(build_path(), filename)
    municipalities_geo = pd.read_csv(csv_location)
    return municipalities_geo

def get_covid_infections_and_deaths(
        filename='ALL_HIST_PAINEL_COVID.csv'):
    """Read the semicolon-separated Covid-19 panel CSV into a DataFrame.

    The ``data`` column is parsed as dates and a fixed set of columns is
    read with the pandas ``'Int64'`` dtype.

    Args:
        filename: CSV file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` read from the UTF-8 encoded file.
    """
    # Columns forced to the 'Int64' extension dtype (presumably because
    # they contain missing values and would otherwise load as float).
    int64_columns = (
        'codmun',
        'codRegiaoSaude',
        'populacaoTCU2019',
        'casosAcumulado',
        'Recuperadosnovos',
        'emAcompanhamentoNovos',
        'interior/metropolitana',
    )
    column_types = dict.fromkeys(int64_columns, 'Int64')

    csv_location = os.path.join(build_path(), filename)
    return pd.read_csv(
        csv_location,
        sep=';',
        parse_dates=['data'],
        dtype=column_types,
        encoding='utf-8',
    )

#### json

In [None]:
#fail
def get_estados_codigos(filename='estados_codigos.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Labelled '#fail' by the author and replaced by the definition of the
    same name that follows it in this cell (presumably because
    ``pd.read_json`` does not flatten the nested objects; TODO confirm).

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_json``.
    """
    json_location = os.path.join(build_path(), filename)
    frame = pd.read_json(json_location)
    return frame

#solution
def get_estados_codigos(filename='estados_codigos.json'):
    """Load the state codes JSON and flatten it into a DataFrame.

    The file is parsed with ``json.load`` and flattened with
    ``pd.json_normalize``, so nested objects become dotted columns.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The flattened ``pandas.DataFrame``.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8: without it open() uses the platform
    # default encoding, which can garble accented text on some systems.
    with open(filepath, encoding='utf-8') as jsonfile:
        return pd.json_normalize(json.load(jsonfile))

#fail
def get_estados_caracteristicas(filename='estados_caracteristicas.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Labelled '#fail' by the author and replaced by the definition of the
    same name that follows it in this cell.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_json``.
    """
    json_location = os.path.join(build_path(), filename)
    frame = pd.read_json(json_location)
    return frame

#solution
def get_estados_caracteristicas(filename='estados_caracteristicas.json'):
    """Load the state characteristics JSON as one row per characteristic.

    Each entry of the ``characteristics`` list becomes a row whose columns
    are prefixed with ``characteristics_``; the parent ``state`` value is
    repeated on every row in the ``state_state`` column.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The flattened ``pandas.DataFrame``.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8: without it open() uses the platform
    # default encoding, which can garble accented text on some systems.
    with open(filepath, encoding='utf-8') as jsonfile:
        return pd.json_normalize(json.load(jsonfile),
                                 record_path='characteristics',
                                 record_prefix='characteristics_',
                                 meta='state',
                                 meta_prefix='state_')

#big fail
def get_estados_vacinacao(filename='estados_vacinacao.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Labelled '#big fail' by the author and replaced by the definition of
    the same name that follows it in this cell.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_json``.
    """
    json_location = os.path.join(build_path(), filename)
    frame = pd.read_json(json_location)
    return frame

#solution
def get_estados_vacinacao(filename='estados_vacinacao.json'):
    """Load per-state vaccination buckets as one row per (state, date).

    Reads ``Paciente_Estado.buckets`` from the JSON document and expands
    each state's ``Data_Aplicacao_Vacina.buckets`` list into rows. Inner
    bucket fields get the ``vacinacao_`` prefix; the state's ``key`` and
    ``doc_count`` are repeated on every row with the ``estado_`` prefix.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        A ``pandas.DataFrame`` with dtypes inferred by ``convert_dtypes``
        and ``vacinacao_key`` converted from epoch milliseconds to
        datetime.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding; the file is closed as soon as the document is parsed.
    with open(filepath, encoding='utf-8') as jsonfile:
        payload = json.load(jsonfile)

    df = pd.json_normalize(payload['Paciente_Estado']['buckets'],
                           record_path=['Data_Aplicacao_Vacina', ['buckets']],
                           meta=['key', 'doc_count'],
                           record_prefix='vacinacao_',
                           meta_prefix='estado_').convert_dtypes()
    # The bucket key is an epoch timestamp in milliseconds.
    df['vacinacao_key'] = pd.to_datetime(df['vacinacao_key'], unit='ms')
    return df

#big fail
def get_municipios_vacinacao(filename='municipios_vacinacao.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Labelled '#big fail' by the author and replaced by the definition of
    the same name that follows it in this cell.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_json``.
    """
    json_location = os.path.join(build_path(), filename)
    frame = pd.read_json(json_location)
    return frame

#solution
def get_municipios_vacinacao(filename='municipios_vacinacao.json'):
    """Load per-municipality vaccination buckets as one row per date.

    Reads ``Paciente_Municipio.buckets`` from the JSON document and expands
    each municipality's ``Data_Aplicacao_Vacina.buckets`` list into rows.
    Inner bucket fields get the ``vacinacao_`` prefix; the municipality's
    ``key`` and ``doc_count`` are repeated on every row with the
    ``municipio_`` prefix.

    Args:
        filename: JSON file name, looked up inside the folder returned by
            ``build_path()``.

    Returns:
        A ``pandas.DataFrame`` with dtypes inferred by ``convert_dtypes``
        and ``vacinacao_key`` converted from epoch milliseconds to
        datetime.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding; the file is closed as soon as the document is parsed.
    with open(filepath, encoding='utf-8') as jsonfile:
        payload = json.load(jsonfile)

    df = pd.json_normalize(payload['Paciente_Municipio']['buckets'],
                           record_path=['Data_Aplicacao_Vacina', ['buckets']],
                           meta=['key', 'doc_count'],
                           record_prefix='vacinacao_',
                           meta_prefix='municipio_').convert_dtypes()
    # The bucket key is an epoch timestamp in milliseconds.
    df['vacinacao_key'] = pd.to_datetime(df['vacinacao_key'], unit='ms')
    return df

#### sqlite

In [None]:
def get_municipios_codigos(filename='municipios_codigos.db'):
    """Load the ``ibge`` table of a SQLite database into a DataFrame.

    Args:
        filename: SQLite database file name, looked up inside the folder
            returned by ``build_path()``.

    Returns:
        A ``pandas.DataFrame`` with every column of the ``ibge`` table,
        indexed by its ``id`` column.
    """
    filepath = os.path.join(build_path(), filename)

    conn = sqlite3.connect(filepath)
    try:
        municipios_codigos = pd.read_sql_query('select * from ibge', conn,
                                               index_col='id')
    finally:
        # Always release the database handle, even when the query fails;
        # the connection was previously left open.
        conn.close()

    return municipios_codigos

### Descrição dos Dados 🎲🕵️‍♀️

#### Dados de Georreferenciamento

##### Unidades Federativas

In [None]:
estados_georreferenciamento = get_estados_georreferenciamento()

In [None]:
estados_georreferenciamento.head()

In [None]:
estados_georreferenciamento.dtypes

In [None]:
estados_georreferenciamento.describe()

##### Municípios

In [None]:
municipios_georreferenciamento = get_municipios_georreferenciamento()

In [None]:
municipios_georreferenciamento.head()

In [None]:
municipios_georreferenciamento.dtypes

In [None]:
municipios_georreferenciamento.describe()

#### Dados de Casos e Óbitos por Covid-19 no Brasil

In [45]:
covid_infections_and_deaths = get_covid_infections_and_deaths()

In [46]:
covid_infections_and_deaths.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Brasil,,,76,,,,2020-02-25,9,210147125,0,0,0,0,0,0,
1,Brasil,,,76,,,,2020-02-26,9,210147125,1,1,0,0,1,0,
2,Brasil,,,76,,,,2020-02-27,9,210147125,1,0,0,0,1,0,
3,Brasil,,,76,,,,2020-02-28,9,210147125,1,0,0,0,0,1,
4,Brasil,,,76,,,,2020-02-29,9,210147125,2,1,0,0,1,1,


In [47]:
covid_infections_and_deaths.dtypes

regiao                            object
estado                            object
municipio                         object
coduf                              int64
codmun                             Int64
codRegiaoSaude                     Int64
nomeRegiaoSaude                   object
data                      datetime64[ns]
semanaEpi                          int64
populacaoTCU2019                   Int64
casosAcumulado                     Int64
casosNovos                         int64
obitosAcumulado                    int64
obitosNovos                        int64
Recuperadosnovos                   Int64
emAcompanhamentoNovos              Int64
interior/metropolitana             Int64
dtype: object

In [48]:
covid_infections_and_deaths.describe()

Unnamed: 0,coduf,codmun,codRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
count,7283092.0,7245936.0,7218720.0,7283092,7283092.0,7255876.0,7283092.0,7283092.0,7283092.0,7283092.0,1326.0,1326.0,7218720.0
mean,32.3602,325258.01413,32403.123698,2022-01-03 10:06:07.831024896,26.76047,114401.230637,12100.81947,15.59087,264.178,0.290869,21123272.384615,511579.233032,0.0693
min,11.0,110000.0,11001.0,2020-02-25 00:00:00,1.0,781.0,0.0,-336837.0,0.0,-9114.0,0.0,-6206.0,0.0
25%,25.0,251200.0,25010.0,2021-02-13 00:00:00,15.0,5474.0,286.0,0.0,5.0,0.0,7530290.0,121233.5,0.0
50%,31.0,314610.0,31059.0,2022-01-03 00:00:00,27.0,11695.0,885.0,0.0,15.0,0.0,21478994.0,395361.0,0.0
75%,41.0,411915.0,41015.0,2022-11-23 00:00:00,38.0,25765.0,2446.0,1.0,42.0,0.0,34140048.5,738180.0,0.0
max,76.0,530010.0,53001.0,2023-10-13 00:00:00,53.0,210147125.0,37849919.0,336959.0,706142.0,9115.0,37104058.0,11232608.0,1.0
std,9.874131,98535.030999,9836.341988,,14.27102,2969815.725652,363505.185072,721.1244,7763.557,16.67581,13329843.8538,583920.242119,0.253963


#### Características das Unidades da Federação

In [49]:
estados_caracteristicas = get_estados_caracteristicas()

In [50]:
estados_caracteristicas.head()

Unnamed: 0,characteristics_label,characteristics_value,characteristics_measure,state_state
0,Governador,MARCOS JOSÉ ROCHA DOS SANTOS,,ro
1,Capital,Porto Velho,,ro
2,Gentílico,rondoniense ou rondoniano,,ro
3,Área Territorial,"237.754,172",km²,ro
4,População residente,1.581.016,pessoas,ro


In [51]:
estados_caracteristicas.dtypes

characteristics_label      object
characteristics_value      object
characteristics_measure    object
state_state                object
dtype: object

In [52]:
estados_caracteristicas.describe()

Unnamed: 0,characteristics_label,characteristics_value,characteristics_measure,state_state
count,324,324.0,324.0,324
unique,12,320.0,8.0,27
top,Governador,1.01,,ro
freq,27,2.0,108.0,12


#### Dados de Vacinação

##### Unidades Federativas

In [53]:
estados_vacinacao = get_estados_vacinacao()

In [54]:
estados_vacinacao.head()

Unnamed: 0,vacinacao_key_as_string,vacinacao_key,vacinacao_doc_count,estado_key,estado_doc_count
0,2023-10-16T00:00:00.000Z,2023-10-16,2238,SP,167924813
1,2023-10-15T00:00:00.000Z,2023-10-15,9,SP,167924813
2,2023-10-14T00:00:00.000Z,2023-10-14,288,SP,167924813
3,2023-10-13T00:00:00.000Z,2023-10-13,243,SP,167924813
4,2023-10-12T00:00:00.000Z,2023-10-12,51,SP,167924813


In [55]:
estados_vacinacao.dtypes

vacinacao_key_as_string    string[python]
vacinacao_key              datetime64[ms]
vacinacao_doc_count                 Int64
estado_key                 string[python]
estado_doc_count                    Int64
dtype: object

In [56]:
estados_vacinacao.describe()

Unnamed: 0,vacinacao_key,vacinacao_doc_count,estado_doc_count
count,30878,30878.0,30878.0
mean,2021-04-21 07:07:07.877000,18772.744996,23493232.622741
min,1899-12-30 00:00:00,1.0,1665735.0
25%,2021-06-06 00:00:00,238.0,7396427.0
50%,2022-03-19 00:00:00,3387.0,11228811.0
75%,2022-12-30 00:00:00,15990.25,29898738.0
max,2023-10-16 00:00:00,1418739.0,167924813.0
std,,51083.805059,33644632.469449


##### Municípios

In [57]:
municipios_vacinacao = get_municipios_vacinacao()

In [58]:
municipios_vacinacao.head()

Unnamed: 0,vacinacao_key_as_string,vacinacao_key,vacinacao_doc_count,municipio_key,municipio_doc_count
0,2023-10-16T00:00:00.000Z,2023-10-16,645,355030,46504973
1,2023-10-15T00:00:00.000Z,2023-10-15,2,355030,46504973
2,2023-10-14T00:00:00.000Z,2023-10-14,171,355030,46504973
3,2023-10-13T00:00:00.000Z,2023-10-13,181,355030,46504973
4,2023-10-12T00:00:00.000Z,2023-10-12,8,355030,46504973


In [59]:
municipios_vacinacao.dtypes

vacinacao_key_as_string    string[python]
vacinacao_key              datetime64[ms]
vacinacao_doc_count                 Int64
municipio_key              string[python]
municipio_doc_count                 Int64
dtype: object

In [60]:
municipios_vacinacao.describe()

Unnamed: 0,vacinacao_key,vacinacao_doc_count,municipio_doc_count
count,62331,62331.0,62331.0
mean,2022-02-22 12:48:44.079000,3474.323001,3822946.667132
min,1900-08-21 00:00:00,1.0,1154546.0
25%,2021-08-02 00:00:00,79.0,1383111.0
50%,2022-04-19 00:00:00,975.0,2017584.0
75%,2023-01-08 00:00:00,3425.5,3395436.0
max,2023-10-16 00:00:00,316423.0,46504973.0
std,,10317.238833,6628156.566902


#### Dados de Códigos do IBGE

##### Unidades Federativas

In [61]:
estados_codigos = get_estados_codigos()

In [62]:
estados_codigos.head()

Unnamed: 0,id,sigla,nome,regiao.id,regiao.sigla,regiao.nome
0,11,RO,Rondônia,1,N,Norte
1,12,AC,Acre,1,N,Norte
2,13,AM,Amazonas,1,N,Norte
3,14,RR,Roraima,1,N,Norte
4,15,PA,Pará,1,N,Norte


In [63]:
estados_codigos.dtypes

id               int64
sigla           object
nome            object
regiao.id        int64
regiao.sigla    object
regiao.nome     object
dtype: object

In [64]:
estados_codigos.describe()

Unnamed: 0,id,regiao.id
count,27.0,27.0
mean,29.111111,2.555556
std,13.024631,1.395965
min,11.0,1.0
25%,19.0,1.5
50%,27.0,2.0
75%,38.0,3.5
max,53.0,5.0


##### Municípios

In [65]:
municipios_codigos = get_municipios_codigos()

In [66]:
municipios_codigos.head()

Unnamed: 0_level_0,state,city
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1100015,RO,Alta Floresta D''Oeste
1100379,RO,Alto Alegre dos Parecis
1100403,RO,Alto Paraíso
1100346,RO,Alvorada D''Oeste
1100023,RO,Ariquemes


In [67]:
municipios_codigos.dtypes

state    object
city     object
dtype: object

In [68]:
municipios_codigos.describe()

Unnamed: 0,state,city
count,5570,5570
unique,27,5298
top,MG,Bom Jesus
freq,853,5
