# Importing municipal and national data from the Covid-19 Monitoring Panel
## Source: https://covid.saude.gov.br/

In [23]:
import os
import pandas as pd
from datetime import datetime

## Reading data

In [24]:
# Directory that holds the CSV files to import
data_folder = 'data/input/covid_monitoring_panel/'

# os.listdir() returns entries in arbitrary order; sorting the listing keeps
# the row order (and therefore the index) of the combined frame reproducible
file_list = sorted(os.listdir(data_folder))

# Read each ';'-separated CSV once and collect the pieces. Concatenating
# inside the loop would re-copy every previously loaded row on each pass.
frames = []
for file in file_list:
    if file.endswith('.csv'):
        file_path = os.path.join(data_folder, file)
        data = pd.read_csv(file_path, sep=';')
        frames.append(data)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the folder contains no CSV file
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [25]:
# Parse the 'data' column into datetime values.
# infer_datetime_format is intentionally not passed: it is deprecated since
# pandas 2.0, where consistent-format inference is already the default.
# Bracket assignment is used so the target is unambiguously a column.
df['data'] = pd.to_datetime(df['data'])

In [26]:
# Cast the municipality code to the nullable 32-bit integer dtype so that
# rows without a code keep a missing value instead of forcing floats
df['codmun'] = df['codmun'].astype('Int32')

In [27]:
# Load the municipalities table; the first line of the file is the header row
df_municipalities = pd.read_csv(
    filepath_or_buffer='../brazilian_municipalities/data/output/df_municpal.csv',
    header=0,
)

## Adjusting data

In [28]:
# Last day (inclusive) of the study window, at midnight
STUDY_END_DATE = datetime(year=2022, month=12, day=31)

In [29]:
# Show each distinct combination of region, state, municipality and code
df.loc[:, ['regiao', 'estado', 'municipio', 'codmun']].drop_duplicates()

Unnamed: 0,regiao,estado,municipio,codmun
0,Brasil,,,
158,Norte,RO,,
316,Norte,AC,,
474,Norte,AM,,
632,Norte,RR,,
...,...,...,...,...
713846,Centro-Oeste,GO,Vianópolis,522200
713973,Centro-Oeste,GO,Vicentinópolis,522205
714100,Centro-Oeste,GO,Vila Boa,522220
714227,Centro-Oeste,GO,Vila Propício,522230


In [30]:
# Inspect the column labels available in the combined frame
df.columns

Index(['regiao', 'estado', 'municipio', 'coduf', 'codmun', 'codRegiaoSaude',
       'nomeRegiaoSaude', 'data', 'semanaEpi', 'populacaoTCU2019',
       'casosAcumulado', 'casosNovos', 'obitosAcumulado', 'obitosNovos',
       'Recuperadosnovos', 'emAcompanhamentoNovos', 'interior/metropolitana'],
      dtype='object')

In [31]:
# Keep only the date, the location identifiers and the death/case counters
df_covid_base = df.loc[:, [
    'data', 'estado', 'municipio', 'codmun',
    'obitosNovos', 'obitosAcumulado',
    'casosNovos', 'casosAcumulado',
]]

In [32]:
# Rename the municipality code column to CODIGO_MUNICIPIO_6, the key name
# used by the municipalities table
df_covid_base = df_covid_base.rename({'codmun': 'CODIGO_MUNICIPIO_6'}, axis='columns')

In [33]:
# Discard every record dated after the end of the study window
df_covid_base = df_covid_base.loc[df_covid_base['data'].le(STUDY_END_DATE)]

In [39]:
# Municipal time series: keep the rows that carry a municipality code and
# attach the matching CODIGO_MUNICIPIO_7 from the municipalities table.
# - notna() replaces the non-idiomatic `isna() == False` comparison.
# - how='inner' (the default, now explicit) drops codes missing from the table.
# - validate='many_to_one' raises MergeError if the lookup holds duplicate
#   CODIGO_MUNICIPIO_6 values, which would otherwise silently duplicate rows.
df_ts_covid_municipalities = (
    df_covid_base[df_covid_base['CODIGO_MUNICIPIO_6'].notna()]
    .merge(
        df_municipalities[['CODIGO_MUNICIPIO_6', 'CODIGO_MUNICIPIO_7']],
        on='CODIGO_MUNICIPIO_6',
        how='inner',
        validate='many_to_one',
    )
)

In [40]:
# National time series: rows that have neither a municipality code nor a state
df_ts_covid_national = df_covid_base.loc[
    df_covid_base['CODIGO_MUNICIPIO_6'].isna()
    & df_covid_base['estado'].isna()
]

In [41]:
# Display the national series to check the selected rows
df_ts_covid_national

Unnamed: 0,data,estado,municipio,CODIGO_MUNICIPIO_6,obitosNovos,obitosAcumulado,casosNovos,casosAcumulado
0,2020-02-25,,,,0,0,0,0.0
1,2020-02-26,,,,0,0,1,1.0
2,2020-02-27,,,,0,0,0,1.0
3,2020-02-28,,,,0,0,0,1.0
4,2020-02-29,,,,0,0,1,2.0
...,...,...,...,...,...,...,...,...
6266229,2021-06-26,,,,1593,512735,64134,18386894.0
6266230,2021-06-27,,,,739,513474,33704,18420598.0
6266231,2021-06-28,,,,618,514092,27804,18448402.0
6266232,2021-06-29,,,,1893,515985,64903,18513305.0


## Saving data

In [44]:
# Write the municipal time series to CSV without the row index
df_ts_covid_municipalities.to_csv(
    path_or_buf='data/output/df_ts_monitoring_panel_imported_municipal.csv',
    index=False,
)

In [45]:
# (rows, columns) of the municipal time series
df_ts_covid_municipalities.shape

(5625700, 9)

In [46]:
# Preview the first five rows of the municipal time series
df_ts_covid_municipalities.head(n=5)

Unnamed: 0,data,estado,municipio,CODIGO_MUNICIPIO_6,obitosNovos,obitosAcumulado,casosNovos,casosAcumulado,CODIGO_MUNICIPIO_7
0,2020-03-27,RO,Alta Floresta D'Oeste,110001,0,0,0,0.0,1100015
1,2020-03-28,RO,Alta Floresta D'Oeste,110001,0,0,0,0.0,1100015
2,2020-03-29,RO,Alta Floresta D'Oeste,110001,0,0,0,0.0,1100015
3,2020-03-30,RO,Alta Floresta D'Oeste,110001,0,0,0,0.0,1100015
4,2020-03-31,RO,Alta Floresta D'Oeste,110001,0,0,0,0.0,1100015


In [49]:
# Write the national time series to CSV without the row index
df_ts_covid_national.to_csv(
    path_or_buf='data/output/df_ts_monitoring_panel_imported_national.csv',
    index=False,
)

In [47]:
# (rows, columns) of the national time series
df_ts_covid_national.shape

(1041, 8)

In [48]:
# Preview the first five rows of the national time series
df_ts_covid_national.head(n=5)

Unnamed: 0,data,estado,municipio,CODIGO_MUNICIPIO_6,obitosNovos,obitosAcumulado,casosNovos,casosAcumulado
0,2020-02-25,,,,0,0,0,0.0
1,2020-02-26,,,,0,0,1,1.0
2,2020-02-27,,,,0,0,0,1.0
3,2020-02-28,,,,0,0,0,1.0
4,2020-02-29,,,,0,0,1,2.0
