# Registered deaths and causes in Brazil

Builds a dataset of deaths registered by civil registry offices in Brazil, broken down by date, state, gender, age, skin color and cause of death.
The dataset covers only 2020 and the same period in 2019.

Source: [https://transparencia.registrocivil.org.br/especial-covid](https://transparencia.registrocivil.org.br/especial-covid)

In [None]:
import datetime
import requests
import json

import pandas as pd
import numpy as np

In [None]:
# Lift the row limit so DataFrames are displayed in full instead of truncated.
pd.options.display.max_rows = None

In [None]:
# Endpoint template for the registry portal API.
# Positional slots, in order: start_date, end_date, state, gender, cor_pele (skin color).
# The stray ' HTTP/1.1' suffix (protocol token of a copied raw request line) has been
# dropped: it is not part of the URL and was being sent inside the `diffCity` value.
# NOTE(review): `chart` appears twice in the query string (chart2 / chartCardiac3);
# kept as-is because the server-side handling cannot be confirmed from here.
url = (
    'https://transparencia.registrocivil.org.br/api/covid-cardiaco'
    '?start_date={}&end_date={}&state={}&city_id=all&chart=chart2&gender={}'
    '&places[]=HOSPITAL&places[]=DOMICILIO&places[]=VIA_PUBLICA&places[]=OUTROS'
    '&cor_pele={}&chart=chartCardiac3&diffCity=false'
)

In [None]:
# HTTP headers sent with every API request.
# The 'xxxx' values are placeholders — presumably they must be replaced with the
# tokens/cookie of a live browser session before running (TODO confirm).
header = {
    # Connection / content negotiation
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    # Anti-bot and CSRF tokens (placeholders)
    'recaptcha': 'xxxx',
    'X-CSRF-TOKEN': 'xxxx',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.102 Safari/537.36'
    ),
    'X-XSRF-TOKEN': 'xxxx',
    # Fetch metadata as sent by the browser
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://transparencia.registrocivil.org.br/registros',
    'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,es-ES;q=0.6,es;q=0.5',
    # Session cookie (placeholder)
    'Cookie': 'xxxx',
}

In [None]:
# Two-letter state codes substituted into the `state` query parameter, one request each.
states = (
    'AC AL AM AP BA CE DF ES GO MA MG '
    'MS MT PA PB PE PI PR RJ RN RO RR '
    'RS SC SE SP TO'
).split()

In [None]:
# Values substituted into the `gender` query parameter.
genders = list('FM')

In [None]:
# Values substituted into the `cor_pele` (skin color) query parameter.
skin_colors = 'Amarela Branca Ignorada Indigena Parda Preta'.split()

In [None]:
# Inclusive scraping window, as ISO dates (YYYY-MM-DD).
begin_date_str, end_date_str = '2020-01-01', '2020-12-31'

In [None]:
# Parse both window bounds with the same format, then expand the window into
# one pandas Timestamp per calendar day (both ends included).
begin_date, end_date = (
    datetime.datetime.strptime(text, '%Y-%m-%d').date()
    for text in (begin_date_str, end_date_str)
)

dates = pd.date_range(start=begin_date, end=end_date).tolist()

In [None]:
# Scrape the API: one request per (day, state, gender, skin color) combination.
# Records are accumulated in a plain list and the DataFrame is built once at the
# end; `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on
# every call, which made the original loop quadratic.


def _chart_rows(content, date, state, gender, color):
    """Yield one record per (age, year, cause) entry of a ``chart`` payload.

    ``content`` is indexed as ``content[age][year][cause] -> count``. The
    requested ``date`` is re-based onto each ``year`` found in the payload, so
    the same calendar day of every returned year gets its own records.
    """
    for age, per_year in content.items():
        for year, per_cause in per_year.items():
            year_number = int(year)
            try:
                date_year = datetime.date(year_number, date.month, date.day)
            except ValueError:
                # 29 February has no counterpart in a non-leap comparison
                # year (e.g. 2019), so those entries are skipped.
                continue

            for key, val in per_cause.items():
                yield {
                    'date': date_year,
                    'state': state,
                    'gender': gender,
                    'age': age,
                    'color': color,
                    'cause': key,
                    'total': int(val),
                }


rows = []

for date in dates:
    print(date)  # progress indicator: one line per scraped day
    date_request = date.strftime('%Y-%m-%d')  # same value for every inner request
    for state in states:
        for gender in genders:
            for color in skin_colors:
                # Start and end date are identical: each request covers a single day.
                page = requests.get(
                    url.format(date_request, date_request, state, gender, color),
                    headers=header,
                    timeout=60,  # avoid hanging forever on a stalled connection
                )
                # Fail loudly on HTTP errors instead of parsing an error page.
                page.raise_for_status()
                content = json.loads(page.content)['chart']

                # An empty payload means no registrations for this combination.
                if content:
                    rows.extend(_chart_rows(content, date, state, gender, color))

df = pd.DataFrame(rows, columns=['date', 'state', 'gender', 'age', 'color', 'cause', 'total'])

In [None]:
# English labels for the cause-of-death codes returned by the API.
# Codes missing from this table fall back to the capitalized raw code.
# Fixed typo in two labels: 'Hearth Attack' -> 'Heart Attack'.
cause = {
    'SRAG': 'Sars',
    'INSUFICIENCIA_RESPIRATORIA': 'Respiratory Failure',
    'INDETERMINADA': 'Undetermined',
    'OUTRAS': 'Others',
    'AVC': 'Stroke',
    'CARDIOPATIA': 'Cardiopathy',
    'CHOQUE_CARD': 'Cardiogenic Shock',
    'INFARTO': 'Heart Attack',
    'PNEUMONIA': 'Pneumonia',
    'SEPTICEMIA': 'Septicemia',
    'SUBITA': 'Sudden Death',
    'COVID_AVC': 'Covid (Stroke)',
    'COVID_INFARTO': 'Covid (Heart Attack)',
}

# English labels for the skin-color categories used in the API requests.
skin_color = dict(
    Amarela='East asian',
    Branca='White',
    Ignorada='Ignored',
    Indigena='Indigenous',
    Parda='Mixed',
    Preta='Black',
)

def translate(dictionary, term):
    """Return the translation of ``term`` found in ``dictionary``.

    When ``term`` has no entry, the term itself is returned with its first
    character upper-cased and the rest lower-cased (``str.capitalize``).
    """
    if term not in dictionary:
        return term.capitalize()
    return dictionary[term]

In [None]:
# Replace raw cause codes with their English labels (unknown codes are capitalized).
df['cause'] = df['cause'].map(lambda code: translate(cause, code))

In [None]:
# Replace Portuguese skin-color categories with their English labels.
df['color'] = df['color'].map(lambda label: translate(skin_color, label))

In [None]:
# Write the final dataset to disk without the positional index column.
df.to_csv(path_or_buf='death_cause_brazil.csv', index=False)