In [1]:
import numpy as np #lib para cálculos e funções em arrays multidimensionais
import pandas as pd #pandas - libpra manipulação e análise de dados de alta performance
import seaborn as sb #lib pra dar um upgrade na visualização do matplotlib
import matplotlib #lib para plotar gráficos
import matplotlib.pyplot as plt
sb.set_style('darkgrid') #definindo tema do seaborn (fundo mais escuro com grelhas)

font = {'family' : 'normal', #definindo objeto para formatar a fonte
        'size'   : 16}

matplotlib.rc('font', **font) #aplicando a fonte padrão que queremos para o plot dos gráficos

## 1 - INGESTÃO: Carregando primeiro Dataset ##   

In [2]:
cor_se_temp = pd.read_csv('cor-semana-covid.csv', sep=';', encoding = "ISO-8859-1",skiprows=[0,1,2,3,12,13,14,15])

print('\nTabela Original - Raça / Semana Epidemiológica')

display(cor_se_temp)


Tabela Original - Raça / Semana Epidemiológica


Unnamed: 0,Cor,SE_20 11_1ºÓbito Covid19,SE_20 12,SE_20 13,SE_20 14,SE_20 15,SE_20 16,SE_20 17,SE_20 18,SE_20 19,...,SE_20 39,SE_20 40,SE_20 41,SE_20 42,SE_20 43,SE_20 44,SE_20 45,SE_20 46,SE_20 47,Total
0,Branca,3,48,178,366,445,452,519,656,620,...,165,174,128,101,89,79,92,124,51,12745
1,Preta,-,2,15,56,66,56,82,104,87,...,27,22,20,18,15,7,13,13,4,1903
2,Amarela,-,2,10,16,18,21,31,24,30,...,5,5,4,7,4,3,2,5,3,471
3,Parda,-,5,37,128,178,189,215,291,303,...,55,54,53,45,30,27,28,36,18,4927
4,Indígena,-,-,1,-,2,-,1,-,2,...,1,-,-,-,-,-,1,-,-,14
5,Não informado,-,-,12,17,25,28,22,42,41,...,8,11,5,7,6,4,2,3,1,689
6,Total,3,57,253,583,734,746,870,1117,1083,...,261,266,210,178,144,120,138,181,77,20749


## 2 - Tratamento dos dados

In [3]:
## Converter os valores todos para numérico, se der erro, setar como NaN e depois converter pro número 0
cor_se = cor_se_temp.apply(pd.to_numeric, errors='coerce').fillna(0)

print('\nResultado do Primeiro Tratamento - Raça / Semana Epidemiológica')


display(cor_se)


Resultado do Primeiro Tratamento - Raça / Semana Epidemiológica


Unnamed: 0,Cor,SE_20 11_1ºÓbito Covid19,SE_20 12,SE_20 13,SE_20 14,SE_20 15,SE_20 16,SE_20 17,SE_20 18,SE_20 19,...,SE_20 39,SE_20 40,SE_20 41,SE_20 42,SE_20 43,SE_20 44,SE_20 45,SE_20 46,SE_20 47,Total
0,0.0,3.0,48.0,178,366.0,445,452.0,519,656.0,620,...,165,174.0,128.0,101.0,89.0,79.0,92,124.0,51.0,12745
1,0.0,0.0,2.0,15,56.0,66,56.0,82,104.0,87,...,27,22.0,20.0,18.0,15.0,7.0,13,13.0,4.0,1903
2,0.0,0.0,2.0,10,16.0,18,21.0,31,24.0,30,...,5,5.0,4.0,7.0,4.0,3.0,2,5.0,3.0,471
3,0.0,0.0,5.0,37,128.0,178,189.0,215,291.0,303,...,55,54.0,53.0,45.0,30.0,27.0,28,36.0,18.0,4927
4,0.0,0.0,0.0,1,0.0,2,0.0,1,0.0,2,...,1,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,14
5,0.0,0.0,0.0,12,17.0,25,28.0,22,42.0,41,...,8,11.0,5.0,7.0,6.0,4.0,2,3.0,1.0,689
6,0.0,3.0,57.0,253,583.0,734,746.0,870,1117.0,1083,...,261,266.0,210.0,178.0,144.0,120.0,138,181.0,77.0,20749


In [4]:
## Como eu perdi os dados da coluna 0 (que foram transformados no valor 0), preciso resgatar do Dataset original e sobrescrever.
cor_se['Cor'] = cor_se_temp['Cor']

print('\nTabela Final - Raça / Semana Epidemiológica')


display(cor_se)


Tabela Final - Raça / Semana Epidemiológica


Unnamed: 0,Cor,SE_20 11_1ºÓbito Covid19,SE_20 12,SE_20 13,SE_20 14,SE_20 15,SE_20 16,SE_20 17,SE_20 18,SE_20 19,...,SE_20 39,SE_20 40,SE_20 41,SE_20 42,SE_20 43,SE_20 44,SE_20 45,SE_20 46,SE_20 47,Total
0,Branca,3.0,48.0,178,366.0,445,452.0,519,656.0,620,...,165,174.0,128.0,101.0,89.0,79.0,92,124.0,51.0,12745
1,Preta,0.0,2.0,15,56.0,66,56.0,82,104.0,87,...,27,22.0,20.0,18.0,15.0,7.0,13,13.0,4.0,1903
2,Amarela,0.0,2.0,10,16.0,18,21.0,31,24.0,30,...,5,5.0,4.0,7.0,4.0,3.0,2,5.0,3.0,471
3,Parda,0.0,5.0,37,128.0,178,189.0,215,291.0,303,...,55,54.0,53.0,45.0,30.0,27.0,28,36.0,18.0,4927
4,Indígena,0.0,0.0,1,0.0,2,0.0,1,0.0,2,...,1,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,14
5,Não informado,0.0,0.0,12,17.0,25,28.0,22,42.0,41,...,8,11.0,5.0,7.0,6.0,4.0,2,3.0,1.0,689
6,Total,3.0,57.0,253,583.0,734,746.0,870,1117.0,1083,...,261,266.0,210.0,178.0,144.0,120.0,138,181.0,77.0,20749


## 3 - Exportanto resultado

In [5]:
cor_se.to_csv('cor-semana-covid-tratado.csv')