### Projeto análise de dados - Dados sobre COVID-19 no Brasil (02/20-11/20)  

Esse projeto visa entender, por meio de um relatório mensal, como foi o comportamento de casos e óbitos da COVID-19 durante o período de fevereiro a novembro de 2020.

In [1]:
# Importando bibliotecas que serão utilizidas
import zipfile
import os
import pandas as pd
import os

In [2]:
# Entrando no diretorio 
PATH_DIR = os.getcwd()
DATA_DIR = os.path.join(PATH_DIR, "data")
DATA_PATH = [os.path.join(DATA_DIR, name) for name in os.listdir(DATA_DIR)][0]

In [3]:
# Carregando dados
df = pd.read_csv(DATA_PATH, sep=";", encoding="utf-8")

In [4]:
# Enxergando o head
df.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Brasil,,,76,,,,2020-02-25,9,210147125.0,0,0,0,0,,,
1,Brasil,,,76,,,,2020-02-26,9,210147125.0,1,1,0,0,,,
2,Brasil,,,76,,,,2020-02-27,9,210147125.0,1,0,0,0,,,
3,Brasil,,,76,,,,2020-02-28,9,210147125.0,1,0,0,0,,,
4,Brasil,,,76,,,,2020-02-29,9,210147125.0,2,1,0,0,,,


In [5]:
# Filtrando para dados do Brasil
df_brazil = df[df["regiao"] == "Brasil"]

df_brazil.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Brasil,,,76,,,,2020-02-25,9,210147125.0,0,0,0,0,,,
1,Brasil,,,76,,,,2020-02-26,9,210147125.0,1,1,0,0,,,
2,Brasil,,,76,,,,2020-02-27,9,210147125.0,1,0,0,0,,,
3,Brasil,,,76,,,,2020-02-28,9,210147125.0,1,0,0,0,,,
4,Brasil,,,76,,,,2020-02-29,9,210147125.0,2,1,0,0,,,


#### Limpando e tratando dados

In [6]:
df_brazil.isnull().sum()

regiao                      0
estado                    270
municipio                 270
coduf                       0
codmun                    270
codRegiaoSaude            270
nomeRegiaoSaude           270
data                        0
semanaEpi                   0
populacaoTCU2019            0
casosAcumulado              0
casosNovos                  0
obitosAcumulado             0
obitosNovos                 0
Recuperadosnovos           54
emAcompanhamentoNovos      54
interior/metropolitana    270
dtype: int64

In [7]:
# Verificando se de fato só tem dados do Brasil e não dos seus estados
df_brazil["populacaoTCU2019"].unique()

array([2.10147125e+08])

In [8]:
# Salvando numero de populacao
population_brazil = int(df_brazil["populacaoTCU2019"][0])
population_brazil

210147125

In [9]:
# Excluindo colunas desnecessarias
df_brazil = df_brazil.drop(columns=[
  "estado", "municipio", "coduf", "codmun", "codRegiaoSaude", "nomeRegiaoSaude", "populacaoTCU2019", "interior/metropolitana", "regiao"
])
df_brazil

Unnamed: 0,data,semanaEpi,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos
0,2020-02-25,9,0,0,0,0,,
1,2020-02-26,9,1,1,0,0,,
2,2020-02-27,9,1,0,0,0,,
3,2020-02-28,9,1,0,0,0,,
4,2020-02-29,9,2,1,0,0,,
...,...,...,...,...,...,...,...,...
265,2020-11-16,47,5876464,13371,166014,216,5322406.0,388044.0
266,2020-11-17,47,5911758,35294,166699,685,5361592.0,383467.0
267,2020-11-18,47,5945849,34091,167455,756,5389863.0,388531.0
268,2020-11-19,47,5981767,35918,168061,606,5407498.0,406208.0


In [10]:
# Formatando as datas
df_brazil["data"] = pd.to_datetime(df_brazil["data"], format="%Y-%m-%d")

In [11]:
# Listando os tipos
df_brazil.dtypes

data                     datetime64[ns]
semanaEpi                         int64
casosAcumulado                    int64
casosNovos                        int64
obitosAcumulado                   int64
obitosNovos                       int64
Recuperadosnovos                float64
emAcompanhamentoNovos           float64
dtype: object

In [12]:
# Transformando recuperados e am acompanhamentos em Int64
df_brazil["Recuperadosnovos"] = df_brazil["Recuperadosnovos"].astype("Int64")
df_brazil["emAcompanhamentoNovos"] = df_brazil["emAcompanhamentoNovos"].astype("Int64")
df_brazil.dtypes


data                     datetime64[ns]
semanaEpi                         int64
casosAcumulado                    int64
casosNovos                        int64
obitosAcumulado                   int64
obitosNovos                       int64
Recuperadosnovos                  Int64
emAcompanhamentoNovos             Int64
dtype: object

#### Gerando relatório mensal

In [13]:
report = []

In [14]:
# Funcao para calcular novos casos e novas mortes suas variações
def calc_cases_and_deads(data, month):
    new_cases = data["casosNovos"].sum()
    new_deads = data["obitosNovos"].sum()

    absolute_variate_cases = None
    absolute_variate_deads = None

    if month > 2:
        cases_last_month = report[month-3][1]
        absolute_variate_cases = abs(new_cases - cases_last_month )
        
        deads_last_month = report[month-3][3]
        absolute_variate_deads = abs(new_deads - deads_last_month )
    
    return (new_cases, absolute_variate_cases, new_deads, absolute_variate_deads)

In [15]:
# Funcao para calcular casos e obitos acumulados
def calc_accumulate(new_cases, new_deads, month):
  if month > 2:
    accumulate_cases = report[month-3][5] + new_cases
    accumulate_deads = report[month-3][6] + new_deads
  else:
    accumulate_cases = new_cases
    accumulate_deads = new_deads
  
  return (accumulate_cases, accumulate_deads)


In [16]:
def calc_for_100k(new_cases, new_deads):
  cases_for_100k = (new_cases / population_brazil) * 100000
  cases_for_100k = round(cases_for_100k, 2)
  
  deads_for_100k = (new_deads / population_brazil) * 100000
  deads_for_100k = round(deads_for_100k, 2)
  
  return (cases_for_100k, deads_for_100k)

In [17]:
for month in range(2,12):
    df_month_brazil = df_brazil[df_brazil["data"].dt.month == month]
    new_cases, absolute_variate_cases, new_deads, absolute_variate_deads = calc_cases_and_deads(df_month_brazil, month)
    accumulate_cases, accumulate_deads = calc_accumulate(new_cases, new_deads, month)
    cases_for_100k, deads_for_100k = calc_for_100k(new_cases, new_deads)
    
    days = df_month_brazil.shape[0]
    
    data = [month, new_cases, absolute_variate_cases, new_deads, absolute_variate_deads, accumulate_cases, accumulate_deads, cases_for_100k, deads_for_100k, days]
    report.append(data)

In [18]:

report = pd.DataFrame(report, 
    columns = ["mes", "casosNovos", "variacaoDeCasos", "obitosNovos", "variacaoDeObitos", "casosAcumulados", "obitosAcumulados", "Casos por 100 mil habitantes", "Obitos por 100 mil habitantes", "Quantidade de dias"])
report = report.set_index("mes")

In [19]:
# Mudando tipo das variações
report["variacaoDeObitos"] = report["variacaoDeObitos"].astype("Int64")
report["variacaoDeCasos"] = report["variacaoDeObitos"].astype("Int64")
report

Unnamed: 0_level_0,casosNovos,variacaoDeCasos,obitosNovos,variacaoDeObitos,casosAcumulados,obitosAcumulados,Casos por 100 mil habitantes,Obitos por 100 mil habitantes,Quantidade de dias
mes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,2,,0,,2,0,0.0,0.0,5
3,5715,201.0,201,201.0,5717,201,2.72,0.1,31
4,79663,5499.0,5700,5499.0,85380,5901,37.91,2.71,30
5,428820,17713.0,23413,17713.0,514200,29314,204.06,11.14,31
6,887841,6867.0,30280,6867.0,1402041,59594,422.49,14.41,30
7,1260444,2601.0,32881,2601.0,2662485,92475,599.79,15.65,31
8,1245787,3975.0,28906,3975.0,3908272,121381,592.82,13.76,31
9,902663,6335.0,22571,6335.0,4810935,143952,429.54,10.74,30
10,724670,6639.0,15932,6639.0,5535605,159884,344.84,7.58,31
11,484559,7203.0,8729,7203.0,6020164,168613,230.58,4.15,20


# To do:
 - Plotar graficos do mes
 - Plotar graficos com todos os dias