# Análise Covid Brasil

## Carregar módulos

In [None]:
import numpy as np
import pandas as pd

## Carregar dados

- Dados Covid Brasil

In [None]:
# Download the compressed city-level time series straight from the raw
# GitHub endpoint. TLS certificate verification stays enabled (no
# --no-check-certificate), and -O pins the local file name so the next
# cell always finds the archive under the expected name.
! wget -nv -O cases-brazil-cities-time.csv.gz 'https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities-time.csv.gz'

--2021-07-17 16:26:51--  https://github.com/wcota/covid19br/blob/master/cases-brazil-cities-time.csv.gz?raw=true
Resolving github.com (github.com)... 13.114.40.48
Connecting to github.com (github.com)|13.114.40.48|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/wcota/covid19br/raw/master/cases-brazil-cities-time.csv.gz [following]
--2021-07-17 16:26:52--  https://github.com/wcota/covid19br/raw/master/cases-brazil-cities-time.csv.gz
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities-time.csv.gz [following]
--2021-07-17 16:26:52--  https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities-time.csv.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|18

In [None]:
# Decompress the archive. -f overwrites a CSV left over from a previous
# run, so the notebook survives "Restart Kernel -> Run All".
! gunzip -f cases-brazil-cities-time.csv.gz

In [None]:
# Load the decompressed Covid time series into a DataFrame.
dados_brasil = pd.read_csv(filepath_or_buffer='cases-brazil-cities-time.csv')

- Dados censo

In [None]:
# The census file uses ';' as field separator, ',' as decimal mark and
# Latin-1 encoding.
dados_censo = pd.read_csv(
    'dados_municipios_2010.csv',
    encoding='latin1',
    sep=';',
    decimal=',',
)

## Ajustar dados

In [None]:
# Most recent date present in the data. Series.max() is vectorized and
# skips missing values, unlike the builtin max(), which iterates element
# by element in Python and fails when NaN is mixed with strings.
max_date = dados_brasil['date'].max()

In [None]:
# Keep only the rows of the latest date and discard the rows whose state
# is 'TOTAL' (presumably the national aggregate - confirm against the data).
dados_brasil = dados_brasil.loc[
    dados_brasil['date'].eq(max_date) & dados_brasil['state'].ne('TOTAL')
]

In [None]:
# Columns of the Covid frame kept for the analysis.
cols = [
    'ibgeID',
    'date',
    'state',
    'city',
    'totalCases',
    'deaths',
]

In [None]:
# Restrict the frame to the selected columns and renumber the rows from 0.
dados_brasil = dados_brasil.loc[:, cols].reset_index(drop=True)

## Aqui começa o trabalho

## Questão 1

In [None]:
# (rows, columns) of the Covid frame after the date/state filter and the
# column selection above.
dados_brasil.shape

(5596, 6)

In [None]:
# (rows, columns) of the census table, to compare with the Covid frame
# before joining the two.
dados_censo.shape

(5565, 7)

## Join dos dados

- join dos dados

In [None]:
# Left join on the IBGE code: every Covid row is kept, and census columns
# are filled in where a matching ibgeID exists.
dados_brasil_covid = pd.merge(dados_brasil, dados_censo, on='ibgeID', how='left')

- dados sem merge

In [None]:
# Rows with at least one missing value after the join.
dados_sem_merge = dados_brasil_covid.loc[lambda df: df.isna().any(axis=1)]

In [None]:
# Distinct locations among the rows that have missing values.
dados_sem_merge.loc[:, ['ibgeID', 'state', 'city']].drop_duplicates()

Unnamed: 0,ibgeID,state,city
434,4220000,SC,Balneário Rincão/SC
781,12,AC,CASO SEM LOCALIZAÇÃO DEFINIDA/AC
782,27,AL,CASO SEM LOCALIZAÇÃO DEFINIDA/AL
783,13,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM
784,16,AP,CASO SEM LOCALIZAÇÃO DEFINIDA/AP
785,29,BA,CASO SEM LOCALIZAÇÃO DEFINIDA/BA
786,23,CE,CASO SEM LOCALIZAÇÃO DEFINIDA/CE
787,32,ES,CASO SEM LOCALIZAÇÃO DEFINIDA/ES
788,52,GO,CASO SEM LOCALIZAÇÃO DEFINIDA/GO
789,21,MA,CASO SEM LOCALIZAÇÃO DEFINIDA/MA


- dados sem missings

In [None]:
# Keep only the rows with no missing value in any column.
# NOTE(review): reset_index() without drop=True keeps the old row labels
# as an extra 'index' column - confirm that column is actually wanted.
dados_brasil_covid_filtrados = dados_brasil_covid.dropna(axis=0, how='any').reset_index()

## Índice de Gini

In [None]:
def gini(array):
    """Calculate the Gini coefficient of a numeric array.

    Implements the sorted-values formula
    ``sum((2*i - n - 1) * x_i) / (n * sum(x))`` with ``i = 1..n``, based on:
    http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

    Parameters
    ----------
    array : array-like of numbers
        Values to measure. Any shape is accepted; all values are treated
        equally (the input is flattened). Integer input is supported.
        The caller's data is never modified.

    Returns
    -------
    float
        The Gini coefficient. A single-element input yields 0.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    # Work on a private float64 copy: the in-place float arithmetic below
    # would fail with a casting error on an integer array.
    values = np.array(array, dtype=np.float64).flatten()
    if values.size == 0:
        raise ValueError("gini() requires at least one value")

    lowest = np.amin(values)
    if lowest < 0:
        # Values cannot be negative: shift so the minimum becomes 0.
        values -= lowest
    # Values cannot be 0: add a tiny offset so the total is never zero.
    values += 0.0000001
    # The formula assumes values in ascending order.
    values = np.sort(values)
    # Number of elements and 1-based rank of each sorted element.
    n = values.shape[0]
    index = np.arange(1, n + 1)
    # Gini coefficient.
    return np.sum((2 * index - n - 1) * values) / (n * np.sum(values))

In [None]:
# Gini coefficient of RDPC within each state, sorted ascending.
# A state with a single row yields 0 by construction of the formula.
(
    dados_brasil_covid_filtrados
    .groupby('state')['RDPC']
    .apply(lambda rendas: gini(rendas.values))
    .to_frame(name='GINI')
    .reset_index()
    .sort_values(by='GINI')
)

Unnamed: 0,state,GINI
6,DF,0.0
20,RO,0.127922
17,PR,0.131235
5,CE,0.132551
14,PB,0.134084
11,MS,0.134532
23,SC,0.137464
8,GO,0.138104
24,SE,0.140378
25,SP,0.14059


## Medidas estatísticas


In [None]:
# List the columns available in the joined, fully populated frame before
# choosing what to aggregate.
dados_brasil_covid_filtrados.columns

Index(['index', 'ibgeID', 'date', 'state', 'city', 'totalCases', 'deaths',
       'Municipio', 'ESPVIDA', 'E_ANOSESTUDO', 'T_ANALF18M', 'RDPC', 'IDHM'],
      dtype='object')

In [None]:
# Per-state summary: totals for cases and deaths, means and medians for the
# census indicators, and the standard deviation of RDPC. Each keyword is an
# output column built from an (input column, aggregation) pair.
# The standard deviation is NaN for a state with a single row.
(
    dados_brasil_covid_filtrados
    .groupby('state')
    .agg(
        CasosTotais=('totalCases', 'sum'),
        MortesTotais=('deaths', 'sum'),
        MediaESPVIDA=('ESPVIDA', 'mean'),
        MediaE_ANOSESTUDO=('E_ANOSESTUDO', 'mean'),
        MediaT_ANALF18M=('T_ANALF18M', 'mean'),
        Mediana_IDHM=('IDHM', 'median'),
        Mediana_RDPC=('RDPC', 'median'),
        DP_RDPC=('RDPC', 'std'),
    )
    .reset_index()
)

Unnamed: 0,state,CasosTotais,MortesTotais,MediaESPVIDA,MediaE_ANOSESTUDO,MediaT_ANALF18M,Mediana_IDHM,Mediana_RDPC,DP_RDPC
0,AC,86644,1776,70.991818,7.752273,26.106818,0.5885,313.705,127.780499
1,AL,225064,5610,69.367843,8.896961,35.308529,0.5625,233.44,79.124345
2,AM,410367,13421,70.924677,7.451774,20.889194,0.564,227.585,98.250134
3,AP,119540,1877,72.028125,9.035625,13.9975,0.641,402.44,133.018963
4,BA,1157427,24835,70.570288,8.453453,25.816427,0.589,272.86,101.927806
5,CE,896739,23192,70.674076,9.675054,29.604728,0.612,256.84,76.335807
6,DF,440734,9455,77.35,9.87,3.66,0.824,1715.11,
7,ES,528198,11493,74.536026,9.209231,13.27141,0.6835,533.375,203.473289
8,GO,708319,19994,74.522683,9.800813,13.726098,0.697,573.815,148.26256
9,MA,329943,9417,69.431198,9.057972,29.225945,0.572,217.65,90.115351
