# Análise dos casos de Covid-19 em Santa Catarina

In [2]:
# bibliotecas
import pandas as pd
import folium
import requests
import json

%matplotlib inline

In [3]:
# Endpoint of the brasil.io API with the Covid-19 case data, filtered to the state of SC
url = 'https://brasil.io/api/dataset/covid19/caso/data?state=SC'
# Fetch the first page and keep the HTTP response in 'response'.
# The timeout avoids hanging the kernel forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail right here on an HTTP error (4xx/5xx) instead of later with a
# confusing KeyError when the cells below look for 'results' in the payload
response.raise_for_status()

In [4]:
# Build the cases dataframe from the 'results' list of the first API page
primeira_pagina = response.json()['results']
df_dados = pd.DataFrame(primeira_pagina)

df_dados.head()

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
0,Águas Mornas,4200606,1,15.45834,2020-04-14,,0,6469.0,True,9,city,SC
1,Anita Garibaldi,4201000,1,14.01935,2020-04-14,,0,7133.0,True,8,city,SC
2,Antônio Carlos,4201208,11,129.21414,2020-04-14,0.2727,3,8513.0,True,19,city,SC
3,Araranguá,4201406,4,5.8627,2020-04-14,,0,68228.0,True,12,city,SC
4,Aurora,4201901,1,17.60873,2020-04-14,,0,5679.0,True,6,city,SC


In [5]:
# Summarise the column dtypes and non-null counts of the first page of results
df_dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
city                              985 non-null object
city_ibge_code                    984 non-null object
confirmed                         1000 non-null int64
confirmed_per_100k_inhabitants    982 non-null float64
date                              1000 non-null object
death_rate                        172 non-null float64
deaths                            1000 non-null int64
estimated_population_2019         984 non-null float64
is_last                           1000 non-null bool
order_for_place                   1000 non-null int64
place_type                        1000 non-null object
state                             1000 non-null object
dtypes: bool(1), float64(3), int64(3), object(5)
memory usage: 87.0+ KB


In [6]:
# The API splits large result sets over several pages: each payload carries a
# 'next' entry with the URL of the following page (falsy on the last one).
# Follow those links and gather every page into df_dados.

# Start again from the first response so that re-running this cell rebuilds
# df_dados from scratch instead of appending the same pages a second time.
parsed = response.json()
paginas = [pd.DataFrame(parsed['results'])]
next_page = parsed['next']

while next_page:
    res = requests.get(next_page, timeout=30)
    # Stop on an HTTP error instead of failing later on a missing 'results' key
    res.raise_for_status()
    # Parse the payload once and reuse it for both the rows and the next link
    parsed = res.json()
    paginas.append(pd.DataFrame(parsed['results']))
    next_page = parsed['next']

# DataFrame.append was removed in pandas 2.0; concatenating the collected pages
# once gives the same rows and is linear instead of re-copying on every page.
# Row labels are kept as they come from each page, so they repeat across pages.
df_dados = pd.concat(paginas)

df_dados.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1328 entries, 0 to 327
Data columns (total 12 columns):
city                              1294 non-null object
city_ibge_code                    1307 non-null object
confirmed                         1328 non-null int64
confirmed_per_100k_inhabitants    1304 non-null float64
date                              1328 non-null object
death_rate                        182 non-null float64
deaths                            1328 non-null int64
estimated_population_2019         1307 non-null float64
is_last                           1328 non-null bool
order_for_place                   1328 non-null int64
place_type                        1328 non-null object
state                             1328 non-null object
dtypes: bool(1), float64(3), int64(3), object(5)
memory usage: 125.8+ KB


In [7]:
# Preview the first 100 rows of the cases data after the pagination cell
df_dados.head(100)

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
0,Águas Mornas,4200606,1,15.45834,2020-04-14,,0,6469.0,True,9,city,SC
1,Anita Garibaldi,4201000,1,14.01935,2020-04-14,,0,7133.0,True,8,city,SC
2,Antônio Carlos,4201208,11,129.21414,2020-04-14,0.2727,3,8513.0,True,19,city,SC
3,Araranguá,4201406,4,5.86270,2020-04-14,,0,68228.0,True,12,city,SC
4,Aurora,4201901,1,17.60873,2020-04-14,,0,5679.0,True,6,city,SC
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Balneário Piçarras,4212809,2,8.64043,2020-04-13,,0,23147.0,False,5,city,SC
96,Barra Velha,4202107,1,3.42841,2020-04-13,,0,29168.0,False,5,city,SC
97,Biguaçu,4202305,2,2.92052,2020-04-13,,0,68481.0,False,12,city,SC
98,Blumenau,4202404,68,19.03701,2020-04-13,,0,357199.0,False,23,city,SC


In [8]:
# Filtering a single city shows that the API returns the municipality's
# case counts over the last month, one row per report date
so_floripa = df_dados['city'].eq('Florianópolis')
df_dados.loc[so_floripa]

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
22,Florianópolis,4205407,207,41.31959,2020-04-14,0.0145,3,500973.0,True,34,city,SC
109,Florianópolis,4205407,199,39.7227,2020-04-13,0.0151,3,500973.0,False,33,city,SC
195,Florianópolis,4205407,177,35.33125,2020-04-12,0.0169,3,500973.0,False,32,city,SC
278,Florianópolis,4205407,173,34.5328,2020-04-11,0.0173,3,500973.0,False,31,city,SC
360,Florianópolis,4205407,167,33.33513,2020-04-10,0.018,3,500973.0,False,30,city,SC
441,Florianópolis,4205407,166,33.13552,2020-04-09,0.0181,3,500973.0,False,29,city,SC
516,Florianópolis,4205407,123,24.55222,2020-04-08,0.0244,3,500973.0,False,28,city,SC
583,Florianópolis,4205407,114,22.75572,2020-04-07,0.0263,3,500973.0,False,27,city,SC
645,Florianópolis,4205407,99,19.76154,2020-04-06,0.0202,2,500973.0,False,26,city,SC
703,Florianópolis,4205407,94,18.76349,2020-04-05,0.0213,2,500973.0,False,25,city,SC


In [9]:
# Likewise, the rows with place_type 'state' hold the total case counts
# of the whole state over the last month
linhas_estado = df_dados['place_type'].eq('state')
df_dados.loc[linhas_estado]

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
86,,42,853,11.90545,2020-04-14,0.0328,28,7164788.0,True,34,state,SC
173,,42,826,11.5286,2020-04-13,0.0315,26,7164788.0,False,33,state,SC
256,,42,776,10.83075,2020-04-12,0.0309,24,7164788.0,False,32,state,SC
338,,42,732,10.21663,2020-04-11,0.0287,21,7164788.0,False,31,state,SC
419,,42,717,10.00727,2020-04-10,0.0251,18,7164788.0,False,30,state,SC
499,,42,693,9.6723,2020-04-09,0.026,18,7164788.0,False,29,state,SC
566,,42,501,6.99253,2020-04-08,0.0339,17,7164788.0,False,28,state,SC
629,,42,457,6.37842,2020-04-07,0.0328,15,7164788.0,False,27,state,SC
689,,42,417,5.82013,2020-04-06,0.0264,11,7164788.0,False,26,state,SC
744,,42,379,5.28976,2020-04-05,0.0264,10,7164788.0,False,25,state,SC


# Ajuste do dataframe com os dados de localização dos municípios

Depois de obter todos os dados referentes aos casos, resta criar um dataframe com as localizações dos municípios do estado.

In [10]:
# Load the location reference files shipped in ./data

# Municipalities: one row per municipality
df_muni = pd.read_csv('./data/municipios.csv')

# States: one row per state
df_estados = pd.read_csv('./data/estados.csv')

In [11]:
# Inspect the states dataframe (up to its first 27 rows)
df_estados.head(27)

Unnamed: 0,codigo_uf,uf,nome
0,11,RO,Rondônia
1,12,AC,Acre
2,13,AM,Amazonas
3,14,RR,Roraima
4,15,PA,Pará
5,16,AP,Amapá
6,17,TO,Tocantins
7,21,MA,Maranhão
8,22,PI,Piauí
9,23,CE,Ceará


In [12]:
# Inspect the municipalities dataframe (first 20 rows)
df_muni.head(20)

Unnamed: 0,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
0,5200050,Abadia de Goiás,-16.7573,-49.4412,0,52
1,3100104,Abadia dos Dourados,-18.4831,-47.3916,0,31
2,5200100,Abadiânia,-16.197,-48.7057,0,52
3,3100203,Abaeté,-19.1551,-45.4444,0,31
4,1500107,Abaetetuba,-1.72183,-48.8788,0,15
5,2300101,Abaiara,-7.34588,-39.0416,0,23
6,2900108,Abaíra,-13.2488,-41.6619,0,29
7,2900207,Abaré,-8.72073,-39.1162,0,29
8,4100103,Abatiá,-23.3049,-50.3133,0,41
9,4200051,Abdon Batista,-27.6126,-51.0233,0,42


In [13]:
# Only the municipalities of SC are needed, so they have to be singled out of df_muni.
# Looking at the states dataframe, the UF code of SC is 42, so count how many
# rows of the municipalities dataframe carry that code.
eh_sc = df_muni['codigo_uf'].eq(42)
df_muni.loc[eh_sc].count()

codigo_ibge    295
nome           295
latitude       295
longitude      295
capital        295
codigo_uf      295
dtype: int64

In [14]:
# Keep the SC municipalities (codigo_uf 42) in a dataframe of their own;
# .copy() makes it independent from df_muni
df_sc = df_muni.loc[lambda municipios: municipios['codigo_uf'] == 42].copy()

df_sc.head()

Unnamed: 0,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
9,4200051,Abdon Batista,-27.6126,-51.0233,0,42
11,4200101,Abelardo Luz,-26.5716,-52.3229,0,42
43,4200200,Agrolândia,-27.4087,-49.822,0,42
44,4200309,Agronômica,-27.2662,-49.708,0,42
53,4200408,Água Doce,-26.9985,-51.5528,0,42


In [15]:
# Check that the size of the dataframe matches the number of municipalities
# belonging to the state, which is 295
df_sc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 295 entries, 9 to 5569
Data columns (total 6 columns):
codigo_ibge    295 non-null int64
nome           295 non-null object
latitude       295 non-null float64
longitude      295 non-null float64
capital        295 non-null int64
codigo_uf      295 non-null int64
dtypes: float64(2), int64(3), object(1)
memory usage: 16.1+ KB


In [16]:
# Look at the cases data again before combining it with the municipality locations
df_dados.head()

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
0,Águas Mornas,4200606,1,15.45834,2020-04-14,,0,6469.0,True,9,city,SC
1,Anita Garibaldi,4201000,1,14.01935,2020-04-14,,0,7133.0,True,8,city,SC
2,Antônio Carlos,4201208,11,129.21414,2020-04-14,0.2727,3,8513.0,True,19,city,SC
3,Araranguá,4201406,4,5.8627,2020-04-14,,0,68228.0,True,12,city,SC
4,Aurora,4201901,1,17.60873,2020-04-14,,0,5679.0,True,6,city,SC


In [17]:
# Continue on copies of df_sc and df_dados so the originals remain as a backup:
# if something goes wrong, the whole notebook does not have to be executed again
df_sc_cp, df_dados_cp = df_sc.copy(), df_dados.copy()

In [18]:
# Column dtypes and non-null counts of the cases copy; city_ibge_code is the
# column later used to match the municipality locations
df_dados_cp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1328 entries, 0 to 327
Data columns (total 12 columns):
city                              1294 non-null object
city_ibge_code                    1307 non-null object
confirmed                         1328 non-null int64
confirmed_per_100k_inhabitants    1304 non-null float64
date                              1328 non-null object
death_rate                        182 non-null float64
deaths                            1328 non-null int64
estimated_population_2019         1307 non-null float64
is_last                           1328 non-null bool
order_for_place                   1328 non-null int64
place_type                        1328 non-null object
state                             1328 non-null object
dtypes: bool(1), float64(3), int64(3), object(5)
memory usage: 125.8+ KB


In [19]:
# List the rows of the cases data that carry no IBGE city code
sem_codigo = df_dados_cp['city_ibge_code'].isna()
df_dados_cp.loc[sem_codigo]

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
32,Importados/Indefinidos,,12,,2020-04-14,,0,,True,21,city,SC
119,Importados/Indefinidos,,11,,2020-04-13,,0,,False,20,city,SC
205,Importados/Indefinidos,,11,,2020-04-12,,0,,False,19,city,SC
288,Importados/Indefinidos,,11,,2020-04-11,,0,,False,18,city,SC
370,Importados/Indefinidos,,8,,2020-04-10,,0,,False,17,city,SC
451,Importados/Indefinidos,,11,,2020-04-09,,0,,False,16,city,SC
524,Importados/Indefinidos,,11,,2020-04-08,,0,,False,15,city,SC
589,Importados/Indefinidos,,10,,2020-04-07,,0,,False,14,city,SC
651,Importados/Indefinidos,,10,,2020-04-06,,0,,False,13,city,SC
709,Importados/Indefinidos,,9,,2020-04-05,,0,,False,12,city,SC


In [20]:
# Same summary for the municipalities copy; codigo_ibge is the matching column on this side
df_sc_cp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 295 entries, 9 to 5569
Data columns (total 6 columns):
codigo_ibge    295 non-null int64
nome           295 non-null object
latitude       295 non-null float64
longitude      295 non-null float64
capital        295 non-null int64
codigo_uf      295 non-null int64
dtypes: float64(2), int64(3), object(1)
memory usage: 16.1+ KB


In [21]:
# The municipalities dataframe has to be combined with the cases dataframe, because
# the cases data has no latitude/longitude for each municipality.
# Both dataframes carry the IBGE city code, so that code is the key for the union.

# Plan: turn the IBGE code column of each dataframe into its index. The join then
# pairs rows by index, so every row of the cases data whose index also exists in
# the locations dataframe receives the location columns.

# For that to work both indexes must have the same type. .info() showed that the
# IBGE code column of the cases data is 'object', while the one in the locations
# dataframe is 'int'.

# So the df_sc_cp column is converted to object. Why object rather than int?
# The cases data contains an entry for cases attributed to other states or
# countries; it has no IBGE code, so that column cannot be converted to int.

# An alternative would be to replace that missing code with some numeric value
# and then convert the whole column to int.

# Store 'codigo_ibge' of the locations dataframe as strings
df_sc_cp = df_sc_cp.astype({'codigo_ibge': str})

In [22]:
# Confirm that codigo_ibge is now reported as object instead of int64
df_sc_cp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 295 entries, 9 to 5569
Data columns (total 6 columns):
codigo_ibge    295 non-null object
nome           295 non-null object
latitude       295 non-null float64
longitude      295 non-null float64
capital        295 non-null int64
codigo_uf      295 non-null int64
dtypes: float64(2), int64(2), object(2)
memory usage: 16.1+ KB


In [23]:
# With the conversion done, use the IBGE city code as the index of each dataframe.
# drop=False keeps the code available as a regular column too.
df_sc_cp = df_sc_cp.set_index('codigo_ibge', drop=False)
df_dados_cp = df_dados_cp.set_index('city_ibge_code', drop=False)

In [24]:
# Combine both dataframes into a new one. It is a left join on the index: every row
# of the cases data is kept and receives the location columns of the municipality
# with the same index value (NaN where there is no match).
df_final = df_dados_cp.join(df_sc_cp, how='left')

In [25]:
# Spot-check 10 random rows of the joined data. The fixed random_state makes the
# same rows come up on every run, so the displayed output is reproducible.
df_final.sample(10, random_state=0)

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
4207304.0,Imbituba,4207304.0,5,11.14753,2020-04-02,,0,44853.0,False,14,city,SC,4207304.0,Imbituba,-28.2284,-48.6659,0.0,42.0
4204202.0,Chapecó,4204202.0,5,2.26894,2020-04-01,,0,220367.0,False,13,city,SC,4204202.0,Chapecó,-27.1004,-52.6152,0.0,42.0
4202909.0,Brusque,4202909.0,16,11.87622,2020-04-11,,0,134723.0,False,15,city,SC,4202909.0,Brusque,-27.0977,-48.9107,0.0,42.0
4209102.0,Joinville,4209102.0,51,8.63725,2020-04-10,0.0196,1,590466.0,False,29,city,SC,4209102.0,Joinville,-26.3045,-48.8487,0.0,42.0
42.0,,42.0,235,3.27993,2020-03-31,0.0085,2,7164788.0,False,20,state,SC,,,,,,
4201901.0,Aurora,4201901.0,1,17.60873,2020-04-09,,0,5679.0,False,1,city,SC,4201901.0,Aurora,-27.3098,-49.6295,0.0,42.0
4203006.0,Caçador,4203006.0,3,3.81704,2020-04-13,,0,78595.0,False,5,city,SC,4203006.0,Caçador,-26.7757,-51.012,0.0,42.0
42.0,,42.0,247,3.44742,2020-04-01,0.0081,2,7164788.0,False,21,state,SC,,,,,,
42.0,,42.0,7,0.0977,2020-03-16,,0,7164788.0,False,5,state,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-13,,0,,False,20,city,SC,,,,,,


In [26]:
# Besides the municipalities, the cases data also holds the history for the whole
# state and the 'other countries and states' entries (which the author believes are
# residents of Santa Catarina with confirmed cases who are not in the state).
# Those rows found no match in the locations dataframe:
linhas_estado = df_final['place_type'].eq('state')
df_final.loc[linhas_estado].head()

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
42,,42,853,11.90545,2020-04-14,0.0328,28,7164788.0,True,34,state,SC,,,,,,
42,,42,826,11.5286,2020-04-13,0.0315,26,7164788.0,False,33,state,SC,,,,,,
42,,42,776,10.83075,2020-04-12,0.0309,24,7164788.0,False,32,state,SC,,,,,,
42,,42,732,10.21663,2020-04-11,0.0287,21,7164788.0,False,31,state,SC,,,,,,
42,,42,717,10.00727,2020-04-10,0.0251,18,7164788.0,False,30,state,SC,,,,,,


In [27]:
# Rows of the 'Importados/Indefinidos' bucket, which also have no location columns
eh_importado = df_final['city'].eq('Importados/Indefinidos')
df_final.loc[eh_importado]

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
,Importados/Indefinidos,,12,,2020-04-14,,0,,True,21,city,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-13,,0,,False,20,city,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-12,,0,,False,19,city,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-11,,0,,False,18,city,SC,,,,,,
,Importados/Indefinidos,,8,,2020-04-10,,0,,False,17,city,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-09,,0,,False,16,city,SC,,,,,,
,Importados/Indefinidos,,11,,2020-04-08,,0,,False,15,city,SC,,,,,,
,Importados/Indefinidos,,10,,2020-04-07,,0,,False,14,city,SC,,,,,,
,Importados/Indefinidos,,10,,2020-04-06,,0,,False,13,city,SC,,,,,,
,Importados/Indefinidos,,9,,2020-04-05,,0,,False,12,city,SC,,,,,,


In [28]:
# Como definimos o índice dos dados como sendo o código do IBGE, as cidades correspondentes
# a 'Importados/Indefinidos' ficaram sem índice

# Gerando os mapas

In [29]:
# Base map positioned over Santa Catarina
centro_sc = [-27.097889, -51.150770]  # coordinates taken from Google Maps
sc = folium.Map(location=centro_sc, zoom_start=7)

In [30]:
# Draw one marker per municipality that has confirmed cases.
# A row is plotted only when it:
#   - is the latest report for its place (is_last) and has at least one case;
#   - is a real municipality, i.e. neither the state-wide total nor the
#     'Importados/Indefinidos' bucket;
#   - actually received coordinates in the join. folium rejects NaN locations,
#     so a single city without a match in the locations file would otherwise
#     abort the loop with an exception.
# NOTE: the markers are added to the existing map object 'sc'; re-running only
# this cell adds them a second time, so re-create the map first when re-running.
com_casos = df_final[
    df_final['is_last']
    & (df_final['confirmed'] > 0)
    & (df_final['place_type'] != 'state')
    & (df_final['city'] != 'Importados/Indefinidos')
    & df_final['latitude'].notna()
    & df_final['longitude'].notna()
]

for indice, municipio in com_casos.iterrows():
    folium.Marker(
        location=[municipio['latitude'], municipio['longitude']],
        popup= '%s, %s Caso(s) ' %(municipio['city'], municipio['confirmed']),
        icon=folium.map.Icon(color='green')
    ).add_to(sc)

# Display the map
sc