### Informações sobre Covid-19 no Brasil dos últimos 6 meses via API

#### Bibliotecas Utilizadas

In [14]:
import requests
import pandas as pd
from pandas import DataFrame
from datetime import date, timedelta

#### Extração de dados

In [3]:
# API documentation: https://covid19-brazil-api-docs.vercel.app
# Service used: "Lista casos no brasil em data específica" (cases in Brazil on a specific date)

# Query the API once per day for the last 180 days (about 6 months),
# from 180 days ago up to, but not including, today.
today = date.today()
start_date = today + timedelta(days= -180)
url = "https://covid19-brazil-api.now.sh/api/report/v1/brazil/"

# Maps each UF code to the list of its daily records, in the order they were fetched.
result_dict = {}

# One session is reused for every request so the connection can be kept alive
# across the 180 calls instead of being reopened each time.
with requests.Session() as session:
  while start_date < today:
    day = start_date.strftime("%Y%m%d")

    # Bound the wait with a timeout and fail with an explicit HTTPError on a
    # 4xx/5xx answer instead of trying to parse an error page as JSON.
    http_response = session.get(url + day, timeout=30)
    http_response.raise_for_status()
    response = http_response.json()

    # Each item in response['data'] is the record of one UF for that day.
    # A payload without 'data' (or with a null one) is treated as a day with no records.
    for item in response.get('data') or []:
      # Create the UF's list on first sight, then append the record to it.
      result_dict.setdefault(item['uf'], []).append(item)

    # Move on to the next day.
    start_date = start_date + timedelta(days= 1)

In [6]:
# Inspect result_dict; the notebook renders its repr as the cell output.
result_dict

{'SP': [{'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1062634,
   'deaths': 37992,
   'suspects': 5334,
   'refuses': 596,
   'datetime': '2020-10-17T21:35:37.785Z'},
  {'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1063602,
   'deaths': 38020,
   'suspects': 5334,
   'refuses': 596,
   'datetime': '2020-10-18T22:35:40.336Z'},
  {'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1064039,
   'deaths': 38035,
   'suspects': 5334,
   'refuses': 596,
   'datetime': '2020-10-19T22:36:08.966Z'},
  {'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1068962,
   'deaths': 38246,
   'suspects': 5334,
   'refuses': 596,
   'datetime': '2020-10-20T22:35:34.412Z'},
  {'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1073261,
   'deaths': 38371,
   'suspects': 5334,
   'refuses': 596,
   'datetime': '2020-10-22T17:36:15.361Z'},
  {'uid': 35,
   'uf': 'SP',
   'state': 'São Paulo',
   'cases': 1083641,
   'deaths': 38608,
   '

#### Tratamento e Limpeza de Dados

In [41]:
# Build one DataFrame per UF from result_dict and stack them into a single frame.
uf_frames = []
for uf_key, uf_records in result_dict.items():
  new_df = pd.DataFrame(uf_records)

  # Per-record increments of the cumulative counters. diff() compares each row
  # with the previous row of the same UF, which is the previous record fetched,
  # not necessarily the previous calendar day. The first row has no predecessor,
  # so its NaN is replaced by 0.
  new_df['casesday'] = new_df['cases'].diff().fillna(0)
  new_df['deathsday'] = new_df['deaths'].diff().fillna(0)

  uf_frames.append(new_df)

# Concatenate once at the end instead of inside the loop, which would re-copy
# the accumulated frame on every iteration. Each UF keeps its own 0-based index,
# so index labels repeat across UFs. covid_df stays None when result_dict is empty.
covid_df = pd.concat(uf_frames) if uf_frames else None

In [34]:
# Inspect covid_df after the per-UF frames were combined.
covid_df

Unnamed: 0,uid,uf,state,cases,deaths,suspects,refuses,datetime,casesday,deathsday
0,35,SP,São Paulo,1062634,37992,5334,596,2020-10-17T21:35:37.785Z,0.0,0.0
1,35,SP,São Paulo,1063602,38020,5334,596,2020-10-18T22:35:40.336Z,968.0,28.0
2,35,SP,São Paulo,1064039,38035,5334,596,2020-10-19T22:36:08.966Z,437.0,15.0
3,35,SP,São Paulo,1068962,38246,5334,596,2020-10-20T22:35:34.412Z,4923.0,211.0
4,35,SP,São Paulo,1073261,38371,5334,596,2020-10-22T17:36:15.361Z,4299.0,125.0
...,...,...,...,...,...,...,...,...,...,...
142,12,AC,Acre,72403,1325,12,0,2021-04-08T22:40:02.351Z,523.0,7.0
143,12,AC,Acre,72897,1334,12,0,2021-04-09T22:39:56.330Z,494.0,9.0
144,12,AC,Acre,73425,1343,12,0,2021-04-11T22:39:53.760Z,528.0,9.0
145,12,AC,Acre,74007,1367,12,0,2021-04-13T22:39:55.572Z,582.0,24.0


In [35]:
# Drop the columns that carry no relevant information for the analysis.
# errors='ignore' keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because the columns are already gone.
covid_df = covid_df.drop(['suspects','refuses'], axis=1, errors='ignore')

In [36]:
# Add the columns from a local JSON file (latitude and longitude, per the
# original note), matching its 'uf' index against the 'uf' column of covid_df.
states_geo = pd.read_json('./brazil_states_lat_long.json')
states_geo = states_geo.set_index('uf')
covid_df = covid_df.join(states_geo, on='uf')

In [37]:
# Inspect covid_df again at this point in the notebook.
covid_df

Unnamed: 0,uid,uf,state,cases,deaths,datetime,casesday,deathsday,latitude,longitude
0,35,SP,São Paulo,1062634,37992,2020-10-17T21:35:37.785Z,0.0,0.0,-22.19,-48.79
1,35,SP,São Paulo,1063602,38020,2020-10-18T22:35:40.336Z,968.0,28.0,-22.19,-48.79
2,35,SP,São Paulo,1064039,38035,2020-10-19T22:36:08.966Z,437.0,15.0,-22.19,-48.79
3,35,SP,São Paulo,1068962,38246,2020-10-20T22:35:34.412Z,4923.0,211.0,-22.19,-48.79
4,35,SP,São Paulo,1073261,38371,2020-10-22T17:36:15.361Z,4299.0,125.0,-22.19,-48.79
...,...,...,...,...,...,...,...,...,...,...
142,12,AC,Acre,72403,1325,2021-04-08T22:40:02.351Z,523.0,7.0,-8.77,-70.55
143,12,AC,Acre,72897,1334,2021-04-09T22:39:56.330Z,494.0,9.0,-8.77,-70.55
144,12,AC,Acre,73425,1343,2021-04-11T22:39:53.760Z,528.0,9.0,-8.77,-70.55
145,12,AC,Acre,74007,1367,2021-04-13T22:39:55.572Z,582.0,24.0,-8.77,-70.55


In [32]:
# Export the frame to a CSV file, Latin-1 encoded and without the index column.
covid_df.to_csv(
  './data_covid.csv',
  index=False,
  encoding='latin-1',
)

In [None]:
# Tableau Public access link
['https://public.tableau.com/profile/bruna.carolino.de.souza#!/vizhome/CoronavirusBrazil/DashCases']