## Dados sobre a COVID-19 no Brasil

Repositório: https://github.com/wcota/covid19br/

Descrição dos dados: https://github.com/wcota/covid19br/blob/master/DESCRIPTION.en.md

In [1]:
%config Completer.use_jedi = False

In [2]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# import directly from GitHub
df = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")

# change date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# print all available columns
print(df.columns, end='\n')

df.sample(3)

Index(['epi_week', 'date', 'country', 'state', 'city', 'newDeaths', 'deaths',
       'newCases', 'totalCases', 'deathsMS', 'totalCasesMS',
       'deaths_per_100k_inhabitants', 'totalCases_per_100k_inhabitants',
       'deaths_by_totalCases', 'recovered', 'suspects', 'tests',
       'tests_per_100k_inhabitants', 'vaccinated',
       'vaccinated_per_100k_inhabitants', 'vaccinated_second',
       'vaccinated_second_per_100k_inhabitants'],
      dtype='object')


Unnamed: 0,epi_week,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants,vaccinated,vaccinated_per_100k_inhabitants,vaccinated_second,vaccinated_second_per_100k_inhabitants
6052,42,2020-10-14,Brazil,RJ,TOTAL,104,19440,1152,285205,19440,285205,112.59811,1651.93132,0.06816,262009.0,44470.0,813560.0,4712.20785,,,,
8709,103,2021-01-17,Brazil,PE,TOTAL,27,10020,482,241087,10020,241087,104.84384,2522.60342,0.04156,205511.0,22777.0,1015708.0,10627.81683,,,,
5255,38,2020-09-16,Brazil,CE,TOTAL,15,8764,1156,230406,8764,230406,95.96939,2523.04021,0.03804,204373.0,88176.0,703363.0,7702.11336,,,,


In [3]:
# filter data for Brazil, and show only the specified columns
data_BR = df.query("state == 'TOTAL'")[['date', 'totalCases', 'totalCasesMS', 'deaths', 'deathsMS', 'suspects', 'recovered', 'tests', 'vaccinated', 'vaccinated_second']]

# create new columns
data_BR['activeCases'] = data_BR['totalCases'] - data_BR['deaths'] - data_BR['recovered']
data_BR['activeCasesMS'] = data_BR['totalCasesMS'] - data_BR['deathsMS'] - data_BR['recovered']
data_BR['activeCasesDiff'] = data_BR['activeCases'] - data_BR['activeCasesMS']
data_BR['deathsDiff'] = data_BR['deaths'] - data_BR['deathsMS']
data_BR['newVaccinated'] = data_BR['vaccinated'].diff()
data_BR['newVaccinated_second'] = data_BR['vaccinated_second'].diff()

# display the dataframe
data_BR.sample(3)

Unnamed: 0,date,totalCases,totalCasesMS,deaths,deathsMS,suspects,recovered,tests,vaccinated,vaccinated_second,activeCases,activeCasesMS,activeCasesDiff,deathsDiff,newVaccinated,newVaccinated_second
657,2020-04-04,10381,10278,445,432,74380.0,78.0,,,,9858.0,9768.0,90.0,13,,
3849,2020-07-27,2449677,2442375,87802,87618,1011487.0,1848326.0,6047128.0,,,513549.0,506431.0,7118.0,184,,
7,2020-02-28,2,1,0,0,,,,,,,,,0,,


In [69]:
import cufflinks as cf
cf.go_offline()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import iplot
import plotly.express as px

In [97]:
# data_BR.iplot(y=['activeCases', 'activeCasesMS'],
#               x='date',width=2.0,
#               secondary_y='activeCasesDiff', secondary_y_title='Diferença',
#               xTitle='Data', yTitle='Casos Ativos', title='Casos Ativos - MS x Consórcio')
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['activeCases'],
        line=dict(width=3),
        name='Casos Ativos'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['activeCasesMS'],
        line=dict(width=3),
        name='Casos Ativos - MS'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(
        x = data_BR['date'],
        y = data_BR['activeCasesDiff'],
        name='diferença'
    ),
    secondary_y=True,
)

fig.update_layout(
    title='<b>Casos Ativos - MS x Consórcio</b>',
    hovermode='x unified',
    separators=',.',
    plot_bgcolor='#fafafa'
)

# Anotações
fig.add_annotation(x=data_BR.loc[data_BR['activeCasesDiff'].idxmax(), 'date'], y=data_BR['activeCasesDiff'].max(),
            yref='y2',
            text="> "+str(round(data_BR['activeCasesDiff'].max(),-3)),
            showarrow=True,
            arrowhead=1)

# Set x-axis title
fig.update_xaxes(title_text="Data")

# Set y-axes titles
fig.update_yaxes(title_text="Casos Confirmados", secondary_y=False)
fig.update_yaxes(title_text="Diferença", secondary_y=True)

fig.show()
fig.write_html("graficos/casos-ativos_x_consorcio.html")

In [99]:
# data_BR.iplot(y=['deaths', 'deathsMS'],
#               x='date',width=2.0,
#               secondary_y='deathsDiff', secondary_y_title='Diferença',
#               xTitle='Data', yTitle='Óbitos', title='Óbitos - MS x Consórcio')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['deaths'],
        line=dict(width=3, color='orange'),
        name='Óbitos'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['deathsMS'],
        line=dict(width=3, color='blue'),
        name='Óbitos - MS'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(
        x = data_BR['date'],
        y = data_BR['deathsDiff'],
        name='diferença'
    ),
    secondary_y=True,
)

fig.update_layout(
    title='<b>Óbitos - MS x Consórcio</b>',
    hovermode='x unified',
    separators=',.',
    plot_bgcolor='#fafafa'
)

# Anotações
fig.add_annotation(x=data_BR.loc[data_BR['deathsDiff'].idxmax(), 'date'], y=data_BR['deathsDiff'].max(),
            yref='y2',
            text="> "+str(round(data_BR['deathsDiff'].max(),-2)),
            showarrow=True,
            arrowhead=1)

# Set x-axis title
fig.update_xaxes(title_text="Data")

# Set y-axes titles
fig.update_yaxes(title_text="Óbitos Confirmados", secondary_y=False)
fig.update_yaxes(title_text="Diferença", secondary_y=True)

fig.show()
fig.write_html("graficos/obitos_x_consorcio.html")

In [88]:
round(data_BR['deathsDiff'].max(),-1)

900

In [82]:
# data_BR[~data_BR['vaccinated'].isna()].iplot(
#     x='date', 
#     y=['vaccinated', 'vaccinated_second'], 
#     title='Evolução da vacinação'    
# )

_dv = data_BR[~data_BR['vaccinated'].isna()]
fig = go.Figure(go.Scatter(
    x = _dv['date'],
    y = _dv['vaccinated'],
    line=dict(color='purple', width=3),
    name='primeira dose'

))

fig.add_trace(go.Scatter(
    x = _dv['date'],
    y = _dv['vaccinated_second'],
    line=dict(color='orange', width=3),
    name='segunda dose'
))

fig.update_layout(
    title = 'Evolução da vacinação',
    xaxis_tickformat = '%d %b %Y'
)
fig.update_traces(hovertemplate='Data: %{x} <br>Total: %{y:,.0f}')
fig.update_layout(hovermode='x unified', separators=',.', plot_bgcolor='#fafafa')
fig.show()
fig.write_html("graficos/evolucao-vacinacao.html")

In [8]:
# import gzip file with cities data
chunk_size = 50000
df_cities = pd.DataFrame()

_dc = pd.read_csv("https://github.com/wcota/covid19br/blob/master/cases-brazil-cities-time.csv.gz?raw=true", 
                        compression='gzip',
                        iterator=True,
                        chunksize=chunk_size
                       )
for df in _dc:
    df_cities = df_cities.append(df)


df_cities.sample(3)

Unnamed: 0,epi_week,date,country,state,city,ibgeID,cod_RegiaoDeSaude,name_RegiaoDeSaude,newDeaths,deaths,newCases,totalCases,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,_source,last_info_date
323695,28,2020-07-07,Brazil,MG,Ituiutaba/MG,3134202,31073.0,Ituiutaba,0,2,0,109,1.90015,103.55803,0.01835,SES,2021-03-31
303211,27,2020-07-03,Brazil,PR,Paula Freitas/PR,4118600,41006.0,6ª RS União da Vitória,0,0,0,11,0.0,186.18822,0.0,SES,2021-04-30
29140,17,2020-04-25,Brazil,SP,Bady Bassitt/SP,3504602,35155.0,São José do Rio Preto,0,0,0,2,0.0,11.26063,0.0,SES,2021-04-26


In [9]:
df_cities['date'] = pd.to_datetime(df_cities['date'])

In [10]:
df_cities['ibgeID'].dtype

dtype('int64')

In [11]:
df_cities.shape

(1991561, 17)

In [12]:
df_gps_cities = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/gps_cities.csv")
df_gps_cities.sample(3)

Unnamed: 0,ibgeID,id,lat,lon,longName
1644,5221197.0,Terezópolis de Goiás/GO,-16.483787,-49.09493,"Terezópolis de Goiás, Microrregião de Goiânia,..."
530,2503803.0,Caldas Brandão/PB,-7.102796,-35.325468,"Caldas Brandão, Região Geográfica Imediata de ..."
4330,4200606.0,Águas Mornas/SC,-27.696316,-48.824783,"Águas Mornas, Microrregião de Tabuleiro, Santa..."


In [13]:
df_gps_cities.shape

(5596, 5)

In [14]:
# verifica se tem algum codigo ibge repetido
ibge_rep = np.any(df_gps_cities['ibgeID'].value_counts()>1)

# verifica se tem algum codigo fora do padrão
ibge_fora = bool(len(np.where(df_gps_cities['ibgeID'].values > 9999999)[0])) | bool(len(np.where(df_gps_cities['ibgeID'].values < 0)[0]))

# verifica se tem algum valor faltante
ibge_missing = df_gps_cities['ibgeID'].isna().sum()

print(f'cod. ibge repetido: {ibge_rep} \ncod. ibge fora do padrão: {ibge_fora} \nvalores faltantes: {ibge_missing}')

cod. ibge repetido: False 
cod. ibge fora do padrão: False 
valores faltantes: 26


In [15]:
# removendo as linhas cujo campo ibgeID está faltando
df_gps_cities = df_gps_cities.dropna(subset=['ibgeID'])

In [16]:
# convertendo o tipo da coluna ibeID do df_gps_cities para o mesmo tipo da coluna ibgeID do df_cities
df_gps_cities['ibgeID'] = df_gps_cities['ibgeID'].astype(int)

In [17]:
df_gps_cities['ibgeID'].dtype

dtype('int32')

In [18]:
# definindo as colunas 'lat' e 'lon' no df_cities com base no 'ibgeID' do df_gps_cities
df_cities['lat'] = df_cities['ibgeID'].map(df_gps_cities.set_index('ibgeID')['lat'])
df_cities['lon'] = df_cities['ibgeID'].map(df_gps_cities.set_index('ibgeID')['lon'])

In [19]:
df_cities.sample(3)

Unnamed: 0,epi_week,date,country,state,city,ibgeID,cod_RegiaoDeSaude,name_RegiaoDeSaude,newDeaths,deaths,newCases,totalCases,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,_source,last_info_date,lat,lon
578213,34,2020-08-22,Brazil,SP,Igaratá/SP,3520202,35171.0,Alto Vale do Paraíba,0,6,9,402,62.61087,4194.92852,0.01493,SES,2021-04-26,-23.206347,-46.156934
410796,30,2020-07-23,Brazil,MS,Deodápolis/MS,5003454,50003.0,Dourados,0,1,1,91,7.70179,700.8626,0.01099,SES,2021-04-14,-22.276272,-54.168163
1527601,106,2021-02-08,Brazil,RS,Barros Cassal/RS,4302006,43019.0,Região 19 - Botucaraí,0,2,0,263,17.88589,2351.99428,0.0076,SES,2021-04-30,-29.094938,-52.583237


In [83]:
_df = df_cities.query('date == @df_cities.date.max()')
mapa = px.scatter_mapbox( _df, lat='lat', lon='lon',  
                     hover_name='city',
                     hover_data=['totalCases', 'deaths'],
                     color_continuous_scale=px.colors.sequential.Plasma_r,
                     color='totalCases_per_100k_inhabitants',                                          
                     zoom= 3)
mapa.update_layout(mapbox_style='open-street-map')
mapa.update_layout(height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
mapa.show()
mapa.write_html("graficos/mapa-casos-p-100k-h.html")

In [23]:
import ipywidgets as widgets
from ipywidgets import fixed

In [24]:
f_date = df_cities['date'].dt.strftime('%Y-%m-%d').sort_values().unique().tolist()

In [40]:
# Controle Widget para data
date_limit = widgets.SelectionSlider(
    options= f_date,
    value= '2020-03-01',
    description= 'Data: ',
    continuous_update= False,
    orientation= 'horizontal',
    readout= True,
    style={'description_width': 'initial'}
)

def update_map(df, limit):


    _df = df.loc[df['date'] == pd.to_datetime(limit)].copy()
#     print(limit, end='\n')
#     print(pd.to_datetime(limit), end='\n')
#     print(_df, end='\n')
    mapa = px.scatter_mapbox( _df, lat='lat', lon='lon',  
                     hover_name='city',
                     hover_data=['totalCases', 'deaths'],
                     color_continuous_scale=px.colors.sequential.Plasma_r,
                     color='totalCases_per_100k_inhabitants',                                          
                     zoom= 3)
    mapa.update_layout(mapbox_style='open-street-map')
    mapa.update_layout(height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
    mapa.show()

In [41]:
# _df = df_cities.loc[df_cities['date'] == pd.to_datetime('2020-03-01')]

widgets.interactive( update_map, df=fixed(df_cities), limit=date_limit)

interactive(children=(SelectionSlider(continuous_update=False, description='Data: ', index=5, options=('2020-0…