## Dados sobre a COVID-19 no Brasil

Repositório: https://github.com/wcota/covid19br/

Descrição dos dados: https://github.com/wcota/covid19br/blob/master/DESCRIPTION.en.md

In [1]:
%config Completer.use_jedi = False

In [2]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# import directly from GitHub
df = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")

# change date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# print all available columns
print(df.columns, end='\n')

df.sample(3)

Index(['epi_week', 'date', 'country', 'state', 'city', 'newDeaths', 'deaths',
       'newCases', 'totalCases', 'deathsMS', 'totalCasesMS',
       'deaths_per_100k_inhabitants', 'totalCases_per_100k_inhabitants',
       'deaths_by_totalCases', 'recovered', 'suspects', 'tests',
       'tests_per_100k_inhabitants', 'vaccinated',
       'vaccinated_per_100k_inhabitants', 'vaccinated_second',
       'vaccinated_second_per_100k_inhabitants'],
      dtype='object')


Unnamed: 0,epi_week,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants,vaccinated,vaccinated_per_100k_inhabitants,vaccinated_second,vaccinated_second_per_100k_inhabitants
4583,35,2020-08-23,Brazil,CE,TOTAL,1,8289,510,205441,8289,205441,90.76795,2249.66322,0.04035,175237.0,88308.0,583787.0,6392.70711,,,,
2472,24,2020-06-08,Brazil,RS,TOTAL,5,291,115,12250,291,12250,2.55774,107.67111,0.02376,8587.0,457.0,67913.0,596.91987,,,,
6541,45,2020-11-01,Brazil,AP,TOTAL,0,748,325,52500,748,52500,88.4442,6207.64759,0.01425,44424.0,4074.0,94818.0,11211.36626,,,,


In [3]:
# filter data for Brazil, and show only the specified columns
data_BR = df.query("state == 'TOTAL'")[['date', 'state', 'totalCases', 'totalCasesMS', 'deaths', 'deathsMS', 'suspects', 'recovered', 'tests', 'vaccinated', 'vaccinated_second']]

# create new columns
data_BR['activeCases'] = data_BR['totalCases'] - data_BR['deaths'] - data_BR['recovered']
data_BR['activeCasesMS'] = data_BR['totalCasesMS'] - data_BR['deathsMS'] - data_BR['recovered']
data_BR['activeCasesDiff'] = data_BR['activeCases'] - data_BR['activeCasesMS']
data_BR['deathsDiff'] = data_BR['deaths'] - data_BR['deathsMS']
data_BR['newVaccinated'] = data_BR['vaccinated'].diff()
data_BR['newVaccinated_second'] = data_BR['vaccinated_second'].diff()

# display the dataframe
data_BR.sample(3)

Unnamed: 0,date,state,totalCases,totalCasesMS,deaths,deathsMS,suspects,recovered,tests,vaccinated,vaccinated_second,activeCases,activeCasesMS,activeCasesDiff,deathsDiff,newVaccinated,newVaccinated_second
293,2020-03-22,TOTAL,1594,1546,25,25,,,,,,,,,0,,
8553,2021-01-11,TOTAL,8140865,8131612,203750,203580,,7273237.0,29818855.0,,,663878.0,654795.0,9083.0,170,,
9365,2021-02-09,TOTAL,9608775,9599565,233720,233520,4538123.0,8577207.0,33355154.0,4069677.0,50655.0,797848.0,788838.0,9010.0,200,283086.0,17039.0


In [4]:
import cufflinks as cf
cf.go_offline()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import iplot
import plotly.express as px

In [5]:
# data_BR.iplot(y=['activeCases', 'activeCasesMS'],
#               x='date',width=2.0,
#               secondary_y='activeCasesDiff', secondary_y_title='Diferença',
#               xTitle='Data', yTitle='Casos Ativos', title='Casos Ativos - MS x Consórcio')
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['activeCases'],
        line=dict(width=3),
        name='Casos Ativos'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['activeCasesMS'],
        line=dict(width=3),
        name='Casos Ativos - MS'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(
        x = data_BR['date'],
        y = data_BR['activeCasesDiff'],
        name='diferença'
    ),
    secondary_y=True,
)

# customizações de layout
fig.update_layout(
    title='<b>Casos Ativos - MS x Consórcio</b>',
    hovermode='x unified',
    separators=',.',
    plot_bgcolor='#fafafa'
)

fig.update_traces(hovertemplate='%{y:,.0f}')

# Anotações
fig.add_annotation(x=data_BR.loc[data_BR['activeCasesDiff'].idxmax(), 'date'], y=data_BR['activeCasesDiff'].max(),
            yref='y2',
            text="> "+str(round(data_BR['activeCasesDiff'].max(),-3)),
            showarrow=True,
            arrowhead=1)

# Set x-axis title
fig.update_xaxes(title_text="Data")

# Set y-axes titles
fig.update_yaxes(title_text="Casos Confirmados", secondary_y=False)
fig.update_yaxes(title_text="Diferença", secondary_y=True)

fig.show()
fig.write_html("graficos/casos-ativos_x_consorcio.html")

In [6]:
# data_BR.iplot(y=['deaths', 'deathsMS'],
#               x='date',width=2.0,
#               secondary_y='deathsDiff', secondary_y_title='Diferença',
#               xTitle='Data', yTitle='Óbitos', title='Óbitos - MS x Consórcio')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['deaths'],
        line=dict(width=3, color='orange'),
        name='Óbitos'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x = data_BR['date'],
        y = data_BR['deathsMS'],
        line=dict(width=3, color='blue'),
        name='Óbitos - MS'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(
        x = data_BR['date'],
        y = data_BR['deathsDiff'],
        name='diferença'
    ),
    secondary_y=True,
)

# customização do layout
fig.update_layout(
    title='<b>Óbitos - MS x Consórcio</b>',
    hovermode='x unified',
    separators=',.',
    plot_bgcolor='#fafafa'
)

fig.update_traces(hovertemplate='%{y:,.0f}')

# Anotações
fig.add_annotation(x=data_BR.loc[data_BR['deathsDiff'].idxmax(), 'date'], y=data_BR['deathsDiff'].max(),
            yref='y2',
            text="> "+str(round(data_BR['deathsDiff'].max(),-2)),
            showarrow=True,
            arrowhead=1)

# Set x-axis title
fig.update_xaxes(title_text="Data")

# Set y-axes titles
fig.update_yaxes(title_text="Óbitos Confirmados", secondary_y=False)
fig.update_yaxes(title_text="Diferença", secondary_y=True)

fig.show()
fig.write_html("graficos/obitos_x_consorcio.html")

In [7]:
# data_BR[~data_BR['vaccinated'].isna()].iplot(
#     x='date', 
#     y=['vaccinated', 'vaccinated_second'], 
#     title='Evolução da vacinação'    
# )

_dv = data_BR[~data_BR['vaccinated'].isna()]
fig = go.Figure(go.Scatter(
    x = _dv['date'],
    y = _dv['vaccinated'],
    line=dict(color='purple', width=3),
    name='primeira dose'

))

fig.add_trace(go.Scatter(
    x = _dv['date'],
    y = _dv['vaccinated_second'],
    line=dict(color='orange', width=3),
    name='segunda dose'
))

fig.update_layout(
    title = '<b>Evolução da vacinação - COVID-19</b>',
    xaxis_tickformat = '%d %b %Y'
)
fig.update_traces(hovertemplate='Data: %{x} <br>Total: %{y:,.0f}')
fig.update_layout(hovermode='x unified', separators=',.', plot_bgcolor='#fafafa')
fig.show()
fig.write_html("graficos/evolucao-vacinacao.html")

In [8]:
# vacinas aplicadas por dia

_dv['1_dose_7d'] = _dv['newVaccinated'].rolling(7).mean()
_dv['2_dose_7d'] = _dv['newVaccinated_second'].rolling(7).mean()

fig = go.Figure(
    data=[
        go.Bar(
            x = _dv['date'],
            y = _dv['newVaccinated'],            
            name='primeira dose',
            opacity=0.7
        ),
        go.Bar(
            x = _dv['date'],
            y = _dv['newVaccinated_second'],            
            name='segunda dose',
        ),
        go.Scatter(
            x = _dv['date'],
            y = _dv['1_dose_7d'],
            line=dict(color='orange', width=3),
            name='média móvel 1ª dose'
        ),
        go.Scatter(
            x = _dv['date'],
            y = _dv['2_dose_7d'],
            line=dict(color='blue', width=3),
            name='média móvel 2ª dose'
        )
    ]
)

fig.update_layout(
    title = '<b>Vacinação por dia - COVID-19</b>',
    xaxis_tickformat = '%d %b %Y',
    barmode='stack'
)
fig.update_traces(
    hovertemplate='Data: %{x} <br>Total: %{y:,.0f}',
    marker_color='indianred',    
)
fig.update_layout(hovermode='x unified', separators=',.', plot_bgcolor='#fafafa')
fig.show()
fig.write_html("graficos/vacinacao-por-dia.html")


### Vacinação por UF
Malha estadual - shp
Link: https://www.ibge.gov.br/geociencias/organizacao-do-territorio/estrutura-territorial/15774-malhas.html?=&t=downloads

População estimada - xls
Link: https://www.ibge.gov.br/estatisticas/sociais/populacao/9103-estimativas-de-populacao.html?=&t=downloads

In [9]:
import geopandas as gpd

geo_dados_uf = gpd.read_file('shapefiles/BR_UF_2020/BR_UF_2020.shp')
# geo_dados_uf = geo_dados_uf.to_crs('+proj=utm +zone=23 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=km +no_defs ')
# geo_dados_uf.to_file("geojson/brasil-uf-compressed.json", driver='GeoJSON')
geo_dados_uf.head(3)

Unnamed: 0,CD_UF,NM_UF,SIGLA_UF,NM_REGIAO,geometry
0,11,Rondônia,RO,Norte,"POLYGON ((-65.38150 -10.42907, -65.38013 -10.4..."
1,12,Acre,AC,Norte,"POLYGON ((-71.07772 -9.82774, -71.07817 -9.828..."
2,13,Amazonas,AM,Norte,"POLYGON ((-69.83766 -3.68659, -69.82555 -3.620..."


In [10]:
pop_munic = pd.read_excel('datasets/originais/populacao_2020.xls', sheet_name='Municípios', 
                          skiprows=1, skipfooter=16)
pop_munic['POPULAÇÃO ESTIMADA'] = pop_munic['POPULAÇÃO ESTIMADA'].apply(lambda x: str(x).split('(')[0])
pop_munic['POPULAÇÃO ESTIMADA'] = pop_munic['POPULAÇÃO ESTIMADA'].astype(int)
pop_uf = pop_munic[['UF', 'POPULAÇÃO ESTIMADA']].groupby('UF').sum().reset_index()

In [11]:
pop_uf[pop_uf['UF'] == 'TO']

Unnamed: 0,UF,POPULAÇÃO ESTIMADA
26,TO,1590248


In [13]:
data_UF = df.query("state != 'TOTAL'")
# _df = data_UF.query("date == @data_UF.date.max()")
data_UF['perc_vac'] = data_UF['vaccinated'] / data_UF['state'].map(pop_uf.set_index('UF')['POPULAÇÃO ESTIMADA'])
_df_plot = geo_dados_uf.merge(data_UF, left_on='SIGLA_UF', right_on='state').set_index('SIGLA_UF')
_df_plot.sample(3)

Unnamed: 0_level_0,CD_UF,NM_UF,NM_REGIAO,geometry,epi_week,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants,vaccinated,vaccinated_per_100k_inhabitants,vaccinated_second,vaccinated_second_per_100k_inhabitants,perc_vac
SIGLA_UF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
RJ,33,Rio de Janeiro,Sudeste,"MULTIPOLYGON (((-41.82923 -22.78453, -41.82939...",111,2021-03-16,Brazil,RJ,TOTAL,115,34445,1081,608887,34445,608887,199.50833,3526.72465,0.05657,568605.0,52017.0,2313502.0,13399.9979,773279.0,4478.89692,254468.0,1473.90003,0.044528
PB,25,Paraíba,Nordeste,"MULTIPOLYGON (((-34.79576 -7.35000, -34.79601 ...",42,2020-10-12,Brazil,PB,TOTAL,8,2930,56,125933,2930,125933,72.91955,3134.12194,0.02327,101388.0,6772.0,384598.0,9571.57402,,,,,
RR,14,Roraima,Norte,"POLYGON ((-63.96008 2.47313, -63.96041 2.47416...",111,2021-03-18,Brazil,RR,TOTAL,5,1274,349,87032,1269,86683,210.31397,14367.38252,0.01464,80125.0,38971.0,197659.0,32629.86557,28583.0,4718.5276,15127.0,2497.18949,0.045285


In [14]:
max_pct = _df_plot['perc_vac'].max()
fig = px.choropleth_mapbox(_df_plot,
                           geojson=_df_plot.geometry,
                           locations=_df_plot.index,
                           color='perc_vac',
                           color_continuous_scale=["lightblue", "darkblue"],
                           center={'lat': -16.701591, 'lon': -49.164524},
                           mapbox_style="carto-positron",
                           range_color=[0, max_pct],
                           opacity=0.6,
                           animation_frame='date',
                           zoom=3)

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

MemoryError: 

In [None]:
# Usando o GeoJson
import json

with open('geojson/brasil-uf-compressed.json') as f:
    uf_br = json.load(f)
    
uf_br["features"][0].keys()

In [None]:
uf_br["features"][0]['properties'].keys()

In [None]:
fig = px.choropleth_mapbox(_df, geojson=uf_br, color="perc_vac",
                           locations="state", featureidkey="properties.SIGLA_UF",
                            center={'lat': -16.701591, 'lon': -49.164524},
                           mapbox_style="carto-positron", zoom=3)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

### Nº de casos por Municípios - proporcional a 100.000 habitantes

In [None]:
# import gzip file with cities data
chunk_size = 50000
df_cities = pd.DataFrame()

_dc = pd.read_csv("https://github.com/wcota/covid19br/blob/master/cases-brazil-cities-time.csv.gz?raw=true", 
                        compression='gzip',
                        iterator=True,
                        chunksize=chunk_size
                       )
for df in _dc:
    df_cities = df_cities.append(df)


df_cities.sample(3)

In [None]:
df_cities['date'] = pd.to_datetime(df_cities['date'])

In [None]:
df_cities['ibgeID'].dtype

In [None]:
df_cities.shape

In [None]:
df_gps_cities = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/gps_cities.csv")
df_gps_cities.sample(3)

In [None]:
df_gps_cities.shape

In [None]:
# verifica se tem algum codigo ibge repetido
ibge_rep = np.any(df_gps_cities['ibgeID'].value_counts()>1)

# verifica se tem algum codigo fora do padrão
ibge_fora = bool(len(np.where(df_gps_cities['ibgeID'].values > 9999999)[0])) | bool(len(np.where(df_gps_cities['ibgeID'].values < 0)[0]))

# verifica se tem algum valor faltante
ibge_missing = df_gps_cities['ibgeID'].isna().sum()

print(f'cod. ibge repetido: {ibge_rep} \ncod. ibge fora do padrão: {ibge_fora} \nvalores faltantes: {ibge_missing}')

In [None]:
df_gps_cities.loc[df_gps_cities['ibgeID'].isna()]

In [None]:
# removendo as linhas cujo campo ibgeID está faltando
df_gps_cities = df_gps_cities.dropna(subset=['ibgeID'])

In [None]:
# convertendo o tipo da coluna ibeID do df_gps_cities para o mesmo tipo da coluna ibgeID do df_cities
df_gps_cities['ibgeID'] = df_gps_cities['ibgeID'].astype(int)

In [None]:
df_gps_cities['ibgeID'].dtype

In [None]:
# definindo as colunas 'lat' e 'lon' no df_cities com base no 'ibgeID' do df_gps_cities
df_cities['lat'] = df_cities['ibgeID'].map(df_gps_cities.set_index('ibgeID')['lat'])
df_cities['lon'] = df_cities['ibgeID'].map(df_gps_cities.set_index('ibgeID')['lon'])

In [None]:
df_cities.sample(3)

In [None]:
_df = df_cities.query('date == @df_cities.date.max()')
mapa = px.scatter_mapbox( _df, lat='lat', lon='lon',  
                     hover_name='city',
                     hover_data=['totalCases', 'deaths'],
                     color_continuous_scale=px.colors.sequential.Plasma_r,
                     color='totalCases_per_100k_inhabitants',                                          
                     zoom= 3,
                     title='<b>Covid-19</b> Proporção de casos por 100mil habitantes')
mapa.update_layout(mapbox_style='open-street-map')
mapa.update_layout(height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
mapa.show()
mapa.write_html("graficos/mapa-casos-p-100k-h.html")

In [None]:
import ipywidgets as widgets
from ipywidgets import fixed

In [None]:
f_date = df_cities['date'].dt.strftime('%Y-%m-%d').sort_values().unique().tolist()

In [None]:
# Controle Widget para data
date_limit = widgets.SelectionSlider(
    options= f_date,
    value= '2020-03-01',
    description= 'Data: ',
    continuous_update= False,
    orientation= 'horizontal',
    readout= True,
    style={'description_width': 'initial'}
)

def update_map(df, limit):


    _df = df.loc[df['date'] == pd.to_datetime(limit)].copy()
#     print(limit, end='\n')
#     print(pd.to_datetime(limit), end='\n')
#     print(_df, end='\n')
    mapa = px.scatter_mapbox( _df, lat='lat', lon='lon',  
                     hover_name='city',
                     hover_data=['totalCases', 'deaths'],
                     color_continuous_scale=px.colors.sequential.Plasma_r,
                     color='totalCases_per_100k_inhabitants',                                          
                     zoom= 3)
    mapa.update_layout(mapbox_style='open-street-map')
    mapa.update_layout(height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
    mapa.show()

In [None]:
# _df = df_cities.loc[df_cities['date'] == pd.to_datetime('2020-03-01')]

widgets.interactive( update_map, df=fixed(df_cities), limit=date_limit)