In [1]:
import re

import ipywidgets as widgets
import pandas as pd
import requests
from IPython.display import display
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

In [2]:
# Root URL of the S3 bucket; the listing request and the per-file pickle URLs
# in the cells below are built from it.
base_url = "https://covid19br.s3-sa-east-1.amazonaws.com"

In [3]:
# Ask the bucket for the objects under the "pkl/" prefix.
# The timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces an HTTP error here instead of letting it turn
# into a silently empty file list further down.
r = requests.get(f"{base_url}/?delimiter=/&prefix=pkl/", timeout=30)
r.raise_for_status()

In [4]:
# Pull every <Key>...</Key> entry out of the listing response and keep only
# the keys whose name ends in "pkl".
filenames = [
    key
    for key in re.findall(r"<Key>(.+?)</Key>", r.content.decode())
    if key.endswith("pkl")
]

In [6]:
def month_number(month: str) -> int:
    """Convert a three-letter Portuguese month abbreviation to its month number.

    Args:
        month: Abbreviation such as ``"jan"`` or ``"mai"``. Letter case and
            surrounding whitespace are ignored.

    Returns:
        The month number, from 1 (``"jan"``) to 12 (``"dez"``).

    Raises:
        KeyError: If ``month`` is not one of the twelve known abbreviations.
    """
    months = ("jan", "fev", "mar", "abr", "mai", "jun", "jul", "ago", "set", "out", "nov", "dez")
    normalized = month.strip().lower()
    try:
        return months.index(normalized) + 1
    except ValueError:
        # Keep the exception type a failed dictionary lookup would produce.
        raise KeyError(month) from None

def filename_date(name: str) -> pd.Timestamp:
    """Extract the date embedded in a ``...COVIDBR_<dd><mon><yyyy>.xlsx.pkl`` name.

    Args:
        name: File name (or bucket key) ending in
            ``COVIDBR_<dd><mon><yyyy>.xlsx.pkl``, where ``<mon>`` is a
            three-letter month abbreviation understood by ``month_number``.

    Returns:
        A ``pd.Timestamp`` for the day, month and year found in the name.

    Raises:
        ValueError: If ``name`` does not contain a date in the expected layout.
        KeyError: If the month abbreviation is not recognised by ``month_number``.
    """
    # Explicit groups instead of positional slicing, so a malformed name is
    # reported clearly rather than failing on ``None.group`` or a bad slice.
    match = re.search(r"COVIDBR_(\d{2})([A-Za-z]{3})(\d{4})\.xlsx\.pkl$", name)
    if match is None:
        raise ValueError(f"Cannot extract a date from file name: {name!r}")
    day, month, year = match.groups()
    return pd.Timestamp(year=int(year), month=month_number(month), day=int(day))

def date_repr(date):
    """Format ``date`` as ``dd/mm/yyyy``, zero-padding the day and the month."""
    return f"{date.day:02d}/{date.month:02d}/{date.year}"

In [7]:
# Put the files in chronological order of the date in their names, then build
# the matching "dd/mm/yyyy" labels in that same order.
filenames = sorted(filenames, key=filename_date)
options = [date_repr(filename_date(name)) for name in filenames]

# Tabular files from https://covid.saude.gov.br

In [13]:
# Files are stored in pickle format for fast loading; show them last-to-first.
filenames[::-1]

['pkl/16b5fddef7e95ef588be4905a04b7271_HIST_PAINEL_COVIDBR_01jun2020.xlsx.pkl',
 'pkl/b018c99a75bbee61dafaa494a079ab5a_HIST_PAINEL_COVIDBR_31mai2020.xlsx.pkl',
 'pkl/e358588704d7d7612d046fdf1f901131_HIST_PAINEL_COVIDBR_30mai2020.xlsx.pkl',
 'pkl/e4a5151e6b13d290505bbceb00284d27_HIST_PAINEL_COVIDBR_29mai2020.xlsx.pkl',
 'pkl/561126ce43be8c5dc4f8e016a17a7e1a_HIST_PAINEL_COVIDBR_28mai2020.xlsx.pkl',
 'pkl/868d1c04f67df4b43f5acd1328b8f6d5_HIST_PAINEL_COVIDBR_27mai2020.xlsx.pkl',
 'pkl/dfc17705543cc44ae2ce669b4ff9c2c0_HIST_PAINEL_COVIDBR_26mai2020.xlsx.pkl',
 'pkl/102c312dac5175110604bfae93bbff02_HIST_PAINEL_COVIDBR_25mai2020.xlsx.pkl',
 'pkl/f537be7f66c69a4927dd1bd754c55a2a_HIST_PAINEL_COVIDBR_24mai2020.xlsx.pkl',
 'pkl/5d1f3f2a7f43ed55a26ac467d468219f_HIST_PAINEL_COVIDBR_23mai2020.xlsx.pkl',
 'pkl/9013849cb451712f1121ef2cdee98587_HIST_PAINEL_COVIDBR_22mai2020.xlsx.pkl',
 'pkl/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_HIST_PAINEL_COVIDBR_21mai2020.xlsx.pkl',
 'pkl/a3d153fe0e95aa7e0d3a585c317a1dc6_H

# Load dataframes

In [14]:
# SECURITY NOTE: unpickling can execute arbitrary code, so this cell must only
# ever be pointed at a bucket whose contents are trusted.
# Maps each "dd/mm/yyyy" label to the dataframe loaded from the matching file.
df_map = {}
for filename, opt in tqdm(
    zip(filenames, options), total=len(filenames), desc="Loading dataframes"
):
    url = f"{base_url}/{filename}"
    df_map[opt] = pd.read_pickle(url)

HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




In [15]:
# Slider over the file-date labels, shared by the interactive cells below.
# It starts on the last label, i.e. the most recent file after the date sort.
date_slider = widgets.SelectionSlider(
    options=options,
    value=options[-1],
    description="File date:",
    disabled=False,
    continuous_update=False,
    orientation="horizontal",
    readout=True,
)

# Dataframes info

In [16]:
def func(x):
    """Print the ``info()`` summary of the dataframe stored under label ``x``."""
    selected = df_map[x]
    return selected.info()

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=18, options=('1…

# Counts

## Regiao

In [17]:
def func(x):
    """Bar chart of the number of rows per ``regiao`` value for label ``x``."""
    region_counts = df_map[x]["regiao"].value_counts().sort_values()
    ax = region_counts.plot.bar()
    ax.set_ylabel("counts")

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=15, options=('1…

## Estado

In [18]:
def func(x):
    """Bar chart of the number of rows per ``estado`` value for label ``x``."""
    state_counts = df_map[x]["estado"].value_counts().sort_values()
    ax = state_counts.plot.bar()
    ax.set_ylabel("counts")

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=18, options=('1…

# Estado vs codigo

In [19]:
def func(x):
    """Return the distinct (``estado``, ``coduf``) pairs for label ``x``, sorted."""
    pair_columns = ["estado", "coduf"]
    pairs = df_map[x][pair_columns].drop_duplicates()
    ordered = pairs.sort_values(by=pair_columns)
    return ordered.reset_index(drop=True)

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=18, options=('1…

# Missing rate

In [20]:
def func(x):
    """Bar chart of the fraction of missing values in each column for label ``x``.

    Columns are shown in alphabetical order of their names.
    """
    df = df_map[x]
    # isna().mean() is the vectorised form of "missing count / row count" for
    # every column; it avoids a Python-level sum over each column.
    missing_rate = df.isna().mean().sort_index()
    ax = missing_rate.plot(kind="bar")
    ax.set_ylabel("missing rate")

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=18, options=('1…

# Casos acumulados

In [21]:
def func(x):
    """Line plot of ``casosAcumulado`` against ``data`` for the "Brasil" rows of label ``x``."""
    df = df_map[x]
    is_brasil = df["regiao"] == "Brasil"
    brasil_cases = (
        df.loc[is_brasil, ["data", "casosAcumulado"]]
        .rename(columns={"casosAcumulado": "Brasil"})
        .set_index("data")
    )
    ax = brasil_cases.plot()
    ax.set_ylabel("casos acumulados")

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=18, options=('1…

In [22]:
def func(x):
    """Plot ``casosAcumulado`` over ``data`` with one line per ``regiao`` for label ``x``.

    Rows whose ``regiao`` is "Brasil" are left out; the remaining regions are
    drawn in sorted order on a single set of axes.
    """
    df = df_map[x]
    ax = plt.subplot()
    regioes = df[df["regiao"] != "Brasil"]["regiao"].unique()
    for regiao in sorted(regioes):
        per_day = (
            df[df["regiao"] == regiao]
            # groupby leaves out rows whose key is NaN by default, so only rows
            # with both "estado" and "codmun" filled in contribute.
            # NOTE(review): presumably this is what keeps aggregate rows from
            # being double counted; confirm against the data.
            # The column is selected before summing so the other columns are
            # never aggregated.
            .groupby(["data", "estado", "codmun"])[["casosAcumulado"]]
            .sum()
            .groupby(level=0)
            .sum()
            .rename(columns={"casosAcumulado": regiao})
        )
        per_day.plot(ax=ax)
    ax.set_ylabel("casos acumulados")

widgets.interact(func, x=date_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='File date:', index=17, options=('1…