In [4]:
import datetime as dt
import string
from functools import wraps

import altair as alt
import altair_morberg.core as morberg
import pandas as pd

alt.themes.register("morberg_theme", morberg.theme)
alt.themes.enable("morberg_theme")

%load_ext lab_black

In [2]:
fhm_data, labels = dataimport.get_lag_data()
local_url = 'data/fhm.json'
fhm_data.to_json(local_url, orient='records')
url = "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/fhm.json"

In [109]:
# Functions for processing cases data

def log_step(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        tic = dt.datetime.now()
        result = func(*args, **kwargs)
        time_taken = str(dt.datetime.now() - tic)
        print(f"{func.__name__} shape={result.shape} took {time_taken}s")
        return result

    return wrapper


@log_step
def start_pipeline(dataf):
    return dataf.copy()


@log_step
def remove_columns(dataf):
    return dataf.drop(columns="Totalt_antal_fall")


@log_step
def to_long_format(dataf):
    return dataf.melt(id_vars="Statistikdatum", var_name="Län", value_name="Fall")


@log_step
def add_week_numbers(dataf):
    dataf["Vecka"] = dataf.Statistikdatum.dt.week
    return dataf

In [115]:
# Data from FHM for number of cases
input_excel = "https://www.arcgis.com/sharing/rest/content/items/b5e7488e117749c19881cce45db13f7e/data"
df = pd.read_excel(input_excel)

cases = (
    df.pipe(start_pipeline)
    .pipe(remove_columns)
    .pipe(to_long_format)
    .pipe(add_week_numbers)
)

local_cases_url = "data/cases.json"
cases.to_json(local_cases_url, orient="records")
cases_url = (
    "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/cases.json"
)

start_pipeline shape=(311, 23) took 0:00:00.000110s
remove_columns shape=(311, 22) took 0:00:00.000522s
to_long_format shape=(6531, 3) took 0:00:00.002107s
add_week_numbers shape=(6531, 4) took 0:00:00.000770s


In [127]:
week_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

alt.Chart(
    cases_url,
    title={
        "text": "Weekly Covid-19 Cases in Sweden",
        "subtitle": f"Updated on {cases.Statistikdatum.max().date():%A, %B %d}",
    },
    width=700,
).mark_bar().encode(
    x=alt.X("Vecka:O", title=None),
    y=alt.Y("sum(Fall):Q", axis=alt.Axis(title=None, orient="right", offset=10),),
    order=alt.Order("Statistikdatum:N"),
    color=alt.Color(
        "day(Statistikdatum):N",
        title="Day reported",
        sort=week_order,
        legend=alt.Legend(orient="top-left"),
    ),
    tooltip=[
        alt.Tooltip("sum(Fall):Q", title="Number of Cases"),
        alt.Tooltip("Statistikdatum:T", title="Date reported"),
    ],
)

# Work in progress

Experimental stuff

In [4]:
def average_lag(df, start_date):
    df1 = pd.DataFrame(df.groupby("publication_date")["n_diff"].sum())
    df1["average_lag"] = (
        df.groupby("publication_date")["age"].sum()
        / df.groupby("publication_date")["n_diff"].sum()
    )
    df1 = df1.reset_index()
    df1 = df1[df1["publication_date"] >= start_date]

    lag_chart = (
        alt.Chart(df1, width=600, title="Average Reporting Lag")
        .mark_trail()
        .encode(
            x=alt.X("publication_date", title="Publication Date"),
            y=alt.Y("average_lag:Q", title="Daily Average Reporting Lag"),
            size=alt.Size("n_diff", title="Reported Deaths"),
        )
    )

    return lag_chart

average_lag(fhm_data, '2020-04-03')

In [5]:
df = fhm_data[fhm_data['date'] > '2020-03-10']
# Only show weekly publication dates based on latest publication date
df = df[df.publication_date.dt.dayofweek == fhm_data['publication_date'].max().weekday()]

alt.Chart(df, width=600).mark_line(interpolate='basis').encode(
    x=alt.X('date', title="Date"),
    y=alt.Y('N', title="Deceased"),
    color=alt.Color('monthdate(publication_date):N', title="Publication Date")
)

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation

alt.Chart(...)

In [6]:
df = fhm_data[fhm_data['publication_date'] >= '2020-04-03']
df = df.replace(0, np.nan)

alt.Chart(df,height=200, width=600).mark_rect().encode(
    x='yearmonthdate(publication_date)',
#    y='yearmonthdate(date)',
    y=alt.Y('lag', sort=labels),
    size='n_diff',
    color=alt.Color('n_diff', title='Deaths', scale=alt.Scale(scheme='goldgreen')),
    tooltip=[alt.Tooltip('n_diff', title='Deaths')]
)

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation

alt.Chart(...)

## Daily reported deaths and lag

Number of deaths reported by day and the lag in reporting for each death. Each column is a weekday and each row a week.

In [34]:
week_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

hist = alt.Chart(url, height=100, width=100).mark_bar().encode(
    x=alt.X("lag:O", title="Reporting Lag", sort=labels),
    y=alt.Y("sum(n_diff):Q", title="Reported Deaths"),
    color=alt.Color(
        "day(publication_date):N", title="Publication Day", sort=week_order
    ),
)

text = (
    alt.Chart(url)
    .mark_text(align="right", x=95, y=28, fontSize=20)
    .encode(alt.Text("sum(n_diff):Q"),)
)

(hist + text).facet(
    facet=alt.Facet("publication_date:T", title="Reported Deaths per Day"),
    columns=7,
).transform_filter("datum.date >= datetime(2020,3,6)")

In [42]:
(hist + text).facet(
    facet=alt.Facet("publication_date:T", title="Reported Deaths per Day"),
    columns=7,
).transform_filter("datum.date >= datetime(2020,3,6)")

(hist + text).facet(data=url, 
    row="publication_date:T",
    column=week_order,
).transform_filter("datum.date >= datetime(2020,3,6)")

SchemaValidationError: Invalid specification

        altair.vegalite.v4.api.FacetMapping->column, validating 'type'

        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] is not of type 'object'
        

alt.FacetChart(...)