In [2]:
import altair as alt
import pandas as pd
import numpy as np
import datetime as dt
from covid import dataimport

In [3]:
# Where the local JSON export of the data is written (relative path).
local_url = 'data/fhm.json'

# Pull the lag table and its labels from the project's dataimport module.
# `labels` is passed as the sort order for the `lag` field in the charts below.
fhm_data, labels = dataimport.get_lag_data()

# Export the table as a JSON list with one object per row.
# NOTE(review): the charts below load their data from a GitHub URL, not from
# this file; presumably this export is what gets published there -- confirm.
fhm_data.to_json(path_or_buf=local_url, orient='records')

In [10]:
# Remote JSON data source; every alt.Chart in this notebook is built from this URL.
url = "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/fhm.json"

# Preview the rows of the local table whose `lag` value is the string '0'.
fhm_data.loc[fhm_data["lag"] == "0"]

Unnamed: 0,date,publication_date,N,days_since_publication,n_diff,n_diff_pct,delay,lag,age,prediction,publication_week
33,2020-04-02,2020-04-02,5,0.0,5.0,,Same day,0,0.0,,14
68,2020-04-03,2020-04-03,2,0.0,2.0,,Same day,0,0.0,,14
104,2020-04-04,2020-04-04,1,0.0,1.0,,Same day,0,0.0,,14
141,2020-04-05,2020-04-05,1,0.0,1.0,,Same day,0,0.0,,14
179,2020-04-06,2020-04-06,13,0.0,13.0,,Same day,0,0.0,,15
...,...,...,...,...,...,...,...,...,...,...,...
4589,2020-06-08,2020-06-08,0,0.0,0.0,0.0,Same day,0,0.0,46.217857,24
4691,2020-06-09,2020-06-09,1,0.0,1.0,,Same day,0,0.0,46.613889,24
4794,2020-06-10,2020-06-10,1,0.0,1.0,,Same day,0,0.0,45.744444,24
4898,2020-06-11,2020-06-11,2,0.0,2.0,,Same day,0,0.0,41.609091,24


# Covid-19 graphs for Sweden
The total death toll in these graphs differs slightly from the official totals:

* Old data points with few deaths have been trimmed to make graphs more legible
* Some death dates are unknown (e.g. there were 23 unknown Covid-19 death dates by May 19)

## Covid-19 death date, reporting date, and lag

Interactive graph showing the number of reported Covid-19 deaths in Sweden. Select dates in the bottom Publication Date graph by clicking and dragging. The gray bars show a prediction based on the average lag over the past three weeks. See [Adam Altmejd's repo](https://github.com/adamaltmejd/covid) for details.

In [6]:
# Shared x-axis extent for both time charts: it starts on 2020-03-13 and ends
# one day after the latest death date found in the local data.
day_after_latest = fhm_data.date.max() + pd.Timedelta("1D")
domain = ['2020-03-13', day_after_latest.date().isoformat()]

# Interval selection along x. It is attached to the `reported` chart below and
# used as a filter by the other charts in this cell.
brush = alt.selection(type="interval", encodings=["x"])

# Main chart: summed n_diff per death date, stacked and coloured by lag.
# Vega's datetime() takes a zero-based month, so datetime(2020,2,13) is
# 13 March 2020, the same day the x domain starts.
deceased = (
    alt.Chart(url, height=400)
    .transform_filter(brush)
    .transform_filter('datum.date >= datetime(2020,2,13)')
    .mark_bar()
    .encode(
        alt.X(
            "yearmonthdate(date):T",
            title="Date Deceased",
            scale=alt.Scale(domain=domain),
        ),
        alt.Y(
            "sum(n_diff):Q",
            title="Deceased",
            # Fixed y range taken from the local data; presumably this keeps
            # the axis from rescaling while brushing -- confirm.
            scale=alt.Scale(domain=[0, fhm_data.N.max()]),
        ),
        # Stack order of the bar segments.
        alt.Order("days_since_publication:N", sort="ascending"),
        alt.Color(
            "lag:O",
            title="Lag in Days",
            sort=labels,
            scale=alt.Scale(scheme="category20c"),
        ),
    )
)

# Bottom chart: summed n_diff per publication date. This chart carries the brush.
reported = (
    alt.Chart(url, height=100)
    .add_selection(brush)
    .mark_bar()
    .encode(
        alt.X(
            "yearmonthdate(publication_date):T",
            title="Publication Date",
            scale=alt.Scale(domain=domain),
        ),
        alt.Y("sum(n_diff):Q", title="Reported Deaths"),
        tooltip=[
            alt.Tooltip("sum(n_diff):Q", title="Reported Deaths"),
            "publication_date:T",
        ],
    )
)

# Side chart: horizontal bars of summed n_diff per lag value for the brushed
# range. Its own colour legend is switched off.
legend_vert = (
    alt.Chart(url, width=50, title="Lag in Days")
    .transform_filter(brush)
    .mark_bar()
    .encode(
        alt.X("sum(n_diff):Q", title="Reported Deaths"),
        alt.Y("lag:O", title="", sort=labels),
        alt.Color("lag:O", sort=labels, legend=None),
    )
)

# Text overlay: sum of n_diff over the brushed rows, drawn at fixed pixel
# coordinates. The transform order matters: filter, then aggregate, then label.
text = (
    alt.Chart(url)
    .transform_filter(brush)
    .transform_aggregate(sum_deaths="sum(n_diff):Q")
    .transform_calculate(text="Total deaths: " + alt.datum.sum_deaths)
    .mark_text(align="right", x=692, y=19, fontSize=18)
    .encode(alt.Text("text:N"))
)

# Light-gray bars: for each death date, the `prediction` value taken from the
# row with the greatest publication_date (argmax aggregate).
prediction = (
    alt.Chart(url)
    .transform_filter(brush)
    .mark_bar(color="#E8E8E8")
    .encode(
        alt.X("yearmonthdate(date):T"),
        alt.Y("prediction:Q", aggregate={"argmax": "publication_date"}),
    )
)

# Layout: the layered main panel above the brush chart, with the lag bars to
# the right. `&` binds tighter than `|`; the parentheses only make that explicit.
main_panel = (prediction + deceased + text).properties(width=750)
(main_panel & reported.properties(width=750)) | legend_vert

## Weekly reported deaths

Number of deaths reported each week, with each publication weekday shown individually. Some weekdays (typically Saturday-Monday) tend to have fewer reported deaths. This graph gives an idea of what the reported numbers were on the same weekday in previous weeks. The total for the current week is the number of deaths reported so far.

Data is updated daily.

In [11]:
# Weekday abbreviations, Monday first; used as the sort order for the
# "Publication Day" colour scale (also by the daily chart further down).
week_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

# Chart title and subtitle. The subtitle shows the latest death date in the
# local data, formatted as weekday, month name and day.
# (The title text previously had a stray double space after "Covid-19".)
weekly_title = {
    'text': 'Weekly Reported Covid-19 Deaths in Sweden',
    'subtitle': f'Updated on {fhm_data.date.max().date():%A, %B %d}',
}

# Shared base spec: one x position per publication week, y = summed n_diff
# stacked from zero. Both layers below derive from it.
base = alt.Chart(url, title=weekly_title).mark_bar().encode(
    x=alt.X('publication_week:O', title='Publication Week'),
    y=alt.Y('sum(n_diff):Q', title='Reported Deaths', stack='zero'),
)

# Bar layer: segments ordered by publication date and coloured by the weekday
# of publication.
bars = base.mark_bar().encode(
    order=alt.Order('publication_date:N'),
    color=alt.Color(
        "day(publication_date):N", title="Publication Day", sort=week_order
    ),
)

# Label layer: the weekly sum of n_diff as text, bottom-aligned at the y value.
text = base.mark_text(color='black', baseline='bottom').encode(
    text=alt.Text('sum(n_diff):Q')
)

(bars + text).properties(width=500)

## Daily reported deaths and lag

Number of deaths reported by day and the lag in reporting for each death. Each column is a weekday and each row a week.

In [96]:
# Per-facet bar chart: summed n_diff per lag value, coloured by the weekday of
# publication. `labels` and `week_order` come from earlier cells.
hist = (
    alt.Chart(url, height=100, width=100)
    .mark_bar()
    .encode(
        alt.X("lag:O", title="Reporting Lag", sort=labels),
        alt.Y("sum(n_diff):Q", title="Reported Deaths"),
        alt.Color(
            "day(publication_date):N", title="Publication Day", sort=week_order
        ),
    )
)

# Per-facet label: the summed n_diff, drawn at fixed pixel coordinates.
text = (
    alt.Chart(url)
    .mark_text(align="right", x=95, y=28, fontSize=20)
    .encode(text=alt.Text("sum(n_diff):Q"))
)

# One facet per publication date, wrapped after seven columns.
# Vega's datetime() takes a zero-based month, so datetime(2020,3,6) is
# 6 April 2020 (a Monday), which presumably aligns the columns with weekdays.
# NOTE(review): the filter tests the death date (`date`) while the facets are
# keyed on `publication_date` -- confirm this is the intended field.
alt.layer(hist, text).facet(
    facet=alt.Facet("publication_date:T", title="Reported Deaths per Day"),
    columns=7,
).transform_filter("datum.date >= datetime(2020,3,6)")