In [25]:
import pandas as pd
import numpy as np
import altair as alt
from covid import dataimport

# Needed for datasets with more than 5k entries. The 'data_server' transformer
# does not work in all cloud environments, so it is left disabled here.
#alt.data_transformers.enable('data_server')

# Fallback used while 'data_server' is disabled: keep the default transformer
# and lift its row limit.
# Might result in large notebooks, see https://altair-viz.github.io/user_guide/faq.html#why-does-altair-lead-to-such-extremely-large-notebooks
alt.data_transformers.enable('default') # select the default data transformer
alt.data_transformers.disable_max_rows(); # Warning: remove this after testing

# Prepended to the relative data file paths below to build the URLs charts load from
base_url = "https://raw.githubusercontent.com/morberg/covid-notebook/master/"

In [21]:
# County-level SCB data, summed into weekly buckets per county
data_scb = dataimport.get_scb_county_data()
weekly = (
    data_scb
    .groupby('county')
    .resample('W', on='date')
    .sum()
    .reset_index()
)

# Export the weekly table as JSON records; scb_url is where that file is served from
scb_file = 'data/scb.json'
weekly.to_json(scb_file, orient='records')
scb_url = base_url + scb_file


# SCB data merged with the FHM data, summed into weekly buckets per death cause.
# Only the first element returned by get_lag_data() is used.
data_fhm, _ = dataimport.get_lag_data()
data_combined = (
    dataimport.merge_data(data_scb, data_fhm)
    .groupby('death_cause')
    .resample('W', on='date')
    .sum()
    .reset_index()
)

# Same export for the combined table
combined_file = 'data/combined.json'
data_combined.to_json(combined_file, orient='records')
combined_url = base_url + combined_file

## Total weekly deaths

These charts show excess mortality in 2020 compared with 2018-2019. They are based on data [published weekly by Statistiska Centralbyrån](https://scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/) (SCB); the Covid-19 death counts come from Folkhälsomyndigheten (FHM). Because of reporting lag, the figures for the most recent two weeks are preliminary and will change.

In [22]:
# The chart loads its data by URL instead of embedding it. The original referenced
# an undefined name `url`; the fields used below (`death_cause`, `deaths`) and the
# subtitle both belong to the combined table, whose export lives at `combined_url`.
# NOTE(review): combined_url points at the master branch on GitHub, so the exported
# data/combined.json has to be pushed there before the chart reflects new data.
base = alt.Chart(combined_url, height=400, width=800, title={
        "text": 'Weekly Deaths in Sweden 2018-2020',
        "subtitle": f'Data published {data_combined.date.max().date().isoformat()} by SCB'}).encode(
    x=alt.X('monthdate(date):T', axis=alt.Axis(format="%b"), title='Month')
)

# Years before 2020: deaths summed per date (across death causes), drawn as one
# grey line per year.
history = base.mark_line(size=3, opacity=1).transform_filter(
    'year(datum.date) < 2020'
).transform_aggregate(
    sum_deaths='sum(deaths):Q',
    groupby=['date']
).encode(
    y='mean(sum_deaths):Q',
    color=alt.Color('year(date):N', title='', scale=alt.Scale(range=['#b9b9b9', '#898989']))
)

# 2020: bars coloured by death cause.
covid = base.mark_bar(size=14).encode(
    y=alt.Y('deaths:Q', title='Weekly Deaths'),
    color=alt.Color('death_cause:N', title='2020', sort='descending')
).transform_filter(
    'year(datum.date) == 2020'
)

# Independent colour scales keep the year colours and the death-cause colours separate.
chart = (covid + history).resolve_scale(color='independent')
chart

In [23]:
df = weekly

# The strict '<' below leaves out the most recent week in the data.
# NOTE(review): pandas `.week` is the ISO week number while the expression uses
# Vega's week(); confirm the two numbering schemes agree at the cutoff.
last_week_to_show = df.date.max().week

# Vega expression selecting the 2020 rows to draw. It is deliberately not called
# `filter`, which would shadow Python's built-in for the rest of the session,
# and it uses the logical '&&' rather than the bitwise '&'.
filter_2020 = f'year(datum.date) == 2020 && week(datum.date) < {last_week_to_show}'

base = alt.Chart(df, height=400, width=800, title={
        "text": 'Weekly Deaths in Sweden 2018-2020',
        "subtitle": f'Data published {df.date.max().date().isoformat()} by SCB'}).encode(
    x=alt.X('monthdate(date)', axis=alt.Axis(format="%b"), title=None),
    y=alt.Y('sum(deaths)', title='Weekly Deaths'),
)

# Years before 2020: thin, semi-transparent lines, one colour per year
history = base.mark_line(opacity=0.4, size=2).encode(
    color=alt.Color('year(date):N', title='Year')
).transform_filter(
    "year(datum.date) < 2020"
)

# 2020: one thick red line
current = base.mark_line(size=5, opacity=1).transform_filter(
    filter_2020
).encode(
    y='sum(deaths)',
    color=alt.Color('year(date):N', title=None, scale=alt.Scale(range=['#e45757']))
)

# Multi-selection on the legend's y encoding, i.e. the county
selection = alt.selection_multi(toggle='true', encodings=['y'])

# Clickable county list that also plots each county's summed deaths for the
# rows matched by filter_2020
legend = alt.Chart(df, height=320, width=80).mark_square(size=150).encode(
    y=alt.Y('county', axis=alt.Axis(orient='right'), title=None),
    x=alt.X('sum(deaths)', title='Total Deaths 2020'),
    color=alt.condition(selection, alt.value('#e45757'), alt.value('lightgrey')),
    tooltip=[alt.Tooltip('sum(deaths)', title='Total Deaths 2020'), 'county'],
).add_selection(
    selection
).transform_filter(
    filter_2020
).interactive()

# The line layers only include the counties picked in the legend
chart = (history + current).transform_filter(selection).resolve_scale(color='independent') | legend
chart.save('charts/weekly-county.html')
chart

In [26]:
# Rows without a date cannot be placed on the time axis
df = data_scb[data_scb.date.notna()]

# Don't show last week due to lag in reporting
last_week_to_show = df.date.max().week - 1

# Vega expression predicates, one per year. '&&' is the logical AND; the original
# used the bitwise '&'.
current = f'year(datum.date) == 2020 && week(datum.date) < {last_week_to_show}'
history_2019 = 'year(datum.date) == 2019'
history_2018 = 'year(datum.date) == 2018'

# Shared base: deaths are summed per date before anything is drawn.
# NOTE(review): the subtitle says "20 Day", while the window used for the rolling
# mean in plot_mean is frame=[-10, 10] (21 rows) — confirm which is intended.
base = alt.Chart(
    df,
    height=400,
    width=800,
    title={
        "text": "Daily Deaths in Sweden 2018-2020",
        "subtitle": "With 20 Day Rolling Average"
    }
).transform_aggregate(
    sum_deaths='sum(deaths)',
    groupby=['date']
).encode(
    x=alt.X('monthdate(date)', title='Month-Date'),
    color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
)

# One semi-transparent point per date, for all years
points = base.mark_point(opacity=0.5).encode(
    y='sum_deaths:Q',
)

# https://vega.github.io/vega/docs/expressions/#datetime-functions
def plot_mean(time_filter):
    """Return a smoothed line layer built from ``base`` for one time slice.

    ``time_filter`` is passed to ``transform_filter``; this notebook supplies
    Vega expression strings. The y value of the line is the mean of
    ``sum_deaths`` over a window reaching from 10 rows before to 10 rows after
    each row.
    """
    sliced = base.transform_filter(time_filter)
    windowed = sliced.transform_window(
        rolling_mean='mean(sum_deaths)',
        frame=[-10, 10],
    )
    line = windowed.mark_line(size=4, interpolate='basis')
    return line.encode(y=alt.Y('rolling_mean:Q', title='Deaths per Day'))

# Multi-selection on the legend's y encoding, i.e. the county
selection = alt.selection_multi(toggle='true', encodings=['y'])

# County list that doubles as a clickable legend
legend = (
    alt.Chart(df, height=300)
    .mark_square(size=100)
    .encode(
        y=alt.Y('county', axis=alt.Axis(orient='right'), title=None),
        color=alt.condition(selection, alt.value('#e6550d'), alt.value('lightgrey')),
    )
    .add_selection(selection)
    .interactive()
)

# Smoothed lines for 2018 and 2019, the raw points, then the 2020 line;
# all layers are restricted to the counties picked in the legend.
chart = (
    plot_mean(history_2018)
    + plot_mean(history_2019)
    + points
    + plot_mean(current)
).transform_filter(selection) | legend
chart

In [27]:
df = weekly

# click: multi-selection on the colour encoding (county)
# brush: interval selection along the x axis
click = alt.selection_multi(encodings=['color'])
brush = alt.selection_interval(encodings=['x'])

# Weekly bars coloured by county; rows outside the brush are faded and only
# clicked counties are kept.
bars = (
    alt.Chart(df, width=800, title="Weekly Deaths in Sweden 2018-2020")
    .mark_bar()
    .encode(
        x=alt.X('yearmonthdate(date):O', title='Date'),
        y=alt.Y('sum(deaths):Q', title="Weekly Deaths"),
        color=alt.Color('county:N', legend=None),
        opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
        tooltip=['county', 'deaths', 'date'],
    )
    .add_selection(brush)
    .transform_filter(click)
    .interactive()
)

# Per-county totals over the brushed interval; clicking a bar drives `click`
county_list = (
    alt.Chart(df, width=800, height=100)
    .mark_bar()
    .encode(
        x=alt.X('county:N', title=''),
        y=alt.Y('sum(deaths)', title='Total Deaths'),
        opacity=alt.condition(click, alt.value(1), alt.value(0.2)),
        color='county',
        tooltip=['sum(deaths)'],
    )
    .add_selection(click)
    .transform_filter(brush)
)

# County legend that drives the same `click` selection
legend = (
    alt.Chart(df, height=300)
    .mark_point()
    .encode(
        y=alt.Y('county:N', axis=alt.Axis(orient='right'), title=None),
        color=alt.condition(click, 'county', alt.value('lightgray')),
    )
    .add_selection(click)
    .interactive()
)

# '&' binds tighter than '|': the two bar charts are stacked, the legend sits beside them
chart = (bars & county_list) | legend
chart

In [32]:
# Calculate cumulative deaths per county, reset each year.
# Rows are ordered by county and date, then `deaths` is accumulated inside each
# (county, calendar year) group. cumsum() returns a Series on the frame's own
# index, so the result is assigned without any positional re-alignment and
# without summing unrelated columns.
df_daily = (
    data_scb.dropna()
    .sort_values(by=['county', 'date'])
    .reset_index(drop=True)
)
df_daily['cumulative_deaths'] = (
    df_daily
    .groupby(['county', df_daily['date'].dt.year.rename('year')])['deaths']
    .cumsum()
)
# The original cell also left the result bound to the generic name `df`
df = df_daily

In [34]:
# Calculate cumulative deaths per county, reset each year.
# Rows are ordered by county and date, then `deaths` is accumulated inside each
# (county, calendar year) group. cumsum() returns a Series on the frame's own
# index, so the result is assigned without any positional re-alignment and
# without summing unrelated columns.
df_weekly = (
    weekly.dropna()
    .sort_values(by=['county', 'date'])
    .reset_index(drop=True)
)
df_weekly['cumulative_deaths'] = (
    df_weekly
    .groupby(['county', df_weekly['date'].dt.year.rename('year')])['deaths']
    .cumsum()
)
# The original cell also left the result bound to the generic name `df`
df = df_weekly

In [37]:
# Small multiples: one cumulative-deaths panel per county, one line per year,
# four panels per row, each panel with its own y scale.
chart = (
    alt.Chart(df_weekly, height=180, width=180)
    .mark_line()
    .encode(
        x=alt.X('monthdate(date)', title='Date', axis=alt.Axis(format="%b")),
        y=alt.Y('cumulative_deaths', title=''),
        color=alt.Color(
            'year(date):N',
            title='Year',
            scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']),
        ),
        tooltip=['date', alt.Tooltip('cumulative_deaths', title='Deaths')],
    )
    .facet(
        facet=alt.Facet("county", title="Cumulative Deaths 2018-2020 by County"),
        columns=4,
    )
    .resolve_scale(y='independent')
)

chart.save('charts/cumulative-county.html')
chart

In [None]:
# Selection that follows the mouse and picks the nearest point. It projects on
# the `date` field that every layer puts on x; the original projected on a
# field named 'x', which none of the layers use.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['date'], empty='none')

# The frame is named explicitly instead of relying on whatever `df` was bound last.
# NOTE(review): df_weekly is what `df` holds when the cells above run in file
# order — confirm the weekly table is the intended input.
line = alt.Chart(df_weekly, width=600).mark_line().encode(
    x='date',
    y=alt.Y('sum(cumulative_deaths)', stack=False),
    color='year(date):N'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = alt.Chart(df_weekly).mark_point().encode(
    x='date',
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Points on the line, visible only at the selected date
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Value label next to the selected point
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'sum(cumulative_deaths):Q', alt.value(' '))
)

# Draw a rule at the location of the selection
rules = alt.Chart(df_weekly).mark_rule(color='gray').encode(
    x='date',
).transform_filter(
    nearest
)
# Put the five layers into a chart and bind the data
alt.layer(
    line, selectors, points, rules, text
).properties(
    width=600, height=300
)