In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_0

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from os.path import join
from web import for_website

from constants_1_0 import COLUMNS, DATA_AGGREGATE_TYPES
from utils_1_0 import (
    get_visualization_subtitle,
    apply_theme
)

# Data Preprocessing

## Daily Counts Data From Figshare
Use the latest data from https://doi.org/10.6084/m9.figshare.12152976.v1

In [None]:
# DailyCounts-CombinedByCountry.csv
# Read the raw (wide-format) daily counts table straight from the Figshare
# download endpoint; running this cell needs network access.
df_dc = pd.read_csv("https://ndownloader.figshare.com/files/22346625")

# Preview the first rows of the raw table.
df_dc.head()

In [None]:
# Label that replaces the "Combined" site id during preprocessing, and the
# color used for it in the charts.
ALL_COUNTRY = "All countries"
ALL_COUNTRY_COLOR = "#444444"
# Countries shown in the charts and their colors. The two lists are paired by
# position (they are used together as a color scale domain/range).
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]

In [None]:
# Name of the long-format column that holds the kind of count in each row.
CATEGORY = "category"

def preprocess_daily_df(df_dc, date_from="2020-01-29", date_to="2020-03-31"):
    """Reshape the wide daily-counts table into the long format used by the charts.

    Steps:
      1. Melt the table so that every count column becomes a row, identified
         by ``CATEGORY``, with its value in ``COLUMNS.NUM_PATIENTS``.
      2. For each category, derive
         - ``under``: the reported count,
         - ``upper``: the reported count plus the masked upper bound,
         - ``COLUMNS.NUM_PATIENTS``: the midpoint between ``under`` and ``upper``,
         - ``COLUMNS.NUM_SITES``: unmasked sites plus masked sites.
      3. Drop the per-category helper columns.
      4. Relabel the "Combined" site id as ``ALL_COUNTRY``.
      5. Keep only rows inside ``[date_from, date_to]``, drop ICU rows, and
         drop rows whose number of sites is zero (treated as missing data).

    Parameters
    ----------
    df_dc : pandas.DataFrame
        Wide-format daily counts. It is not modified.
    date_from, date_to : str
        Inclusive bounds compared against ``COLUMNS.DATE`` with ``>=`` / ``<=``.
        The defaults reproduce the original fixed window.
        NOTE(review): the comparison assumes dates are ISO ``YYYY-MM-DD``
        strings so that string ordering equals date ordering. Confirm.

    Returns
    -------
    pandas.DataFrame
        An independent long-format frame (a copy, not a slice), so callers can
        add columns to it without touching an intermediate frame.
    """
    categories = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.PATIENTS_IN_ICU, COLUMNS.NEW_DEATHS]

    # Per-category bookkeeping columns. They are carried through the melt as
    # identifiers and dropped again once the derived columns are computed.
    helper_columns = [
        COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
        COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
        COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.UNMASKED_SITES_NEW_DEATHS,
        COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.MASKED_SITES_NEW_DEATHS
    ]

    # Wide to long
    long_df = pd.melt(df_dc, id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE] + helper_columns)
    long_df = long_df.rename(columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS})

    # Fill 'upper' / 'under' / number of sites only on the rows of the matching category
    for c in categories:
        is_c = long_df[CATEGORY] == c
        # Boolean-mask .loc reads return copies, so these keep the reported
        # values even after NUM_PATIENTS is overwritten below.
        reported = long_df.loc[is_c, COLUMNS.NUM_PATIENTS]
        masked_bound = long_df.loc[is_c, "masked_upper_bound_" + c]

        long_df.loc[is_c, "upper"] = reported + masked_bound
        long_df.loc[is_c, "under"] = reported
        long_df.loc[is_c, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

        # Add num of sites (the right-hand side is aligned on the index)
        long_df.loc[is_c, COLUMNS.NUM_SITES] = long_df["unmasked_sites_" + c] + long_df["masked_sites_" + c]

    # Drop unused columns
    long_df = long_df.drop(columns=helper_columns)

    # Use more readable names. This happens before any row filtering so the
    # assignment targets a full frame rather than a filtered slice.
    long_df.loc[long_df[COLUMNS.SITE_ID] == "Combined", COLUMNS.SITE_ID] = ALL_COUNTRY

    keep = (
        # Make sure to drop date range out of our interest
        (long_df[COLUMNS.DATE] >= date_from)
        & (long_df[COLUMNS.DATE] <= date_to)
        # We are not using ICU
        & (long_df[CATEGORY] != COLUMNS.PATIENTS_IN_ICU)
        # Remove zero num_sites, which is missing data
        & (long_df[COLUMNS.NUM_SITES] != 0)
    )

    return long_df[keep].copy()

# df_dc = read_combined_by_country_daily_counts_df() # For using local data
# NOTE(review): this rebinds df_dc from the raw table to the processed one.
# Re-running this cell without re-running the loading cell first is expected
# to fail, because preprocessing drops the columns it melts on.
df_dc = preprocess_daily_df(df_dc)

# df_dc_combined = read_combined_daily_counts_df()
# df_dc_combined = preprocess_daily_df(df_dc_combined)

# Merge
# df_dc = pd.concat([df_dc, df_dc_combined])

# Display the processed long-format table.
df_dc

# Visualizations

In [None]:
# Categories offered in the chart dropdown and iterated over for the heatmaps.
CATEGORIES = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.NEW_DEATHS]

# Human-readable label of each category, used when building chart titles.
TITLE_BY_CATEGORY = {
    COLUMNS.NEW_POSITIVE_CASES: "positive cases",
    COLUMNS.NEW_DEATHS: "deaths",
    COLUMNS.PATIENTS_IN_ICU: "ICU admissions"
}

def dailycount_by_date(df=None, is_cum=True, is_only_combined=False, is_site_level=False, is_num_hospital=False):
    """Build the interactive daily-count figure.

    The figure is a vertical concatenation of
      - a top panel with points, lines and an error band (``under``..``upper``)
        of patient counts over time, and
      - a bottom bar panel with the number of sites (or hospitals) per date,
        which also hosts the date brush that filters the top panel.

    A dropdown selects the category, the legend toggles sites, hovering shows
    a vertical rule at the nearest date, and the top panel's y scale can be
    zoomed.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Long-format daily counts. When omitted, the notebook-level ``df_dc``
        is looked up at call time, so the chart follows the current data even
        if ``df_dc`` was rebound after this function was defined.
    is_cum : bool
        If True, plot running sums (per site, ordered by date) of the patient
        counts and of the upper/under bounds instead of the daily values.
    is_only_combined : bool
        If True, show only the ``ALL_COUNTRY`` rows; otherwise exclude them.
    is_site_level : bool
        If True, color by ``ANONYMOUS_SITES`` / ``ANONYMOUS_COLORS`` and title
        the chart "by Site" instead of "by Country".
    is_num_hospital : bool
        If True, the bottom panel shows ``COLUMNS.NUM_HOSPITALS`` instead of
        ``COLUMNS.NUM_SITES``.

    Returns
    -------
    The themed, titled Altair chart with all selections attached.
    """
    # Resolve the default lazily: a `df=df_dc` default would be frozen at
    # definition time and go stale when the data cell is re-run.
    if df is None:
        df = df_dc

    # Selections
    nearest = alt.selection(type="single", nearest=True, on="mouseover", encodings=["x"], empty='none', clear="mouseout")
    dailycount_dropdown = alt.binding_select(options=CATEGORIES)
    dailycount_selection = alt.selection_single(fields=[CATEGORY], bind=dailycount_dropdown, name="Value", init={CATEGORY: COLUMNS.NEW_POSITIVE_CASES})
    legend_selection = alt.selection_multi(fields=[COLUMNS.SITE_ID], bind="legend")
    date_brush = alt.selection(type="interval", encodings=['x'])
    y_zoom = alt.selection(type="interval", bind='scales', encodings=['y'])

    # Rule: vertical marker at the hovered date
    nearest_rule = alt.Chart(df).mark_rule(color="red").encode(
        x=f"{COLUMNS.DATE}:T",
        size=alt.value(0.5)
    ).transform_filter(
        nearest
    )

    # Color scales; the site-level setting takes precedence over the combined one
    color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
    color_scale_bg = alt.Scale(domain=COUNTRIES, range=["lightgray"])
    if is_only_combined:
        color_scale = alt.Scale(domain=[ALL_COUNTRY], range=[ALL_COUNTRY_COLOR])
        color_scale_bg = alt.Scale(domain=[ALL_COUNTRY], range=["lightgray"])
    if is_site_level:
        # NOTE(review): ANONYMOUS_SITES / ANONYMOUS_COLORS are not defined in
        # the visible part of this notebook; they must exist before calling
        # with is_site_level=True.
        color_scale = alt.Scale(domain=ANONYMOUS_SITES, range=ANONYMOUS_COLORS)
        color_scale_bg = alt.Scale(domain=ANONYMOUS_SITES, range=["lightgray"])

    # Filter by the selected category and by the sites toggled in the legend
    filtered_chart = alt.Chart(df).transform_filter(
        dailycount_selection
    ).transform_filter(
        legend_selection
    )

    if is_only_combined:
        filtered_chart = filtered_chart.transform_filter(
            alt.datum[COLUMNS.SITE_ID] == ALL_COUNTRY
        )
    else:
        filtered_chart = filtered_chart.transform_filter(
            alt.datum[COLUMNS.SITE_ID] != ALL_COUNTRY
        )

    tooltip_fields = [
        alt.Tooltip(COLUMNS.SITE_ID, title="Country"),
        alt.Tooltip(COLUMNS.DATE, title="Date", format="%Y-%m-%d", formatType="time"),
        alt.Tooltip(COLUMNS.NUM_PATIENTS, title="Number of patients"),
        alt.Tooltip(COLUMNS.NUM_SITES, title="Number of sites")
    ]

    # Calculate cumulative values
    y_field = COLUMNS.NUM_PATIENTS
    upper = "upper"
    under = "under"
    if is_cum:
        # Running sums per site, ordered by date, computed after the filters above
        filtered_chart = filtered_chart.transform_window(
            cum_num_patients=f"sum({COLUMNS.NUM_PATIENTS})",
            sort=[{"field": COLUMNS.DATE}],
            groupby=[COLUMNS.SITE_ID]
        ).transform_window(
            cum_upper="sum(upper)",
            sort=[{"field": COLUMNS.DATE}],
            groupby=[COLUMNS.SITE_ID]
        ).transform_window(
            cum_under="sum(under)",
            sort=[{"field": COLUMNS.DATE}],
            groupby=[COLUMNS.SITE_ID]
        )
        upper = "cum_upper"
        under = "cum_under"
        y_field = "cum_num_patients"
        tooltip_fields.append(alt.Tooltip("cum_num_patients:Q", title="Cumulative # of patients"))

    # Render
    line = filtered_chart.mark_line(size=3, opacity=0.7).encode(
        x=alt.X(
            f"{COLUMNS.DATE}:T",
            scale=alt.Scale(padding=10),
            axis=alt.Axis(tickCount=7, grid=True, labels=True, ticks=True, domain=True),
            title=None
        ),
        y=alt.Y(f"{y_field}:Q", axis=alt.Axis(tickCount=5), title="Number of patients", scale=alt.Scale(padding=10, nice=False)),
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None)),
        tooltip=tooltip_fields,
    ).transform_filter(
        date_brush
    )
    # Points reuse the line encoding; the hovered date gets a larger point
    circle = line.mark_circle(size=30, opacity=0.7).encode(
        size=alt.condition(~nearest, alt.value(30), alt.value(60))
    )
    errorband = line.mark_errorband().encode(
        x=alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None),
        y=alt.Y(f"{upper}:Q", title=""),
        y2=f"{under}:Q",
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None)),
        tooltip=tooltip_fields
    )

    top_line = (circle + line + errorband + nearest_rule).resolve_scale(color="shared").properties(width=750, height=400).add_selection(y_zoom)

    bottom_y_field = COLUMNS.NUM_HOSPITALS if is_num_hospital else COLUMNS.NUM_SITES
    bottom_y_title = "# of hospitals" if is_num_hospital else "# of sites"
    bottom_bar_bg = filtered_chart.mark_bar(size=5).encode(
        x=alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None, scale=alt.Scale(padding=10)),
        y=alt.Y(f"{bottom_y_field}:Q", title=bottom_y_title, axis=alt.Axis(tickMinStep=1)),
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale_bg, legend=None),
        tooltip=tooltip_fields
    ).properties(height=60, width=750)
    # Same bars, recolored with the real color scale (no legend on this panel)
    bottom_bar = bottom_bar_bg.encode(
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=None),
    )

    # The date brush lives on the bottom panel and filters the top one
    bottom_bar = (bottom_bar + nearest_rule).resolve_scale(color="independent").add_selection(date_brush)

    title = "Daily Counts"
    title = f"Cumulative {title}" if is_cum else f"{title}"
    title = f"{title} by Site" if is_site_level else f"{title} by Country"

    # Apply Theme
    result_vis = apply_theme(
        alt.vconcat(top_line, bottom_bar).resolve_scale(x="independent", color="shared"),
        legend_orient="right",
        axis_title_font_size=13
    )

    result_vis = result_vis.properties(title={
        "text": title,
        "subtitle": get_visualization_subtitle(alt_num_sites=21),
        "subtitleColor": "gray",
        "dx": 60
    }).add_selection(
        legend_selection
    ).add_selection(
        nearest
    ).add_selection(
        dailycount_selection
    )

    return result_vis

## Daily counts by country

In [None]:
# Non-cumulative view: per-day values for each country.
dailycount = dailycount_by_date(is_cum=False)

# for_website is a project helper imported from `web`; presumably it registers
# the chart for the website under these two labels. Confirm against that module.
for_website(dailycount, "Daily Count", "Daily counts by country")
# NOTE(review): SAVE_DIR is not defined in the visible part of this notebook;
# define it before uncommenting the line below.
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

dailycount

## Cumulative daily counts by country

In [None]:
# Cumulative view: running sums per country.
dailycount = dailycount_by_date(is_cum=True)

# for_website is a project helper imported from `web`; presumably it registers
# the chart for the website under these two labels. Confirm against that module.
for_website(dailycount, "Daily Count", "Cumulative daily counts by country")
# NOTE(review): SAVE_DIR is not defined in the visible part of this notebook, and
# this file name is the same as in the non-cumulative cell, so saving both would
# overwrite the first image. Adjust both before uncommenting.
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

dailycount

## Values by the day of the week

In [None]:
import datetime

# Week-of-year number (second field of the ISO calendar tuple) for every row,
# parsed from the 'YYYY-MM-DD' date strings. Adds a "week" column to df_dc.
df_dc["week"] = df_dc["date"].map(
    lambda date_str: datetime.datetime.strptime(date_str, '%Y-%m-%d').isocalendar()[1]
)

# Color for every site label (the combined label first, then each country),
# pairing the label list with the color list by position.
country_by_color = dict(zip([ALL_COUNTRY] + COUNTRIES, [ALL_COUNTRY_COLOR] + COUNTRY_COLOR))

def dailycount_by_day_and_week(country, category):
    """Heatmap of summed patient counts by week (x) and day of the week (y).

    Only the rows of the notebook-level ``df_dc`` that match the given
    ``country`` (site id) and ``category`` are shown. The title text is colored
    with the country's entry in ``country_by_color``.
    """
    # Restrict the data to one category and one country
    only_category = alt.datum[CATEGORY] == category
    only_country = alt.datum[COLUMNS.SITE_ID] == country
    base = alt.Chart(df_dc).transform_filter(only_category).transform_filter(only_country)

    # One rectangle per (week, weekday) cell, shaded by the summed count
    heatmap = base.mark_rect().encode(
        y=alt.Y("day(date):O",title="Day of the week"),
        x=alt.X('week:O', title="Week of the year"),
        color=alt.Color('sum(num_patients):Q', title=None, scale=alt.Scale(scheme="lightorange"))
    )

    title_spec = {
        "text": f"New {TITLE_BY_CATEGORY[category]} ({country})",
        "subtitle": get_visualization_subtitle(alt_num_sites=21),
        "color": country_by_color[country],
        "subtitleColor": "gray",
        "dx": 60
    }
    return heatmap.properties(height=220, width=380, title=title_spec)

# Set to True to also write each figure to a PNG file.
# NOTE(review): SAVE_DIR is not defined in the visible part of this notebook;
# it must exist before enabling this.
is_save = False

# One figure per category: the per-country heatmaps stacked vertically.
for category in CATEGORIES:
    v = alt.vconcat()
    for country in COUNTRIES:
        v &= dailycount_by_day_and_week(country=country, category=category)

    # Apply Theme, then give each heatmap its own color scale on a shared x axis
    result_vis = apply_theme(v, legend_orient="right", legend_stroke_color="white", legend_padding=0)
    result_vis = result_vis.resolve_scale(color="independent", x="shared")

    # Display and save
    result_vis.display()
    if is_save:
        save(result_vis, join(SAVE_DIR, f"dailycount_by_day_and_week_{category}.png"))