In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from os.path import join
from web import for_website

from constants import COLUMNS
from utils import (
    read_combined_daily_counts_df, read_combined_by_country_daily_counts_df,
    apply_theme
)

In [None]:
"""
Common info that should be defined everytime before rendering visualizations
"""
# Distinct site identifiers found in the combined daily-counts file
SITES = read_combined_daily_counts_df()[COLUMNS.SITE_ID].unique()

# Values that feed the chart subtitle
DATA_DATE = "2020-04-07"
VIS_DATE = "2020-04-09"
NUM_SITES = len(SITES)
NUM_PATIENTS = "15,427"
SUBTITLE = " | ".join([
    f"Data as of {DATA_DATE}",
    f"{NUM_SITES} Sites",
    f"Plots generated on {VIS_DATE}"
])

# Directory that receives exported visualization *.PNG files
SAVE_DIR = join("..", "output")

# Colors
# Label and color of the series that aggregates every country
COMBINED = "All countries"
COMBINED_COLOR = "#444444"

# Per-country palette: COUNTRY_COLOR[i] is the color of COUNTRIES[i]
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]
COLOR_BY_COUNTRY = dict(zip(COUNTRIES, COUNTRY_COLOR))

# Same palette with the aggregated series placed first
COUNTRIES_AND_COMBINED = [COMBINED, *COUNTRIES]
COUNTRY_AND_COMBINED_COLOR = [COMBINED_COLOR, *COUNTRY_COLOR]
COLOR_BY_COUNTRY_AND_COMBINED = dict(zip(COUNTRIES_AND_COMBINED, COUNTRY_AND_COMBINED_COLOR))

# Twenty-entry categorical palette
COLOR20 = [
    "#3366cc", "#dc3912", "#ff9900", "#109618", "#990099",
    "#0099c6", "#dd4477", "#66aa00", "#b82e2e", "#316395",
    "#994499", "#22aa99", "#aaaa11", "#6633cc", "#e67300",
    "#8b0707", "#651067", "#329262", "#5574a6", "#3b3eac"
]

# Required Setup
- All combined datasets should be placed in `../data/combined` (e.g., `../data/combined/DailyCounts-Combinedyymmdd.csv` for the DailyCounts file).
- To save PNG files for visualizations, a folder named "output" should be present (i.e., `../output/`).

# Data preprocessing

In [None]:
# Name of the column that holds the melted variable name
CATEGORY = "category"

def preprocess_daily_df(df_dc):
    """
    Reshape a wide daily-counts frame into one row per (site, date, category).

    For every category (new positive cases, patients in ICU, new deaths) the
    result carries:
      - "under": the reported value,
      - "upper": the reported value plus the category's masked upper bound,
      - COLUMNS.NUM_PATIENTS: the reported value plus half of that bound,
      - COLUMNS.NUM_SITES: unmasked sites plus masked sites for the category.

    The helper columns used for these computations are dropped before
    returning. The input frame is not modified; a new frame is returned.
    """
    measures = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.PATIENTS_IN_ICU, COLUMNS.NEW_DEATHS]

    # Per-category bookkeeping columns: kept as identifiers through the melt,
    # consumed in the loop below, then removed from the result.
    helper_columns = [
        COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
        COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
        COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.UNMASKED_SITES_NEW_DEATHS,
        COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.MASKED_SITES_NEW_DEATHS
    ]

    # Wide to long: every remaining column becomes a (category, value) pair
    long_df = pd.melt(
        df_dc, id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE] + helper_columns
    ).rename(columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS})

    for measure in measures:
        rows = long_df[CATEGORY] == measure

        # Helper columns are addressed by "<prefix>" + category name;
        # assumes the COLUMNS.* helper constants follow that naming — TODO confirm
        reported = long_df.loc[rows, COLUMNS.NUM_PATIENTS]
        masked_bound = long_df.loc[rows, "masked_upper_bound_" + measure]

        long_df.loc[rows, "upper"] = reported + masked_bound
        long_df.loc[rows, "under"] = reported
        long_df.loc[rows, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

        # Number of sites; the full-length sum is aligned to the selected rows by index
        long_df.loc[rows, COLUMNS.NUM_SITES] = (
            long_df["unmasked_sites_" + measure] + long_df["masked_sites_" + measure]
        )

    return long_df.drop(columns=helper_columns)

# Long-form daily counts: the all-sites aggregate and the per-country breakdown
df_dc_combined = preprocess_daily_df(read_combined_daily_counts_df())
df_dc = preprocess_daily_df(read_combined_by_country_daily_counts_df())

# Stack the per-country rows on top of the aggregate rows
df_dc = pd.concat([df_dc, df_dc_combined])

# Show the aggregate under the display label used by the charts
df_dc.loc[df_dc[COLUMNS.SITE_ID] == "Combined", COLUMNS.SITE_ID] = COMBINED

df_dc

# Visualizations

In [None]:
# Categories offered in the chart's dropdown, in the order they are listed
CATEGORIES = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.NEW_DEATHS, COLUMNS.PATIENTS_IN_ICU]

# Human-readable label for each category
TITLE_BY_CATEGORY = {
    COLUMNS.NEW_POSITIVE_CASES: "positive cases",  # was misspelled "possitive cases"
    COLUMNS.NEW_DEATHS: "deaths",
    COLUMNS.PATIENTS_IN_ICU: "ICU admissions"
}

def dailycount_by_date(is_cum=True, is_only_combined=False):
    """
    Build the daily-count visualization from the module-level `df_dc`.

    The result stacks two views that share the date axis: a line + point +
    error-band chart of patient counts on top, and a bar chart of the number
    of sites below. A dropdown bound to the `category` field picks which
    category is displayed.

    Parameters
    ----------
    is_cum : bool
        When True, the plotted value and its "upper"/"under" bounds are
        replaced by running sums ordered by date within each site id.
    is_only_combined : bool
        When True, keep only rows whose site id equals COMBINED; otherwise
        keep every row except those.

    Returns
    -------
    The themed chart produced by `apply_theme`, with the dropdown selection,
    shared x scale and title attached.
    """
    # Dropdown that chooses the category to display
    category_dropdown = alt.binding_select(options=CATEGORIES)
    category_selection = alt.selection_single(
        fields=[CATEGORY],
        bind=category_dropdown,
        name="Value",
        init={CATEGORY: COLUMNS.NEW_POSITIVE_CASES}
    )

    base = alt.Chart(df_dc).transform_filter(category_selection)

    # Either the aggregated series alone, or every series except the aggregate
    if is_only_combined:
        color_scale = alt.Scale(domain=[COMBINED], range=[COMBINED_COLOR])
        base = base.transform_filter(alt.datum[COLUMNS.SITE_ID] == COMBINED)
    else:
        color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
        base = base.transform_filter(alt.datum[COLUMNS.SITE_ID] != COMBINED)

    # Fields to plot; switched to running totals for the cumulative view
    if is_cum:
        running_totals = (
            ("cum_val", COLUMNS.NUM_PATIENTS),
            ("cum_upper", "upper"),
            ("cum_under", "under"),
        )
        # One window transform per output field, in the listed order
        for out_field, src_field in running_totals:
            base = base.transform_window(
                sort=[{"field": COLUMNS.DATE}],
                groupby=[COLUMNS.SITE_ID],
                **{out_field: f"sum({src_field})"}
            )
        y_field, upper, under = "cum_val", "cum_upper", "cum_under"
    else:
        y_field, upper, under = COLUMNS.NUM_PATIENTS, "upper", "under"

    # Encodings repeated across marks; each call returns a fresh channel object
    def date_x():
        return alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None)

    def site_color():
        return alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None))

    # Top view: value line, its points, and the band between "under" and "upper"
    line = base.mark_line(size=3).encode(
        x=date_x(),
        y=alt.Y(f"{y_field}:Q", axis=alt.Axis(tickCount=5), title="Number of patients"),
        color=site_color()
    )
    point = line.mark_circle(size=40)
    errorband = base.mark_errorband().encode(
        x=date_x(),
        y=alt.Y(f"{upper}:Q", title=""),
        y2=f"{under}:Q",
        color=site_color()
    )
    layered = (line + point + errorband).resolve_scale(color="shared")
    top_line = layered.properties(width=750, height=400).interactive()

    # Bottom view: number of sites per date
    bottom_bar = base.mark_bar(size=5).encode(
        x=date_x(),
        y=alt.Y(f"{COLUMNS.NUM_SITES}:Q", title="# of sites"),
        color=site_color()
    ).properties(height=60)

    prefix = "Cumulative" if is_cum else "New"
    title = f"{prefix} positive cases, deaths, and ICU admissions over time"

    # Theme the stacked views, attach the dropdown, and share the date axis
    themed = apply_theme(top_line & bottom_bar).add_selection(category_selection)
    return themed.resolve_scale(x="shared").properties(title={
        "text": title,
        "subtitle": SUBTITLE,
        "subtitleColor": "gray",
        "dx": 60
    })

## New positive cases, new deaths, and ICU admissions over time

In [None]:
# Store the chart under its own name. Assigning it to `dailycount_by_date`
# would rebind the chart-building function to a chart object, so any later
# call such as dailycount_by_date(is_cum=True) (and a re-run of this cell)
# would fail because a chart is not callable.
new_dailycount_by_date = dailycount_by_date(is_cum=False)

# for_website(new_dailycount_by_date, "Daily Count", "Daily count by date") # TODO: Remove this before deploying notebook
# save(new_dailycount_by_date, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

new_dailycount_by_date

## Cumulative positive cases, deaths, and ICU admissions over time

In [None]:
# Build the chart with cumulative values (is_cum=True); the bare name on the
# last line makes it the cell's displayed output.
cum_dailycount_by_date = dailycount_by_date(is_cum=True)

# for_website(cum_dailycount_by_date, "Daily Count", "Cumulative daily count by date") # TODO: Remove this before deploying notebook
# save(cum_dailycount_by_date, join(SAVE_DIR, "cum_dailycount_by_date.png")) # Uncomment this to save *.png files

cum_dailycount_by_date