In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS
from utils import (
    read_combined_daily_counts_df,
    read_combined_demographics_df, 
    read_combined_diagnoses_df,
    read_combined_labs_df,
    apply_theme, apply_grouped_bar_theme
)

# Daily Counts

In [None]:
# Load the combined daily counts and discard the per-site-group breakdown
# columns (unmasked / masked sites), which the steps below do not use.
df = read_combined_daily_counts_df().drop(columns=[
    COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.UNMASKED_SITES_NEW_DEATHS,
    COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.MASKED_SITES_NEW_DEATHS,
])

# Masked upper bound columns: carried through the reshape as identifier
# columns so every long-format row can look up its bound, then dropped.
upper_bound_columns = [
    COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
    COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
    COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
]

# Wide to long: every column that is not an identifier becomes a
# (category, number of patients) pair.
CATEGORY = "category"
df = pd.melt(
    df,
    id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE] + upper_bound_columns,
).rename(columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS})

# For each category, using only that category's own masked upper bound:
#   under = reported count
#   upper = reported count + bound
# and the plotted value is moved to the midpoint of [under, upper].
for category in [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.PATIENTS_IN_ICU, COLUMNS.NEW_DEATHS]:
    rows = df[CATEGORY] == category
    reported = df.loc[rows, COLUMNS.NUM_PATIENTS]
    bound = df.loc[rows, "masked_upper_bound_" + category]
    df.loc[rows, "upper"] = reported + bound
    df.loc[rows, "under"] = reported
    df.loc[rows, COLUMNS.NUM_PATIENTS] = reported + bound / 2.0

# The bounds are now folded into 'upper' / 'under'; remove the raw columns.
df = df.drop(columns=upper_bound_columns)

df.head()

In [None]:
# Categories drawn in the chart, and the palette paired with them by position.
MID_FIELDS = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.NEW_DEATHS, COLUMNS.PATIENTS_IN_ICU]
THREE_COLORS = ["#377FB8", "#CA2026", "#60B75D"]
# NOTE(review): COLOR_BY_FIELD and GRAY_COLOR are not referenced in this cell,
# and COLOR_BY_FIELD pairs the categories with different colors than the
# MID_FIELDS / THREE_COLORS scale used below -- confirm which mapping is intended.
COLOR_BY_FIELD = {
    COLUMNS.NEW_POSITIVE_CASES: "#CA2026",
    COLUMNS.NEW_DEATHS: "#60B75D",
    COLUMNS.PATIENTS_IN_ICU: "#377FB8"
}
GRAY_COLOR = "lightgray"

# One color scale object reused by every layer.
category_scale = alt.Scale(domain=MID_FIELDS, range=THREE_COLORS)


def _date_x():
    """Return a fresh temporal x-encoding on the date column (7 ticks, no title)."""
    return alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None)


# Line through the per-category values.
line = alt.Chart(df).mark_line(size=2).encode(
    x=_date_x(),
    y=alt.Y(
        f"{COLUMNS.NUM_PATIENTS}:Q",
        axis=alt.Axis(tickCount=5),
        title="Number of patients",
        scale=alt.Scale(domain=[-1, 170]),
    ),
    color=alt.Color(
        f"{CATEGORY}:N",
        scale=category_scale,
        legend=alt.Legend(title=None),
    ),
)

# Same encodings as the line, drawn as circles on each data point.
point = line.mark_circle(size=30)

# Band spanning the 'under'..'upper' columns of df.
errorband = alt.Chart(df).mark_errorband().encode(
    x=_date_x(),
    y=alt.Y("upper:Q", title=""),
    y2=alt.Y2("under:Q"),
    color=alt.Color(f"{CATEGORY}:N", scale=category_scale),
)

# Layer the three marks and make them share a single color scale.
agg_chart = (line + point + errorband).resolve_scale(color="shared")

apply_theme(agg_chart).properties(
    title="Number of Positive Cases, Patients in ICU, and Deaths",
    width=750,
    height=400,
).interactive()

# Demographics

In [None]:
df = read_combined_demographics_df()

# Age-group count columns processed below.
age_group_columns = [
    COLUMNS.AGE_0TO2,
    COLUMNS.AGE_3TO5,
    COLUMNS.AGE_6TO11,
    COLUMNS.AGE_12TO17,
    COLUMNS.AGE_18TO25,
    COLUMNS.AGE_26TO49,
    COLUMNS.AGE_50TO69,
    COLUMNS.AGE_70TO79,
    COLUMNS.AGE_80PLUS,
]

# Masked upper bound columns, one per age group. Defined once so the melt
# identifiers and the final drop cannot drift apart.
masked_upper_bound_columns = [
    COLUMNS.MASKED_UPPER_BOUND_AGE_0TO2,
    COLUMNS.MASKED_UPPER_BOUND_AGE_3TO5,
    COLUMNS.MASKED_UPPER_BOUND_AGE_6TO11,
    COLUMNS.MASKED_UPPER_BOUND_AGE_12TO17,
    COLUMNS.MASKED_UPPER_BOUND_AGE_18TO25,
    COLUMNS.MASKED_UPPER_BOUND_AGE_26TO49,
    COLUMNS.MASKED_UPPER_BOUND_AGE_50TO69,
    COLUMNS.MASKED_UPPER_BOUND_AGE_70TO79,
    COLUMNS.MASKED_UPPER_BOUND_AGE_80PLUS,
]

# Drop the columns the steps below do not use: the per-site-group breakdowns
# (unmasked / masked sites) and the total-patient columns.
df = df.drop(columns=[
    COLUMNS.UNMASKED_SITES_TOTAL_PATIENTS,
    COLUMNS.UNMASKED_SITES_AGE_0TO2,
    COLUMNS.UNMASKED_SITES_AGE_3TO5,
    COLUMNS.UNMASKED_SITES_AGE_6TO11,
    COLUMNS.UNMASKED_SITES_AGE_12TO17,
    COLUMNS.UNMASKED_SITES_AGE_18TO25,
    COLUMNS.UNMASKED_SITES_AGE_26TO49,
    COLUMNS.UNMASKED_SITES_AGE_50TO69,
    COLUMNS.UNMASKED_SITES_AGE_70TO79,
    COLUMNS.UNMASKED_SITES_AGE_80PLUS,
    COLUMNS.MASKED_SITES_TOTAL_PATIENTS,
    COLUMNS.MASKED_SITES_AGE_0TO2,
    COLUMNS.MASKED_SITES_AGE_3TO5,
    COLUMNS.MASKED_SITES_AGE_6TO11,
    COLUMNS.MASKED_SITES_AGE_12TO17,
    COLUMNS.MASKED_SITES_AGE_18TO25,
    COLUMNS.MASKED_SITES_AGE_26TO49,
    COLUMNS.MASKED_SITES_AGE_50TO69,
    COLUMNS.MASKED_SITES_AGE_70TO79,
    COLUMNS.MASKED_SITES_AGE_80PLUS,
    COLUMNS.MASKED_UPPER_BOUND_TOTAL_PATIENTS,
    COLUMNS.TOTAL_PATIENTS,
])

# Wide to long: every column that is not an identifier becomes an
# (age group, number of patients) pair. The bound columns are kept as
# identifiers so each long-format row can look up its own bound.
df = pd.melt(df, id_vars=[COLUMNS.SITE_ID, COLUMNS.SEX] + masked_upper_bound_columns)
df = df.rename(columns={"variable": COLUMNS.AGE_GROUP, "value": COLUMNS.NUM_PATIENTS})

# For each age group, using only that group's own masked upper bound:
#   under = reported count
#   upper = reported count + bound
# and the plotted value is moved to the midpoint of [under, upper].
for age_group in age_group_columns:
    in_group = df[COLUMNS.AGE_GROUP] == age_group
    reported = df.loc[in_group, COLUMNS.NUM_PATIENTS]
    bound = df.loc[in_group, "masked_upper_bound_" + age_group]
    df.loc[in_group, "upper"] = reported + bound
    df.loc[in_group, "under"] = reported
    df.loc[in_group, COLUMNS.NUM_PATIENTS] = reported + bound / 2.0

# Exclude the rows whose sex value is "ALL".
df = df[df[COLUMNS.SEX] != "ALL"]

# The bounds are now folded into 'upper' / 'under'; remove the raw columns.
df = df.drop(columns=masked_upper_bound_columns)

# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Grouped bar chart of patient counts per sex, one facet column per age group,
# with a black error bar spanning the 'under'..'upper' columns of df.
color_scale = alt.Scale(domain=["Male", "Female", "Other"], range=["#377FB8", "#CA2026", "gray"])

# Field names are derived from COLUMNS (rather than hard-coded strings) so that
# both layers and the facet always reference the same dataframe columns.
sex_field = f"{COLUMNS.SEX}:N"
age_group_field = f"{COLUMNS.AGE_GROUP}:O"

# Bars: x axis is hidden (axis=None); sex is conveyed through the bar color.
base = alt.Chart(df).mark_bar().encode(
    x=alt.X(sex_field, title=None, axis=None),
    y=alt.Y(f"{COLUMNS.NUM_PATIENTS}:Q", title="Number of patients", axis=alt.Axis(tickCount=5)),
    color=alt.Color(sex_field, title=None, scale=color_scale),
).properties(
    width=70,
    height=400
)

# Error bars drawn over the bars, from 'under' (y2) to 'upper' (y).
errorbar = alt.Chart(df).mark_errorbar().encode(
    x=alt.X(sex_field, title=None),
    y=alt.Y("upper:Q", title=""),
    y2=alt.Y2("under:Q"),
    color=alt.value("black")
)

apply_grouped_bar_theme(
    alt.layer(base, errorbar).facet(
        column=alt.Column(
            age_group_field,
            # Explicit order for the facet columns.
            sort=["0-2","3-5","6-11","12-17","18-25","26-49","50-69","70-79", "80+"],
            header=alt.Header(labelOrient="bottom", title="Age group", titleOrient="bottom")
        )
    ).properties(title="Demographics"),
    strokeColor="lightgray"
)