In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS
from utils import (
    read_combined_daily_counts_df,
    read_combined_demographics_df, 
    read_combined_diagnoses_df,
    read_combined_labs_df,
    apply_theme
)

# Daily Counts

In [None]:
# Load the combined daily counts and reshape them into a long table with one
# row per (site, date, category). Each row ends up with the count in
# COLUMNS.NUM_PATIENTS plus "under"/"upper" columns derived from the matching
# masked-upper-bound column.
df = read_combined_daily_counts_df()

# Drop unused columns before preprocessing, for simplicity
df = df.drop(columns=[
    COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.UNMASKED_SITES_NEW_DEATHS,
    COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.MASKED_SITES_NEW_DEATHS
])

# Each count category paired with the column that holds its masked upper bound.
# The COLUMNS constants are used directly (instead of rebuilding the column
# names by string concatenation) so this cell cannot drift from the names the
# melt and drop steps below rely on.
MASKED_UPPER_BOUND_BY_CATEGORY = {
    COLUMNS.NEW_POSITIVE_CASES: COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
    COLUMNS.PATIENTS_IN_ICU: COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
    COLUMNS.NEW_DEATHS: COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
}
masked_upper_bound_columns = list(MASKED_UPPER_BOUND_BY_CATEGORY.values())

# Wide to long: every column that is not an id column becomes a
# (category, value) row; the masked-bound columns stay alongside as id columns
# so they remain available per row.
CATEGORY = "category"
df = pd.melt(
    df,
    id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE, *masked_upper_bound_columns],
)
df = df.rename(columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS})

# For each category, fill 'upper' and 'under' from that category's own bound
# column only, and move the plotted value to the midpoint of the two.
for category, bound_column in MASKED_UPPER_BOUND_BY_CATEGORY.items():
    in_category = df[CATEGORY] == category
    # Boolean-mask .loc reads return copies, so the assignments below do not
    # feed back into these two series.
    reported = df.loc[in_category, COLUMNS.NUM_PATIENTS]
    masked_bound = df.loc[in_category, bound_column]

    df.loc[in_category, "upper"] = reported + masked_bound
    df.loc[in_category, "under"] = reported
    df.loc[in_category, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

# Drop the bound columns now that they are folded into 'upper'/'under'
df = df.drop(columns=masked_upper_bound_columns)

df.head()

In [None]:
# Layered Altair chart of the long-format daily counts in `df`: a line and
# point marks for the value in COLUMNS.NUM_PATIENTS, plus an error band
# spanning the "under".."upper" columns, all coloured by category.
MID_FIELDS = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.NEW_DEATHS, COLUMNS.PATIENTS_IN_ICU]
UPPER_FIELDS = [f"upper_{field}" for field in MID_FIELDS]
UNDER_FIELDS = [f"under_{field}" for field in MID_FIELDS]
THREE_COLORS = ["#CA2026", "#377FB8", "#60B75D"]
COLOR_BY_FIELD = {
    COLUMNS.NEW_POSITIVE_CASES: "#CA2026",
    COLUMNS.NEW_DEATHS: "#60B75D",
    COLUMNS.PATIENTS_IN_ICU: "#377FB8"
}
GRAY_COLOR = "lightgray"

# Derive the colour range from COLOR_BY_FIELD so every category gets the colour
# the mapping declares for it. Pairing MID_FIELDS with THREE_COLORS by position
# swapped the deaths and ICU colours, because the two lists are ordered
# differently.
CATEGORY_COLORS = [COLOR_BY_FIELD[field] for field in MID_FIELDS]
category_color_scale = alt.Scale(domain=MID_FIELDS, range=CATEGORY_COLORS)


def _date_x():
    """Return a fresh temporal x-encoding on the date column (7 ticks, no title)."""
    return alt.X(
        f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7),
        title=None,
    )


line = alt.Chart(df).mark_line(size=3).encode(
    x=_date_x(),
    y=alt.Y(
        f"{COLUMNS.NUM_PATIENTS}:Q", axis=alt.Axis(tickCount=5),
        title="Number of patients",
        # Fixed y-range; NOTE(review): 170 is hard-coded and presumably chosen
        # for the current data -- confirm when the data is refreshed.
        scale=alt.Scale(domain=[-1, 170])
    ),
    color=alt.Color(
        f"{CATEGORY}:N",
        scale=category_color_scale,
        legend=alt.Legend(title=None),
    )
)

# Same encodings as the line, drawn as circles on top of it.
point = line.mark_circle(size=40)

# Band between the per-row "under" and "upper" values.
errorband = alt.Chart(df).mark_errorband().encode(
    x=_date_x(),
    y=alt.Y("upper:Q", title=""),
    y2=alt.Y2("under:Q"),
    color=alt.Color(f"{CATEGORY}:N", scale=category_color_scale)
)

agg_chart = (line + point + errorband).resolve_scale(color="shared")

apply_theme(agg_chart).properties(
    width=750, height=400, title="Number of Positive Cases, Patients in ICU, and Deaths"
).interactive()

# Demographics

In [None]:
# TODO: Better way to clear the codes here?
df = read_combined_demographics_df()

# Preview only the first rows with the notebook's rich table rendering instead
# of printing the entire frame as plain text. `display` is provided by the
# IPython kernel and, unlike a bare trailing expression, still renders if more
# statements are added after it in this cell.
display(df.head())