In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS
from utils import (
    read_combined_daily_counts_df,
    read_combined_demographics_df, 
    read_combined_diagnoses_df,
    read_combined_labs_df,
    apply_theme, apply_grouped_bar_theme, apply_trellis_theme,
    read_icd_df, read_loinc_df
)

In [None]:
# Common info that should be defined every time before rendering visualizations
NUM_SITES = "Four"
DATA_DATE = "2020-04-01"
SUBTITLE = f"Data as of {DATA_DATE} | {NUM_SITES} Institutions"

# Where to save visualization *.PNG files
SAVE_DIR = join("..", "output")

# Countries and their colors; the two lists are index-aligned
COUNTRIES = ["France", "Germany", "Italy", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#D55E00"]
COLOR_BY_COUNTRY = dict(zip(COUNTRIES, COUNTRY_COLOR))

# Label and color of the series that aggregates all countries
COMBINED = "Combined"
COMBINED_COLOR = "#444444"

# General-purpose 20-color categorical palette
COLOR20 = [
    "#3366cc", "#dc3912", "#ff9900", "#109618", "#990099",
    "#0099c6", "#dd4477", "#66aa00", "#b82e2e", "#316395",
    "#994499", "#22aa99", "#aaaa11", "#6633cc", "#e67300",
    "#8b0707", "#651067", "#329262", "#5574a6", "#3b3eac",
]

# Required Setup

- All four combined datasets should be placed in `../data/combined` (e.g., `../data/combined/DailyCounts-Combinedyymmdd.csv` for the DailyCounts file).
- To save PNG files for visualizations, a folder named "output" should be present (i.e., `../output/`).

# Labs

In [None]:
df_lb = read_combined_labs_df()

# NOTE: the site-count columns are still needed below, so nothing is dropped yet.

# Zero negative values
for value_column in (COLUMNS.NUM_PATIENTS, COLUMNS.MEAN_VALUE, COLUMNS.STDEV_VAL):
    df_lb.loc[df_lb[value_column] < 0, value_column] = 0

# Mean -/+ one standard deviation (drawn as the error band of the top chart)
df_lb["upper"] = df_lb[COLUMNS.MEAN_VALUE] + df_lb[COLUMNS.STDEV_VAL]
df_lb["under"] = df_lb[COLUMNS.MEAN_VALUE] - df_lb[COLUMNS.STDEV_VAL]

# Patient-count range: from the reported count up to the reported count plus the
# masked upper bound. The plotted count is moved to the midpoint of that range.
df_lb["upper_p"] = df_lb[COLUMNS.NUM_PATIENTS] + df_lb[COLUMNS.MASKED_UPPER_BOUND_NUM_PATIENTS]
df_lb["under_p"] = df_lb[COLUMNS.NUM_PATIENTS]
df_lb[COLUMNS.NUM_PATIENTS] += (df_lb["upper_p"] - df_lb["under_p"]) / 2.0

# Human-readable test name; the unit is appended unless it is recorded as "-1"
loinc_df = read_loinc_df().set_index('loinc').rename(columns={'labTest': 'name'})

df_lb["loinc_name"] = df_lb[COLUMNS.LOINC].apply(lambda code:
    loinc_df.at[code, "name"] if loinc_df.at[code, "unit"] == "-1" else loinc_df.at[code, "name"] + " (" + loinc_df.at[code, "unit"] + ")"
)

# Number of sites (only unmasked sites are counted for now)
df_lb[COLUMNS.NUM_SITES] = df_lb[COLUMNS.UNMASKED_SITES_NUM_PATIENTS] # + df_lb[COLUMNS.MASKED_SITES_NUM_PATIENTS]

# Drop unused columns
df_lb = df_lb.drop(columns=[
    COLUMNS.MASKED_UPPER_BOUND_NUM_PATIENTS,
    COLUMNS.UNMASKED_SITES_NUM_PATIENTS,
    COLUMNS.MASKED_SITES_NUM_PATIENTS
])

# Set extent (computed from the per-site rows, before the combined rows are appended)
NUM_PATIENTS_EXTENT = [0, max(df_lb[COLUMNS.NUM_PATIENTS])]
NUM_SITES_EXTENT = [0, max(df_lb[COLUMNS.NUM_SITES])]
DAYS_SINCE_EXTENT = [min(df_lb[COLUMNS.DAYS_SINCE_POSITIVE]), max(df_lb[COLUMNS.DAYS_SINCE_POSITIVE])]

# Combined df
# Counts (patients, sites, patient-count range) are additive across sites, but
# means and standard deviations are not: summing them produced meaningless
# "combined" lab values. They are pooled here instead, weighting each site by its
# (midpoint-adjusted) number of patients:
#   mean  = sum(n_i * m_i) / sum(n_i)
#   stdev = sqrt(sum(n_i * (s_i^2 + m_i^2)) / sum(n_i) - mean^2)
# NOTE(review): assumes mean_value/stdev_val are per-site statistics over
# num_patients patients -- confirm against the data specification.
site_weights = df_lb[COLUMNS.NUM_PATIENTS]
df_lb_combined = df_lb.assign(
    _weighted_mean=site_weights * df_lb[COLUMNS.MEAN_VALUE],
    _weighted_second_moment=site_weights * (df_lb[COLUMNS.STDEV_VAL] ** 2 + df_lb[COLUMNS.MEAN_VALUE] ** 2),
).groupby(["loinc", COLUMNS.DAYS_SINCE_POSITIVE, "loinc_name"]).agg("sum").reset_index()

# Groups without any patients get 0 instead of a division-by-zero result
total_patients = df_lb_combined[COLUMNS.NUM_PATIENTS].where(df_lb_combined[COLUMNS.NUM_PATIENTS] > 0)
pooled_mean = (df_lb_combined["_weighted_mean"] / total_patients).fillna(0)
pooled_second_moment = (df_lb_combined["_weighted_second_moment"] / total_patients).fillna(0)
# clip() guards against tiny negative variances caused by floating-point rounding
pooled_stdev = np.sqrt((pooled_second_moment - pooled_mean ** 2).clip(lower=0))

df_lb_combined[COLUMNS.MEAN_VALUE] = pooled_mean
df_lb_combined[COLUMNS.STDEV_VAL] = pooled_stdev
df_lb_combined["upper"] = pooled_mean + pooled_stdev
df_lb_combined["under"] = pooled_mean - pooled_stdev
df_lb_combined = df_lb_combined.drop(columns=["_weighted_mean", "_weighted_second_moment"])
df_lb_combined[COLUMNS.SITE_ID] = COMBINED

# Merge two
df_lb = pd.concat([df_lb, df_lb_combined])

df_lb

In [None]:
# Distinct lab test names, in order of first appearance in df_lb
LOINCS = df_lb["loinc_name"].unique()

# Tooltip shared by the lab chart layers: (field, displayed title)
LAB_TOOLTIP = [
    alt.Tooltip(field, title=label)
    for field, label in (
        (COLUMNS.SITE_ID, "Country"),
        (COLUMNS.DAYS_SINCE_POSITIVE, "Days since positive"),
        (COLUMNS.MEAN_VALUE, "Mean value"),
        (COLUMNS.NUM_PATIENTS, "# of patients"),
        (COLUMNS.NUM_SITES, "# of institutions"),
    )
]

# Mouse-over selection that picks the nearest "days since positive" value;
# nothing is selected initially and the selection is cleared on mouse-out
nearest = alt.selection(
    type='single',
    nearest=True,
    on='mouseover',
    fields=[COLUMNS.DAYS_SINCE_POSITIVE],
    empty='none',
    clear="mouseout"
)

def lab_chart(test, country):
    """Build the stacked lab-result chart for one lab test in one country.

    Rows of the module-level ``df_lb`` are filtered to ``loinc_name == test`` and
    site id ``== country``. Three panels sharing the x axis (days since positive)
    are stacked vertically:

    * top: mean lab value as a line with points, plus an ``under``..``upper`` band;
    * middle: number of patients as bars, with ``under_p``..``upper_p`` error bars;
    * bottom: number of sites as bars.

    Every panel also carries a dashed red rule at x = 1 and a black rule that
    follows the module-level ``nearest`` mouse-over selection.

    Args:
        test: value of the ``loinc_name`` column to plot.
        country: value of the site-id column to plot.

    Returns:
        The vertically concatenated Altair chart, titled with ``test`` and
        ``SUBTITLE``, with the ``nearest`` selection attached.
    """
    days_field = f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q"

    def site_color():
        # Fixed site -> color mapping (countries plus the combined series), no legend
        return alt.Color(
            f"{COLUMNS.SITE_ID}:N",
            scale=alt.Scale(domain=COUNTRIES + [COMBINED], range=COUNTRY_COLOR + [COMBINED_COLOR]),
            legend=None
        )

    # --- Vertical rules ---
    # Dashed red reference rule at x = 1
    df_v_rule = pd.DataFrame({"date": [1]})
    v_rule = alt.Chart(df_v_rule).mark_rule(color="red", strokeDash=[3,3]).encode(
        x="date:Q"
    )
    # Thin black rule at the day currently picked by the mouse-over selection
    nearest_rule = alt.Chart(df_lb).mark_rule(color="black").encode(
        x=days_field,
        size=alt.value(0.5)
    ).transform_filter(
        nearest
    )

    # --- Data preprocessing: keep only the requested test and country ---
    filtered_chart = alt.Chart(df_lb).transform_filter(
        alt.datum["loinc_name"] == test
    ).transform_filter(
        alt.datum[COLUMNS.SITE_ID] == country
    )

    # --- Top chart: mean value with error band ---
    line = filtered_chart.mark_line(size=1, opacity=1).encode(
        x=alt.X(
            days_field,
            title=None,
            # The x axis is labelled on the bottom chart only
            axis=alt.Axis(
                grid=True,
                labels=False, ticks=False, domain=True
            ),
            scale=alt.Scale(domain=DAYS_SINCE_EXTENT)
        ),
        y=alt.Y(
            f"mean({COLUMNS.MEAN_VALUE}):Q",
            title="Mean value (stdev)"
        ),
        color=site_color(),
        tooltip=LAB_TOOLTIP,
    )

    # Points grow from size 10 to 50 on the day picked by the selection
    circle = line.mark_circle(size=10).encode(
        size=alt.condition(~nearest, alt.value(10), alt.value(50))
    )
    errorband = filtered_chart.mark_errorband().encode(
        x=alt.X(
            days_field,
            title=None,
        ),
        y=alt.Y(
            "under:Q",
            title="",
        ),
        y2="upper:Q",
        color=site_color(),
        tooltip=LAB_TOOLTIP
    )
    # Same under..upper range drawn again as faint error bars
    white_errorline = errorband.mark_errorbar().encode(
        size=alt.value(1),
        opacity=alt.value(0.3)
    )

    top_chart = (circle + line + errorband + white_errorline + v_rule + nearest_rule).properties(height=150, width=500)

    # --- Middle chart: number of patients ---
    bar = filtered_chart.mark_bar().encode(
        y=alt.Y(
            f"sum({COLUMNS.NUM_PATIENTS}):Q",
            title="# of tested",
            scale=alt.Scale(domain=NUM_PATIENTS_EXTENT)
        ),
        x=alt.X(
            days_field,
            title=None,
            axis=alt.Axis(
                grid=True,
                labels=False, ticks=False, domain=True,
            )
        ),
        color=alt.value("gray"),
        tooltip=LAB_TOOLTIP
    )

    errorbar = filtered_chart.mark_errorbar().encode(
        x=alt.X(
            days_field,
        ),
        y=alt.Y(
            "under_p:Q",
            title="",
        ),
        y2="upper_p:Q",
        color=alt.value("black"),
        size=alt.value(1)
    )

    middle_chart = (bar + errorbar + v_rule + nearest_rule).properties(height=60)

    # --- Bottom chart: number of sites ---
    bottom_bar = filtered_chart.mark_bar().encode(
        y=alt.Y(
            f"{COLUMNS.NUM_SITES}:Q",
            title="# of sites",
            scale=alt.Scale(domain=NUM_SITES_EXTENT)
        ),
        x=alt.X(
            days_field,
            title="Days since positive",
            axis=alt.Axis(
                grid=True,
                # Only label even day values
                labelExpr="abs(parseInt(datum.value)) % 2 == 1 ? null : datum.label"
            )
        ),
        color=alt.value("gray"),
        tooltip=LAB_TOOLTIP
    )

    bottom_chart = (bottom_bar + v_rule + nearest_rule).properties(height=60)

    return alt.vconcat(top_chart.interactive(), middle_chart, bottom_chart, spacing=5).resolve_scale(y="independent", x="shared").properties(
        title={
            "text": test,
            "subtitle": SUBTITLE
        }).add_selection(nearest)

# One row per lab test, one column per country.
# TODO: for debug only the first test is rendered; iterate over all of LOINCS when done.
lab_rows = []
for test_name in LOINCS[:1]:
    country_charts = [
        lab_chart(test=test_name, country=country_name).properties(
            title={"text": [f"{country_name}"], "subtitle": [SUBTITLE], "color": COLOR_BY_COUNTRY[country_name], "subtitleColor": "gray"}
        )
        for country_name in COUNTRIES
    ]
    lab_rows.append(
        alt.hconcat(*country_charts).properties(title={"text": test_name, "subtitle": [SUBTITLE], "subtitleColor": "gray"})
    )
apply_theme(alt.vconcat(*lab_rows)).display()

# Daily Counts

In [None]:
df_dc = read_combined_daily_counts_df()

# Helper columns (masked upper bounds and site counts per category). They ride
# along through the melt as id_vars and are dropped once folded into the long frame.
dc_aux_columns = [
    COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
    COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
    COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
    COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.UNMASKED_SITES_NEW_DEATHS,
    COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
    COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
    COLUMNS.MASKED_SITES_NEW_DEATHS,
]

# Wide to long: every remaining column becomes a (category, number of patients) row
CATEGORY = "category"
df_dc = pd.melt(
    df_dc, id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE] + dc_aux_columns
).rename(
    columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS}
)

# For each category, fill 'upper'/'under' and the site count from that category's
# own helper columns only
for category_name in (COLUMNS.NEW_POSITIVE_CASES, COLUMNS.PATIENTS_IN_ICU, COLUMNS.NEW_DEATHS):
    in_category = df_dc[CATEGORY] == category_name
    reported = df_dc.loc[in_category, COLUMNS.NUM_PATIENTS]
    masked_bound = df_dc.loc[in_category, "masked_upper_bound_" + category_name]

    df_dc.loc[in_category, "upper"] = reported + masked_bound
    df_dc.loc[in_category, "under"] = reported
    # The plotted count is the midpoint between 'under' and 'upper'
    df_dc.loc[in_category, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

    # Add num of sites (the right-hand side is aligned on the index by .loc)
    df_dc.loc[in_category, COLUMNS.NUM_SITES] = (
        df_dc["unmasked_sites_" + category_name] + df_dc["masked_sites_" + category_name]
    )

# TODO: consider separate "<category>_<num_sites>" columns (unmasked + masked sites per category)

# Drop unused columns
df_dc = df_dc.drop(columns=dc_aux_columns)

# Combined df
df_dc_combined = df_dc.groupby([COLUMNS.DATE, CATEGORY]).agg("sum").reset_index()
df_dc_combined[COLUMNS.SITE_ID] = COMBINED

# Merge two
df_dc = pd.concat([df_dc, df_dc_combined])

df_dc

In [None]:
# TODO: Add cumulative values

# Categories to chart, in display order
CATEGORIES = [
    COLUMNS.NEW_POSITIVE_CASES,
    COLUMNS.NEW_DEATHS,
    COLUMNS.PATIENTS_IN_ICU,
]

# Country -> color mapping; the combined series is not part of this scale
color_scale = alt.Scale(
    domain=COUNTRIES,
    range=COUNTRY_COLOR
)

def dailycount_chart(category):
    """Build the daily-count chart of one category for all countries.

    Rows of the module-level ``df_dc`` are filtered to the given category,
    excluding the combined series. The result stacks a line/point chart with an
    ``upper``..``under`` band on top of a bar chart of the number of sites, both
    colored by site with the module-level ``color_scale``.

    Args:
        category: value of the ``CATEGORY`` column to plot.

    Returns:
        The themed, vertically concatenated Altair chart titled with ``category``.
    """
    def date_x():
        # Date axis used by every layer
        return alt.X(
            f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7),
            title=None,
        )

    def site_color():
        # Per-site color with an untitled legend
        return alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None))

    per_country = alt.Chart(df_dc).transform_filter(
        alt.datum[CATEGORY] == category
    ).transform_filter(
        alt.datum[COLUMNS.SITE_ID] != COMBINED
    )

    # Top chart: number of patients over time
    counts_line = per_country.mark_line(size=2).encode(
        x=date_x(),
        y=alt.Y(
            f"{COLUMNS.NUM_PATIENTS}:Q", axis=alt.Axis(tickCount=5),
            title="Number of patients",
        ),
        color=site_color()
    )
    counts_points = counts_line.mark_circle(size=30)
    counts_band = per_country.mark_errorband().encode(
        x=date_x(),
        y=alt.Y("upper:Q", title=""),
        y2=alt.Y2("under:Q"),
        color=site_color()
    )

    layered = counts_line + counts_points + counts_band
    top_panel = layered.resolve_scale(color="shared").properties(
        width=750, height=400
    ).interactive()
    # TODO: facet by category (row=alt.Row(f"{CATEGORY}", title=None)) for a country-level view

    # Bottom chart: number of sites per day
    sites_panel = per_country.mark_bar(size=18).encode(
        x=date_x(),
        y=alt.Y(
            f"{COLUMNS.NUM_SITES}:Q", title="# of sites",
            scale=alt.Scale(domain=[0, 30])
        ),
        color=site_color()
    ).properties(height=60)

    chart_title = {
        "text": category,
        "subtitle": SUBTITLE,
        "subtitleColor": "gray"
    }
    return apply_theme(top_panel & sites_panel).resolve_scale(x="shared").properties(title=chart_title)

# Render one chart per category, in CATEGORIES order
for category_name in CATEGORIES:
    category_chart = dailycount_chart(category=category_name)
    category_chart.display()

# Demographics

In [None]:
df_dm = read_combined_demographics_df()

# Age-group count columns and their masked-upper-bound companions
dm_age_groups = [
    COLUMNS.AGE_0TO2,
    COLUMNS.AGE_3TO5,
    COLUMNS.AGE_6TO11,
    COLUMNS.AGE_12TO17,
    COLUMNS.AGE_18TO25,
    COLUMNS.AGE_26TO49,
    COLUMNS.AGE_50TO69,
    COLUMNS.AGE_70TO79,
    COLUMNS.AGE_80PLUS,
]
dm_masked_bound_columns = [
    COLUMNS.MASKED_UPPER_BOUND_AGE_0TO2,
    COLUMNS.MASKED_UPPER_BOUND_AGE_3TO5,
    COLUMNS.MASKED_UPPER_BOUND_AGE_6TO11,
    COLUMNS.MASKED_UPPER_BOUND_AGE_12TO17,
    COLUMNS.MASKED_UPPER_BOUND_AGE_18TO25,
    COLUMNS.MASKED_UPPER_BOUND_AGE_26TO49,
    COLUMNS.MASKED_UPPER_BOUND_AGE_50TO69,
    COLUMNS.MASKED_UPPER_BOUND_AGE_70TO79,
    COLUMNS.MASKED_UPPER_BOUND_AGE_80PLUS,
]

# Drop unused columns before preprocessing, for simplicity:
# per-site counts (unmasked and masked) and the total-patient columns
dm_unused_columns = [
    COLUMNS.UNMASKED_SITES_TOTAL_PATIENTS,
    COLUMNS.UNMASKED_SITES_AGE_0TO2,
    COLUMNS.UNMASKED_SITES_AGE_3TO5,
    COLUMNS.UNMASKED_SITES_AGE_6TO11,
    COLUMNS.UNMASKED_SITES_AGE_12TO17,
    COLUMNS.UNMASKED_SITES_AGE_18TO25,
    COLUMNS.UNMASKED_SITES_AGE_26TO49,
    COLUMNS.UNMASKED_SITES_AGE_50TO69,
    COLUMNS.UNMASKED_SITES_AGE_70TO79,
    COLUMNS.UNMASKED_SITES_AGE_80PLUS,
    COLUMNS.MASKED_SITES_TOTAL_PATIENTS,
    COLUMNS.MASKED_SITES_AGE_0TO2,
    COLUMNS.MASKED_SITES_AGE_3TO5,
    COLUMNS.MASKED_SITES_AGE_6TO11,
    COLUMNS.MASKED_SITES_AGE_12TO17,
    COLUMNS.MASKED_SITES_AGE_18TO25,
    COLUMNS.MASKED_SITES_AGE_26TO49,
    COLUMNS.MASKED_SITES_AGE_50TO69,
    COLUMNS.MASKED_SITES_AGE_70TO79,
    COLUMNS.MASKED_SITES_AGE_80PLUS,
    COLUMNS.MASKED_UPPER_BOUND_TOTAL_PATIENTS,
    COLUMNS.TOTAL_PATIENTS,
]
df_dm = df_dm.drop(columns=dm_unused_columns)

# Wide to long: every remaining column becomes an (age group, number of patients) row
df_dm = pd.melt(
    df_dm, id_vars=[COLUMNS.SITE_ID, COLUMNS.SEX] + dm_masked_bound_columns
).rename(
    columns={"variable": COLUMNS.AGE_GROUP, "value": COLUMNS.NUM_PATIENTS}
)

# For each age group, fill 'upper'/'under' from that group's own masked upper bound only
for age_group in dm_age_groups:
    in_group = df_dm[COLUMNS.AGE_GROUP] == age_group
    reported = df_dm.loc[in_group, COLUMNS.NUM_PATIENTS]
    masked_bound = df_dm.loc[in_group, "masked_upper_bound_" + age_group]

    df_dm.loc[in_group, "upper"] = reported + masked_bound
    df_dm.loc[in_group, "under"] = reported
    # The plotted count is the midpoint between 'under' and 'upper'
    df_dm.loc[in_group, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

# Keep the per-sex rows only (rows whose sex is "ALL" are removed)
df_dm = df_dm[df_dm[COLUMNS.SEX] != "ALL"]

# Drop unused columns
df_dm = df_dm.drop(columns=dm_masked_bound_columns)

# Combined df
df_dm_combined = df_dm.groupby([COLUMNS.SEX, COLUMNS.AGE_GROUP]).agg("sum").reset_index()
df_dm_combined[COLUMNS.SITE_ID] = COMBINED

# Merge two
df_dm = pd.concat([df_dm, df_dm_combined])

df_dm

In [None]:
# Sex -> color mapping: first two palette colors for Male/Female, gray for Other
color_scale = alt.Scale(
    domain=["Male", "Female", "Other"],
    range=[*COLOR20[:2], "gray"]
)

def demo_chart(country):
    """Build the grouped bar chart of patients by age group and sex for one site.

    Rows of the module-level ``df_dm`` are filtered to the given site id. Each
    age group is a column facet holding one bar per sex, colored with the
    module-level ``color_scale``.

    Error bars for the ``under``..``upper`` range are not drawn. A layered
    ``bars + errorbar`` chart has to be faceted with ``.facet(column=...)``,
    and that was left disabled here; presumably because of the Vega-Lite
    problem noted where these charts are concatenated
    (https://github.com/vega/vega-lite/issues/4680) -- confirm before
    re-enabling it.

    Args:
        country: value of the site-id column to plot; may also be ``COMBINED``.

    Returns:
        The faceted Altair chart, titled with the site name and ``SUBTITLE``.
    """
    # Same constants that named the melted age-group values, in ascending age order
    age_group_order = [
        COLUMNS.AGE_0TO2,
        COLUMNS.AGE_3TO5,
        COLUMNS.AGE_6TO11,
        COLUMNS.AGE_12TO17,
        COLUMNS.AGE_18TO25,
        COLUMNS.AGE_26TO49,
        COLUMNS.AGE_50TO69,
        COLUMNS.AGE_70TO79,
        COLUMNS.AGE_80PLUS,
    ]

    # The combined series has no entry in COLOR_BY_COUNTRY; fall back to its own color
    title_color = COLOR_BY_COUNTRY.get(country, COMBINED_COLOR)

    filtered_chart = alt.Chart(df_dm).transform_filter(
        alt.datum[COLUMNS.SITE_ID] == country
    )

    base = filtered_chart.mark_bar().encode(
        x=alt.X(f"{COLUMNS.SEX}:N", title=None, axis=None),
        y=alt.Y(f"{COLUMNS.NUM_PATIENTS}:Q", title="Number of patients", axis=alt.Axis(tickCount=5)),
        color=alt.Color(f"{COLUMNS.SEX}:N", title=None, scale=color_scale),
    ).properties(
        width=60,
        height=300
    )

    output_vis = base.encode(
        column=alt.Column(
            f"{COLUMNS.AGE_GROUP}:O",
            sort=age_group_order,
            header=alt.Header(labelOrient="bottom", title=None, titleOrient="bottom"),
        )
    ).properties(
        title={
            "text": f"{country}",
            "subtitle": SUBTITLE,
            "color": title_color,
            "subtitleColor": "gray"
        }
    )

    return output_vis

# All countries side by side in a single row (the combined series is not shown).
# https://github.com/vega/vega-lite/issues/4680
# Error msg: Javascript Error: Undefined data set name: "scale_concat_2_child_layer_0_main"
demographics_row = alt.hconcat(*(demo_chart(country=country_name) for country_name in COUNTRIES))

demographics_title = {
    "text": "Demographics",
    "subtitle": SUBTITLE,
    "subtitleColor": "gray"
}
apply_grouped_bar_theme(demographics_row, strokeColor="lightgray").resolve_scale(
    color="independent"
).properties(title=demographics_title)

# DEPRECATED CODE BELOW

# Diagnoses

In [None]:
df_dg = read_combined_diagnoses_df()

# Drop unused columns before preprocessing, for simplicity
df_dg = df_dg.drop(columns=[
    COLUMNS.UNMASKED_SITES_NUM_PATIENTS,
    COLUMNS.MASKED_SITES_NUM_PATIENTS
])

# Patient-count range: 'under' is the reported count and 'upper' adds the masked
# upper bound; the plotted count is moved to the midpoint of that range
df_dg = df_dg.rename(columns={COLUMNS.MASKED_UPPER_BOUND_NUM_PATIENTS: "upper"})
df_dg["upper"] += df_dg[COLUMNS.NUM_PATIENTS]
df_dg["under"] = df_dg[COLUMNS.NUM_PATIENTS]
df_dg[COLUMNS.NUM_PATIENTS] = df_dg[COLUMNS.NUM_PATIENTS] + (df_dg["upper"] - df_dg["under"]) / 2.0

# Our lookup table does not contain dots.
# regex=False is required: as a regular expression "." would match every character.
df_dg[COLUMNS.ICD_CODE] = df_dg[COLUMNS.ICD_CODE].str.replace(".", "", regex=False)

# Merge with a lookup table
icd_df = read_icd_df()
df_dg = df_dg.merge(icd_df, how="left", left_on=COLUMNS.ICD_CODE, right_on="ICDcode")

# Handle the missing data: codes absent from the lookup table fall back to the code itself
df_dg["ICDdescription"] = df_dg["ICDdescription"].fillna(df_dg[COLUMNS.ICD_CODE])
df_dg["Category"] = df_dg["Category"].fillna(df_dg[COLUMNS.ICD_CODE])

# Consistent capitalization (the .str accessor leaves missing values untouched
# instead of raising like a per-element str method call would)
df_dg["ICDdescription"] = df_dg["ICDdescription"].str.capitalize()
df_dg["Category"] = df_dg["Category"].str.capitalize()

# Keep diagnoses with at least 10 patients (the chart title states this threshold)
df_dg = df_dg[df_dg[COLUMNS.NUM_PATIENTS] >= 10]

df_dg

In [None]:
# Add filter

def diagnoses_chart(YAxis):
    """Build the diagnoses dot plot with patient-count error bars.

    Plots the module-level ``df_dg``: one circle per y value at the summed number
    of patients, layered with ``under``..``upper`` error bars whose y values are
    sorted by descending number of patients.

    Args:
        YAxis: "ICD Description" or "ICD Category" to pick that field for the
            y axis; any other value uses the ICD code.

    Returns:
        The themed, layered Altair chart.
    """
    yfield = (
        "ICDdescription" if YAxis == "ICD Description"
        else "Category" if YAxis == "ICD Category"
        else "icd_code"
    )

    # y values ordered by descending number of patients
    by_patients_desc = df_dg.sort_values(by=[COLUMNS.NUM_PATIENTS], ascending=False)
    sort = by_patients_desc[yfield].unique()

    diagnoses = alt.Chart(df_dg)

    range_bars = diagnoses.mark_errorbar().encode(
        x=alt.X("upper:Q", title=""),
        x2=alt.X2("under:Q"),
        y=alt.Y(f"{yfield}:N", title=None, sort=sort),
        size=alt.value(1)
    )

    chart_title = {
        "text": "Diagnoses starting 7 days before positive test (Patients >= 10)",
        "subtitle": SUBTITLE
    }
    dots = diagnoses.mark_circle(size=50, color="black").encode(
        x=alt.X(f"sum({COLUMNS.NUM_PATIENTS}):Q", title="Number of patients", axis=alt.Axis(tickCount=5)),
        y=alt.Y(f"{yfield}:N", title=None, axis=alt.Axis(grid=True))
    ).properties(title=chart_title, width=500)

    return apply_theme(dots + range_bars)

# Dropdown that switches the y axis of the diagnoses chart; the first option is the default
Y_AXIS_OPTIONS = ["ICD Description", "ICD Code", "ICD Category"]
interact(diagnoses_chart, YAxis=Y_AXIS_OPTIONS)

In [None]:
# Deprecated trellis view of all lab tests: one row per test, with a shared bar
# chart of the number of tested patients underneath.
#
# The original cell read from a name `df` that is not defined anywhere in this
# notebook, so it failed with a NameError on a fresh kernel. It now reads the
# labs frame, which has every column referenced below.
# NOTE(review): df_lb holds one row per site plus the combined rows -- confirm
# whether this view should be restricted to a single site before reviving it.
line = alt.Chart(df_lb).mark_line(size=1, opacity=1).encode(
    x=alt.X(
        f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
        title=None,
        # Axis labels and ticks are hidden here; the bottom chart carries them
        axis=alt.Axis(
            grid=True,
            labelOpacity=0, tickOpacity=0
        )
    ),
    y=alt.Y(
        f"mean({COLUMNS.MEAN_VALUE}):Q",
        title=None,
        axis=alt.Axis(orient="right")
    ),
    color=alt.Color("loinc_name:N", scale=alt.Scale(scheme="category20"), legend=None),
).properties(height=150, width=500)

circle = line.mark_circle(size=10)
errorband = alt.Chart(df_lb).mark_errorband().encode(
    x=alt.X(
        f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
        title=None,
    ),
    y=alt.Y(
        "under:Q",
        title=None,
    ),
    y2="upper:Q",
    color=alt.Color("loinc_name:N", scale=alt.Scale(scheme="category20"), legend=None),
)

# One row per lab test, each with its own y scale
top_chart = (circle + line + errorband).facet(
    row=alt.Row(
        "loinc_name:N",
        header=alt.Header(labelAngle=0, labelAlign="left", labelAnchor="middle", labelColor="black", title=None)
    ),
).resolve_scale(y="independent")

# Number of tested patients per day, derived from `line` so it keeps its data and width
bottom_chart = line.mark_bar().encode(
    y=alt.Y(
        f"sum({COLUMNS.NUM_PATIENTS}):Q",
        title="Number of tested patients",
        axis=alt.Axis(
            tickCount=2,
            titleAngle=0,
            titleAlign="right",
            titleBaseline="middle",
            titlePadding=-545,
            orient="right"
        )
    ),
    x=alt.X(
        f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
        title="Days since positive",
        axis=alt.Axis(
            grid=True,
            # Only label even day values
            labelExpr="abs(parseInt(datum.value)) % 2 == 1 ? null : datum.label"
        )
    ),
    color=alt.value("gray")
).properties(height=45)

apply_trellis_theme(
    top_chart & bottom_chart
).resolve_scale(y="independent", x="shared").properties(
    title={
        "text": "LOINC test results",
        "subtitle": SUBTITLE
    })