In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_0

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from os.path import join

from web import for_website
from constants_1_0 import COLUMNS, DATA_AGGREGATE_TYPES
from utils_1_0 import (
    read_loinc_df,
    get_visualization_subtitle,
    apply_theme,
    
    # For loading local data:
    # read_combined_labs_df, 
    # read_combined_by_country_labs_df, 
    # read_combined_by_site_labs_df,
    # read_combined_by_site_anonymous_labs_df,
    # get_combined_color_map,
    # get_siteid_anonymous_map,
    # get_country_color_map,
    # get_anonymousid_color_maps,
    # read_lab_meta_ci_df, 
    # read_site_details_df,
)

# Data Preprocessing

## Labs Data From Figshare
Use the latest data from https://doi.org/10.6084/m9.figshare.12152766.v1

In [None]:
# Lab_MetaCI_ran.csv: lab values used later as the combined ("All countries") dataset
df_combined = pd.read_csv("https://ndownloader.figshare.com/files/22345584")

# Preview the first rows
df_combined.head()

In [None]:
# Labs-CombinedByCountry200411.csv: raw country-level lab values (cleaned later by process_labs_df)
df_country = pd.read_csv("https://ndownloader.figshare.com/files/22345593")

# Preview the first rows
df_country.head()

In [None]:
# Labs-CombinedBySiteNumber200411.csv: raw site-level lab values (cleaned later by process_labs_df)
df_site = pd.read_csv("https://ndownloader.figshare.com/files/22345590")

# Preview the first rows
df_site.head()

In [None]:
# Sites.csv from the "Healthcare Systems" Figshare article
# (https://figshare.com/articles/Healthcare_Systems/12118911)
df_site_desc = pd.read_csv("https://ndownloader.figshare.com/files/22312305")

# Preview the first rows
df_site_desc.head()

In [None]:
# Label and color used for the pooled series
ALL_COUNTRY = "All countries"
ALL_COUNTRY_COLOR = "#444444"

# Countries and their colors, listed in matching order
COUNTRIES = ["France", "Germany", "Italy", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#D55E00"]
# zip() would silently truncate on a length mismatch, so check it explicitly
assert len(COUNTRIES) == len(COUNTRY_COLOR), "every country needs exactly one color"
COUNTRY_COLOR_MAP = dict(zip(COUNTRIES, COUNTRY_COLOR))

# Anonymized site ids have the form "<country>-<nn>"
ANONYMOUS_SITES = [
    'France-01', 'France-02',
    'Germany-01', 'Germany-02', 'Germany-03',
    'Italy-01', 'Italy-02', 'Italy-03',
    'USA-01', 'USA-02', 'USA-03', 'USA-04', 'USA-05', 'USA-06',
    'USA-07', 'USA-08', 'USA-09', 'USA-10', 'USA-11', 'USA-12',
]
# Every site is drawn in its country's color. Deriving the list from the site id
# keeps it in step with COUNTRY_COLOR_MAP instead of maintaining a parallel literal.
ANONYMOUS_COLORS = [COUNTRY_COLOR_MAP[site.split("-")[0]] for site in ANONYMOUS_SITES]

## Data Preprocessing For Site- And Country-Level Data

In [None]:
def process_labs_df(df_lb, day_range=(-2, 27)):
    """Clean a site- or country-level labs table and prepare it for plotting.

    The input frame is never modified; all work happens on a copy.

    Processing steps, in order:
      1. Negative patient counts, means and standard deviations are set to 0.
      2. "upper" / "under" are added as mean +/- standard deviation.
      3. "loinc_name" is added: the lab name from ``read_loinc_df()``, followed
         by the unit in parentheses unless the unit is the string "-1".
      4. The number-of-sites column is filled from the unmasked-sites column,
         and the three masking-related columns are dropped.
      5. Rows with zero sites, or with days since positive outside
         ``day_range``, are removed.
      6. The site id "Combined" is relabelled as ``ALL_COUNTRY`` and the site id
         is copied into the country column.
      7. Rows are ordered so that a fixed list of key labs comes first.

    Parameters
    ----------
    df_lb : pandas.DataFrame
        Labs table holding the columns referenced through ``COLUMNS`` below.
    day_range : tuple, optional
        Inclusive ``(first_day, last_day)`` window of days since positive to
        keep. Defaults to ``(-2, 27)``.

    Returns
    -------
    pandas.DataFrame
        A new, processed frame.

    Raises
    ------
    KeyError
        If a LOINC code in ``df_lb`` is absent from the LOINC lookup table.
    """
    # Copy first so the caller's frame (e.g. the raw download) stays exactly as loaded
    df_lb = df_lb.copy()

    # Negative values to zeros
    for column in (COLUMNS.NUM_PATIENTS, COLUMNS.MEAN_VALUE, COLUMNS.STDEV_VAL):
        df_lb.loc[df_lb[column] < 0, column] = 0

    # Upper and under bound for values
    df_lb["upper"] = df_lb[COLUMNS.MEAN_VALUE] + df_lb[COLUMNS.STDEV_VAL]
    df_lb["under"] = df_lb[COLUMNS.MEAN_VALUE] - df_lb[COLUMNS.STDEV_VAL]

    # Add readable names for LOINC and unit
    loinc_df = read_loinc_df().set_index(COLUMNS.LOINC).rename(columns={'labTest': 'name'})

    def readable_name(code):
        # A unit of "-1" is treated as "no unit": the lab name is used on its own
        name = loinc_df.at[code, "name"]
        unit = loinc_df.at[code, "unit"]
        return name if unit == "-1" else name + " (" + unit + ")"

    df_lb["loinc_name"] = df_lb[COLUMNS.LOINC].apply(readable_name)

    # Number of sites
    df_lb[COLUMNS.NUM_SITES] = df_lb[COLUMNS.UNMASKED_SITES_NUM_PATIENTS]

    # Drop unused columns
    df_lb = df_lb.drop(columns=[
        COLUMNS.MASKED_UPPER_BOUND_NUM_PATIENTS,
        COLUMNS.UNMASKED_SITES_NUM_PATIENTS,
        COLUMNS.MASKED_SITES_NUM_PATIENTS
    ])

    # Remove data if no sites provided, and keep only the requested day window
    first_day, last_day = day_range
    days = df_lb[COLUMNS.DAYS_SINCE_POSITIVE]
    keep = (df_lb[COLUMNS.NUM_SITES] != 0) & (days >= first_day) & (days <= last_day)
    # .copy() gives an independent frame, so the assignments below do not
    # trigger pandas' chained-assignment (SettingWithCopy) warning
    df_lb = df_lb[keep].copy()

    # Use more readable names
    df_lb.loc[df_lb[COLUMNS.SITE_ID] == "Combined", COLUMNS.SITE_ID] = ALL_COUNTRY

    # Add "Country" column
    df_lb[COLUMNS.COUNTRY] = df_lb[COLUMNS.SITE_ID]

    # Rearrange labs: the labs listed here come first, in this order; all others share rank 99
    on_the_top = [
        "Creatinine (mg/dL)",
        "C-reactive protein (mg/dL)",
        "Total bilirubin (mg/dL)",
        "White blood cell count (10*3/uL)",
        "D-dimer",
        "Lymphocyte count (10*3/uL)",
        "Neutrophil count (10*3/uL)"
    ]
    df_lb["sort"] = df_lb["loinc_name"].apply(lambda x: on_the_top.index(x) if x in on_the_top else 99)
    df_lb = df_lb.sort_values(by=["sort"])
    df_lb = df_lb.drop(columns=["sort"])

    return df_lb

# Country-level labs, cleaned by process_labs_df
# df_lb_country = read_combined_by_country_labs_df() # For loading local data
df_lb_country = process_labs_df(df_country)

# Site-level labs, cleaned by process_labs_df
# df_lb_site = read_combined_by_site_anonymous_labs_df() # For loading local data
df_lb_site = process_labs_df(df_site)

"""
# Add Hospital Data. This is used only when we have site-level non-anonymized data.
hs_df = read_site_details_df().reset_index()
hs_df = hs_df[["Acronym", "Hospitals", "Country"]]

df_lb_site = df_lb_site.merge(hs_df, how="left", left_on=COLUMNS.SITE_ID, right_on="Acronym")
df_lb_site = df_lb_site.drop(columns={"Acronym"})
df_lb_site = df_lb_site.rename(columns={"Hospitals": COLUMNS.NUM_HOSPITALS})
df_lb_site[COLUMNS.COUNTRY] = df_lb_site["Country"] # Set the actual country ("Country" => "country")
df_lb_site = df_lb_site.drop(columns={"Country"})

siteid_anonymous_map = get_siteid_anonymous_map()
df_lb_site[COLUMNS.SITE_ID] = df_lb_site[COLUMNS.SITE_ID].apply(lambda x: siteid_anonymous_map[x])

# Add columns for color
color_map = {}
color_map.update(get_anonymousid_color_maps()[1])
color_map.update(get_country_color_map())
color_map.update({ALL_COUNTRY: get_combined_color_map()["Combined"]})

# df_lb_combined[COLUMNS.COLOR] = df_lb_combined[COLUMNS.SITE_ID].apply(lambda x: color_map[x])    get_country_color_map,
"""

# Color every row by its country. The text before the first "-" of the site id is
# looked up in COUNTRY_COLOR_MAP (site ids such as "USA-01" reduce to "USA").
# NOTE(review): COUNTRY_COLOR_MAP has no entry for ALL_COUNTRY, so a row that was
# relabelled from "Combined" would raise KeyError here — confirm these files contain none.
df_lb_country[COLUMNS.COLOR] = df_lb_country[COLUMNS.SITE_ID].apply(lambda x: COUNTRY_COLOR_MAP[x.split("-")[0]])
df_lb_site[COLUMNS.COLOR] = df_lb_site[COLUMNS.SITE_ID].apply(lambda x: COUNTRY_COLOR_MAP[x.split("-")[0]])
# process_labs_df copied the site id into the country column; reduce it to the country part
df_lb_site[COLUMNS.COUNTRY] = df_lb_site[COLUMNS.COUNTRY].apply(lambda x: x.split("-")[0])

# Set extent
NUM_PATIENTS_EXTENT = [0, max(df_lb_site[COLUMNS.NUM_PATIENTS])]
NUM_SITES_EXTENT = [0, max(df_lb_site[COLUMNS.NUM_SITES])]
DAYS_SINCE_EXTENT = [-2 , 27]  # same day window that process_labs_df keeps by default

# Preview only the first rows, like the other preprocessing cells, instead of dumping the whole frame
df_lb_site.head()

## Data Preprocessing For Combined-Level Data

In [None]:
# df_combined = read_lab_meta_ci_df() # For loading local data
# Rename the combined-level columns to the names used by the site- and country-level frames.
# Note that the standard error ("se") is stored in the column the other frames use for the
# standard deviation, and the 95% CI bounds take the place of mean -/+ stdev.
df_lb_combined = df_combined.rename(columns={
    "Lab": "loinc_name",
    "mean": COLUMNS.MEAN_VALUE,
    "se": COLUMNS.STDEV_VAL,
    # "ci_95L" / "ci_95U" are read as the lower / upper bound of the 95% CI.
    # The two were previously assigned the other way round; the error bar spans both
    # values either way, so the drawing is unchanged, but the column names are now correct.
    "ci_95L": "under",
    "ci_95U": "upper",
    "total_n": COLUMNS.NUM_PATIENTS
})

df_lb_combined[COLUMNS.SITE_ID] = ALL_COUNTRY # We are using this data as an alternative combined dataset

# Change loinc names to the ones used in our data.
# An unknown lab name raises KeyError so that a naming mismatch is noticed immediately.
consistent_loinc = {
    "alanine aminotransferase (ALT)": "Alanine aminotransferase (U/L)",
    "albumin": "Albumin (g/dL)",
    "aspartate aminotransferase (AST)": "Aspartate aminotransferase (U/L)",
    "total bilirubin": "Total bilirubin (mg/dL)",
    "C-reactive protein (CRP)": "C-reactive protein (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "lactate dehydrogenase (LDH)": "Lactate dehydrogenase (U/L)",
    "cardiac troponin": "Cardiac troponin (ng/mL)",
    "prothrombin time (PT)": "Prothrombin time (s)",
    "white blood cell count (Leukocytes)": "White blood cell count (10*3/uL)",
    "lymphocyte count": "Lymphocyte count (10*3/uL)",
    "neutrophil count": "Neutrophil count (10*3/uL)",
    "D-dimer": "D-dimer",
    "procalcitonin": "Procalcitonin (ng/mL)",
}
df_lb_combined["loinc_name"] = df_lb_combined["loinc_name"].apply(lambda x: consistent_loinc[x])

# Rearrange labs: same ordering rule as in process_labs_df (listed labs first, the rest share rank 99)
on_the_top = [
    "Creatinine (mg/dL)",
    "C-reactive protein (mg/dL)",
    "Total bilirubin (mg/dL)",
    "White blood cell count (10*3/uL)",
    "D-dimer",
    "Lymphocyte count (10*3/uL)",
    "Neutrophil count (10*3/uL)"
]
df_lb_combined["sort"] = df_lb_combined["loinc_name"].apply(lambda x: on_the_top.index(x) if x in on_the_top else 99)
df_lb_combined = df_lb_combined.sort_values(by=["sort"])
df_lb_combined = df_lb_combined.drop(columns=["sort"])

# Add "Country" and "color" columns
df_lb_combined[COLUMNS.COUNTRY] = df_lb_combined[COLUMNS.SITE_ID]
df_lb_combined[COLUMNS.COLOR] = ALL_COUNTRY_COLOR

df_lb_combined.head()

In [None]:
# Tag every row with its aggregation level; the charts filter on this column
# after the three frames have been concatenated.
df_lb_combined["data_level"] = "combined"
df_lb_country["data_level"] = "country"
df_lb_site["data_level"] = "site"

# Lab names present in the site-level data (options of the lab dropdown) and their LOINC codes
LOINCS = df_lb_site["loinc_name"].unique().tolist()
LOINC_IDS = df_lb_site["loinc"].unique().tolist()
# Number format string handed to alt.Axis(format=...) by the plotting functions
axis_format = "r"

# Visualizations

In [None]:
alt.data_transformers.disable_max_rows() # Lift Altair's 5000-row limit so the full lab tables can be charted

def get_lab_dot_plot(
    chart=None,
    width=700, height=300,
    y_domain=None,
    y_title=None,
    color_scale=None,
    color_field=COLUMNS.COUNTRY,
    nearest=None,
    tooltip=None,
    no_line=False,
    show_stdev=False,
    log_y_scale=False,
    legend=None,
    nearest_rule=None,
    point_size=30,
    no_point=False,
    no_x_axis=False
):
    """Build a layered plot of mean lab value against days since positive.

    Points and/or a line are drawn from ``chart``; optionally an error bar
    spanning the "under" and "upper" columns and a vertical hover rule are
    layered on top. Tick labels on the y axis use the module-level
    ``axis_format``.

    Parameters
    ----------
    chart : alt.Chart, optional
        Base chart that carries the data. ``None`` makes the function return ``None``.
    width, height : int
        Size of the resulting chart.
    y_domain : list, optional
        Explicit y-axis domain. ``None`` leaves the domain unset.
    y_title : str, optional
        Title of the y axis.
    color_scale : alt.Scale, optional
        Color scale passed to the color encoding.
    color_field : str
        Nominal field used for the color encoding.
    nearest : selection, optional
        Hover selection; points matching it are drawn at twice ``point_size``.
        ``None`` draws every point at ``point_size``.
    tooltip : list, optional
        Tooltip encoding shared by all marks.
    no_line, no_point : bool
        Leave out the line / the points. At least one of the two must be drawn.
    show_stdev : bool
        Add the error bar layer.
    log_y_scale : bool
        Use a logarithmic instead of a linear y scale.
    legend : alt.Legend, optional
        Legend of the color encoding; ``None`` hides it.
    nearest_rule : alt.Chart, optional
        Rule chart layered on top; ``None`` adds no rule.
    point_size : int
        Mark size of the points.
    no_x_axis : bool
        Hide the labels and ticks of the x axis (the grid and domain line stay).

    Returns
    -------
    alt.LayerChart or None

    Raises
    ------
    ValueError
        If both ``no_point`` and ``no_line`` are true, so that no mark is left.
    """
    if chart is None:
        return None

    # Fail early with a clear message. This combination used to print a notice
    # and then crash on an unbound local variable.
    if no_point and no_line:
        raise ValueError("No mark is selected: `no_point` and `no_line` cannot both be True.")

    if no_x_axis:
        x_axis = alt.Axis(grid=True, labels=False, ticks=False, domain=True)
    else:
        x_axis = alt.Axis(grid=True, labels=True, ticks=True, domain=True, tickMinStep=1)

    # Pass `domain` only when one was requested
    y_scale_options = {"zero": False, "type": "log" if log_y_scale else "linear"}
    if y_domain is not None:
        y_scale_options["domain"] = y_domain
    y_scale = alt.Scale(**y_scale_options)

    # Enlarge the point under the cursor; without a hover selection all points share one size
    if nearest is None:
        circle_size = alt.value(point_size)
    else:
        circle_size = alt.condition(~nearest, alt.value(point_size), alt.value(point_size*2))

    circle = chart.mark_circle(
        size=point_size,
        opacity=0.7
    ).encode(
        x=alt.X(
            f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
            title=None,
            scale=alt.Scale(zero=False, nice=False, padding=10),
            axis=x_axis,
        ),
        y=alt.Y(
            f"{COLUMNS.MEAN_VALUE}:Q",
            title=y_title,
            scale=y_scale,
            axis=alt.Axis(format=axis_format)
        ),
        color=alt.Color(f"{color_field}:N", scale=color_scale, legend=legend),
        size=circle_size,
        tooltip=tooltip
    )

    # The line reuses the point encodings but has a fixed width (thinner when drawn without points)
    line_size = 1 if no_point else 2
    line = circle.mark_line(
        size=line_size,
        opacity=0.7
    ).encode(
        size=alt.value(line_size)
    )

    # Error bar from "under" to "upper", drawn thin and fully opaque
    errorline = circle.mark_errorbar().encode(
        y=alt.Y("under:Q", title=""),
        y2="upper:Q"
    ).encode(
        size=alt.value(1),
        opacity=alt.value(1)
    )

    if no_line:
        dot_plot = circle
    elif no_point:
        dot_plot = line
    else:
        dot_plot = alt.layer(circle + line)

    if show_stdev:
        dot_plot = alt.layer(dot_plot + errorline)

    # Wrap in a layer in both cases so the return type does not depend on `nearest_rule`
    if nearest_rule is None:
        dot_plot = alt.layer(dot_plot)
    else:
        dot_plot = alt.layer(dot_plot + nearest_rule)

    return dot_plot.properties(height=height, width=width)

In [None]:
def lab_by_date(
    loinc=None,
    data_level=DATA_AGGREGATE_TYPES.COMBINED_BY_COUNTRY, 
    width=700, height=300, bar_size=8, 
    no_axis_title=False, no_legend=False, legend_columns=None,
    no_line_top=False, no_line_bottom=False, show_stdev_top=False, log_y_scale=False,
    y_domain_top=None, y_domain_bottom=None, 
    is_num_hospitals=False, point_size=30
):
    """Build the four-panel lab chart: two dot plots above two bar charts.

    From top to bottom the panels are:
      1. a dot plot of the combined-level rows,
      2. a dot plot of the country- or site-level rows (for ``COMBINED_ALL``:
         countries, with one faint line per anonymized site behind them),
      3. a bar chart of the summed number of patients,
      4. a bar chart of the summed number of sites (or hospitals).

    The panels share a hover selection on days since positive, a legend
    selection, and a date brush on the bar charts that filters the dot plots.

    Parameters
    ----------
    loinc : str, optional
        Lab name to show. ``None`` adds a dropdown over ``LOINCS`` instead;
        otherwise the chart is filtered to this lab and titled with it.
    data_level : DATA_AGGREGATE_TYPES member
        ``COMBINED_BY_COUNTRY``, ``COMBINED_BY_SITE`` or ``COMBINED_ALL``.
    width, height : int
        Size of each dot plot; the bar charts use ``width`` and a height of 60.
    bar_size : int
        Bar width of both bar charts.
    no_axis_title : bool
        Suppress the y-axis titles.
    no_legend : bool
        Hide the legend.
    legend_columns : int, optional
        Number of legend columns.
    no_line_top, no_line_bottom : bool
        Draw the top / detail dot plot without connecting lines.
    show_stdev_top : bool
        Show error bars on the top dot plot.
    log_y_scale : bool
        Use logarithmic y scales in the dot plots.
    y_domain_top, y_domain_bottom : list, optional
        Explicit y domains of the top / detail dot plot.
    is_num_hospitals : bool
        Sum ``COLUMNS.NUM_HOSPITALS`` rather than ``COLUMNS.NUM_SITES`` in the
        bottom bar chart.
    point_size : int
        Mark size of the points.

    Returns
    -------
    alt.VConcatChart

    Raises
    ------
    ValueError
        If ``data_level`` is not one of the three supported values.
    """

    if data_level == DATA_AGGREGATE_TYPES.COMBINED_BY_COUNTRY:
        # Combined on the top, country-level on the middle
        df = pd.concat([df_lb_combined, df_lb_country])
        color_scale = alt.Scale(domain=[ALL_COUNTRY] + COUNTRIES, range=[ALL_COUNTRY_COLOR] + COUNTRY_COLOR)
        detail_plot_data_level = "country"
        bottom_plot_data_level = "country"
    elif data_level == DATA_AGGREGATE_TYPES.COMBINED_BY_SITE:
        # Combined on the top, site-level on the middle
        df = pd.concat([df_lb_combined, df_lb_site])
        color_scale = alt.Scale(domain=[ALL_COUNTRY] + ANONYMOUS_SITES, range=[ALL_COUNTRY_COLOR] + ANONYMOUS_COLORS)
        detail_plot_data_level = "site"
        bottom_plot_data_level = "site"
    elif data_level == DATA_AGGREGATE_TYPES.COMBINED_ALL:
        # Combined on the top, country-level and site-level on the middle
        df = pd.concat([df_lb_combined, df_lb_country, df_lb_site])
        color_scale = alt.Scale(domain=[ALL_COUNTRY] + COUNTRIES, range=[ALL_COUNTRY_COLOR] + COUNTRY_COLOR)
        detail_plot_data_level = "country"
        bottom_plot_data_level = "site"
    else:
        # Without this branch an unknown level surfaced later as an unbound-variable error
        raise ValueError(f"Unsupported data_level: {data_level!r}")

    # Sites are colored individually only in the by-site view; otherwise color follows the country
    c_field = COLUMNS.SITE_ID if data_level == DATA_AGGREGATE_TYPES.COMBINED_BY_SITE else COLUMNS.COUNTRY

    #
    # Selections
    #
    nearest = alt.selection(type="single", nearest=True, on="mouseover", fields=[COLUMNS.DAYS_SINCE_POSITIVE], empty='none', clear="mouseout", name="nearest_selector")

    lab_dropdown = alt.binding_select(options=LOINCS)
    lab_selection = alt.selection_single(fields=["loinc_name"], bind=lab_dropdown, name="Lab", init={"loinc_name": LOINCS[0]})

    legend_selection = alt.selection_multi(fields=[c_field], bind="legend")

    date_brush = alt.selection(type="interval", encodings=['x'])

    y_zoom_top = alt.selection(type="interval", bind='scales', encodings=['y'])
    y_zoom_bottom = alt.selection(type="interval", bind='scales', encodings=['y'])

    #
    # Rules
    #
    # Thin red vertical line at the hovered day
    nearest_rule = alt.Chart(df).mark_rule(color="red").encode(
        x=f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
        size=alt.value(0.5)
    ).transform_filter(
        nearest
    )

    #
    # Dot plots
    #
    tooltip = [
        alt.Tooltip(COLUMNS.SITE_ID, title="Country"),
        alt.Tooltip(COLUMNS.MEAN_VALUE, title="Weighted mean", format=".2f"),
        alt.Tooltip(COLUMNS.NUM_PATIENTS, title="Number of patients"),
        alt.Tooltip(COLUMNS.DAYS_SINCE_POSITIVE, title="Days since positive")
    ]

    if no_legend:
        legend = None
    elif legend_columns is not None:
        legend = alt.Legend(title=None, columns=legend_columns)
    else:
        legend = alt.Legend(title=None)

    overview_dot = get_lab_dot_plot(
        chart=alt.Chart(df).transform_filter(alt.datum["data_level"] == "combined"),
        width=width, 
        height=height,
        point_size=point_size,
        color_scale=color_scale,
        color_field=c_field,
        y_domain=y_domain_top, 
        nearest_rule=nearest_rule,
        nearest=nearest,
        legend=legend,
        tooltip=tooltip,
        y_title=None if no_axis_title else "Weighted mean (CI)",
        no_line=no_line_top, 
        show_stdev=show_stdev_top,
        log_y_scale=log_y_scale,
        no_x_axis=True
    ).add_selection(
        y_zoom_top
    ).transform_filter(
        date_brush
    )

    detail_dot = get_lab_dot_plot(
        chart=alt.Chart(df).transform_filter(alt.datum["data_level"] == detail_plot_data_level),
        width=width, 
        height=height,
        point_size=point_size,
        y_domain=y_domain_bottom,
        color_scale=color_scale,
        color_field=c_field,
        y_title=None if no_axis_title else "Mean value",
        nearest=nearest,
        tooltip=tooltip,
        legend=legend,
        nearest_rule=nearest_rule,
        no_line=no_line_bottom, 
        show_stdev=False,
        log_y_scale=log_y_scale,
        no_x_axis=False
    ).add_selection(
        y_zoom_bottom
    ).transform_filter(
        date_brush
    )

    if data_level == DATA_AGGREGATE_TYPES.COMBINED_ALL:
        for site in ANONYMOUS_SITES:
            # Trick to add separate lines for individual country but to make site ids to be excluded in legends.
            detail_dot_on_back = get_lab_dot_plot(
                chart=alt.Chart(df).transform_filter(
                    alt.datum["data_level"] == "site"
                ).transform_filter(
                    alt.datum[COLUMNS.SITE_ID] == site
                ),
                width=width, 
                height=height,
                point_size=point_size,
                y_domain=y_domain_bottom,
                color_scale=color_scale,
                y_title=None if no_axis_title else "Mean value",
                nearest=nearest,
                tooltip=tooltip,
                legend=legend,
                nearest_rule=nearest_rule,
                no_line=no_line_bottom, 
                show_stdev=False,
                log_y_scale=log_y_scale,
                no_point=True
            ).encode(
                opacity=alt.value(0.2)
            ).transform_filter(
                date_brush
            )
            # Each site line goes behind everything layered so far
            detail_dot = alt.layer(detail_dot_on_back, detail_dot)

    dot_plots = alt.vconcat(
        overview_dot, 
        detail_dot, 
        spacing=10
    ).resolve_scale(
        x="shared"
    )

    #
    # Middle Bar Chart
    #
    bottom_y_field = COLUMNS.NUM_HOSPITALS if is_num_hospitals else COLUMNS.NUM_SITES
    bar_top_y_title = None if no_axis_title else "# of patients"

    bar = alt.Chart(df).mark_bar(size=bar_size).encode(
        y=alt.Y(
            f"sum({COLUMNS.NUM_PATIENTS}):Q", 
            title=bar_top_y_title,
            axis=alt.Axis(
                format=axis_format, 
                tickMinStep=1
            )
        ),
        x=alt.X(
            f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
            title=None,
            scale=alt.Scale(
                zero=False,
                nice=False,
                domain=DAYS_SINCE_EXTENT,
                padding=10
            ),
            axis=alt.Axis(
                grid=True,
                labels=False,
                ticks=False,
                domain=True
            )
        ),
        color=alt.Color(
            f"{c_field}:N", 
            scale=color_scale, 
            title=None, 
            legend=legend
        ),
        tooltip=tooltip,
    )

    middle_chart = (
        bar + nearest_rule
    ).transform_filter(
        alt.datum["data_level"] == bottom_plot_data_level
    ).add_selection(
        date_brush
    ).properties(
        height=60, 
        width=width
    )

    #
    # Bottom Bar Chart
    #
    if no_axis_title:
        bar_bottom_y_title = None
    elif is_num_hospitals:
        bar_bottom_y_title = "# of hospitals"
    else:
        bar_bottom_y_title = "# of sites"

    bottom_bar = alt.Chart(df).mark_bar(size=bar_size).encode(
        x=alt.X(
            f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q",
            title="Days since positive",
            scale=alt.Scale(
                zero=False,
                nice=False,
                domain=DAYS_SINCE_EXTENT,
                padding=10
            )
        ),
        y=alt.Y(
            f"sum({bottom_y_field}):Q", 
            title=bar_bottom_y_title,
            axis=alt.Axis(
                format=axis_format, 
                tickMinStep=1
            )
        ),
        color=alt.Color(
            f"{c_field}:N", 
            scale=color_scale, 
            legend=legend
        ),
        tooltip=tooltip,
    )

    bottom_chart = (
        bottom_bar + nearest_rule
    ).add_selection(
        date_brush
    ).transform_filter(
        alt.datum["data_level"] == bottom_plot_data_level
    ).properties(
        height=60, 
        width=width
    )

    result_vis = alt.vconcat(
        dot_plots, 
        middle_chart, 
        bottom_chart, 
        spacing=5
    ).resolve_scale(
        y="independent", 
        x="independent",
        color="shared"
    ).add_selection(
        nearest
    ).add_selection(
        legend_selection
    ).transform_filter(
        legend_selection
    )

    if loinc is None:
        # No fixed lab: let the reader pick one from the dropdown
        result_vis = result_vis.add_selection(
            lab_selection
        ).transform_filter(
            lab_selection
        )
    else:
        result_vis = result_vis.properties(title={
            "text": loinc
        }).transform_filter(
            alt.datum["loinc_name"] == loinc
        )

    return result_vis

## Lab Values by Country

In [None]:
# Combined series on top (points with error bars, no line), one series per country below
lab_plot = apply_theme(
    lab_by_date(
        data_level=DATA_AGGREGATE_TYPES.COMBINED_BY_COUNTRY,
        height=200,
        point_size=60,
        bar_size=10,
        no_line_top=True,
        show_stdev_top=True,
    ).properties(
        title=dict(
            text="Lab Values by Country",
            subtitle=get_visualization_subtitle(alt_num_sites=21),
            subtitleColor="gray",
            dx=60,
        )
    ),
    legend_orient="right",
)

# Hand the chart to the website helper
for_website(lab_plot, "Labs", "Lab values by country")

lab_plot

## Lab Values by Site

In [None]:
# Combined series on top (points with error bars, no line), one series per anonymized site below
lab_plot = apply_theme(
    lab_by_date(
        data_level=DATA_AGGREGATE_TYPES.COMBINED_BY_SITE,
        height=200,
        point_size=60,
        bar_size=10,
        no_line_top=True,
        show_stdev_top=True,
    ).properties(
        title=dict(
            text="Lab Values by Site",
            subtitle=get_visualization_subtitle(alt_num_sites=21),
            subtitleColor="gray",
            dx=60,
        )
    ),
    legend_orient="right",
)

# Hand the chart to the website helper
for_website(lab_plot, "Labs", "Lab values by site")

lab_plot

## Lab Values by Country and Site

In [None]:
# Combined series on top, countries below with the individual sites drawn faintly behind them
lab_plot = apply_theme(
    lab_by_date(
        data_level=DATA_AGGREGATE_TYPES.COMBINED_ALL,
        is_num_hospitals=False,
        log_y_scale=False,
        no_line_top=True,
        no_line_bottom=False,
        show_stdev_top=True,
    ).properties(
        title=dict(
            text="Lab Values by Country and Site",
            subtitle=get_visualization_subtitle(alt_num_sites=21),
            subtitleColor="gray",
            dx=60,
        )
    ),
    legend_orient="right",
)

lab_plot

## Key Labs Overview
#### Notice: Since only an anonymized site-level file is publicly available, `# of hospitals` from Figure 4 in the paper is replaced with `# of sites` in this notebook.

In [None]:
# Labs shown in the overview, from left to right (names taken from LOINCS)
loinc_of_interest = [
    "Creatinine (mg/dL)",
    "C-reactive protein (mg/dL)",
    "D-dimer",
    "Total bilirubin (mg/dL)",
    "White blood cell count (10*3/uL)",
]

# Per-lab y domains of the top and the detail dot plot; None leaves the domain unset.
# The keys match the keyword arguments of lab_by_date, so each entry can be unpacked into the call.
domains = {
    "Creatinine (mg/dL)": dict(y_domain_top=[0.5, 2.2], y_domain_bottom=[0, 5.5]),
    "C-reactive protein (mg/dL)": dict(y_domain_top=[20, 160], y_domain_bottom=None),
    "D-dimer": dict(y_domain_top=[1000, 7000], y_domain_bottom=[0, 16000]),
    "Total bilirubin (mg/dL)": dict(y_domain_top=[0.3, 1.3], y_domain_bottom=[0, 5.5]),
    "White blood cell count (10*3/uL)": dict(y_domain_top=[5, 15], y_domain_bottom=None),
}

# Build one narrow column of panels per lab and place the columns side by side
h = alt.hconcat()
for loinc in loinc_of_interest:
    # Only the left-most column keeps its axis titles
    no_axis_title = loinc != loinc_of_interest[0]

    site_level = lab_by_date(
        loinc=loinc,
        data_level=DATA_AGGREGATE_TYPES.COMBINED_ALL,
        width=180,
        height=150,
        bar_size=5,
        no_axis_title=no_axis_title,
        show_stdev_top=True,
        no_line_top=True,
        no_line_bottom=False,
        legend_columns=14,
        is_num_hospitals=False,
        log_y_scale=False,
        **domains[loinc],
    )

    h |= site_level

out = apply_theme(
    h,
    legend_orient="bottom",
    axis_title_font_size=10,
    label_font_size=12,
    axis_label_font_size=10,
    title_anchor="middle",
    title_font_size=13,
).properties(
    title=dict(
        text="Key Labs Overview",
        fontSize=18,
        subtitle=get_visualization_subtitle(alt_num_sites=21),
        subtitleColor="gray",
        anchor="start",
        dx=60,
    )
)

# Hand the chart to the website helper
for_website(out, "Labs", "Five lab values by site")

out