In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from os.path import join
import math

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_lab_df,
    get_visualization_subtitle,
    get_country_color_map,
    get_siteid_color_maps,
    read_none_pediatric_site_details_df,
    get_siteid_color_maps_none_pediatric,
    get_siteid_color_maps_pediatric,
    get_anonymousid_color_maps_pediatric,
    get_anonymousid_color_maps_none_pediatric,
    get_siteid_anonymous_map,
    get_siteid_country_map,
    get_country_color_map_none_pediatric,
    get_country_color_map_pediatric,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows() # Allow using rows more than 5000

In [None]:
# Data-release date; lab_plot() passes it to get_visualization_subtitle() as `data_release`.
DATA_AS_OF='2020-08-03'

In [None]:
# Lookup from the short lab names found in the precomputed CSV files (their
# 'labname' / 'Lab' columns are mapped through this dict further down) to the
# display label used as `loinc_name`: '<LOINC code(s)>, <lab name> (<unit>)'.
# The values are also the options of the lab dropdown in lab_plot().
# A name missing from this dict raises KeyError where the dict is applied.
loinc_map = {
    'alanine aminotransferase (ALT)': '1742-6, alanine aminotransferase (ALT) (U/L)',
    'albumin':  '1751-7, albumin (g/dL)',
    'aspartate aminotransferase (AST)':  '1920-8, aspartate aminotransferase (AST) (U/L)',
    'C-reactive protein (CRP) (Normal Sensitivity)':  '1988-5, C-reactive protein (CRP) (Normal Sensitivity) (mg/L)',
    'cardiac troponin (High Sensitivity)':  '49563-0, cardiac troponin (High Sensitivity) (ng/mL)',
    'cardiac troponin (Normal Sensitivity)': '6598-7, cardiac troponin (Normal Sensitivity) (ug/L)',
    'creatinine':  '2160-0, creatinine (mg/dL)',
    'D-dimer':  '48065-7, 48066-5, D-dimer (ng/mL)',
    'Ferritin':  '2276-4, Ferritin (ng/mL)',
    'Fibrinogen':  '3255-7, Fibrinogen (mg/dL)',
    'lactate dehydrogenase (LDH)':  '2532-0, lactate dehydrogenase (LDH) (U/L)',
    'lymphocyte count':  '731-0, lymphocyte count (10*3/uL)',
    'neutrophil count':  '751-8, neutrophil count (10*3/uL)',
    'procalcitonin':   '33959-8, procalcitonin (ng/mL)',
    'prothrombin time (PT)':  '5902-2, prothrombin time (PT) (s)',
    'total bilirubin':  '1975-2, total bilirubin (mg/dL)',
    'white blood cell count (Leukocytes)': '6690-2, white blood cell count (Leukocytes) (10*3/uL)' 
}

In [None]:
# Load the full lab table through the project helper and display it.
# (A bare expression is only rendered when it is the last line of a cell, so
# this cell keeps a single trailing expression.)
original_df = read_full_lab_df()
original_df

In [None]:
# Load the LOINC reference table and derive the display labels used everywhere
# else in this notebook: '<LOINC id>, <Name> (<Units>)'.
loinc_df = read_loinc_df()

# Keep a copy of the id as a regular column before it becomes the index; it is
# replaced by the display label at the end of this cell.
loinc_df['loinc_name'] = loinc_df['LOINC']
loinc_df = loinc_df.set_index('LOINC')

LOINC_NAMES = loinc_df["Name"].unique().tolist()
LOINC_IDS = loinc_df.index.values.tolist()
LOINC_UNITS = loinc_df['Units'].values.tolist()

# Build the labels from row-aligned columns. LOINC_NAMES is de-duplicated, so
# indexing it by row position would shift labels (or raise IndexError) as soon
# as two LOINC ids share the same name.
LOINC_MAP = {
    loinc_id: f'{loinc_id}, {name} ({units})'
    for loinc_id, name, units in zip(LOINC_IDS, loinc_df['Name'].tolist(), LOINC_UNITS)
}

loinc_df['loinc_name'] = loinc_df['loinc_name'].apply(lambda x: LOINC_MAP[x])

loinc_df

In [None]:
def _nan_negative_values(frame, columns):
    """Replace negative entries of each listed column with NaN, directly on `frame`."""
    for col in columns:
        # Series.mask returns a new column whose dtype can hold NaN, which
        # avoids writing NaN into an integer column element by element.
        frame[col] = frame[col].mask(frame[col] < 0)


def _weighted_group_mean(frame, keys):
    """Pool `mean_value` over `keys`, weighting each row by its `num_patients`.

    Returns one row per group with the summed `num_patients` and the weighted
    `mean_value`.
    """
    weighted = frame.copy()
    weighted['mean_value'] = weighted['mean_value'] * weighted['num_patients']
    pooled = weighted.groupby(keys).agg({
        'mean_value': 'sum',
        'num_patients': 'sum'
    }).reset_index()
    # NOTE(review): 'sum' skips NaN products but still counts the patients of
    # rows whose mean is NaN, which pulls the pooled mean down -- confirm
    # whether such rows should be excluded from the denominator.
    pooled['mean_value'] = pooled['mean_value'] / pooled['num_patients']
    return pooled


def process_labs_df(df):
    """Turn the wide per-site lab table into the long table used for plotting.

    Steps, in order:
      1. negative counts / means / standard deviations become NaN;
      2. only rows with 0 <= days_since_admission <= 30 are kept;
      3. spaces are removed from `loinc` and `loinc_name` is looked up in LOINC_MAP;
      4. every site whose id contains 'ICSM' is merged into one 'ICSM' site
         (patient-weighted means, summed counts);
      5. the `*_all` columns are converted to the never-severe group
         (all minus ever-severe);
      6. the two patient-count columns are melted into `severity` /
         `num_patients`, and the matching mean columns are selected per row;
      7. site ids are replaced through get_siteid_anonymous_map(), pediatric
         sites get country 'Masked';
      8. country-level rows (non-pediatric) and one 'All Country' row set
         (pediatric) are appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `siteid`, `loinc`, `units`, `days_since_admission`,
        `country`, `color`, `num_patients_all`, `num_patients_ever_severe` and
        the mean/stdev (plain and log) columns for `_all` and `_ever_severe`.

    Returns
    -------
    pandas.DataFrame
        Columns `siteid`, `loinc_name`, `days_since_admission`, `mean_value`,
        `stdev_value` (always NaN), `country`, `severity`, `num_patients`,
        `is_pediatric` and `datalevel` ('site' or 'country').

    Raises
    ------
    KeyError
        If a LOINC code is missing from LOINC_MAP or a site id is missing from
        the anonymous-id map.
    """
    count_cols = ['num_patients_all', 'num_patients_ever_severe']
    all_cols = [
        'mean_value_all', 'stdev_value_all', 'mean_log_value_all', 'stdev_log_value_all'
    ]
    severe_cols = [
        'mean_value_ever_severe', 'stdev_value_ever_severe',
        'mean_log_value_ever_severe', 'stdev_log_value_ever_severe'
    ]
    checked_cols = count_cols + all_cols + severe_cols

    df_lb = df.copy()

    # Negative values to NaN
    _nan_negative_values(df_lb, checked_cols)

    # trim x domain
    df_lb = df_lb[df_lb['days_since_admission'].between(0, 30)].copy()

    # Remove a white space which shouldn't be provided
    df_lb['loinc'] = df_lb['loinc'].apply(lambda loinc: loinc.replace(' ', ''))

    # Add readable names for LOINC and units
    df_lb['loinc_name'] = df_lb['loinc'].apply(lambda loinc: LOINC_MAP[loinc])

    # Aggregate the sites whose id contains 'ICSM' into a single 'ICSM' site
    is_icsm = df_lb['siteid'].str.contains('ICSM')
    mean_count_pairs = [
        ('mean_value_all', 'num_patients_all'),
        ('mean_value_ever_severe', 'num_patients_ever_severe'),
    ]
    agg_df = df_lb[is_icsm].copy()
    for mean_col, n_col in mean_count_pairs:
        agg_df[mean_col] = agg_df[mean_col] * agg_df[n_col]
    agg_df = agg_df.groupby([
        'loinc_name', 'days_since_admission', 'country', 'color'
    ]).agg({
        'num_patients_all': 'sum', 'mean_value_all': 'sum',
        'num_patients_ever_severe': 'sum', 'mean_value_ever_severe': 'sum'
    }).reset_index()
    for mean_col, n_col in mean_count_pairs:
        agg_df[mean_col] = agg_df[mean_col] / agg_df[n_col]
    agg_df['siteid'] = 'ICSM'

    # Replace the individual ICSM rows by the aggregated ones. The index is
    # rebuilt so later boolean selections never see duplicate labels; pd.melt
    # below discards the index anyway.
    df_lb = pd.concat([df_lb[~is_icsm], agg_df], ignore_index=True)

    # Calculate the never-severe group and store it in the '*_all' columns.
    # Everything is computed from the untouched input columns first and only
    # then written back, so no intermediate value can alias an overwritten column.
    mu_all = df_lb['mean_value_all']
    sd_all = df_lb['stdev_value_all']
    n_all = df_lb['num_patients_all']
    mu_severe = df_lb['mean_value_ever_severe']
    sd_severe = df_lb['stdev_value_ever_severe']
    n_severe = df_lb['num_patients_ever_severe']

    n_never = n_all - n_severe
    mu_never = (mu_all * n_all - mu_severe * n_severe) / n_never
    # Put NaN if the number of patients of either *_all or *_ever_severe is zero.
    mu_never = mu_never.mask((n_severe <= 0) | (n_all <= 0))

    # Sum of squares of a group = sd^2 * (n - 1) + n * mu^2. The never-severe
    # sum of squares is the difference of the two group sums, so the whole
    # ever-severe term has to be subtracted (both of its parts).
    ss_all = sd_all ** 2 * (n_all - 1) + n_all * mu_all ** 2
    ss_severe = sd_severe ** 2 * (n_severe - 1) + n_severe * mu_severe ** 2
    var_never = (ss_all - ss_severe - n_never * mu_never ** 2) / (n_never - 1)
    # A negative variance is not meaningful and becomes NaN.
    sd_never = np.sqrt(var_never.where(var_never >= 0))

    df_lb['mean_value_all'] = mu_never
    df_lb['num_patients_all'] = n_never
    df_lb['stdev_value_all'] = sd_never

    # Second clean-up pass: the subtraction above can create negative counts or
    # means; each column is checked against itself.
    _nan_negative_values(df_lb, checked_cols)

    # We want to put a category for severity, instead of using wide columns
    df_lb = pd.melt(
        df_lb,
        id_vars=(
            ['siteid', 'loinc', 'loinc_name', 'days_since_admission', 'units']
            + all_cols
            + severe_cols
            + ['country', 'color']
        ),
        value_vars=count_cols,
        var_name='severity',
        value_name='num_patients'
    )
    df_lb['severity'] = df_lb['severity'].map({
        'num_patients_all': 'Never Severe',
        'num_patients_ever_severe': 'Ever Severe',
    })

    # Use certain values based on the 'severity' column
    ever = (df_lb['severity'] == 'Ever Severe')
    for all_col, severe_col in zip(all_cols, severe_cols):
        df_lb.loc[ever, all_col] = df_lb.loc[ever, severe_col]
    df_lb = df_lb.rename(columns={
        'mean_value_all': 'mean_value',
        'stdev_value_all': 'stdev_value',
        'mean_log_value_all': 'mean_log_value',
        'stdev_log_value_all': 'stdev_log_value',
    })
    df_lb = df_lb.drop(columns=severe_cols)

    pediatric_sites = list(get_siteid_color_maps_pediatric().keys())
    df_lb['is_pediatric'] = np.where(df_lb['siteid'].isin(pediatric_sites), 'yes', 'no')

    # Because we are not using sd values in site/country-level plots, and
    # we do not properly aggregate them, we put NaN to make sure
    # we don't use them improperly
    df_lb['stdev_value'] = np.nan

    # Drop unused values
    df_lb = df_lb.drop(columns=['loinc', 'units', 'mean_log_value', 'stdev_log_value', 'color'])

    # Make sure to use anonymous ids
    anonymous = get_siteid_anonymous_map()
    df_lb['siteid'] = df_lb['siteid'].apply(lambda x: anonymous[x])

    df_lb.loc[df_lb['is_pediatric'] == 'yes', 'country'] = 'Masked'

    # Country-level data for non-pediatric sites
    country_df = _weighted_group_mean(
        df_lb[df_lb['is_pediatric'] == 'no'],
        ['loinc_name', 'days_since_admission', 'country', 'severity', 'is_pediatric']
    )
    country_df['siteid'] = country_df['country']

    # Aggregated data over all pediatric sites
    agg_ped_df = _weighted_group_mean(
        df_lb[df_lb['is_pediatric'] == 'yes'],
        ['loinc_name', 'days_since_admission', 'severity', 'is_pediatric']
    )
    agg_ped_df['siteid'] = 'All Country'
    agg_ped_df['country'] = 'All Country'

    # Concatenate all three data
    df_lb['datalevel'] = 'site'
    country_df['datalevel'] = 'country'
    agg_ped_df['datalevel'] = 'country' # TODO: change to 'aggregated'

    df_lb = pd.concat([df_lb, country_df, agg_ped_df])

    return df_lb

In [None]:
# Build the long plotting table from the raw lab table and display it.
# NOTE(review): the cell further down that starts from the precomputed CSV
# rebinds `processed_original_df`, so when the notebook is run top to bottom
# this result is replaced before it is plotted.
processed_original_df = process_labs_df(original_df)
processed_original_df

# To debug (inspect one lab / day / severity slice of the country-level rows):
# processed_original_df[
#     (processed_original_df['datalevel'] == 'country') & 
#     (processed_original_df['loinc_name'] == '1742-6, alanine aminotransferase (ALT) (U/L)') & 
#     (processed_original_df['days_since_admission'] == 0) &
#     (processed_original_df['severity'] == 'Ever Severe') &
#     (processed_original_df['is_pediatric'] == 'yes')
# ]

## Alternatively, use a file that is precomputed to some extent
for making `processed_original_df`

In [None]:
# Load the precomputed wide table (first row is the header).
# NOTE: this rebinds `original_df`, which earlier held the result of read_full_lab_df().
original_df = pd.read_csv('../data/Figure_2_lab_plot_eve_nev_dif_30.csv', header=[0])
original_df

In [None]:
# Per-site columns of the precomputed table, in their original column order.
# Membership is decided by substring ('_n' / '_mean' anywhere in the name);
# re-check these lists whenever the data file gains new columns.
n_cols = [name for name in original_df.columns if '_n' in name]  # SITE_n
mean_cols = [name for name in original_df.columns if '_mean' in name]  # SITE_mean

In [None]:
# Reshape the precomputed wide table (one `<SITE>_n` and one `<SITE>_mean`
# column per site) into the long layout used for plotting.
processed_original_df = original_df.copy()

# remove unused rows: keep the original (non-log) scale only
processed_original_df = processed_original_df[processed_original_df['scale'] == 'original']
processed_original_df = processed_original_df.drop(columns=['scale'])

# rename columns
processed_original_df = processed_original_df.rename(columns={
    'labname': 'loinc_name', 'tt0': 'days_since_admission', 'setting': 'severity'
})

# rename categories
processed_original_df['severity'] = processed_original_df['severity'].apply(
    lambda x: 'Ever Severe' if x == 'ever' else 'Never Severe' if x == 'never' else 'Difference'
)
processed_original_df['loinc_name'] = processed_original_df['loinc_name'].apply(lambda x: loinc_map[x])

# wide to long: one row per (lab, severity, day, site) with that site's patient count
processed_original_df = processed_original_df.melt(
    id_vars=[
        'loinc_name', 'severity', 'days_since_admission'
    ] + mean_cols,
    value_vars=n_cols,
    var_name='siteid',
    value_name='num_patients'
)
processed_original_df['siteid'] = processed_original_df['siteid'].apply(lambda x: x.replace('_n', ''))

# Pick, for every row, the mean that belongs to the row's own site.
# Start from NaN (not 0): a site without a `_mean` column must show up as
# missing rather than as a real mean of zero, and a float column avoids
# writing floats into an integer column.
processed_original_df['mean_value'] = np.nan
for site_mean_col in mean_cols:
    is_site = processed_original_df['siteid'] == site_mean_col.replace('_mean', '')
    processed_original_df.loc[is_site, 'mean_value'] = processed_original_df.loc[is_site, site_mean_col]

# drop unused columns
processed_original_df = processed_original_df.drop(columns=mean_cols)

# Add is_pediatric
processed_original_df['is_pediatric'] = 'no'

# Add country
site_country_map = get_siteid_country_map()
processed_original_df['country'] = processed_original_df['siteid'].apply(lambda x: site_country_map[x.upper()])

# The rest mirrors the corresponding steps of `process_labs_df()`.

# make sure to use anonymous ids
# NOTE(review): the country lookup above upper-cases the site id while this
# lookup does not -- confirm both maps are keyed the same way.
siteid_anonymous_map = get_siteid_anonymous_map()
processed_original_df['siteid'] = processed_original_df['siteid'].apply(lambda x: siteid_anonymous_map[x])

# add country-level data (patient-weighted mean per country)
country_df = processed_original_df.copy()

country_df['mean_value'] = country_df['mean_value'] * country_df['num_patients']

country_df = country_df.groupby([
    'loinc_name', 'days_since_admission', 'country', 'severity', 'is_pediatric'
])
country_df = country_df.agg({
    'mean_value': 'sum',
    'num_patients': 'sum'
}).reset_index()

country_df['mean_value'] = country_df['mean_value'] / country_df['num_patients']
country_df['siteid'] = country_df['country']

# concatenate all dataframes
processed_original_df['datalevel'] = 'site'
country_df['datalevel'] = 'country'

processed_original_df = pd.concat([processed_original_df, country_df])

# done
processed_original_df

In [None]:
# Sanity check: list the distinct lab labels present in the processed table.
processed_original_df['loinc_name'].unique().tolist()

In [None]:
# Load the precomputed meta-analysis table (comma-separated, first row is the header).
meta_df = pd.read_csv("../data/Figure_4_lab_plot_meta_eve_nev_dif_ori_log_daymax30.csv", sep=",", header=0)
meta_df

In [None]:
# Bring the meta-analysis table into the same column layout as the site-level
# table: original-scale rows only, shared column names, and constant
# 'All Country' / 'combined' markers so the plots can tell the rows apart.
processed_meta_df = (
    meta_df
    # Drop rows that we don't use, then the column that selected them
    .loc[lambda frame: frame['scale'] == 'original']
    .drop(columns=['scale'])
    # Change column names
    .rename(columns={
        'Lab': 'loinc_name',
        'days_since_positive': 'days_since_admission',
        'mean': 'mean_value',
        'se': 'stdev_value',
        'total_n': 'num_patients',
        'setting': 'severity'
    })
    # Mark as 'All Country'
    .assign(
        siteid='All Country',
        country='All Country',
        datalevel='combined',
        is_pediatric='no',
    )
)

# Use consistent category names
processed_meta_df['severity'] = processed_meta_df['severity'].apply(
    lambda setting: (
        'Ever Severe' if setting == 'ever'
        else "Never Severe" if setting == 'never'
        else 'Difference'
    )
)
processed_meta_df['loinc_name'] = processed_meta_df['loinc_name'].apply(
    lambda lab: loinc_map[lab]
)

processed_meta_df

In [None]:
# Stack the meta-analysis rows (datalevel 'combined') on top of the
# site/country rows; this is the frame handed to the plotting functions below.
df = pd.concat([processed_meta_df, processed_original_df])
df

In [None]:
def lab_plot(
    df=None,
    is_pediatric=False,
    is_aggregated=False,
    loinc=list(LOINC_MAP.values())[0], # ALT
    width=700, 
    height=400,
    bar_size=14,
    point_size=30,
    no_axis_title=False,
    no_legend=False, 
    legend_columns=None,
    y_domain_top=None, 
    y_domain_bottom=None
):
    """Build the interactive lab-value chart (values over days since positive).

    The result stacks, top to bottom: the meta-analysis panel with 95% CI
    error bars (non-pediatric only, rows with datalevel 'combined'), the
    per-site or per-country mean values on a log scale, and a bar chart of
    patient counts. Every panel is faceted by `severity`.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs `is_pediatric`, `datalevel`, `siteid`, `country`, `severity`,
        `loinc_name`, `days_since_admission`, `mean_value`, `num_patients`,
        and for the meta-analysis panel `ci_95L` / `ci_95U`.
        If None, a message is printed and None is returned.
    is_pediatric : bool
        Plot rows with is_pediatric == 'yes' instead of 'no'.
    is_aggregated : bool
        Plot country-level rows (datalevel != 'site') instead of site-level
        rows (datalevel != 'country').
    loinc : str
        Lab label selected initially in the dropdown.
    width, height : number
        Panel width; `height` is used for the lab-value panel and half of it
        for the meta-analysis panel.
    bar_size, point_size : number
        Mark sizes of the bars and of the circles.
    no_axis_title : bool
        Hide the y-axis titles of the lab-value and patient-count panels.
    no_legend, legend_columns
        Currently unused; kept for interface compatibility.
    y_domain_top, y_domain_bottom : list or None
        Fixed y domain of the meta-analysis (top) panel and of the lab-value
        panel respectively; None lets the scale fit the data.

    Returns
    -------
    altair chart (vertical concatenation), or None when `df` is None.
    """
    if df is None:
        print('No Data Frame Suggested.')
        return

    # select data: pediatric or adult sites
    df_copy = df[df['is_pediatric'] == ('yes' if is_pediatric else 'no')].copy()

    # Colour domains. 'All Country' (black) is prepended for the non-pediatric
    # charts, which also carry the pooled meta-analysis rows.
    if is_pediatric:
        site_palette = get_anonymousid_color_maps_pediatric() # For actual id: get_siteid_color_maps_pediatric()
        site_names = list(site_palette.keys())
        site_colors = list(site_palette.values())
        num_sites = len(site_names)
    else:
        site_palette = get_anonymousid_color_maps_none_pediatric()
        site_names = ['All Country'] + list(site_palette.keys())
        site_colors = ['#000000'] + list(site_palette.values())
        num_sites = len(site_names) - 1

    if is_pediatric and is_aggregated:
        country_names = ['All Country']
        country_colors = ['#0072B2']
    elif is_pediatric:
        country_palette = get_anonymousid_color_maps_pediatric()
        country_names = list(country_palette.keys())
        country_colors = list(country_palette.values())
    else:
        country_palette = get_country_color_map_none_pediatric()
        country_names = ['All Country'] + list(country_palette.keys())
        country_colors = ['#000000'] + list(country_palette.values())

    # data field and title definition
    x_field = 'days_since_admission'
    mean_field = 'mean_value'
    num_pat_field = 'num_patients'
    x_field_title = 'Days since positive'
    mean_field_title = 'Mean value'
    severity_order = ['Ever Severe', 'Never Severe', 'Difference']

    # Country-level rows store the country name in `siteid`, so both variants
    # colour by the same field and differ only in title and domain.
    if is_aggregated:
        color = alt.Color(
            'siteid:N',
            title='Country',
            scale=alt.Scale(domain=country_names, range=country_colors)
        )
    else:
        color = alt.Color(
            'siteid:N',
            title='Site ID',
            scale=alt.Scale(domain=site_names, range=site_colors)
        )

    def _day_axis():
        # x axis shared by the line/dot panels: one tick per day at most
        return alt.Axis(grid=True, labels=True, ticks=True, domain=True, tickMinStep=1)

    def _y_scale(scale_type, domain):
        # only pass `domain` when the caller fixed one
        if domain is not None:
            return alt.Scale(zero=False, domain=domain, type=scale_type)
        return alt.Scale(zero=False, type=scale_type)

    def _severity_column(show_header=True):
        if show_header:
            return alt.Column("severity:N", title=None, sort=severity_order)
        return alt.Column(
            "severity:N", title=None, header=alt.Header(labels=False), sort=severity_order
        )

    # data selection
    base = (
        alt.Chart(df_copy)
            .transform_filter(
                alt.datum['datalevel'] != ('site' if is_aggregated else 'country')
            )
    )

    legend_selection = alt.selection_multi(fields=['siteid'], bind="legend")

    # NOTE(review): the dropdown options come from `loinc_map` while the default
    # of `loinc` comes from LOINC_MAP; the initial value only matches when both
    # produce the same label.
    input_dropdown = alt.binding_select(options=list(loinc_map.values()))
    selection = alt.selection_single(
        fields=['loinc_name'], 
        bind=input_dropdown, 
        name='Lab ', 
        init={'loinc_name': loinc}
    )

    y_zoom_top = alt.selection(type="interval", bind='scales', encodings=['y'])
    y_zoom_bottom = alt.selection(type="interval", bind='scales', encodings=['y'])

    # red vertical rule that follows the day nearest to the mouse pointer
    nearest = alt.selection(
        type="single", nearest=True, on="mouseover", fields=[x_field],
        empty='none', clear="mouseout", name="nearest_selector"
    )
    nearest_rule = alt.Chart(df_copy).mark_rule(color="red").encode(
        x=f"{x_field}:Q",
        size=alt.value(0.5)
    ).transform_filter(
        nearest
    )

    tooltip = [
        alt.Tooltip('siteid', title="Site ID"),
        alt.Tooltip('country', title="Country"),
        alt.Tooltip(mean_field, title="Lab value", format=".2f"),
        alt.Tooltip(num_pat_field, title="Number of patients"),
        alt.Tooltip(x_field, title="Days since positive")
    ]

    # ---- Meta-analysis (top panel, linear scale) ----
    meta_circle = (
        base
            .mark_circle(size=point_size, opacity=0.7)
            .encode(
                x=alt.X(
                    f"{x_field}:Q",
                    title=None,
                    scale=alt.Scale(zero=False, nice=False, padding=10),
                    axis=_day_axis(),
                ),
                y=alt.Y(
                    f"{mean_field}:Q",
                    title='Pooled mean (CI)',
                    # the top panel honours `y_domain_top`
                    scale=_y_scale("linear", y_domain_top),
                    axis=alt.Axis(format='r')
                ),
                color=color,
                tooltip=tooltip
            )
    )

    meta_line = meta_circle.mark_line(size=2, opacity=0.5)

    errorline = meta_circle.mark_errorbar().encode(
        y=alt.Y("ci_95L:Q", title=""),
        y2="ci_95U:Q",
        size=alt.value(1),
        opacity=alt.value(1)
    )

    meta_plot = (
        alt.layer(meta_circle, meta_line, errorline, nearest_rule)
            .properties(height=height/2.0, width=width)
            .add_selection(y_zoom_top)
    ).facet(
        column=_severity_column(), bounds="flush"
    ).transform_filter(alt.datum['datalevel'] == 'combined')

    # ---- Lab mean values in line/dot plot (log scale) ----
    y_title = None if no_axis_title else mean_field_title

    circle = (
        base
            .mark_circle(size=point_size, opacity=0.7)
            .encode(
                x=alt.X(
                    f"{x_field}:Q",
                    title=None,
                    scale=alt.Scale(
                        zero=False, nice=False, padding=10,
                        domain=[0,30]
                    ),
                    axis=_day_axis(),
                ),
                y=alt.Y(
                    f"{mean_field}:Q",
                    title=y_title,
                    scale=_y_scale("log", y_domain_bottom),
                    axis=alt.Axis(format='r')
                ),
                color=color,
                tooltip=tooltip
            )
    )

    line = circle.mark_line(size=2, opacity=0.5)

    lab_value_plot = (
        alt.layer(circle, line, nearest_rule)
            .properties(height=height, width=width)
            .add_selection(y_zoom_bottom)
        .facet(
            column=_severity_column(), bounds="flush"
        )
        .transform_filter(alt.datum['datalevel'] != 'combined')
        # a log scale cannot show zero or negative values
        .transform_filter(alt.datum[mean_field] > 0)
    )

    # ---- # of patients in bar charts ----
    num_pat_field_title = None if no_axis_title else "# of patients"

    bar = base.mark_bar(size=bar_size).encode(
        y=alt.Y(
            f"sum({num_pat_field}):Q", 
            title=num_pat_field_title,
            axis=alt.Axis(
                format='r',
                tickMinStep=1
            ),
            scale=alt.Scale(zero=True, padding=0, nice=True)
        ),
        x=alt.X(
            f"{x_field}:Q",
            title=x_field_title,
            scale=alt.Scale(zero=False, nice=False, padding=10),
            axis=alt.Axis(
                grid=True,
                labels=True,
                ticks=True,
                domain=True
            )
        ),
        color=color,
        order=alt.Order(
            'country',
            sort='ascending'
        ),
        tooltip=tooltip
    )

    middle_chart = (
        (bar + nearest_rule)
            .properties(height=100, width=width)
    ).facet(
        column=_severity_column(show_header=False), bounds="flush"
    ).transform_filter(alt.datum['datalevel'] != 'combined')

    # The meta-analysis panel is left out of the pediatric charts.
    panels = [lab_value_plot, middle_chart]
    if not is_pediatric:
        panels.insert(0, meta_plot)
    vconcat = alt.vconcat(*panels, spacing=5)

    # A plain string (no trailing comma, which would build a 1-tuple).
    subtitle = (
        get_visualization_subtitle(data_release=DATA_AS_OF, with_num_sites=False)
        + f'  |  {num_sites} Sites'
    )

    if is_pediatric:
        title_text = "Lab Values By Pediatric Site"
    elif is_aggregated:
        title_text = 'Lab Values By Country'
    else:
        title_text = "Lab Values By Site"

    result_vis = (
        vconcat
        .properties(
            title={
                "text": title_text,
                'subtitle': subtitle,
                'subtitleColor': 'gray'
            }
        ).resolve_scale(
            y="independent", 
            x="shared",
            color="shared"
        )
        .add_selection(selection)
        .add_selection(nearest)
        .transform_filter(selection)
        .add_selection(legend_selection)
        .transform_filter(legend_selection)
    )

    return result_vis

## Lab Values by Adult Sites

In [None]:
# Site-level chart for the non-pediatric sites.
final = lab_plot(df=df, width=500)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_site")

final

In [None]:
# Country-level chart for the non-pediatric sites.
final = lab_plot(df=df, is_aggregated=True, width=500)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_country")

final

## Lab Values by Pediatric Sites

In [None]:
# Site-level chart for the pediatric sites.
# NOTE(review): when `df` is built from the precomputed-CSV cells, every row has
# is_pediatric == 'no', so this chart receives no data; pediatric rows are only
# produced by process_labs_df().
final = lab_plot(df=df, is_pediatric=True, width=500)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_pediatric_site")

final

In [None]:
# Aggregated ('All Country') chart for the pediatric sites.
# NOTE(review): when `df` is built from the precomputed-CSV cells, every row has
# is_pediatric == 'no', so this chart receives no data; pediatric rows are only
# produced by process_labs_df().
final = lab_plot(df=df, is_pediatric=True, is_aggregated=True, width=500)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_pediatric_site_aggregated")

final

In [None]:
def lab_plot_day0(
    df=None,
    is_pediatric=False,
    is_aggregated=False,
    loinc=list(LOINC_MAP.values())[0], # ALT
    width=500,
    bar_size=14, 
    tick_thickness=5
):
    """Build the day-0 chart: one tick per site (or country) plus patient counts.

    Only rows with days_since_admission == 0 and datalevel != 'combined' are
    shown. The upper panel draws `mean_value` as ticks on a log scale, the
    lower panel draws `num_patients` as bars; both are faceted by `severity`.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs `is_pediatric`, `datalevel`, `days_since_admission`, `siteid`,
        `country`, `severity`, `loinc_name`, `mean_value` and `num_patients`.
        If None, a message is printed and None is returned.
    is_pediatric : bool
        Plot rows with is_pediatric == 'yes' instead of 'no'; colours then
        follow `siteid` instead of `country`.
    is_aggregated : bool
        Plot country-level rows (datalevel != 'site') instead of site-level
        rows (datalevel != 'country').
    loinc : str
        Lab label selected initially in the dropdown.
    width : number
        Width of each facet.
    bar_size, tick_thickness : number
        Mark sizes; `bar_size` is used for both the ticks and the bars.

    Returns
    -------
    altair chart (vertical concatenation), or None when `df` is None.
    """
    if df is None:
        print('No Data Frame Suggested.')
        return

    # select data: pediatric or adult sites
    df_copy = df[df['is_pediatric'] == ('yes' if is_pediatric else 'no')].copy()

    # Number of sites of the plotted cohort, shown in the subtitle.
    if is_pediatric:
        num_sites = len(get_anonymousid_color_maps_pediatric().keys())
    else:
        num_sites = len(get_anonymousid_color_maps_none_pediatric().keys())

    if is_pediatric and is_aggregated:
        # Notice: Not currently supported for this plot
        # because we do not have enough countries
        color_names = ['All Country']
        color_values = ['#0072B2']
    elif is_pediatric:
        palette = get_anonymousid_color_maps_pediatric()
        color_names = list(palette.keys())
        color_values = list(palette.values())
    else:
        palette = get_country_color_map_none_pediatric()
        color_names = list(palette.keys())
        color_values = list(palette.values())

    # data field and title definition
    x_field = 'siteid'
    mean_field = 'mean_value'
    num_pat_field = 'num_patients'
    color_field = 'siteid' if is_pediatric else 'country' 

    x_field_title = 'Site ID'
    mean_field_title = 'Mean value'

    color = alt.Color(
        f"{color_field}:N",
        title='Country',
        scale=alt.Scale(domain=color_names, range=color_values)
    )

    # selection
    # NOTE(review): lab_plot() takes its dropdown options from `loinc_map`,
    # this function from LOINC_MAP -- confirm which labels the plotted `df` uses.
    input_dropdown = alt.binding_select(options=list(LOINC_MAP.values()))
    selection = alt.selection_single(
        fields=['loinc_name'], 
        bind=input_dropdown, 
        name='Lab ', 
        init={'loinc_name': loinc}
    )

    y_zoom_top = alt.selection(type="interval", bind='scales', encodings=['y'])

    # data selection
    base = (
        alt.Chart(df_copy)
            .transform_filter(
                alt.datum['datalevel'] != ('site' if is_aggregated else 'country')
            ).transform_filter(
                # Use only the lab values on day 0
                alt.datum['days_since_admission'] == 0
            ).transform_filter(
                alt.datum['datalevel'] != 'combined'
            )
    )

    tooltip = [
        alt.Tooltip('siteid', title="Site ID"),
        alt.Tooltip('country', title="Country"),
        alt.Tooltip(mean_field, title="Lab value", format=".2f"),
        alt.Tooltip(num_pat_field, title="Number of patients")
    ]

    # ---- Upper plot showing the lab values by site id/country on day 0 ----
    dot = base.mark_tick(size=bar_size, thickness=tick_thickness).encode(
        x=alt.X(f"{x_field}:N", title=None, axis=alt.Axis(
            grid=True, labels=False, ticks=False, domain=True
        )),
        y=alt.Y(
            f"{mean_field}:Q",
            title=mean_field_title,
            scale=alt.Scale(zero=False, type="log")
        ),
        color=color,
        tooltip=tooltip
    )

    dot = (
        dot.properties(height=300, width=width)
            .facet(
                column=alt.Column("severity:N", title=None), bounds="flush"
            )
            .add_selection(y_zoom_top)
            .transform_filter(y_zoom_top)
            # a log scale cannot show zero or negative values
            .transform_filter(alt.datum[mean_field] > 0)
    )

    # ---- # of patients in bar charts ----
    bar = base.mark_bar(size=bar_size).encode(
        x=alt.X(
            f"{x_field}:N",
            title=x_field_title,
            axis=alt.Axis(
                grid=True, labels=True, ticks=True, domain=True
            )
        ),
        y=alt.Y(
            f"{num_pat_field}:Q", 
            title="# of patients",
            axis=alt.Axis(
                format='r',
                tickMinStep=1
            ),
            scale=alt.Scale(zero=True, padding=0, nice=True)
        ),
        color=color,
        tooltip=tooltip
    )

    middle_chart = (
        (bar).properties(height=100, width=width)
    ).facet(
        column=alt.Column("severity:N", title=None, header=alt.Header(labels=False)), bounds="flush"
    ).transform_filter(
        alt.datum['datalevel'] != 'combined'
    )

    # ---- Put plots together ----
    # Use the notebook-wide release date and the site count of the plotted
    # cohort instead of hard-coded values; a plain string, not a 1-tuple.
    subtitle = get_visualization_subtitle(
        data_release=DATA_AS_OF, with_num_sites=True, num_sites=num_sites
    )

    if is_pediatric:
        title_text = "Lab Values By Pediatric Site On Day 0"
    elif is_aggregated:
        title_text = 'Lab Values By Country On Day 0'
    else:
        title_text = "Lab Values By Site On Day 0"

    result_vis = (
        (dot & middle_chart)
            .resolve_scale(
                x='shared',
                color='shared',
                y='independent'
            )
            .properties(
                title={
                    "text": title_text,
                    'subtitle': subtitle,
                    'subtitleColor': 'gray'
                }
            )
            .add_selection(selection)
            .transform_filter(selection)
    )

    return result_vis
    
    

## Lab Values by Adult Sites on Day 0

In [None]:
# Day-0 chart, one tick/bar per non-pediatric site.
final = lab_plot_day0(df=df, is_pediatric=False, is_aggregated=False, bar_size=20)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_site_day0")

final

In [None]:
# Day-0 chart, one tick/bar per country (non-pediatric sites); fewer and wider bars.
final = lab_plot_day0(df=df, is_pediatric=False, is_aggregated=True, width=300, bar_size=40)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_country_day0")

final

## Lab Values by Pediatric Sites on Day 0

In [None]:
# Day-0 chart, one tick/bar per pediatric site.
# NOTE(review): when `df` is built from the precomputed-CSV cells, every row has
# is_pediatric == 'no', so this chart receives no data; pediatric rows are only
# produced by process_labs_df().
final = lab_plot_day0(df=df, is_pediatric=True, is_aggregated=False, bar_size=60)

final = apply_theme(final, header_label_font_size=16, legend_orient='right')

# presumably exports the chart for the website under "1.1_lab" -- see web.for_website
for_website(final, "1.1_lab", "lab_by_pediatric_site_day0")

final