In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_0

import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data

from constants_1_0 import COLUMNS
from utils_1_0 import apply_theme, get_visualization_subtitle
from web import for_website

# Data Preprocessing

## Lab Variation Data From Figshare
Use the latest data from https://doi.org/10.6084/m9.figshare.12152766.v1

In [None]:
# Source file: Lab_VariationByCountry.csv, fetched from the figshare downloader.
LAB_VARIATION_URL = "https://ndownloader.figshare.com/files/22345587"
labs = pd.read_csv(LAB_VARIATION_URL)

# Preview the first rows of the freshly loaded table.
labs.head()

In [None]:
# labs = pd.read_csv('../data/Lab_VariationByCountry.csv', header=[0]) # For loading local data

# Swap the short codes found in the table for the labels used in the chart.
# DataFrame.replace with scalars rewrites every cell that equals the code exactly.
labs = labs.replace('SITE_AVE', 'Within site variation')
labs = labs.replace('ALL', 'All countries')
labs = labs.replace('US', 'USA')


# Raw lab name -> display name (unit appended in parentheses where one is given).
consistent_loinc = {
    "alanine aminotransferase (ALT)": "Alanine aminotransferase (U/L)",
    "albumin": "Albumin (g/dL)",
    "aspartate aminotransferase (AST)": "Aspartate aminotransferase (U/L)",
    "total bilirubin": "Total bilirubin (mg/dL)",
    "C-reactive protein (CRP)": "C-reactive protein (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "lactate dehydrogenase (LDH)": "Lactate dehydrogenase (U/L)",
    "cardiac troponin": "Cardiac troponin (ng/mL)",
    "prothrombin time (PT)": "Prothrombin time (s)",
    "white blood cell count (Leukocytes)": "White blood cell count (10*3/uL)",
    "lymphocyte count": "Lymphocyte count (10*3/uL)",
    "neutrophil count": "Neutrophil count (10*3/uL)",
    "D-dimer": "D-dimer",
    "procalcitonin": "Procalcitonin (ng/mL)",
}

# Accept both raw names and names that were already converted, so that running
# this cell a second time is a no-op instead of a KeyError on the display names.
# Anything that is neither is still rejected, as the original dict lookup did.
recognized_labs = set(consistent_loinc) | set(consistent_loinc.values())
unmapped_labs = labs.loc[~labs["Lab"].isin(recognized_labs), "Lab"].unique()
if len(unmapped_labs) > 0:
    raise KeyError(f"Lab names missing from consistent_loinc: {list(unmapped_labs)}")

labs["Lab"] = labs["Lab"].map(lambda name: consistent_loinc.get(name, name))

labs.head()

# Visualization

In [None]:
# Dropdown listing every lab name once (np.unique returns them sorted); converted
# to a plain list so the binding holds ordinary Python strings.
input_dropdown = alt.binding_select(options=np.unique(labs["Lab"]).tolist())
# Start on the lab found in the first row. `.iloc[0]` is positional, so it does
# not depend on the frame's index containing the label 0.
selection = alt.selection_single(fields=['Lab'], bind=input_dropdown, name='Lab ', init={'Lab': labs["Lab"].iloc[0]})
# Clicking legend entries filters the chart to the chosen categories.
legend_selection = alt.selection_multi(fields=["Country"], bind="legend")

# Categories shown on the x axis and their colors, matched by position.
# "All countries" and "Within site variation" deliberately share the same gray.
COUNTRIES = ["All countries", "France", "Germany", "Italy", "USA", "Within site variation"]
COUNTRY_COLOR = ["#444444", "#0072B2", "#E69F00", "#009E73", "#D55E00", "#444444"]
color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
# Shared by the bar width and the mean tick length so the two layers line up.
tick_size = 40

# Bar layer: spans mean + SD (y) down to mean - SD (y2); both fields are
# produced by the transform_calculate applied to the layered chart below.
base = alt.Chart(labs).mark_bar(size=tick_size).encode(
    # Use the channel class that matches the channel (was alt.Y on x).
    x=alt.X('Country:N'),
    color=alt.Color('Country:N', scale=color_scale, title=None),
    # Same fix for the stroke channel (was alt.Color).
    stroke=alt.Stroke('Country:N', scale=color_scale),
    strokeWidth=alt.value(1),
    y=alt.Y('y:Q', title='Mean (SD)'),
    y2=alt.Y2('y2:Q'),
    tooltip=[
        alt.Tooltip("Country", title="Category"),
        alt.Tooltip("mean_val", title="Mean", format=".2f"),
        alt.Tooltip("stdev_val", title="Standard deviation", format=".2f"),
        alt.Tooltip("days_since_positive", title="Days since positive")
    ]
)

# Mean layer: a white tick drawn over each bar at the mean value.
mean = alt.Chart(labs).mark_tick(size=tick_size, thickness=2).encode(
    x=alt.X('Country:N', title=None),
    opacity=alt.value(1),
    color=alt.value('white'),
    y=alt.Y('mean_val:Q')
)

plot = alt.layer(
    base, mean
).add_selection(
    selection
).add_selection(
    legend_selection
).transform_filter(
    # Keep only the lab picked in the dropdown.
    selection
).transform_filter(
    # Keep only the categories picked in the legend.
    legend_selection
).transform_filter(
    # Drop any category that has no entry in the color scale.
    alt.FieldOneOfPredicate(field='Country', oneOf=COUNTRIES)
).transform_filter(
    # The chart shows day 0 only.
    alt.datum["days_since_positive"] == 0
).transform_calculate(
    # Upper and lower ends of the mean +/- one standard deviation bar.
    y='datum.mean_val + datum.stdev_val',
    y2='datum.mean_val - datum.stdev_val',
).properties(
    title={
        "text": ["Lab Variation Across Sites on Day 0"], 
        "dx": 50,
        "subtitle": get_visualization_subtitle(alt_num_sites=21),
        "subtitleColor": "gray",
        "anchor": "start",
    },
    width=350, height=400
)

plot = apply_theme(plot, legend_orient="right")

for_website(plot, "Labs", "Lab variation across sites on Day 0")

plot