In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_0

import altair as alt
import pandas as pd
import numpy as np

from constants_1_0 import COLUMNS
from utils_1_0 import (
    read_lab_drop_unadjusted_df,
    get_visualization_subtitle,
    apply_theme
)
from web import for_website

# Data Preprocessing
## Lab Drop Ratio Data From Figshare
Use the latest data from https://doi.org/10.6084/m9.figshare.12152766.v1

In [None]:
# Direct-download URL of Lab_drop_unadjusted.csv on Figshare.
LAB_DROP_UNADJUSTED_URL = "https://ndownloader.figshare.com/files/22345581"

df = pd.read_csv(LAB_DROP_UNADJUSTED_URL)

# Preview the first rows of the raw table.
df.head()

In [None]:
def process_lab_drop(df):
    """Tidy the wide lab-drop table into one row per (lab, day, site).

    Steps, in order: map raw lab names to display labels, move the labs listed
    in ``on_the_top`` to the front (all other labs follow, rows keeping their
    incoming relative order), melt every non-id column into
    ``COLUMNS.SITE_ID`` / ``"percentage"``, drop rows whose day value is the
    literal ``"n_max"``, convert ``COLUMNS.DAYS_SINCE_POSITIVE`` to integers,
    and replace any cell equal to ``"US"`` with ``"USA"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``"Lab"`` column and a
        ``COLUMNS.DAYS_SINCE_POSITIVE`` column; every other column is melted
        as a site column. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Long table with columns ``"Lab"``, ``COLUMNS.DAYS_SINCE_POSITIVE``
        (integer), ``COLUMNS.SITE_ID`` and ``"percentage"``.

    Raises
    ------
    KeyError
        If ``df["Lab"]`` holds a name that is missing from the label mapping.
    ValueError or TypeError
        If a remaining day value cannot be converted to an integer.
    """
    # Raw lab name -> display label (with unit where one is given).
    consistent_loinc = {
        "alanine aminotransferase (ALT)": "Alanine aminotransferase (U/L)",
        "albumin": "Albumin (g/dL)",
        "aspartate aminotransferase (AST)": "Aspartate aminotransferase (U/L)",
        "total bilirubin": "Total bilirubin (mg/dL)",
        "C-reactive protein (CRP)": "C-reactive protein (mg/dL)",
        "creatinine": "Creatinine (mg/dL)",
        "lactate dehydrogenase (LDH)": "Lactate dehydrogenase (U/L)",
        "cardiac troponin": "Cardiac troponin (ng/mL)",
        "prothrombin time (PT)": "Prothrombin time (s)",
        "white blood cell count (Leukocytes)": "White blood cell count (10*3/uL)",
        "lymphocyte count": "Lymphocyte count (10*3/uL)",
        "neutrophil count": "Neutrophil count (10*3/uL)",
        "D-dimer": "D-dimer",
        "procalcitonin": "Procalcitonin (ng/mL)",
    }

    # Labs pinned to the front of the ordering, in this order.
    on_the_top = [
        "Creatinine (mg/dL)",
        "C-reactive protein (mg/dL)",
        "Total bilirubin (mg/dL)",
        "White blood cell count (10*3/uL)",
        "D-dimer",
        "Lymphocyte count (10*3/uL)",
        "Neutrophil count (10*3/uL)"
    ]

    # Copy first so the caller's frame is left untouched; without this the
    # relabelled "Lab" column and the temporary "sort" column leak into the
    # input and a second run fails on the already-mapped lab names.
    df = df.copy()
    df["Lab"] = df["Lab"].apply(lambda x: consistent_loinc[x])

    # Rearrange labs: pinned labs get their list position, all others rank 99.
    df["sort"] = df["Lab"].apply(lambda x: on_the_top.index(x) if x in on_the_top else 99)
    # mergesort is stable, so rows keep their incoming order within each rank.
    df = df.sort_values(by=["sort"], kind="mergesort")
    df = df.drop(columns=["sort"])

    # Wide to long
    df = pd.melt(df, id_vars=(["Lab", COLUMNS.DAYS_SINCE_POSITIVE]))
    df = df.rename(columns={"variable": COLUMNS.SITE_ID, "value": "percentage"})

    # Drop the "n_max" rows, then actually store the day values as integers
    # (an `==` comparison here would compute the conversion and discard it).
    df = df[df[COLUMNS.DAYS_SINCE_POSITIVE] != "n_max"].copy()
    df[COLUMNS.DAYS_SINCE_POSITIVE] = df[COLUMNS.DAYS_SINCE_POSITIVE].astype(int)
    df = df.replace('US', 'USA')

    return df

# Tidy the downloaded table into long format for plotting.
# Alternative that loads local data instead of the download:
# unadjusted_df = read_lab_drop_unadjusted_df()
unadjusted_df = process_lab_drop(df)

# Show the processed frame as the cell output.
unadjusted_df

# Visualization

In [None]:
# Label and color for the "All countries" entry.
ALL_COUNTRY, ALL_COUNTRY_COLOR = "All countries", "#444444"

# Countries and their colors, paired by position.
COUNTRIES = ["France", "Germany", "Italy", "USA"]
COUNTRY_COLORS = ["#0072B2", "#E69F00", "#009E73", "#D55E00"]

# Number of anonymized sites per country, in COUNTRIES order.
_SITES_PER_COUNTRY = [2, 3, 3, 12]

# Site ids "<country>-NN" (NN = 01, 02, ...), grouped by country.
ANONYMOUS_SITES = [
    f"{country}-{number:02d}"
    for country, n_sites in zip(COUNTRIES, _SITES_PER_COUNTRY)
    for number in range(1, n_sites + 1)
]
# One color per site id, paired by position: every site gets its country's color.
ANONYMOUS_COLORS = [
    color
    for color, n_sites in zip(COUNTRY_COLORS, _SITES_PER_COUNTRY)
    for _ in range(n_sites)
]

In [None]:
# Lift Altair's row limit so charts can be built from frames with more than 5000 rows.
alt.data_transformers.disable_max_rows()

def lab_drop_plot(df, title, level):
    """Build the interactive chart of ``percentage`` over days since positive.

    The chart layers lines, hoverable points and a red rule at the hovered
    day. A dropdown bound to ``"Lab"`` picks the lab shown (initially the first
    lab found in ``df``), and legend clicks filter the plotted series.

    Parameters
    ----------
    df : pandas.DataFrame
        Long table with ``"Lab"``, ``COLUMNS.DAYS_SINCE_POSITIVE``,
        ``COLUMNS.SITE_ID`` and ``"percentage"`` columns.
    title : str
        Main title text of the chart.
    level : str
        ``"country"`` colors and filters by ``COUNTRIES``; any other value
        uses ``ANONYMOUS_SITES``.

    Returns
    -------
    The layered chart after ``apply_theme(..., legend_orient="right")``.
    """
    days_field = f"{COLUMNS.DAYS_SINCE_POSITIVE}:Q"
    by_country = level == "country"

    # Selections, created in a fixed order: lab dropdown, legend, hover.
    labs = df["Lab"].unique().tolist()
    lab_selection = alt.selection_single(
        fields=["Lab"],
        bind=alt.binding_select(options=labs),
        init={"Lab": labs[0]},
        name="Lab",
    )
    legend_selection = alt.selection_multi(fields=[COLUMNS.SITE_ID], bind="legend")
    hover = alt.selection(
        type="single",
        nearest=True,
        on="mouseover",
        encodings=["x", "y"],
        empty='none',
        clear="mouseout",
    )

    # Thin red rule drawn only at the hovered point's day.
    hover_rule = (
        alt.Chart(df)
        .mark_rule(color="red")
        .encode(x=days_field, size=alt.value(0.5))
        .transform_filter(hover)
    )

    # Color scale: the domain also decides which series survive the filter below.
    if by_country:
        color_domain, color_range = COUNTRIES, COUNTRY_COLORS
    else:
        color_domain, color_range = ANONYMOUS_SITES, ANONYMOUS_COLORS

    x_encoding = alt.X(
        days_field,
        scale=alt.Scale(domain=[-1, 28], nice=False, clamp=False),
        title="Days since positive",
    )
    y_encoding = alt.Y(
        "percentage",
        axis=alt.Axis(format="%"),
        scale=alt.Scale(domain=[-0.05, 1.05], nice=False, clamp=False),
        title="Percentage of measured (%)",
    )
    color_encoding = alt.Color(
        COLUMNS.SITE_ID,
        scale=alt.Scale(domain=color_domain, range=color_range),
        title=None,
    )
    tooltips = [
        alt.Tooltip(COLUMNS.SITE_ID, title=("Country" if by_country else "Site")),
        alt.Tooltip("percentage", title="Percentage of measured (%)", format=".1%"),
        alt.Tooltip(COLUMNS.DAYS_SINCE_POSITIVE, title="Days since positive"),
    ]

    lines = alt.Chart(df).mark_line(size=2).encode(
        x=x_encoding,
        y=y_encoding,
        color=color_encoding,
        opacity=alt.value(0.7),
        tooltip=tooltips,
    )

    # Points reuse the line encodings; the hovered point is drawn larger.
    points = (
        lines.mark_circle(size=30, opacity=0.7)
        .encode(size=alt.condition(~hover, alt.value(30), alt.value(60)))
        .add_selection(hover)
    )

    layered = lines + points + hover_rule

    # Keep only series in the color domain, then apply the two user selections.
    layered = layered.transform_filter(
        alt.FieldOneOfPredicate(field=COLUMNS.SITE_ID, oneOf=color_domain)
    )
    layered = layered.transform_filter(lab_selection).transform_filter(legend_selection)
    layered = layered.add_selection(legend_selection).add_selection(lab_selection)

    # NOTE(review): alt_num_sites=21 is hard-coded while ANONYMOUS_SITES lists 20 ids — confirm.
    chart_title = {
        "text": title,
        "dx": 70,
        "subtitle": get_visualization_subtitle(alt_num_sites=21),
        "subtitleColor": "gray",
    }
    layered = layered.properties(title=chart_title, width=500, height=400).interactive()

    return apply_theme(layered, legend_orient="right")

In [None]:
# Per-site view: any level other than "country" makes lab_drop_plot use the anonymized site palette.
unadjusted_plot = lab_drop_plot(unadjusted_df, "Percentage of Measured Relative to Baseline", "site")

# NOTE(review): for_website comes from the project's web module; presumably it
# exports the chart under the "Labs" section with this caption — confirm there.
for_website(unadjusted_plot, "Labs", "Percentage of measured relative to baseline by site")

# Render the chart as the cell output.
unadjusted_plot

In [None]:
# Per-country view: level "country" makes lab_drop_plot use COUNTRIES / COUNTRY_COLORS.
# This rebinds `unadjusted_plot`, replacing whatever chart the name held before.
unadjusted_plot = lab_drop_plot(unadjusted_df, "Percentage of Measured Relative to Baseline", "country")

# NOTE(review): for_website comes from the project's web module; presumably it
# exports the chart under the "Labs" section with this caption — confirm there.
for_website(unadjusted_plot, "Labs", "Percentage of measured relative to baseline by country")

# Render the chart as the cell output.
unadjusted_plot