In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from os.path import join
import math

from utils_1_1 import (
    get_country_color_map_none_pediatric,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

# Data Preprocessing

## Lab Variation Data From Figshare (Not supported yet)
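Use the latest data from Figshare. Loading directly from Figshare is not supported yet, so the local CSV is loaded below instead.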

In [None]:
# Lab_VariationByCountry.csv
# labs = pd.read_csv("https://ndownloader.figshare.com/files/22345587")

# labs.head()

In [None]:
# Load the lab ROC-over-time table from the local data directory.
labs = pd.read_csv(
    '../data/Figure_5_lab_ROC_dist_overtime.csv',
    header=[0],
)
labs

In [None]:
# Display label for every raw lab name found in the "nm.lab" column.
# Each label has the form "<LOINC code(s)>, <lab name> (<unit>)".
loinc_map = {
    'alanine aminotransferase (ALT)': '1742-6, alanine aminotransferase (ALT) (U/L)',
    'albumin': '1751-7, albumin (g/dL)',
    'aspartate aminotransferase (AST)': '1920-8, aspartate aminotransferase (AST) (U/L)',
    'C-reactive protein (CRP) (Normal Sensitivity)': '1988-5, C-reactive protein (CRP) (Normal Sensitivity) (mg/L)',
    'cardiac troponin (High Sensitivity)': '49563-0, cardiac troponin (High Sensitivity) (ng/mL)',
    'cardiac troponin (Normal Sensitivity)': '6598-7, cardiac troponin (Normal Sensitivity) (ug/L)',
    'creatinine': '2160-0, creatinine (mg/dL)',
    'D-dimer': '48065-7, 48066-5, D-dimer (ng/mL)',
    'Ferritin': '2276-4, Ferritin (ng/mL)',
    'Fibrinogen': '3255-7, Fibrinogen (mg/dL)',
    'lactate dehydrogenase (LDH)': '2532-0, lactate dehydrogenase (LDH) (U/L)',
    'lymphocyte count': '731-0, lymphocyte count (10*3/uL)',
    'neutrophil count': '751-8, neutrophil count (10*3/uL)',
    'procalcitonin': '33959-8, procalcitonin (ng/mL)',
    'prothrombin time (PT)': '5902-2, prothrombin time (PT) (s)',
    'total bilirubin': '1975-2, total bilirubin (mg/dL)',
    'white blood cell count (Leukocytes)': '6690-2, white blood cell count (Leukocytes) (10*3/uL)',
}

# Work on a copy so the raw `labs` frame is left untouched.
processed_labs = labs.copy()

# Swap each raw lab name for its label. A name that is missing from
# `loinc_map` raises KeyError rather than passing through silently.
processed_labs["nm.lab"] = processed_labs["nm.lab"].apply(
    lambda lab_name: loinc_map[lab_name]
)

# Shorten the dotted column names to the ones the chart encodings use.
processed_labs = processed_labs.rename(columns={'nm.lab': 'lab', 'y.scale': 'scale'})

processed_labs

In [None]:
# Each y-scale variant paired with its color. The two lists below are derived
# from this single mapping so their order always matches.
_SCALE_TO_COLOR = {'original': 'steelblue', 'log': 'salmon'}

Y_SCALES = list(_SCALE_TO_COLOR.keys())
Y_SCALE_COLORS = list(_SCALE_TO_COLOR.values())

# Visualization

In [None]:
# Dropdown listing every lab label. The chart shows one lab at a time and
# starts on the first label found in the data.
lab_options = list(processed_labs['lab'].unique())
input_dropdown = alt.binding_select(options=lab_options)
selection = alt.selection_single(
    fields=['lab'],
    bind=input_dropdown,
    name='Lab ',
    init={'lab': lab_options[0]},
)

# Fixed color per y-scale variant.
color_scale = alt.Scale(domain=Y_SCALES, range=Y_SCALE_COLORS)

# Semi-transparent band spanning `auc_ci_lb` to `auc_ci_ub` for each day.
ci = alt.Chart(processed_labs).mark_area().encode(
    x=alt.X('day:Q', title='Days since positive'),
    y=alt.Y('auc_ci_lb:Q', title='AUC'),
    y2=alt.Y2('auc_ci_ub:Q'),
    color=alt.Color('scale:N', scale=color_scale, title='Scale'),
    opacity=alt.value(0.3),
    tooltip=[
        alt.Tooltip("auc", title="AUC", format=".2f"),
        alt.Tooltip("day", title="Days since positive"),
    ],
)

# Dashed, fully opaque line tracing the `auc` column. It reuses the band's
# x, color and tooltip encodings and overrides only y and opacity.
mean = ci.mark_line(size=3, strokeDash=[3, 2]).encode(
    y=alt.Y('auc:Q', title='AUC'),
    opacity=alt.value(1),
)

# Layer band + line, attach the dropdown, and keep only the selected lab.
plot = (
    (ci + mean)
    .properties(width=450, height=450)
    .add_selection(selection)
    .transform_filter(selection)
)
# Alternative for showing all labs together instead of filtering:
# .facet(
#     facet=alt.Facet("lab:N", title=None, header=alt.Header(labels=False)), bounds="flush"
# )

# Title plus a subtitle built by the shared helper for this data release.
plot = plot.properties(
    title={
        "text": "Lab ROC Distribution Over Time",
        "subtitle": get_visualization_subtitle(
            data_release='2020-08-03',
            with_num_sites=True,
            num_sites=45,
        ),
        "subtitleColor": "gray",
        "anchor": "middle",
    }
)

plot = apply_theme(plot, legend_orient="right", header_label_font_size=15)

# NOTE(review): "distrubution" looks like a typo of "distribution". Confirm
# whether for_website uses this string as an identifier before correcting it.
for_website(plot, "1.1_Labs", "Lab ROC distrubution")

plot