In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from os.path import join
import math

from utils_1_1 import (
    get_country_color_map_none_pediatric,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

# Data Preprocessing

## Lab Variation Data From Figshare (Not supported yet)
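The intent is to load the latest lab variation data directly from Figshare. That download is not supported yet (the call is kept commented out in the next cell), so the local CSV is loaded instead.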

In [None]:
# Lab_VariationByCountry.csv
# labs = pd.read_csv("https://ndownloader.figshare.com/files/22345587")

# labs.head()

In [None]:
# Load the lab-variation table from the local data directory (path is relative
# to the notebook's working directory); the first row holds the column names.
local_labs_csv = '../data/Figure_3_lab_variation_day0_7.csv'
labs = pd.read_csv(local_labs_csv, header=[0])
labs

In [None]:
# Maps each raw lab name to the label shown in the chart
# (code prefix, lab name, unit).
loinc_map = {
    'alanine aminotransferase (ALT)': '1742-6, alanine aminotransferase (ALT) (U/L)',
    'albumin': '1751-7, albumin (g/dL)',
    'aspartate aminotransferase (AST)': '1920-8, aspartate aminotransferase (AST) (U/L)',
    'C-reactive protein (CRP) (Normal Sensitivity)': '1988-5, C-reactive protein (CRP) (Normal Sensitivity) (mg/L)',
    'cardiac troponin (High Sensitivity)': '49563-0, cardiac troponin (High Sensitivity) (ng/mL)',
    'cardiac troponin (Normal Sensitivity)': '6598-7, cardiac troponin (Normal Sensitivity) (ug/L)',
    'creatinine': '2160-0, creatinine (mg/dL)',
    'D-dimer': '48065-7, 48066-5, D-dimer (ng/mL)',
    'Ferritin': '2276-4, Ferritin (ng/mL)',
    'Fibrinogen': '3255-7, Fibrinogen (mg/dL)',
    'lactate dehydrogenase (LDH)': '2532-0, lactate dehydrogenase (LDH) (U/L)',
    'lymphocyte count': '731-0, lymphocyte count (10*3/uL)',
    'neutrophil count': '751-8, neutrophil count (10*3/uL)',
    'procalcitonin': '33959-8, procalcitonin (ng/mL)',
    'prothrombin time (PT)': '5902-2, prothrombin time (PT) (s)',
    'total bilirubin': '1975-2, total bilirubin (mg/dL)',
    'white blood cell count (Leukocytes)': '6690-2, white blood cell count (Leukocytes) (10*3/uL)',
}


def _format_country(name):
    """Capitalize a country name, keeping the 'USA' acronym in upper case."""
    if name == 'USA':
        return 'USA'
    return name.capitalize()


# Build the cleaned table from `labs` without mutating it, so this cell can be
# re-run safely. Columns are transformed in place (same column order as the input).
labs_labeled = labs.rename(
    columns={'ever': 'Ever', 'never': 'Never', 'diff': 'Difference'}
).assign(
    country=lambda frame: frame['country'].apply(_format_country),
    scale=lambda frame: frame['scale'].apply(lambda label: label.capitalize()),
    # A lab name missing from loinc_map raises KeyError here.
    labname=lambda frame: frame['labname'].apply(lambda lab: loinc_map[lab]),
)

# Use only the original scale data
original_scale_labs = labs_labeled[labs_labeled['scale'] == 'Original']

# Long format: one row per (labname, scale, country, severity), where `severity`
# is one of the column names 'Ever' / 'Never' / 'Difference' and `var` its value.
processed_labs = pd.melt(
    original_scale_labs,
    id_vars=['labname', 'scale', 'country'],
    value_vars=['Ever', 'Never', 'Difference'],
    var_name='severity',
    value_name='var',
)

processed_labs

# processed_labs['country'].unique() # for debug

In [None]:
# The helper's mapping supplies the per-country names (keys) and colors (values).
# Three summary categories are added around them, all drawn in black:
# 'Within country' and 'Between country' first, 'Within site' last.
country_color_map = get_country_color_map_none_pediatric()

COUNTRY_NAMES = ['Within country', 'Between country', *country_color_map.keys(), 'Within site']
COUNTRY_COLORS = ['#000000', '#000000', *country_color_map.values(), '#000000']

COUNTRY_NAMES

# Visualization

In [None]:
# Dropdown bound to a single-value selection so the chart shows one lab at a time.
# The unique lab names are computed once and passed as a plain list.
lab_names = list(processed_labs['labname'].unique())
input_dropdown = alt.binding_select(options=lab_names)
selection = alt.selection_single(
    fields=['labname'],
    bind=input_dropdown,
    name='Lab ',
    init={'labname': lab_names[0]},
)
# Clicking legend entries filters the chart to the selected `country` categories.
legend_selection = alt.selection_multi(fields=['country'], bind="legend")

color_scale = alt.Scale(domain=COUNTRY_NAMES, range=COUNTRY_COLORS)
tick_size = 30

# Facet order. The entries must match the `severity` values exactly (they are the
# capitalized column names 'Ever' / 'Never' / 'Difference' produced by the melt);
# a lowercase list matches nothing and leaves the panel order to the data order.
severity_order = ['Ever', 'Never', 'Difference']

# Disabled draft of a standard-deviation bar layer ("For SD"), kept for reference.
# base = alt.Chart(processed_labs).mark_bar(size=tick_size).encode(
#     x=alt.Y('country:N'),
#     color=alt.Color('country:N', scale=color_scale, title=None),
#     stroke=alt.Color('country:N', scale=color_scale),
#     strokeWidth=alt.value(1),
#     y=alt.Y('y:Q', title='Lab variation'),
#     y2=alt.Y2('y:Q'),
# #     tooltip=[
# #         alt.Tooltip("Country", title="Category"),
# #         alt.Tooltip("mean_val", title="Mean", format=".2f"),
# #         alt.Tooltip("stdev_val", title="Standard deviation", format=".2f"),
# #         alt.Tooltip("days_since_positive", title="Days since positive")
# #     ]
# )

# One tick per country/category, positioned at its lab-variation value.
mean = alt.Chart(
    processed_labs
).mark_tick(
    size=tick_size,
    thickness=4
).encode(
    # Fix the x domain so every category keeps its slot even when filtered out.
    x=alt.X('country:N', title=None, scale=alt.Scale(domain=COUNTRY_NAMES)),
    opacity=alt.value(1),
    color=alt.Color('country:N', scale=color_scale, title=None),
    y=alt.Y('var:Q', title='Lab variation'),
    tooltip=[
        alt.Tooltip('labname', title="Labs"),
        alt.Tooltip("country", title="Country"),
        alt.Tooltip("var", title="Variation", format=".2f"),
    ],
)

# One panel per severity value; the facet header labels are hidden.
plot = (
    mean
        .properties(height=400, width=400)
).facet(
    facet=alt.Facet("severity:N", title=None, header=alt.Header(labels=False), sort=severity_order)
)

# Register both selections and filter the data by them.
plot = plot.add_selection(
    selection
).transform_filter(
    selection
).add_selection(
    legend_selection
).transform_filter(
    legend_selection
)

# NOTE(review): the title says "Day 0" while the input file name mentions
# "day0_7" — confirm which time window the data covers.
plot = apply_theme(plot, legend_orient="right", header_label_font_size=15).properties(
    title={
        "text": "Lab Variation Across Sites on Day 0",
        "subtitle": get_visualization_subtitle(data_release='2020-08-03', with_num_sites=True, num_sites=45),
        "subtitleColor": "gray",
        "anchor": "middle",
    },
#     width=350, height=400 # This generates error (which should be submitted to the repo as an issue: More readable error message)
)

# Hand the chart to the project's website helper
# (presumably exports it for the site — see the `web` module).
for_website(plot, "1.1_Labs", "Lab variation across sites on Day 0")

plot