In [1]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

LOG_LABS = ['alanine aminotransferase (ALT)', 'aspartate aminotransferase (AST)', 'C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer', 'Ferritin', 'lactate dehydrogenase (LDH)']

consistent_loinc = {
    "C-reactive protein (CRP) (Normal Sensitivity)": "C-reactive protein (Normal Sensitivity) (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "Ferritin": "Ferritin (ng/mL)",
    "D-dimer": "D-dimer (ng/mL)",
    "albumin": "Albumin (g/dL)",        

    "Fibrinogen": "Fibrinogen (mg/dL)",
    "alanine aminotransferase (ALT)": "Alanine aminotransferase (U/L)",
    "aspartate aminotransferase (AST)": "Aspartate aminotransferase (U/L)",
    "total bilirubin": "Total bilirubin (mg/dL)",
    "lactate dehydrogenase (LDH)": "Lactate dehydrogenase (U/L)",
    "cardiac troponin": "Cardiac troponin (ng/mL)",
    "cardiac troponin (High Sensitivity)": "Cardiac Troponin (High Sensitivity) (ng/mL)",
    "cardiac troponin (Normal Sensitivity)": "Cardiac Troponin (Normal Sensitivity) (ng/mL)",
    "prothrombin time (PT)": "Prothrombin time (s)",
    "white blood cell count (Leukocytes)": "White blood cell count (10*3/uL)",
    "lymphocyte count": "Lymphocyte count (10*3/uL)",
    "neutrophil count": "Neutrophil count (10*3/uL)",
    "procalcitonin": "Procalcitonin (ng/mL)",
}

# Let's remove units
consistent_loinc = {
    "C-reactive protein (CRP) (Normal Sensitivity)": "C-reactive protein (Normal Sensitivity)",
    "creatinine": "Creatinine",
    "Ferritin": "Ferritin",
    "D-dimer": "D-dimer",
    "albumin": "Albumin",        

    "Fibrinogen": "Fibrinogen",
    "alanine aminotransferase (ALT)": "Alanine aminotransferase",
    "aspartate aminotransferase (AST)": "Aspartate aminotransferase",
    "total bilirubin": "Total bilirubin",
    "lactate dehydrogenase (LDH)": "Lactate dehydrogenase",
    "cardiac troponin": "Cardiac troponin",
    "cardiac troponin (High Sensitivity)": "Cardiac Troponin (High Sensitivity)",
    "cardiac troponin (Normal Sensitivity)": "Cardiac Troponin (Normal Sensitivity)",
    "prothrombin time (PT)": "Prothrombin time",
    "white blood cell count (Leukocytes)": "White blood cell count",
    "lymphocyte count": "Lymphocyte count",
    "neutrophil count": "Neutrophil count",
    "procalcitonin": "Procalcitonin",
}

In [2]:
df = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_values_standardized.csv"))

df

Unnamed: 0.1,Unnamed: 0,scale,setting,Lab,days_since_positive,mean,total_n,wave
0,1,original,all,alanine aminotransferase (ALT),0,1.000000,21375,early
1,2,original,all,alanine aminotransferase (ALT),1,1.085580,15575,early
2,3,original,all,alanine aminotransferase (ALT),2,1.142294,12721,early
3,4,original,all,alanine aminotransferase (ALT),3,1.202003,11616,early
4,5,original,all,alanine aminotransferase (ALT),4,1.292556,10475,early
...,...,...,...,...,...,...,...,...
2695,2696,log,never,white blood cell count (Leukocytes),10,1.075961,2357,late
2696,2697,log,never,white blood cell count (Leukocytes),11,1.073347,2016,late
2697,2698,log,never,white blood cell count (Leukocytes),12,1.053355,1768,late
2698,2699,log,never,white blood cell count (Leukocytes),13,1.081957,1491,late


In [3]:
obs = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_obs.csv"))

obs

Unnamed: 0.1,Unnamed: 0,country,lab,day,cohort,setting,p,se,ci_95L,ci_95U,wave
0,1,ALL,alanine aminotransferase (ALT),0,dayX,all,0.645241,0.035045,0.576553,0.713929,early
1,2,ALL,alanine aminotransferase (ALT),0,dayX,ever,0.742850,0.033811,0.676580,0.809119,early
2,3,ALL,alanine aminotransferase (ALT),0,day0,all,0.645241,0.035045,0.576553,0.713929,early
3,4,ALL,alanine aminotransferase (ALT),0,day0,ever,0.742850,0.033811,0.676580,0.809119,early
4,5,ALL,albumin,0,dayX,all,0.511156,0.075393,0.363386,0.658927,early
...,...,...,...,...,...,...,...,...,...,...,...
2035,2036,ALL,prothrombin time (PT),14,day0,ever,0.058576,0.010483,0.038029,0.079122,late
2036,2037,ALL,cardiac troponin (Normal Sensitivity),14,dayX,all,0.181888,0.093055,-0.000499,0.364275,late
2037,2038,ALL,cardiac troponin (Normal Sensitivity),14,dayX,ever,0.211473,0.109851,-0.003835,0.426781,late
2038,2039,ALL,cardiac troponin (Normal Sensitivity),14,day0,all,0.026835,0.010066,0.007106,0.046564,late


In [4]:
pdf = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals.csv"))

pdf

Unnamed: 0,setting,scale,day,alanine aminotransferase (ALT),albumin,aspartate aminotransferase (AST),C-reactive protein (CRP) (Normal Sensitivity),creatinine,D-dimer,Ferritin,Fibrinogen,lactate dehydrogenase (LDH),lymphocyte count,neutrophil count,procalcitonin,prothrombin time (PT),total bilirubin,white blood cell count (Leukocytes)
0,all,original,0,0.032548,0.353888,0.026598,0.278001,0.564719,0.507553,0.200011,0.359778,0.035103,0.876745,0.3407859,0.930396,0.636803,0.422204,0.06471542
1,all,original,1,0.029348,0.086859,0.077904,0.136755,0.081747,0.105413,0.166666,0.088545,0.012047,0.305688,0.6464815,0.182258,0.714604,0.776399,0.336121
2,all,original,7,0.042113,0.086858,0.000236,1e-06,0.037937,0.613761,0.006207,0.011046,0.762652,0.780891,9.343029e-05,0.01857,0.111383,0.227032,0.0009124298
3,all,original,14,0.952383,0.517127,0.619118,0.574316,0.084144,0.252127,0.713868,0.653797,0.273464,0.755246,0.03166109,0.169323,0.232971,0.823694,0.1234478
4,all,log,0,0.011767,0.701279,0.004853,0.035668,0.578194,0.867523,0.093924,0.185032,0.082304,0.575154,0.4406131,,0.592056,0.290839,0.06360718
5,all,log,1,0.015438,0.551489,0.007788,0.006148,0.13031,0.533865,0.057882,0.079137,0.026931,0.02009,0.8157373,0.081388,0.770159,0.932564,0.3114199
6,all,log,7,0.070514,0.049591,6e-06,5.8e-05,0.06544,0.727616,0.003818,0.024274,0.147255,0.717529,2.047855e-05,0.961325,0.261806,0.922171,0.0001586717
7,all,log,14,0.008944,0.72344,0.001153,0.641766,0.161468,0.124273,0.269769,0.59211,0.258909,0.002684,0.1609682,0.307511,0.375149,0.068328,0.04873132
8,ever,original,0,0.069859,0.222214,0.122178,0.116177,0.904323,0.334781,0.244104,0.520219,0.062955,0.385283,0.1796908,0.783816,0.844947,0.356185,0.1271719
9,ever,original,1,0.020204,0.063487,0.160135,0.060498,0.175808,0.059711,0.289826,0.03469,0.017186,0.145699,0.9513512,0.172087,0.956442,0.712913,0.7351312


In [5]:
domain_by_lab = {
    'C-reactive protein (CRP) (Normal Sensitivity)': [0.85, 1.15],
    'albumin': [0.75, 1.05], 
    'total bilirubin': [0.95, 1.30], 
    'creatinine': [0.85, 1.15], 
    'Ferritin': [0.95, 1.07], 
    'D-dimer': [0.97, 1.14]
}

domain_by_lab = {
    'C-reactive protein (CRP) (Normal Sensitivity)': [0.85, 1.15],
    'Ferritin': [0.97, 1.08], 
    'Fibrinogen': [0.88, 1.05], 
    'procalcitonin': [0.9, 2.5], 
    'D-dimer': [0.96, 1.15], 
    'creatinine': [0.85, 1.15]
}

def plot(_d, patient_group='all', lab=None, obs=None, pdf=None, i=0, show_patients=False):
    d = _d.copy()
    o = obs.copy()
    p = pdf.copy()
    
    """
    DATA PREPROCESSING...
    """
    if lab in LOG_LABS:
        d = d[d.scale == 'log']
        p = p[p.scale == 'log']
    else:
        d = d[d.scale == 'original']
        p = p[p.scale == 'original']
        
    d = d.drop(columns=['Unnamed: 0'])
    d.wave = d.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    
    d = d[d.Lab == lab]
    
    d17 = d[(1 <= d.days_since_positive) & (d.days_since_positive <= 7)]
    d = d[(d.days_since_positive == 0) | (d.days_since_positive == 1) | (d.days_since_positive == 7)]

    d.days_since_positive = d.days_since_positive.apply(lambda x: f"Day {x}")
    d17.days_since_positive = d17.days_since_positive.apply(lambda x: f"Day {x}")
    
    #### PVAL ###############################
    p = p[p.setting == patient_group]
    p['is_sig'] = False
    p.is_sig = p[lab] <= 0.05
    p.is_sig = p.is_sig.apply(lambda x: 'p<0.05' if x else 'p>0.05')
    p = p[['setting','day', 'is_sig']]
    p.day = p.day.apply(lambda x: f"Day {x}")
    
    """
    MERGE
    """
    d = pd.merge(d, p, how='left', left_on=['days_since_positive','setting'], right_on = ['day','setting'])
    
    #### OBS ################################
    o = o.rename(columns={"p.lwr": "p_lwr", "p.upr": "p_upr"})
    o = o[o.lab == lab]
    o = o.drop(columns=['Unnamed: 0'])
    o.wave = o.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    o = o[o.cohort == 'dayX']
    o = o[o.setting == patient_group]
    o = o.rename(columns={
        'wave': 'Wave'
    })    
    
    """
    DAY 1-7 AVERAGE
    """
    d17['mean'] = d17['mean'] * d17['total_n']
    d17 = d17.groupby(['Lab', 'setting', 'Wave']).sum().reset_index()
    d17['mean'] = d17['mean'] / d17['total_n']

    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    
    """
    LABS
    """
    LAB_DROPDOWN = alt.binding_select(options=LABS)
    LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": lab if lab != None else LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=4, opacity=opacity, point=False
    ).encode(
        x=alt.X('days_since_positive:N', title=None, axis=alt.Axis(labels=False) if (patient_group == 'all') & (show_patients == False) else alt.Axis()), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False, nice=False, padding=0, domain=domain_by_lab[lab]), title=['Mean Lab Value'] if i == 0 else None, axis=alt.Axis(titleX=titleX)), #domain=domain_by_lab[lab]
        color=alt.Color('Wave:N', scale=alt.Scale(domain=['First', 'Second'], range=WAVE_COLOR))
    ).properties(
        width=200,
        height=200
    )
    
    point_m = alt.Chart(
        d
    ).mark_point(
        opacity=opacity, filled=True, strokeWidth=3
    ).encode(
        x=alt.X('days_since_positive:N', title=None), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title=['Mean Lab Value'] if i == 0 else None, axis=alt.Axis(titleX=titleX)), # 'All Patients' if patient_group == 'all' else "Ever Severe Patients", 
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR)),
        stroke=alt.Stroke('is_sig:N', scale=alt.Scale(domain=['p<0.05'], range=['black']), title='Significance'),
        size=alt.Size('total_n:Q', title="# of Patients", scale=alt.Scale(domain=[0, 30000], range=[100, 600], zero=False)),
        strokeWidth=alt.value(3)
    )
    
    line_m = (line_m + point_m)
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    """
    Day 1-7 Average
    """
    bar_m = alt.Chart(
        d17
    ).mark_bar(
        size=32,
        stroke='black'
    ).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value', axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=10, nice=False)),
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR))
    ).properties(
        width=100
    )
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    text = alt.Chart(
        d17
    ).mark_text(size=16, dx=0, dy=-4, color='black', baseline='bottom', align='center', angle=0, fontWeight=500).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value' if i == 0 else None, axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=10, nice=False)),
        text=alt.Text('mean:Q', format=".2f")
    )
#     .transform_filter(
#         LAB_SELECTION
#     )
    bar_m = (bar_m)# + text)
       
        
    """
    OBSERVATION
    """
    LAB_FIELD_NAME = lab.replace(' ', '.').replace('(', '.').replace(')', '.').replace('-', '.') # Used in the original files
    o = o.rename(columns={
        f"{LAB_FIELD_NAME}": LAB_FIELD_NAME.replace('.', '_')
    })
    LAB_FIELD_NAME = LAB_FIELD_NAME.replace('.', '_')
    
    obs_line = alt.Chart(
        o
    ).mark_line(
        point=True, opacity=0.7, size=4
    ).encode(
        x=alt.X('day:Q', title='Days Since Admission'),
        y=alt.Y(f"p:Q", scale=alt.Scale(domain=[0, 1]), axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested"),
        color=alt.Color("Wave:N")
    )

    obs_error = obs_line.mark_errorbar(opacity=0.6).encode(
        x=alt.X('day:Q', title='Days Since Admission', scale=alt.Scale(padding=10, nice=False)),
        y=alt.Y('ci_95L:Q', axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested" if i == 0 else None),
        y2=alt.Y2('ci_95U:Q'),
        color=alt.Color("Wave:N"),
        strokeWidth=alt.value(1.5)
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    obs_line = alt.layer(obs_line, obs_error).properties(
#         title={
#             "text": "Percentage of Patients Tested",
#             "dx": 80,
#             # "fontSize": 16,
#             # "color": "gray"
#         },
        width=200,
        height=150
    )
    
    plot = (
        line_m if show_patients == False else (
            alt.vconcat(
            # alt.hconcat(line_m, bar_m, spacing=20).resolve_scale(y='shared'),
            line_m,
            obs_line,
            spacing=5
        ).resolve_scale(x='independent')
        )
    ).properties(
            title={
                "text": consistent_loinc[lab].replace(' (Normal Sensitivity)', ''),
                "anchor": 'middle',
                'fontSize': 18,
                'dx': (5 if i != 0 else 13) if show_patients == False else (20 if i != 0 else 40)
            }
        )
    
    return plot

# Main Labs

In [6]:
SELECTED_LABS = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'Fibrinogen', 'procalcitonin', 'D-dimer', 'creatinine']
show_patients = True

for i, lab in enumerate(SELECTED_LABS):
#     DEBUG
#     if i == 1:
#         break
#     new = plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i)
    new = alt.vconcat(
        plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
#         plot(df, patient_group='ever', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        spacing=20
    ).resolve_scale(y='shared')#, color='independent', size='independent', stroke='independent')
    if i != 0:
        res = alt.hconcat(res, new, spacing=10)
    else:
        res = new
    # DEBUG
    #if i == 0:
        #break

res = res.properties(
    title={
        "text": [
            f"Mean Standardized Lab Values Of All Patients By Wave"
        ],
        "dx": 80,
#         "subtitle": [
#             get_visualization_subtitle(data_release='2021-05-06', with_num_sites=False)], 
        "subtitleFontSize": 16,
        "subtitleColor": "gray",
    }
)
        
# res = apply_theme(
#     res,
#     axis_y_title_font_size=16,
#     title_anchor='start',
#     legend_orient='right',
#     point_size=40
# )
res

# obs

In [7]:
SELECTED_LABS = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'Fibrinogen', 'procalcitonin', 'D-dimer', 'creatinine']
show_patients = True

for i, lab in enumerate(SELECTED_LABS):
#     DEBUG
#     if i == 1:
#         break
#     new = plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i)
    new = alt.vconcat(
#         plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        plot(df, patient_group='ever', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        spacing=20
    ).resolve_scale(y='shared')#, color='independent', size='independent', stroke='independent')
    if i != 0:
        res2 = alt.hconcat(res2, new, spacing=10)
    else:
        res2 = new
    # DEBUG
    #if i == 0:
        #break

res2 = res2.properties(
    title={
        "text": [
            f"Mean Standardized Lab Values Of Ever Severe Patients By Wave"
        ],
        "dx": 80,
#         "titlePadding": 30,
#         "subtitle": [
#             get_visualization_subtitle(data_release='2021-05-06', with_num_sites=False)], 
        "subtitleFontSize": 16,
        "subtitleColor": "gray",
    }
)
        
# res = apply_theme(
#     res,
#     axis_y_title_font_size=16,
#     title_anchor='start',
#     legend_orient='right'
# )

res = alt.vconcat(res, res2, spacing=50).resolve_scale(size='shared')

res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    point_size=30
)

res

# All Labs

In [8]:
domain_by_lab = {
    'C-reactive protein (CRP) (Normal Sensitivity)': [0.85, 1.15],
    'Ferritin': [0.97, 1.08], 
    'Fibrinogen': [0.88, 1.05], 
    'procalcitonin': [0.9, 2.5], 
    'D-dimer': [0.96, 1.15], 
    'creatinine': [0.85, 1.15],
    
    'alanine aminotransferase (ALT)': [0.95, 1.15],
    'albumin': [0.75, 1.05],
    'aspartate aminotransferase (AST)': [0.96, 1.05],
    'lactate dehydrogenase (LDH)': [0.95, 1.05],
    'lymphocyte count': [0.85, 1.15],
    'neutrophil count': [0.9, 1.55],
    'prothrombin time (PT)': [0.95, 1.25],
    'total bilirubin': [0.95, 1.25],
    'white blood cell count (Leukocytes)': [0.95, 1.5]
}

def plot(_d, patient_group='all', lab=None, obs=None, pdf=None, i=0, show_patients=False):
    d = _d.copy()
    o = obs.copy()
    p = pdf.copy()
    
    """
    DATA PREPROCESSING...
    """
    if lab in LOG_LABS:
        d = d[d.scale == 'log']
        p = p[p.scale == 'log']
    else:
        d = d[d.scale == 'original']
        p = p[p.scale == 'original']
        
    d = d.drop(columns=['Unnamed: 0'])
    d.wave = d.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    
    d = d[d.Lab == lab]
    
    d17 = d[(1 <= d.days_since_positive) & (d.days_since_positive <= 7)]
    d = d[(d.days_since_positive == 0) | (d.days_since_positive == 1) | (d.days_since_positive == 7)]

    d.days_since_positive = d.days_since_positive.apply(lambda x: f"Day {x}")
    d17.days_since_positive = d17.days_since_positive.apply(lambda x: f"Day {x}")
    
    #### PVAL ###############################
    p = p[p.setting == patient_group]
    p['is_sig'] = False
    p.is_sig = p[lab] <= 0.05
    p.is_sig = p.is_sig.apply(lambda x: 'p<0.05' if x else 'p>0.05')
    p = p[['setting','day', 'is_sig']]
    p.day = p.day.apply(lambda x: f"Day {x}")
    
    """
    MERGE
    """
    d = pd.merge(d, p, how='left', left_on=['days_since_positive','setting'], right_on = ['day','setting'])
    
    #### OBS ################################
    o = o.rename(columns={"p.lwr": "p_lwr", "p.upr": "p_upr"})
    o = o[o.lab == lab]
    o = o.drop(columns=['Unnamed: 0'])
    o.wave = o.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    o = o[o.cohort == 'dayX']
    o = o[o.setting == patient_group]
    o = o.rename(columns={
        'wave': 'Wave'
    })    
    
    """
    DAY 1-7 AVERAGE
    """
    d17['mean'] = d17['mean'] * d17['total_n']
    d17 = d17.groupby(['Lab', 'setting', 'Wave']).sum().reset_index()
    d17['mean'] = d17['mean'] / d17['total_n']

    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    
    """
    LABS
    """
    LAB_DROPDOWN = alt.binding_select(options=LABS)
    LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": lab if lab != None else LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=4, opacity=opacity, point=False
    ).encode(
        x=alt.X('days_since_positive:N', title=None, axis=alt.Axis(labels=False) if (patient_group == 'all') & (show_patients == False) else alt.Axis()), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False, nice=False, padding=0, domain=domain_by_lab[lab]), title=['Mean Lab Value'] if patient_group == 'all' else None, axis=alt.Axis(titleX=titleX)),
        color=alt.Color('Wave:N', scale=alt.Scale(domain=['First', 'Second'], range=WAVE_COLOR))
    ).properties(
        width=340,
        height=200
    )
    
    point_m = alt.Chart(
        d
    ).mark_point(
        opacity=opacity, filled=True, strokeWidth=3
    ).encode(
        x=alt.X('days_since_positive:N', title=None), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title=['Mean Lab Value'] if patient_group == 'all' else None, axis=alt.Axis(titleX=titleX)), # 'All Patients' if patient_group == 'all' else "Ever Severe Patients", 
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR)),
        stroke=alt.Stroke('is_sig:N', scale=alt.Scale(domain=['p<0.05'], range=['black']), title='Significance'),
        size=alt.Size('total_n:Q', title="# of Patients"),
        strokeWidth=alt.value(3)
    )
    
    line_m = (line_m + point_m)
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    """
    Day 1-7 Average
    """
    bar_m = alt.Chart(
        d17
    ).mark_bar(
        size=32,
        stroke='black'
    ).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value', axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=1)),
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR))
    ).properties(
        width=200
    )
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    text = alt.Chart(
        d17
    ).mark_text(size=16, dx=0, dy=-4, color='black', baseline='bottom', align='center', angle=0, fontWeight=500).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value' if patient_group == 'all' else None, axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=1)),
        text=alt.Text('mean:Q', format=".2f")
    )
#     .transform_filter(
#         LAB_SELECTION
#     )
    bar_m = (bar_m)# + text)
       
        
    """
    OBSERVATION
    """
    LAB_FIELD_NAME = lab.replace(' ', '.').replace('(', '.').replace(')', '.').replace('-', '.') # Used in the original files
    o = o.rename(columns={
        f"{LAB_FIELD_NAME}": LAB_FIELD_NAME.replace('.', '_')
    })
    LAB_FIELD_NAME = LAB_FIELD_NAME.replace('.', '_')
    
    obs_line = alt.Chart(
        o
    ).mark_line(
        point=True, opacity=0.7, size=4
    ).encode(
        x=alt.X('day:Q', title='Days Since Positive'),
        y=alt.Y(f"p:Q", scale=alt.Scale(domain=[0, 1]), axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested"),
        color=alt.Color("Wave:N")
    )

    obs_error = obs_line.mark_errorbar(opacity=0.3).encode(
        x=alt.X('day:Q', title='Days Since Positive', scale=alt.Scale(padding=10, nice=False)),
        y=alt.Y('p_upr:Q', axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested" if patient_group == 'all' else None),
        y2=alt.Y2('p_lwr:Q'),
        color=alt.Color("Wave:N")
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    obs_line = alt.layer(obs_line, obs_error).properties(
#         title={
#             "text": "Percentage of Patients Tested",
#             "dx": 80,
#             # "fontSize": 16,
#             # "color": "gray"
#         },
        width=340,
        height=200
    )
    
    plot = (
        line_m if show_patients == False else (
            alt.vconcat(
            # alt.hconcat(line_m, bar_m, spacing=20).resolve_scale(y='shared'),
            line_m,
            obs_line,
            spacing=5
        ).resolve_scale(x='independent')
        )
    ).properties(
            title={
                "text": f"{patient_group.capitalize()} Patients", # == 'all'consistent_loinc[lab].replace(' (Normal Sensitivity)', ''),
                "anchor": 'middle',
                'fontSize': 16,
                'dx': 30 # (5 if i != 0 else 13) if show_patients == False else (20 if i != 0 else 40)
            }
        )
    
    return plot

SELECTED_LABS = df.Lab.unique().tolist()

show_patients = True

for i, lab in enumerate(SELECTED_LABS):
#     DEBUG
#     if i == 1:
#         break
#     new = plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i)
    new = alt.hconcat(
        plot(df, patient_group='all', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        plot(df, patient_group='ever', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        spacing=20
    ).resolve_scale(y='shared')#, color='independent', size='independent', stroke='independent')

    # DEBUG
    #if i == 0:
        #break

    new = new.properties(
        title={
            "text": [
                f"Mean Standardized Lab Values Of All And Severe Patients By Wave"
            ],
            "dx": 80,
            "subtitle": [
                lab.capitalize()
            ], 
            "subtitleFontSize": 18,
            "subtitleColor": "gray",
        }
    )
    
    if i != 0:
        res = alt.vconcat(res, new, spacing=30).resolve_scale(y='independent', color='independent', size='independent', stroke='independent')
    else:
        res = new
        
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    point_size=40,
    title_dy=0
)
res

# Standardized Mean Labs

In [9]:
def plot(_d, patient_group='all', lab=None, obs=None, pdf=None, i=0, show_patients=False):
    d = _d.copy()
    o = obs.copy()
    p = pdf.copy()
    
    """
    DATA PREPROCESSING...
    """
    if lab in LOG_LABS:
        d = d[d.scale == 'log']
        p = p[p.scale == 'log']
    else:
        d = d[d.scale == 'original']
        p = p[p.scale == 'original']
        
    d = d.drop(columns=['Unnamed: 0'])
    d.wave = d.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    
    d = d[d.Lab == lab]
    
    d17 = d[(1 <= d.days_since_positive) & (d.days_since_positive <= 7)]
    d = d[(d.days_since_positive == 0) | (d.days_since_positive == 1) | (d.days_since_positive == 7)]

    d.days_since_positive = d.days_since_positive.apply(lambda x: f"Day {x}")
    d17.days_since_positive = d17.days_since_positive.apply(lambda x: f"Day {x}")
    
    #### PVAL ###############################
    p = p[p.setting == patient_group]
    p['is_sig'] = False
    p.is_sig = p[lab] <= 0.05
    p.is_sig = p.is_sig.apply(lambda x: 'p<0.05' if x else 'p>0.05')
    p = p[['setting','day', 'is_sig']]
    p.day = p.day.apply(lambda x: f"Day {x}")
    
    """
    MERGE
    """
    d = pd.merge(d, p, how='left', left_on=['days_since_positive','setting'], right_on = ['day','setting'])
    
    #### OBS ################################
    o = o.rename(columns={"p.lwr": "p_lwr", "p.upr": "p_upr"})
    o = o[o.lab == lab]
    o = o.drop(columns=['Unnamed: 0'])
    o.wave = o.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    o = o[o.cohort == 'dayX']
    o = o[o.setting == patient_group]
    o = o.rename(columns={
        'wave': 'Wave'
    })    
    
    """
    DAY 1-7 AVERAGE
    """
    d17['mean'] = d17['mean'] * d17['total_n']
    d17 = d17.groupby(['Lab', 'setting', 'Wave']).sum().reset_index()
    d17['mean'] = d17['mean'] / d17['total_n']

    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    
    """
    LABS
    """
    LAB_DROPDOWN = alt.binding_select(options=LABS)
    LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": lab if lab != None else LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=4, opacity=opacity, point=False
    ).encode(
        x=alt.X('days_since_positive:N', title=None, axis=alt.Axis(labels=False) if (patient_group == 'all') & (show_patients == False) else alt.Axis()), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False, nice=False, padding=10), title=['Mean Lab Value'] if i == 0 else None, axis=alt.Axis(titleX=titleX)), #domain=domain_by_lab[lab]
        color=alt.Color('Wave:N', scale=alt.Scale(domain=['First', 'Second'], range=WAVE_COLOR))
    ).properties(
        width=240,
        height=200
    )
    
    point_m = alt.Chart(
        d
    ).mark_point(
        opacity=opacity, filled=True, strokeWidth=3
    ).encode(
        x=alt.X('days_since_positive:N', title=None), # ''Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title=['Mean Lab Value'] if i == 0 else None, axis=alt.Axis(titleX=titleX)), # 'All Patients' if patient_group == 'all' else "Ever Severe Patients", 
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR)),
        stroke=alt.Stroke('is_sig:N', scale=alt.Scale(domain=['p<0.05'], range=['black']), title='Significance'),
        size=alt.Size('total_n:Q', title="# of Patients"),
        strokeWidth=alt.value(3)
    )
    
    line_m = (line_m + point_m)
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    """
    Day 1-7 Average
    """
    bar_m = alt.Chart(
        d17
    ).mark_bar(
        size=32,
        stroke='black'
    ).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value', axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=1)),
        color=alt.Color('Wave:N', scale=alt.Scale(range=WAVE_COLOR))
    ).properties(
        width=100
    )
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    text = alt.Chart(
        d17
    ).mark_text(size=16, dx=0, dy=-4, color='black', baseline='bottom', align='center', angle=0, fontWeight=500).encode(
        x=alt.X('Wave:N', title=None), # 'Wave'),
        y=alt.Y('mean:Q', title='Day1-7 Mean Lab Value' if i == 0 else None, axis=alt.Axis(ticks=False, labels=False, domain=False, orient='left'), scale=alt.Scale(padding=1)),
        text=alt.Text('mean:Q', format=".2f")
    )
#     .transform_filter(
#         LAB_SELECTION
#     )
    bar_m = (bar_m)# + text)
       
        
    """
    OBSERVATION
    """
    LAB_FIELD_NAME = lab.replace(' ', '.').replace('(', '.').replace(')', '.').replace('-', '.') # Used in the original files
    o = o.rename(columns={
        f"{LAB_FIELD_NAME}": LAB_FIELD_NAME.replace('.', '_')
    })
    LAB_FIELD_NAME = LAB_FIELD_NAME.replace('.', '_')
    
    obs_line = alt.Chart(
        o
    ).mark_line(
        point=True, opacity=0.7, size=4
    ).encode(
        x=alt.X('day:Q', title='Days Since Positive'),
        y=alt.Y(f"p:Q", scale=alt.Scale(domain=[0, 1]), axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested"),
        color=alt.Color("Wave:N")
    )

    obs_error = obs_line.mark_errorbar(opacity=0.3).encode(
        x=alt.X('day:Q', title='Days Since Positive', scale=alt.Scale(padding=10, nice=False)),
        y=alt.Y('p_upr:Q', axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients Tested" if i == 0 else None),
        y2=alt.Y2('p_lwr:Q'),
        color=alt.Color("Wave:N")
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    obs_line = alt.layer(obs_line, obs_error).properties(
#         title={
#             "text": "Percentage of Patients Tested",
#             "dx": 80,
#             # "fontSize": 16,
#             # "color": "gray"
#         },
        width=240,
        height=150
    )
    
    plot = (
        line_m if show_patients == False else (
            alt.vconcat(
            # alt.hconcat(line_m, bar_m, spacing=20).resolve_scale(y='shared'),
            line_m,
            obs_line,
            spacing=5
        ).resolve_scale(x='independent')
        )
    ).properties(
            title={
                "text": consistent_loinc[lab].replace(' (Normal Sensitivity)', ''),
                "anchor": 'middle',
                'fontSize': 16,
                'dx': (5 if i != 0 else 13) if show_patients == False else (20 if i != 0 else 40)
            }
        )
    
    return plot


SELECTED_LABS = ['C-reactive protein (CRP) (Normal Sensitivity)', 'albumin', 'creatinine', 'D-dimer']
show_patients = True

for i, lab in enumerate(SELECTED_LABS):
    new = alt.vconcat(
        plot(df, patient_group='ever', lab=lab, obs=obs, pdf=pdf, i=i, show_patients=show_patients), 
        spacing=20
    ).resolve_scale(y='shared')
    if i != 0:
        res = alt.hconcat(res, new, spacing=10)
    else:
        res = new
    # DEBUG
    if i == 0:
        break

res = res.properties(
    title={
        "text": [
            f"Mean Standardized Lab Values Of Ever Severe Patients By Wave"
        ],
        "dx": 80
    }
)

res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right'
)
res

In [10]:
cdf = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_values_standardized_bycountry.csv"))

cdf

Unnamed: 0,scale,setting,Lab,days_since_positive,mean,se,wave,country
0,original,all,alanine aminotransferase (ALT),0,1.000000,21.623056,early,FRANCE
1,original,all,alanine aminotransferase (ALT),1,1.039034,1.154195,early,FRANCE
2,original,all,alanine aminotransferase (ALT),2,1.123584,1.868956,early,FRANCE
3,original,all,alanine aminotransferase (ALT),3,1.138845,1.237518,early,FRANCE
4,original,all,alanine aminotransferase (ALT),4,1.314944,3.412156,early,FRANCE
...,...,...,...,...,...,...,...,...
12235,log,never,cardiac troponin (Normal Sensitivity),10,,,late,SPAIN
12236,log,never,cardiac troponin (Normal Sensitivity),11,,,late,SPAIN
12237,log,never,cardiac troponin (Normal Sensitivity),12,,,late,SPAIN
12238,log,never,cardiac troponin (Normal Sensitivity),13,,,late,SPAIN


In [11]:
cobs = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_obs_bycountry.csv"))

cobs

Unnamed: 0,country,lab,day,cohort,setting,p,se,ci_95L,ci_95U,wave
0,FRANCE,alanine aminotransferase (ALT),0,dayX,all,0.506785,0.130227,0.251541,0.762029,early
1,FRANCE,alanine aminotransferase (ALT),0,day0,all,0.506785,0.130227,0.251541,0.762029,early
2,FRANCE,albumin,0,dayX,all,0.196784,0.045584,0.107439,0.286129,early
3,FRANCE,albumin,0,day0,all,0.196784,0.045584,0.107439,0.286129,early
4,FRANCE,aspartate aminotransferase (AST),0,dayX,all,0.468982,0.091598,0.289451,0.648513,early
...,...,...,...,...,...,...,...,...,...,...
6777,GERMANY,white blood cell count (Leukocytes),12,day0,ever,0.509804,0.039794,0.431328,0.587800,late
6778,GERMANY,white blood cell count (Leukocytes),13,dayX,ever,0.893617,0.024267,0.815144,0.941181,late
6779,GERMANY,white blood cell count (Leukocytes),13,day0,ever,0.549020,0.039124,0.469936,0.625702,late
6780,GERMANY,white blood cell count (Leukocytes),14,dayX,ever,0.865169,0.028566,0.778961,0.921157,late


In [12]:
fr = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_FRANCE.csv"))
ge = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_GERMANY.csv"))
it = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_ITALY.csv"))
sp = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_SPAIN.csv"))
us = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_USA.csv"))

cpdf = fr.append([ge, it, sp, us])
cpdf = cpdf.drop(columns=['Unnamed: 0'])
cpdf

Unnamed: 0,setting,scale,day,alanine aminotransferase (ALT),albumin,aspartate aminotransferase (AST),C-reactive protein (CRP) (Normal Sensitivity),creatinine,D-dimer,Ferritin,...,lactate dehydrogenase (LDH),lymphocyte count,neutrophil count,procalcitonin,total bilirubin,white blood cell count (Leukocytes),country,cardiac troponin (High Sensitivity),prothrombin time (PT),cardiac troponin (Normal Sensitivity)
0,all,original,0,0.895366,0.735680,8.647676e-02,2.942395e-02,0.847918,0.000130,0.353902,...,0.446442,0.543677,9.456107e-01,0.087760,0.171662,0.033090,FRANCE,,,
1,all,original,1,0.448318,0.675929,5.863738e-02,3.631642e-02,0.626777,0.001723,0.251972,...,0.461475,0.927711,8.802409e-01,0.047039,0.118557,0.564299,FRANCE,,,
2,all,original,7,0.896926,0.000667,2.284844e-01,7.691089e-11,0.029057,0.607015,0.136490,...,0.423420,0.659622,6.245142e-01,,0.007089,0.629639,FRANCE,,,
3,all,original,14,0.616432,0.412190,2.851647e-01,7.745325e-01,0.087021,0.279152,0.213729,...,0.565310,0.457203,7.298958e-01,0.717102,0.928762,0.925057,FRANCE,,,
4,all,log,0,0.144862,0.737787,4.662963e-09,2.024631e-04,0.730903,0.022192,0.347907,...,0.248616,0.283723,6.282509e-01,,0.694267,0.000219,FRANCE,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,never,original,14,0.148963,0.588382,2.755305e-02,8.159755e-01,0.050539,0.593080,0.373624,...,0.293381,0.532717,1.348507e-01,0.120008,0.096347,0.000016,USA,,0.753876,
20,never,log,0,0.666252,0.818660,5.803072e-01,7.413165e-01,0.962747,0.980572,0.911110,...,0.853981,0.120558,1.072864e-01,,0.773796,0.014870,USA,,0.177290,
21,never,log,1,0.188713,0.819767,7.744477e-02,3.590841e-01,0.939170,0.965223,0.551318,...,0.787007,0.000031,7.810042e-02,,0.815226,0.080535,USA,,0.709895,
22,never,log,7,0.004206,0.880860,3.058342e-06,8.278079e-03,0.272456,0.999458,0.046250,...,0.752025,0.800048,4.761860e-08,0.097539,0.520888,0.000002,USA,,0.591642,


In [13]:
def plot_country(_d, patient_group='all', lab=None, obs=None, pdf=None):
    d = _d.copy()
    o = obs.copy()
    p = pdf.copy()
    
    """
    DATA PREPROCESSING...
    """
    if lab in ['C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer']:
        d = d[d.scale == 'log']
        p = p[p.scale == 'log']
    else:
        d = d[d.scale == 'original']
        p = p[p.scale == 'original']
        
    d.wave = d.wave.apply(lambda x: { 'early': 'Early', 'late': 'Late' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.country != 'SINGAPORE']
#     d = d[d.country != 'GERMANY']
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    d = d[(d.days_since_positive == 0) | (d.days_since_positive == 1) | (d.days_since_positive == 7)]
    d.days_since_positive = d.days_since_positive.apply(lambda x: f"Day {x}")
    d = d[d.Lab == lab]

    """
    OBS DATA
    """
    # o = o.drop(columns=['Unnamed: 0'])
    o = o[o.lab == lab]
    o.wave = o.wave.apply(lambda x: { 'early': 'Early', 'late': 'Late' }[x])
    o = o[o.cohort == 'dayX']
    o = o[o.country != 'GERMANY']
    o.country = o.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    o = o[o.setting == patient_group]
    o = o.rename(columns={
        'wave': 'Wave'
    })    
    
    """
    PVAL DATA
    """
    p = p[p.setting == patient_group]
    p.country = p.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    p['is_sig'] = False
    p.is_sig = p[lab] <= 0.05
    p.is_sig = p.is_sig.apply(lambda x: 'p<0.05' if x else 'p>0.05')
    p = p[['setting','day', 'country', 'is_sig']]
    p.day = p.day.apply(lambda x: f"Day {x}")
    
    """
    MERGE
    """
    d = pd.merge(d, p, how='left', left_on=['days_since_positive','setting', 'country'], right_on = ['day','setting', 'country'])
    
    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    COUNTRIES = ['France', 'Germany', 'Italy', 'Spain', 'USA']
    COUNTRY_COLORS = ['#0072B2', '#E6A01B', '#029F73', '#D45E00', '#CB7AA7']
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    
    """
    LABS
    """
    LAB_DROPDOWN = alt.binding_select(options=LABS)
    LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=4, opacity=opacity
    ).encode(
        x=alt.X('days_since_positive:N', title=None),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title='Standardized Mean Lab', axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    ).properties(
        width=200,
        height=200
    )
    
    point_m = line_m.mark_point(
        size=80, opacity=opacity, filled=True
    ).encode(
        x=alt.X('days_since_positive:N', title=None),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title='Standardized Mean Lab', axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS)),
#         stroke=alt.Stroke('is_sig:N', scale=alt.Scale(domain=['p<0.05'], range=['black']), title='Significance'),
#         strokeWidth=alt.value(3)
    )
    
    line_m = (line_m + point_m).facet(
        column=alt.Column('Wave:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True))
    )
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    """
    OBSERVATION
    """
    LAB_FIELD_NAME = lab.replace(' ', '.').replace('(', '.').replace(')', '.').replace('-', '.') # Used in the original files
    o = o.rename(columns={
        f"{LAB_FIELD_NAME}": LAB_FIELD_NAME.replace('.', '_')
    })
    LAB_FIELD_NAME = LAB_FIELD_NAME.replace('.', '_')

    obs_line = alt.Chart(
        o
    ).mark_line(
        point=True, opacity=0.7, size=4
    ).encode(
        x=alt.X('day:Q', title='Days Since Positive'),
        y=alt.Y(f"p:Q", axis=alt.Axis(format='0.0%', titleX=titleX), title="% Patients"),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS)),
        column=alt.Column('Wave:N', header=alt.Header(labelOrient="top", title=None, titleOrient="top", labels=False))
    ).properties(
        width=200,
        height=100
    )
#     .properties(
#         title={
#             "text": "Percentage of Patients Tested",
#             "dx": 80,
#             # "fontSize": 16,
#             # "color": "gray"
#         },
#         width=350,
#         height=100
#     )
    
    line_m = alt.vconcat(line_m, obs_line, spacing=20)
    
    plot = (
        alt.hconcat(line_m, spacing=20).resolve_scale(y='shared')
    ).properties(
            title={
                "text": f"Country-Level Lab Values of {'All' if patient_group == 'all' else 'Ever Severe'} Patients by Wave",
                "dx": 80,
                "subtitle": [
                    consistent_loinc[lab], #.title(),
                    get_visualization_subtitle(data_release='2021-04-27', with_num_sites=False)
                ], 
                "subtitleFontSize": 16,
                "subtitleColor": "gray",
            }
        )
    
    return plot

In [14]:
LABS = df.Lab.unique().tolist()
# print(len(LABS))

for i, lab in enumerate(LABS):
    new = alt.hconcat(
        plot_country(cdf, patient_group='all', lab=lab, obs=cobs, pdf=cpdf), 
        plot_country(cdf, patient_group='ever', lab=lab, obs=cobs, pdf=cpdf), 
        spacing=50
    ).resolve_scale(y='shared', color='independent', size='independent', stroke='independent')
    if i != 0:
        res = alt.vconcat(res, new, spacing=10)
    else:
        res = new
    # DEBUG
#    if i == 0:
#         break
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    header_label_font_size=16
)
res

In [15]:
# LABS = df.Lab.unique().tolist()
# print(len(LABS))

for i, lab in enumerate(SELECTED_LABS):
    new = alt.hconcat(
        plot_country(cdf, patient_group='all', lab=lab, obs=cobs, pdf=cpdf), 
        plot_country(cdf, patient_group='ever', lab=lab, obs=cobs, pdf=cpdf), 
        spacing=50
    ).resolve_scale(y='shared', color='independent', size='independent', stroke='independent')
    if i != 0:
        res = alt.vconcat(res, new, spacing=10)
    else:
        res = new
    # DEBUG
#    if i == 0:
#         break
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    header_label_font_size=16
)
res

In [16]:
def day0_overview(_d, lab, _p):
    d = _d.copy()
    p = _p.copy()
    
    """
    DATA PREPROCESSING...
    """
    d.wave = d.wave.apply(lambda x: { 'early': 'Early', 'late': 'Late' }[x])
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.country != 'SINGAPORE']
    d = d[d.country != 'GERMANY']
    d = d[d.days_since_positive == 0]
    d = d[d.Lab == lab]
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    d.days_since_positive = d.days_since_positive.apply(lambda x: f"Day {x}")
    d.setting = d.setting.apply(lambda x: {'all': 'All Patients', 'ever': 'Ever Severe Patients', 'never': 'Never Severe Patients'}[x])
    
    if lab in ['C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer']:
        d = d[d.scale == 'log']
        p = p[p.scale == 'log']
    else:
        d = d[d.scale == 'original']
        p = p[p.scale == 'original']
    
    d = d.drop(columns=['scale', 'Lab', 'days_since_positive'])
    d = d.pivot_table(values='mean', index=['country', 'setting'], columns='Wave').reset_index()
    d = d[(d.Early.notna() & d.Late.notna())]
    d['increase'] = d.Early < d.Late
    
    p.country = p.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    p['sig'] = p[lab].apply(lambda x: 'yes' if x < 0.05 else 'no')
    p[lab] = -np.log10(p[lab])
    
    d = pd.merge(d, p,  how='left', left_on=['country','setting'], right_on = ['country','setting'])
    
    # print(d)
    # print(p[lab])
    # print(p)
    """
    CONSTANTS
    """
    COUNTRIES = ['France', 'Italy', 'Spain', 'USA'] # 'Germany', 
    COUNTRY_COLORS = ['#0072B2', '#029F73', '#D45E00', '#CB7AA7'] # '#E79F00', 
    
    """
    PLOT
    """
    titleX=-80
    opacity=0.7
    
    """
    LABS
    """    
    bar = alt.Chart(
        d
    ).mark_bar(
        size=6
    ).encode(
        y=alt.Y('Early:Q', scale=alt.Scale(zero=False), title='Mean Lab Value'),
        y2=alt.Y2('Late:Q'),
        x=alt.X('country:N', title='Country'), # axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    ).properties(
        width=200,
        height=220
    )
    
    tr = alt.Chart(
        d
    ).transform_filter(
        alt.FieldOneOfPredicate(field='increase', oneOf=[True])
    ).mark_point(
        shape="triangle-up", filled=True, size=300, yOffset=3, opacity=1
    ).encode(
        y=alt.Y('Late:Q', scale=alt.Scale(zero=False), title='Mean Lab Value'),
        x=alt.X('country:N', title='Country'),# axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    )
    
    tl = alt.Chart(
        d
    ).transform_filter(
        alt.FieldOneOfPredicate(field='increase', oneOf=[False])
    ).mark_point(
        shape="triangle-down", filled=True, size=300, yOffset=-3, opacity=1
    ).encode(
        y=alt.Y('Late:Q', scale=alt.Scale(zero=False), title='Mean Lab Value'),
        x=alt.X('country:N', title='Country'), # axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    )
    
    baseline = alt.Chart(
        pd.DataFrame({'baseline': [1]})
    ).mark_rule(color='gray').encode(
        y=alt.Y('baseline:Q')
    )
        
    plot = (bar + tr + tl).facet(
        column=alt.Column('setting:N', header=alt.Header(labelOrient="top", title=None, titleOrient="top", labels=True)),
        spacing=30
    )
#     .add_selection(
#         LAB_SELECTION
#     ).transform_filter(
#         LAB_SELECTION
#     )
    
    plot = plot.properties(
        title={
            "text": [
                f"Standardized Mean Lab Values by Country from Early to Late Wave"
            ],
            "dx": 50,
            "subtitle": [
                # 'Relative to Day 0 Mean Value During Early Phase',
                consistent_loinc[lab],
                #get_visualization_subtitle(data_release='2021-01-25', with_num_sites=False)
            ], 
            "subtitleFontSize": 16,
            "subtitleColor": "gray",
        }
    )
    
    """
    p-value triangles
    """
    d_notna = d[d[lab].notna()].copy()

    p_base = alt.Chart(
        d_notna
    ).mark_point(
        size=200, filled=True, opacity=1, shape='triangle-up', strokeWidth=1, xOffset=0
    ).encode(
        x=alt.X('country:N', title='Country'),# axis=alt.Axis(titleX=titleX)),
        y=alt.Y(f'{lab}:Q', title="P Value (-log10)", scale=alt.Scale(zero=False)),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS), title='Country'),
        stroke=alt.Stroke('sig:N', scale=alt.Scale(domain=['no', 'yes'], range=['white', 'black']), title='p < 0.05?', legend=None)
    ).properties(
        width=200,
        height=200
    )
    
    p_base_no_y = p_base.encode(
        x=alt.X('country:N', title='Country'),
        y=alt.Y(f'{lab}:Q', title="P Value (-log10)", scale=alt.Scale(zero=False), axis=alt.Axis(ticks=False, domain=False, title=None, labels=False)),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS)),
        stroke=alt.Stroke('sig:N', scale=alt.Scale(domain=['no', 'yes'], range=['white', 'black']), title='p < 0.05?', legend=None)
    )

    p0_05_all = alt.Chart(
        pd.DataFrame({'baseline': [1.30102999566], 'zero': [0]})
    ).mark_rule(color='firebrick', strokeDash=[3,3]).encode(
        y=alt.Y('baseline:Q')
    )
    
    p0_05_all_rect = alt.Chart(
        pd.DataFrame({'baseline': [1.30102999566]})
    ).mark_rect(color='transparent', stroke='firebrick', strokeDash=[3,3], strokeWidth=2).encode(
        y=alt.Y('baseline:Q'),
        y2=alt.value(0)
    )
    
    p0_05_text = alt.Chart(
        pd.DataFrame({'baseline': [1.30102999566], 'text': 'p=0.05'})
    ).mark_text(color='firebrick', align='right', baseline='top', y=0, x=195).encode(
        # y=alt.Y('baseline:Q'),
        y=alt.value(2),
        text=alt.value('Statistical Significance: p<0.05')
    )
    
    """
    ALL PATIENTS
    """    
    p_t_all1 = (
        #p0_05_all
        #+ 
        p0_05_text
        + 
        p_base.transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[True])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['All Patients'])
        )
        + 
        p_base.mark_point(
            size=200, filled=True, opacity=1, shape='triangle-down', strokeWidth=1, xOffset=0
        ).transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[False])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['All Patients'])
        ) 
        +
        p0_05_all_rect
    ).properties(
        title={
            'text': 'All Patients',
            'dx': 105,
            'fontSize': 16
        }
    )
    
    """
    EVER SEVERE
    """    
    p_t_all2 = (
        #p0_05_all 
        #+ 
        p0_05_text
        + 
        p_base_no_y.transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['Ever Severe Patients'])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[True])
        )
        + 
        p_base_no_y.mark_point(
            size=200, filled=True, opacity=1, shape='triangle-down', strokeWidth=1, xOffset=0
        ).transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['Ever Severe Patients'])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[False])
        )
        + p0_05_all_rect
    ).properties(
        title={
            'text': 'Ever Severe Patients',
            'dx': 30,
            'fontSize': 16
        }
    )
    
    """
    NEVER SEVERE
    """    
    p_t_all3 = (
        #p0_05_all 
        #+ 
        p0_05_text
        + 
        p_base_no_y.transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['Never Severe Patients'])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[True])
        )
        + 
        p_base_no_y.mark_point(
            size=200, filled=True, opacity=1, shape='triangle-down', strokeWidth=1, xOffset=0
        ).transform_filter(
            alt.FieldOneOfPredicate(field='setting', oneOf=['Never Severe Patients'])
        ).transform_filter(
            alt.FieldOneOfPredicate(field='increase', oneOf=[False])
        )
        + p0_05_all_rect
    ).properties(
        title={
            'text': 'Never Severe Patients',
            'dx': 30,
            'fontSize': 16
        }
    )
    
    """
    COMBINE
    """
    p_t = alt.hconcat(p_t_all1, p_t_all2, p_t_all3, spacing=20).resolve_scale(y='shared')
    
    p_t = p_t.properties(
        title={
            "text": [
                f"P-Values at 0 Days Since Positive Across Waves"
            ],
            "dx": 50,
            ''
            "subtitle": [
                # 'Relative to Day 0 Mean Value During Early Phase',
                consistent_loinc[lab],
                #get_visualization_subtitle(data_release='2021-01-25', with_num_sites=False)
            ], 
            "subtitleFontSize": 16,
            "subtitleColor": "gray",
        }
    )
    
    
    return alt.vconcat(plot, p_t, spacing=30).resolve_scale(color='independent', stroke='independent')

fr = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_FRANCE.csv"))
ge = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_GERMANY.csv"))
it = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_ITALY.csv"))
sp = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_SPAIN.csv"))
us = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_pvals_USA.csv"))

pdf = fr.append([ge, it, sp, us])
pdf = pdf.drop(columns=['Unnamed: 0'])
# pdf = pdf[pdf.setting != 'never']
pdf = pdf[pdf.day == 0]
pdf.setting = pdf.setting.apply(lambda x: { 'all': 'All Patients', 'ever': 'Ever Severe Patients', 'never': 'Never Severe Patients'}[x])

pdf

# lab = df.Lab.unique().tolist()[0]
labs = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'D-dimer', 'creatinine', 'albumin']

for l in labs:
    plot = day0_overview(cdf, l, pdf)

    plot = apply_theme(
        plot,
        axis_y_title_font_size=16,
        title_anchor='start',
        # legend_title_orient='left',
        legend_orient='right',
        header_label_font_size=16
    )

    plot.display()
# cdf

In [17]:
def plot_country_raw(_d, patient_group='all', lab=None):
    d = _d.copy()
    
    """
    DATA PREPROCESSING...
    """
    d['se_top'] = d['mean'] + d['se']
    d['se_bottom'] = d['mean'] - d['se']
    d.wave = d.wave.apply(lambda x: { 'early': 'First Wave', 'late': 'Second Wave' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.country != 'SINGAPORE']
#     d = d[d.country != 'GERMANY']
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    d = d[d.Lab == lab]

    # ['C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer']
    if lab in ['alanine aminotransferase (ALT)', 'aspartate aminotransferase (AST)', 'C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer', 'Ferritin', 'lactate dehydrogenase (LDH)']:
        d = d[d.scale == 'log']
        yTitle = 'Mean Lab Values (Log)'
    else:
        d = d[d.scale == 'original']
        yTitle = 'Mean Lab Values'

    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    COUNTRIES = ['France', 'Germany', 'Italy', 'Spain', 'USA']
    COUNTRY_COLORS = ['#0072B2', '#E6A01B', '#029F73', '#D45E00', '#CB7AA7']
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    width=350
    height=250
    size=3
    
    """
    LABS
    """
    # LAB_DROPDOWN = alt.binding_select(options=LABS)
    # LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=size, opacity=opacity, point=True
    ).encode(
        x=alt.X('days_since_positive:N', title='Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title=yTitle, axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS), title='Country')
    ).properties(
        width=width,
        height=height
    )
    
    """
    ERROR BAR
    """
    error_m = line_m.mark_errorbar(color='gray', opacity=0.3).encode(
        x=alt.X('days_since_positive:N', title='Days Since Positive'),
        y=alt.Y('se_top:Q', scale=alt.Scale(zero=False), title=yTitle, axis=alt.Axis(titleX=titleX)),
        y2=alt.Y2('se_bottom:Q'),
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    """
    COMBINE
    """
    line_m = (line_m + error_m).facet(
        column=alt.Column('Wave:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True))
    )

    plot = (
        alt.hconcat(line_m, spacing=20).resolve_scale(y='shared')
    ).properties(
        title={
            "text": f"Country-Level Mean Lab Values Of {'All' if patient_group == 'all' else 'Ever Severe'} Patients By Wave",
            "dx": 80,
            "subtitle": [
                consistent_loinc[lab]
                # lab, #.title(),
                # get_visualization_subtitle(data_release='2021-04-25', with_num_sites=False)
            ], 
            "subtitleFontSize": 18,
            "subtitleColor": "gray",
        }
    )
    
    return plot

In [18]:
raw = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_values_bycountry.csv"))

print(raw.Lab.unique().tolist())

# raw = raw[~pd.isnull(raw.mean)]

# !! We are removing the following lab since this does not contain any info
raw = raw[raw.Lab != 'cardiac troponin (Normal Sensitivity)']


labs = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'D-dimer', 'creatinine', 'albumin']
labs = raw.Lab.unique().tolist()

patient_groups = ['all', 'ever']

for li, lab in enumerate(labs):
    
    for i, patient_group in enumerate(patient_groups):
        
        if i == 0:
            res = plot_country_raw(raw, patient_group=patient_group, lab=lab)
        else:
            res = alt.hconcat(
                res, plot_country_raw(raw, patient_group=patient_group, lab=lab), spacing=30
            ).resolve_scale(y='shared', color='shared')

    if li == 0:
        plot = res
    else:
        plot = alt.vconcat(
            plot, res, spacing=30
        ).resolve_scale(y='independent', color='independent')
        
# plot = plot.properties(
#         title={
#             "text": f"Country-Level Mean Lab Values Of {'All' if patient_group == 'all' else 'Ever Severe'} Patients By Wave",
#             "dx": 80
#         }
#     )
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    header_label_font_size=16,
    title_anchor='start',
    legend_orient='right'
)
plot

['alanine aminotransferase (ALT)', 'albumin', 'aspartate aminotransferase (AST)', 'C-reactive protein (CRP) (Normal Sensitivity)', 'creatinine', 'D-dimer', 'Ferritin', 'Fibrinogen', 'lactate dehydrogenase (LDH)', 'lymphocyte count', 'neutrophil count', 'procalcitonin', 'total bilirubin', 'white blood cell count (Leukocytes)', 'prothrombin time (PT)', 'cardiac troponin (High Sensitivity)', 'cardiac troponin (Normal Sensitivity)']


In [19]:
def plot_country_raw(_d, patient_group='all', lab=None):
    d = _d.copy()
    
    """
    DATA PREPROCESSING...
    """
    d['se_top'] = d['mean'] + d['se']
    d['se_bottom'] = d['mean'] - d['se']
    d.wave = d.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.country != 'SINGAPORE']
#     d = d[d.country != 'GERMANY']
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    d = d[d.Lab == lab]

    # ['C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer']
    if lab in ['alanine aminotransferase (ALT)', 'aspartate aminotransferase (AST)', 'C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer', 'Ferritin', 'lactate dehydrogenase (LDH)']:
        d = d[d.scale == 'log']
        yTitle = 'Mean Lab Values (Log)'
    else:
        d = d[d.scale == 'original']
        yTitle = 'Mean Lab Values'

    """
    CONSTANTS
    """
    LABS = d.Lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    COUNTRIES = ['France', 'Germany', 'Italy', 'Spain', 'USA']
    COUNTRY_COLORS = ['#0072B2', '#E6A01B', '#029F73', '#D45E00', '#CB7AA7']
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    width=350
    height=250
    size=3
    
    """
    LABS
    """
    # LAB_DROPDOWN = alt.binding_select(options=LABS)
    # LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=size, opacity=opacity, point=True
    ).encode(
        x=alt.X('days_since_positive:N', title='Days Since Positive'),
        y=alt.Y('mean:Q', scale=alt.Scale(zero=False), title=yTitle, axis=alt.Axis(titleX=titleX, tickCount=6)),
        color=alt.Color('Wave:N', scale=alt.Scale(domain=['First', 'Second'], range=WAVE_COLOR), title='Wave')
    ).properties(
        width=width,
        height=height
    )
    
    """
    ERROR BAR
    """
    error_m = line_m.mark_errorbar(color='gray', opacity=0.3).encode(
        x=alt.X('days_since_positive:N', title='Days Since Positive'),
        y=alt.Y('se_top:Q', scale=alt.Scale(zero=False), title=yTitle, axis=alt.Axis(titleX=titleX)),
        y2=alt.Y2('se_bottom:Q'),
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    """
    COMBINE
    """
#     line_m = (line_m + error_m).facet(
#         column=alt.Column('Wave:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True))
#     )
    line_m = (line_m + error_m).facet(
        column=alt.Column('country:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True))
    )

    plot = (
        alt.hconcat(line_m, spacing=20).resolve_scale(y='shared')
    ).properties(
        title={
            "text": f"Country-Level Mean Lab Values Of {'All' if patient_group == 'all' else 'Ever Severe'} Patients By Wave",
            "dx": 80,
            "subtitle": [
                consistent_loinc[lab]
#                 # lab, #.title(),
#                 # get_visualization_subtitle(data_release='2021-04-25', with_num_sites=False)
            ], 
            "subtitleFontSize": 18,
            "subtitleColor": "gray",
        }
    )
    
    return plot


raw = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_values_bycountry.csv"))
# raw = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_values_standardized_bycountry.csv"))

# labs = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'D-dimer', 'creatinine', 'albumin']
labs = raw.Lab.unique().tolist()
# SELECTED_LABS = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'Fibrinogen', 'procalcitonin', 'D-dimer', 'creatinine']

patient_groups = ['all', 'ever']

for li, lab in enumerate(labs):
    
    for i, patient_group in enumerate(patient_groups):
        
        if i == 0:
            res = plot_country_raw(raw, patient_group=patient_group, lab=lab)
        else:
            res = alt.hconcat(
                res, plot_country_raw(raw, patient_group=patient_group, lab=lab), spacing=30
            ).resolve_scale(y='shared', color='independent')

    if li == 0:
        plot = res
    else:
        plot = alt.vconcat(
            plot, res, spacing=30
        ).resolve_scale(y='independent', color='independent')

# plot = plot.properties(
#         title={
#             "text": "Proportion Of Patients Being Tested In Each Country",
#             "dx": 80
#         }
#     )
        
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    header_label_font_size=16,
    title_anchor='start',
    legend_orient='right',
    title_dy=0
)
plot

In [31]:
def plot_country_raw(_d, patient_group='all', lab=None):
    d = _d.copy()
    
    """
    DATA PREPROCESSING...
    """
#     d['se_top'] = d['mean'] + d['se']
#     d['se_bottom'] = d['mean'] - d['se']
    d.wave = d.wave.apply(lambda x: { 'early': 'First', 'late': 'Second' }[x])
    d = d[d.setting != 'never']
    d = d[d.setting == patient_group]
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.cohort == 'dayX']
#     d = d[d.country != 'SINGAPORE']
#     d = d[d.country != 'GERMANY']
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)
    d = d[d.lab == lab]

    # ['C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer']
    yTitle = consistent_loinc[lab].replace('(Normal Sensitivity)', '') # 'Mean Lab Values'
#     if lab in ['alanine aminotransferase (ALT)', 'aspartate aminotransferase (AST)', 'C-reactive protein (CRP) (Normal Sensitivity)', 'D-dimer', 'Ferritin', 'lactate dehydrogenase (LDH)']:
#         d = d[d.scale == 'log']
#         yTitle = 'Mean Lab Values (Log)'
#     else:
#         d = d[d.scale == 'original']
#         yTitle = 'Mean Lab Values'

    """
    CONSTANTS
    """
    LABS = d.lab.unique().tolist()
    WAVE_COLOR = [
        '#D45E00', # '#BA4338', # early
        '#0072B2', # late
        'black'
    ]
    COUNTRY_COLORS = ['#0072B2', '#029F73', '#D45E00', '#CB7AA7']
    
    """
    PLOT
    """
    titleX=-60    
    opacity=0.7
    width=260
    height=200
    size=3
    
    """
    LABS
    """
    # LAB_DROPDOWN = alt.binding_select(options=LABS)
    # LAB_SELECTION = alt.selection_single(fields=["Lab"], bind=LAB_DROPDOWN, init={"Lab": LABS[0]}, name="Select")
    
    line_m = alt.Chart(
        d
    ).mark_line(
        size=size, opacity=opacity, point=True
    ).encode(
        x=alt.X('day:Q', title='Days Since Admission'),
        y=alt.Y('p:Q', scale=alt.Scale(zero=True, clamp=True), title=yTitle, axis=alt.Axis(titleX=titleX, format='%')),
        color=alt.Color('Wave:N', scale=alt.Scale(domain=['First', 'Second'], range=WAVE_COLOR), title='Wave')
    ).properties(
        width=width,
        height=height
    )
    
    """
    ERROR BAR
    """
    error_m = line_m.mark_errorbar(color='gray', opacity=0.6).encode(
        x=alt.X('day:Q', title='Days Since Admission' if lab == 'creatinine' else None, axis=alt.Axis(labels=True if lab == 'creatinine' else False), scale=alt.Scale(padding=10, nice=False)),
        y=alt.Y('ci_95U:Q', scale=alt.Scale(zero=True), title=yTitle, axis=alt.Axis(titleX=titleX)),
        y2=alt.Y2('ci_95L:Q'),
        strokeWidth=alt.value(1.5)
        # color=alt.value('gray')
        # color=alt.Color('country:N', scale=alt.Scale(range=COUNTRY_COLORS))
    )
    
    """
    COMBINE
    """
#     line_m = (line_m + error_m).facet(
#         column=alt.Column('Wave:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True))
#     )
    line_m = (line_m + error_m).facet(
        column=alt.Column('country:N', header=alt.Header(labelOrient="top", title=None, titleOrient="bottom", labels=True if lab == 'C-reactive protein (CRP) (Normal Sensitivity)' else False))
    )

    plot = (
        alt.hconcat(line_m, spacing=20).resolve_scale(y='shared')
    )
#     .properties(
#         title={
#             "text": consistent_loinc[lab], #f"Country-Level Mean Lab Values Of {'All' if patient_group == 'all' else 'Ever Severe'} Patients By Wave",
#             "dx": 80,
# #             "subtitle": [
# #                 consistent_loinc[lab]
# #                 # lab, #.title(),
# #                 # get_visualization_subtitle(data_release='2021-04-25', with_num_sites=False)
# #             ], 
#             "subtitleFontSize": 18,
#             "subtitleColor": "gray",
#         }
#     )
    
    return plot


obs


obs = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_obs_bycountry.csv"))

# obs = obs[obs.country != 'GERMANY']

obs = obs[~((obs.country == 'FRANCE') & (obs.lab == 'procalcitonin'))] # & (obs.wave == 'late'))]
obs = obs[~((obs.country == 'GERMANY') & (obs.lab == 'creatinine'))] # & (obs.wave == 'late'))]

obs = obs.append({'day': '1', 'setting': 'all', 'p': None, 'lab': 'procalcitonin', 'country': 'FRANCE', 'wave': 'late', 'cohort': 'dayX'}, ignore_index=True)
obs = obs.append({'day': '1', 'setting': 'all', 'p': None, 'lab': 'creatinine', 'country': 'GERMANY', 'wave': 'late', 'cohort': 'dayX'}, ignore_index=True)

SELECTED_LABS = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'Fibrinogen', 'procalcitonin', 'D-dimer', 'creatinine']

patient_groups = ['all']

for li, lab in enumerate(SELECTED_LABS):
    
    for i, patient_group in enumerate(patient_groups):
        
        if i == 0:
            res = plot_country_raw(obs, patient_group=patient_group, lab=lab)
        else:
            res = alt.hconcat(
                res, plot_country_raw(obs, patient_group=patient_group, lab=lab), spacing=0
            ).resolve_scale(y='shared', color='independent')

    if li == 0:
        plot = res
    else:
        plot = alt.vconcat(
            plot, res, spacing=20
        ).resolve_scale(y='independent', color='shared')

plot = plot.properties(
        title={
            "text": "Laboratory Testing Rates Across Hospitalization Days In Each Country",
            "dx": 80,
            "dy": -10
        }
    )
        
plot = apply_theme(
    plot,
    axis_y_title_font_size=20,
    header_label_font_size=20,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_domain_width=0,
    title_dy=0,
    point_size=100
)
plot

# obs[(obs.country == 'SPAIN') & (obs.day == 14) & (obs.setting == 'all') & (obs.lab == 'C-reactive protein (CRP) (Normal Sensitivity)')]

# obs.country.unique().tolist()

# obs

In [21]:
cdf[(cdf.country == 'ITALY') & (cdf.Lab == 'Ferritin') & (cdf.days_since_positive == 0)]

Unnamed: 0,scale,setting,Lab,days_since_positive,mean,se,wave,country


In [22]:
obd = pd.read_csv(join("..", "data", "1.1.resurgence", "labs", "lab_obs_bycountry.csv"))

obd

Unnamed: 0,country,lab,day,cohort,setting,p,se,ci_95L,ci_95U,wave
0,FRANCE,alanine aminotransferase (ALT),0,dayX,all,0.506785,0.130227,0.251541,0.762029,early
1,FRANCE,alanine aminotransferase (ALT),0,day0,all,0.506785,0.130227,0.251541,0.762029,early
2,FRANCE,albumin,0,dayX,all,0.196784,0.045584,0.107439,0.286129,early
3,FRANCE,albumin,0,day0,all,0.196784,0.045584,0.107439,0.286129,early
4,FRANCE,aspartate aminotransferase (AST),0,dayX,all,0.468982,0.091598,0.289451,0.648513,early
...,...,...,...,...,...,...,...,...,...,...
6777,GERMANY,white blood cell count (Leukocytes),12,day0,ever,0.509804,0.039794,0.431328,0.587800,late
6778,GERMANY,white blood cell count (Leukocytes),13,dayX,ever,0.893617,0.024267,0.815144,0.941181,late
6779,GERMANY,white blood cell count (Leukocytes),13,day0,ever,0.549020,0.039124,0.469936,0.625702,late
6780,GERMANY,white blood cell count (Leukocytes),14,dayX,ever,0.865169,0.028566,0.778961,0.921157,late


In [23]:
obd = obd[(obd.cohort == 'dayX') & (obd.setting == 'all') & (obd.day == 0)]

obd

Unnamed: 0,country,lab,day,cohort,setting,p,se,ci_95L,ci_95U,wave
0,FRANCE,alanine aminotransferase (ALT),0,dayX,all,0.506785,0.130227,0.251541,0.762029,early
2,FRANCE,albumin,0,dayX,all,0.196784,0.045584,0.107439,0.286129,early
4,FRANCE,aspartate aminotransferase (AST),0,dayX,all,0.468982,0.091598,0.289451,0.648513,early
6,FRANCE,total bilirubin,0,dayX,all,0.504279,0.133820,0.241992,0.766566,early
8,FRANCE,C-reactive protein (CRP) (Normal Sensitivity),0,dayX,all,0.569739,0.135822,0.303529,0.835950,early
...,...,...,...,...,...,...,...,...,...,...
6482,GERMANY,neutrophil count,0,dayX,all,0.177551,0.018523,0.146263,0.213856,late
6542,GERMANY,procalcitonin,0,dayX,all,0.553061,0.022163,0.508797,0.596500,late
6602,GERMANY,prothrombin time (PT),0,dayX,all,0.000000,0.003969,0.000000,0.007779,late
6662,GERMANY,total bilirubin,0,dayX,all,0.655102,0.020783,0.611955,0.695836,late


In [24]:
def obs(obd, lab):
    d = obd.copy()

    """
    DATA PREPROCESSING...
    """
    d.wave = d.wave.apply(lambda x: { 'early': 'Early', 'late': 'Late' }[x])
    d = d.rename(columns={
        'wave': 'Wave'
    })
    d = d[d.country != 'SINGAPORE']
    d = d[d.country != 'GERMANY']
    d.country = d.country.apply(lambda x: x.capitalize() if x != 'USA' else x)

    # "Also, for the lab observations, in germany whenever there’s just a number 0, we’ll count it as unobserved and we should show no arrow for that"
    d = d[~((d.country == "Germany") & (d[lab] == 0))]
    
    d = d.pivot_table(values=lab, index=['country', 'setting'], columns='Wave').reset_index()
    d = d[(d.Early.notna() & d.Late.notna())]
    d['increase'] = d.Early < d.Late

    """
    CONSTANTS
    """
    COUNTRIES = ['France', 'Italy', 'Spain', 'USA']
    COUNTRY_COLORS = ['#0072B2','#029F73', '#D45E00', '#CB7AA7']

    """
    PLOT
    """
    titleX=-80
    opacity=0.7

    """
    LABS
    """    
    bar = alt.Chart(
        d
    ).mark_bar(
        size=6
    ).encode(
        x=alt.X('Early:Q', scale=alt.Scale(zero=False), title='Percentage of Patients'),
        x2=alt.X2('Late:Q'),
        y=alt.Y('country:N', title='Country', axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    ).properties(
        width=300,
        height=320
    )

    tr = alt.Chart(
        d
    ).transform_filter(
        alt.FieldOneOfPredicate(field='increase', oneOf=[True])
    ).mark_point(
        shape="triangle-right", filled=True, size=300, xOffset=-3, opacity=1
    ).encode(
        x=alt.X('Late:Q', scale=alt.Scale(zero=False), title='Percentage of Patients'),
        y=alt.Y('country:N', title='Country', axis=alt.Axis(titleX=titleX)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    )

    tl = alt.Chart(
        d
    ).transform_filter(
        alt.FieldOneOfPredicate(field='increase', oneOf=[False])
    ).mark_point(
        shape="triangle-left", filled=True, size=300, xOffset=3, opacity=1
    ).encode(
        x=alt.X('Late:Q', scale=alt.Scale(zero=False), title='Percentage of Patients'),
        y=alt.Y('country:N', title='Country', axis=alt.Axis(titleX=titleX), scale=alt.Scale(domain=COUNTRIES)),
        color=alt.Color('country:N', title='Country', scale=alt.Scale(domain=COUNTRIES, range=COUNTRY_COLORS))
    )

    baseline = alt.Chart(
        pd.DataFrame({'baseline': [1]})
    ).mark_rule(color='gray').encode(
        x=alt.X('baseline:Q')
    )

    plot = (bar + tr + tl)

    plot = plot.properties(
        title={
            "text": [
                f"Patients Tested at Day 0 from Early to Late Wave"
            ],
            "dx": 95,
            "subtitle": [
                # 'Relative to Day 0 Mean Value During Early Phase',
                consistent_loinc[lab],
                #get_visualization_subtitle(data_release='2021-01-25', with_num_sites=False)
            ],
            "subtitleFontSize": 16,
            "subtitleColor": "gray",
        }
    )
    return plot

labs = ['C-reactive protein (CRP) (Normal Sensitivity)', 'Ferritin', 'D-dimer', 'creatinine', 'albumin']

for lab in labs:
    res = obs(obd, lab)

    res = apply_theme(
        res,
        axis_y_title_font_size=16,
        title_anchor='start',
        # legend_title_orient='left',
        legend_orient='right',
        header_label_font_size=16
    )

    res.display()

KeyError: 'C-reactive protein (CRP) (Normal Sensitivity)'