In [1]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [2]:
# Data release identifier.
data_release = '2021-05-28'

# Lab identifiers as they appear in the summary tables -> display label (name and unit).
consistent_loinc = dict([
    ("C_reactive_protein_CRP_Normal_Sensitivity", "C-reactive protein (Normal Sensitivity) (mg/dL)"),
    ("creatinine", "Creatinine (mg/dL)"),
    ("Ferritin", "Ferritin (ng/mL)"),
    ("D_dimer", "D-dimer (ng/mL)"),
    ("albumin", "Albumin (g/dL)"),
    ("Fibrinogen", "Fibrinogen (mg/dL)"),
    ("alanine_aminotransferase_ALT", "Alanine aminotransferase (U/L)"),
    ("aspartate_aminotransferase_AST", "Aspartate aminotransferase (U/L)"),
    ("total_bilirubin", "Total bilirubin (mg/dL)"),
    ("lactate_dehydrogenase_LDH", "Lactate dehydrogenase (U/L)"),
    ("cardiac_troponin_High_Sensitivity", "Cardiac troponin High Sensitivity (ng/mL)"),
    ("cardiac_troponin_Normal_Sensitivity", "Cardiac troponin Normal Sensitivity (ng/mL)"),
    ("prothrombin_time_PT", "Prothrombin time (s)"),
    ("white_blood_cell_count_Leukocytes", "White blood cell count (10*3/uL)"),
    ("lymphocyte_count", "Lymphocyte count (10*3/uL)"),
    ("neutrophil_count", "Neutrophil count (10*3/uL)"),
    ("procalcitonin", "Procalcitonin (ng/mL)"),
])

# Colour-scale domains and ranges; each *_colors list is positionally aligned with
# the list of names defined next to it.
continents, continent_colors = ['USA', 'EUROPE'], ['#D45E00', '#57B4E9']

# The country-level lists start with the two continent-level entries.
countries = continents + ['FRANCE', 'GERMANY', 'ITALY']
country_colors = continent_colors + ['#0072B2', '#029F73', '#E5DA3E']

sites = [
    'APHP', 'FRBDX',                                   # drawn in the FRANCE colour
    'UKFR',                                            # drawn in the GERMANY colour
    'BIDMC', 'MGB', 'NWU', 'UCLA', 'UMICH', 'UPENN',   # drawn in the USA colour
    'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
site_colors = ['#0072B2'] * 2 + ['#029F73'] + ['#D45E00'] * 12
site_shapes = ['circle'] * 3 + ['diamond'] * 12

# Lab Trajectory

In [23]:
# Read the wide classification table (one column per day) and reshape it to long
# format: one row per (lab, siteid, day) with the cell value in `value`.
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Upper-case the site identifiers and relabel 'NORTH AMERICA' as 'USA'.
df['siteid'] = [site.upper().replace('NORTH AMERICA', 'USA') for site in df['siteid']]
# Swap raw lab identifiers for display labels; an unknown identifier raises KeyError.
df['lab'] = [consistent_loinc[raw_name] for raw_name in df['lab']]
# Strip the 'day' text from the melted column names; the values stay strings.
df['day'] = [column_name.replace('day', '') for column_name in df['day']]

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach the per-site N from the totals table; sites without an entry get a missing value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].apply(dic.get)

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']


Unnamed: 0,lab,siteid,day,value,N
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.647449,
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.681337,
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.680747,
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.829691,
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.688717,
...,...,...,...,...,...
1675,Neutrophil count (10*3/uL),VA1,14,0.817568,5578.0
1676,Neutrophil count (10*3/uL),VA2,14,0.622558,6468.0
1677,Neutrophil count (10*3/uL),VA3,14,0.670180,6649.0
1678,Neutrophil count (10*3/uL),VA4,14,0.687714,5571.0


In [56]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Build the AUC-by-day panel for a single lab.

    Rows whose ``siteid`` contains 'META' (country/continent level) are drawn
    as lines with point markers. All other rows (site level) are drawn as
    faint points sized by ``N`` on top of a fully transparent line layer.
    The two groups are layered into one chart titled with ``lab``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table. The columns read here are ``lab``, ``siteid``,
        ``day``, ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    Altair layered chart.

    Raises
    ------
    ValueError
        If ``df`` is not supplied.
    """
    if df is None:
        raise ValueError("plot_lab() requires a DataFrame to be passed as `df`")

    d = df[df.lab == lab].copy()

    # `day` arrives as text ('1' ... '14') but is encoded as quantitative below.
    # Convert it on this private copy so ordering and scaling are numeric rather
    # than dependent on how strings happen to be compared/coerced in the renderer.
    # Values that are not numeric become missing instead of raising.
    d['day'] = pd.to_numeric(d['day'], errors='coerce')

    # Only this one panel carries the legends.
    showLegend = lab == 'Lactate dehydrogenase (U/L)'

    def day_axis():
        # Shared x encoding; a fresh object per chart.
        return alt.X(
            "day:Q",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=10),
        )

    def auc_axis():
        # Shared y encoding; a fresh object per chart.
        return alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    is_meta = d.siteid.str.contains('META')

    # Country-level: 'META-*' rows, with the prefix removed so the names match `countries`.
    country_df = d[is_meta].copy()
    country_df['siteid'] = country_df.siteid.str.replace('META-', '', regex=False)
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = alt.Chart(
        country_df
    ).mark_line(
        size=2.5,
        opacity=1
    ).encode(
        x=day_axis(),
        y=auc_axis(),
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale, legend=alt.Legend() if showLegend else None),
    ).properties(
        width=450,
        height=250
    )

    country_points = country_line.mark_point(
        filled=True,
        opacity=1,
        size=30
    ).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )

    cp = country_line + country_points

    # Site-level: every row that is not a 'META' row.
    site_df = d[~is_meta].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # The line layer is fully transparent; it serves as the base the point layer is derived from.
    site_line = alt.Chart(
        site_df
    ).mark_line(
        size=2.5,
        opacity=0
    ).encode(
        x=day_axis(),
        y=auc_axis(),
        color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )

    site_points = site_line.mark_point(
        filled=True,
        opacity=0.1,
        size=150
    ).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if showLegend else None),
    )

    sp = site_line + site_points

    # Combine: site layers underneath, country layers on top, each with its own colour/size scales.
    plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 50,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

    return plot

# plot = plot_lab(df=df)

# Arrange one panel per lab on a grid. The rows are derived from the number of
# labs actually present instead of three fixed slices, so a short lab list does
# not yield an empty row and a long one is not cut off after the ninth lab.
LABS_PER_ROW = 3

lab_rows = [
    alt.hconcat(
        *(plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + LABS_PER_ROW]),
        spacing=30,
    )
    for start in range(0, len(unique_labs), LABS_PER_ROW)
]

plot = alt.vconcat(*lab_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Variations

In [42]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Variation of the lab panel limited to days 3, 7 and 14 on an ordinal day axis.

    Rows whose ``siteid`` contains 'META' are drawn as lines with point
    markers; the remaining rows are drawn as points sized by ``N`` on top of
    a fully transparent line layer. Both groups are layered into one chart
    titled with ``lab``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table. The columns read here are ``lab``, ``siteid``,
        ``day`` (compared against the strings '3', '7', '14'), ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    Altair layered chart.
    """
    # Explicit order for the ordinal axis: '1' ... '14'.
    day_order = [str(n) for n in range(1, 15)]

    subset = df.copy()
    subset = subset[subset.lab == lab]
    subset = subset[subset.day.isin(['3', '7', '14'])]

    # Only this one panel carries the legends.
    with_legend = lab == 'Lactate dehydrogenase (U/L)'

    def day_axis():
        return alt.X(
            "day:O",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=10),
            sort=list(day_order),
        )

    def auc_axis():
        return alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    meta_mask = subset.siteid.str.contains('META')

    # --- Country-level ---
    country_rows = subset[meta_mask].copy()
    country_rows['siteid'] = [name.replace('META-', '') for name in country_rows['siteid']]
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = (
        alt.Chart(country_rows)
        .mark_line(size=2.5, opacity=1)
        .encode(
            x=day_axis(),
            y=auc_axis(),
            color=alt.Color(
                "siteid:N",
                title='Country & Continent',
                scale=country_scale,
                legend=alt.Legend() if with_legend else None,
            ),
        )
        .properties(width=450, height=250)
    )
    country_dots = country_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )
    country_layer = country_line + country_dots

    # --- Site-level ---
    site_rows = subset[~meta_mask].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Transparent base line; the visible point layer is derived from it.
    site_line = (
        alt.Chart(site_rows)
        .mark_line(size=2.5, opacity=0)
        .encode(
            x=day_axis(),
            y=auc_axis(),
            color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
        )
        .properties(width=450, height=250)
    )
    site_dots = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if with_legend else None),
    )
    site_layer = site_line + site_dots

    # --- Combine: site layers first, country layers on top ---
    combined = (site_layer + country_layer).resolve_scale(
        color='independent', size='independent', shape='independent'
    )
    panel_title = {
        "text": [f"{lab}"],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return combined.properties(title=panel_title)

# plot = plot_lab(df=df)

# Arrange one panel per lab on a grid. The rows are derived from the number of
# labs actually present instead of three fixed slices, so a short lab list does
# not yield an empty row and a long one is not cut off after the ninth lab.
LABS_PER_ROW = 3

lab_rows = [
    alt.hconcat(
        *(plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + LABS_PER_ROW]),
        spacing=30,
    )
    for start in range(0, len(unique_labs), LABS_PER_ROW)
]

plot = alt.vconcat(*lab_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [57]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Variation of the lab panel with the country chart stacked above the site chart.

    Rows whose ``siteid`` contains 'META' form the upper chart (lines with
    point markers, x axis labels hidden). The remaining rows form the lower
    chart (points sized by ``N`` over a fully transparent line layer). The two
    charts are concatenated vertically and titled with ``lab``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table. The columns read here are ``lab``, ``siteid``,
        ``day``, ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    Altair vertically concatenated chart.
    """
    # Explicit order for the ordinal axis: '1' ... '14'.
    day_order = [str(n) for n in range(1, 15)]

    subset = df.copy()
    subset = subset[subset.lab == lab]

    # Only this one panel carries the legends.
    with_legend = lab == 'Lactate dehydrogenase (U/L)'

    def day_axis(title, axis):
        return alt.X(
            "day:O",
            title=title,
            axis=axis,
            scale=alt.Scale(clamp=True, nice=False, padding=10),
            sort=list(day_order),
        )

    def auc_axis():
        return alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    meta_mask = subset.siteid.str.contains('META')

    # --- Country-level (upper chart; x title, labels and domain line are hidden) ---
    country_rows = subset[meta_mask].copy()
    country_rows['siteid'] = [name.replace('META-', '') for name in country_rows['siteid']]
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = (
        alt.Chart(country_rows)
        .mark_line(size=2.5, opacity=1)
        .encode(
            x=day_axis(None, alt.Axis(labelAngle=0, tickCount=10, labels=False, domain=False)),
            y=auc_axis(),
            color=alt.Color(
                "siteid:N",
                title='Country & Continent',
                scale=country_scale,
                legend=alt.Legend() if with_legend else None,
            ),
        )
        .properties(width=450, height=200)
    )
    country_dots = country_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )
    country_layer = country_line + country_dots

    # --- Site-level (lower chart) ---
    site_rows = subset[~meta_mask].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Transparent base line; the visible point layer is derived from it.
    site_line = (
        alt.Chart(site_rows)
        .mark_line(size=2.5, opacity=0)
        .encode(
            x=day_axis('Days Since Admission', alt.Axis(labelAngle=0, tickCount=10)),
            y=auc_axis(),
            color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
        )
        .properties(width=450, height=200)
    )
    site_dots = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if with_legend else None),
    )
    site_layer = site_line + site_dots

    # --- Combine: country chart on top of site chart, sharing the y scale ---
    stacked = alt.vconcat(country_layer, site_layer, spacing=10).resolve_scale(
        color='independent', size='independent', shape='independent', y='shared'
    )
    panel_title = {
        "text": [f"{lab}"],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return stacked.properties(title=panel_title)

# plot = plot_lab(df=df)

# Arrange one panel per lab on a grid. The rows are derived from the number of
# labs actually present instead of three fixed slices, so a short lab list does
# not yield an empty row and a long one is not cut off after the ninth lab.
LABS_PER_ROW = 3

lab_rows = [
    alt.hconcat(
        *(plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + LABS_PER_ROW]),
        spacing=30,
    )
    for start in range(0, len(unique_labs), LABS_PER_ROW)
]

plot = alt.vconcat(*lab_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [98]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Variation of the lab panel with two stacked charts that each show both groups.

    The upper chart draws the 'META' (country/continent) rows at full opacity
    over dimmed site-level points. The lower chart draws the site-level points
    at their regular opacity under dimmed country lines and points. The two
    charts are concatenated vertically and titled with ``lab``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table. The columns read here are ``lab``, ``siteid``,
        ``day``, ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    Altair vertically concatenated chart.
    """
    # Explicit order for the ordinal axis: '1' ... '14'.
    day_order = [str(n) for n in range(1, 15)]
    independent_scales = dict(color='independent', size='independent', shape='independent', y='shared')

    subset = df.copy()
    subset = subset[subset.lab == lab]

    # Only this one panel carries the legends.
    with_legend = lab == 'Lactate dehydrogenase (U/L)'

    def day_axis(title, axis):
        return alt.X(
            "day:O",
            title=title,
            axis=axis,
            scale=alt.Scale(clamp=True, nice=False, padding=10),
            sort=list(day_order),
        )

    def auc_axis():
        return alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    meta_mask = subset.siteid.str.contains('META')

    # --- Country-level (x title, labels and domain line are hidden) ---
    country_rows = subset[meta_mask].copy()
    country_rows['siteid'] = [name.replace('META-', '') for name in country_rows['siteid']]
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = (
        alt.Chart(country_rows)
        .mark_line(size=2.5, opacity=1)
        .encode(
            x=day_axis(None, alt.Axis(labelAngle=0, tickCount=10, labels=False, domain=False)),
            y=auc_axis(),
            color=alt.Color(
                "siteid:N",
                title='Country & Continent',
                scale=country_scale,
                legend=alt.Legend() if with_legend else None,
            ),
        )
        .properties(width=450, height=200)
    )
    country_dots = country_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )
    country_layer = country_line + country_dots

    # Dimmed copies of the country marks, for the chart that highlights the sites.
    country_line_dim = country_line.mark_line(opacity=0.1, size=2.5)
    country_dots_dim = country_dots.mark_point(filled=True, opacity=0.1, size=30)
    country_layer_dim = country_dots_dim + country_line_dim

    # --- Site-level ---
    site_rows = subset[~meta_mask].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Transparent base line; the visible point layer is derived from it.
    site_line = (
        alt.Chart(site_rows)
        .mark_line(size=2.5, opacity=0)
        .encode(
            x=day_axis('Days Since Admission', alt.Axis(labelAngle=0, tickCount=10)),
            y=auc_axis(),
            color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
        )
        .properties(width=450, height=200)
    )
    site_dots = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if with_legend else None),
    )
    site_layer = site_line + site_dots

    # Dimmed copy of the site points, for the chart that highlights the countries.
    site_layer_dim = site_dots.mark_point(filled=True, opacity=0.1, size=150)

    # --- Combine ---
    upper = alt.layer(site_layer_dim, country_layer).resolve_scale(**independent_scales)
    under = alt.layer(site_layer, country_layer_dim).resolve_scale(**independent_scales)

    stacked = alt.vconcat(upper, under, spacing=10).resolve_scale(**independent_scales)
    panel_title = {
        "text": [f"{lab}"],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return stacked.properties(title=panel_title)

# plot = plot_lab(df=df)

# Arrange one panel per lab on a grid. The rows are derived from the number of
# labs actually present instead of three fixed slices, so a short lab list does
# not yield an empty row and a long one is not cut off after the ninth lab.
LABS_PER_ROW = 3

lab_rows = [
    alt.hconcat(
        *(plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + LABS_PER_ROW]),
        spacing=30,
    )
    for start in range(0, len(unique_labs), LABS_PER_ROW)
]

plot = alt.vconcat(*lab_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [81]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Variation of the lab panel with three stacked charts.

    From top to bottom: the 'META' (country/continent) rows as lines with
    point markers, the site-level rows as points, and a bar chart of ``N`` per
    day coloured by site. The first two are concatenated with a shared y
    scale; the bar chart is then appended with an independent y scale. The
    result is titled with ``lab``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table. The columns read here are ``lab``, ``siteid``,
        ``day``, ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    Altair vertically concatenated chart.
    """
    # Explicit order for the ordinal axis: '1' ... '14'.
    day_order = [str(n) for n in range(1, 15)]

    subset = df.copy()
    subset = subset[subset.lab == lab]

    # Only this one panel carries the country legend.
    with_legend = lab == 'Lactate dehydrogenase (U/L)'

    def day_axis(title, axis):
        return alt.X(
            "day:O",
            title=title,
            axis=axis,
            scale=alt.Scale(clamp=True, nice=False, padding=10),
            sort=list(day_order),
        )

    def labelled_day_axis():
        return day_axis('Days Since Admission', alt.Axis(labelAngle=0, tickCount=10))

    def auc_axis():
        return alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    meta_mask = subset.siteid.str.contains('META')

    # --- Country-level (x title, labels and domain line are hidden) ---
    country_rows = subset[meta_mask].copy()
    country_rows['siteid'] = [name.replace('META-', '') for name in country_rows['siteid']]
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = (
        alt.Chart(country_rows)
        .mark_line(size=2.5, opacity=1)
        .encode(
            x=day_axis(None, alt.Axis(labelAngle=0, tickCount=10, labels=False, domain=False)),
            y=auc_axis(),
            color=alt.Color(
                "siteid:N",
                title='Country & Continent',
                scale=country_scale,
                legend=alt.Legend() if with_legend else None,
            ),
        )
        .properties(width=450, height=200)
    )
    country_dots = country_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )
    country_layer = country_line + country_dots

    # --- Site-level ---
    site_rows = subset[~meta_mask].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    site_dots = (
        alt.Chart(site_rows)
        .mark_point(size=32.5, opacity=0.8)
        .encode(
            x=labelled_day_axis(),
            y=auc_axis(),
            color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
        )
        .properties(width=450, height=200)
    )

    # Bars of N per day, coloured by site.
    size_bars = (
        alt.Chart(site_rows)
        .mark_bar(size=20)
        .encode(
            x=labelled_day_axis(),
            color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
            y=alt.Y('N:Q', title='Sample Size'),
        )
        .properties(width=450, height=200)
    )

    # --- Combine ---
    auc_stack = alt.vconcat(country_layer, site_dots, spacing=10).resolve_scale(
        color='independent', size='independent', shape='independent', y='shared'
    )
    full_stack = alt.vconcat(auc_stack, size_bars, spacing=10).resolve_scale(
        color='independent', size='independent', shape='independent', y='independent'
    )
    panel_title = {
        "text": [f"{lab}"],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return full_stack.properties(title=panel_title)

# plot = plot_lab(df=df)

# Arrange one panel per lab on a grid. The rows are derived from the number of
# labs actually present instead of three fixed slices, so a short lab list does
# not yield an empty row and a long one is not cut off after the ninth lab.
LABS_PER_ROW = 3

lab_rows = [
    alt.hconcat(
        *(plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + LABS_PER_ROW]),
        spacing=30,
    )
    for start in range(0, len(unique_labs), LABS_PER_ROW)
]

plot = alt.vconcat(*lab_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# All Labs

In [6]:
# Read the wide all-labs classification table (one column per day) and reshape it
# to long format: one row per (lab, siteid, day) with the cell value in `value`.
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.all.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Upper-case the site identifiers and relabel 'NORTH AMERICA' as 'USA'.
df['siteid'] = [site.upper().replace('NORTH AMERICA', 'USA') for site in df['siteid']]
# Swap raw lab identifiers for display labels; an unknown identifier raises KeyError.
df['lab'] = [consistent_loinc[raw_name] for raw_name in df['lab']]
# Strip the 'day' text from the melted column names; the values stay strings.
df['day'] = [column_name.replace('day', '') for column_name in df['day']]

unique_labs = df['lab'].unique().tolist()
print(unique_labs)
print(len(unique_labs))

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach the per-site N from the totals table; sites without an entry get a missing value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].apply(dic.get)

df

['Alanine aminotransferase (U/L)', 'Albumin (g/dL)', 'Aspartate aminotransferase (U/L)', 'C-reactive protein (Normal Sensitivity) (mg/dL)', 'Cardiac troponin High Sensitivity (ng/mL)', 'Cardiac troponin Normal Sensitivity (ng/mL)', 'Creatinine (mg/dL)', 'D-dimer (ng/mL)', 'Ferritin (ng/mL)', 'Fibrinogen (mg/dL)', 'Lactate dehydrogenase (U/L)', 'Lymphocyte count (10*3/uL)', 'Neutrophil count (10*3/uL)', 'Procalcitonin (ng/mL)', 'Prothrombin time (s)', 'Total bilirubin (mg/dL)', 'White blood cell count (10*3/uL)']
17
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']


Unnamed: 0,lab,siteid,day,value,N
0,Alanine aminotransferase (U/L),META-USA,1,0.536670,
1,Alanine aminotransferase (U/L),META-EUROPE,1,0.573365,
2,Alanine aminotransferase (U/L),META-FRANCE,1,0.579315,
3,Alanine aminotransferase (U/L),META-GERMANY,1,0.612328,
4,Alanine aminotransferase (U/L),META-ITALY,1,0.581525,
...,...,...,...,...,...
4755,White blood cell count (10*3/uL),VA1,14,0.436928,5578.0
4756,White blood cell count (10*3/uL),VA2,14,0.641065,6468.0
4757,White blood cell count (10*3/uL),VA3,14,0.673743,6649.0
4758,White blood cell count (10*3/uL),VA4,14,0.670703,5571.0


In [7]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the layered AUC-by-day chart for a single lab.

    Site-level rows are drawn as semi-transparent points sized by ``N``; rows
    whose ``siteid`` contains ``META`` are drawn on top as colored lines with
    small point markers.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``day``, ``value`` and
            ``N`` columns. Must be supplied; ``None`` is only a placeholder.
        lab: Display name of the lab to plot. Used verbatim as the chart title.
        is_country: Not read by this function; kept so existing calls work.

    Returns:
        The layered Altair chart (site layer underneath, country layer above).

    Reads the notebook-level ``countries``, ``country_colors``, ``sites`` and
    ``site_colors`` lists to build the color scales.
    """
    lab_rows = df[df.lab == lab]
    is_meta = lab_rows.siteid.str.contains('META')

    # Only one panel of the grid carries the legends.
    show_legend = lab == 'Aspartate aminotransferase (U/L)'

    x_enc = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )
    y_enc = alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10))

    # --- Country-level layer: one line (plus markers) per META-* series ---
    country_rows = lab_rows[is_meta].copy()
    country_rows['siteid'] = country_rows.siteid.str.replace('META-', '', regex=False)
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_lines = alt.Chart(country_rows).mark_line(
        size=2.5,
        opacity=1
    ).encode(
        x=x_enc,
        y=y_enc,
        color=alt.Color(
            "siteid:N",
            title='Country & Continent',
            scale=country_scale,
            legend=alt.Legend() if show_legend else None,
        ),
    ).properties(
        width=450,
        height=250
    )
    country_points = country_lines.mark_point(
        filled=True,
        opacity=1,
        size=30
    ).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale)
    )
    country_layer = country_lines + country_points

    # --- Site-level layer: invisible lines, points sized by sample size ---
    site_rows = lab_rows[~is_meta].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    site_lines = alt.Chart(site_rows).mark_line(
        size=2.5,
        opacity=0  # the line itself is hidden; only the derived points show
    ).encode(
        x=x_enc,
        y=y_enc,
        color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )
    site_points = site_lines.mark_point(
        filled=True,
        opacity=0.5,
        size=150
    ).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if show_legend else None),
    )
    site_layer = site_lines + site_points

    # --- Combine: sites underneath, countries on top, each with its own scales ---
    combined = (site_layer + country_layer).resolve_scale(
        color='independent', size='independent', shape='independent'
    )

    return combined.properties(
        title={
            # Use the lab name as-is: str.capitalize() lower-cases everything
            # after the first character, which corrupted units such as
            # "U/L" -> "u/l" and "mg/dL" -> "mg/dl".
            "text": [
                f"{lab}",
            ],
            "dx": 50,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

# plot = plot_lab(df=df)

# Arrange the per-lab charts in a grid of six rows, three labs per row
# (slices 0:3, 3:6, ..., 15:18 of unique_labs).
p1, p2, p3, p4, p5, p6 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
        spacing=30,
    )
    for start in range(0, 18, 3)
)

plot = alt.vconcat(p1, p2, p3, p4, p5, p6, spacing=30)

# Apply the shared notebook theme to the assembled grid.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [8]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart one lab's per-day values, encoding site sample size as opacity.

    Site rows become points whose opacity follows ``N``; rows whose ``siteid``
    contains ``META`` become colored lines with markers drawn above them.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``day``, ``value``, ``N``.
        lab: Display name of the lab to plot; also the chart title.
        is_country: Not read by this function.

    Returns:
        The layered Altair chart.

    Uses the notebook-level ``countries``, ``country_colors``, ``sites`` and
    ``site_colors`` lists.
    """
    selected = df[df.lab == lab]
    meta_mask = selected.siteid.str.contains('META')

    def legend_or_none():
        # Legends are attached to a single panel of the grid.
        return alt.Legend() if lab == 'Lactate dehydrogenase (U/L)' else None

    day_axis = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )
    value_axis = alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10))

    # Country-level series (META-* rows, prefix stripped for display).
    meta_rows = selected[meta_mask].copy()
    meta_rows['siteid'] = meta_rows.siteid.str.replace('META-', '', regex=False)
    meta_colors = alt.Scale(domain=countries, range=country_colors)

    meta_line = alt.Chart(meta_rows).mark_line(size=2.5, opacity=1).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors, legend=legend_or_none()),
    ).properties(width=450, height=250)
    meta_marker = meta_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors)
    )

    # Site-level series: the line is fully transparent, only points are visible.
    site_rows = selected[~meta_mask].copy()

    site_line = alt.Chart(site_rows).mark_line(size=2.5, opacity=0).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
    ).properties(width=450, height=250)
    site_marker = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        opacity=alt.Opacity('N:Q', title='Sample Size', scale=alt.Scale(range=[0, 1]), legend=legend_or_none()),
    )

    # Sites first so the country lines are drawn on top; scales stay per-layer.
    layered = ((site_line + site_marker) + (meta_line + meta_marker)).resolve_scale(
        color='independent', size='independent', shape='independent', opacity='independent'
    )

    chart_title = {
        "text": [
            f"{lab}",
        ],
        "dx": 30,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return layered.properties(title=chart_title)

# plot = plot_lab(df=df)

# Three rows of three charts, covering the first nine labs in unique_labs.
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
        spacing=30,
    )
    for start in (0, 3, 6)
)

plot = alt.vconcat(p1, p2, p3, spacing=30)

# Apply the shared notebook theme to the assembled grid.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [9]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart one lab's per-day values with shape encodings on both layers.

    Same layering as the opacity variant (site points under country lines),
    with an additional shape channel per layer.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``day``, ``value``, ``N``.
        lab: Display name of the lab to plot; also the chart title.
        is_country: Not read by this function.

    Returns:
        The layered Altair chart.

    Uses the notebook-level ``countries``, ``country_colors``, ``sites``,
    ``site_colors`` and ``site_shapes``.
    NOTE(review): ``site_shapes`` is not defined in the visible part of the
    notebook -- confirm an earlier cell provides it.
    """
    selected = df[df.lab == lab]
    meta_mask = selected.siteid.str.contains('META')

    def legend_or_none():
        # Legends are attached to a single panel of the grid.
        return alt.Legend() if lab == 'Lactate dehydrogenase (U/L)' else None

    day_axis = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )
    value_axis = alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10))

    # Country-level series (META-* rows, prefix stripped for display).
    meta_rows = selected[meta_mask].copy()
    meta_rows['siteid'] = meta_rows.siteid.str.replace('META-', '', regex=False)
    meta_colors = alt.Scale(domain=countries, range=country_colors)

    meta_line = alt.Chart(meta_rows).mark_line(size=2.5, opacity=1).encode(
        x=day_axis,
        y=value_axis,
        shape=alt.Shape(
            "siteid:N",
            title='Continent',
            scale=alt.Scale(domain=['USA', 'EUROPE'], range=['diamond', 'circle']),
            legend=legend_or_none(),
        ),
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors, legend=legend_or_none()),
    ).properties(width=450, height=250)
    meta_marker = meta_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors)
    )

    # Site-level series: the line is fully transparent, only points are visible.
    site_rows = selected[~meta_mask].copy()

    site_line = alt.Chart(site_rows).mark_line(size=2.5, opacity=0).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
    ).properties(width=450, height=250)
    site_marker = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        shape=alt.Shape("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_shapes), legend=None),
        opacity=alt.Opacity('N:Q', title='Sample Size', scale=alt.Scale(range=[0, 1]), legend=legend_or_none()),
    )

    # Sites first so the country lines are drawn on top; scales stay per-layer.
    layered = ((site_line + site_marker) + (meta_line + meta_marker)).resolve_scale(
        color='independent', size='independent', shape='independent', opacity='independent'
    )

    chart_title = {
        "text": [
            f"{lab}",
        ],
        "dx": 30,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return layered.properties(title=chart_title)

# plot = plot_lab(df=df)

# Three rows of three charts, covering the first nine labs in unique_labs.
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
        spacing=30,
    )
    for start in (0, 3, 6)
)

plot = alt.vconcat(p1, p2, p3, spacing=30)

# Apply the shared notebook theme to the assembled grid.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Positive Predictive Value (PPV)

In [10]:
# Load the PPV table and reshape it to long form: one row per (lab, site, day).
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.ppv.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Normalize identifiers: upper-case site ids (with 'NORTH AMERICA' shown as
# 'USA'), human-readable lab names, and day labels without the 'day' prefix.
df['siteid'] = df['siteid'].str.upper().str.replace('NORTH AMERICA', 'USA', regex=False)
df['lab'] = df['lab'].map(lambda code: consistent_loinc[code])  # KeyError on an unmapped lab
df['day'] = df['day'].str.replace('day', '', regex=False)

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Per-site sample sizes; site ids missing from the table (e.g. the META-* rows)
# get no N.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].map(dic.get)

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'FRBDX', 'NWU', 'UCLA', 'UKFR', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'MGB', 'UMICH']


Unnamed: 0,lab,siteid,day,value,N
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.230815,
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.283767,
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.287519,
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.556042,
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.185898,
...,...,...,...,...,...
1325,Neutrophil count (10*3/uL),VA1,14,0.807621,5578.0
1326,Neutrophil count (10*3/uL),VA2,14,0.306491,6468.0
1327,Neutrophil count (10*3/uL),VA3,14,0.410271,6649.0
1328,Neutrophil count (10*3/uL),VA4,14,0.602689,5571.0


In [11]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart one lab's per-day PPV: site points sized by N under country lines.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``day``, ``value``, ``N``.
        lab: Display name of the lab to plot; also the chart title.
        is_country: Not read by this function.

    Returns:
        The layered Altair chart.

    Uses the notebook-level ``countries``, ``country_colors``, ``sites`` and
    ``site_colors`` lists.
    """
    selected = df[df.lab == lab]
    meta_mask = selected.siteid.str.contains('META')

    def legend_or_none():
        # Legends are attached to a single panel of the grid.
        return alt.Legend() if lab == 'Lactate dehydrogenase (U/L)' else None

    day_axis = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )
    value_axis = alt.Y("value:Q", title='PPV', scale=alt.Scale(zero=False, nice=False, padding=10))

    # Country-level series (META-* rows, prefix stripped for display).
    meta_rows = selected[meta_mask].copy()
    meta_rows['siteid'] = meta_rows.siteid.str.replace('META-', '', regex=False)
    meta_colors = alt.Scale(domain=countries, range=country_colors)

    meta_line = alt.Chart(meta_rows).mark_line(size=2.5, opacity=1).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors, legend=legend_or_none()),
    ).properties(width=450, height=250)
    meta_marker = meta_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors)
    )

    # Site-level series: the line is fully transparent, only points are visible.
    site_rows = selected[~meta_mask].copy()

    site_line = alt.Chart(site_rows).mark_line(size=2.5, opacity=0).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
    ).properties(width=450, height=250)
    site_marker = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=legend_or_none()),
    )

    # Sites first so the country lines are drawn on top; scales stay per-layer.
    layered = ((site_line + site_marker) + (meta_line + meta_marker)).resolve_scale(
        color='independent', size='independent', shape='independent'
    )

    chart_title = {
        "text": [
            f"{lab}",
        ],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return layered.properties(title=chart_title)

# plot = plot_lab(df=df)

# Lay the per-lab charts out three to a row, creating only as many rows as
# there are labs. The previous fixed 0:3 / 3:6 / 6:9 slices assumed nine labs;
# this table has six, so the third row was an empty hconcat.
plot = alt.vconcat(
    *(
        alt.hconcat(
            *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
            spacing=30,
        )
        for start in range(0, len(unique_labs), 3)
    ),
    spacing=30,
)

# Apply the shared notebook theme to the assembled grid.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# True Positive Rate (TPR)

In [12]:
# Load the TPR table and reshape it to long form: one row per (lab, site, day).
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.tpr.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Normalize identifiers: upper-case site ids (with 'NORTH AMERICA' shown as
# 'USA'), human-readable lab names, and day labels without the 'day' prefix.
df['siteid'] = df['siteid'].str.upper().str.replace('NORTH AMERICA', 'USA', regex=False)
df['lab'] = df['lab'].map(lambda code: consistent_loinc[code])  # KeyError on an unmapped lab
df['day'] = df['day'].str.replace('day', '', regex=False)

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Per-site sample sizes; site ids missing from the table (e.g. the META-* rows)
# get no N.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].map(dic.get)

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'FRBDX', 'NWU', 'UCLA', 'UKFR', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'MGB', 'UMICH']


Unnamed: 0,lab,siteid,day,value,N
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.182012,
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.195493,
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.193570,
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.362230,
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.183067,
...,...,...,...,...,...
1325,Neutrophil count (10*3/uL),VA1,14,0.546465,5578.0
1326,Neutrophil count (10*3/uL),VA2,14,0.209064,6468.0
1327,Neutrophil count (10*3/uL),VA3,14,0.256425,6649.0
1328,Neutrophil count (10*3/uL),VA4,14,0.351594,5571.0


In [13]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart one lab's per-day TPR: site points sized by N under country lines.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``day``, ``value``, ``N``.
        lab: Display name of the lab to plot; also the chart title.
        is_country: Not read by this function.

    Returns:
        The layered Altair chart.

    Uses the notebook-level ``countries``, ``country_colors``, ``sites`` and
    ``site_colors`` lists.
    """
    selected = df[df.lab == lab]
    meta_mask = selected.siteid.str.contains('META')

    def legend_or_none():
        # Legends are attached to a single panel of the grid.
        return alt.Legend() if lab == 'Lactate dehydrogenase (U/L)' else None

    day_axis = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )
    value_axis = alt.Y("value:Q", title='TPR', scale=alt.Scale(zero=False, nice=False, padding=10))

    # Country-level series (META-* rows, prefix stripped for display).
    meta_rows = selected[meta_mask].copy()
    meta_rows['siteid'] = meta_rows.siteid.str.replace('META-', '', regex=False)
    meta_colors = alt.Scale(domain=countries, range=country_colors)

    meta_line = alt.Chart(meta_rows).mark_line(size=2.5, opacity=1).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors, legend=legend_or_none()),
    ).properties(width=450, height=250)
    meta_marker = meta_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=meta_colors)
    )

    # Site-level series: the line is fully transparent, only points are visible.
    site_rows = selected[~meta_mask].copy()

    site_line = alt.Chart(site_rows).mark_line(size=2.5, opacity=0).encode(
        x=day_axis,
        y=value_axis,
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
    ).properties(width=450, height=250)
    site_marker = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=legend_or_none()),
    )

    # Sites first so the country lines are drawn on top; scales stay per-layer.
    layered = ((site_line + site_marker) + (meta_line + meta_marker)).resolve_scale(
        color='independent', size='independent', shape='independent'
    )

    chart_title = {
        "text": [
            f"{lab}",
        ],
        "dx": 50,
        'fontSize': 18,
        "subtitleColor": "gray",
    }
    return layered.properties(title=chart_title)

# plot = plot_lab(df=df)

# Lay the per-lab charts out three to a row, creating only as many rows as
# there are labs. The previous fixed 0:3 / 3:6 / 6:9 slices assumed nine labs;
# this table has six, so the third row was an empty hconcat.
plot = alt.vconcat(
    *(
        alt.hconcat(
            *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
            spacing=30,
        )
        for start in range(0, len(unique_labs), 3)
    ),
    spacing=30,
)

# Apply the shared notebook theme to the assembled grid.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Comparison: Aggregated vs. Patient-Level Data

In [14]:
# Load the comparison table and reshape it to long form: one row per
# (lab, site, phase, day).
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.comparison.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid', 'phase'], var_name='day', value_name='value')
)

# Fix the 'Eurpoe' typo present in the source file, then normalize site ids.
df['siteid'] = (
    df['siteid']
    .str.replace('Eurpoe', 'Europe', regex=False)
    .str.upper()
    .str.replace('NORTH AMERICA', 'USA', regex=False)
)
df['lab'] = df['lab'].map(lambda code: consistent_loinc[code])  # KeyError on an unmapped lab
df['day'] = df['day'].str.replace('day', '', regex=False)

# Build the combined series key from the raw phase value before phase is
# relabelled below.
df['siteid-phase'] = df['siteid'] + df['phase'].astype(str)
df['phase'] = df['phase'].map(lambda phase: 'Aggregated' if phase == 1 else 'Patient-Level')

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)']
['META-USA', 'META-EUROPE']


Unnamed: 0,lab,siteid,phase,day,value,siteid-phase
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,Patient-Level,1,0.673722,META-USA2
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Patient-Level,1,0.684534,META-EUROPE2
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,Aggregated,1,0.647449,META-USA1
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Aggregated,1,0.681337,META-EUROPE1
4,Albumin (g/dL),META-USA,Patient-Level,1,0.604223,META-USA2
...,...,...,...,...,...,...
107,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Aggregated,14,0.637184,META-EUROPE1
108,Albumin (g/dL),META-USA,Patient-Level,14,0.699270,META-USA2
109,Albumin (g/dL),META-EUROPE,Patient-Level,14,0.779368,META-EUROPE2
110,Albumin (g/dL),META-USA,Aggregated,14,0.684549,META-USA1


In [15]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart one lab's per-day values for each site/phase series.

    Each ``siteid-phase`` series is drawn as a line colored by site, with
    white-filled markers whose outline color follows the site and whose shape
    follows the phase.

    Args:
        df: Long-form frame with ``lab``, ``siteid``, ``phase``, ``day``,
            ``value`` and ``siteid-phase`` columns. Must be supplied; ``None``
            is only a placeholder.
        lab: Display name of the lab to plot; also the chart title.
        is_country: Not read by this function; kept so existing calls work.

    Returns:
        The layered Altair chart (lines plus markers).
    """
    # Copy after filtering so the column assignment below works on an
    # independent frame rather than a slice (avoids SettingWithCopyWarning).
    d = df[df.lab == lab].copy()
    d['siteid'] = d.siteid.str.replace('META-', '', regex=False)

    # Legends are attached to a single panel only.
    show_legend = lab == 'Albumin (g/dL)'

    # The line color is keyed on 'siteid-phase', which still carries the
    # 'META-' prefix: both phases of a site share that site's color.
    series_ids = ['META-USA1', 'META-USA2', 'META-EUROPE1', 'META-EUROPE2']
    series_colors = ['#D45E00', '#D45E00', '#57B4E9', '#57B4E9']
    color_scale = alt.Scale(domain=series_ids, range=series_colors)

    lines = alt.Chart(
        d
    ).mark_line(
        size=2.5,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 1])),
        color=alt.Color("siteid-phase:N", title='Country', scale=color_scale, legend=None),
        shape=alt.Shape("phase:N", title='Data', scale=alt.Scale(range=['circle', 'diamond']), legend=alt.Legend(symbolStrokeWidth=2, symbolFillColor='white', symbolStrokeColor='gray') if show_legend else None)
    ).properties(
        width=450,
        height=250
    )

    # Markers: white fill, outline colored by site, shape by phase.
    points = lines.mark_point(
        filled=True,
        opacity=1
    ).encode(
        color=alt.value("white"),
        stroke=alt.Color("siteid:N", title='Country', scale=alt.Scale(domain=['USA', 'EUROPE'], range=['#D45E00', '#57B4E9']), legend=alt.Legend() if show_legend else None),
        shape=alt.Shape("phase:N", title='Data', scale=alt.Scale(range=['circle', 'diamond']), legend=alt.Legend() if show_legend else None)
    )

    # The end-of-line text-label layer that used to be built here was never
    # added to the returned chart, so it has been removed.
    plot = lines + points

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

    return plot

# plot = plot_lab(df=df)

# One chart per lab, side by side. Line color is shared across panels, while
# marker stroke and shape scales are resolved per panel.
plot = alt.hconcat(
    *[plot_lab(df=df, lab=lab, is_country=False) for lab in unique_labs],
    spacing=30,
).resolve_scale(color='shared', stroke='independent', shape='independent')

# Apply the shared notebook theme to the assembled row.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
    point_size=100,
)

plot

# Prediction Baseline

In [16]:
# Load the baseline-lab prediction table and reshape it to long form: one row
# per (site, lab, day).
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.baselineLab.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['siteid', 'lab'], var_name='day', value_name='value')
)

# Fix the 'Eurpoe' typo if present in the source file, then normalize site ids.
df['siteid'] = (
    df['siteid']
    .str.replace('Eurpoe', 'Europe', regex=False)
    .str.upper()
    .str.replace('NORTH AMERICA', 'USA', regex=False)
)
df['day'] = df['day'].str.replace('day', '', regex=False)
df['lab'] = df['lab'].map(lambda code: consistent_loinc[code])  # KeyError on an unmapped lab

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Per-site sample sizes; site ids missing from the table (e.g. the META* rows)
# get no N.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].map(dic.get)

# NOTE: this rebinds the notebook-level `countries` / `country_colors` used by
# the plotting cells; cells run after this one see these values instead of the
# ones set in the configuration cell.
countries = ['META', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

['Alanine aminotransferase (U/L)', 'Albumin (g/dL)', 'Aspartate aminotransferase (U/L)', 'Creatinine (mg/dL)', 'C-reactive protein (Normal Sensitivity) (mg/dL)', 'Total bilirubin (mg/dL)', 'White blood cell count (10*3/uL)', 'Lymphocyte count (10*3/uL)', 'Neutrophil count (10*3/uL)']
['META', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'META-USA', 'META-EUROPE', 'META-US', 'META-FRANCE', 'META-GERMANY']


Unnamed: 0,siteid,lab,day,value,N
0,META,Alanine aminotransferase (U/L),1,0.476701,
1,APHP,Alanine aminotransferase (U/L),1,0.550915,17513.0
2,BIDMC,Alanine aminotransferase (U/L),1,,1227.0
3,FRBDX,Alanine aminotransferase (U/L),1,0.494597,1317.0
4,MGB,Alanine aminotransferase (U/L),1,0.484481,4427.0
...,...,...,...,...,...
2613,META-USA,Neutrophil count (10*3/uL),14,0.589044,
2614,META-EUROPE,Neutrophil count (10*3/uL),14,0.584900,
2615,META-US,Neutrophil count (10*3/uL),14,0.589044,
2616,META-FRANCE,Neutrophil count (10*3/uL),14,0.585736,


In [17]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Build one layered AUC-over-time chart for a single baseline lab.

    Two groups of layers are combined:

    - site-level: one semi-transparent point per site and day, sized by the
      site's sample size `N` (the connecting line is drawn with opacity 0,
      i.e. it is invisible);
    - country/continent-level: a line plus filled points for every row whose
      `siteid` contains 'META', with the 'META-' prefix stripped.

    Legends are only attached to the 'Aspartate aminotransferase (U/L)' panel.

    The function reads the notebook-level `countries`, `country_colors`,
    `sites` and `site_colors` lists as they are defined at call time.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with `siteid`, `lab`, `day`, `value` and `N` columns.
    lab : str
        Value of the `lab` column to plot; also used as the chart title.
    is_country : bool
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    The layered Altair chart (site layers below, country layers on top).

    Raises
    ------
    ValueError
        If `df` is not provided.
    """
    if df is None:
        raise ValueError("plot_lab() needs a DataFrame passed as `df`")

    d = df[df.lab == lab]
    show_legend = lab == 'Aspartate aminotransferase (U/L)'

    def legend():
        # A fresh Legend where legends are wanted, otherwise None (no legend).
        return alt.Legend() if show_legend else None

    # Both layer groups share the same x encoding.
    x = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )

    # ---- Country-level ----
    dm = d[d.siteid.str.contains('META')].copy()
    dm.siteid = dm.siteid.apply(lambda s: s.replace('META-', ''))
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = alt.Chart(
        dm
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=x,
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale, legend=legend()),
    ).properties(
        width=450,
        height=250
    )

    # The points reuse the line's colour encoding so that the `legend` setting
    # applies to both layers; re-encoding colour without `legend=` would fall
    # back to Altair's default legend on this layer.
    country_points = country_line.mark_point(
        filled=True,
        opacity=1,
        size=50
    )

    cp = country_line + country_points

    # ---- Site-level ----
    dm = d[~d.siteid.str.contains('META')].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Invisible line (opacity 0); it still carries the encodings and the fixed
    # y domain that the site points inherit.
    site_line = alt.Chart(
        dm
    ).mark_line(
        size=2.5,
        opacity=0
    ).encode(
        x=x,
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.3, 1], clamp=True)),
        color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )

    site_points = site_line.mark_point(
        filled=True,
        opacity=0.3,
        size=150
    ).encode(
        size=alt.Size('N:Q', title='Sample Size', legend=legend()),
    )

    sp = site_line + site_points

    # ---- Combine ----
    # Independent colour/size/shape scales keep the site palette and the
    # country palette from being merged into one scale.
    plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 50,
            "fontSize": 18,
            "subtitleColor": "gray",
        }
    )

    return plot

# Arrange the nine lab panels as three rows of three charts each.
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=name, is_country=True) for name in unique_labs[lo:hi]],
        spacing=30,
    )
    for lo, hi in ((0, 3), (3, 6), (6, 9))
)

plot = alt.vconcat(p1, p2, p3, spacing=30)

# No figure-level title is set; every panel carries its own title.
plot = apply_theme(
    plot,
    title_anchor='start',
    subtitle_font_size=18,
    header_label_font_size=16,
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    legend_orient='right',
    legend_title_orient='top',
)

plot

# Prediction Cov

In [18]:
# Load the per-site AUC table for the covariate ('dem+cls+...') models.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.cov.csv"))

df = df.drop(columns=['Unnamed: 0'])
# Wide -> long: every column other than siteid/model becomes a (day, value) row.
df = pd.melt(df, id_vars=['siteid', 'model'], var_name='day', value_name='value')
# Fix the 'Eurpoe' typo found in the raw file and rename 'NORTH AMERICA' to 'USA'.
df['siteid'] = df.siteid.apply(lambda x: x.replace('Eurpoe', 'Europe').upper().replace('NORTH AMERICA', 'USA'))
# Strip the 'day' prefix and store an integer: the charts encode this column as
# quantitative (`day:Q`), so it must not stay a string (string days would sort
# as '1', '10', '11', ..., '2').
df['day'] = df.day.apply(lambda x: int(x.replace('day', '')))

unique_models = df.model.unique().tolist()
print(unique_models)

unique_sites = df.siteid.unique().tolist()
print(unique_sites)

# Attach each site's total sample size; ids absent from the table (e.g. the
# 'META...' aggregate rows) get NaN.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = pd.Series(sdf.N.values, index=sdf.siteid).to_dict()

df['N'] = df.siteid.map(dic)

# NOTE: these overwrite the notebook-level `countries` / `country_colors`
# that the plotting function reads at call time.
countries = ['META', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

['dem+cls+9lab', 'dem+cls+3lab']
['META', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'META-US', 'META-FRANCE', 'META-GERMANY', 'META-USA', 'META-EUROPE']


Unnamed: 0,siteid,model,day,value,N
0,META,dem+cls+9lab,1,0.812247,
1,APHP,dem+cls+9lab,1,0.821831,17513.0
2,BIDMC,dem+cls+9lab,1,,1227.0
3,FRBDX,dem+cls+9lab,1,0.515293,1317.0
4,MGB,dem+cls+9lab,1,0.853286,4427.0
...,...,...,...,...,...
583,META-US,dem+cls+3lab,14,0.729223,
584,META-FRANCE,dem+cls+3lab,14,0.778113,
585,META-GERMANY,dem+cls+3lab,14,0.732580,
586,META-USA,dem+cls+3lab,14,0.729223,


In [19]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Build one layered AUC-over-time chart for a single prediction model.

    Note: this redefines the `plot_lab` used for the baseline-lab figure; here
    `lab` is matched against the `model` column.

    Two groups of layers are combined:

    - site-level: one semi-transparent point per site and day, sized by the
      site's sample size `N` (the connecting line is drawn with opacity 0,
      i.e. it is invisible);
    - country/continent-level: a line plus filled points for every row whose
      `siteid` contains 'META', with the 'META-' prefix stripped.

    Legends are only attached to the 'dem+cls+3lab' panel.

    The function reads the notebook-level `countries`, `country_colors`,
    `sites` and `site_colors` lists as they are defined at call time.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with `siteid`, `model`, `day`, `value` and `N` columns.
    lab : str
        Value of the `model` column to plot; also used as the chart title.
        The default is a lab name carried over from the baseline-lab version,
        so callers should always pass a model name explicitly.
    is_country : bool
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    The layered Altair chart (site layers below, country layers on top).

    Raises
    ------
    ValueError
        If `df` is not provided.
    """
    if df is None:
        raise ValueError("plot_lab() needs a DataFrame passed as `df`")

    d = df[df.model == lab]
    show_legend = lab == 'dem+cls+3lab'

    def legend():
        # A fresh Legend where legends are wanted, otherwise None (no legend).
        return alt.Legend() if show_legend else None

    # Both layer groups share the same x encoding.
    x = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )

    # ---- Country-level ----
    dm = d[d.siteid.str.contains('META')].copy()
    dm.siteid = dm.siteid.apply(lambda s: s.replace('META-', ''))
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = alt.Chart(
        dm
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=x,
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_scale, legend=legend()),
    ).properties(
        width=450,
        height=250
    )

    # The points reuse the line's colour encoding so that the `legend` setting
    # applies to both layers; re-encoding colour without `legend=` would fall
    # back to Altair's default legend on this layer.
    country_points = country_line.mark_point(
        filled=True,
        opacity=1,
        size=50
    )

    cp = country_line + country_points

    # ---- Site-level ----
    dm = d[~d.siteid.str.contains('META')].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Invisible line (opacity 0); it still carries the encodings and the fixed
    # y domain that the site points inherit.
    site_line = alt.Chart(
        dm
    ).mark_line(
        size=2.5,
        opacity=0
    ).encode(
        x=x,
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 1], clamp=True)),
        color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )

    site_points = site_line.mark_point(
        filled=True,
        opacity=0.3,
        size=150
    ).encode(
        size=alt.Size('N:Q', title='Sample Size', legend=legend()),
    )

    sp = site_line + site_points

    # ---- Combine ----
    # Independent colour/size/shape scales keep the site palette and the
    # country palette from being merged into one scale.
    plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

    return plot

# One panel per model, laid out side by side.
plot = alt.hconcat(
    *[plot_lab(df=df, lab=name, is_country=True) for name in unique_models],
    spacing=30,
)

# No figure-level title is set; every panel carries its own title.
plot = apply_theme(
    plot,
    title_anchor='start',
    subtitle_font_size=18,
    header_label_font_size=16,
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    legend_orient='right',
    legend_title_orient='top',
)

plot

# Prediction Transport

In [20]:
# Load the transportability table: AUC of a model coming from one site/region
# (`from`) when evaluated at another site (`to`).
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.transport.csv"))

df = df.drop(columns=['Unnamed: 0'])
df = df.rename(columns={"from": "siteid"})
# Wide -> long: every column other than siteid/to becomes a (day, value) row.
df = pd.melt(df, id_vars=['siteid', 'to'], var_name='day', value_name='value')
# Fix the 'Eurpoe' typo found in the raw file and rename 'NORTH AMERICA' to 'USA'.
df['siteid'] = df.siteid.apply(lambda x: x.replace('Eurpoe', 'Europe').upper().replace('NORTH AMERICA', 'USA'))
# Strip the 'day' prefix and store an integer: the charts encode this column as
# quantitative (`day:Q`), so it must not stay a string (string days would sort
# as '1', '10', '11', ..., '2').
df['day'] = df.day.apply(lambda x: int(x.replace('day', '')))

# Rows where source and target site coincide are relabelled with a 'META-'
# prefix so the plotting function draws them in its line layer as 'Local result'.
df.loc[df.siteid == df.to, 'siteid'] = 'META-Local result'

unique_tos = df.to.unique().tolist()
print(unique_tos)

unique_sites = df.siteid.unique().tolist()
print(unique_sites)

# Attach each site's total sample size; ids absent from the table (e.g. the
# 'META-...' rows) get NaN.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = pd.Series(sdf.N.values, index=sdf.siteid).to_dict()

df['N'] = df.siteid.map(dic)

# NOTE: these overwrite the notebook-level `countries` / `country_colors`
# that the plotting function reads at call time.
countries = ['Local result', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

['MGB', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'APHP']
['APHP', 'BIDMC', 'FRBDX', 'ICSM', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'META-USA', 'META-EUROPE', 'META-Local result', 'MGB']


Unnamed: 0,siteid,to,day,value,N
0,APHP,MGB,1,0.841441,17513.0
1,BIDMC,MGB,1,0.838885,1227.0
2,FRBDX,MGB,1,0.837940,1317.0
3,ICSM,MGB,1,0.816447,459.0
4,NWU,MGB,1,0.863027,6608.0
...,...,...,...,...,...
1759,VA4,APHP,14,0.782028,5571.0
1760,VA5,APHP,14,0.780030,5140.0
1761,META-USA,APHP,14,0.781174,
1762,META-EUROPE,APHP,14,0.739997,


In [21]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """
    Build one layered AUC-over-time chart for a single target site.

    Note: this redefines the earlier `plot_lab` functions; here `lab` is
    matched against the `to` column.

    Two groups of layers are combined:

    - site-level: one semi-transparent point per source site and day, sized by
      the site's sample size `N` (the connecting line is drawn with opacity 0,
      i.e. it is invisible);
    - aggregate-level: a line plus filled points for every row whose `siteid`
      contains 'META', with the 'META-' prefix stripped.

    Legends are only attached to the 'VA2' panel.

    The function reads the notebook-level `countries`, `country_colors`,
    `sites` and `site_colors` lists as they are defined at call time.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with `siteid`, `to`, `day`, `value` and `N` columns.
    lab : str
        Value of the `to` column to plot; inserted into the chart title.
        The default is a lab name carried over from the baseline-lab version,
        so callers should always pass a target site explicitly.
    is_country : bool
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    The layered Altair chart (site layers below, aggregate layers on top).

    Raises
    ------
    ValueError
        If `df` is not provided.
    """
    if df is None:
        raise ValueError("plot_lab() needs a DataFrame passed as `df`")

    d = df[df.to == lab]
    show_legend = lab == 'VA2'

    def legend():
        # A fresh Legend where legends are wanted, otherwise None (no legend).
        return alt.Legend() if show_legend else None

    # Both layer groups share the same x encoding.
    x = alt.X(
        "day:Q",
        title='Days Since Admission',
        axis=alt.Axis(labelAngle=0, tickCount=10),
        scale=alt.Scale(clamp=True, nice=False, padding=10),
    )

    # ---- Country-level ----
    dm = d[d.siteid.str.contains('META')].copy()
    dm.siteid = dm.siteid.apply(lambda s: s.replace('META-', ''))
    country_scale = alt.Scale(domain=countries, range=country_colors)

    country_line = alt.Chart(
        dm
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=x,
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Continent', scale=country_scale, legend=legend()),
    ).properties(
        width=450,
        height=250
    )

    # The points reuse the line's colour encoding so that the `legend` setting
    # applies to both layers; re-encoding colour without `legend=` would fall
    # back to Altair's default legend on this layer.
    country_points = country_line.mark_point(
        filled=True,
        opacity=1,
        size=50
    )

    cp = country_line + country_points

    # ---- Site-level ----
    dm = d[~d.siteid.str.contains('META')].copy()
    site_scale = alt.Scale(domain=sites, range=site_colors)

    # Invisible line (opacity 0); it still carries the encodings and the fixed
    # y domain that the site points inherit.
    site_line = alt.Chart(
        dm
    ).mark_line(
        size=2.5,
        opacity=0
    ).encode(
        x=x,
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 1], clamp=True)),
        color=alt.Color("siteid:N", title=None, scale=site_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )

    site_points = site_line.mark_point(
        filled=True,
        opacity=0.3,
        size=150
    ).encode(
        size=alt.Size('N:Q', title='Sample Size', legend=legend()),
    )

    sp = site_line + site_points

    # ---- Combine ----
    # Independent colour/size/shape scales keep the site palette and the
    # aggregate palette from being merged into one scale.
    plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent')

    plot = plot.properties(
        title={
            "text": [
                f"From All Sites To {lab}",
            ],
            "dx": 50,
            "fontSize": 18
        }
    )

    return plot

# Lay the target-site panels out in rows of up to three charts; the last row
# holds the single remaining site.
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=target, is_country=True) for target in unique_tos[lo:hi]],
        spacing=30,
    )
    for lo, hi in ((0, 3), (3, 6), (6, 7))
)

plot = alt.vconcat(p1, p2, p3, spacing=30)

# No figure-level title is set; every panel carries its own title.
plot = apply_theme(
    plot,
    title_anchor='start',
    subtitle_font_size=18,
    header_label_font_size=16,
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    legend_orient='right',
    legend_title_orient='top',
)

plot