In [1]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Lift Altair's 5000-row cap so charts may embed larger data frames

In [2]:
# Data release identifier (a date string).
data_release = '2021-05-28'

# Maps the lab identifiers found in the `nm.lab` column of the input tables
# to the display labels (with units) used as chart titles.
consistent_loinc = {
    "C_reactive_protein_CRP_Normal_Sensitivity": "C-reactive protein (Normal Sensitivity) (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "Ferritin": "Ferritin (ng/mL)",
    "D_dimer": "D-dimer (ng/mL)",
    "albumin": "Albumin (g/dL)",

    "Fibrinogen": "Fibrinogen (mg/dL)",
    "alanine_aminotransferase_ALT": "Alanine aminotransferase (U/L)",
    "aspartate_aminotransferase_AST": "Aspartate aminotransferase (U/L)",
    "total_bilirubin": "Total bilirubin (mg/dL)",
    "lactate_dehydrogenase_LDH": "Lactate dehydrogenase (U/L)",
    "cardiac_troponin_High_Sensitivity": "Cardiac troponin High Sensitivity (ng/mL)",
    "cardiac_troponin_Normal_Sensitivity": "Cardiac troponin Normal Sensitivity (ng/mL)",
    "prothrombin_time_PT": "Prothrombin time (s)",
    "white_blood_cell_count_Leukocytes": "White blood cell count (10*3/uL)",
    "lymphocyte_count": "Lymphocyte count (10*3/uL)",
    "neutrophil_count": "Neutrophil count (10*3/uL)",
    "procalcitonin": "Procalcitonin (ng/mL)",
}

# Colour scale domain/range pairs. Each `*_colors` list is positionally
# aligned with the list of names directly above it.
continents = ['NORTH AMERICA', 'EUROPE']
continent_colors = ['#D45E00', '#57B4E9']

countries = ['USA', 'FRANCE', 'GERMANY', 'ITALY']
country_colors = ['#D45E00', '#0072B2', '#029F73', '#B2AA2F'] # '#E5DA3E']

# Per-site colours and shapes, positionally aligned with `sites`.
# Each site reuses the colour of its country from `country_colors`.
sites = ['APHP', 'FRBDX', 'UKFR', 'BIDMC', 'MGB', 'NWU', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']
site_colors = ['#0072B2', '#0072B2', '#029F73', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00']
site_shapes = ['circle', 'circle', 'circle', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond', 'diamond']

# Maps every `siteid` (including the META-* aggregate rows) to the
# country/continent name used for colouring.
# NOTE: the name is misspelled ("contry") but is kept because other cells
# reference it.
# UKFR and BIDMC were previously mapped to FRANCE and GERMANY, which
# contradicted `site_colors` above (UKFR carries the GERMANY colour and
# BIDMC the USA colour); the mapping now agrees with the palette.
site_to_contry = {
    'META-USA': 'USA',
    'META-EUROPE': 'EUROPE',
    'META-FRANCE': 'FRANCE',
    'META-GERMANY': 'GERMANY',
    'META-ITALY': 'ITALY',
    'APHP': 'FRANCE',
    'FRBDX': 'FRANCE',
    'UKFR': 'GERMANY',
    'BIDMC': 'USA',
    'MGB': 'USA',
    'NWU': 'USA',
    'UCLA': 'USA',
    'UMICH': 'USA',
    'UPENN': 'USA',
    'UPITT': 'USA',
    'VA1': 'USA',
    'VA2': 'USA',
    'VA3': 'USA',
    'VA4': 'USA',
    'VA5': 'USA'
}

# ['black', '#0072B2', '#0072B2', '#0072B2', '#0072B2', '#CB7AA7', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00','#D45E00','#D45E00']
# len(sites)
# len(site_colors)
# len(site_shapes)

# Lab Trajectory

In [3]:
# Load the classification table for the selected labs and reshape it to long
# form: one row per (lab, siteid, day) with the plotted metric in `value`.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.csv"))

df = df.drop(columns=['Unnamed: 0'])
df = df.rename(columns={"nm.lab": "lab"})
df = pd.melt(df, id_vars=['lab', 'siteid'], var_name='day', value_name='value')
df.siteid = df.siteid.apply(lambda x: x.upper().replace('NORTH AMERICA', 'USA'))
df.lab = df.lab.apply(lambda x: consistent_loinc[x])
# Strip the 'day' prefix and store the day as an integer: the charts encode
# this column as quantitative (`day:Q`), so it should not be left as text.
df.day = df.day.str.replace('day', '', regex=False).astype(int)

unique_labs = df.lab.unique().tolist()
print(unique_labs)

unique_sites = df.siteid.unique().tolist()
print(unique_sites)

# Attach each site's total sample size; siteids absent from the totals table
# (e.g. the META-* rows in the output below) get a missing value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = pd.Series(sdf.N.values, index=sdf.siteid).to_dict()

df['N'] = df.siteid.apply(dic.get)

# Aggregation level of each row: plain site, META-USA / META-EUROPE
# ("continent"), or any other META-* row ("country").
df['level'] = df.siteid.apply(
    lambda x: 'site' if 'META-' not in x
    else ('continent' if ('USA' in x or 'EUROPE' in x) else 'country')
)

df['country'] = df.siteid.apply(lambda x: site_to_contry[x])

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']


Unnamed: 0,lab,siteid,day,value,N,level,country
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.647449,,continent,USA
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.681337,,continent,EUROPE
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.680747,,country,FRANCE
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.829691,,country,GERMANY
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.688717,,country,ITALY
...,...,...,...,...,...,...,...
1675,Neutrophil count (10*3/uL),VA1,14,0.817568,5578.0,site,USA
1676,Neutrophil count (10*3/uL),VA2,14,0.622558,6468.0,site,USA
1677,Neutrophil count (10*3/uL),VA3,14,0.670180,6649.0,site,USA
1678,Neutrophil count (10*3/uL),VA4,14,0.687714,5571.0,site,USA


In [4]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the two-panel trajectory figure for a single lab.

    The top panel draws the continent-level curves. The bottom panel layers
    the dashed country-level curves over the site-level points, whose size
    encodes the site's sample size ``N``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table providing the columns read below: ``lab``,
        ``siteid``, ``day``, ``value``, ``N`` and ``country``.
    lab : str
        Value of the ``lab`` column to plot; also used as the figure title.
    is_country : bool
        Unused; kept so existing calls keep working.

    Returns
    -------
    alt.VConcatChart
        The vertically concatenated continent panel and country/site panel.
    """
    d = df[df.lab == lab].copy()

    # Legends are attached to a single lab's figure only.
    showLegend = lab == 'Lactate dehydrogenase (U/L)'

    # ---- Continent level (top panel) ----
    nd = d[d.siteid.str.contains('USA') | d.siteid.str.contains('EUROPE')].copy()
    # 'META-USA' -> 'NORTH AMERICA', 'META-EUROPE' -> 'EUROPE' to match `continents`.
    nd['siteid'] = (
        nd.siteid
        .str.replace('META-', '', regex=False)
        .str.replace('USA', 'NORTH AMERICA', regex=False)
    )
    color_scale = alt.Scale(domain=continents, range=continent_colors)

    plot = alt.Chart(
        nd
    ).mark_line(
        size=3.5,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title=None, axis=alt.Axis(labelAngle=0, tickCount=10, labels=False), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Continent Level', scale=color_scale, legend=alt.Legend() if showLegend else None)
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=1,
        size=60
    ).encode(
        # Gate the legend explicitly, like the other point layers, instead of
        # relying on how the line layer's setting is merged across layers.
        color=alt.Color("siteid:N", title='Continent Level', scale=color_scale, legend=alt.Legend() if showLegend else None)
    )

    rp = (plot + point)

    # ---- Country level (dashed lines, bottom panel) ----
    # Every META-* row except META-EUROPE, with the 'META-' prefix removed.
    dm = d[d.siteid.str.contains('META')].copy()
    dm = dm[~dm.siteid.str.contains('EUROPE')]
    dm['siteid'] = dm.siteid.str.replace('META-', '', regex=False)
    color_scale = alt.Scale(domain=countries, range=country_colors)

    plot = alt.Chart(
        dm
    ).mark_line(
        point=False,
        size=2,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10, labels=False), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country Level', scale=color_scale, legend=alt.Legend(symbolDash=[3, 3]) if showLegend else None),
        strokeDash=alt.value([3, 3])
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=1,
        size=30
    ).encode(
        color=alt.Color("siteid:N", title='Country', scale=color_scale, legend=None)
    )

    cp = alt.layer(plot, point).resolve_scale(color='independent')

    # ---- Site level (points sized by N, bottom panel) ----
    dm = d[~d.siteid.str.contains('META')].copy()
    color_scale = alt.Scale(domain=countries, range=country_colors)

    # The line mark is fully transparent (opacity=0); only the points derived
    # from it below are visible.
    plot = alt.Chart(
        dm
    ).mark_line(
        point=False,
        size=2.5,
        opacity=0
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10, labels=True), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("country:N", title='Site Level', scale=color_scale, legend=alt.Legend() if showLegend else None),
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=0.2,
        size=150,
    ).encode(
        color=alt.Color("country:N", title="Site Level", scale=alt.Scale(domain=countries, range=country_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend(symbolFillColor='black') if showLegend else None),
    )

    sp = alt.layer(plot, point)

    # ---- Combine: continent panel on top, site + country layers below ----
    plot = alt.vconcat(
        rp,
        alt.layer(sp, cp).resolve_scale(color='independent'),
        spacing=10
    ).resolve_scale(color='independent', size='independent', shape='independent', y='shared', x='shared')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 50,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

    return plot

# plot = plot_lab(df=df)

# Arrange the per-lab figures in a grid with `labs_per_row` figures per row.
# Rows are derived from the number of labs actually loaded, so no empty row
# is produced when there are fewer labs than a fixed set of slices expects.
labs_per_row = 3
rows = [
    alt.hconcat(
        *(
            plot_lab(df=df, lab=lab, is_country=True)
            for lab in unique_labs[start:start + labs_per_row]
        ),
        spacing=30
    )
    for start in range(0, len(unique_labs), labs_per_row)
]

plot = alt.vconcat(*rows, spacing=30)

# No figure-level title is set here; each lab figure carries its own title.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

## All Labs

In [5]:
# Load the classification table covering all labs and reshape it to long
# form: one row per (lab, siteid, day) with the plotted metric in `value`.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.all.csv"))

df = df.drop(columns=['Unnamed: 0'])
df = df.rename(columns={"nm.lab": "lab"})
df = pd.melt(df, id_vars=['lab', 'siteid'], var_name='day', value_name='value')
df.siteid = df.siteid.apply(lambda x: x.upper().replace('NORTH AMERICA', 'USA'))
df.lab = df.lab.apply(lambda x: consistent_loinc[x])
# Strip the 'day' prefix and store the day as an integer: the charts encode
# this column as quantitative (`day:Q`), so it should not be left as text.
df.day = df.day.str.replace('day', '', regex=False).astype(int)

unique_labs = df.lab.unique().tolist()
print(unique_labs)

unique_sites = df.siteid.unique().tolist()
print(unique_sites)

# Attach each site's total sample size; siteids absent from the totals table
# (e.g. the META-* rows in the output below) get a missing value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = pd.Series(sdf.N.values, index=sdf.siteid).to_dict()

df['N'] = df.siteid.apply(dic.get)

# Aggregation level of each row: plain site, META-USA / META-EUROPE
# ("continent"), or any other META-* row ("country").
df['level'] = df.siteid.apply(
    lambda x: 'site' if 'META-' not in x
    else ('continent' if ('USA' in x or 'EUROPE' in x) else 'country')
)

df['country'] = df.siteid.apply(lambda x: site_to_contry[x])

df

# df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.all.csv"))

# df = df.drop(columns=['Unnamed: 0'])
# df = df.rename(columns={"nm.lab": "lab"})
# df = pd.melt(df, id_vars=['lab', 'siteid'], var_name='day', value_name='value')
# df.siteid = df.siteid.apply(lambda x: x.upper().replace('NORTH AMERICA', 'USA'))
# df.lab = df.lab.apply(lambda x: consistent_loinc[x])
# df.day = df.day.apply(lambda x: x.replace('day', ''))

# unique_labs = df.lab.unique().tolist()
# print(unique_labs)
# print(len(unique_labs))

# unique_sites = df.siteid.unique().tolist()
# print(unique_sites)

# sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
# dic = pd.Series(sdf.N.values, index=sdf.siteid).to_dict()

# df['N'] = df.siteid.apply(lambda x: dic[x] if x in dic.keys() else None)

# df

['Alanine aminotransferase (U/L)', 'Albumin (g/dL)', 'Aspartate aminotransferase (U/L)', 'C-reactive protein (Normal Sensitivity) (mg/dL)', 'Cardiac troponin High Sensitivity (ng/mL)', 'Cardiac troponin Normal Sensitivity (ng/mL)', 'Creatinine (mg/dL)', 'D-dimer (ng/mL)', 'Ferritin (ng/mL)', 'Fibrinogen (mg/dL)', 'Lactate dehydrogenase (U/L)', 'Lymphocyte count (10*3/uL)', 'Neutrophil count (10*3/uL)', 'Procalcitonin (ng/mL)', 'Prothrombin time (s)', 'Total bilirubin (mg/dL)', 'White blood cell count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']


Unnamed: 0,lab,siteid,day,value,N,level,country
0,Alanine aminotransferase (U/L),META-USA,1,0.536670,,continent,USA
1,Alanine aminotransferase (U/L),META-EUROPE,1,0.573365,,continent,EUROPE
2,Alanine aminotransferase (U/L),META-FRANCE,1,0.579315,,country,FRANCE
3,Alanine aminotransferase (U/L),META-GERMANY,1,0.612328,,country,GERMANY
4,Alanine aminotransferase (U/L),META-ITALY,1,0.581525,,country,ITALY
...,...,...,...,...,...,...,...
4755,White blood cell count (10*3/uL),VA1,14,0.436928,5578.0,site,USA
4756,White blood cell count (10*3/uL),VA2,14,0.641065,6468.0,site,USA
4757,White blood cell count (10*3/uL),VA3,14,0.673743,6649.0,site,USA
4758,White blood cell count (10*3/uL),VA4,14,0.670703,5571.0,site,USA


In [6]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the two-panel trajectory figure for a single lab (all-labs view).

    The top panel draws the continent-level curves. The bottom panel layers
    the dashed country-level curves over the site-level points, whose size
    encodes the site's sample size ``N`` on a fixed 0-20000 scale.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table providing the columns read below: ``lab``,
        ``siteid``, ``day``, ``value``, ``N`` and ``country``.
    lab : str
        Value of the ``lab`` column to plot; also used as the figure title.
    is_country : bool
        Unused; kept so existing calls keep working.

    Returns
    -------
    alt.VConcatChart
        The vertically concatenated continent panel and country/site panel.
    """
    d = df[df.lab == lab].copy()

    # Legends are attached to a single lab's figure only.
    showLegend = lab == 'Aspartate aminotransferase (U/L)'

    # ---- Continent level (top panel) ----
    nd = d[d.siteid.str.contains('USA') | d.siteid.str.contains('EUROPE')].copy()
    # 'META-USA' -> 'NORTH AMERICA', 'META-EUROPE' -> 'EUROPE' to match `continents`.
    nd['siteid'] = (
        nd.siteid
        .str.replace('META-', '', regex=False)
        .str.replace('USA', 'NORTH AMERICA', regex=False)
    )
    color_scale = alt.Scale(domain=continents, range=continent_colors)

    plot = alt.Chart(
        nd
    ).mark_line(
        size=3.5,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title=None, axis=alt.Axis(labelAngle=0, tickCount=10, labels=False), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Continent Level', scale=color_scale, legend=alt.Legend() if showLegend else None)
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=1,
        size=60
    ).encode(
        # Gate the legend explicitly, like the other point layers, instead of
        # relying on how the line layer's setting is merged across layers.
        color=alt.Color("siteid:N", title='Continent Level', scale=color_scale, legend=alt.Legend() if showLegend else None)
    )

    rp = (plot + point)

    # ---- Country level (dashed lines, bottom panel) ----
    # Every META-* row except META-EUROPE, with the 'META-' prefix removed.
    dm = d[d.siteid.str.contains('META')].copy()
    dm = dm[~dm.siteid.str.contains('EUROPE')]
    dm['siteid'] = dm.siteid.str.replace('META-', '', regex=False)
    color_scale = alt.Scale(domain=countries, range=country_colors)

    plot = alt.Chart(
        dm
    ).mark_line(
        point=False,
        size=2,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10, labels=False), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country Level', scale=color_scale, legend=alt.Legend(symbolDash=[3, 3]) if showLegend else None),
        strokeDash=alt.value([3, 3])
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=1,
        size=30
    ).encode(
        color=alt.Color("siteid:N", title='Country', scale=color_scale, legend=None)
    )

    cp = alt.layer(plot, point).resolve_scale(color='independent')

    # ---- Site level (points sized by N, bottom panel) ----
    dm = d[~d.siteid.str.contains('META')].copy()
    color_scale = alt.Scale(domain=countries, range=country_colors)

    # The line mark is fully transparent (opacity=0); only the points derived
    # from it below are visible.
    plot = alt.Chart(
        dm
    ).mark_line(
        point=False,
        size=2.5,
        opacity=0
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10, labels=True), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("country:N", title='Site Level', scale=color_scale, legend=alt.Legend() if showLegend else None),
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=0.2,
        size=150,
    ).encode(
        color=alt.Color("country:N", title="Site Level", scale=alt.Scale(domain=countries, range=country_colors), legend=None),
        # Fixed size domain so point sizes are comparable across lab figures.
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend(symbolFillColor='black') if showLegend else None, scale=alt.Scale(domain=[0, 20000])),
    )

    sp = alt.layer(plot, point)

    # ---- Combine: continent panel on top, site + country layers below ----
    plot = alt.vconcat(
        rp,
        alt.layer(sp, cp).resolve_scale(color='independent'),
        spacing=10
    ).resolve_scale(color='independent', size='independent', shape='independent', y='shared', x='shared')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 50,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

    return plot
# def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
#     d = df.copy()
#     d = d[d.lab == lab]
    
#     showLegend = False
#     if lab == 'Aspartate aminotransferase (U/L)':
#         showLegend = True
    
#     """
#     Country-Level
#     """
#     dm = d[d.siteid.str.contains('META')].copy()
#     dm.siteid = dm.siteid.apply(lambda x: x.replace('META-', ''))
#     color_scale=alt.Scale(domain=countries, range=country_colors)
    
#     plot = alt.Chart(
#         dm
#     ).mark_line(
# #         point=True,
#         size=2.5,
# #         stroke='black',
#         opacity=1
#     ).encode(
#         x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10)),
#         y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
#         # shape=alt.Shape("siteid:N", title='Continent', scale=alt.Scale(domain=['USA', 'EUROPE'], range=['circle', 'diamond']), legend=alt.Legend() if showLegend else None),
#         color=alt.Color("siteid:N", title='Country & Continent', scale=color_scale, legend=alt.Legend() if showLegend else None),
#     ).properties(
#         width=450,
#         height=250
#     )
    
#     point = plot.mark_point(
#         filled=True,
#         opacity=1,
#         size=30
#     ).encode(
#         color=alt.Color("siteid:N", title='Country & Continent', scale=color_scale)
#     )
    

#     cp = (plot + point) # if is_country else plot

#     """
#     Site-Level
#     """
#     dm = d[~d.siteid.str.contains('META')].copy()
#     color_scale=alt.Scale(domain=sites, range=site_colors)

#     plot = alt.Chart(
#         dm
#     ).mark_line(
# #         point=True,
#         size=2.5,
# #         stroke='black',
#         opacity=0
#     ).encode(
#         x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10)),
#         y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
#         color=alt.Color("siteid:N", title=None, scale=color_scale, legend=None),
#     ).properties(
#         width=450,
#         height=250
#     )
    
#     point = plot.mark_point(
#         filled=True,
#         opacity=0.5,
#         size=150
#     ).encode(
#         color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
#         size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if showLegend else None),
        
#         # color=alt.Color("N:Q", title='Sample Size', legend=alt.Legend() if showLegend else None),
#         # shape=alt.Shape("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_shapes), legend=None),
        
#         # opacity=alt.Opacity('N:Q', title='Sample Size', scale=alt.Scale(range=[0, 1]))
#     )
    

#     sp = (plot + point) # if is_country else plot
    
#     """
#     Combine
#     """
#     plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent')
    
#     plot = plot.properties(
#         title={
#             "text": [
#                 f"{lab.capitalize()}",
#             ],
#             "dx": 50,
#             'fontSize': 18,
#             "subtitleColor": "gray",
#         }
#     )

#     return plot

# plot = plot_lab(df=df)

# Arrange the per-lab figures in a grid with `labs_per_row` figures per row.
# Rows are derived from the number of labs actually loaded rather than from
# a fixed list of slices, so the grid follows the input table.
labs_per_row = 3
rows = [
    alt.hconcat(
        *(
            plot_lab(df=df, lab=lab, is_country=True)
            for lab in unique_labs[start:start + labs_per_row]
        ),
        spacing=30
    )
    for start in range(0, len(unique_labs), labs_per_row)
]

plot = alt.vconcat(*rows, spacing=30)

# No figure-level title is set here; each lab figure carries its own title.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

In [7]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build a single-panel trajectory chart for one lab.

    The META-* curves are drawn as lines with points, layered over the
    site-level points whose opacity encodes the site's sample size ``N``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table providing the columns read below: ``lab``,
        ``siteid``, ``day``, ``value`` and ``N``.
    lab : str
        Value of the ``lab`` column to plot; also used as the chart title.
    is_country : bool
        Unused; kept so existing calls keep working.

    Returns
    -------
    alt.LayerChart
        The layered site-level and META-level chart.
    """
    d = df[df.lab == lab].copy()

    # Legends are attached to a single lab's chart only.
    showLegend = lab == 'Lactate dehydrogenase (U/L)'

    # ---- Country / continent level (META-* rows) ----
    dm = d[d.siteid.str.contains('META')].copy()
    dm['siteid'] = dm.siteid.str.replace('META-', '', regex=False)
    # NOTE(review): META-EUROPE rows are kept here, but 'EUROPE' is not in the
    # `countries` scale domain; confirm how that series is meant to be coloured.
    color_scale = alt.Scale(domain=countries, range=country_colors)

    plot = alt.Chart(
        dm
    ).mark_line(
        size=2.5,
        opacity=1
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country & Continent', scale=color_scale, legend=alt.Legend() if showLegend else None),
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=1,
        size=30
    ).encode(
        # Gate the legend explicitly, matching the line layer, instead of
        # relying on how the two layers' legend settings are merged.
        color=alt.Color("siteid:N", title='Country & Continent', scale=color_scale, legend=alt.Legend() if showLegend else None)
    )

    cp = (plot + point)

    # ---- Site level (points whose opacity encodes N) ----
    dm = d[~d.siteid.str.contains('META')].copy()
    color_scale = alt.Scale(domain=sites, range=site_colors)

    # The line mark is fully transparent (opacity=0); only the points derived
    # from it below are visible.
    plot = alt.Chart(
        dm
    ).mark_line(
        size=2.5,
        opacity=0
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10)),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title=None, scale=color_scale, legend=None),
    ).properties(
        width=450,
        height=250
    )

    point = plot.mark_point(
        filled=True,
        opacity=0.5,
        size=150
    ).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        opacity=alt.Opacity('N:Q', title='Sample Size', scale=alt.Scale(range=[0, 1]), legend=alt.Legend() if showLegend else None)
    )

    sp = (plot + point)

    # ---- Combine: site points underneath, META curves on top ----
    plot = (sp + cp).resolve_scale(color='independent', size='independent', shape='independent', opacity='independent')

    plot = plot.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 30,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

    return plot

# plot = plot_lab(df=df)

# Three rows of three charts each, covering the first nine entries of
# `unique_labs`.
row_bounds = [(0, 3), (3, 6), (6, 9)]
p1, p2, p3 = [
    alt.hconcat(
        *[plot_lab(df=df, lab=name, is_country=True) for name in unique_labs[lo:hi]],
        spacing=30
    )
    for lo, hi in row_bounds
]

plot = alt.vconcat(p1, p2, p3, spacing=30)

# No figure-level title is set here; each lab chart carries its own title.
theme_options = dict(
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)
plot = apply_theme(plot, **theme_options)

plot

In [8]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the layered trajectory chart for a single lab.

    Rows whose ``siteid`` contains ``'META'`` are drawn as lines with small
    points; the remaining (site-level) rows are drawn as larger points whose
    opacity encodes the ``N`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with the ``lab``, ``siteid``, ``day``, ``value`` and ``N``
        columns read below.
    lab : str
        Value of the ``lab`` column to plot; also used as the chart title.
    is_country : bool
        Accepted for call compatibility; not read by this function.

    Returns
    -------
    alt.LayerChart
        Site-level layers followed by the country-level layers, with
        independent color, size, shape and opacity scales.
    """
    # Legends are only attached to the panel of this one lab.
    show_legend = (lab == 'Lactate dehydrogenase (U/L)')

    def optional_legend():
        return alt.Legend() if show_legend else None

    def day_axis():
        return alt.X(
            "day:Q",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=10),
        )

    def value_axis():
        return alt.Y(
            "value:Q",
            title=None,
            scale=alt.Scale(zero=False, nice=False, padding=10),
        )

    lab_rows = df.copy()
    lab_rows = lab_rows[lab_rows.lab == lab]
    is_meta = lab_rows.siteid.str.contains('META')

    # --- Country-level layer: 'META-*' rows, prefix stripped ---------------
    country_rows = lab_rows[is_meta].copy()
    country_rows.siteid = country_rows.siteid.str.replace('META-', '', regex=False)
    country_palette = alt.Scale(domain=countries, range=country_colors)

    country_line = alt.Chart(country_rows).mark_line(size=2.5, opacity=1).encode(
        x=day_axis(),
        y=value_axis(),
        shape=alt.Shape(
            "siteid:N",
            title='Continent',
            scale=alt.Scale(domain=['USA', 'EUROPE'], range=['diamond', 'circle']),
            legend=optional_legend(),
        ),
        color=alt.Color(
            "siteid:N",
            title='Country & Continent',
            scale=country_palette,
            legend=optional_legend(),
        ),
    ).properties(width=450, height=250)

    country_points = country_line.mark_point(filled=True, opacity=1, size=30).encode(
        color=alt.Color("siteid:N", title='Country & Continent', scale=country_palette)
    )

    country_layer = country_line + country_points

    # --- Site-level layer: every non-META row ------------------------------
    site_rows = lab_rows[~is_meta].copy()

    # The line is fully transparent; it serves as the base the points derive from.
    site_line = alt.Chart(site_rows).mark_line(size=2.5, opacity=0).encode(
        x=day_axis(),
        y=value_axis(),
        color=alt.Color(
            "siteid:N",
            title=None,
            scale=alt.Scale(domain=sites, range=site_colors),
            legend=None,
        ),
    ).properties(width=450, height=250)

    site_points = site_line.mark_point(filled=True, opacity=0.5, size=150).encode(
        color=alt.Color(
            "siteid:N",
            title=None,
            scale=alt.Scale(domain=sites, range=site_colors),
            legend=None,
        ),
        shape=alt.Shape(
            "siteid:N",
            title=None,
            scale=alt.Scale(domain=sites, range=site_shapes),
            legend=None,
        ),
        opacity=alt.Opacity(
            'N:Q',
            title='Sample Size',
            scale=alt.Scale(range=[0, 1]),
            legend=optional_legend(),
        ),
    )

    site_layer = site_line + site_points

    # --- Combine -----------------------------------------------------------
    combined = (site_layer + country_layer).resolve_scale(
        color='independent',
        size='independent',
        shape='independent',
        opacity='independent',
    )

    return combined.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "dx": 30,
            'fontSize': 18,
            "subtitleColor": "gray",
        }
    )

# Lay the per-lab panels out on a grid, PANELS_PER_ROW charts per row.
# The lab list is chunked rather than sliced with fixed [0:3], [3:6], [6:9]
# ranges, so every lab in `unique_labs` gets a panel regardless of how many
# there are, and no empty row is concatenated when there are only a few.
PANELS_PER_ROW = 3

panel_rows = [
    alt.hconcat(
        *(
            plot_lab(df=df, lab=lab, is_country=True)
            for lab in unique_labs[start:start + PANELS_PER_ROW]
        ),
        spacing=30,
    )
    for start in range(0, len(unique_labs), PANELS_PER_ROW)
]

plot = alt.vconcat(*panel_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Positive Predictive Value (PPV)

In [9]:
# Read the PPV table and melt it to long format: every column other than
# `lab` and `siteid` becomes a (`day`, `value`) pair.
ppv_csv = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.ppv.csv")

df = (
    pd.read_csv(ppv_csv)
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Normalize identifiers: upper-case site ids (with 'NORTH AMERICA' renamed to
# 'USA'), display names for labs, and the bare day number as a string.
df['siteid'] = df['siteid'].str.upper().str.replace('NORTH AMERICA', 'USA', regex=False)
df['lab'] = [consistent_loinc[code] for code in df['lab']]
df['day'] = df['day'].str.replace('day', '', regex=False)

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach each site's total N; site ids missing from the totals table get no value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].apply(lambda site: dic.get(site))

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'FRBDX', 'NWU', 'UCLA', 'UKFR', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'MGB', 'UMICH']


Unnamed: 0,lab,siteid,day,value,N
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.230815,
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.283767,
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.287519,
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.556042,
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.185898,
...,...,...,...,...,...
1325,Neutrophil count (10*3/uL),VA1,14,0.807621,5578.0
1326,Neutrophil count (10*3/uL),VA2,14,0.306491,6468.0
1327,Neutrophil count (10*3/uL),VA3,14,0.410271,6649.0
1328,Neutrophil count (10*3/uL),VA4,14,0.602689,5571.0


In [10]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True, y_title='PPV'):
    """Build the layered day-3/7/14 chart for a single lab.

    Three layers are combined, in this order: site-level points sized by the
    ``N`` column, solid continent-level lines, and dashed country-level lines.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with ``lab``, ``siteid``, ``day`` (as strings),
        ``value`` and ``N`` columns.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Not read by this function; kept so existing calls keep working.
    y_title : str
        Y-axis title used by every layer.

    Returns
    -------
    alt.LayerChart
        The combined chart with independent color, size and shape scales.
    """
    day_values = {'3': 3, '7': 7, '14': 14}
    country_domain = ['USA', 'FRANCE', 'GERMANY', 'ITALY']
    country_range = ['#D6641E', '#0072B2', '#029F73', '#B2AA2F']

    # Legends are only attached to the panel of this one lab.
    show_legend = lab == 'Lactate dehydrogenase (U/L)'

    # Filter and copy in one step: assigning a column on a filtered slice
    # (as `d = d[...]; d.day = ...` does) raises SettingWithCopyWarning.
    lab_rows = df[(df.lab == lab) & df.day.isin(list(day_values))].copy()
    lab_rows['day'] = lab_rows['day'].map(day_values)
    is_meta = lab_rows.siteid.str.contains('META')

    def day_axis():
        # NOTE(review): `day` holds ints after the mapping above while the
        # explicit domain lists strings -- confirm the renderer matches them.
        return alt.X(
            "day:O",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14']),
        )

    def value_axis(**scale_options):
        return alt.Y(
            "value:Q",
            title=y_title,
            scale=alt.Scale(zero=False, nice=False, padding=10, **scale_options),
        )

    # --- Continent level: solid lines --------------------------------------
    continent_rows = lab_rows[is_meta].copy()
    continent_rows['siteid'] = continent_rows['siteid'].apply(
        lambda x: x.replace('META-', '').replace('META', 'All Countries').replace('USA', 'NORTH AMERICA')
    )
    continent_palette = alt.Scale(domain=['NORTH AMERICA', 'EUROPE'], range=['#D45E00', '#57B4E9'])

    continent_line = alt.Chart(continent_rows).mark_line(size=3, opacity=1).encode(
        x=day_axis(),
        y=value_axis(),
        color=alt.Color(
            "siteid:N",
            title='Continent Level',
            scale=continent_palette,
            legend=alt.Legend() if show_legend else None,
        ),
    ).properties(width=450, height=250)

    continent_points = continent_line.mark_point(filled=True, opacity=1, size=50).encode(
        color=alt.Color("siteid:N", scale=continent_palette, legend=None)
    )

    # Named `continent_layer` rather than `np` so the numpy alias imported at
    # the top of the notebook is not shadowed inside this function.
    continent_layer = alt.layer(continent_line, continent_points).resolve_scale(color='independent')

    # --- Country level: dashed lines ---------------------------------------
    country_rows = lab_rows[is_meta].copy()
    country_rows['siteid'] = country_rows['siteid'].apply(lambda x: x.replace('META-', ''))
    country_palette = alt.Scale(domain=country_domain, range=country_range)

    country_line = alt.Chart(country_rows).mark_line(size=2, opacity=1).encode(
        x=day_axis(),
        y=value_axis(),
        color=alt.Color(
            "siteid:N",
            title='Country Level',
            scale=country_palette,
            legend=alt.Legend(symbolDash=[3, 3]) if show_legend else None,
        ),
        strokeDash=alt.value([3, 3]),
    ).properties(width=450, height=250)

    country_points = country_line.mark_point(filled=True, opacity=1, size=50).encode(
        color=alt.Color("siteid:N", scale=country_palette, legend=None)
    )

    country_layer = alt.layer(country_line, country_points).resolve_scale(color='independent')

    # --- Site level: points only, colored by country, sized by N -----------
    site_rows = lab_rows[~is_meta].copy()
    site_rows['country'] = site_rows.siteid.apply(lambda x: site_to_contry[x])

    # Built directly as a point chart; the previous invisible line chart was
    # only a base whose mark and color encoding were both replaced.
    site_points = alt.Chart(site_rows).mark_point(filled=True, opacity=0.3, size=150).encode(
        x=day_axis(),
        y=value_axis(clamp=True),
        color=alt.Color(
            "country:N",
            title='Site Level',
            scale=alt.Scale(domain=country_domain, range=country_range),
            legend=alt.Legend() if show_legend else None,
        ),
        size=alt.Size(
            'N:Q',
            title='Sample Size',
            legend=alt.Legend(symbolFillColor='black') if show_legend else None,
        ),
    ).properties(width=450, height=250)

    # --- Combine -----------------------------------------------------------
    combined = (site_points + continent_layer + country_layer).resolve_scale(
        color='independent',
        size='independent',
        shape='independent',
    )

    return combined.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# Lay the per-lab panels out on a grid, PANELS_PER_ROW charts per row.
# The lab list is chunked rather than sliced with fixed [0:3], [3:6], [6:9]
# ranges, so every lab in `unique_labs` gets a panel regardless of how many
# there are, and no empty row is concatenated when there are only a few.
PANELS_PER_ROW = 3

panel_rows = [
    alt.hconcat(
        *(
            plot_lab(df=df, lab=lab, is_country=True)
            for lab in unique_labs[start:start + PANELS_PER_ROW]
        ),
        spacing=30,
    )
    for start in range(0, len(unique_labs), PANELS_PER_ROW)
]

plot = alt.vconcat(*panel_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# True Positive Rate (TPR)

In [11]:
# Read the TPR table and melt it to long format: every column other than
# `lab` and `siteid` becomes a (`day`, `value`) pair.
tpr_csv = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.phase1.tpr.csv")

df = (
    pd.read_csv(tpr_csv)
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid'], var_name='day', value_name='value')
)

# Normalize identifiers: upper-case site ids (with 'NORTH AMERICA' renamed to
# 'USA'), display names for labs, and the bare day number as a string.
df['siteid'] = df['siteid'].str.upper().str.replace('NORTH AMERICA', 'USA', regex=False)
df['lab'] = [consistent_loinc[code] for code in df['lab']]
df['day'] = df['day'].str.replace('day', '', regex=False)

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach each site's total N; site ids missing from the totals table get no value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].apply(lambda site: dic.get(site))

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'D-dimer (ng/mL)', 'Procalcitonin (ng/mL)', 'Neutrophil count (10*3/uL)']
['META-USA', 'META-EUROPE', 'META-FRANCE', 'META-GERMANY', 'META-ITALY', 'APHP', 'FRBDX', 'NWU', 'UCLA', 'UKFR', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'MGB', 'UMICH']


Unnamed: 0,lab,siteid,day,value,N
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-USA,1,0.182012,
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,1,0.195493,
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-FRANCE,1,0.193570,
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-GERMANY,1,0.362230,
4,C-reactive protein (Normal Sensitivity) (mg/dL),META-ITALY,1,0.183067,
...,...,...,...,...,...
1325,Neutrophil count (10*3/uL),VA1,14,0.546465,5578.0
1326,Neutrophil count (10*3/uL),VA2,14,0.209064,6468.0
1327,Neutrophil count (10*3/uL),VA3,14,0.256425,6649.0
1328,Neutrophil count (10*3/uL),VA4,14,0.351594,5571.0


In [12]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True, y_title='TPR'):
    """Build the layered day-3/7/14 chart for a single lab.

    Three layers are combined, in this order: site-level points sized by the
    ``N`` column, solid continent-level lines, and dashed country-level lines.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with ``lab``, ``siteid``, ``day`` (as strings),
        ``value`` and ``N`` columns.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Not read by this function; kept so existing calls keep working.
    y_title : str
        Y-axis title used by every layer.

    Returns
    -------
    alt.LayerChart
        The combined chart with independent color, size and shape scales.
    """
    day_values = {'3': 3, '7': 7, '14': 14}
    country_domain = ['USA', 'FRANCE', 'GERMANY', 'ITALY']
    country_range = ['#D6641E', '#0072B2', '#029F73', '#B2AA2F']

    # Legends are only attached to the panel of this one lab.
    show_legend = lab == 'Lactate dehydrogenase (U/L)'

    # Filter and copy in one step: assigning a column on a filtered slice
    # (as `d = d[...]; d.day = ...` does) raises SettingWithCopyWarning.
    lab_rows = df[(df.lab == lab) & df.day.isin(list(day_values))].copy()
    lab_rows['day'] = lab_rows['day'].map(day_values)
    is_meta = lab_rows.siteid.str.contains('META')

    def day_axis():
        # NOTE(review): `day` holds ints after the mapping above while the
        # explicit domain lists strings -- confirm the renderer matches them.
        return alt.X(
            "day:O",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14']),
        )

    def value_axis(**scale_options):
        return alt.Y(
            "value:Q",
            title=y_title,
            scale=alt.Scale(zero=False, nice=False, padding=10, **scale_options),
        )

    # --- Continent level: solid lines --------------------------------------
    continent_rows = lab_rows[is_meta].copy()
    continent_rows['siteid'] = continent_rows['siteid'].apply(
        lambda x: x.replace('META-', '').replace('META', 'All Countries').replace('USA', 'NORTH AMERICA')
    )
    continent_palette = alt.Scale(domain=['NORTH AMERICA', 'EUROPE'], range=['#D45E00', '#57B4E9'])

    continent_line = alt.Chart(continent_rows).mark_line(size=3, opacity=1).encode(
        x=day_axis(),
        y=value_axis(),
        color=alt.Color(
            "siteid:N",
            title='Continent Level',
            scale=continent_palette,
            legend=alt.Legend() if show_legend else None,
        ),
    ).properties(width=450, height=250)

    continent_points = continent_line.mark_point(filled=True, opacity=1, size=50).encode(
        color=alt.Color("siteid:N", scale=continent_palette, legend=None)
    )

    # Named `continent_layer` rather than `np` so the numpy alias imported at
    # the top of the notebook is not shadowed inside this function.
    continent_layer = alt.layer(continent_line, continent_points).resolve_scale(color='independent')

    # --- Country level: dashed lines ---------------------------------------
    country_rows = lab_rows[is_meta].copy()
    country_rows['siteid'] = country_rows['siteid'].apply(lambda x: x.replace('META-', ''))
    country_palette = alt.Scale(domain=country_domain, range=country_range)

    country_line = alt.Chart(country_rows).mark_line(size=2, opacity=1).encode(
        x=day_axis(),
        y=value_axis(),
        color=alt.Color(
            "siteid:N",
            title='Country Level',
            scale=country_palette,
            legend=alt.Legend(symbolDash=[3, 3]) if show_legend else None,
        ),
        strokeDash=alt.value([3, 3]),
    ).properties(width=450, height=250)

    country_points = country_line.mark_point(filled=True, opacity=1, size=50).encode(
        color=alt.Color("siteid:N", scale=country_palette, legend=None)
    )

    country_layer = alt.layer(country_line, country_points).resolve_scale(color='independent')

    # --- Site level: points only, colored by country, sized by N -----------
    site_rows = lab_rows[~is_meta].copy()
    site_rows['country'] = site_rows.siteid.apply(lambda x: site_to_contry[x])

    # Built directly as a point chart; the previous invisible line chart was
    # only a base whose mark and color encoding were both replaced.
    site_points = alt.Chart(site_rows).mark_point(filled=True, opacity=0.3, size=150).encode(
        x=day_axis(),
        y=value_axis(clamp=True),
        color=alt.Color(
            "country:N",
            title='Site Level',
            scale=alt.Scale(domain=country_domain, range=country_range),
            legend=alt.Legend() if show_legend else None,
        ),
        size=alt.Size(
            'N:Q',
            title='Sample Size',
            legend=alt.Legend(symbolFillColor='black') if show_legend else None,
        ),
    ).properties(width=450, height=250)

    # --- Combine -----------------------------------------------------------
    combined = (site_points + continent_layer + country_layer).resolve_scale(
        color='independent',
        size='independent',
        shape='independent',
    )

    return combined.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# Lay the per-lab panels out on a grid, PANELS_PER_ROW charts per row.
# The lab list is chunked rather than sliced with fixed [0:3], [3:6], [6:9]
# ranges, so every lab in `unique_labs` gets a panel regardless of how many
# there are, and no empty row is concatenated when there are only a few.
PANELS_PER_ROW = 3

panel_rows = [
    alt.hconcat(
        *(
            plot_lab(df=df, lab=lab, is_country=True)
            for lab in unique_labs[start:start + PANELS_PER_ROW]
        ),
        spacing=30,
    )
    for start in range(0, len(unique_labs), PANELS_PER_ROW)
]

plot = alt.vconcat(*panel_rows, spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Comparison: Aggregated vs. Patient-Level Data

In [13]:
# Read the comparison table and melt it to long format: every column other
# than `lab`, `siteid` and `phase` becomes a (`day`, `value`) pair.
comparison_csv = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.classification.comparison.csv")

df = (
    pd.read_csv(comparison_csv)
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['lab', 'siteid', 'phase'], var_name='day', value_name='value')
)

# Fix the 'Eurpoe' misspelling before upper-casing the site ids.
df['siteid'] = df['siteid'].str.replace('Eurpoe', 'Europe', regex=False).str.upper()
df['lab'] = [consistent_loinc[code] for code in df['lab']]
df['day'] = df['day'].str.replace('day', '', regex=False)

# Build the combined key from the raw phase value, then relabel the phase
# (1 -> 'Aggregated', anything else -> 'Patient-Level').
df['siteid-phase'] = df['siteid'] + df['phase'].astype(str)
df['phase'] = np.where(df['phase'] == 1, 'Aggregated', 'Patient-Level')

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

df

['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Albumin (g/dL)']
['META-NORTH AMERICA', 'META-EUROPE']


Unnamed: 0,lab,siteid,phase,day,value,siteid-phase
0,C-reactive protein (Normal Sensitivity) (mg/dL),META-NORTH AMERICA,Patient-Level,1,0.673722,META-NORTH AMERICA2
1,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Patient-Level,1,0.684534,META-EUROPE2
2,C-reactive protein (Normal Sensitivity) (mg/dL),META-NORTH AMERICA,Aggregated,1,0.647449,META-NORTH AMERICA1
3,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Aggregated,1,0.681337,META-EUROPE1
4,Albumin (g/dL),META-NORTH AMERICA,Patient-Level,1,0.604223,META-NORTH AMERICA2
...,...,...,...,...,...,...
107,C-reactive protein (Normal Sensitivity) (mg/dL),META-EUROPE,Aggregated,14,0.637184,META-EUROPE1
108,Albumin (g/dL),META-NORTH AMERICA,Patient-Level,14,0.699270,META-NORTH AMERICA2
109,Albumin (g/dL),META-EUROPE,Patient-Level,14,0.779368,META-EUROPE2
110,Albumin (g/dL),META-NORTH AMERICA,Aggregated,14,0.684549,META-NORTH AMERICA1


In [14]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the comparison chart for a single lab.

    One line per ``siteid-phase`` value, colored per continent, overlaid with
    white-filled points whose stroke shows the continent and whose shape
    shows the ``phase``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with ``lab``, ``siteid``, ``phase``, ``day``,
        ``value`` and ``siteid-phase`` columns.
    lab : str
        Value of the ``lab`` column to plot; also used as the panel title.
    is_country : bool
        Not read by this function; kept so existing calls keep working.

    Returns
    -------
    alt.LayerChart
        The line layer followed by the point layer.
    """
    # Filter and copy in one step: assigning a column on a filtered slice
    # raises SettingWithCopyWarning.
    lab_rows = df[df.lab == lab].copy()
    # Only `siteid` loses its prefix; `siteid-phase` keeps 'META-', which is
    # why the line color domain below is spelled with it.
    lab_rows['siteid'] = lab_rows['siteid'].apply(lambda x: x.replace('META-', ''))

    # Legends are only attached to the panel of this one lab.
    show_legend = lab == 'Albumin (g/dL)'

    # Both phases of a continent share one color.
    series_domain = ['META-NORTH AMERICA1', 'META-NORTH AMERICA2', 'META-EUROPE1', 'META-EUROPE2']
    series_colors = ['#D45E00', '#D45E00', '#57B4E9', '#57B4E9']
    series_palette = alt.Scale(domain=series_domain, range=series_colors)

    line = alt.Chart(lab_rows).mark_line(size=2.5, opacity=1).encode(
        x=alt.X(
            "day:Q",
            title='Days Since Admission',
            axis=alt.Axis(labelAngle=0, tickCount=10),
            scale=alt.Scale(clamp=True, nice=False, padding=10),
        ),
        y=alt.Y(
            "value:Q",
            title=None,
            scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 1]),
        ),
        color=alt.Color("siteid-phase:N", title='Continent', scale=series_palette, legend=None),
        shape=alt.Shape(
            "phase:N",
            title='Data',
            scale=alt.Scale(range=['circle', 'diamond']),
            legend=alt.Legend(symbolStrokeWidth=2, symbolFillColor='white', symbolStrokeColor='gray') if show_legend else None,
        ),
    ).properties(width=450, height=250)

    # White fill with a continent-colored stroke.
    point = line.mark_point(filled=True, opacity=1).encode(
        color=alt.value("white"),
        stroke=alt.Color(
            "siteid:N",
            title='Continent',
            scale=alt.Scale(domain=['NORTH AMERICA', 'EUROPE'], range=['#D45E00', '#57B4E9']),
            legend=alt.Legend() if show_legend else None,
        ),
        shape=alt.Shape(
            "phase:N",
            title='Data',
            scale=alt.Scale(range=['circle', 'diamond']),
            legend=alt.Legend() if show_legend else None,
        ),
    )

    chart = line + point

    return chart.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# One comparison panel per lab, laid out side by side.
lab_panels = [plot_lab(df=df, lab=lab, is_country=False) for lab in unique_labs]

plot = alt.hconcat(*lab_panels, spacing=30)
plot = plot.resolve_scale(color='shared', stroke='independent', shape='independent')

theme_options = dict(
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
    point_size=100,
)
plot = apply_theme(plot, **theme_options)

plot

# Prediction Baseline

In [15]:
# Read the baseline-lab prediction table and melt it to long format: every
# column other than `siteid` and `lab` becomes a (`day`, `value`) pair.
baseline_csv = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.baselineLab.csv")

df = (
    pd.read_csv(baseline_csv)
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"nm.lab": "lab"})
    .melt(id_vars=['siteid', 'lab'], var_name='day', value_name='value')
)

# Fix the 'Eurpoe' misspelling, upper-case, then rename 'NORTH AMERICA' to 'USA'.
df['siteid'] = (
    df['siteid']
    .str.replace('Eurpoe', 'Europe', regex=False)
    .str.upper()
    .str.replace('NORTH AMERICA', 'USA', regex=False)
)
df['day'] = df['day'].str.replace('day', '', regex=False)
df['lab'] = [consistent_loinc[code] for code in df['lab']]

unique_labs = df['lab'].unique().tolist()
print(unique_labs)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach each site's total N; site ids missing from the totals table get no value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].apply(lambda site: dic.get(site))

# NOTE(review): these rebind the notebook-level `countries` / `country_colors`
# set near the top of the notebook; any cell that reads those names sees these
# values once this cell has run -- confirm that is intended.
countries = ['META', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

# df[df.siteid == 'META-EUROPE']

['Alanine aminotransferase (U/L)', 'Albumin (g/dL)', 'Aspartate aminotransferase (U/L)', 'Creatinine (mg/dL)', 'C-reactive protein (Normal Sensitivity) (mg/dL)', 'Total bilirubin (mg/dL)', 'White blood cell count (10*3/uL)', 'Lymphocyte count (10*3/uL)', 'Neutrophil count (10*3/uL)']
['META', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'META-USA', 'META-EUROPE', 'META-US', 'META-FRANCE', 'META-GERMANY']


Unnamed: 0,siteid,lab,day,value,N
0,META,Alanine aminotransferase (U/L),1,0.476701,
1,APHP,Alanine aminotransferase (U/L),1,0.550915,17513.0
2,BIDMC,Alanine aminotransferase (U/L),1,,1227.0
3,FRBDX,Alanine aminotransferase (U/L),1,0.494597,1317.0
4,MGB,Alanine aminotransferase (U/L),1,0.484481,4427.0
...,...,...,...,...,...
2613,META-USA,Neutrophil count (10*3/uL),14,0.589044,
2614,META-EUROPE,Neutrophil count (10*3/uL),14,0.584900,
2615,META-US,Neutrophil count (10*3/uL),14,0.589044,
2616,META-FRANCE,Neutrophil count (10*3/uL),14,0.585736,


In [16]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the layered day-3/7/14 chart for one baseline lab.

    Three layers share a 450x250 canvas: site-level points sized by ``N``,
    solid continent-level meta-analysis lines, and dashed country-level
    meta-analysis lines. Rows whose ``siteid`` contains 'META' feed the two
    line layers; all other rows feed the site-level points.

    Relies on the notebook-level ``site_to_contry`` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table with ``siteid``, ``lab``, ``day`` (string day
        numbers), ``value`` and ``N`` columns.
    lab : str
        Value of the ``lab`` column to plot; also used as the chart title.
    is_country : bool
        Accepted for call compatibility; not used.

    Returns
    -------
    The layered Altair chart with independent colour, size and shape scales.
    """
    def days_axis():
        # Fresh x encoding per layer: ordinal axis pinned to the three plotted days.
        return alt.X("day:O", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14']))

    def with_points(line, scale):
        # Overlay filled markers on a line chart; the markers add no legend of their own.
        markers = line.mark_point(
            filled=True,
            opacity=1,
            size=50
        ).encode(
            color=alt.Color("siteid:N", scale=scale, legend=None)
        )
        return alt.layer(line, markers).resolve_scale(color='independent')

    # Keep only this lab at days 3, 7 and 14, and store the day as an integer.
    d = df[df.lab == lab]
    d = d[d.day.isin(['3', '7', '14'])].copy()
    d['day'] = d['day'].map({'3': 3, '7': 7, '14': 14})

    # Only the Aspartate aminotransferase panel carries legends.
    show_legend = lab == 'Aspartate aminotransferase (U/L)'

    is_meta = d.siteid.str.contains('META')
    meta_rows = d[is_meta]
    site_rows = d[~is_meta].copy()

    # --- Continent level -------------------------------------------------
    # 'META' -> 'All Countries', 'META-USA' -> 'NORTH AMERICA', 'META-EUROPE'
    # -> 'EUROPE'. Other META rows keep names outside the colour domain.
    continent = meta_rows.copy()
    continent['siteid'] = continent['siteid'].apply(
        lambda name: name.replace('META-', '').replace('META', 'All Countries').replace('USA', 'NORTH AMERICA')
    )
    continent_scale = alt.Scale(domain=['All Countries', 'NORTH AMERICA', 'EUROPE'], range=['black', '#D45E00', '#57B4E9'])

    continent_line = alt.Chart(
        continent
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Continent Level', scale=continent_scale, legend=alt.Legend() if show_legend else None),
    ).properties(
        width=450,
        height=250
    )
    # Named `continent_layer` rather than `np` so numpy's alias is not shadowed.
    continent_layer = with_points(continent_line, continent_scale)

    # --- Country level ---------------------------------------------------
    country = meta_rows.copy()
    country['siteid'] = country['siteid'].apply(lambda name: name.replace('META-', ''))
    country_scale = alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E'])

    country_line = alt.Chart(
        country
    ).mark_line(
        size=2,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country Level', scale=country_scale, legend=alt.Legend(symbolDash=[3, 3]) if show_legend else None),
        strokeDash=alt.value([3, 3])
    ).properties(
        width=450,
        height=250
    )
    country_layer = with_points(country_line, country_scale)

    # --- Site level ------------------------------------------------------
    # Upper-case the country so title-case entries such as 'France' still
    # match the upper-case colour domain below.
    site_rows['country'] = site_rows['siteid'].apply(lambda site: site_to_contry[site].upper())

    site_points = alt.Chart(
        site_rows
    ).mark_point(
        filled=True,
        opacity=0.3,
        size=150
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, clamp=True, domain=[0.35, 0.85])),
        color=alt.Color("country:N", title='Site Level', scale=alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E']), legend=alt.Legend() if show_legend else None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend(symbolFillColor='black') if show_legend else None),
    ).properties(
        width=450,
        height=250
    )

    # --- Combine ---------------------------------------------------------
    combined = (site_points + continent_layer + country_layer).resolve_scale(color='independent', size='independent', shape='independent')

    return combined.properties(
        title={
            "text": [
                f"{lab}",
            ],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# plot = plot_lab(df=df)

# Lay the lab panels out three to a row (labs 0-2, 3-5 and 6-8).
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=lab, is_country=True) for lab in unique_labs[start:start + 3]],
        spacing=30,
    )
    for start in (0, 3, 6)
)

# Stack the rows and apply the shared notebook theme in one step.
plot = apply_theme(
    alt.vconcat(p1, p2, p3, spacing=30),
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Prediction Cov

In [49]:
# Read the covariate-model prediction table and reshape it to long form:
# one row per (site, model, day) with the day's value in `value`.
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.cov.csv"))
    .drop(columns=['Unnamed: 0'])
    .melt(id_vars=['siteid', 'model'], var_name='day', value_name='value')
)

# Repair the 'Eurpoe' misspelling, upper-case the site ids and relabel
# 'NORTH AMERICA' as 'USA'.
df['siteid'] = df['siteid'].map(
    lambda site: site.replace('Eurpoe', 'Europe').upper().replace('NORTH AMERICA', 'USA')
)
# Column names such as 'day3' become the bare day number (still a string).
df['day'] = df['day'].map(lambda column: column.replace('day', ''))

unique_models = df['model'].unique().tolist()
print(unique_models)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach each site's total N; ids absent from the totals table get no value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].map(lambda site: dic.get(site))

countries = ['META', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

# df[df.siteid == 'META-EUROPE']

['dem+cls+9lab', 'dem+cls+3lab']
['META', 'APHP', 'BIDMC', 'FRBDX', 'MGB', 'NWU', 'UCLA', 'UKFR', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5', 'META-US', 'META-FRANCE', 'META-GERMANY', 'META-USA', 'META-EUROPE']


Unnamed: 0,siteid,model,day,value,N
0,META,dem+cls+9lab,1,0.812247,
1,APHP,dem+cls+9lab,1,0.821831,17513.0
2,BIDMC,dem+cls+9lab,1,,1227.0
3,FRBDX,dem+cls+9lab,1,0.515293,1317.0
4,MGB,dem+cls+9lab,1,0.853286,4427.0
...,...,...,...,...,...
583,META-US,dem+cls+3lab,14,0.729223,
584,META-FRANCE,dem+cls+3lab,14,0.778113,
585,META-GERMANY,dem+cls+3lab,14,0.732580,
586,META-USA,dem+cls+3lab,14,0.729223,


In [33]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the layered day-3/7/14 chart for one covariate model.

    Three layers share a 450x250 canvas: site-level points sized by ``N``,
    a solid overall meta-analysis line, and dashed country-level
    meta-analysis lines. Legends are always drawn.

    Relies on the notebook-level ``site_to_contry`` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table with ``siteid``, ``model``, ``day`` (string day
        numbers), ``value`` and ``N`` columns.
    lab : str
        Value of the ``model`` column to plot (the parameter keeps its
        historical name).
    is_country : bool
        Accepted for call compatibility; not used.

    Returns
    -------
    The layered Altair chart with independent colour, size and shape scales.
    """
    def days_axis():
        # Fresh x encoding per layer: ordinal axis pinned to the three plotted days.
        return alt.X("day:O", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14']))

    def with_points(line, scale):
        # Overlay filled markers on a line chart; the markers add no legend of their own.
        markers = line.mark_point(
            filled=True,
            opacity=1,
            size=50
        ).encode(
            color=alt.Color("siteid:N", scale=scale, legend=None)
        )
        return alt.layer(line, markers).resolve_scale(color='independent')

    # Keep only this model at days 3, 7 and 14, and store the day as an integer.
    d = df[df.model == lab]
    d = d[d.day.isin(['3', '7', '14'])].copy()
    d['day'] = d['day'].map({'3': 3, '7': 7, '14': 14})

    # The original `lab == 'dem+cls+3lab' or True` test was always true, so
    # every legend below is drawn unconditionally.

    is_meta = d.siteid.str.contains('META')
    meta_rows = d[is_meta]
    site_rows = d[~is_meta].copy()

    # --- Overall meta-analysis -------------------------------------------
    # Only the plain 'META' row becomes 'All Countries', the single domain value.
    overall = meta_rows.copy()
    overall['siteid'] = overall['siteid'].apply(
        lambda name: name.replace('META-', '').replace('META', 'All Countries')
    )
    overall_scale = alt.Scale(domain=['All Countries'], range=['black'])

    overall_line = alt.Chart(
        overall
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Meta Analysis', scale=overall_scale, legend=alt.Legend()),
    ).properties(
        width=450,
        height=250
    )
    # Named `overall_layer` rather than `np` so numpy's alias is not shadowed.
    overall_layer = with_points(overall_line, overall_scale)

    # --- Country level ---------------------------------------------------
    country = meta_rows.copy()
    country['siteid'] = country['siteid'].apply(lambda name: name.replace('META-', ''))
    country_scale = alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E'])

    country_line = alt.Chart(
        country
    ).mark_line(
        size=2,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country Level', scale=country_scale, legend=alt.Legend(symbolDash=[3, 3], symbolStrokeWidth=4, symbolSize=300)),
        strokeDash=alt.value([3, 3])
    ).properties(
        width=450,
        height=250
    )
    country_layer = with_points(country_line, country_scale)

    # --- Site level ------------------------------------------------------
    # Upper-case the country so title-case entries such as 'France' still
    # match the upper-case colour domain below.
    site_rows['country'] = site_rows['siteid'].apply(lambda site: site_to_contry[site].upper())

    site_points = alt.Chart(
        site_rows
    ).mark_point(
        filled=True,
        opacity=0.3,
        size=150
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 0.9], clamp=True)),
        color=alt.Color("country:N", title='Site Level', scale=alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E']), legend=alt.Legend()),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend(symbolFillColor='black')),
    ).properties(
        width=450,
        height=250
    )

    # --- Combine ---------------------------------------------------------
    combined = (site_points + overall_layer + country_layer).resolve_scale(color='independent', size='independent', shape='independent')

    # The title text is intentionally empty; only its styling is set.
    return combined.properties(
        title={
            "text": [],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# plot = plot_lab(df=df)

# Draw the first model only, then apply the shared notebook theme.
plot = apply_theme(
    alt.hconcat(
        *[plot_lab(df=df, lab=model, is_country=True) for model in unique_models[:1]],
        spacing=30,
    ),
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Second version with two panels

In [76]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Build the two-panel day-3/7/14 chart for one covariate model.

    The top panel layers the overall meta-analysis line with the
    continent-level lines; the bottom panel layers the site-level points
    (sized by ``N``) with the dashed country-level lines. Legends are always
    drawn.

    Relies on the notebook-level ``site_to_contry`` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table with ``siteid``, ``model``, ``day`` (string day
        numbers), ``value`` and ``N`` columns.
    lab : str
        Value of the ``model`` column to plot (the parameter keeps its
        historical name).
    is_country : bool
        Accepted for call compatibility; not used.

    Returns
    -------
    The vertically concatenated Altair chart with independent colour, size
    and shape scales.
    """
    def days_axis():
        # Fresh x encoding per layer: ordinal axis pinned to the three plotted days.
        return alt.X("day:O", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14']))

    def with_points(line, scale):
        # Overlay filled markers on a line chart; the markers add no legend of their own.
        markers = line.mark_point(
            filled=True,
            opacity=1,
            size=50
        ).encode(
            color=alt.Color("siteid:N", scale=scale, legend=None)
        )
        return alt.layer(line, markers).resolve_scale(color='independent')

    # Keep only this model at days 3, 7 and 14, and store the day as an integer.
    d = df[df.model == lab]
    d = d[d.day.isin(['3', '7', '14'])].copy()
    d['day'] = d['day'].map({'3': 3, '7': 7, '14': 14})

    # The original `lab == 'dem+cls+3lab' or True` test was always true, so
    # every legend below is drawn unconditionally.

    is_meta = d.siteid.str.contains('META')
    meta_rows = d[is_meta]
    site_rows = d[~is_meta].copy()

    # --- Overall meta-analysis -------------------------------------------
    # Only the plain 'META' row becomes 'All Countries', the single domain value.
    overall = meta_rows.copy()
    overall['siteid'] = overall['siteid'].apply(
        lambda name: name.replace('META-', '').replace('META', 'All Countries')
    )
    overall_scale = alt.Scale(domain=['All Countries'], range=['black'])

    overall_line = alt.Chart(
        overall
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Meta Analysis', scale=overall_scale, legend=alt.Legend()),
    ).properties(
        width=450,
        height=250
    )
    overall_layer = with_points(overall_line, overall_scale)

    # --- Continent level -------------------------------------------------
    continent = meta_rows.copy()

    # NOTE(review): placeholder. Every META-EUROPE value is overwritten with a
    # hard-coded 0.75, including any real estimate present in the table.
    # REMOVE THIS WHEN WE HAVE DATA FOR EUROPE.
    continent.loc[continent.siteid == 'META-EUROPE', 'value'] = 0.75

    continent['siteid'] = continent['siteid'].apply(
        lambda name: name.replace('META-', '').replace('USA', 'NORTH AMERICA')
    )
    continent_scale = alt.Scale(domain=['NORTH AMERICA', 'EUROPE'], range=['#D45E00', '#57B4E9'])

    continent_line = alt.Chart(
        continent
    ).mark_line(
        size=3,
        opacity=1
    ).encode(
        # This layer's x axis is declared without title, labels or domain line.
        x=alt.X("day:O", title=None, axis=alt.Axis(labelAngle=0, tickCount=10, labels=False, domain=False), scale=alt.Scale(clamp=True, nice=False, padding=0.3, domain=['3', '7', '14'])),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 0.9])),
        color=alt.Color("siteid:N", title='Continent Level', scale=continent_scale, legend=alt.Legend(symbolStrokeWidth=4, symbolSize=300)),
    ).properties(
        width=450,
        height=250
    )
    # Named `continent_layer` rather than `np` so numpy's alias is not shadowed.
    continent_layer = with_points(continent_line, continent_scale)

    # --- Country level ---------------------------------------------------
    country = meta_rows.copy()
    country['siteid'] = country['siteid'].apply(lambda name: name.replace('META-', ''))
    country_scale = alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E'])

    country_line = alt.Chart(
        country
    ).mark_line(
        size=2,
        opacity=1
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10)),
        color=alt.Color("siteid:N", title='Country Level', scale=country_scale, legend=alt.Legend(symbolDash=[3, 3], symbolStrokeWidth=4, symbolSize=300)),
        strokeDash=alt.value([3, 3])
    ).properties(
        width=450,
        height=250
    )
    country_layer = with_points(country_line, country_scale)

    # --- Site level ------------------------------------------------------
    # Upper-case the country so title-case entries such as 'France' still
    # match the upper-case colour domain below.
    site_rows['country'] = site_rows['siteid'].apply(lambda site: site_to_contry[site].upper())

    site_points = alt.Chart(
        site_rows
    ).mark_point(
        filled=True,
        opacity=0.5,
        size=150
    ).encode(
        x=days_axis(),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 0.9], clamp=True)),
        color=alt.Color("country:N", title='Site Level', scale=alt.Scale(domain=['FRANCE', 'GERMANY', 'USA'], range=['#0072B2', '#029F73', '#D6641E']), legend=alt.Legend()),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend(symbolFillColor='black')),
    ).properties(
        width=450,
        height=250
    )

    # --- Combine ---------------------------------------------------------
    combined = alt.vconcat(
        (overall_layer + continent_layer),
        (site_points + country_layer),
        spacing=10
    ).resolve_scale(color='independent', size='independent', shape='independent')

    # The title text is intentionally empty; only its styling is set.
    return combined.properties(
        title={
            "text": [],
            "fontSize": 18,
            "dx": 30,
            "subtitleColor": "gray",
        }
    )

# plot = plot_lab(df=df)

# Draw the first model only, then apply the shared notebook theme.
plot = apply_theme(
    alt.hconcat(
        *[plot_lab(df=df, lab=model, is_country=True) for model in unique_models[:1]],
        spacing=30,
    ),
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# Prediction Transport

In [None]:
# Read the transportability table and reshape it to long form: one row per
# (source site, target site, day). The source column 'from' becomes 'siteid'.
df = (
    pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.R1.prediction.transport.csv"))
    .drop(columns=['Unnamed: 0'])
    .rename(columns={"from": "siteid"})
    .melt(id_vars=['siteid', 'to'], var_name='day', value_name='value')
)

# Repair the 'Eurpoe' misspelling, upper-case the source ids and relabel
# 'NORTH AMERICA' as 'USA'.
df['siteid'] = df['siteid'].map(
    lambda site: site.replace('Eurpoe', 'Europe').upper().replace('NORTH AMERICA', 'USA')
)
# Column names such as 'day3' become the bare day number (still a string).
df['day'] = df['day'].map(lambda column: column.replace('day', ''))

# Rows where source and target are the same site are relabelled as the
# local result; the 'META-' prefix puts them on the line layer of the plot.
is_local = df['siteid'] == df['to']
df.loc[is_local, 'siteid'] = 'META-Local result'

unique_tos = df['to'].unique().tolist()
print(unique_tos)

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

# Attach each source's total N; ids absent from the totals table get no value.
sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.totalN.csv"))
dic = sdf.set_index('siteid')['N'].to_dict()

df['N'] = df['siteid'].map(lambda site: dic.get(site))

countries = ['Local result', 'USA', 'EUROPE', 'FRANCE', 'GERMANY']
country_colors = ['black', '#D45E00', '#57B4E9', '#0072B2', '#029F73']

df

In [None]:
def plot_lab(df=None, lab='Neutrophil count (10*3/uL)', is_country=True):
    """Chart AUC by day for every source applied to one target site.

    Rows whose ``siteid`` contains 'META' are drawn as lines with markers,
    coloured through the notebook-level ``countries`` / ``country_colors``.
    All other rows are drawn as translucent points sized by ``N`` and
    coloured through ``sites`` / ``site_colors``. Legends appear only when
    ``lab`` is 'VA2'.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form table with ``siteid``, ``to``, ``day``, ``value`` and ``N``.
    lab : str
        Target site (value of the ``to`` column); also used in the title.
    is_country : bool
        Accepted for call compatibility; not used.

    Returns
    -------
    The layered Altair chart with independent colour, size and shape scales.
    """
    canvas = dict(width=450, height=250)

    def day_axis():
        # Quantitative day axis, rebuilt for each layer.
        return alt.X("day:Q", title='Days Since Admission', axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(clamp=True, nice=False, padding=10))

    target_rows = df[df['to'] == lab]
    with_legend = (lab == 'VA2')
    pooled_mask = target_rows['siteid'].str.contains('META')

    # Lines: the 'META' rows, with the 'META-' prefix stripped for display.
    pooled = target_rows[pooled_mask].copy()
    pooled['siteid'] = [name.replace('META-', '') for name in pooled['siteid']]
    pooled_scale = alt.Scale(domain=countries, range=country_colors)

    pooled_line = (
        alt.Chart(pooled)
        .mark_line(size=3, opacity=1)
        .encode(
            x=day_axis(),
            y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10)),
            color=alt.Color("siteid:N", title='Continent', scale=pooled_scale, legend=alt.Legend() if with_legend else None),
        )
        .properties(**canvas)
    )
    pooled_markers = pooled_line.mark_point(filled=True, opacity=1, size=50).encode(
        color=alt.Color("siteid:N", title='Continent', scale=pooled_scale)
    )
    pooled_layer = pooled_line + pooled_markers

    # Points: the individual sites. The line underneath is fully transparent.
    per_site = target_rows[~pooled_mask].copy()

    hidden_line = (
        alt.Chart(per_site)
        .mark_line(size=2.5, opacity=0)
        .encode(
            x=day_axis(),
            y=alt.Y("value:Q", title='AUC', scale=alt.Scale(zero=False, nice=False, padding=10, domain=[0.5, 1], clamp=True)),
            color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        )
        .properties(**canvas)
    )
    site_markers = hidden_line.mark_point(filled=True, opacity=0.3, size=150).encode(
        color=alt.Color("siteid:N", title=None, scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        size=alt.Size('N:Q', title='Sample Size', legend=alt.Legend() if with_legend else None),
    )
    site_layer = hidden_line + site_markers

    # Sites go underneath, pooled lines on top.
    combined = (site_layer + pooled_layer).resolve_scale(color='independent', size='independent', shape='independent')

    return combined.properties(
        title={
            "text": [
                f"From All Sites To {lab}",
            ],
            "dx": 50,
            "fontSize": 18
        }
    )

# plot = plot_lab(df=df)

# Lay the target-site panels out in rows: targets 0-2, 3-5, then target 6.
p1, p2, p3 = (
    alt.hconcat(
        *[plot_lab(df=df, lab=model, is_country=True) for model in unique_tos[lo:hi]],
        spacing=30,
    )
    for lo, hi in ((0, 3), (3, 6), (6, 7))
)

# Stack the rows and apply the shared notebook theme in one step.
plot = apply_theme(
    alt.vconcat(p1, p2, p3, spacing=30),
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

plot

# AUC Matrix

In [None]:
# Read the site-to-site AUC table and give its columns short names.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.auc.port.toShare.csv"))

df = df.rename(columns={"site.from": "from", 'site.to': 'to', 'N.from': 'from_size'})

# Country of each site; the pooled targets ('all', 'US', 'France', 'Germany')
# are tagged 'Meta'. UKFR -> Germany and BIDMC -> USA follow the notebook's
# `sites` / `site_colors` palette, where UKFR carries the Germany colour and
# BIDMC the USA colour (they were previously listed as France and Germany).
site_to_contry = {
    'APHP': 'France',
    'FRBDX': 'France',
    'UKFR': 'Germany',
    'BIDMC': 'USA',
    'MGB': 'USA',
    'NWU': 'USA',
    'UCLA': 'USA',
    'UMICH': 'USA',
    'UPENN': 'USA',
    'UPITT': 'USA',
    'VA1': 'USA',
    'VA2': 'USA',
    'VA3': 'USA',
    'VA4': 'USA',
    'VA5': 'USA',
    'all': 'Meta',
    'US': 'Meta',
    'France': 'Meta',
    'Germany': 'Meta',
}

# Explicit lookups so an unknown site fails loudly with KeyError.
df['country'] = df['from'].apply(lambda x: site_to_contry[x])
df['to-country'] = df['to'].apply(lambda x: site_to_contry[x])

# Arrow-decorated axis labels; pooled targets are shown as 'All' and 'USA'.
df['from'] = df['from'].apply(lambda x: f"{x} →")
df['to'] = df['to'].apply(lambda x: f"→ {x.replace('all', 'All').replace('US', 'USA')}")

countries = ['USA', 'France', 'Germany']
country_colors = ['#D45E00', '#0072B2', '#029F73']

unique_sites = df['from'].unique().tolist()
print(unique_sites)
print(len(unique_sites))

df

In [None]:
def plot(df=None):
    """Build the site-to-site AUC matrix with a pooled panel and size bars.

    Three panels are concatenated horizontally with no spacing:

    * a 750x750 matrix of squares for rows whose ``to-country`` is not
      'Meta', coloured by ``auc`` and sized by ``from_size`` (log scale);
    * a 200x750 panel with the same encoding for the 'Meta' rows, sharing
      the colour scale with the matrix;
    * a 100x750 bar chart of mean ``from_size`` per source site, coloured by
      ``country`` through the notebook-level ``countries`` /
      ``country_colors`` and labelled with ``from_size``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``from``, ``to``, ``auc``, ``from_size``, ``country`` and
        ``to-country`` columns.

    Returns
    -------
    The horizontally concatenated Altair chart.
    """
    d = df.copy()

    # One site order drives both axes. An earlier alternative ordering was
    # assigned and immediately overwritten, so it has been removed.
    site_order = ['APHP', 'FRBDX', 'UKFR', 'BIDMC', 'NWU', 'MGB', 'VA2', 'VA3', 'UPENN', 'VA4', 'VA1', 'VA5', 'UPITT', 'UMICH', 'UCLA']
    sort_from = [f"{site} →" for site in site_order]
    sort_to = [f"→ {site}" for site in site_order]

    def size_by_sample():
        # Square size: source sample size on a log scale.
        return alt.Size('from_size:Q', title='Sample Size', scale=alt.Scale(range=[100, 2000], type='log'), legend=alt.Legend(direction='horizontal', symbolFillColor='lightgray'))

    def color_by_auc():
        # Square colour: AUC on a fixed [0.7, 0.9] domain.
        return alt.Color("auc:Q", title='AUC', scale=alt.Scale(scheme='redpurple', domain=[0.7, 0.9]), legend=alt.Legend(direction='horizontal', gradientLength=440, gradientThickness=30))

    # Site-to-site matrix. Named `matrix` so the function name is not shadowed.
    matrix = alt.Chart(
        d[d['to-country'] != 'Meta']
    ).mark_square(
        opacity=1
    ).encode(
        x=alt.X("to:N", title='To', axis=alt.Axis(labelAngle=-55, domain=True, orient='top'), scale=alt.Scale(), sort=sort_to),
        y=alt.Y("from:N", title='From', axis=alt.Axis(labelAngle=0, domain=True), scale=alt.Scale(), sort=sort_from),
        size=size_by_sample(),
        color=color_by_auc(),
    ).properties(
        width=750,
        height=750
    )

    # Pooled ('Meta') targets; the y labels are hidden in this panel.
    meta = alt.Chart(
        d[d['to-country'] == 'Meta']
    ).mark_square(
        opacity=1
    ).encode(
        x=alt.X("to:N", title=None, axis=alt.Axis(labelAngle=-55, domain=True, orient='top'), scale=alt.Scale(), sort=sort_to),
        y=alt.Y("from:N", title=None, axis=alt.Axis(labelAngle=0, domain=False, labels=False), scale=alt.Scale(), sort=sort_from),
        size=size_by_sample(),
        color=color_by_auc(),
    ).properties(
        width=200,
        height=750
    )

    # Sample-size bars per source site, coloured by country.
    n = alt.Chart(
        d
    ).mark_bar(
        opacity=1
    ).encode(
        x=alt.X("mean(from_size):Q", title='Sample Size', axis=alt.Axis(labelAngle=0, tickCount=2, domain=True, orient='top'), scale=alt.Scale(domain=[0, 20000])),
        y=alt.Y("from:N", title=None, axis=alt.Axis(labelAngle=0, labels=False, domain=True), scale=alt.Scale(), sort=sort_from),
        color=alt.Color("country:N", title='Country', scale=alt.Scale(range=country_colors, domain=countries), legend=alt.Legend(direction='vertical', gradientLength=440, gradientThickness=30)),
    ).properties(
        width=100,
        height=750
    )

    # Rotated text labels showing from_size on each bar.
    text = n.mark_text(
        angle=90,
        align='center',
        baseline='bottom',
        dy=-6,
    ).encode(
        text='from_size'
    )

    return alt.hconcat(
        alt.hconcat(matrix, meta).resolve_scale(color='shared'),
        (n + text),
        spacing=0
    )

# Build the AUC matrix and theme it; legends sit below the chart.
res = apply_theme(
    plot(df),
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

res

# BETA HEATMAP

In [None]:
# Read the coefficient table and drop the CSV's index column.
df = pd.read_csv(
    join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.beta.toShare.csv")
).drop(columns=['Unnamed: 0'])

# Strip the 'meta-' prefix from pooled rows; a bare 'meta' becomes 'All'.
df['siteid'] = df['siteid'].map(lambda site: site.replace('meta-', '').replace('meta', 'All'))

unique_sites = df['siteid'].unique().tolist()
print(unique_sites)

unique_vars = df['variable'].unique().tolist()
print(unique_vars)

# df.beta = df.beta.apply(lambda x: -3 if x <= -3 else 3 if x >= 3 else x)

# df[df.variable == 'mis_albumin']

In [None]:
def plot(df=None):
    """Draw the coefficient heat map: one square per (variable, site).

    Squares have a fixed size of 600 and are coloured by ``beta`` on a
    red-blue scheme with a fixed [-3, 3] domain.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``variable``, ``siteid`` and ``beta`` columns.

    Returns
    -------
    A 750x700 Altair chart.
    """
    data = df.copy()

    variable_axis = alt.X(
        "variable:N",
        title=None,
        axis=alt.Axis(labelAngle=-55, domain=True, orient='top'),
        scale=alt.Scale(),
    )
    site_axis = alt.Y(
        "siteid:N",
        title=None,
        axis=alt.Axis(labelAngle=0, domain=True),
        scale=alt.Scale(),
    )
    beta_color = alt.Color(
        "beta:Q",
        title='Coefficient',
        scale=alt.Scale(scheme='redblue', domain=[-3, 3]),
        legend=alt.Legend(direction='horizontal', gradientLength=440, gradientThickness=30),
    )

    heatmap = alt.Chart(data).mark_square(opacity=1).encode(
        x=variable_axis,
        y=site_axis,
        size=alt.value(600),
        color=beta_color,
    )
    return heatmap.properties(width=750, height=700)

# Build the coefficient heat map and theme it; the legend sits below the chart.
res = apply_theme(
    plot(df),
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='top',
    axis_y_title_font_size=16,
    axis_label_font_size=14,
    header_label_font_size=16,
    subtitle_font_size=18,
)

res