In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [None]:
# Release date of the data; passed to get_visualization_subtitle() when chart titles are built.
data_release = '2021-04-27'

# Read the shared recovery table (path is relative to this notebook's folder).
df = pd.read_csv(
    join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.recover.toShare.csv")
)
print(df.head())

df = (
    df
    # Drop the "Unnamed: 0" column (presumably a saved row index -- TODO confirm).
    .drop(columns=["Unnamed: 0"])
    # Identical rows are duplicated in the file; keep the first occurrence of each.
    .drop_duplicates(subset=None, keep="first")
    # Use the shorter column name `lab` for the lab identifier.
    .rename(columns={"nm.lab": "lab"})
)

# Raw lab identifiers (values of the `lab` column) -> display labels with units.
consistent_loinc = {
    "C_reactive_protein_CRP_Normal_Sensitivity": "C-reactive protein (Normal Sensitivity) (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "Ferritin": "Ferritin (ng/mL)",
    "D_dimer": "D-dimer (ng/mL)",
    "albumin": "Albumin (g/dL)",
    "Fibrinogen": "Fibrinogen (mg/dL)",
    "alanine_aminotransferase_ALT": "Alanine aminotransferase (U/L)",
    "aspartate_aminotransferase_AST": "Aspartate aminotransferase (U/L)",
    "total_bilirubin": "Total bilirubin (mg/dL)",
    "lactate_dehydrogenase_LDH": "Lactate dehydrogenase (U/L)",
    "cardiac_troponin_High_Sensitivity": "Cardiac troponin High Sensitivity (ng/mL)",
    "cardiac_troponin_Normal_Sensitivity": "Cardiac troponin Normal Sensitivity (ng/mL)",
    "prothrombin_time_PT": "Prothrombin time (s)",
    "white_blood_cell_count_Leukocytes": "White blood cell count (10*3/uL)",
    "lymphocyte_count": "Lymphocyte count (10*3/uL)",
    "neutrophil_count": "Neutrophil count (10*3/uL)",
    "procalcitonin": "Procalcitonin (ng/mL)",
}

# Raw period identifiers -> short display labels.
consistent_date = {
    '2020-Mar-Apr': 'Mar - Apr',
    '2020-May-Jun': 'May - Jun',
    '2020-Jul-Aug': 'Jul - Aug',
    '2020-Sep-Oct': 'Sep - Oct',
    'After 2020-Nov': 'Since Nov'
}

# Column names "day1" .. "day14".
days = [f"day{i}" for i in range(1, 15)]

# General-purpose colour palette.
colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

# Display order of the sites and one colour per site (same position in both lists).
# An earlier, shorter pair of lists used to be assigned here and was immediately
# overwritten (and had 14 sites but only 12 colours); only the effective pair is kept.
sites = [
    'META',
    'APHP', 'FRBDX', 'ICSM', 'UKFR',
    'NWU',
    'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT',
    'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
site_colors = ['black'] + ['#0072B2'] * 4 + ['#CB7AA7'] + ['#D45E00'] * 11

# Guard against the two lists drifting apart again.
assert len(sites) == len(site_colors), "every site needs exactly one colour"

# Swap raw lab identifiers for their display labels; an unmapped identifier raises KeyError.
df["lab"] = df["lab"].map(lambda code: consistent_loinc[code])

# Site ids are compared against upper-case names (e.g. 'META') further down.
df["siteid"] = df["siteid"].map(lambda site: site.upper())
print(df["siteid"].unique().tolist())

# Length-of-stay group -> display label; an unknown group raises KeyError.
df["length"] = df["length"].map(lambda key: {
    'week1': '<=1 Week',
    'week2': '1-2 Week',
    'week3': '2+ Week',
    'all': 'All',
}[key])
lengths = df["length"].unique().tolist()
print(lengths)

# Period -> wave label; an unknown period raises KeyError.
df["period"] = df["period"].map(lambda key: {'early': 'First', 'late': 'Second'}[key])

# NOTE: despite its name, `unique_sites` holds the *lab labels* that have at least one
# non-missing value; it is iterated as the list of labs when the figures are assembled.
unique_sites = df.loc[df["value"].notna(), "lab"].unique().tolist()

# Distinct day values; used as the x-axis sort order by the plotting functions.
unique_days = df["day"].unique().tolist()
print(unique_days)

df

# All Sites

In [None]:
def _with_meta_reference(wave):
    """Return a copy of `wave` with a `reference` column holding the META value per day.

    For every row, `reference` is the sum of `value` over the rows of `wave` whose
    siteid is 'META' and whose day matches (missing values are skipped). Days that
    have no META row get 0.
    """
    wave = wave.copy()
    meta_by_day = wave.loc[wave.siteid == 'META'].groupby('day')['value'].sum()
    wave['reference'] = wave['day'].map(meta_by_day).fillna(0)
    return wave


def plot_lab(df=None, lab='Albumin (g/dL)', length='All'):
    """Build the per-site chart row for one lab and one length-of-stay group.

    The chart is faceted into one column per site. Each facet layers the site's own
    values (coloured by wave) over a gray trace of the META values for the same days.

    Relies on the notebook globals `unique_days`, `sites` and `data_release`.

    Args:
        df: Long-format frame with at least the columns `lab`, `length`, `period`,
            `siteid`, `day` and `value`. Required, despite the `None` default.
        lab: Display label of the lab to plot (a value of `df.lab`).
        length: Length-of-stay label (a value of `df.length`). 'All' gets the legend,
            the facet headers and the figure title; '2+ Week' gets the x-axis labels;
            every other group is drawn without them.

    Returns:
        The faceted Altair chart.
    """
    d = df[(df.lab == lab) & (df.length == length)]

    hidden_axis = dict(ticks=False, labels=False, domain=False, title=None, tickCount=10)
    if length == 'All':
        legend = alt.Legend()
        header = alt.Header(title=None)
        x = alt.Axis(**hidden_axis)
    elif length == '2+ Week':
        # Must match the label produced when `length` is normalised ('2+ Week');
        # the previous comparison against '>2 Week' never matched, so no row ever
        # showed the x-axis labels.
        legend = None
        header = alt.Header(title=None, labels=False)
        x = alt.Axis(labelAngle=-55, tickCount=10)
    else:
        legend = None
        header = alt.Header(title=None, labels=False)
        x = alt.Axis(**hidden_axis)

    # The META reference is computed separately per wave so that the first-wave
    # facets are compared with the first-wave META values and likewise for the second.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    d = pd.concat([
        _with_meta_reference(d[d.period == 'First']),
        _with_meta_reference(d[d.period == 'Second']),
    ])

    # `length` is already a display label; str.capitalize() would lower-case the
    # rest of it ('2+ Week' -> '2+ week'), so it is used as is.
    y_title = [f'{length} Patients', 'Fixed Effect']

    plot = alt.Chart(
        d
    ).mark_line(
        point=True,
        size=2,
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=x, scale=alt.Scale(padding=1), sort=unique_days),
        y=alt.Y("value:Q", title=y_title, scale=alt.Scale(zero=False)),
        color=alt.Color("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['#D45E00', '#0072B2']), title='Wave', legend=legend),
        shape=alt.Shape("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['circle', 'diamond']), legend=None)
    ).properties(
        width=200,
        height=150
    )

    # Gray META trace; rows whose reference is 0 (no META value for that day) are dropped.
    ref = plot.transform_filter(
        {'not': alt.FieldEqualPredicate(field='reference', equal=0)}
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=x, sort=unique_days),
        color=alt.Color("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['#D4D4D4']), legend=None),
        shape=alt.Shape("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['circle', 'diamond']), legend=None),
        y=alt.Y("reference:Q", title=y_title)
    )

    # Independent colour scales inside the layer keep the gray trace gray.
    plot = alt.layer(ref, plot).resolve_scale(color='independent')
    plot = plot.facet(
        column=alt.Column("siteid:N", header=header, sort=sites)
    ).resolve_scale(color='shared')

    # Must match the normalised label 'All'; the previous comparison against 'all'
    # never matched, which left the title below unreachable.
    if length == 'All':
        plot = plot.properties(
            title={
                "text": [
                    "Lab Recovery Rate By Length Of Stay In Hospital"
                ],
                "dx": 60,
                "subtitle": [
                    lab,
                    'Gray Trends Represent Meta-Analysis Results',
                    get_visualization_subtitle(data_release=data_release, with_num_sites=False)
                ],
                "subtitleColor": "gray",
            }
        )

    return plot

# One group of rows per lab, one row per length-of-stay group inside it.
# NOTE: `unique_sites` holds lab labels (not site ids), despite its name.
plot = alt.vconcat(*(
    alt.vconcat(*(
        plot_lab(df=df, lab=lab, length=length) for length in lengths
    ), spacing=5).resolve_scale(color='shared', y='shared') for lab in unique_sites
), spacing=30)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=70
)

save(plot, join("..", "result", "final-recover-site.png"), scalefactor=8.0)

# Keep the chart as the cell's last expression so it is displayed; placed before
# save() it was a no-op because only the final expression of a cell is rendered.
plot



# Meta-Analysis with Confidence Intervals (CIs)

Two columns, `ci_l` and `ci_u`, hold the lower and upper bounds of the confidence interval (CI). If the data do not include CIs, placeholder bounds can be added at -10% and +10% of the actual value.

In [None]:
# Placeholder CIs at value -/+ 10%. Kept disabled: the plots below read `ci_l` and
# `ci_u` directly from `df`, so enabling these lines would overwrite any CI columns
# already present. Remove them for good once the files carry actual CI information.
# df['ci_l'] = df.value * 0.9
# df['ci_u'] = df.value * 1.1

# Rich display of the frame (last expression of the cell) instead of print().
df

In [None]:
# Pixel size of each panel built by plot_lab() below.
width, height = 200, 140
# Hollow point style; plot_lab() below currently passes point=True rather than this definition.
point = alt.OverlayMarkDef(strokeWidth=2, fill='white', filled=False)

# Show which lab labels are available.
print(df["lab"].unique().tolist())

def plot_lab(df=None, lab='Albumin (g/dL)', length='All', siteid='META'):
    """Build one panel: values of one lab over time for a single site, with CI bars.

    The panel layers error bars (from `ci_l` to `ci_u`) under a line-with-points
    trace of `value`, both coloured by wave.

    Relies on the notebook globals `width`, `height` and `unique_days`.

    Args:
        df: Long-format frame with at least the columns `lab`, `length`, `siteid`,
            `period`, `day`, `value`, `ci_l` and `ci_u`. Required, despite the
            `None` default.
        lab: Display label of the lab to plot (a value of `df.lab`). The CRP label
            gets the legend and the panel title; 'D-dimer (ng/mL)' gets the x-axis
            labels. This matches the first and last entries of the labs the notebook
            stacks, so it has to be revisited if that selection changes.
        length: Length-of-stay label (a value of `df.length`). Only 'All' panels
            show a y-axis with a title.
        siteid: Value of `df.siteid` to plot. Defaults to 'META', the value that
            used to be hard-coded.

    Returns:
        The layered Altair chart.
    """
    def _blank_axis():
        # Axis with nothing drawn; used wherever a neighbouring panel carries the labels.
        return alt.Axis(ticks=False, labels=False, domain=False, title=None, tickCount=10)

    d = df[(df.lab == lab) & (df.length == length) & (df.siteid == siteid)]

    is_title_row = lab == 'C-reactive protein (Normal Sensitivity) (mg/dL)'
    is_axis_row = lab == 'D-dimer (ng/mL)'
    shows_y_axis = length == 'All'

    legend = alt.Legend() if is_title_row else None
    x_axis = alt.Axis(labelAngle=0, tickCount=10) if is_axis_row else _blank_axis()
    y_axis = alt.Axis(titleX=-40) if shows_y_axis else _blank_axis()
    # Shorten the long CRP label so that it fits next to the panel.
    y_title = lab.replace('C-reactive protein (Normal Sensitivity)', 'CRP') if shows_y_axis else None

    line = alt.Chart(
        d
    ).mark_line(
        point=True,
        size=2,
        opacity=0.3
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=x_axis, scale=alt.Scale(padding=10, nice=False), sort=unique_days),
        y=alt.Y("value:Q", title=y_title, scale=alt.Scale(zero=False, nice=False, padding=16), axis=y_axis),
        color=alt.Color("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['#D45E00', '#0072B2']), title='Wave', legend=legend),
        shape=alt.Shape("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['circle', 'diamond']), legend=None)
    ).properties(
        width=width,
        height=height
    )

    # Error bars are derived from the line chart, so they keep its data and its
    # remaining encodings; only the channels listed here are replaced.
    error_bars = line.mark_errorbar(
        opacity=0.7
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission'),
        y=alt.Y("ci_l:Q", title=y_title),
        y2=alt.Y2("ci_u:Q"),
        strokeWidth=alt.value(1)
    )

    plot = (error_bars + line)

    if is_title_row:
        plot = plot.properties(
            title={
                "text": [f'{length} Patients'],
                "fontSize": 16,
                "anchor": 'middle',
                "subtitleColor": "gray",
            }
        )

    return plot

# Labs shown in the figure, top to bottom. plot_lab() puts the legend and panel titles
# on the CRP row and the x-axis labels on the D-dimer row, so keep those first and last.
# Other labels that appear in the data, per the original notes:
#   'Albumin (g/dL)', 'Lactate dehydrogenase (U/L)', 'Total bilirubin (mg/dL)',
#   'Lymphocyte count (10*3/uL)', 'Aspartate aminotransferase (U/L)',
#   'Alanine aminotransferase (U/L)', 'Cardiac troponin High Sensitivity (ng/mL)',
#   'Procalcitonin (ng/mL)', 'Ferritin (ng/mL)',
#   'Cardiac troponin Normal Sensitivity (ng/mL)', 'Prothrombin time (s)'
SELECTED_LABS = ['C-reactive protein (Normal Sensitivity) (mg/dL)', 'Creatinine (mg/dL)', 'D-dimer (ng/mL)']

# One row per lab, one panel per length-of-stay group within the row.
plot = alt.vconcat(*(
    alt.hconcat(*(
        plot_lab(df=df, lab=lab, length=length) for length in lengths
    ), spacing=10).resolve_scale(color='shared', y='shared') for lab in SELECTED_LABS
), spacing=10).resolve_scale(color='shared')

plot = plot.properties(
    title={
        "text": [
            "Meta-Analysis Of Lab Recovery Rate"
        ],
        "dx": 65,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
)

save(plot, join("..", "result", "final-recover-meta.png"), scalefactor=8.0)

# Keep the chart as the cell's last expression so it is displayed; placed before
# save() it was a no-op because only the final expression of a cell is rendered.
plot



## Meta-Analysis by Country with Confidence Intervals (CIs)

In [None]:
# Pixel size of each panel built by plot_lab() below.
width, height = 200, 140
# Hollow point style; plot_lab() below currently passes point=True rather than this definition.
point = alt.OverlayMarkDef(strokeWidth=2, fill='white', filled=False)

# Show which lab labels are available.
print(df["lab"].unique().tolist())

def plot_lab(df=None, lab='Albumin (g/dL)', length='All', country=None):
    """Build one panel: values of one lab over time for one country-level entry, with CI bars.

    The panel layers error bars (from `ci_l` to `ci_u`) under a line-with-points
    trace of `value`, both coloured by wave.

    Relies on the notebook globals `width`, `height` and `unique_days`.

    Args:
        df: Long-format frame with at least the columns `lab`, `length`, `siteid`,
            `period`, `day`, `value`, `ci_l` and `ci_u`. Required, despite the
            `None` default.
        lab: Display label of the lab to plot (a value of `df.lab`). The CRP label
            gets the legend and the panel title; 'D-dimer (ng/mL)' gets the x-axis
            labels. This matches the first and last entries of the labs the notebook
            stacks, so it has to be revisited if that selection changes.
        length: Length-of-stay label (a value of `df.length`). Only 'All' panels
            show a y-axis with a title.
        country: Value matched against `df.siteid` (the notebook passes names such
            as 'META-USA'). With the default `None` no row matches and the panel is
            built on empty data.

    Returns:
        The layered Altair chart.
    """
    def _blank_axis():
        # Axis with nothing drawn; used wherever a neighbouring panel carries the labels.
        return alt.Axis(ticks=False, labels=False, domain=False, title=None, tickCount=10)

    # Filter first instead of copying the whole frame; nothing below mutates `d`.
    d = df[(df.lab == lab) & (df.length == length)]
    d = d[d.siteid == country]

    is_title_row = lab == 'C-reactive protein (Normal Sensitivity) (mg/dL)'
    is_axis_row = lab == 'D-dimer (ng/mL)'
    shows_y_axis = length == 'All'

    legend = alt.Legend() if is_title_row else None
    x_axis = alt.Axis(labelAngle=0, tickCount=10) if is_axis_row else _blank_axis()
    y_axis = alt.Axis(titleX=-40) if shows_y_axis else _blank_axis()
    # Shorten the long CRP label so that it fits next to the panel.
    y_title = lab.replace('C-reactive protein (Normal Sensitivity)', 'CRP') if shows_y_axis else None

    line = alt.Chart(
        d
    ).mark_line(
        point=True,
        size=2,
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission', axis=x_axis, scale=alt.Scale(padding=10, nice=False), sort=unique_days),
        y=alt.Y("value:Q", title=y_title, scale=alt.Scale(zero=False, nice=False, padding=16), axis=y_axis),
        color=alt.Color("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['#D45E00', '#0072B2']), title='Wave', legend=legend),
        shape=alt.Shape("period:N", scale=alt.Scale(domain=['First', 'Second'], range=['circle', 'diamond']), legend=None)
    ).properties(
        width=width,
        height=height
    )

    # Error bars are derived from the line chart, so they keep its data and its
    # remaining encodings; only the channels listed here are replaced.
    error_bars = line.mark_errorbar(
        opacity=0.7
    ).encode(
        x=alt.X("day:Q", title='Days Since Admission'),
        y=alt.Y("ci_l:Q", title=y_title),
        y2=alt.Y2("ci_u:Q"),
        strokeWidth=alt.value(1)
    )

    plot = (error_bars + line)

    if is_title_row:
        plot = plot.properties(
            title={
                "text": [f'{length} Patients'],
                "fontSize": 16,
                "anchor": 'middle',
                "subtitleColor": "gray",
            }
        )

    return plot

# Labs shown for every country, top to bottom. plot_lab() puts the legend and panel
# titles on the CRP row and the x-axis labels on the D-dimer row.
SELECTED_LABS = [
    'C-reactive protein (Normal Sensitivity) (mg/dL)',
    'Creatinine (mg/dL)',
    'D-dimer (ng/mL)',
]

# Values of `siteid` to plot, left to right; one column of panels per entry.
country_list = [
    "META-USA",
    "META-FRANCE",
    "META-ITALY",
    "META-SPAIN",
]

# Length-of-stay groups shown per country.
lengths_new = ["All"]
def plot_lab_wrap(df=None, country=None):
    """Stack the panels of every selected lab for one country into a titled column.

    Uses the notebook globals `SELECTED_LABS` (rows) and `lengths_new` (panels per row).

    Args:
        df: Frame handed through to plot_lab().
        country: Value of `siteid` to plot, e.g. 'META-USA'. The column title is this
            name with the 'META-' prefix removed.

    Returns:
        The vertically concatenated Altair chart.
    """
    lab_rows = []
    for lab in SELECTED_LABS:
        panels = [
            plot_lab(df=df, lab=lab, length=length, country=country)
            for length in lengths_new
        ]
        row = alt.hconcat(*panels, spacing=10).resolve_scale(color='shared', y='shared')
        lab_rows.append(row)

    column = alt.vconcat(*lab_rows, spacing=10).resolve_scale(color='shared')

    title = {
        "text": [
            country.replace("META-", "")
        ],
        "dx": 65,
        "subtitleColor": "gray",
    }
    return column.properties(title=title)

# One column of panels per country, side by side.
plot = alt.hconcat(*(
    plot_lab_wrap(df=df, country=country) for country in country_list
), spacing=10).resolve_scale(color='shared', y='shared')

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
)

save(plot, join("..", "result", "final-recover-country.png"), scalefactor=8.0)

# Keep the chart as the cell's last expression so it is displayed; placed before
# save() it was a no-op because only the final expression of a cell is rendered.
plot

