In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow charts to use datasets with more than 5000 rows

> For the lab observations, is there a way for us to see the trend over an entire period of day 0-7 or 0-14 for each lab? There might need to be multiple graphs, but for the paper/presentation we will most likely just pick and choose labs. We will probably upload all of them to the website, though.

> I think it might be too busy if we do all + severe in the same plot, so maybe we should facet it to have all in 1 group and then ever-severe in another group.

In [None]:
# Maps the raw lab column names found in the CSV header to display names.
lab_nice = {
    "alanine.aminotransferase..ALT.": "alanine aminotransferase (ALT)",
    "albumin": "albumin",
    "aspartate.aminotransferase..AST.": "aspartate aminotransferase (AST)",
    "total.bilirubin": "total bilirubin",
    "C.reactive.protein..CRP...Normal.Sensitivity.": "C-reactive protein (CRP) (Normal Sensitivity)",
    "creatinine": "creatinine",
    "Ferritin": "Ferritin",
    "lactate.dehydrogenase..LDH.": "lactate dehydrogenase (LDH)",
    "Fibrinogen": "Fibrinogen",
    "procalcitonin": "procalcitonin",
    "cardiac.troponin..High.Sensitivity.": "cardiac troponin (High Sensitivity)",
    "white.blood.cell.count..Leukocytes.": "white blood cell count (Leukocytes)",
    "lymphocyte.count": "lymphocyte count",
    "neutrophil.count": "neutrophil count",
    "D.dimer": "D-dimer",
    "prothrombin.time..PT.": "prothrombin time (PT)",
    "cardiac.troponin..Normal.Sensitivity.": "cardiac troponin (Normal Sensitivity)",
}

# Read the lab observations, drop the first column, reshape to one row per
# (cohort, day, setting, wave, lab) and tidy the labels in a single pipeline.
df = (
    pd.read_csv(join("..", "data", "Phase2.1", "temporal", "labs", "lab_obs.csv"))
    # The first column is discarded (presumably an exported row index -- TODO confirm).
    .pipe(lambda raw: raw.drop(columns=[raw.columns[0]]))
    .melt(id_vars=["cohort", "day", "setting", "wave"], var_name="lab")
    .assign(
        # A lab name missing from lab_nice raises KeyError here.
        lab=lambda long_df: long_df["lab"].apply(lambda raw_name: lab_nice[raw_name]),
        # Upper-case only the first character; the rest of the label is untouched.
        wave=lambda long_df: long_df["wave"].apply(lambda text: text[0].upper() + text[1:]),
        setting=lambda long_df: long_df["setting"].apply(lambda text: text[0].upper() + text[1:]),
    )
)

# Keep an independent copy of the tidy frame for cells that read `orig_df`.
orig_df = df.copy()

# Preview the tidy frame.
df.head()

In [None]:
# Fixed wave-to-color mapping; used as the color scale of the "wave" encoding
# in make_line_chart_for_lab.
wave_color_scale = alt.Scale(domain=["Early", "Late"], range=['#D45E00', '#0072B2'])

In [None]:
def compute_early_late_difference(df):
    """Compare the early-wave and late-wave value of every (setting, lab, day) group.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns "setting", "lab", "day", "wave"
        and "value". Only rows whose "wave" is "Early" or "Late" are read.
        When a group holds several rows for the same wave, the first is used.

    Returns
    -------
    pandas.DataFrame
        One row per (setting, lab, day) group with the columns "setting",
        "lab", "day", "value_early", "value_late", "difference"
        (late minus early), "is_left_direction" (difference < 0) and
        "is_right_direction" (difference > 0), sorted by ascending
        "difference". A group that lacks one of the two waves gets NaN for
        that value and for "difference", and both direction flags are False.
        An empty input yields an empty frame with the same columns.
    """

    def _first_value(group_df, wave):
        # First "value" recorded for `wave` in this group, NaN if there is none.
        wave_values = group_df.loc[group_df["wave"] == wave, "value"]
        return wave_values.iloc[0] if len(wave_values) > 0 else float("nan")

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = [
        {
            "setting": setting,
            "lab": lab,
            "day": day,
            "value_early": _first_value(group_df, "Early"),
            "value_late": _first_value(group_df, "Late"),
        }
        for (setting, lab, day), group_df in df.groupby(by=["setting", "lab", "day"])
    ]

    # Explicit columns keep the schema stable even when there are no groups.
    diff_df = pd.DataFrame(
        records,
        columns=["setting", "lab", "day", "value_early", "value_late"],
    ).astype({"value_early": float, "value_late": float})

    diff_df["difference"] = diff_df["value_late"] - diff_df["value_early"]
    diff_df["is_left_direction"] = diff_df["difference"] < 0.0
    diff_df["is_right_direction"] = diff_df["difference"] > 0.0

    return diff_df.sort_values(by="difference")

In [None]:
def make_early_late_angle_plot_for_day(day):
    """Build a per-lab faceted early-vs-late chart for a single day.

    Reads the module-level ``df``, keeps the rows where "day" equals ``day``
    and "cohort" is "dayX", and draws one column facet per lab. Each facet
    shows points joined by a line across the waves, colored by "setting".
    Facets follow the lab order returned by compute_early_late_difference.

    Parameters
    ----------
    day : value compared against the "day" column of ``df``.

    Returns
    -------
    The faceted chart after ``apply_theme`` has been applied.
    """
    selected_df = df.loc[(df["day"] == day) & (df["cohort"] == "dayX")]
    difference_df = compute_early_late_difference(selected_df)

    # Labs in order of first appearance in the difference-sorted frame.
    lab_order = difference_df["lab"].unique().tolist()

    y_domain = [0.0, 1.0]

    def _wave_value_encoding():
        # Built fresh per layer so the two marks never share encoding objects.
        return {
            "x": alt.X("wave:O", axis=alt.Axis(title=None)),
            "y": alt.Y(
                "value:Q",
                axis=alt.Axis(title="Percent of patients tested"),
                scale=alt.Scale(domain=y_domain),
            ),
            "color": alt.Color("setting:N", legend=alt.Legend(title="Severity")),
        }

    point_layer = alt.Chart().mark_point().encode(**_wave_value_encoding())
    line_layer = alt.Chart().mark_line().encode(**_wave_value_encoding())

    lab_header = alt.Header(
        title="Lab",
        orient="bottom",
        labelAngle=-60,
        labelAnchor="middle",
        labelAlign="right",
    )
    faceted = alt.layer(point_layer, line_layer, data=selected_df).facet(
        column=alt.Column("lab:N", header=lab_header, sort=lab_order)
    )

    titled = faceted.properties(
        title={
            "text": f"Percent of patients tested, day {day} of wave",
            "subtitle": "Day-specific denominator",
            "subtitleColor": "gray",
        },
    )

    return apply_theme(titled)

In [None]:
# Early-vs-late comparison per lab for day 0.
make_early_late_angle_plot_for_day(0)

In [None]:
# Early-vs-late comparison per lab for day 1.
make_early_late_angle_plot_for_day(1)

In [None]:
# Early-vs-late comparison per lab for day 7.
make_early_late_angle_plot_for_day(7)

In [None]:


# The lab facet order comes from the day-0, "dayX"-cohort early-vs-late comparison.
day0_df = df[df["day"].eq(0) & df["cohort"].eq("dayX")]
day0_diff_df = compute_early_late_difference(day0_df)
lab_sort = day0_diff_df["lab"].drop_duplicates().tolist()

value_domain = [0.0, 1.0]

# Line layer: value per wave, colored by severity, opacity by cohort (denominator).
lines = alt.Chart().mark_line().encode(
    x=alt.X("wave:O", axis=alt.Axis(title=None)),
    y=alt.Y(
        "value:Q",
        axis=alt.Axis(title="Percent of patients"),
        scale=alt.Scale(domain=value_domain),
    ),
    color=alt.Color("setting:N", legend=alt.Legend(title="Severity")),
    opacity=alt.Opacity("cohort:N", legend=alt.Legend(title="Denominator")),
)
# The point layer reuses the line layer's encoding; only the mark and height differ.
points = lines.mark_point().properties(height=120)

# Every day and every cohort in `df` is shown: one row per day, one column per lab.
chart = apply_theme(
    alt.layer(points, lines, data=df)
    .facet(
        column=alt.Column(
            "lab:N",
            header=alt.Header(
                title="Lab",
                orient="bottom",
                labelAngle=-60,
                labelAnchor="middle",
                labelAlign="right",
            ),
            sort=lab_sort,
        ),
        row=alt.Row(
            "day:O",
            header=alt.Header(title="Day of wave", labelAngle=0, labelFontSize=20),
        ),
    )
    .properties(title="Percent of patients tested by wave")
)

chart

In [None]:
# The lab facet order comes from the day-0, "dayX"-cohort early-vs-late comparison.
day0_df = df[df["day"].eq(0) & df["cohort"].eq("dayX")]
day0_diff_df = compute_early_late_difference(day0_df)
lab_sort = day0_diff_df["lab"].drop_duplicates().tolist()

value_domain = [0.0, 1.0]

# Restrict to the "dayX" cohort, then to days 0, 1 and 7.
day_specific_df = df[df["cohort"].eq("dayX")]
limited_days_df = day_specific_df[day_specific_df["day"].isin([0, 1, 7])]

# Line layer: value per wave, colored by severity.
lines = alt.Chart().mark_line().encode(
    x=alt.X("wave:O", axis=alt.Axis(title=None)),
    y=alt.Y(
        "value:Q",
        axis=alt.Axis(title="Percent of patients"),
        scale=alt.Scale(domain=value_domain),
    ),
    color=alt.Color("setting:N", legend=alt.Legend(title="Severity")),
)
# The point layer reuses the line layer's encoding; only the mark and height differ.
points = lines.mark_point().properties(height=120)

# One row per selected day, one column per lab.
chart = apply_theme(
    alt.layer(points, lines, data=limited_days_df)
    .facet(
        column=alt.Column(
            "lab:N",
            header=alt.Header(
                title="Lab",
                orient="bottom",
                labelAngle=-60,
                labelAnchor="middle",
                labelAlign="right",
            ),
            sort=lab_sort,
        ),
        row=alt.Row(
            "day:O",
            header=alt.Header(title="Day of wave", labelAngle=0, labelFontSize=20),
        ),
    )
    .properties(
        title={
            "text": "Percent of patients tested by wave",
            "subtitle": "Day-specific denominator",
            "subtitleColor": "gray",
        }
    )
)

chart

In [None]:
# NOTE(review): this cell builds the same chart as the preceding cell (same
# data selection, encodings and title); it can likely be removed.

# Lab facet order: taken from the day-0, "dayX"-cohort early-vs-late comparison.
day0_df = df[df["day"].eq(0) & df["cohort"].eq("dayX")]
day0_diff_df = compute_early_late_difference(day0_df)
lab_sort = day0_diff_df["lab"].drop_duplicates().tolist()

value_domain = [0.0, 1.0]

# Keep only the "dayX" cohort and days 0, 1 and 7.
day_specific_df = df[df["cohort"].eq("dayX")]
limited_days_df = day_specific_df[day_specific_df["day"].isin([0, 1, 7])]

# Line layer first; the point layer is derived from it below.
lines = alt.Chart().mark_line().encode(
    x=alt.X("wave:O", axis=alt.Axis(title=None)),
    y=alt.Y(
        "value:Q",
        axis=alt.Axis(title="Percent of patients"),
        scale=alt.Scale(domain=value_domain),
    ),
    color=alt.Color("setting:N", legend=alt.Legend(title="Severity")),
)
# Same encoding as the lines, drawn as points with an explicit height.
points = lines.mark_point().properties(height=120)

lab_column = alt.Column(
    "lab:N",
    header=alt.Header(
        title="Lab",
        orient="bottom",
        labelAngle=-60,
        labelAnchor="middle",
        labelAlign="right",
    ),
    sort=lab_sort,
)
day_row = alt.Row(
    "day:O",
    header=alt.Header(title="Day of wave", labelAngle=0, labelFontSize=20),
)

chart = alt.layer(points, lines, data=limited_days_df).facet(column=lab_column, row=day_row)
chart = chart.properties(
    title={
        "text": "Percent of patients tested by wave",
        "subtitle": "Day-specific denominator",
        "subtitleColor": "gray",
    }
)
chart = apply_theme(chart)

chart

In [None]:
def make_line_chart_for_lab(lab, severity):
    """Plot the share of patients measured per day for one lab, one line per wave.

    Reads the module-level ``orig_df`` and keeps the rows where "cohort" is
    "dayX", "lab" equals ``lab`` and "setting" equals ``severity``. The line
    color uses the module-level ``wave_color_scale``.

    Parameters
    ----------
    lab : value compared against the "lab" column of ``orig_df``.
    severity : value compared against the "setting" column of ``orig_df``.

    Returns
    -------
    The 400x400 line chart after ``apply_theme(chart, title_anchor='start')``.

    Raises
    ------
    ValueError
        If no row of ``orig_df`` matches ``lab`` and ``severity``.
    """
    lab_df = orig_df.loc[
        (orig_df["cohort"] == "dayX")
        & (orig_df["lab"] == lab)
        & (orig_df["setting"] == severity)
    ]

    # Fail loudly on an unknown lab or severity instead of returning a blank chart.
    if lab_df.empty:
        raise ValueError(
            f"No day-specific observations for lab {lab!r} with severity {severity!r}"
        )

    # The chart only needs the raw per-wave values, so no early/late
    # difference is computed here.
    percent_axis = alt.Axis(title="Percent of patients", format="%")
    # Domain slightly wider than [0, 1] so marks at 0% and 100% are not clipped.
    percent_scale = alt.Scale(domain=[-0.05, 1.05], nice=False, clamp=False)

    chart = alt.Chart(lab_df).mark_line(point=True, size=3).encode(
        x=alt.X(
            "day:Q",
            axis=alt.Axis(title="Day", labelAngle=0, labelAlign="center", labelBaseline="top"),
            scale=alt.Scale(domain=[-1, 15], nice=False, clamp=False),
        ),
        y=alt.Y("value:Q", axis=percent_axis, scale=percent_scale),
        color=alt.Color("wave:N", legend=alt.Legend(title="Wave"), scale=wave_color_scale),
    )

    chart = chart.properties(
        title={
            "text": "Percent of patients measured by day of wave",
            "subtitle": f"Lab: {lab} | Severity: {severity} | Denominator: day-specific",
            "subtitleColor": "gray",
            "dx": 72,
        },
        width=400,
        height=400,
    )

    chart = apply_theme(chart, title_anchor='start')

    return chart

In [None]:
# Line chart for creatinine, severity "All".
make_line_chart_for_lab("creatinine", "All")

In [None]:
def make_candlestick_chart_for_lab(lab, severity):
    """Draw one bar per day spanning the early-wave to the late-wave value of a lab.

    Reads the module-level ``df``, keeps the rows where "cohort" is "dayX",
    "lab" equals ``lab`` and "setting" equals ``severity``, and passes them to
    compute_early_late_difference. Two bar layers are stacked: one visible
    only where "is_left_direction" is true (rounded bottom corners) and one
    visible only where "is_right_direction" is true (rounded top corners).

    Parameters
    ----------
    lab : value compared against the "lab" column of ``df``.
    severity : value compared against the "setting" column of ``df``.

    Returns
    -------
    The 400x400 layered chart after ``apply_theme(chart, title_anchor='start')``.
    """
    selected_df = df.loc[(df["cohort"] == "dayX") & (df["lab"] == lab) & (df["setting"] == severity)]
    shift_df = compute_early_late_difference(selected_df)

    # Anything that is not a strict increase (including no change) is labelled "Decrease".
    shift_df["shift"] = [
        "Increase" if went_up else "Decrease"
        for went_up in shift_df["is_right_direction"]
    ]

    percent_axis = alt.Axis(title="Percent of patients", format="%")
    percent_scale = alt.Scale(domain=[-0.05, 1.05], nice=False, clamp=False)

    # True -> fully opaque, False -> invisible.
    visibility_scale = alt.Scale(domain=[True, False], range=[1, 0])

    def _direction_bars(direction_field, **corner_radii):
        # Early-to-late bar per day, shown only where `direction_field` is true.
        return alt.Chart().mark_bar(size=12, **corner_radii).encode(
            x=alt.X(
                "day:O",
                axis=alt.Axis(title="Day", labelAngle=0, labelAlign="center", labelBaseline="top"),
            ),
            y=alt.Y("value_early:Q", axis=percent_axis, scale=percent_scale),
            y2=alt.Y2("value_late:Q"),
            color=alt.Color(
                "shift:N",
                legend=alt.Legend(title="Shift (early to late)"),
                scale=alt.Scale(domain=["Increase", "Decrease"]),
            ),
            opacity=alt.Opacity(f"{direction_field}:Q", scale=visibility_scale, legend=None),
        )

    decrease_bars = _direction_bars(
        "is_left_direction", cornerRadiusBottomLeft=9, cornerRadiusBottomRight=9
    )
    increase_bars = _direction_bars(
        "is_right_direction", cornerRadiusTopLeft=9, cornerRadiusTopRight=9
    )

    layered = alt.layer(decrease_bars, increase_bars, data=shift_df).properties(
        title={
            "text": "Percent of patients measured by day of wave",
            "subtitle": f"Lab: {lab} | Severity: {severity} | Denominator: day-specific",
            "subtitleColor": "gray",
            "dx": 72,
        },
        width=400,
        height=400,
    )

    return apply_theme(layered, title_anchor='start')

In [None]:
# Candlestick chart for creatinine, severity "All".
make_candlestick_chart_for_lab("creatinine", "All")

In [None]:
def make_arrow_chart_for_lab(lab, severity):
    """Draw per-day arrows from the early-wave to the late-wave value of a lab.

    Reads the module-level ``df``, keeps the rows where "cohort" is "dayX",
    "lab" equals ``lab`` and "setting" equals ``severity``, and joins the
    output of compute_early_late_difference back onto those rows by "day".
    Four layers are stacked in this order: thin bars from "value_early" to
    "value_late", a gray line of "value" in which only the "Early" wave is
    visible, up-triangles at "value_late" where "is_right_direction" is true,
    and down-triangles at "value_late" where "is_left_direction" is true.

    Parameters
    ----------
    lab : value compared against the "lab" column of ``df``.
    severity : value compared against the "setting" column of ``df``.

    Returns
    -------
    The 400x400 layered chart after ``apply_theme(chart, title_anchor='start')``.
    """
    selected_df = df.loc[(df["cohort"] == "dayX") & (df["lab"] == lab) & (df["setting"] == severity)]

    # Per-day early/late summary, indexed by day so it can be joined back on.
    per_day_diff = (
        compute_early_late_difference(selected_df)
        .drop(columns=["lab", "setting"])
        .set_index("day")
    )
    plot_df = selected_df.join(per_day_diff, on="day")

    # Anything that is not a strict increase (including no change) is labelled "Decrease".
    plot_df["shift"] = [
        "Increase" if went_up else "Decrease"
        for went_up in plot_df["is_right_direction"]
    ]

    percent_axis = alt.Axis(title="Percent of patients", format="%")
    percent_scale = alt.Scale(domain=[-0.05, 1.05], nice=False, clamp=False)

    # True -> fully opaque, False -> invisible.
    direction_opacity_scale = alt.Scale(domain=[True, False], range=[1, 0])
    # Only the early wave is drawn by the line layer.
    wave_opacity_scale = alt.Scale(domain=["Early", "Late"], range=[1, 0])

    def _day_x():
        # Ordinal day axis shared by every layer.
        return alt.X(
            "day:O",
            axis=alt.Axis(title="Day", labelAngle=0, labelAlign="center", labelBaseline="top"),
        )

    def _shift_color():
        # Color by shift label with a fixed Increase/Decrease domain.
        return alt.Color(
            "shift:N",
            legend=alt.Legend(title="Shift (early to late)"),
            scale=alt.Scale(domain=["Increase", "Decrease"]),
        )

    def _arrow_heads(shape, direction_field):
        # Triangle at the late value, shown only where `direction_field` is true.
        return alt.Chart().mark_point(shape=shape).encode(
            x=_day_x(),
            y=alt.Y("value_late:Q", axis=percent_axis, scale=percent_scale),
            color=_shift_color(),
            opacity=alt.Opacity(f"{direction_field}:Q", scale=direction_opacity_scale, legend=None),
        )

    shaft_bars = alt.Chart().mark_bar(size=3).encode(
        x=_day_x(),
        y=alt.Y("value_early:Q", axis=percent_axis, scale=percent_scale),
        y2=alt.Y2("value_late:Q"),
        color=_shift_color(),
    )

    early_line = alt.Chart().mark_line(size=1, point=False).encode(
        x=_day_x(),
        y=alt.Y("value:Q", axis=percent_axis, scale=percent_scale),
        opacity=alt.Opacity("wave:N", legend=None, scale=wave_opacity_scale),
        color=alt.value("gray"),
    )

    up_heads = _arrow_heads("triangle-up", "is_right_direction")
    down_heads = _arrow_heads("triangle-down", "is_left_direction")

    layered = alt.layer(shaft_bars, early_line, up_heads, down_heads, data=plot_df).properties(
        title={
            "text": "Percent of patients measured by day of wave",
            "subtitle": f"Lab: {lab} | Severity: {severity} | Denominator: day-specific",
            "subtitleColor": "gray",
            "dx": 72,
        },
        width=400,
        height=400,
    )

    return apply_theme(layered, title_anchor='start')

In [None]:
# Arrow chart for creatinine, severity "All".
make_arrow_chart_for_lab("creatinine", "All")