In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_lab_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows() # Lift Altair's default 5000-row limit so larger data frames can be charted

In [None]:
# Load the data frame returned by the project helper read_full_lab_df() and display it.
df = read_full_lab_df()
df

In [None]:
# NOTE(review): this cell displays the same `df` as the previous cell and looks redundant.
df

In [None]:
# Countries shown in the charts and the color assigned to each one;
# the two lists are paired position by position.
COUNTRIES = ["France", "Germany", "Italy", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#D55E00"]
COUNTRY_COLOR_MAP = dict(zip(COUNTRIES, COUNTRY_COLOR))

In [None]:
# LOINC reference table, indexed by LOINC code so rows can be looked up with .at[code, column].
loinc_df = read_loinc_df().set_index('LOINC')

# De-duplicated lab names. This list can be shorter than LOINC_IDS when several
# codes share a name, so it must not be indexed by row position.
LOINC_NAMES = loinc_df["Name"].unique().tolist()
LOINC_IDS = loinc_df.index.values.tolist()
LOINC_UNITS = loinc_df['Units'].values.tolist()

# Map each LOINC code to a "Name (Units)" label, pairing the values row by row
# so every code gets the name from its own row.
LOINC_MAP = {
    loinc_id: f'{name} ({units})'
    for loinc_id, name, units in zip(
        LOINC_IDS,
        loinc_df["Name"].values.tolist(),
        LOINC_UNITS,
    )
}

loinc_df

In [None]:
def process_labs_df(df_lb):
    """Clean a lab data frame and add the derived columns used for plotting.

    The frame is modified in place and the same object is returned.

    Steps, in order:
      1. Negative values in the four ``*_ever_severe`` mean/stdev columns
         (raw and log) are replaced with 0.
      2. Spaces are removed from the ``loinc`` codes.
      3. ``loinc_name`` is added as ``"<Name> (<Units>)"``, looked up in the
         module-level ``loinc_df`` by LOINC code.
      4. ``upper_*`` / ``under_*`` columns are added as mean +/- stdev for the
         ``all`` and ``ever_severe`` value columns.

    NOTE(review): only the ``ever_severe`` statistics are clamped; the ``all``
    columns feed ``upper_all`` / ``under_all`` unclamped - confirm this is intended.
    """
    # Negative values to zeros
    for stat_col in (
        'mean_value_ever_severe',
        'stdev_value_ever_severe',
        'mean_log_value_ever_severe',
        'stdev_log_value_ever_severe',
    ):
        is_negative = df_lb[stat_col] < 0
        df_lb.loc[is_negative, stat_col] = 0

    # Remove white space which shouldn't be provided
    def strip_spaces(code):
        return code.replace(' ', '')

    df_lb['loinc'] = df_lb['loinc'].apply(strip_spaces)

    # Add readable names for LOINC and units
    def readable_label(code):
        lab_name = loinc_df.at[code, "Name"]
        lab_units = loinc_df.at[code, "Units"]
        return lab_name + " (" + lab_units + ")"

    df_lb['loinc_name'] = df_lb['loinc'].apply(readable_label)

    # Upper and under bound for values (mean +/- one standard deviation)
    for cohort in ('all', 'ever_severe'):
        cohort_mean = df_lb[f'mean_value_{cohort}']
        cohort_stdev = df_lb[f'stdev_value_{cohort}']
        df_lb[f"upper_{cohort}"] = cohort_mean + cohort_stdev
        df_lb[f"under_{cohort}"] = cohort_mean - cohort_stdev

    return df_lb

In [None]:
# process_labs_df modifies `df` in place and returns it, so `pdf` and `df` are the same object.
pdf = process_labs_df(df)
pdf.head()

In [None]:
def lab_plot(
    df=None,
    loinc='1742-6', # ALT
    width=700, 
    height=300,
    bar_size=8,
    point_size=30,
    no_axis_title=False,
    no_legend=False, 
    legend_columns=None,
    y_domain_top=None, 
    y_domain_bottom=None
):
    """Build a two-panel Altair chart of lab values for ever-severe patients.

    The top panel layers circles and lines showing ``mean(mean_value_ever_severe)``
    per ``days_since_admission``, colored by ``siteid``. The lower panel is a bar
    chart of ``sum(num_patients_ever_severe)`` over the same x field. Rows are
    restricted to days 0..30 and to at least 10 ever-severe patients. A dropdown
    bound to ``LOINC_IDS`` filters both panels by the ``loinc`` column.

    Parameters
    ----------
    df : pandas.DataFrame or None
        Data passed to ``alt.Chart``. When ``None`` a message is printed and
        ``None`` is returned.
    loinc : str
        Initial value of the LOINC dropdown, and key into ``LOINC_MAP`` for the
        chart title. The title text is computed once from this argument, so it
        does not change when another lab is picked in the dropdown.
    width : int
        Width of both panels.
    height : int
        Height of the top (lab value) panel; the bar panel is fixed at 120.
    bar_size, point_size : int
        Mark sizes for the bars and the circles.
    no_axis_title : bool
        When true, the y-axis titles of both panels are omitted.
    no_legend, legend_columns, y_domain_top
        Accepted for interface compatibility; currently not used by this function.
    y_domain_bottom
        When not ``None``, passed as-is as the ``domain`` of the top panel's
        y scale.

    Returns
    -------
    The vertically concatenated Altair chart, or ``None`` when ``df`` is ``None``.

    Raises
    ------
    KeyError
        If ``loinc`` is not a key of ``LOINC_MAP``.
    """
    if df is None:
        print('No Data Frame Suggested.')
        return

    # data field and title definition
    color_field = 'siteid'
    x_field = 'days_since_admission'
    mean_field = 'mean_value_ever_severe'
    num_pat_field = 'num_patients_ever_severe'

    x_field_title = 'Days since positive'
    mean_field_title = 'Mean value'

    # data selection (the lab itself is filtered by the dropdown selection below)
    base = (
        alt.Chart(df)
            .transform_filter(alt.datum[x_field] >= 0)
            .transform_filter(alt.datum[x_field] <= 30)
            .transform_filter(alt.datum[num_pat_field] >= 10)
    )

    input_dropdown = alt.binding_select(options=LOINC_IDS)
    selection = alt.selection_single(
        fields=['loinc'], 
        bind=input_dropdown, 
        name='Lab ', 
        init={'loinc': loinc}
    )

    # Lab mean values in line/dot plot
    y_title = None if no_axis_title else mean_field_title

    x_axis = alt.Axis(grid=True, labels=True, ticks=True, domain=True, tickMinStep=1)
    y_scale = (
        alt.Scale(zero=False, domain=y_domain_bottom, type="linear") 
            if y_domain_bottom is not None 
            else alt.Scale(zero=False, type="linear")
    )

    circle = (
        base
            .mark_circle(size=point_size, opacity=0.7)
            .encode(
                x=alt.X(
                    f"{x_field}:Q",
                    title=None,
                    scale=alt.Scale(zero=False, nice=False, padding=10),
                    axis=x_axis,
                ),
                y=alt.Y(
                    f"mean({mean_field}):Q",
                    title=y_title,
                    scale=y_scale,
                    axis=alt.Axis(format='r')
                ),
                color=alt.Color(f"{color_field}:N", title='Site ID')
            )
    )

    # Same encodings as the circles, drawn as connecting lines.
    line = circle.mark_line(size=2, opacity=0.7)

    lab_value_plot = (
        alt.layer(circle, line)
            .properties(height=height, width=width)
    )

    # Number of patients in bar charts
    num_pat_field_title = None if no_axis_title else "# of patients"

    bar = base.mark_bar(size=bar_size).encode(
        y=alt.Y(
            f"sum({num_pat_field}):Q", 
            title=num_pat_field_title,
            axis=alt.Axis(
                format='r',
                tickMinStep=1
            )
        ),
        x=alt.X(
            f"{x_field}:Q",
            title=x_field_title,
            scale=alt.Scale(
                zero=False,
                nice=False,
                padding=10
            ),
            axis=alt.Axis(
                grid=True,
                labels=False,
                ticks=False,
                domain=True
            )
        ),
        color=alt.Color(
            f"{color_field}:N",
            title=None
        )
    )

    middle_chart = bar.properties(
        height=120, 
        width=width
    )

    # Stack the panels; the selection is added to and applied as a filter on the combined chart.
    result_vis = alt.vconcat(
        lab_value_plot, 
        middle_chart, 
        spacing=5
    ).resolve_scale(
        y="independent", 
        x="independent",
        color="shared"
    ).properties(title={
        "text": LOINC_MAP[loinc],
        'subtitle': get_visualization_subtitle(),
        'subtitleColor': 'gray'
    }).add_selection(selection).transform_filter(selection)

    return result_vis

In [None]:
# Build the lab chart from the processed frame with default options,
# pass it through the project's apply_theme helper, and display it.
plot = apply_theme(lab_plot(df=pdf))
plot