In [1]:
from bokeh.plotting import figure, show
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import column
from bokeh.models.layouts import Panel, Tabs 
# Bokeh 3.0 renamed Panel to TabPanel; on Bokeh >= 3.0, change Panel to TabPanel in the import above.
import pandas as pd
from bokeh.io import output_notebook

In [2]:
def get_data(data_dir='Data'):
    """Load the event dates of every sample file into one long DataFrame.

    Data structure: OMOP. The sample files hold drugs/medications, procedures
    and notes; all columns other than the event date were removed.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory that contains ``deid_drugs.csv``, ``deid_procedure.csv`` and
        ``deid_notes.csv``. Defaults to ``'Data'``, which gives the same
        relative paths as before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row per medical event with two columns: ``category`` ('Med',
        'Procedure' or 'Note') and ``date`` (the event date normalized to
        midnight).
    """
    # (category label, file name, name of the date column in that file)
    sources = [
        ('Med', 'deid_drugs.csv', 'drug_exposure_start_DATE'),
        ('Procedure', 'deid_procedure.csv', 'procedure_DATE'),
        ('Note', 'deid_notes.csv', 'note_DATE'),
    ]

    categories = []
    date_parts = []
    for event_type, file_name, date_colname in sources:
        events = pd.read_csv(f'{data_dir}/{file_name}')
        event_dates = pd.to_datetime(events[date_colname]).dt.normalize()
        categories.extend([event_type] * len(event_dates))
        date_parts.append(event_dates)

    # Concatenating the Series (rather than flattening to Python lists) keeps
    # the datetime dtype even when a file, or every file, contributes no rows.
    all_dates = pd.concat(date_parts, ignore_index=True)
    return pd.DataFrame({'category': categories, 'date': all_dates})

In [3]:
def create_plot(df):
    """Show stacked bar charts of event counts per year, month and week.

    One Bokeh figure is built per time bucket. Each figure stacks the counts
    of the 'Med', 'Procedure' and 'Note' categories, and the figures are
    displayed as tabs in the notebook output.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a datetime ``date`` column (used as the ``pd.Grouper`` key) and
        a ``category`` column.

    Returns
    -------
    None
        The tabs are rendered with ``show``; nothing is returned.
    """
    # Bokeh 3.0 renamed Panel to TabPanel. Resolve whichever class this
    # installation provides so the function works on either side of the rename.
    try:
        from bokeh.models import TabPanel as tab_panel_cls
    except ImportError:
        from bokeh.models import Panel as tab_panel_cls

    output_notebook()
    data_types = ['Med', 'Procedure', 'Note']
    colors = ["#dfb834", "#649e92", "#a13d61"]
    # (tab title, pandas frequency alias, strftime pattern for tick labels)
    # NOTE(review): pandas >= 2.2 is expected to warn that "Y"/"M" are
    # deprecated in favour of "YE"/"ME" - confirm against the pinned version.
    time_buckets = [
        ("Year", "Y", "%Y"),
        ("Month", "M", "%m/%y"),
        ("Week", "W", "%m/%d/%y"),
    ]

    # Build one tab per time bucket: Year, Month, Week
    tab_list = []
    for bin_name, bin_size, time_format in time_buckets:
        counts = (
            df.groupby([pd.Grouper(key='date', freq=bin_size), 'category'])
            .size()
            .unstack(fill_value=0)
            # Guarantee a column for every category; one with no events in
            # df would otherwise be missing and raise KeyError below.
            .reindex(columns=data_types, fill_value=0)
        )
        time_bucket = list(map(str, counts.index.tolist()))

        data = {'time_bucket': time_bucket}
        for item in data_types:
            data[item] = counts[item].values

        # Passing a list of strings as x_range makes the x axis categorical.
        p = figure(x_range=time_bucket, title=f"Event Counts by {bin_name}",
                   toolbar_location="right",
                   tooltips="$name @time_bucket: @$name",
                   width=1000,
                   height=500)
        p.vbar_stack(data_types, x='time_bucket', width=0.9, source=data,
                     color=colors, legend_label=data_types)

        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xgrid.grid_line_color = None
        p.axis.minor_tick_line_color = None
        p.outline_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.xaxis.major_label_orientation = 'vertical'
        # NOTE(review): the x axis holds string factors, not datetimes -
        # confirm this formatter yields the intended tick labels.
        p.xaxis.formatter = DatetimeTickFormatter(
            days=f"{time_format}",
            months=f"{time_format}",
            years=f"{time_format}"
        )
        p.xaxis.major_label_text_font_size = "10pt"

        tab_list.append(tab_panel_cls(child=column(p), title=bin_name))

    tabs = Tabs(tabs=tab_list)
    show(tabs)

In [4]:
# Load the event dates from the sample files, then render the tabbed charts.
df = get_data()
create_plot(df)
# Note: Zoom into specific sections of the Month/Week tabs to see the dates more clearly.
# TODO: Fix the crowded x-axis labels.