## Embedding a Bokeh server in a Notebook

This notebook shows how a Bokeh server application can be embedded inside a Jupyter notebook. 

In [None]:
import yaml
import datetime as dt

import pandas as pd

from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.io import show, output_notebook

from bokeh.sampledata.sea_surface_temperature import sea_surface_temperature

output_notebook()

In [None]:
## IMPORT IO TRACES AND CREATE TWO DATAFRAMES

# Start from None so that, if the read below fails, no value from an earlier
# run of this cell stays bound to the name.
io_traces = None
with open("io_operations.yml", "r", encoding="utf-8") as traces:
    io_traces = yaml.load(traces, Loader=yaml.SafeLoader)

# Job-level fields selected as columns of the jobs DataFrame.
# Keep one string per line, each followed by a comma: adjacent string literals
# without a comma are silently concatenated into a single (wrong) column name.
job_columns = [
    "job_id",
    "job_uid",
    "job_status",
    "job_submit_ts",
    "job_end_ts",
    "job_duration",
    "origin_runtime",
    "origin_read_bytes",
    "origin_written_bytes",
    "origin_core_used",
    "origin_mpi_procs",
    "job_sleep_time",
]

# Not every action type carries every one of these fields; the endpoint fields
# that are absent get placeholder values while the action list is built below.
action_columns = [
    "act_name",
    "act_type",
    "act_status",
    "act_start_ts",
    "act_end_ts",
    "act_duration",
    "src_storage_service",
    "src_storage_server",
    "src_storage_disk",
    "src_file_path",
    "src_file_name",
    "src_file_size_bytes",
    "dst_storage_service",
    "dst_storage_server",
    "dst_storage_disk",
    "dst_file_path",
    "dst_file_name",
    "dst_file_size_bytes",
    "parent_job_id",
]

# DATAFRAME WITH JOBS ONLY
jobs = pd.DataFrame(data=io_traces, columns=job_columns)

# Values written for one endpoint ("src" or "dst") when an action lacks it.
# Keys are the field suffixes; the side prefix is added when they are applied.
endpoint_placeholders = {
    "storage_service": "NA",
    "storage_server": "NA",
    "storage_disk": "NA",
    "file_path": "NA",
    "file_name": "NA",
    "file_size_bytes": 0,
}

# Flatten the actions of every job into a single list, tagging each action
# with the id of the job it belongs to ("parent_job_id").
action_list = []
for job_trace in io_traces:
    for action in job_trace["job_actions"]:
        action["parent_job_id"] = job_trace["job_id"]
        for side in ("src", "dst"):
            # The storage-service key alone decides whether this endpoint is
            # present; when it is missing, all six fields are overwritten.
            if f"{side}_storage_service" in action:
                continue
            action.update(
                {
                    f"{side}_{suffix}": placeholder
                    for suffix, placeholder in endpoint_placeholders.items()
                }
            )
        action_list.append(action)

# DATAFRAME WITH ACTIONS ONLY
actions = pd.DataFrame(data=action_list, columns=action_columns)

In [None]:
# Order the jobs by submission timestamp.
jobs = jobs.sort_values("job_submit_ts")
# Convert the whole column in one vectorized call (values are interpreted as
# seconds) instead of invoking pd.to_timedelta once per row.
jobs["job_submit_td"] = pd.to_timedelta(jobs["job_submit_ts"], unit="s")
jobs

In [None]:
# Order the actions by start timestamp.
actions = actions.sort_values("act_start_ts")

# Create Timedelta columns from the timestamps (interpreted as seconds), in
# order to display data with a valid date range in the following plots.
# Each column is converted in one vectorized call rather than row by row.
actions["act_start_td"] = pd.to_timedelta(actions["act_start_ts"], unit="s")
actions["act_end_td"] = pd.to_timedelta(actions["act_end_ts"], unit="s")

# fromtimestamp() is called without a tz argument, so both dates are printed
# in the local timezone of the machine running the notebook.
print(f"# First action registered on {dt.datetime.fromtimestamp(actions['act_start_ts'].min())}")
print(f"# Last action ends on {dt.datetime.fromtimestamp(actions['act_end_ts'].max())}")

actions

In [None]:
import numpy as np
 
from bokeh.models import ColumnDataSource, RangeTool, DatetimeTickFormatter, NumeralTickFormatter
from bokeh.transform import factor_cmap, factor_mark


# Tick formatter shared by the time axes of the plots: one strftime-style
# pattern per tick resolution.
VERBOSE_DT_FORMATTER = DatetimeTickFormatter(
    days="%d/%m",
    hours="%d/%m - %Hh",
    hourmin="%H:%M",
    minutes="%H:%M",
)

# Tick formatter used for the axes that show file sizes in bytes.
BYTE_FORMATTER = NumeralTickFormatter(format="0.0b")


def bkapp(doc):
    """Populate a Bokeh document with linked timeline plots of the I/O actions.

    Three figures are stacked in a column and added as the document root:

    * "R/W Volume": bars of ``src_file_size_bytes`` placed at ``act_start_td``.
    * "ActionTypes": one marker per action, positioned by ``act_start_td`` and
      ``act_type``; it reuses the x-range object of the first figure.
    * "Range selection plot": line overview of ``src_file_size_bytes`` and
      ``dst_file_size_bytes`` carrying a ``RangeTool`` bound to that same
      x-range.

    All glyphs read from one ``ColumnDataSource`` built from a copy of the
    module-level ``actions`` DataFrame. A theme is assigned to the document
    after the root has been added.

    Args:
        doc: The Bokeh document to populate.
    """
    df = actions.copy()
    source = ColumnDataSource(data=df)

    # Initial x window: from the earliest to the latest action start.
    dt_x_range = (df["act_start_td"].min(), df["act_start_td"].max())

    def timeline_figure(title, **extra):
        """Create a figure with the size, tools and styling both timelines share."""
        return figure(
            title=title,
            height=300,
            width=800,
            tools=["xpan", "save", "reset", "box_zoom", "pan", "zoom_out"],
            x_axis_location="above",
            background_fill_color="#efefef",
            **extra,
        )

    # TIMELINE WITH VARIOUS METRICS
    p = timeline_figure("R/W Volume", x_range=dt_x_range)
    # NOTE(review): vbar width is expressed in x-axis data units; 0.9 may give
    # very thin bars on a time axis -- confirm this is the intended width.
    p.vbar(x="act_start_td", top="src_file_size_bytes", source=source, width=0.9)
    p.yaxis.axis_label = "Byte size of file"
    p.xaxis.formatter = VERBOSE_DT_FORMATTER
    p.yaxis.formatter = BYTE_FORMATTER

    # TIMELINE OF ACTION TYPES
    # Fixed list of categories; the y axis shows them in reverse list order.
    # NOTE(review): act_type values missing from this list presumably are not
    # drawn; sorted(df.act_type.unique()) would derive the list from the data.
    action_types = ["FILECOPY", "FILEREAD", "COMPUTE", "FILEWRITE", "FILEDELETE"]
    markers = ["hex", "circle_x", "triangle", "plus", "diamond"]

    # Passing p.x_range (the same range object) keeps both timelines in sync.
    p2 = timeline_figure(
        "ActionTypes",
        x_range=p.x_range,
        y_range=action_types[::-1],
    )
    p2.scatter(
        "act_start_td",
        "act_type",
        source=source,
        legend_group="act_type",
        fill_alpha=0.3,
        size=12,
        marker=factor_mark("act_type", markers, action_types),
        color=factor_cmap("act_type", "Category10_5", action_types),
    )
    p2.yaxis.axis_label = "Action Type"
    p2.xaxis.formatter = VERBOSE_DT_FORMATTER

    # RANGE SELECTION OVERVIEW
    # No x_range is passed here, so this figure keeps its own x-range while
    # the RangeTool below edits the one shared by the two timelines.
    select = figure(
        title="Range selection plot",
        height=100,
        width=800,
        y_range=p.y_range,
        y_axis_type=None,
        tools="",
        toolbar_location=None,
        background_fill_color="#efefef",
    )

    range_tool = RangeTool(x_range=p.x_range)
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.1

    select.line("act_start_td", "src_file_size_bytes", source=source, line_color=(255, 128, 0, 0.5))
    select.line("act_start_td", "dst_file_size_bytes", source=source, line_color=(128, 255, 0, 0.5))
    select.ygrid.grid_line_color = None
    select.xaxis.formatter = VERBOSE_DT_FORMATTER
    select.yaxis.formatter = BYTE_FORMATTER
    select.add_tools(range_tool)
    select.toolbar.active_multi = range_tool

    doc.add_root(column(p, p2, select))

    # The theme is a plain mapping of scalars and lists, so the safe loader
    # (also used for the trace file) is sufficient to parse it.
    doc.theme = Theme(json=yaml.load("""
        attrs:
            figure:
                background_fill_color: "#DDDDDD"
                outline_line_color: white
                toolbar_location: above
                height: 700
                width: 1200
            Grid:
                grid_line_dash: [6, 4]
                grid_line_color: white
    """, Loader=yaml.SafeLoader))

Now we can display our application using ``show``, which will automatically create an ``Application`` that wraps ``bkapp`` using ``FunctionHandler``. The end result is that the Bokeh server will call ``bkapp`` to build a new document for every new session that is opened.

**Note**: If the current notebook is not displayed at the default URL, you must update the `notebook_url` parameter in the comment below to match, and pass it to `show`.

In [None]:
# Display the application defined by bkapp in the notebook. If the notebook is
# not served at the default URL, pass the matching notebook_url keyword
# argument (shown in the trailing comment) to show().
show(bkapp) # notebook_url="http://localhost:8888" 