In [71]:
def pre_process(df, framework=None):
    """Add the derived columns used for plotting and return a sorted copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Task records. The columns ``start``, ``end`` and ``worker`` are
        always read; ``process`` is read when ``framework == 'spark'``,
        otherwise ``thread`` is read.
    framework : str, optional
        Name of the framework that produced the records. When it equals
        ``'spark'`` the ``process`` column replaces ``thread``. If omitted,
        the notebook-level ``framework`` variable is used, as before.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with the extra columns
        ``runtime``, ``worker_name``, ``worker_thread`` and
        ``thread_number``, sorted by ``worker_name`` (descending) and then
        ``thread_number`` (ascending).

    Raises
    ------
    NameError
        If ``framework`` is not passed and no notebook-level ``framework``
        variable exists.
    """
    if framework is None:
        # Backward compatible path: read the notebook-level variable, and
        # keep failing loudly (NameError) when it has never been defined.
        try:
            framework = globals()['framework']
        except KeyError:
            raise NameError(
                "name 'framework' is not defined; pass framework=... "
                "or define it before calling pre_process"
            ) from None

    # Work on a copy so the caller's frame is not modified as a side effect.
    df = df.copy()

    if framework == 'spark':
        df['thread'] = df['process']

    df['runtime'] = df['end'] - df['start']

    # Simplify the worker name: drop everything up to the first '-', glue the
    # remaining '-'-separated pieces together and cut at the first '.'.
    df['worker_name'] = df['worker'].map(str).map(
        lambda name: ''.join(name.split('-')[1:]).split('.')[0]
    )

    # Simplify the thread number for each worker: within a worker, threads are
    # numbered 1, 2, ... in order of first appearance. The raw
    # "<worker>::<thread>" key is stored temporarily in `worker_thread`.
    df['worker_thread'] = df['worker_name'] + '::' + df['thread'].map(str)
    first_seen = df[['worker_name', 'worker_thread']].drop_duplicates()
    numbers = first_seen.groupby('worker_name', sort=False).cumcount() + 1
    number_by_key = dict(zip(first_seen['worker_thread'], numbers))
    df['thread_number'] = df['worker_thread'].map(number_by_key)

    # Replace the temporary key with the final, human-readable label.
    df['worker_thread'] = (
        df['worker_name'] + '::thread' + df['thread_number'].map(str)
    )

    return df.sort_values(by=['worker_name', 'thread_number'],
                          ascending=[False, True])

In [72]:
def group_by(*, df, fig):
    """Draw one horizontal band of quads per worker thread on ``fig``.

    The rows of ``df`` are grouped by ``worker_thread`` (in order of first
    appearance) and, inside each group, by ``func``. For every
    (worker thread, function) pair a ``fig.quad`` call is made with the
    ``start``/``end`` columns as left/right edges. Band number ``k``
    (0-based) covers the vertical range ``k - 0.5`` to ``k + 0.5``.

    The fill color of each call is looked up in the notebook-level ``color``
    mapping, keyed by function name.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``worker_thread``, ``func``, ``start``, ``end``.
    fig : object
        Plot object exposing a ``quad(...)`` method.

    Returns
    -------
    list
        The ``worker_thread`` labels, in the order their bands were drawn.
    """
    labels = []
    bands = df.groupby('worker_thread', sort=False)
    for row, (worker_thread, tasks) in enumerate(bands):
        labels.append(worker_thread)
        for func_name, calls in tasks.groupby("func"):
            fig.quad(
                left=calls["start"],
                right=calls["end"],
                bottom=row - 0.5,
                top=row + 0.5,
                color=color[func_name],
                alpha=0.6,
            )
    return labels

In [46]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
# Configure Bokeh to display plots inline in the notebook output cells.
output_notebook()

In [47]:
import pandas as pd

# Column names for the header-less results CSV, plus the run configuration
# that pre_process() reads.
col_name = ['func', 'start', 'end', 'filename', 'worker', 'thread', 'process']
framework = 'spark'
filename = '../results/inc/data-1/results-spark_inc-baseline.csv'

### PRE-PROCESS ###
# Load the raw records and derive the plotting columns in one step.
df = pre_process(pd.read_csv(filename, header=None, names=col_name))
### END PRE-PROCESS ###

# Fill color per function name; group_by() looks colors up in this mapping.
color = {"read_img":"turquoise", "increment":"crimson", 'save_results':'orange'}  # CUSTOMIZE
fig = figure()

### GROUP BY ###
labels = group_by(df=df, fig=fig)
### END GROUP BY ###

# One y tick per band (0, 1, 2, ...), displayed as the band's worker-thread label.
fig.yaxis.ticker = list(range(len(labels)))
fig.yaxis.major_label_overrides = dict(enumerate(labels))

show(fig)

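# TODO: the styling below appears to be left over from a matplotlib version of
# this plot and has not been ported to Bokeh yet (x-axis label and a legend for
# the three functions). The lines marked "!!!" are kept only as a reference.
# !!!ax.set_xlabel("time [s]")  # CUSTOMIZE

# Set legend
# !!!red_patch = mpatches.Patch(color='turquoise', label='read_img')  # CUSTOMIZE
# !!!blue_patch = mpatches.Patch(color='crimson', label='increment')  # CUSTOMIZE
# !!!orange_patch = mpatches.Patch(color='orange', label='save_results')  # CUSTOMIZE
# !!!plt.legend(loc='upper center', bbox_to_anchor=(1.085, 1.008), handles=[red_patch, blue_patch, orange_patch])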


In [145]:
import numpy as np

from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid
from bokeh.models.glyphs import Quad
from bokeh.models.tools import BoxZoomTool, HoverTool, PanTool, ResetTool, SaveTool, TapTool, WheelZoomTool
from bokeh.io import curdoc, show
from bokeh.palettes import Colorblind8

# Column names for the header-less results CSV, plus the run configuration
# that pre_process() reads.
col_name = ['func', 'start', 'end', 'filename', 'worker', 'thread', 'process']
framework = 'spark'
filename = '../results/inc/data-1/results-spark_inc-baseline.csv'

df = pd.read_csv(filename, header=None, names=col_name)
df = pre_process(df)  # PRE-PROCESS

### GROUP BY ###
# Give every worker thread its own horizontal band. `pd.factorize` numbers the
# distinct labels 0, 1, 2, ... in order of first appearance (the same order a
# groupby(..., sort=False) would iterate in) in a single pass, instead of
# re-scanning the whole frame once per group.
row, worker_threads = pd.factorize(df['worker_thread'])
labels = list(worker_threads)
df['bottom'] = row - 0.5
df['top'] = row + 0.5
### END GROUP BY ###

# Define color map for the function. The palette has a fixed number of entries,
# so wrap around instead of raising IndexError when there are more distinct
# functions than colors.
func_color = {
    name: Colorblind8[i % len(Colorblind8)]
    for i, name in enumerate(df['func'].unique())
}
df['color'] = df['func'].map(func_color)

# Wrap the prepared frame so the glyph can refer to its columns by name.
source = ColumnDataSource(df)

plot = Plot()

# One rectangle per row of `source`: start..end horizontally, bottom..top
# vertically, filled and outlined with the row's own color.
glyph = Quad(
    left="start",
    right="end",
    top="top",
    bottom="bottom",
    fill_color="color",
    fill_alpha=0.66,
    line_color="color",
    line_width=0.75,
)
plot.add_glyph(source, glyph)

# Axes first, then one grid per dimension bound to the ticker each axis has at
# this point. Keep this order: the y ticker is replaced further down, after the
# grid has already been given the axis' current ticker.
xaxis = LinearAxis()
plot.add_layout(xaxis, 'below')

yaxis = LinearAxis()
plot.add_layout(yaxis, 'left')

plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

# Set y axis tick label: one tick per band (0, 1, 2, ...), displayed as the
# band's worker-thread label.
plot.yaxis.ticker = list(range(len(labels)))
plot.yaxis.major_label_overrides = dict(enumerate(labels))

## Tool
# Hover tooltip: the three time fields are rendered with printf-style formats.
# Current Bokeh looks formatters up by the full field spec including the "@"
# prefix; the un-prefixed keys are kept so older releases that used bare column
# names keep working (unknown keys are ignored).
# NOTE(review): confirm against the Bokeh version this notebook is pinned to.
hover = HoverTool(
    tooltips=[
        ('filename', '@filename'),
        ('worker', '@worker_thread'),
        ('function', '@func'),
        ('runtime', '@runtime{%8.3f sec}'),
        ('start time', '@start{%8.3f sec}'),
        ('end time', '@end{%8.3f sec}'),
    ],
    formatters={
        '@runtime': 'printf',
        '@start': 'printf',
        '@end': 'printf',
        'runtime': 'printf',
        'start': 'printf',
        'end': 'printf',
    },
)

# Same tools, added in the same order as before.
plot.add_tools(BoxZoomTool())
plot.add_tools(hover)
plot.add_tools(PanTool())
plot.add_tools(ResetTool())
plot.add_tools(SaveTool())
plot.add_tools(TapTool())
plot.add_tools(WheelZoomTool())

# Register the plot with the current document, then display it.
curdoc().add_root(plot)

show(plot)