In [274]:
# %pylab inline
import pywren
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
sns.set_style('whitegrid')
import os
import matplotlib.patches as mpatches
#########################
from bokeh.core.properties import value
from bokeh.io import output_notebook, show, push_notebook
import bokeh.plotting.figure
from bokeh.models import ColumnDataSource, HoverTool, WheelZoomTool

In [194]:
# run pywren
def my_function(key):
    """Sleep for ``key * 5`` seconds, then hand ``key`` back unchanged.

    Used as the demo task that is mapped over ``range(5)`` below, so each
    invocation runs for a different amount of time.
    """
    # Imported inside the function body, as in the original definition.
    from time import sleep

    delay_seconds = key * 5
    sleep(delay_seconds)
    return key

# Obtain an executor via pywren's default factory (presumably configured from
# the local pywren config -- confirm against the pywren docs).
pwex = pywren.default_executor()
# First batch: one my_function call per value in range(5), i.e. keys 0..4.
futures1 = pwex.map(my_function, range(5))
# NOTE(review): pywren.wait presumably blocks until the batch finishes -- confirm.
pywren.wait(futures1)

# Second, identical batch. It is submitted only after the wait call above has
# returned, so the two batches are issued one after the other.
futures2 = pwex.map(my_function, range(5))
# Last expression of the cell, so its return value is what the cell displays.
pywren.wait(futures2)

([<pywren.future.ResponseFuture at 0x1114d2828>,
  <pywren.future.ResponseFuture at 0x10fafba90>,
  <pywren.future.ResponseFuture at 0x1023e8ba8>,
  <pywren.future.ResponseFuture at 0x115be6e48>,
  <pywren.future.ResponseFuture at 0x1112f8ba8>],
 [])

In [290]:
# collect info from pywren futures
def collect_execution_info(futures, verbose=True):
    """Gather results and status records from a collection of pywren futures.

    Parameters
    ----------
    futures : iterable
        Objects exposing a ``result()`` method plus ``run_status`` and
        ``invoke_status`` attributes. Any iterable is accepted, including
        generators and other one-shot iterators.
    verbose : bool, optional
        When true (the default, matching the historical behaviour) the
        collected run statuses are printed. Pass ``False`` to keep the
        notebook output short.

    Returns
    -------
    dict
        ``{'results': [...], 'run_statuses': [...], 'invoke_statuses': [...]}``
        with one entry per future, in input order.
    """
    # Materialise once: the futures are traversed three times below, and a
    # one-shot iterator would otherwise be exhausted after the first pass,
    # silently producing empty status lists.
    futures = list(futures)

    # result() is called on every future before any status attribute is read,
    # preserving the original access order.
    results = [f.result() for f in futures]
    run_statuses = [f.run_status for f in futures]
    invoke_statuses = [f.invoke_status for f in futures]

    # need to analyze both run_statuses and invoke_statuses
    if verbose:
        print(run_statuses)

    return {
        'results': results,
        'run_statuses': run_statuses,
        'invoke_statuses': invoke_statuses,
    }

# Collect results and statuses for both batches in one pass; futures1 entries
# come first in the concatenated sequence.
info = collect_execution_info(futures1 + futures2)

[{'callset_id': '0cf77c7b-542a-4c8b-b2a1-09718c393d2a', 'data_download_time': 0.10668087005615234, 'runtime_s3_key_used': 'ce6fc6-pywren.runtime/pywren_runtime-3.6-default.tar.gz.0009', 'aws_request_id': '091523f6-920d-11e7-a30f-6dde058401f6', 'log_stream_name': '2017/09/05/[$LATEST]e1e95461ea704795b39b602eb3856f6c', 'stdout': '', 'exec_time': 2.0818300247192383, 'start_time': 1504597035.034687, 'func_key': 'pywren.jobs/0cf77c7b-542a-4c8b-b2a1-09718c393d2a/func.json', 'setup_time': 12.667610883712769, 'runtime_cached': False, 'runtime_s3_bucket_used': 'ericmjonas-public', 'exception': None, 'host_submit_time': 1504597034.218695, 'server_info': {'uname': 'Linux ip-10-16-168-40 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', '/proc/self/cgroup': 'MemTotal:        3857664 kB\nMemFree:         1631108 kB\nMemAvailable:    3477432 kB\nBuffers:           36728 kB\nCached:          1873640 kB\nSwapCached:            0 kB\nActive:           3330

In [288]:
# visualization
def visualize_execution(info):
    """Show a stacked horizontal-bar (Gantt-style) chart of per-job timings.

    Parameters
    ----------
    info : dict
        Must contain ``'run_statuses'`` and ``'invoke_statuses'``, each a list
        with one dict per job (the structure returned by
        ``collect_execution_info``). Taken together the dicts must provide
        ``host_submit_time``, ``start_time``, ``setup_time``, ``end_time``
        and ``download_output_timestamp``.

    Returns
    -------
    None
        The figure is displayed inline through Bokeh's notebook output.
    """
    # ------------------------------------------------------------------
    # Data preparation
    run_df = pd.DataFrame(info['run_statuses'])
    invoke_df = pd.DataFrame(info['invoke_statuses'])
    info_df = pd.concat([run_df, invoke_df], axis=1)

    # The two status frames may share column names. Keep only the first
    # occurrence of each so that selecting a column yields a Series rather
    # than a DataFrame. Unlike renaming duplicates and dropping them by
    # label, this mask is also safe when there are no duplicates at all.
    info_df = info_df.loc[:, ~info_df.columns.duplicated()]

    # ------------------------------------------------------------------
    # Intermediate data handling: per-stage durations. Stacked in this order
    # they add up to (download_output_timestamp - earliest host_submit_time).
    time_offset = info_df['host_submit_time'].min()
    fields = [
        ('host submit', info_df['host_submit_time'] - time_offset),
        ('job start', info_df['start_time'] - info_df['host_submit_time']),
        ('setup done', info_df['setup_time']),
        ('job done',
         info_df['end_time'] - info_df['start_time'] - info_df['setup_time']),
        ('results returned',
         info_df['download_output_timestamp'] - info_df['end_time']),
    ]
    stages = [name for name, _ in fields]

    # ------------------------------------------------------------------
    # Bokeh visualization
    output_notebook()

    # Job IDs are simply row positions (0..n-1), not pywren's own call ids.
    jobIDs = [str(i) for i in range(len(info_df))]
    data = {'jobIDs': jobIDs}
    for name, durations in fields:
        data[name] = list(durations)

    # One colour per stage, in stacking order. TODO: pick a better palette.
    colors = ["#ffffcc", "#ffccff", "#ff99ff", "#ff66ff", "#ff33ff"]

    # Plot structure.
    # wheel_zoom is enabled for scrolling; 'xwheel_zoom' would be preferable.
    p = bokeh.plotting.figure(y_range=jobIDs, plot_height=400, plot_width=800,
                              x_range=(-15, 70),
                              title="Pywren Execution GANTT Chart",
                              active_scroll='wheel_zoom')

    # Plot contents: one stacked bar per job, one segment per stage.
    p.hbar_stack(stages, y='jobIDs', height=0.9, color=colors,
                 source=ColumnDataSource(data),
                 legend=[value(x) for x in stages])

    # Plot details.
    p.legend.location = "top_left"
    p.xaxis.axis_label = "Wallclock Time (sec)"
    p.yaxis.axis_label = "Job ID"
    # TODO: add a HoverTool (tooltip ("job ID", "@{jobIDs}")) once hover works
    # for HBar; per the Bokeh devs it was broken and slated for a fix in 0.12.8.

    # Show plot.
    bokeh.io.show(p)

    

# Render the chart for the execution info gathered earlier in the notebook.
visualize_execution(info)