In [1]:
# Notebook setup: third-party imports plus the seaborn and Bokeh display configuration.
# %pylab inline
import pywren
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
# Apply seaborn's 'whitegrid' plotting style.
sns.set_style('whitegrid')
import os
import matplotlib.patches as mpatches
######################### Bokeh
from bokeh.core.properties import value
from bokeh.io import output_notebook, show, push_notebook
# `import a.b.c` binds the top-level name `bokeh`, which is what makes the
# fully-qualified call `bokeh.plotting.figure(...)` available further down.
import bokeh.plotting.figure
from bokeh.models import ColumnDataSource, HoverTool, WheelZoomTool
# Configure Bokeh to display its plots inline in the notebook's output cells.
output_notebook()

In [2]:
# run pywren
def my_function(key):
    """Toy task for pywren: pause for one second, then hand ``key`` back unchanged."""
    # Import kept local to the function body, as in the original.
    from time import sleep

    sleep(1)
    return key

# Build an executor from pywren's default configuration.
pwex = pywren.default_executor()
# First batch: map my_function over the keys 0..4 and wait on the returned futures.
futures1 = pwex.map(my_function, range(5))
pywren.wait(futures1)

# Second, identical batch on the same executor. This wait() call is the cell's last
# expression, so its return value is what the notebook displays: in the recorded
# output, a pair of (list of five ResponseFuture objects, empty list) --
# presumably (done, not done); confirm against the pywren docs.
futures2 = pwex.map(my_function, range(5))
pywren.wait(futures2)

([<pywren.future.ResponseFuture at 0x10fa9ec88>,
  <pywren.future.ResponseFuture at 0x1029df160>,
  <pywren.future.ResponseFuture at 0x10d2bb208>,
  <pywren.future.ResponseFuture at 0x10fa9e518>,
  <pywren.future.ResponseFuture at 0x100e5d080>],
 [])

In [3]:
# collect info from pywren futures
def collect_execution_info(futures):
    """Pull the result and both status records out of every future.

    ``futures`` is walked three times: first calling ``result()`` on each
    future, then reading ``run_status``, then reading ``invoke_status``.

    Returns a dict with the keys ``'results'``, ``'run_statuses'`` and
    ``'invoke_statuses'``, each holding a list in the same order as
    ``futures``. Both status lists are needed for downstream analysis.
    """
    # Every result() call happens before any status attribute is read.
    info = {'results': [fut.result() for fut in futures]}
    for out_key, attr_name in (('run_statuses', 'run_status'),
                               ('invoke_statuses', 'invoke_status')):
        info[out_key] = [getattr(fut, attr_name) for fut in futures]
    return info

# Gather results and status records for both batches (futures1 followed by futures2).
info = collect_execution_info(futures1 + futures2)

In [37]:
# visualization
def _merge_status_frames(info):
    """Join run and invoke statuses into one DataFrame with one row per task.

    A column name that appears more than once after the side-by-side concat
    is kept only once, taking its first (left-most) occurrence.
    """
    run_df = pd.DataFrame(info['run_statuses'])
    invoke_df = pd.DataFrame(info['invoke_statuses'])
    merged = pd.concat([run_df, invoke_df], axis=1)
    # A boolean mask replaces the old rename-to-"toDROP"-then-drop approach, which
    # raised KeyError when nothing was duplicated and relied on passing `axis`
    # positionally (no longer accepted by pandas 2.x).
    return merged.loc[:, ~merged.columns.duplicated()]


def _stage_durations(info_df):
    """Return a dict mapping each Gantt stage name to its per-task duration Series.

    'host submit' is measured from the earliest host_submit_time over all
    tasks; every other stage is the time elapsed since the previous stage, so
    stacking the stages left to right ends at download_output_timestamp.
    """
    time_offset = info_df.host_submit_time.min()
    return {
        'host submit': info_df.host_submit_time - time_offset,
        'job start': info_df.start_time - info_df.host_submit_time,
        'setup done': info_df.setup_time,
        'job done': info_df.end_time - info_df.start_time - info_df.setup_time,
        'results returned': info_df.download_output_timestamp - info_df.end_time,
    }


def visualize_execution(info, x_range=(-15, 70)):
    """Draw a stacked horizontal-bar (Gantt-style) Bokeh chart of pywren task timing.

    Parameters
    ----------
    info : dict
        Must provide 'run_statuses' and 'invoke_statuses', each a list of
        per-task dicts (the shape returned by collect_execution_info).
        Between them the dicts must supply host_submit_time, start_time,
        setup_time, end_time, download_output_timestamp and server_info,
        where server_info is a mapping with a 'uname' entry.
    x_range : tuple, optional
        Initial (min, max) of the time axis. Defaults to the previously
        hard-coded (-15, 70).

    Raises
    ------
    ValueError
        If ``info`` contains no status records.

    Side effects: prints the list of worker unames and shows the plot.
    """
    info_df = _merge_status_frames(info)
    if info_df.empty:
        raise ValueError("no execution statuses to visualize")

    # Show which host each task ran on (one uname string per task).
    unames = [server['uname'] for server in info_df.server_info]
    print(unames)

    # Stage order is the left-to-right stacking order of the bars.
    stages = ["host submit", "job start", "setup done", "job done", "results returned"]
    durations = _stage_durations(info_df)

    # Tasks are labelled by row position, not by any ID carried in the
    # statuses, so labels are inaccurate if the real job IDs do not start at 0.
    jobIDs = [str(i) for i in range(len(info_df))]
    data = {'jobIDs': jobIDs}
    data.update((stage, list(durations[stage])) for stage in stages)

    # The first segment is white so the offset before submission is not drawn
    # as a visible bar on the default background.
    colors = ["#ffffff", "#ffccff", "#ff99ff", "#ff66ff", "#ff33ff"]

    # Plain wheel zoom is active by default; zooming the x axis only requires
    # scrolling while hovering over that axis.
    p = bokeh.plotting.figure(y_range=jobIDs, plot_height=400, plot_width=800, x_range=x_range,
                              title="Pywren Execution GANTT Chart", active_scroll='wheel_zoom')

    p.hbar_stack(stages, y='jobIDs', height=0.9, color=colors, source=ColumnDataSource(data),
                 legend=[value(x) for x in stages])

    p.legend.location = "top_left"
    p.xaxis.axis_label = "Wallclock Time (sec)"
    p.yaxis.axis_label = "Task ID"

    # One tooltip row per data column; the "@{...}" form is used because the
    # stage column names contain spaces.
    tooltips = [("jobID", "@{jobIDs}")]
    tooltips.extend(("%s delta" % stage, "@{%s}" % stage) for stage in stages)
    p.add_tools(HoverTool(tooltips=tooltips))

    show(p)
    
    # TODO: find out how Amazon distributes my tasks across containers (i.e. WHICH container runs each task)

    

# Render the Gantt chart for the collected runs; the call also prints each task's host uname.
visualize_execution(info)

['Linux ip-10-20-39-173 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-12-156-144 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-39-173 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-7-218 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-7-218 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-39-173 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-12-156-144 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-7-218 4.4.51-40.69.amzn1.x86_64 #1 SMP Sat Aug 12 01:12:36 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux\n', 'Linux ip-10-20-7-218 4.4.51-40.69.amzn1