# Evaluate the box below to initialize the web UI.

In [None]:
%%html
<script>
  /**
   * Show or hide every notebook input cell and relabel the toggle button.
   *
   * Reads and then flips the global `code_shown` flag.
   */
  function code_toggle() {
    // jQuery only understands 'fast' and 'slow' as duration strings; any
    // other string (such as '500') silently falls back to the 400 ms default,
    // so the duration has to be passed as a number.
    var duration = 500;
    if (code_shown) {
      $('div.input').hide(duration);
      $('#toggleButton').val('Show Code');
    } else {
      $('div.input').show(duration);
      $('#toggleButton').val('Hide Code');
    }
    code_shown = !code_shown;
  }

  // Once the DOM is ready, start with every input cell collapsed.
  $(function () {
    code_shown = false;  // global flag that code_toggle() reads and flips
    $('div.input').hide();
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>

<button id="do_run_all">Click to run all</button>
<script>
$("#do_run_all").on("click", function () {
    // Forward the click to the element with id "run_all_cells"
    // (presumably the notebook's own "Run All" menu entry - verify).
    $("#run_all_cells").trigger("click");
});
</script>

In [1]:
import ipywidgets as widgets
import os
import pandas as pd
import pprint
import qgrid
import ray
import subprocess
import sys
import tempfile
import time
import math

from IPython.display import display

# Initialise Ray with the Redis address taken from the REDIS_ADDRESS
# environment variable; a missing variable raises KeyError here.
ray.init(redis_address=os.environ["REDIS_ADDRESS"])

{'local_scheduler_socket_names': ['/tmp/scheduler48824835'],
 'node_ip_address': '127.0.0.1',
 'object_store_addresses': [ObjectStoreAddress(name='/tmp/plasma_store41748701', manager_name='/tmp/plasma_manager518413', manager_port=13391)],
 'redis_address': '127.0.0.1:11315'}

#### Summary

In [None]:
# job_length() is unpacked as (earliest timestamp, latest timestamp, task count).
earliest, latest, tasks = ray.global_state.job_length()
dur = latest - earliest
print("This job took ", dur, " seconds to complete and a total of ", tasks, " tasks were run.", sep="")

#### Object search.

In [None]:
# Text box used to look up a single entry of the object table.
object_search = widgets.Text(
    description="Search for an object:",
    placeholder="Object ID",
    value="",
    disabled=False,
)
display(object_search)


def handle_submit(sender):
    """Pretty-print the object-table entry for the ID typed into the box.

    ``sender`` is supplied by the widget event and is not used; the ID is
    read from ``object_search`` directly.
    """
    printer = pprint.PrettyPrinter()
    entry = ray.global_state.object_table(object_search.value)
    printer.pprint(entry)


object_search.on_submit(handle_submit)

#### Task search.

In [None]:
def dependency_graph(task_id):
    """Plot the chain of ancestor tasks leading up to ``task_id``.

    Starting at ``task_id``, the ``ParentTaskID`` stored in each task's
    ``TaskSpec`` is followed until a parent is missing from the task table,
    a spec has no parent entry, or a task is reached a second time. The
    visited tasks are positioned with networkx's shell layout and drawn with
    bokeh; hovering a node shows its task ID, function name, parent ID and
    return object IDs.

    Args:
        task_id: Key into ``ray.global_state.task_table()``.

    Raises:
        ValueError: If ``task_id`` is not present in the task table.
    """
    import time
    from collections import defaultdict

    import networkx as nx
    from bokeh.io import output_notebook
    from bokeh.layouts import gridplot
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.plotting import figure, show
    from bokeh.resources import CDN

    output_notebook(resources=CDN)

    task_profiles = ray.global_state.task_profiles(start=0, end=time.time())
    task_info = ray.global_state.task_table()

    if task_id not in task_info:
        raise ValueError("Unknown task ID: {!r}".format(task_id))

    DG = nx.DiGraph()
    DG.add_node(task_id)

    # Walk child -> parent, recording one edge per hop.
    edges = []
    visited = set()
    current = task_id
    while current not in visited:
        visited.add(current)
        try:
            parent_id = task_info[current]["TaskSpec"]["ParentTaskID"]
        except KeyError:
            # No parent recorded for this task: the chain ends here.
            break
        if parent_id not in task_info:
            break
        DG.add_node(parent_id)
        edges.append([current, parent_id])
        current = parent_id

    DG.add_edges_from(edges)

    # Build one row per node. Every column receives exactly one value per
    # node, so the columns handed to bokeh always have equal length (a
    # missing key previously left ``return_object`` shorter than the rest).
    columns = defaultdict(list)
    for node_id, position in nx.shell_layout(DG).items():
        spec = task_info[node_id]["TaskSpec"]

        columns["x"].append(position[0])
        columns["y"].append(position[1])
        columns["task_id"].append(spec["TaskID"])

        try:
            function_name = task_profiles[node_id]["function_name"]
        except KeyError:
            function_name = "None"
        columns["function_name"].append(function_name)

        try:
            parent = spec["ParentTaskID"]
        except KeyError:
            parent = "None"
        columns["parent_id"].append(parent)

        try:
            returned = [object_id.hex() for object_id in spec["ReturnObjectIDs"]]
        except KeyError:
            returned = "None"
        columns["return_object"].append(returned)

    source = ColumnDataSource(data=dict(
        x=columns["x"],
        y=columns["y"],
        parent_id=columns["parent_id"],
        task_id=columns["task_id"],
        function_name=columns["function_name"],
        return_object=columns["return_object"],
    ))

    hover = HoverTool(tooltips=[
        ("TaskID", "@task_id"),
        ("Function", "@function_name"),
        ("ParentID", "@parent_id"),
        ("Return ObjectID", "@return_object")
    ])

    p = figure(
        x_range=(-5, 5),
        y_range=(-5, 5),
        height=700,
        width=700,
        tools=[hover, "pan", "wheel_zoom", "box_zoom", "save"],
        toolbar_sticky=False
    )

    p.xaxis.visible = False
    p.yaxis.visible = False

    # The line joins the nodes in the order the layout returned them.
    p.line(
        x="x",
        y="y",
        source=source
    )

    p.circle(
        x="x",
        y="y",
        source=source,
        size=40,
        color="#dbe8ff",
        line_color="black"
    )

    show(gridplot(p, ncols=1, plot_width=500, plot_height=500, toolbar_location="below"))
 
 
# Text box used to look up a task and draw its ancestry.
task_search = widgets.Text(
    description="Search for a task:",
    placeholder="Task ID",
    value="",
    disabled=False,
)
display(task_search)


def handle_submit(sender):
    """Pretty-print the task-table entry for the entered ID, then plot its parents.

    ``sender`` is supplied by the widget event and is not used.
    """
    printer = pprint.PrettyPrinter()
    entry = ray.global_state.task_table(task_search.value)
    printer.pprint(entry)
    dependency_graph(task_search.value)


task_search.on_submit(handle_submit)

#### Error search.

In [None]:
# Snapshot of the error table, taken once when this cell is evaluated.
errors = ray.global_state.error_info()
# Matches of the most recent search, keyed by task ID.
results = dict()


def handle_submit(sender):
    """Show every recorded error whose traceback contains the search text.

    The matches are displayed in a qgrid table whose index is named
    ``TaskID``. ``sender`` is supplied by the widget event and is not used.
    """
    msg = error_search.value
    # Start from a clean slate: without this, matches from earlier searches
    # stayed in ``results`` and reappeared in every later table.
    results.clear()
    for task_id, data in errors.items():
        if msg in data["traceback"]:
            results[task_id] = data
    df_t = pd.DataFrame(results).T
    df_t.index.name = "TaskID"
    grid = qgrid.QGridWidget(df=df_t)
    display(grid)


error_search = widgets.Text(
    value="",
    placeholder="e.g. - division by zero",
    description="Error message",
    disabled=False
)
display(error_search)
error_search.on_submit(handle_submit)

#### Task Time Series.

In [None]:
# Holds at most one value: the "score" of the first profiled task, which is
# used as time zero for every query below.
_time_series_first_task_time = []


def time_series(start=None, end=None, num=None):
    """Count profiled tasks per fixed-width time bucket.

    Args:
        start: Lower bound of the query window, expressed as an offset from
            the first task's score. Defaults to 0.
        end: Upper bound of the query window, same reference point.
            Defaults to ``time.time()``.
        num: Forwarded as ``num`` to the ``task_profiles`` query that
            determines the extent of the window.

    Returns:
        A 3-tuple ``(buckets, start_point, granularity)``: ``buckets[i]`` is
        the number of profiles returned for the i-th interval,
        ``start_point`` is the offset of the first bucket from the first
        task's score, and ``granularity`` is the bucket width.
    """
    if start is None:
        start = 0
    if end is None:
        end = time.time()

    if len(_time_series_first_task_time) == 0:
        first_task = ray.global_state.task_profiles(num=1)
        if not first_task:
            # Same arity as the normal return below. This path used to
            # return four values, which made the caller's three-way
            # unpacking raise ValueError when no task had been profiled.
            return [], 0, 1
        task_dict = next(iter(first_task.values()))
        _time_series_first_task_time.append(task_dict["score"])

    origin = _time_series_first_task_time[0]
    start += origin
    end += origin

    granularity = 1
    earliest = end
    latest = 0

    tasks = ray.global_state.task_profiles(start=start, end=end, num=num)
    for data in tasks.values():
        if data["score"] > latest:
            latest = data["score"]
        if data["score"] < earliest:
            earliest = data["score"]

    num_buckets = math.ceil((latest - earliest) / granularity)

    buckets = []
    # Step by one: the index is already scaled by ``granularity`` when the
    # bucket bounds are computed, so stepping by ``granularity`` as well
    # would skip buckets for any width other than 1.
    for i in range(num_buckets):
        bucket_start = i * granularity + earliest
        bucket_end = (i + 1) * granularity + earliest
        profiles = ray.global_state.task_profiles(start=bucket_start, end=bucket_end)
        buckets.append(len(profiles))

    start_point = earliest - origin

    return buckets, start_point, granularity
 
def time_series_data(buckets, start_point, granularity):
    """Convert per-bucket task counts into bar coordinates.

    Returns ``(left, right, top)``: the left edge, right edge and height of
    each bar. The first bar lies one bucket width before ``start_point``.
    All three are empty lists when ``buckets`` is empty.
    """
    import numpy as np

    bucket_count = len(buckets)
    if bucket_count == 0:
        return [], [], []

    # One sample per counted task, placed on the left edge of its bucket.
    samples = [
        start_point + granularity * index
        for index, count in enumerate(buckets)
        for _ in range(count)
    ]

    # Edges begin one bucket width before ``start_point``.
    edges = [start_point + (k - 1) * granularity for k in range(bucket_count + 2)]
    heights, edges = np.histogram(samples, bins=edges)

    return edges[:-1], edges[1:], heights
 
def time_series_plot(left, right, top):
    """Draw the task time series as a bokeh bar chart in the notebook.

    Returns ``(fig, source)`` so that a caller can later replace
    ``source.data`` and adjust the figure's ranges.
    """
    from bokeh.io import output_notebook
    from bokeh.layouts import gridplot
    from bokeh.models import Range1d, ColumnDataSource
    from bokeh.plotting import figure, show
    from bokeh.resources import CDN

    output_notebook(resources=CDN)

    # Fall back to a unit window when there is nothing to plot.
    if len(left):
        x_low = max(0, min(left))
    else:
        x_low = 0
    if len(right):
        x_high = max(right)
    else:
        x_high = 1
    if len(top):
        y_high = max(top) + 1
    else:
        y_high = 1

    fig = figure(
        title="Task Time Series",
        tools=["save", "hover", "wheel_zoom", "box_zoom", "pan"],
        background_fill_color="#FFFFFF",
        y_range=(0, y_high),
        x_range=(x_low, x_high),
    )

    source = ColumnDataSource(data=dict(left=[], right=[], top=[]))
    source.data = {'left': left, 'right': right, 'top': top}

    fig.quad(
        left='left',
        right='right',
        top='top',
        bottom=0,
        source=source,
        fill_color="#dbe8ff",
        line_color="#033649",
    )

    fig.xaxis.axis_label = 'Time in seconds'
    fig.yaxis.axis_label = 'Number of concurrent tasks'

    layout = gridplot(fig, ncols=1, plot_width=500, plot_height=500, toolbar_location="below")
    show(layout, notebook_handle=True)

    return (fig, source)
 
# Inputs for the task time series plot. The three text boxes are read by the
# button's click handler.
start_time = widgets.Text(
    description="Start time (s):",
    placeholder="e.g. - 0",
    value="",
    disabled=False,
)
display(start_time)

end_time = widgets.Text(
    description="End time (s):",
    placeholder="e.g. - 100",
    value="",
    disabled=False,
)
display(end_time)

num_tasks_bound = widgets.Text(
    description="Number of tasks:",
    placeholder="e.g. - 10",
    value="",
    disabled=False,
)
display(num_tasks_bound)

button = widgets.Button(
    description="Show plot",
    tooltip="Click me",
    button_style="",
    disabled=False,
)
display(button)
 

# Holds the (figure, source) pair returned by the first plot so that later
# clicks update that plot in place instead of drawing a new one.
_time_series_plot_data = []


def handle_submit(sender):
    """Draw or refresh the task time series for the bounds typed by the user.

    Blank text boxes fall back to: start 0, end ``time.time()``, no task
    limit. Non-blank values are parsed with ``int`` and raise ValueError if
    they are not integers. ``sender`` is supplied by the button event and is
    not used.
    """
    from bokeh.plotting import helpers
    from bokeh.io import push_notebook

    # Compare by value. ``is ""`` tests object identity, which only happens
    # to work when the interpreter reuses the same string object and emits a
    # SyntaxWarning on Python 3.8+.
    if start_time.value == "":
        start = 0
    else:
        start = int(start_time.value)
    if end_time.value == "":
        end = time.time()
    else:
        end = int(end_time.value)
    if num_tasks_bound.value == "":
        num_t = None
    else:
        num_t = int(num_tasks_bound.value)

    buckets, start_point, granularity = time_series(start=start, end=end, num=num_t)
    left, right, top = time_series_data(buckets, start_point, granularity)

    if len(_time_series_plot_data) == 0:
        # First click: create the plot and remember its handles.
        _time_series_plot_data.append(time_series_plot(left, right, top))
    else:
        # Later clicks: swap the data and rescale the existing figure.
        fig, source = _time_series_plot_data[0]
        source.data = {'left': left, 'right': right, 'top': top}
        x_range = (max(0, min(left)) if len(left) else 0, max(right) if len(right) else 1)
        y_range = (0, max(top) + 1 if len(top) else 1)

        x_range = helpers._get_range(x_range)
        fig.x_range.start = x_range.start
        fig.x_range.end = x_range.end

        y_range = helpers._get_range(y_range)
        fig.y_range.start = y_range.start
        fig.y_range.end = y_range.end

        push_notebook()


button.on_click(handle_submit)

#### Cluster Usage. 

In [None]:
import time
import math
def heat_map():
    """Count profiled tasks per time bucket and per node.

    Returns three parallel lists ``(nodes, times, num_tasks)``: for every
    bucket index and every node that had at least one profile returned for
    that bucket, the node's IP address, the bucket index and the count.
    """
    granularity = 1
    now = time.time()

    # Find the span of "score" values over every profile returned so far.
    earliest = now
    latest = 0
    all_profiles = ray.global_state.task_profiles(start=0, end=now)
    for profile in all_profiles.values():
        score = profile["score"]
        if score > latest:
            latest = score
        if score < earliest:
            earliest = score

    # A negative span (no profiles at all) yields zero buckets.
    bucket_total = max(0, math.ceil((latest - earliest) / granularity))

    worker_info = ray.global_state.workers()
    nodes = []
    times = []
    num_tasks = []

    for i in range(0, bucket_total, granularity):
        window_start = i * granularity + earliest
        window_end = (i + 1) * granularity + earliest
        # The query bounds are widened outwards to whole numbers.
        profiles = ray.global_state.task_profiles(
            start=math.floor(window_start),
            end=math.ceil(window_end),
        )

        per_node = dict()
        for profile in profiles.values():
            node = worker_info[profile["worker_id"]]["node_ip_address"]
            per_node[node] = per_node.get(node, 0) + 1

        for node_ip, count in per_node.items():
            num_tasks.append(count)
            nodes.append(node_ip)
            times.append(i)

    return nodes, times, num_tasks
 
from math import pi
import pandas as pd
import random
import numpy as np
from bokeh.io import show, output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
)
from bokeh.plotting import figure

output_notebook()

node_ip_address, times, num_tasks = heat_map()

# Skip the plot when nothing was returned. Plain truthiness replaces
# ``len(...) is not 0``, which compared object identity rather than value.
if node_ip_address:

    df = pd.DataFrame({"node_ip_address": node_ip_address, "time": times, "num_tasks": num_tasks})

    # Light-to-dark grey scale: darker cells mean more tasks.
    colors = ["#FFFFFF", "#E8E8E8", "#DCDCDC", "#D3D3D3", "#B8B8B8", "#A8A8A8", "#696969", "#383838", "#000000"]
    mapper = LinearColorMapper(palette=colors, low=df.num_tasks.min() - 1, high=df.num_tasks.max() + 1)
    source = ColumnDataSource(df)

    TOOLS = "hover,save,xpan,box_zoom,reset,xwheel_zoom"

    # Sorted so the row order on the y-axis is the same on every run; the
    # iteration order of a set of strings is not.
    p = figure(title="Cluster Usage", y_range=sorted(set(node_ip_address)),
               x_axis_location="above", plot_width=900, plot_height=500,
               tools=TOOLS, toolbar_location='below')

    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "10pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = pi / 3

    # One unit-sized cell per (bucket index, node) pair, coloured by count.
    p.rect(x="time", y="node_ip_address", width=1, height=1,
           source=source,
           fill_color={'field': 'num_tasks', 'transform': mapper},
           line_color=None)

    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="8pt",
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         label_standoff=6, border_line_color=None, location=(0, 0))
    p.add_layout(color_bar, 'right')

    p.select_one(HoverTool).tooltips = [
         ('Node IP Address', '@node_ip_address'),
         ('Number of tasks running', '@num_tasks'),
         ('Time', '@time')
    ]

    p.xaxis.axis_label = "Time in seconds"
    p.yaxis.axis_label = "Node IP Address"

    show(p)

#### Task Completion Time Distribution.

In [None]:
import math
import time

import numpy as np
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import Range1d
from bokeh.plotting import figure

tasks = ray.global_state.task_profiles(start=0, end=time.time())
if len(tasks) != 0:
    # Per-task duration, from "get_task_start" to "store_outputs_end".
    distr = [data["store_outputs_end"] - data["get_task_start"] for data in tasks.values()]

    # Width-1 bins from 0 up to the longest duration, with at least one bin.
    # The edges used to be range(<number of distinct durations>), which is
    # unrelated to the values: longer durations were dropped, and a single
    # distinct duration produced no bins and crashed max(hist).
    last_edge = max(1, math.ceil(max(distr)))
    hist, bin_edges = np.histogram(distr, bins=range(last_edge + 1))

    p = figure(title="Task Completion Time Distribution", tools=["save", "hover", "wheel_zoom", "box_zoom", "pan"],
               background_fill_color="#FFFFFF", x_range=(0, max(distr) + 2), y_range=(0, max(hist) + 2))
    p.quad(top=hist, bottom=0, left=bin_edges[:-1], right=bin_edges[1:],
           fill_color="#B3B3B3", line_color="#033649")

    p.xaxis.axis_label = 'Time in seconds'
    # Bar height is the number of tasks whose duration falls in the bin.
    p.yaxis.axis_label = 'Number of tasks'

    show(gridplot(p, ncols=1, plot_width=500, plot_height=500, toolbar_location="below"))

#### Task Timeline

In [None]:
# Inputs for the task timeline. They are read by the click handler of the
# "View task timeline" button.
start_bound = widgets.Text(
    value="",
    placeholder="e.g. - 0 ",
    description="Query start time (s):",
    disabled=False
)
display(start_bound)

end_bound = widgets.Text(
    value="",
    placeholder="e.g. - 100 ",
    description="Query end time (s):",
    disabled=False
)
display(end_bound)

# This box holds a task count, not a time, so its label carries no "(s)".
num_bound = widgets.Text(
    value="",
    placeholder="e.g. - 10 ",
    description="# of tasks shown:",
    disabled=False
)
display(num_bound)

breakdown_opt = widgets.Dropdown(
    options=["Basic", "Task Breakdowns"],
    value="Basic",
    description="View options:",
    disabled=False,
)
display(breakdown_opt)

path_input = widgets.Button(description="View task timeline")
display(path_input)

def find_trace2html():
    """Return the path of catapult's ``trace2html`` script under the home directory.

    Fails an assertion (AssertionError) when the script does not exist at
    ``~/catapult/tracing/bin/trace2html``.
    """
    home = os.path.expanduser("~")
    script_path = os.path.join(home, "catapult", "tracing", "bin", "trace2html")
    script_found = os.path.exists(script_path)
    assert script_found, "Could not find catapult, please clone it into your home directory from https://github.com/catapult-project/catapult/tree/master/tracing"
    return script_path

def handle_submit(sender):
    """Dump a catapult trace, convert it to HTML and open it in Google Chrome.

    Blank text boxes are passed on as ``None``; non-blank values are parsed
    with ``int`` and raise ValueError if they are not integers. ``sender`` is
    supplied by the button event and is not used.

    Raises:
        AssertionError: From ``find_trace2html`` when catapult is missing.
        subprocess.CalledProcessError: If ``trace2html`` exits non-zero.
    """
    # mkstemp creates each file atomically and returns its path. mktemp only
    # returned an unused name, which another process could claim before the
    # file was written.
    json_fd, tmp = tempfile.mkstemp(suffix=".json")
    os.close(json_fd)
    html_fd, tmp2 = tempfile.mkstemp(suffix=".html")
    os.close(html_fd)

    # Compare by value. ``is`` tests object identity, which only happens to
    # work for these strings and emits a SyntaxWarning on Python 3.8+.
    if start_bound.value == "":
        start_t = None
    else:
        start_t = int(start_bound.value)

    if end_bound.value == "":
        end_t = None
    else:
        end_t = int(end_bound.value)

    if num_bound.value == "":
        num_t = None
    else:
        num_t = int(num_bound.value)

    breakdown = breakdown_opt.value != "Basic"

    print("Dumping task profiling data to " + tmp)
    ray.global_state.dump_catapult_trace(tmp, start=start_t, end=end_t, num=num_t, breakdowns=breakdown)
    print("Converting chrome trace to " + tmp2)
    trace2html = find_trace2html()
    subprocess.check_output(["python2", trace2html, tmp, '--output', tmp2])
    print("Opening html file in browser...")
    subprocess.Popen(["open", "-a", "Google Chrome", tmp2])


path_input.on_click(handle_submit)

In [27]:
start, end, num_tasks = ray.global_state.job_length()
duration = end - start


def handle_submit(sender):
    """Click handler for the "View plot" button below.

    The handler was left unfinished: it reads the selected percentage range
    and the breakdown option but does not plot anything yet. ``sender`` is
    supplied by the button event and is not used.
    """
    x, y = range_slide.value
    # The original ``if`` had no body (a SyntaxError that kept the whole cell
    # from running) and read the undefined name ``break_down_opt``.
    if percent_breakdown_opt.value == "% total time":
        # TODO: plot the tasks that fall between x% and y% of the job's
        # total time; the "% total tasks" option is also still to be written.
        pass


print("This job took " + str(duration) + " seconds to complete and a total of " + str(num_tasks) + " tasks were run.")
range_slide = widgets.IntRangeSlider(
    value=[70, 100],
    min=0,
    max=100,
    step=1,
    description='%:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.0i%',
)

# Named differently from the task timeline's ``breakdown_opt`` dropdown: the
# timeline handler reads that global when clicked, so rebinding the name here
# would make it see these options instead of "Basic" / "Task Breakdowns".
percent_breakdown_opt = widgets.Dropdown(
    options=["% total time", "% total tasks"],
    value="% total time",
    description="Options:",
    disabled=False,
)
display(percent_breakdown_opt)

display(range_slide)
path_input = widgets.Button(description="View plot")
display(path_input)
path_input.on_click(handle_submit)

This job took 0.013583898544311523 seconds to complete and a total of 100 tasks were run.


In [None]:
import heapq


def handle_submit(sender):
    """Return the slowest tasks as ``(duration, task_id)`` pairs, slowest first.

    The number of pairs is taken from the ``num_slowest`` text box; a blank
    box means "all tasks". Nothing is displayed by this handler itself.
    ``sender`` is supplied by the button event and is not used.
    """
    # Compare by value; ``is ""`` tested object identity.
    if num_slowest.value == "":
        num_slow = sys.maxsize
    else:
        num_slow = int(num_slowest.value)

    # Fetch every profile before ranking. NOTE(review): ``num=`` appears to
    # cap how many profiles are returned (it is used with num=1 elsewhere in
    # this notebook to fetch the first task), so passing num_slow there would
    # rank only an arbitrary subset - confirm against the Ray API.
    tasks = ray.global_state.task_profiles(start=0, end=time.time())
    durations = [
        (data["store_outputs_end"] - data["get_arguments_start"], task_id)
        for task_id, data in tasks.items()
    ]
    # nlargest replaces the hand-rolled heap, which unpacked the popped tuple
    # in the wrong order and then deleted from ``tasks`` while iterating it.
    longest = heapq.nlargest(min(num_slow, len(durations)), durations)
    return longest


num_slowest = widgets.Text(
    value="",
    placeholder="e.g. - 10 ",
    description="Top _ stragglers:",
    disabled=False
)
display(num_slowest)
path_input = widgets.Button(description="View table")
display(path_input)
path_input.on_click(handle_submit)

#### Stragglers

In [None]:
import json
import heapq
import qgrid 
import pandas as pd
from pandas.io.json import json_normalize

table = ray.global_state.task_table()
# Stragglers of the most recent query, keyed by task ID.
stragglers = dict()


def handle_submit(sender):
    """Show the slowest tasks in a qgrid table, slowest first.

    The number of rows is taken from the ``num_slowest`` text box; a blank
    box means "all tasks". Duration is measured from "get_arguments_start"
    to "store_outputs_end". ``sender`` is supplied by the button event and is
    not used.
    """
    # Compare by value; ``is ""`` tested object identity.
    if num_slowest.value == "":
        num_slow = sys.maxsize
    else:
        num_slow = int(num_slowest.value)

    # Fetch every profile before ranking. NOTE(review): ``num=`` appears to
    # cap how many profiles are returned (it is used with num=1 elsewhere in
    # this notebook to fetch the first task), so passing num_slow there would
    # rank only an arbitrary subset - confirm against the Ray API.
    tasks = ray.global_state.task_profiles(start=0, end=time.time())
    durations = [
        (data["store_outputs_end"] - data["get_arguments_start"], task_id)
        for task_id, data in tasks.items()
    ]
    longest = heapq.nlargest(min(num_slow, len(durations)), durations)

    # Reset first so rows from an earlier click do not pile up in the table.
    stragglers.clear()
    for dur, task_id in longest:
        stragglers[task_id] = {"Duration (s)": dur}

    # Built from explicit columns so an empty result still yields a valid
    # one-column frame; renaming the columns of an empty frame raised.
    df = pd.DataFrame(
        {"Duration (s)": [dur for dur, _ in longest]},
        index=pd.Index([task_id for _, task_id in longest], name="TaskID"),
    )
    grid = qgrid.QGridWidget(df=df)
    display(grid)


num_slowest = widgets.Text(
    value="",
    placeholder="e.g. - 10 ",
    description="Top _ stragglers:",
    disabled=False
)
display(num_slowest)
path_input = widgets.Button(description="View table")
display(path_input)
path_input.on_click(handle_submit)