# Ray web UI.

#### Evaluate the cells below to initialize the notebook.

In [2]:
%%html
<script>
  // Tracks whether the notebook's code input cells are currently visible.
  // Declared up front so code_toggle() never reads an undefined global if it
  // is invoked before the document-ready handler below has run.
  var code_shown = false;

  // Show or hide every code input cell and update the toggle button label.
  function code_toggle() {
    if (code_shown) {
      // The duration must be a number: jQuery does not parse numeric strings
      // such as '500' and silently falls back to its default speed instead.
      $('div.input').hide(500);
      $('#toggleButton').val('Show Code');
    } else {
      $('div.input').show(500);
      $('#toggleButton').val('Hide Code');
    }
    code_shown = !code_shown;
  }

  // Start with the code cells hidden once the page has loaded.
  $(document).ready(function () {
    code_shown = false;
    $('div.input').hide();
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>

<button id="do_run_all">Click to run all</button>
<script>
// Forward clicks on the custom button to the element with id "run_all_cells"
// (presumably the notebook's "Run All" menu item -- confirm against the
// notebook front end in use).
$("#do_run_all").on("click", function () {
    $("#run_all_cells").trigger("click");
});
</script>

In [3]:
import ipywidgets as widgets
import os
import pandas as pd
import pprint
import qgrid
import ray
import subprocess
import sys

from IPython.display import display

# Initialize Ray with the Redis address read from the REDIS_ADDRESS environment
# variable; os.environ[...] raises KeyError when the variable is not set.
ray.init(redis_address=os.environ["REDIS_ADDRESS"])

{'local_scheduler_socket_names': ['/tmp/scheduler41900926'],
 'node_ip_address': '127.0.0.1',
 'object_store_addresses': [ObjectStoreAddress(name='/tmp/plasma_store85889657', manager_name='/tmp/plasma_manager65427943', manager_port=12230)],
 'redis_address': '127.0.0.1:28354'}

#### Task timeline.

In [4]:
# Button that starts the task timeline export; its click handler is registered
# at the end of this cell.
path_input = widgets.Button(description="View task timeline")
display(path_input)

import tempfile

def find_trace2html(catapult_dir=None):
    """Return the path to catapult's ``trace2html`` script.

    Args:
        catapult_dir: Optional path to a catapult checkout. When omitted, the
            ``CATAPULT_HOME`` environment variable is used if it is set,
            otherwise ``~/catapult`` (the location the error message asks the
            user to clone into).

    Returns:
        The path ``<catapult_dir>/tracing/bin/trace2html``.

    Raises:
        AssertionError: If the script does not exist at that location.
    """
    if catapult_dir is None:
        catapult_dir = os.environ.get("CATAPULT_HOME") or os.path.join(
            os.path.expanduser("~"), "catapult")
    trace2html = os.path.join(catapult_dir, "tracing", "bin", "trace2html")
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not os.path.exists(trace2html):
        raise AssertionError(
            "Could not find catapult, please clone it into your home directory from https://github.com/catapult-project/catapult/tree/master/tracing"
            " (looked for " + trace2html + "; set CATAPULT_HOME to override)")
    return trace2html

def handle_submit(sender):
    """Dump the task timeline, convert it to HTML and open it in Chrome.

    Args:
        sender: The widget that fired the click event (unused).

    Raises:
        AssertionError: If ``find_trace2html`` cannot locate catapult.
        subprocess.CalledProcessError: If the trace2html conversion fails.
    """
    # mkstemp creates each file atomically with the requested suffix.  The
    # previous ``tempfile.mktemp() + ".json"`` only produced a name: mktemp is
    # deprecated because of its creation race, and the suffixed name was never
    # checked for collisions at all.
    json_fd, tmp = tempfile.mkstemp(suffix=".json")
    os.close(json_fd)
    html_fd, tmp2 = tempfile.mkstemp(suffix=".html")
    os.close(html_fd)

    print("Dumping task profiling data to " + tmp)
    ray.global_state.dump_catapult_trace(tmp)
    print("Converting chrome trace to " + tmp2)
    trace2html = find_trace2html()
    subprocess.check_output(["python2", trace2html, tmp, '--output', tmp2])
    print("Opening html file in browser...")
    # NOTE(review): ``open -a`` is the macOS application launcher, so this
    # step presumably only works on macOS with Google Chrome installed.
    subprocess.Popen(["open", "-a", "Google Chrome", tmp2])

# Run handle_submit whenever the "View task timeline" button is clicked.
path_input.on_click(handle_submit)

#### Task time series.

In [22]:
import numpy as np
import scipy.special
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show
from bokeh.resources import CDN
from bokeh.io import output_notebook
output_notebook(resources=CDN)
from bokeh.models import Range1d

buckets, earliest, latest, gran = ray.global_state.time_series()
# Guard the empty case so min()/max() do not raise on an idle cluster.
desired_range = (min(buckets), max(buckets) + 10) if len(buckets) else (0, 10)

# Each bucket already holds the height of one bar, so plot the buckets
# directly.  Expanding them into samples and re-binning with
# np.histogram(..., bins=range(len(buckets))) produced one bar too few and
# merged the last two buckets, because numpy's final bin is closed on both
# ends; it also failed outright when there were fewer than two buckets.
hist = np.array([buckets[i] for i in range(len(buckets))])
bin_edges = np.arange(len(hist) + 1)

# NOTE(review): bar edges are bucket indices while the x-range is
# latest - earliest; if `gran` is not one second per bucket the two disagree
# -- confirm against ray.global_state.time_series().
p = figure(title="Task Time Series",
           tools=["save", "hover", "wheel_zoom", "box_zoom", "pan"],
           background_fill_color="#FFFFFF",
           y_range=desired_range,
           x_range=(0, latest - earliest))

p.quad(top=hist, bottom=0, left=bin_edges[:-1], right=bin_edges[1:],
       fill_color="#B3B3B3", line_color="#033649")

p.xaxis.axis_label = 'Time in seconds'
p.yaxis.axis_label = 'Number of concurrent tasks'

show(gridplot(p, ncols=1, plot_width=500, plot_height=500, toolbar_location="below"))

#### Task completion time distribution.

In [27]:
import time
from bokeh.models import Range1d
tasks, e, l = ray.global_state.task_profiles(start=0, end=time.time())

# One duration per task: the difference between its "store_outputs_end" and
# "get_task_start" fields.
distr = [data["store_outputs_end"] - data["get_task_start"]
         for data in tasks.values()]

# One-second-wide bins covering [0, longest duration].  The edges used to be
# range(len(set(distr))), i.e. derived from how many distinct durations there
# were rather than how long they were, which dropped every duration beyond
# that count and raised when only one distinct value existed.
num_bins = max(1, int(np.floor(max(distr))) + 1) if distr else 1
hist, bin_edges = np.histogram(distr, bins=np.arange(num_bins + 1))

p = figure(title="Task Completion Time Distribution",
           tools=["save", "hover", "wheel_zoom", "box_zoom", "pan"],
           background_fill_color="#FFFFFF",
           x_range=(0, 15),
           y_range=(0, len(distr)))

p.quad(top=hist, bottom=0, left=bin_edges[:-1], right=bin_edges[1:],
       fill_color="#B3B3B3", line_color="#033649")

p.xaxis.axis_label = 'Time in seconds'
# The bars count tasks per duration bin, not concurrency.
p.yaxis.axis_label = 'Number of tasks'

show(gridplot(p, ncols=1, plot_width=500, plot_height=500, toolbar_location="below"))

#### Cluster usage.

In [7]:
from math import pi
import pandas as pd
import random
from bokeh.io import show, output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
)
from bokeh.plotting import figure

# Randomly generated placeholder data: 50 samples, each assigned to one of the
# node IPs below.
# The sample array is deliberately not called `time`, which would shadow the
# `time` module that another cell of this notebook imports and calls.
sample_times = np.random.uniform(low=0, high=40, size=(50))
node_ips = ["127.0.0.1", "8.8.6.8", "8.8.8.8", "9.3.4.5", "9.3.6.5"]
node_ip_address = [random.choice(node_ips) for _ in range(50)]
num_tasks = np.random.uniform(low=0, high=20, size=(50))

df = pd.DataFrame({"node_ip_address": node_ip_address,
                   "time": sample_times,
                   "num_tasks": num_tasks})

colors = ["#FFFFFF", "#E8E8E8", "#DCDCDC", "#D3D3D3", "#B8B8B8", "#A8A8A8", "#696969", "#383838", "#000000"]
mapper = LinearColorMapper(palette=colors, low=df.num_tasks.min(), high=df.num_tasks.max())
source = ColumnDataSource(df)

TOOLS = "hover,save,xpan,box_zoom,reset,xwheel_zoom"

# A categorical range needs each factor exactly once, so use the list of
# distinct node IPs rather than the 50-entry per-sample list (which repeats
# every IP many times).
p = figure(title="Cluster Usage", y_range=node_ips,
           x_axis_location="above", plot_width=900, plot_height=500,
           tools=TOOLS, toolbar_location='below')

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

# One unit-sized cell per sample, shaded by the sample's task count.
p.rect(x="time", y="node_ip_address", width=1, height=1,
       source=source,
       fill_color={'field': 'num_tasks', 'transform': mapper},
       line_color=None)

color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="8pt",
                     ticker=BasicTicker(desired_num_ticks=len(colors)),
                     label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')

p.select_one(HoverTool).tooltips = [
     ('Node IP Address', '@node_ip_address'),
     ('Number of tasks running', '@num_tasks'),
     ('Time', '@time')
]

p.xaxis.axis_label = "Time in seconds"
p.yaxis.axis_label = "Node IP Address"

show(p)

#### Object search.

In [8]:
# Text box for looking up a single object by ID.
object_search = widgets.Text(
    value="",
    placeholder="Object ID",
    description="Search for an object:",
    disabled=False
)
display(object_search)


# Named distinctly from the other widget handlers in this notebook, which all
# used to be called `handle_submit` and silently replaced one another in the
# kernel namespace.
def handle_object_search(sender):
    """Pretty-print the object table lookup for the ID typed into the box.

    Args:
        sender: The widget that fired the submit event (unused; the value is
            read from ``object_search``).
    """
    pprint.pprint(ray.global_state.object_table(object_search.value))


object_search.on_submit(handle_object_search)

#### Task search.

In [9]:
# Text box for looking up a single task by ID.
task_search = widgets.Text(
    value="",
    placeholder="Task ID",
    description="Search for a task:",
    disabled=False
)
display(task_search)


# Named distinctly from the other widget handlers in this notebook, which all
# used to be called `handle_submit` and silently replaced one another in the
# kernel namespace.
def handle_task_search(sender):
    """Pretty-print the task table lookup for the ID typed into the box.

    Args:
        sender: The widget that fired the submit event (unused; the value is
            read from ``task_search``).
    """
    pprint.pprint(ray.global_state.task_table(task_search.value))


task_search.on_submit(handle_task_search)

#### Client information.

In [10]:
ctable = ray.global_state.client_table()

# Flatten the per-node client lists into one record per client, tagging each
# record with the node IP it was listed under.  Records are copied so the
# table returned by Ray is not modified.
client_list = []
for node_ip in ctable:
    for client in ctable[node_ip]:
        record = dict(client)
        record["node_ip_address"] = node_ip
        client_list.append(record)

client_df = pd.DataFrame(client_list)
client_labels = ["Aux Address", "Client Type", "DB Client ID", "Deleted", "Local Scheduler Socket", "Num CPUs", "NumGPUs", "Node IP Address"]
if not client_df.empty:
    # The display labels are applied by position and appear to assume the raw
    # keys are in alphabetical (ASCII) order -- TODO confirm against
    # client_table().  Sort explicitly instead of relying on however pandas
    # happens to order dict keys, which differs between pandas versions.
    client_df = client_df[sorted(client_df.columns)]
    # Keep the raw column names rather than raising if the schema has changed.
    if len(client_df.columns) == len(client_labels):
        client_df.columns = client_labels
qgrid.show_grid(client_df)

#### Function information.

In [11]:
fn_table = ray.global_state.function_table()

# One record per function, with the table key stored alongside the entry's own
# fields.  Entries are copied so the table returned by Ray is not modified.
fn_list = []
for fn_id in fn_table:
    record = dict(fn_table[fn_id])
    record["function_id"] = fn_id
    fn_list.append(record)

qgrid.nbinstall(overwrite = True)
frame = pd.DataFrame(fn_list)
frame = frame.reset_index(drop=True)
function_labels = ["DriverID", "Module", "Function", "FunctionID"]
if not frame.empty:
    # The display labels are applied by position and appear to assume the raw
    # keys are in alphabetical (ASCII) order -- TODO confirm against
    # function_table().  Sort explicitly instead of relying on however pandas
    # happens to order dict keys, which differs between pandas versions.
    frame = frame[sorted(frame.columns)]
    # Keep the raw column names rather than raising if the schema has changed.
    if len(frame.columns) == len(function_labels):
        frame.columns = function_labels
qgrid.show_grid(frame, grid_options = {"forceFitColumns": False, "defaultColumnWidth": 200})

#### Task information table. 

In [12]:
from pandas.io.json import json_normalize

tt = ray.global_state.task_table()
tt_list = list(tt.values())

# Replace ObjectID instances with their hex strings so the rows can be
# flattened and shown in the grid; other argument values are kept as they are.
for d in tt_list:
    spec = d['TaskSpec']
    spec['ReturnObjectIDs'] = [oid.hex() for oid in spec['ReturnObjectIDs']]
    spec['Args'] = [arg.hex() if isinstance(arg, ray.local_scheduler.ObjectID) else arg
                    for arg in spec['Args']]

task_df = json_normalize(tt_list)
task_labels = ["Local Scheduler ID", "State", "Actor Counter", "ActorID", "Arguments", "DriverID", "FunctionID",
               "Parent Counter", "Parent Task ID", "Required CPUs", "Required GPUs", "Return Object IDs", "TaskID"]
if not task_df.empty:
    # The display labels are applied by position and appear to assume the
    # flattened keys are in alphabetical (ASCII) order -- TODO confirm against
    # task_table().  Sort explicitly instead of relying on the column order
    # json_normalize happens to produce.
    task_df = task_df[sorted(task_df.columns)]
    # Keep the raw column names rather than raising if the schema has changed.
    if len(task_df.columns) == len(task_labels):
        task_df.columns = task_labels
qgrid.show_grid(task_df, grid_options = {"forceFitColumns": True, "defaultColumnWidth": 200})