In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import re
import requests
import time

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Use matplotlib's 'ggplot' style sheet for the figures created in this notebook.
plt.style.use('ggplot')

## Configuration

In [None]:
# Root URL that every job / vertex / metric request in this notebook is sent to.
BASE_URL = 'http://localhost:9000'
# Pause between two sampling rounds, in seconds (passed to time.sleep).
SAMPLING_FREQ_SEC = 1
# Total length of the sampling run, in seconds (five minutes).
DURATION_SEC = 5 * 60

In [None]:
def splitComponent(component, pattern):
    """Split a metric id into its instance part and its component name.

    Args:
        component: Metric id string, expected to look like
            ``<digits>.<component name>.<metric>``.
        pattern: Compiled regular expression with (at least) two capture
            groups; group 1 is returned as the instance, group 2 as the
            component name.

    Returns:
        Tuple ``(instance, componentName)`` holding the two captured groups.

    Raises:
        ValueError: If ``component`` does not match ``pattern``.
    """
    # Match against the pattern supplied by the caller instead of the
    # notebook-level METRIC_PATTERN, so the argument is actually honoured.
    m = pattern.match(component)
    if m is None:
        raise ValueError(
            f'Metric id {component!r} does not match pattern {pattern.pattern!r}'
        )
    return m.group(1), m.group(2)

def getAvailableVertexMetrics(jobID, vertexID):
    """Return the decoded JSON listing of metrics for one vertex of a job.

    Sends a GET request to ``{BASE_URL}/jobs/{jobID}/vertices/{vertexID}/metrics``
    and hands back the parsed response body unchanged.
    """
    endpoint = f'{BASE_URL}/jobs/{jobID}/vertices/{vertexID}/metrics'
    response = requests.get(endpoint)
    return response.json()

def getMetrics(jobID, vertexID, metrics, maxRequestLength=40):
    """Fetch the current values of the given metrics for one vertex of a job.

    The metric ids are requested in batches so that a single request's query
    string does not become too long.

    Args:
        jobID: Id of the job the vertex belongs to.
        vertexID: Id of the vertex whose metrics are requested.
        metrics: Sequence of metric id strings to request.
        maxRequestLength: Maximum number of metric ids sent per request.

    Returns:
        A list with the decoded JSON entries of all batches, in request order.
        An empty ``metrics`` sequence yields an empty list without any request.

    Raises:
        ValueError: If ``maxRequestLength`` is smaller than 1.
    """
    if maxRequestLength < 1:
        # A zero step makes range() fail and a negative one would silently
        # return no data at all, so reject both explicitly.
        raise ValueError('maxRequestLength must be at least 1')

    def rawGetMetrics(jobID, vertexID, metrics):
        # Build the URL from the jobID argument, not from the notebook-level
        # runningJobID variable, so the function queries the job it was asked for.
        metricString = ','.join(metrics)
        url = f'{BASE_URL}/jobs/{jobID}/vertices/{vertexID}/metrics'
        return requests.get(url, params={'get': metricString}).json()

    completeJSON = []
    # Split metric requests so that the request string does not become too long
    for i in range(0, len(metrics), maxRequestLength):
        partialMetrics = metrics[i:i + maxRequestLength]
        completeJSON.extend(rawGetMetrics(jobID, vertexID, partialMetrics))
    return completeJSON

In [None]:
# Placeholder; retrieveMetrics() replaces it with a compiled regex once a metric is chosen.
METRIC_PATTERN = None

# Fetch all jobs and keep only those whose status is RUNNING.
jobs = requests.get(f'{BASE_URL}/jobs').json()['jobs']
runningJobs = list(filter(lambda job: job['status'] == 'RUNNING', jobs))
assert len(runningJobs) == 1, 'Toolkit can only work with exactly one running job!'
runningJobID = runningJobs[0]['id']
print(f'Selected running job: {runningJobID}')

# Vertices of the selected job, plus the containers that are filled below
# (allMetrics, vertexIndex) and by retrieveMetrics() (metricRequests).
vertices = requests.get(f'{BASE_URL}/jobs/{runningJobID}').json()['vertices']
allMetrics, metricRequests, vertexIndex = set(), {}, []

for vertex in vertices:
    vertexID = vertex['id']
    # Remember (id, name) so the vertex index can be printed later on.
    vertexIndex.append((vertexID, vertex['name']))
    availableMetrics = getAvailableVertexMetrics(runningJobID, vertexID)
    # Only the part after the last dot of each metric id is collected.
    allMetrics |= {metric['id'].rsplit('.', 1)[-1] for metric in availableMetrics}
    
# Sort the options so the dropdown order is stable between runs (a set has no order).
@interact(metric=sorted(allMetrics))
def retrieveMetrics(metric):
    """Select, per vertex, all metric ids whose last component is ``metric``.

    Compiles the module-level ``METRIC_PATTERN`` for the chosen metric and
    stores, for every vertex in ``vertices``, the list of matching metric ids
    in ``metricRequests[vertexID]``. Prints the number of matches per vertex.

    Args:
        metric: Metric name picked in the dropdown.
    """
    global METRIC_PATTERN
    # Raw string: '\d' and '\.' are regex escapes, not string escapes.
    # re.escape keeps special characters in the metric name literal, and the
    # trailing '$' stops e.g. 'numRecordsIn' from also matching
    # 'numRecordsInPerSecond'.
    METRIC_PATTERN = re.compile(rf'(\d+)\.(.+)\.{re.escape(str(metric))}$')
    for vertex in vertices:
        vertexID = vertex['id']
        availableMetrics = getAvailableVertexMetrics(runningJobID, vertexID)
        selectedMetrics = [
            candidate['id']
            for candidate in availableMetrics
            if METRIC_PATTERN.match(candidate['id'])
        ]
        metricRequests[vertexID] = selectedMetrics
        print(f'{len(selectedMetrics)} metrics for {vertexID}')

In [None]:
# Print Vertex Index: the first five characters of each vertex id, then the vertex name.
for vertexID, vertexName in vertexIndex:
    print(f'{vertexID[:5]} {vertexName}')

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%matplotlib notebook

records = pd.DataFrame(columns=['t', 'vertice', 'component', 'instance', 'value'])


fig, ax = plt.subplots(figsize=(8, 6))
plt.ion()
fig.show()
fig.canvas.draw()

start = time.time()
currentTime = time.time()
while currentTime - start < DURATION_SEC:
    for vertex in vertices:
        vertexID = vertex['id']
        metricValues = getMetrics(runningJobID, vertexID, metricRequests[vertexID])
        for metric in metricValues:
            component = metric['id']
            componentInstance, componentName  = splitComponent(component, METRIC_PATTERN)
            records = records.append({'t': currentTime, 'vertice': vertexID, 'component': componentName, 'instance': componentInstance, 'value': metric['value']}, ignore_index=True)
        currentTime = time.time()
    time.sleep(SAMPLING_FREQ_SEC)
    records['t'] = records['t'].astype(int)
    records['value'] = records['value'].astype(float)
    ax.clear()
    for name, group in records.groupby(['t', 'vertice', 'component']).mean().groupby(level=['vertice', 'component']):
        data = group.reset_index()
        ax.plot(data.t, data.value, alpha=.7, label=name[0][:5] + '_' + name[1][:15])
    ax.legend()
    fig.canvas.draw()
    