In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import re
import requests
import time
import numpy as np
from collections import defaultdict
from matplotlib import lines, markers

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

plt.style.use('ggplot')


## Configuration

In [None]:
# Base URL of the REST API that is queried for jobs, vertices and metrics
# (presumably the job manager's REST endpoint -- confirm for your setup).
BASE_URL = 'http://localhost:8081'
# Seconds the sampling loop sleeps between two polling rounds.
SAMPLING_FREQ_SEC = 1
# Wall-clock time in seconds for which the sampling loop keeps polling.
DURATION_SEC = 300

In [None]:
def splitComponent(component, pattern):
    """Split a raw metric ID into its instance, component and metric parts.

    Args:
        component: Metric ID string, e.g. ``'0.Map.numRecordsIn'``.
        pattern: Compiled regular expression defining (a subset of) the named
            groups ``instance``, ``component`` and ``metric``.

    Returns:
        Tuple ``(instance, component, metric)``. A group that ``pattern`` does
        not define is reported as the string ``'DEFAULT'``.

    Raises:
        Exception: If ``component`` does not match ``pattern``.
    """
    # Match against the pattern handed in by the caller; the parameter used
    # to be ignored in favour of the global METRIC_PATTERN.
    m = pattern.match(component)
    if m is None:
        raise Exception(f'Failed to match {component}!')
    groups = m.groupdict()
    return tuple(groups.get(key, 'DEFAULT') for key in ('instance', 'component', 'metric'))

def getAvailableVertexMetrics(jobID, vertexID):
    """Return the decoded JSON listing of metrics offered for one vertex.

    Issues a GET on ``/jobs/<jobID>/vertices/<vertexID>/metrics`` below
    ``BASE_URL``.
    """
    metricsURL = f'{BASE_URL}/jobs/{jobID}/vertices/{vertexID}/metrics'
    response = requests.get(metricsURL)
    return response.json()

def getMetrics(jobID, vertexID, metrics, maxRequestLength=40):
    """Fetch the JSON entries of the given metric IDs for one vertex.

    The IDs are requested in batches of at most ``maxRequestLength`` so that
    the query string of a single request does not become too long. The
    entries of all batches are returned in one list, in request order.
    """
    def fetchBatch(batch):
        # One GET per batch; the IDs travel comma-separated in the 'get' parameter.
        url = f'{BASE_URL}/jobs/{jobID}/vertices/{vertexID}/metrics'
        query = {'get': ','.join(batch)}
        return requests.get(url, params=query).json()

    collected = []
    batchStarts = range(0, len(metrics), maxRequestLength)
    for offset in batchStarts:
        collected.extend(fetchBatch(metrics[offset:offset + maxRequestLength]))
    return collected

def matchDict(match):
    """Return the named groups of ``match`` as a dict with a fallback value.

    Looking up a group name that is not among the named groups yields the
    string ``'DEFAULT'`` instead of raising ``KeyError``.
    """
    return defaultdict(lambda: 'DEFAULT', match.groupdict())

def plotAggregatedInstances(df, ax):
    """Plot mean and spread of ``value`` over time per (vertex, component).

    Rows sharing the same ``t``, ``vertex`` and ``component`` are aggregated.
    For every (vertex, component) pair one line with the mean is drawn on
    ``ax``, surrounded by a band reaching half a standard deviation below and
    above the mean. Time is shifted so that each series starts at 0.

    Args:
        df: DataFrame with at least the columns ``t``, ``vertex``,
            ``component`` and ``value``.
        ax: Axes-like object providing ``plot`` and ``fill_between``.
    """
    markerstyles = list(markers.MarkerStyle.markers.keys())
    # String aliases select pandas' own reductions (std with ddof=1); passing
    # the np.mean / np.std callables to aggregate() is deprecated in recent
    # pandas releases. Selecting the column first keeps the result columns flat.
    aggregated = df.groupby(['t', 'vertex', 'component'])['value'].aggregate(['mean', 'std'])
    for i, (name, group) in enumerate(aggregated.groupby(level=['vertex', 'component'])):
        data = group.reset_index()
        # Seconds since the first sample of this series.
        elapsed = data['t'] - data['t'].min()
        mean = data['mean']
        halfStd = data['std'] / 2
        # Legend label: shortened vertex ID plus shortened component name.
        ax.plot(elapsed, mean, alpha=.7, label=name[0][:5] + '_' + name[1][:15],
                marker=markerstyles[i % len(markerstyles)], markevery=20, markersize=5)
        ax.fill_between(elapsed, mean - halfStd, mean + halfStd, alpha=.3)

In [None]:
# Module-level state shared by the widget callbacks and the sampling loop.
METRIC_PATTERN = None  # compiled regex of the chosen metric level (set by selectMetrics)
METRICS = None  # metric names to record (set by retrieveMetrics)
PLOTTED_METRIC = None  # set by selectPlottedMetric
# Raw strings keep the regex escapes (\d, \.) literal; in ordinary string
# literals these are invalid escape sequences, which Python warns about.
# Operator-level metric IDs: <instance>.<component>.<metric>
OPERATOR_METRIC_PATTERN = re.compile(r'^(?P<instance>\d+)\.(?P<component>.+)\.(?P<metric>.+)$')
# Chain-level metric IDs: <instance>.<metric>, with a dot-free metric name
CHAIN_METRIC_PATTERN = re.compile(r'^(?P<instance>\d+)\.(?P<metric>[^\.]+)$')

# Find the job to monitor: exactly one job may be in state RUNNING.
jobs = requests.get(f'{BASE_URL}/jobs').json()['jobs']
runningJobs = list(filter(lambda job: job['status'] == 'RUNNING', jobs))
assert len(runningJobs) == 1, 'Toolkit can only work with exactly one running job!'
jobID = runningJobs[0]['id']

# Fetch the job details; its vertices drive all later metric requests.
jobInfo = requests.get(f'{BASE_URL}/jobs/{jobID}').json()
jobName, vertices = jobInfo['name'], jobInfo['vertices']

print(f'Selected job: {jobName} ({jobID})')

# Filled while scanning the metrics of every vertex of the job.
operatorMetrics = set()  # metric names found in operator-level metric IDs
chainMetrics = set()  # metric names found in chain-level metric IDs
metricRequests = {}  # vertex ID -> metric IDs to poll (filled by retrieveMetrics)
vertexIndex = []  # (vertex ID, vertex name) pairs

for vertex in vertices:
    vertexIndex.append((vertex['id'], vertex['name']))
    availableMetrics = getAvailableVertexMetrics(jobID, vertex['id'])
    for metric in availableMetrics:
        metricID = metric['id']
        # Operator-level IDs have the form instanceNo.operatorName.metricName;
        # the pattern captures the metric name.
        operatorMatch = OPERATOR_METRIC_PATTERN.match(metricID)
        if operatorMatch:
            operatorMetrics.add(operatorMatch.group('metric'))
        else:
            # Everything else must be a chain-level ID: instanceNo.metricName
            chainMatch = CHAIN_METRIC_PATTERN.match(metricID)
            if not chainMatch:
                raise Exception(f'Failed to match {metric}')
            chainMetrics.add(chainMatch.group('metric'))

def selectPlottedMetric(metric):
    """Store ``metric`` in the module-level ``PLOTTED_METRIC`` variable."""
    # globals() is this module's namespace, i.e. the same binding that a
    # ``global PLOTTED_METRIC`` declaration would rebind.
    globals()['PLOTTED_METRIC'] = metric

def retrieveMetrics(metrics):
    """Choose, per vertex, the metric IDs that are to be polled.

    Stores ``metrics`` in the global ``METRICS``. For every vertex of the job
    the available metric IDs are filtered down to those that match
    ``METRIC_PATTERN`` and whose metric name is contained in ``metrics``; the
    result is recorded in ``metricRequests`` under the vertex ID and the
    number of selected IDs is printed.
    """
    global METRICS
    METRICS = metrics

    def isSelected(metricID):
        match = METRIC_PATTERN.match(metricID)
        return match is not None and match.group('metric') in METRICS

    for vertexID in (vertex['id'] for vertex in vertices):
        offered = getAvailableVertexMetrics(jobID, vertexID)
        chosen = [entry['id'] for entry in offered if isSelected(entry['id'])]
        metricRequests[vertexID] = chosen
        print(f'{len(chosen)} metrics for {vertexID}')

@interact(metricLevel={'operator': (operatorMetrics, OPERATOR_METRIC_PATTERN), 'chain': (chainMetrics, CHAIN_METRIC_PATTERN)})
def selectMetrics(metricLevel):
    """Widget callback: activate a metric level and offer its metric names.

    ``metricLevel`` is a ``(metric names, compiled pattern)`` pair. The
    pattern becomes the global ``METRIC_PATTERN``; the names populate a
    multi-selection widget that is wired to ``retrieveMetrics``.
    """
    global METRIC_PATTERN
    metricNames, METRIC_PATTERN = metricLevel
    metricSelector = widgets.SelectMultiple(options=metricNames)
    interact(retrieveMetrics, metrics=metricSelector)
    
# Empty sample table; 't' and 'value' are cast to float up front.
records = pd.DataFrame(
    columns=['t', 'vertex', 'component', 'instance', 'metric', 'value']
).astype({'t': float, 'value': float})

In [None]:
# Print each vertex ID followed by its name, split at ' -> ' onto separate lines
for vertexID, vertexName in vertexIndex:
    nameLines = vertexName.replace(' -> ', '\n').strip()
    print(vertexID, '\n', nameLines, '\n')

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%matplotlib notebook
    
fig, axes = plt.subplots(figsize=(8, 4*len(METRICS)), nrows=len(METRICS))
plt.ion()
fig.show()
fig.canvas.draw()

start = time.time()
currentTime = time.time()
while currentTime - start < DURATION_SEC:
    for vertex in vertices:
        vertexID = vertex['id']
        metricValues = getMetrics(jobID, vertexID, metricRequests[vertexID])
        for metric in metricValues:
            componentInstance, componentName, baseMetric  = splitComponent(metric['id'], METRIC_PATTERN)
            records = records.append({'t': float(currentTime), 'vertex': vertexID, 'component': componentName, 'instance': componentInstance, 'metric': baseMetric, 'value': float(metric['value'])}, ignore_index=True)
    for i, plottedMetric in enumerate(METRICS):
        ax = axes[i]
        ax.clear()
        plotAggregatedInstances(records[records.metric == plottedMetric], ax)
        ax.legend()
        ax.set(xlabel='sec', title=plottedMetric)
    fig.canvas.draw()
    currentTime = time.time()
    time.sleep(SAMPLING_FREQ_SEC)


    

In [None]:
def convertToRelativeChange(df):
    """Express the ``value`` column relative to its first entry.

    Args:
        df: DataFrame with a numeric ``value`` column.

    Returns:
        A copy of ``df`` whose ``value`` column is divided by the first entry
        of that column. An empty frame is returned as an unchanged copy.
        The frame passed in is not modified.
    """
    # Work on a copy: this function is used with groupby.apply, for which
    # pandas documents that mutating the passed object is not supported.
    result = df.copy()
    if result.empty:
        return result
    result['value'] = result['value'] / result['value'].iloc[0]
    return result

# Scale every series by its first (non-missing) sample. 'metric' is part of
# the key so that each metric is normalized by its own baseline. transform()
# returns a Series aligned with the rows of `records`, so the result keeps the
# original index and all columns regardless of how groupby.apply treats group
# keys in the installed pandas version.
baselineValues = records.groupby(['vertex', 'instance', 'component', 'metric'])['value'].transform('first')
relativeChange = records.assign(value=records['value'] / baselineValues)

In [None]:
fig, ax = plt.subplots()
plotAggregatedInstances(relativeChange, ax)
# Show the per-series labels and label the axes, as the live plot does;
# without them the lines cannot be told apart.
ax.legend()
ax.set(xlabel='sec', ylabel='value relative to first sample', title='Relative change')