In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import re
import requests
import time

# Apply matplotlib's 'ggplot' style sheet to the figures created in this notebook.
plt.style.use('ggplot')

In [None]:
def splitComponent(component, pattern):
    """Split a metric id into the two parts captured by ``pattern``.

    Parameters
    ----------
    component : str
        Metric id to parse.
    pattern : re.Pattern or str
        Regular expression with at least two capture groups. It is matched
        against the start of ``component``.

    Returns
    -------
    tuple of (str, str)
        Capture group 1 and capture group 2 of the match, in that order.

    Raises
    ------
    ValueError
        If ``component`` does not match ``pattern``.
    """
    # Use the pattern handed in by the caller rather than a module-level global,
    # so the function does not depend on the order in which cells were executed.
    m = re.match(pattern, component)
    if m is None:
        raise ValueError(f'Metric id {component!r} does not match {pattern!r}')
    return m.group(1), m.group(2)

## Configuration

In [None]:
# Name of the metric to record; it is interpolated into METRIC_PATTERN to pick matching metric ids.
METRIC = 'numRecordsOutPerSecond'
# Base URL of the REST endpoint that is queried for jobs, vertices and metrics.
BASE_URL = 'http://localhost:9000'
# Seconds to sleep between two sampling rounds (used as an interval, despite "FREQ" in the name).
SAMPLING_FREQ_SEC = 1
# Total recording time in seconds; the sampling loop stops once this much time has elapsed.
DURATION_SEC = 300

In [None]:
# Select the matplotlib backend for this notebook.
# NOTE(review): `inline` is followed by `notebook` further down in this cell, so
# presumably only the latter is meant to stay in effect (the figure is redrawn
# in place by the sampling loop) — confirm whether the `inline` line is needed.
%matplotlib inline
# Load IPython's autoreload extension and enable mode 2.
# NOTE(review): mode 2 should reload all imported modules before each execution — confirm against the IPython docs.
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# Matches metric ids of the form "<digits>.<anything>.<METRIC>": group 1 captures
# the leading digits and group 2 the part between the first and the last section.
# A raw f-string keeps `\d` and `\.` as regex escapes instead of (invalid) string
# escapes, and re.escape() makes sure METRIC is matched literally.
METRIC_PATTERN = re.compile(rf'(\d+)\.(.+)\.{re.escape(METRIC)}')

# Empty frame with the columns the sampling loop fills, one row per metric sample.
records = pd.DataFrame(columns=['t', 'vertice', 'component', 'instance', 'value'])

# Ask the REST endpoint for all jobs and keep those whose status is RUNNING.
# Exactly one such job must exist; its id scopes every request that follows.
jobsPayload = requests.get(f'{BASE_URL}/jobs').json()
jobs = jobsPayload['jobs']
runningJobs = list(filter(lambda job: job['status'] == 'RUNNING', jobs))
assert len(runningJobs) == 1, 'Toolkit can only work with exactly one running job!'
runningJobID = runningJobs[0]['id']
print(f'Recording metrics for {runningJobID}')

# Fetch the vertices of the running job. For every vertex, list its metrics and
# remember the ids matching METRIC_PATTERN as one comma-joined string keyed by
# the vertex id; (id, name) pairs are collected in vertexIndex along the way.
vertices = requests.get(f'{BASE_URL}/jobs/{runningJobID}').json()['vertices']
metricRequests = {}
vertexIndex = []
for vertice in vertices:
    verticeID = vertice['id']
    vertexIndex.append((verticeID, vertice['name']))
    metricsURL = f'{BASE_URL}/jobs/{runningJobID}/vertices/{verticeID}/metrics'
    metricsList = requests.get(metricsURL).json()
    selectedMetrics = [
        metricID
        for metricID in (metric['id'] for metric in metricsList)
        if METRIC_PATTERN.match(metricID)
    ]
    metricRequests[verticeID] = ','.join(selectedMetrics)
    print(f'{verticeID}: {len(selectedMetrics)} metrics')

print()
# Vertex index: the first five characters of each vertex id next to the vertex
# name (the same shortened id is used as the prefix of the plot labels).
for vId, v in vertexIndex:
    shortId = vId[:5]
    print(shortId, v)
    
# Create the single figure/axes pair that the sampling loop clears and redraws.
fig, ax = plt.subplots(figsize=(8, 6))
# Switch matplotlib to interactive mode, then show and draw the still-empty
# figure once before the sampling loop starts updating it.
plt.ion()
fig.show()
fig.canvas.draw()

# Sampling loop: for DURATION_SEC seconds, poll every vertex once per round,
# accumulate the samples in `records`, and redraw the live chart after each round.
start = time.time()
currentTime = start
# Samples are collected in a plain list and turned into a DataFrame once per
# round: DataFrame.append was removed in pandas 2.0 and copying the frame for
# every single row is quadratic. Rows already present in `records` are kept.
sampleRows = records.to_dict('records')
while currentTime - start < DURATION_SEC:
    for vertice in vertices:
        verticeID = vertice['id']
        req = metricRequests[verticeID]
        if not req:
            # NOTE(review): no metric id matched for this vertex; an empty `get=`
            # presumably returns nothing usable, so the request is skipped — confirm.
            continue
        metricValues = requests.get(f'{BASE_URL}/jobs/{runningJobID}/vertices/{verticeID}/metrics?get={req}').json()
        for metric in metricValues:
            component = metric['id']
            componentInstance, componentName = splitComponent(component, METRIC_PATTERN)
            # Every sample of a round carries the same timestamp so that the
            # per-`t` averaging below groups one round together.
            sampleRows.append({'t': currentTime, 'vertice': verticeID, 'component': componentName, 'instance': componentInstance, 'value': metric['value']})
    time.sleep(SAMPLING_FREQ_SEC)

    records = pd.DataFrame(sampleRows, columns=['t', 'vertice', 'component', 'instance', 'value'])
    records['t'] = records['t'].astype(int)
    records['value'] = records['value'].astype(float)

    ax.clear()
    # Average only the numeric `value` column; the string column `instance`
    # cannot be averaged (TypeError on pandas >= 2.0).
    meanValues = records.groupby(['t', 'vertice', 'component'])['value'].mean()
    for name, group in meanValues.groupby(level=['vertice', 'component']):
        data = group.reset_index()
        ax.plot(data.t, data.value, alpha=.7, label=name[0][:5] + '_' + name[1][:15])
    if ax.lines:
        # Only build a legend once there is at least one labelled line.
        ax.legend()
    fig.canvas.draw()

    # Refresh the clock once per round, outside the vertex loop, so the loop
    # also terminates when there are no vertices at all.
    currentTime = time.time()
    