Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New plot script: Run duration over time #1760

Merged
merged 6 commits into from Jul 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions requirements.txt
@@ -1,5 +1,7 @@
matplotlib>=2.1.1
numpy==1.16.4
pandas==0.24.2
pexpect>=4.7.0
scipy>=1.0
termcolor>=1.0
terminaltables>=3.0
matplotlib>=2.1.1
pexpect>=4.7.0
terminaltables>=3.0
85 changes: 85 additions & 0 deletions scripts/plot_run_duration_over_time.py
@@ -0,0 +1,85 @@
#!/usr/bin/env python3

# Takes a single benchmark output JSON and plots the duration of item executions over time

import json
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

# Expect exactly one argument: the path to the benchmark output JSON.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1, so
    # calling scripts can tell that nothing was plotted. (The bare exit() used
    # before is a helper injected by the site module and reported success.)
    sys.exit('Usage: ' + sys.argv[0] + ' benchmark.json')

# Only the list stored under the 'benchmarks' key is used by the rest of the script.
with open(sys.argv[1]) as json_file:
    data_json = json.load(json_file)['benchmarks']

# Build flat list of {name, begin, duration} entries for every benchmark item run
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As is evident, I am not the biggest Python hacker. I can make things work, but there are probably various things that could be improved here. Please let me know!

# Flat list of per-run records: one dict per benchmark item run, successful or not
data = []
# Names of all benchmarks in input order (sorted further down for the color mapping)
benchmark_names = []
for benchmark_json in data_json:
name = benchmark_json['name']
benchmark_names.append(name)
# 'begin' is divided by 1e9 (it is plotted as 'Seconds since start'); 'duration' is kept as-is (plotted as '[ns]')
for run_json in benchmark_json['successful_runs']:
data.append({'name': name, 'begin': run_json['begin'] / 1e9, 'duration': run_json['duration'], 'success': True})
# Entries under 'unsuccessful_runs' are iterated the same way
for run_json in benchmark_json['unsuccessful_runs']:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really happy with the code duplication here, but I have no idea how to avoid it without making everything more verbose.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sure we can handle these two duplicated lines

# Same record layout as for successful runs; only the 'success' flag differs (it selects the plot marker later)
data.append({'name': name, 'begin': run_json['begin'] / 1e9, 'duration': run_json['duration'], 'success': False})


# One row per run; reset_index() additionally exposes the row number as an 'index' column
df = pd.DataFrame(data).reset_index()

# Set the colors. Sorting the benchmarks keeps the benchmark -> color mapping deterministic
benchmark_names.sort()
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
name_to_color = {}

fig, ax = plt.subplots()
for position, benchmark_name in enumerate(benchmark_names):
    # Wrap around when there are more benchmarks than colors in the cycle
    color = default_colors[position % len(default_colors)]
    name_to_color[benchmark_name] = color

    runs_of_benchmark = df[df['name'] == benchmark_name]

    # Plot into combined graph: dots for successful runs, crosses for unsuccessful ones
    for success, marker in ((True, 'o'), (False, 'x')):
        subset = runs_of_benchmark[runs_of_benchmark['success'] == success]
        if subset.empty:
            continue
        subset.plot(ax=ax, kind='scatter', x='begin', y='duration', c=color, figsize=(12, 9), s=5, marker=marker, linewidth=1)

# Finish combined graph
ax.set_xlabel('Seconds since start')
ax.set_ylabel('Execution duration [ns]')
ax.grid(True, alpha=.3)

# Add legend to combined graph: one colored patch per benchmark
handles = [mpatches.Patch(label=name, color=color) for name, color in name_to_color.items()]
plt.legend(handles=handles)

# Derive the output base name by dropping only a trailing '.json'.
# str.replace('.json', '') would also strip '.json' from directory names or
# from the middle of the file name and write the graphs to the wrong path.
input_path = sys.argv[1]
basename = input_path[:-len('.json')] if input_path.endswith('.json') else input_path

# Write combined graph to file
plt.savefig(basename + '.pdf')
plt.savefig(basename + '.png')

# Plot detailed graph: one subplot per benchmark, stacked vertically with a shared x axis
grouped_df = df.groupby('name')
# squeeze=False makes plt.subplots always return a 2D array of axes. With the
# default, a 1x1 grid yields a bare Axes object, so axes.flatten() failed
# whenever the input contained only a single benchmark.
fig, axes = plt.subplots(nrows=len(benchmark_names), ncols=1, figsize=(12, 3*len(benchmark_names)), sharex=True, squeeze=False)
for (key, ax) in zip(grouped_df.groups.keys(), axes.flatten()):
    filtered_df = grouped_df.get_group(key)
    # Dots for successful runs, crosses for unsuccessful ones; empty subsets are skipped
    for success in [True, False]:
        runs = filtered_df[filtered_df['success'] == success]
        if not runs.empty:
            runs.plot(ax=ax, kind='scatter', x='begin', y='duration', c=name_to_color[key], s=5, marker=('o' if success else 'x'), linewidth=1)
    ax.set_title(key)
    ax.set_xlabel('Seconds since start')
    ax.set_ylabel('Execution duration [ns]')
    # Pin the y label at a fixed offset so the labels of all subplots line up
    ax.yaxis.set_label_coords(-.08, 0.5)
    ax.grid(True, alpha=.3)
fig.tight_layout()

# Write detailed graph to file
plt.savefig(basename + '_detailed.pdf')
plt.savefig(basename + '_detailed.png')