Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bench: generalised result processing
This introduces a basic generalised result processor that: (1) accepts a set of directories with JSON files representing the run result matrix (profiles x iterations); (2) classifies per-run data into inputs and outputs; (3) performs a statistical tally on the run outputs; (4) outputs one of the implemented table formats (CSV, GitHub markdown). The format specification is reasonably generic, and currently supports just the `cardano-memory-benchmark` output -- see bench/membenches_v1.jq
- Loading branch information
Showing
5 changed files
with
316 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
## Assemble the standard description record for a single benchmark run.
## Every argument is a filter evaluated against the current input; its
## result is stored under the field of the same name.
def standard_run_desc(filename; sha256; format; ctime; mtime; branch; commit; config_name; iter; data):
  {
    "filename":    filename,
    "sha256":      sha256,
    "format":      format,
    "ctime":       ctime,
    "mtime":       mtime,
    "branch":      branch,
    "commit":      commit,
    "config_name": config_name,
    "iter":        iter,
    "data":        data
  };
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
## Column specifications for one result format.
##
##   config_specs: columns describing a configuration.  Each entry has a
##                 `key`, a table `header` and a `path` into the run record.
##   result_specs: measured columns.  Each entry has a `key`, a `header`, a
##                 `path`, a `round` flag, and optionally a `scale` divisor.
def format_specs:
  {
    "config_specs": [
      { "key": "config", "header": "Config name", "path": ["config_name"] },
      { "key": "flags",  "header": "RTS Flags",   "path": ["data", "flags"] },
      { "key": "failed", "header": "Failed",      "path": ["data", "failed"]
        # , post: not
      }
    ],
    "result_specs": [
      { "key": "RssAvg",  "header": "Avg RSS, MB",  "path": ["RSS", "avg"],   "round": true },
      { "key": "RssMax",  "header": "Max RSS, MB",  "path": ["RSS", "max"],   "round": true },
      { "key": "HeapAvg", "header": "Avg heap, MB", "path": ["Heap", "avg"],  "round": true },
      { "key": "HeapMax", "header": "Max heap, MB", "path": ["Heap", "max"],  "round": true },
      { "key": "WallSec", "header": "Wall, s",      "path": ["totaltime"],    "round": true },
      { "key": "CpuMax",  "header": "OS CPU, s",    "path": ["CentiCpuMax"],  "scale": 100, "round": true },
      { "key": "MutMax",  "header": "Mutator, s",   "path": ["CentiMutMax"],  "scale": 100, "round": true },
      { "key": "GCSec",   "header": "GC time, s",   "path": ["SecGC"],        "round": true }
    ]
  };
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
## | ||
## Aggregation | ||
## | ||
## Arithmetic mean of the input's values; an empty input yields 0.
def mean:
  length as $count
  | if $count == 0
    then 0
    else (reduce .[] as $sample (0; . + $sample)) / $count
    end;

## Square of the input.
def pow2: . * .;

## Population variance: the mean of the squared deviations from the mean.
def variance:
  mean as $mu
  | map_values((. - $mu) | pow2)
  | mean;

## Square root of the variance.  An error raised while computing the
## variance is suppressed, in which case no output is produced at all.
def stddev: (try variance) | sqrt;
|
||
## Turn an array of numeric samples (the input) into a statistical variable.
##
##   n: the sample count used as the divisor for the mean.
##
## Output object:
##   mean      -- sum of the samples divided by `n`
##   stddev    -- standard deviation of the samples
##   relstddev -- stddev / mean; reported as 0 when the stddev is null or
##                when the mean is 0 (the ratio is undefined there, and jq
##                raises an error on division by zero)
##   raw       -- the unmodified input samples
def samples_to_variable (n):
    stddev    as $stddev
  | (add / n) as $mean
  | { "mean":      $mean
    , "stddev":    $stddev
    , "relstddev": (if $stddev == null or $mean == 0
                    then 0
                    else $stddev / $mean end)
    , "raw":       .
    };
|
||
## Build the statistical variable for one result spec (the input).
##
##   objects:  array of run records; the sample is read from each record's
##             `.data` at the spec's `path`.
##   nobjects: sample count handed on to samples_to_variable.
##
## Output: a single-key object mapping the spec's `key` to the variable.
## Raises an error when any run has no (or a null) value at the path.
def varspec_to_variable (objects; nobjects):
    . as $spec
  | objects
  | map ( .data
        | (getpath($spec.path)) as $sample
        | if $sample == null
          then error("Path \($spec.path) unreachable among top level keys: \(keys)")
          else $sample
          end )
  | samples_to_variable(nobjects)
  | { "\($spec.key)": . };
|
||
## Derive the configuration description shared by a group of runs.
## The input is an array of config specs (`key`, `path`).
##
##   x:    the reference run whose values are reported.
##   rest: array of the remaining runs; each must hold the same value as
##         `x` at every spec path.
##
## Output: one object mapping every spec key to the reference value.
## Raises "Incoherence on config key: ..." when any run disagrees.
def description_from_headliner(x; rest):
  map ( . as $spec
      | (x | getpath($spec.path)) as $expected
      | (rest | map(getpath($spec.path) == $expected) | all) as $coherent
      | if $coherent
        then { "\($spec.key)": $expected }
        else error("Incoherence on config key: \($spec.key)")
        end )
  | add;
|
||
## Aggregate all runs of one configuration (the input array) into a single
## object: the shared configuration description merged with one
## statistical variable per result spec.
##
##   config_specs: specs handed to description_from_headliner; the first
##                 run is the reference, the remaining runs are checked
##                 against it.
##   result_specs: specs handed to varspec_to_variable, one variable each.
def aggregate_config_runs_variables (config_specs; result_specs):
    length as $nruns
  | .[0]   as $headliner
  | .      as $runs
  | (result_specs | map(varspec_to_variable($runs; $nruns)) | add)
  | . as $variables
  | (config_specs | description_from_headliner($headliner; $runs[1:]))
    + $variables;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#!/usr/bin/env bash
# shellcheck disable=SC1090
#
# Benchmark result processor: collects per-run JSON, aggregates it and
# renders a table.
#
# This script needs bash, not plain sh: it relies on pushd, the `function`
# keyword, `local`, arrays and ${var//pattern/replacement}.
#
# Usage: [--github|--csv] [--cls] [--no-progress] [--trace] [OP [ARGS..]]
#   OP is one of: collect process render report call  (default: report)
set -eu

# Print an error message on stderr and abort the script with status 1.
fail() {
    echo -e "ERROR: $*" >&2
    exit 1
}

default_op='report'
default_format='csv'

# NOTE(review): nothing in the visible script hands this `format` to the
# renderer (op_collect declares its own local `format` for the result
# schema) -- confirm whether --github/--csv are meant to take effect.
format=$default_format
header_footer='false'
no_progress=

# Consume leading option flags; the first unrecognised word ends the loop
# and is left in place as the operation name.
while test $# -ge 1
do case "$1" in
       --github )      format='github';;
       --csv )         format='csv'; header_footer='false';;
       --cls )         echo -en "\ec" >&2;;
       --no-progress ) no_progress='true';;
       --trace )       set -x;;
       * ) break;; esac; shift; done

# `shift` fails when no operation was given; that is fine, hence `|| true`.
op=${1:-$default_op}; shift || true
|
||
# Dispatch the selected operation (global `op`) with the remaining
# command-line arguments.
#
# The caller's working directory is remembered in PROCESS_ORIG_PWD before
# changing into the script's own directory (presumably so that jq resolves
# the sibling modules named by `include` -- verify).
main() {
    PROCESS_ORIG_PWD=$(pwd)
    # Quote $0: the script may live under a path containing whitespace.
    pushd "$(dirname "$0")" >/dev/null || exit 1
    case "${op}" in
        collect ) op_collect "$@";;
        process ) op_process;;
        render )  op_render;;

        report )  op_collect "$@" | op_process | op_render;;

        # Debugging aid: evaluate the arguments as shell code.
        call )    eval "$@";;
        * )       fail "operation must be one of: collect process render report";; esac
}
|
||
# Branch and commit recorded in every run description.
# NOTE(review): these are fixed values, not derived from the collected
# results -- confirm this is intended.
hardcoded_branch='membench'
hardcoded_commit='a7ee17d1af44b571c6e476916bd24ed65db97e15'

# op_collect DESCRIPTION [FORMAT] [DIR]
#
# Collect all runs found under DIR (default: ".", resolved against the
# directory the script was started from) into one JSON batch on stdout.
#
#   DESCRIPTION  free-form text stored as the batch description
#   FORMAT       result schema name (default: membenches_v1); also the name
#                of the jq module that provides `format_specs`
#   DIR          directory whose subdirectories hold the JSON results
#
# For membenches_v1 every DIR/*/refined.json is one run.  The subdirectory
# name is split on '-': the first field is the config name, the second the
# iteration.  Progress is written to stderr unless --no-progress was given.
function op_collect() {
    local desc=${1?-USAGE: $0 collect DESCRIPTION [FORMAT] [DIR]}
    local format=${2:-membenches_v1}
    local literal_dir=${3:-.}

    local dir=$(pushd "$PROCESS_ORIG_PWD" >/dev/null; realpath "$literal_dir")
    test -d "${dir}" -a -n "$(ls "${dir}"/*/*.json)" ||
        fail "${literal_dir} (realpath $dir) must be a writable directory with subdirectories containing JSON files with ${format} output schema"

    test -n "$no_progress" || echo -ne "Collecting runs in $dir: " >&2
    local args_global=(
        --arg desc   "$desc"
        --arg format "$format"
        --arg now    "$(date --utc --iso-8601=seconds)"
    )
    case $format in
        membenches_v1 )
            {
                # All path expansions are quoted so that directories with
                # whitespace or glob characters are handled correctly.
                for f in "$dir"/*/refined.json
                do local fpad="$f "
                   # Name of the directory holding this run: CONFIG-ITER.
                   local run_dir
                   run_dir=$(basename "$(dirname "$f")")
                   test -n "$no_progress" || echo -n "$fpad" >&2
                   local args_run=(
                       --arg filename "$f"
                       --arg format   "$format"
                       --arg sha256   "$(sha256sum "$f" | cut -d' ' -f1)"
                       --arg ctime    "$(stat --format=%w "$f")"
                       --arg mtime    "$(stat --format=%y "$f")"
                       --arg config   "$(echo "$run_dir" | cut -d- -f1)"
                       --arg iter     "$(echo "$run_dir" | cut -d- -f2)"
                       --arg hardcoded_branch "$hardcoded_branch"
                       --arg hardcoded_commit "$hardcoded_commit"
                       --slurpfile data "$f"
                   )
                   jq 'include "collect";

                       standard_run_desc($filename; $sha256; $format; $ctime; $mtime; $hardcoded_branch; $hardcoded_commit; $config; $iter; $data[0])
                      ' "$f" "${args_global[@]}" "${args_run[@]}"
                   # Erase the progress text: one backspace per character.
                   test -n "$no_progress" || printf "${fpad//?/\\b}" >&2
                done
                test -n "$no_progress" || echo >&2; };;
        * )
            fail "unknown result format: $format"
    esac |
        # Slurp the per-run descriptions into one batch object and attach
        # the column specs of the selected format.
        jq 'include "'"$format"'";

            { description: $desc
            , format:      $format
            , ctime:       $now
            , runs:        .
            }
            + format_specs
           ' --slurp "${args_global[@]}"
}
|
||
# Read a collected batch on stdin, group its runs by config name,
# aggregate every group, and write the batch back out with the
# aggregates added under `configs`.
op_process() {
    jq 'include "process";

        . as $batch
        | ( $batch.runs
          | group_by(.config_name)
          | map (aggregate_config_runs_variables ($batch.config_specs; $batch.result_specs))
          ) as $configs
        | $batch + { configs: $configs }
       '
}
|
||
# Read a processed batch on stdin and write the rendered table to stdout
# as raw text.
#
# The global `header_footer` (a JSON boolean literal, 'true' or 'false')
# is passed to jq as a value with --argjson rather than being spliced
# into the program text, so it can never alter the jq program itself.
function op_render() {
    jq --argjson header_footer "$header_footer" --raw-output '
        include "render";

        render($header_footer)
       '
}
|
||
### | ||
### Main | ||
### | ||
main "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
## | ||
## Presentation | ||
## | ||
|
||
## Round the input down (floor) to one decimal place.
def decimal_pt:
  (. * 10) | floor | . / 10;

## Round the input down (floor) to two decimal places.
def decimal_pt2:
  (. * 100) | floor | . / 100;

## Render the input as text cut to `width` characters; a leading minus
## sign does not count towards the width.
def float_n(width):
  tostring
  | (if startswith("-") then 1 else 0 end) as $sign
  | .[:width + $sign];

## Divide the input by `factor`.
def downscale(factor):
  factor as $divisor
  | . / $divisor;
|
||
## Render a simple numeric field.
##   fname: name of the field to read from the input object.
##   f:     formatter applied to the field's value before it is rendered
##          as text.
def field(fname; f):
  .[fname] | f | "\(.)";

## Render a rich variable (an object with `mean` and `relstddev`).
##   fname: name of the variable to read from the input object.
##   f:     formatter applied to the mean.
## Output: "<formatted mean> | <relstddev text cut to 4 characters>".
def var(fname; f):
  .[fname]
  | (.mean | f | tostring)              as $mean
  | (.relstddev | tostring | .[:4])     as $rel
  | "\($mean) | \($rel)";
|
||
## Render one aggregated configuration (the input object) as a table row.
##
##   format: table format name; every format other than "csv" gets an
##           empty leading cell, i.e. the row starts with the separator.
##   cf:     array of config specs; the value stored under each spec's
##           `key` in the input becomes one cell.
##   res:    array of result specs; each contributes two cells: the mean
##           and the relative stddev cut to 5 characters.
##
## The mean is divided by the spec's `scale` (default 1) and rounded up
## when the spec's `round` flag is set.  The scale is a property of the
## spec, not of the aggregated variable, so it is read from `$spec`.
##
## Output: the cells joined with " | ".
def render_config (format; cf; res):
  . as $x
  | (if format != "csv" then [null] else [] end
     +
     (cf
      | map($x[.key]))
     +
     (res
      | map(. as $spec
            | $x[$spec.key] as $val
            | [ (($val.mean / ($spec.scale // 1))
                 | if $spec.round then ceil else . end)
              , ($val.relstddev | tostring | .[:5]) ])
      | add))
    as $columns

  | ($columns | join(" | "));
|
||
## Render the table heading as an array of lines.
##
##   format: table format name; every format other than "csv" gets an
##           empty leading cell, matching the rows from render_config.
##   cf:     array of config specs; each contributes its `header`.
##   res:    array of result specs; each contributes its `header`
##           followed by a "σ/μ" column.
##
## For "github" a delimiter row follows the header row.  GitHub markdown
## requires the delimiter row to come directly after the header and to
## have the same number of cells, so it is built from the real headers
## only (the empty leading cell is just the row's leading pipe).
def render_table_head (format; cf; res):
    ((cf  | map(.header))
     +
     (res | map([.header, "σ/μ"]) | add)) as $headers
  | (if format != "csv" then [null] else [] end
     + $headers)                          as $columns

  | [ $columns | join(" | ") ]
    +
    (if format == "github"
     then [ ([null] + ($headers | map("--")) + [null])
            | join("|") ]
     else [] end);
|
||
## Render a processed batch (the input) as an array of table lines:
## the heading followed by one row per entry of `.configs`.  The format
## name and both spec lists are taken from the batch itself.
def render_table:
  . as $batch
  | render_table_head ($batch.format; $batch.config_specs; $batch.result_specs)
    + ( $batch.configs
      | map (render_config ($batch.format; $batch.config_specs; $batch.result_specs)) );
|
||
## Prepend an explanatory "Parameters" / "Legend" preamble to the table
## lines given as input.
##
##   commits:     not used by the active code (only by the disabled
##                footer at the end of this definition).
##   run_counts:  array of per-config run counts; when all entries are
##                equal a single count is reported, otherwise the array.
##   slot_counts: array of per-run slot counts, handled the same way; a
##                uniform count is also shown in epochs of 21600 slots.
def add_header_footer(commits; run_counts; slot_counts):
    . as $lines
  | (if run_counts | all(. == run_counts[0])
     then " Every value is mean of \(run_counts[0]) runs,\n"
     else " Every value is mean of varying amount of runs (\(run_counts)).\n"
     end) as $runs_note
  | (if slot_counts | all(. == slot_counts[0])
     then " Each run was syncing \(slot_counts[0]) slots, or \(slot_counts[0] / 21600 | floor) epochs over loopback, from a quiescent server.\n"
     else " Runs were for varying amount of slots (\(slot_counts)).\n"
     end) as $slots_note
  | [ "Parameters:\n"
    , $runs_note
    , $slots_note
    , "\nLegend:\n"
    , " wall = total_wall_seconds, total cpu = total_cpu_seconds\n"
    , " total alloc = allocated_bytes, copied = copied_bytes, max live = max_live_bytes\n"
    , " See https://github.com/ghc/ghc/blob/master/includes/RtsAPI.h for details.\n"
    , " Each value is followed by σ/μ, i.e. relative stddev (or CoV).\n"
    , " Δ% is change from baseline.\n"
    , "\n\n"
    ]
    + $lines;
  #| . + ["\n\n\(commits | map(" - \(.branch) / \(.commit) = https://github.com/'${github_user}'/cardano-node/tree/\(.commit)\n") | add)"];
|
||
## Render a processed batch (the input) as one newline-joined string.
##
##   header_footer: when it evaluates to `true`, the explanatory preamble
##                  is prepended, fed with the batch's `commits`,
##                  `run_counts` and `slot_counts` fields.
def render(header_footer):
  . as $batch
  | render_table
  | if header_footer == true
    then add_header_footer($batch.commits; $batch.run_counts; $batch.slot_counts)
    else .
    end
  | join("\n");
|