bench: generalised result processing
This introduces a basic generalised result processor that:

- accepts a set of directories containing JSON that represents the run result matrix (profiles × iterations)
- classifies per-run data into inputs and outputs
- performs a statistical tally on the run outputs
- outputs one of the implemented table formats (CSV, GitHub Markdown)

The format specification is reasonably generic; currently only the
`cardano-memory-benchmark` output is supported -- see bench/process/membenches_v1.jq. A usage sketch follows the changed-files summary below.
deepfire committed Jan 17, 2022
1 parent 2d5db67 commit 2e4cd1a
Showing 5 changed files with 316 additions and 0 deletions.
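
A minimal usage sketch (the description string, the ./results directory and the config names are illustrative, not part of this commit): each subdirectory of the results directory is expected to be named <config_name>-<iteration> and to contain a refined.json in the membenches_v1 schema.

  # One-shot report over every <config>-<iter>/refined.json under ./results:
  bench/process/process.sh --github report 'RTS flags comparison' membenches_v1 ./results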
12 changes: 12 additions & 0 deletions bench/process/collect.jq
@@ -0,0 +1,12 @@
def standard_run_desc(filename; sha256; format; ctime; mtime; branch; commit; config_name; iter; data):
{ filename: filename
, sha256: sha256
, format: format
, ctime: ctime
, mtime: mtime
, branch: branch
, commit: commit
, config_name: config_name
, iter: iter
, data: data
};
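
For orientation, a sketch of a call to this constructor (the branch and commit values come from process.sh below; every other value is made up to show the shape):

  standard_run_desc("results/baseline-0/refined.json"; "sha256…"; "membenches_v1";
                    "2022-01-14T10:00:00+00:00"; "2022-01-14T12:00:00+00:00";
                    "membench"; "a7ee17d1af44b571c6e476916bd24ed65db97e15";
                    "baseline"; "0";
                    {flags: "-N2", failed: false, RSS: {avg: 3100, max: 3800}})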
39 changes: 39 additions & 0 deletions bench/process/membenches_v1.jq
@@ -0,0 +1,39 @@
def format_specs:

{ config_specs:
[{ key: "config", header: "Config name"
, path: ["config_name"]}
,{ key: "flags", header: "RTS Flags"
, path: ["data", "flags"]
}
,{ key: "failed", header: "Failed"
, path: ["data", "failed"]
# , post: not
}]
, result_specs:
[{ key: "RssAvg", header: "Avg RSS, MB"
, path: ["RSS", "avg"], round: true
}
,{ key: "RssMax", header: "Max RSS, MB"
, path: ["RSS", "max"], round: true
}
,{ key: "HeapAvg", header: "Avg heap, MB"
, path: ["Heap", "avg"], round: true
}
,{ key: "HeapMax", header: "Max heap, MB"
, path: ["Heap", "max"], round: true
}
,{ key: "WallSec", header: "Wall, s"
, path: ["totaltime"], round: true
}
,{ key: "CpuMax", header: "OS CPU, s"
, path: ["CentiCpuMax"], scale: 100, round: true
}
,{ key: "MutMax", header: "Mutator, s"
, path: ["CentiMutMax"], scale: 100, round: true
}
,{ key: "GCSec", header: "GC time, s"
, path: ["SecGC"], round: true
}
]
};
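
A sketch of how a single result spec is meant to be read (the input object is illustrative): path is looked up under each run's data, scale divides the raw value, and round requests an integer.

  # Illustrative only: the CpuMax spec applied to one run's data.
  {CentiCpuMax: 12345}
  | getpath(["CentiCpuMax"])   # path          => 12345 centiseconds
  | . / 100                    # scale: 100    => 123.45 seconds
  | ceil                       # round: true   => 124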
54 changes: 54 additions & 0 deletions bench/process/process.jq
@@ -0,0 +1,54 @@
##
## Aggregation
##
def mean: if length == 0
then 0
else reduce .[] as $n (0; . + $n) / length end;
def pow2: . * .;
def variance: . | mean as $mean | map_values(. - $mean | pow2) | mean;
def stddev: . | (variance ?) | sqrt;

def samples_to_variable (n):
stddev as $stddev
| (add / n) as $mean
| { "mean": $mean
, "stddev": $stddev
, "relstddev": (if $stddev == null then 0 else $stddev / $mean end)
, "raw": .
};

def varspec_to_variable (objects; nobjects):
.key as $key
| .path as $path
| objects
| { "\($key)":
map (.data
| getpath($path)
as $val
| if $val != null then $val
else error("Path \($path) unreachable among top level keys: \(keys)")
end
)
| samples_to_variable(nobjects)
};

def description_from_headliner(x; rest):
map ( . as $spec
| (x | getpath($spec.path)) as $head
| (rest | map (getpath($spec.path) == $head) | all) as $coherence
| { "\($spec.key)":
(if $coherence == true
then $head
else error("Incoherence on config key: \($spec.key)")
end) })
| add;

def aggregate_config_runs_variables (config_specs; result_specs):
. as $runs
| .[0] as $headliner
| length as $nruns
| result_specs
| map(varspec_to_variable($runs; $nruns))
| add
| (config_specs | description_from_headliner($headliner; $runs[1:]))
+ .;
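
A small worked example of the aggregation helpers above (the samples are illustrative):

  # Illustrative only: three fake per-run samples of one output variable.
  [100, 110, 120]
  | samples_to_variable(3)
  # => { "mean": 110, "stddev": 8.164…, "relstddev": 0.0742…, "raw": [100, 110, 120] }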
121 changes: 121 additions & 0 deletions bench/process/process.sh
@@ -0,0 +1,121 @@
#!/usr/bin/env bash
# shellcheck disable=SC1090
set -eu

fail() {
echo -e "ERROR: $*" >&2
exit 1
}

default_op='report'
default_format='csv'

format=$default_format
header_footer='false'
no_progress=

while test $# -ge 1
do case "$1" in
--github ) format='github';;
--csv ) format='csv'; header_footer='false';;
--cls ) echo -en "\ec" >&2;;
--no-progress ) no_progress='true';;
--trace ) set -x;;
* ) break;; esac; shift; done

op=${1:-$default_op}; shift || true

main() {
PROCESS_ORIG_PWD=$(pwd)
pushd "$(dirname $0)" >/dev/null || exit 1
case "${op}" in
collect ) op_collect "$@";;
process ) op_process;;
render ) op_render;;

report ) op_collect "$@" | op_process | op_render;;

call ) eval "$@";;
* ) echo "ERROR: operation must be one of: collect process render report" >&2; exit 1;; esac
}

hardcoded_branch='membench'
hardcoded_commit='a7ee17d1af44b571c6e476916bd24ed65db97e15'

function op_collect() {
local desc=${1:?USAGE: $0 collect DESCRIPTION [FORMAT] [DIR]}
local format=${2:-membenches_v1}
local literal_dir=${3:-.}

local dir=$(pushd "$PROCESS_ORIG_PWD" >/dev/null; realpath "$literal_dir")
test -d "${dir}" -a -n "$(ls "${dir}"/*/*.json)" ||
fail "${literal_dir} (realpath $dir) must be a writable directory with subdirectories containing JSON files with ${format} output schema"

test -n "$no_progress" || echo -ne "Collecting runs in $dir: " >&2
local args_global=(
--arg desc "$desc"
--arg format "$format"
--arg now "$(date --utc --iso-8601=seconds)"
)
case $format in
membenches_v1 )
{
for f in $dir/*/refined.json
do local fpad="$f "
test -n "$no_progress" || echo -n "$fpad" >&2
local args_run=(
--arg filename "$f"
--arg format "$format"
--arg sha256 "$(sha256sum $f | cut -d' ' -f1)"
--arg ctime "$(stat --format=%w $f)"
--arg mtime "$(stat --format=%y $f)"
--arg config "$(echo $f | xargs dirname | xargs basename | cut -d- -f1)"
--arg iter "$(echo $f | xargs dirname | xargs basename | cut -d- -f2)"
--arg hardcoded_branch $hardcoded_branch
--arg hardcoded_commit $hardcoded_commit
--slurpfile data "$f"
)
jq 'include "collect";
standard_run_desc($filename; $sha256; $format; $ctime; $mtime; $hardcoded_branch; $hardcoded_commit; $config; $iter; $data[0])
' "$f" "${args_global[@]}" "${args_run[@]}"
test -n "$no_progress" || printf "${fpad//?/\\b}" >&2
done
test -n "$no_progress" || echo >&2; };;
* )
fail "unknown result format: $format"
esac |
jq 'include "'"$format"'";
{ description: $desc
, format: $format
, ctime: $now
, runs: .
}
+ format_specs
' --slurp "${args_global[@]}"
}

function op_process() {
jq 'include "process";
. as $batch
| $batch.runs
| group_by(.config_name)
| map (aggregate_config_runs_variables ($batch.config_specs; $batch.result_specs))
| $batch
+ { configs: . }
'
}

function op_render() {
jq 'include "render";
render('"$header_footer"')
' --raw-output
}

###
### Main
###
main "$@"
90 changes: 90 additions & 0 deletions bench/process/render.jq
@@ -0,0 +1,90 @@
##
## Presentation
##

def decimal_pt: (. * 10 | floor) / 10;
def decimal_pt2: (. * 100 | floor) / 100;
def float_n(width): "\(.)" | .[:width + (if .[0:1] == "-" then 1 else 0 end)];
def downscale(factor): . / factor;

## Expect name of a simple numeric field.
def field(fname; f):
.[fname] as $val
| "\($val | f)";

## Expect name of a rich variable.
def var(fname; f):
.[fname] as $val
| "\($val.mean | f) | \("\($val.relstddev)" | .[:4])";

def render_config (format; cf; res):
. as $x
| (if format != "csv" then [null] else [] end
+
(cf
| map($x[.key]))
+
(res
| map(. as $spec
| $x[$spec.key] as $val
| [(($val.mean / ($spec.scale // 1))
| if $spec.round then ceil else . end)
, ($val.relstddev | tostring | .[:5])])
| add))
as $columns

| ($columns | join(" | "));

def render_table_head (format; cf; res):
.
| (if format != "csv" then [null] else [] end
+
(cf
| map(.header))
+
(res
| map([.header, "σ/μ"])
| add))
as $columns

| [ $columns | join(" | ")]
  +
  (if format == "github"
   then [([null] + ($columns | map("--")) + [null])
        | join("|")] else [] end);

def render_table:
.format as $format
| .config_specs as $config_specs
| .result_specs as $result_specs
| render_table_head (.format; .config_specs; .result_specs)
+ (.configs | map (render_config ($format; $config_specs; $result_specs)));

def add_header_footer(commits; run_counts; slot_counts):
.
| ([ "Parameters:\n"
, if run_counts | all(. == run_counts[0])
then " Every value is mean of \(run_counts[0]) runs,\n"
else " Every value is mean of varying amount of runs (\(run_counts)).\n" end
, if slot_counts | all(. == slot_counts[0])
then " Each run was syncing \(slot_counts[0]) slots, or \(slot_counts[0] / 21600 | floor) epochs over loopback, from a quiescent server.\n"
else " Runs were for varying amount of slots (\(slot_counts)).\n" end
, "\nLegend:\n"
, " wall = total_wall_seconds, total cpu = total_cpu_seconds\n"
, " total alloc = allocated_bytes, copied = copied_bytes, max live = max_live_bytes\n"
, " See https://github.com/ghc/ghc/blob/master/includes/RtsAPI.h for details.\n"
, " Each value is followed by σ/μ, i.e. relative stddev (or CoV).\n"
, " Δ% is change from baseline.\n"
, "\n\n"]) + .;
#| . + ["\n\n\(commits | map(" - \(.branch) / \(.commit) = https://github.com/'${github_user}'/cardano-node/tree/\(.commit)\n") | add)"];

def render(header_footer):
.commits as $commits
| .run_counts as $run_counts
| .slot_counts as $slot_counts
| render_table
| if header_footer == true
then add_header_footer($commits; $run_counts; $slot_counts)
else . end
| join("\n");
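
A small sketch of the presentation helpers above (the variable object is illustrative):

  # Illustrative only: a rich variable rendered with one decimal place.
  {WallSec: {mean: 123.456, relstddev: 0.0123}}
  | var("WallSec"; decimal_pt)
  # => "123.4 | 0.01"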
