Skip to content

Commit

Permalink
workbench: remote analysis & analysis fetching
Browse files Browse the repository at this point in the history
  • Loading branch information
deepfire committed Jun 24, 2022
1 parent ba1e6d5 commit 39d1f16
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 58 deletions.
3 changes: 3 additions & 0 deletions Makefile
Expand Up @@ -49,6 +49,9 @@ shell-dev shell-prof shell-nix: shell
shell-nix: ARGS += --arg 'workbenchDevMode' false ## Nix shell, (workbench from Nix store), vars: PROFILE, CMD, RUN
shell-prof: ARGS += --arg 'profiled' true ## Nix shell, everything Haskell built profiled

# 'analyse' sets RUN to 'wb analyse std ${TAG}' for this goal only and then
# depends on the 'shell' target.  NOTE(review): presumably 'shell' executes
# RUN inside the Nix shell -- confirm against the 'shell' rule.
analyse: RUN := wb analyse std ${TAG}
analyse: shell

# Builds .#workbench.profile-names-json with a JSON build report, extracts the
# 'out' output path of the first report entry, and pretty-prints that file.
list-profiles: ## List workbench profiles
nix build .#workbench.profile-names-json --json | jq '.[0].outputs.out' -r | xargs jq .
show-profile: ## NAME=profile-name
Expand Down
2 changes: 1 addition & 1 deletion nix.mk
Expand Up @@ -14,4 +14,4 @@ membench-5-at: ## Membench: 5 iterations, set commit by: make membench-5-at RE
nix build .#membench-node-this-5.batch-report --out-link result-batch-5-report --override-input node-measured github:input-output-hk/cardano-node/${REV}

# The second invocation names its result symlink explicitly via --out-link,
# so the CI-test build ends up linked as ./result-ci-test.
workbench-ci-test smoke: ## Workbench: test a-la Hydra, the ci-test profile, full Nix engaged
	nix build '.#hydraJobsPr.linux.native.workbench-ci-test' --cores 0
	nix build '.#hydraJobsPr.linux.native.workbench-ci-test' --cores 0 --out-link result-ci-test
151 changes: 94 additions & 57 deletions nix/workbench/run.sh
@@ -1,5 +1,4 @@
global_rundir_def=$PWD/run
global_rundir_alt_def=$PWD/../cardano-ops/runs

usage_run() {
usage "run" "Managing cluster runs" <<EOF
Expand Down Expand Up @@ -35,11 +34,11 @@ EOF

run() {
set -eu
if test -d "$global_rundir_def"
then global_rundir=$global_rundir_def
## Allow compatibility with cardano-ops legacy runs directory layout:
elif test -v "WB_RUNDIR" && test -d "$WB_RUNDIR"
if test -v "WB_RUNDIR" && test -d "$WB_RUNDIR"
then global_rundir=$WB_RUNDIR
## Allow compatibility with cardano-ops legacy runs directory layout:
elif test -d "$global_rundir_def"
then global_rundir=$global_rundir_def
else global_rundir=$global_rundir_def
mkdir "$global_rundir"
fi
Expand Down Expand Up @@ -253,54 +252,52 @@ case "$op" in
sort ||
true'" 2>/dev/null;;

allocate-from-aws | steal-from-aws | aws-get )
allocate-from-aws | aws-get )
local usage="USAGE: wb run $op RUN [MACHINE] [DEPLOYMENT=bench-1] [ENV=bench]"
local run=${1:?$usage}
local mach=${2:-}
local depl=${3:-bench-1}
local env=${4:-bench}

local meta=$(ssh $env -- sh -c "'jq . $depl/runs/$run/meta.json'")
if ! jq . <<<$meta >/dev/null
then fail "allocate-from-aws: malformed $(yellow meta.json) in $(white $run) on $(white $depl)@$(white env)"; fi

## Minor validation passed, create & populate run with remote data:
local dir=$global_rundir/$run
mkdir -p "$dir"
jq . <<<$meta > $dir/meta.json

local hosts=($(if test -n "$mach"; then echo $mach
else jq -r '.hostname | keys | .[]' <<<$meta; fi))
local objects=(
${hosts[*]}
genesis-shelley.json
genesis-alonzo.json
network-latency-matrix.json
machines.json
local args=(
"$run"
'if test -f compressed/logs-$obj.tar.zst; then cat compressed/logs-$obj.tar.zst; else tar c $obj --zstd --ignore-failed-read; fi'
$mach
$depl
$env
)
run_aws_get "${args[@]}";;

analysis-from-aws | aws-get-analysis | aws-geta | fetch-analysis | fa )
    ## Fetch the analysis results of one or more runs from the remote
    ## deployment, via run_aws_get.
    local usage="USAGE: wb run $op RUN.."
    ## At least one run is required:
    : "${1:?$usage}"
    local runs=($*) run
    ## This op only takes run tags on its command line, so deployment and
    ## ssh environment are not arguments here: inherit them if the caller
    ## already has them set, else use the same defaults as the other AWS ops.
    local depl=${depl:-bench-1}
    local env=${env:-bench}

    progress "aws" "trying to fetch analyses: $(white ${runs[*]})"
    for run in ${runs[*]}
    do ## 'ls -ld' prints nothing on stdout for a missing path, hence 0 lines:
       if   test "$(ssh $env -- sh -c "'ls -ld $depl/runs/$run | wc -l'")" = 0
       then fail "aws-analysis: run does not exist on AWS: $(white $run)"
       elif test "$(ssh $env -- sh -c "'ls -ld $depl/runs/$run/analysis | wc -l'")" = 0
       then fail "aws-analysis: run has not been analysed on AWS: $(white $run)"
       else ## Remote selector: profile, machines and analysis outputs,
            ## excluding the filtered (flt) JSON files.
            run_aws_get "$run" '{ ls {profile,machines}.json analysis/*.{json,cdf,org,txt} |
grep -v flt.json |
grep -v flt.logobjs.json |
xargs tar c --ignore-failed-read --zstd; }' 'explorer' "$depl" "$env"
       fi
    done
    ;;

analyse-aws | awsa )
local usage="USAGE: wb run $op RUN [MACHINE] [DEPLOYMENT=bench-1] [ENV=bench]"
local run=${1:?$usage}
local mach=${2:-}
local depl=${3:-bench-1}
local env=${4:-bench}

local count=${#objects[*]}
progress "run | aws-get $(white $run)" "objects to fetch: $(white $count) total: $(yellow ${objects[*]})"

local max_batch=9
progress "run | aws-get $(white $run)" "fetching in batches"

local base=0 batch
while test $base -lt $count
do local batch=(${objects[*]:$base:$max_batch})
progress_ne "run | aws-get $(white $run)" "fetching batch: "
local obj=
for obj in ${batch[*]}
do { ssh $env -- \
sh -c "'cd $depl/runs/$run && if test -f compressed/logs-$obj.tar.zst; then cat compressed/logs-$obj.tar.zst; else tar c $obj --zstd --ignore-failed-read; fi'" 2>/dev/null |
(cd $dir; tar x --zstd)
echo -ne " $(yellow $obj)" >&2
} &
done
wait
echo >&2
base=$((base + max_batch))
done;;
if test "$(ssh $env -- sh -c "'ls -ld $depl/runs/$run | wc -l'")" = 0
then fail "aws-analysis: run does not exist on AWS: $(white $run)"
else ssh $env -- sh -c "'export WB_RUNDIR=../$depl/runs && cd cardano-node && echo env: $(yellow $env), rundir: $(color blue)\$WB_RUNDIR$(color reset), workbench: $(color yellow)\$(git log -n1)$(color reset) && make analyse TAG=$run'"
fi
;;

allocate )
local usage="USAGE: wb run $op BATCH-NAME PROFILE-NAME [ENV-CONFIG-OPTS..] [-- BACKEND-ARGS-AND-ENV-CONFIG-OPTS..]"
Expand Down Expand Up @@ -472,18 +469,6 @@ case "$op" in
then jq 'keys | .[]' -r "$dir"/node-specs.json
else jq '.hostname | keys | .[]' -r "$dir"/meta.json; fi;;

fetch-analysis | fa )
local usage="USAGE: wb run $op ENV DEPL BATCH-OR-TAG.."
local env=${1:?$usage}; shift
local depl=${1:?$usage}; shift

for x in $*
do
ssh $env -- \
sh -c "'cd $depl/runs && tar c {*.$x.*,$x}/analysis/{block-propagation,logs-node-1.timeline}.txt --zstd --ignore-failed-read'" 2>/dev/null |
(cd run; tar x --zstd); done
;;

remote-machine-run-slice-list | rmrsl )
local usage="USAGE: wb run $op ENV DEPL [HOST=DEPL]"
local env=${1:?$usage}
Expand Down Expand Up @@ -631,3 +616,55 @@ EOF

* ) usage_run;; esac
}

## Fetch the objects of a run from a remote deployment into the local run
## directory ($global_rundir/RUN), a batch of parallel ssh transfers at a time.
##
## Arguments:
##   RUN             run tag: remote directory is DEPLOYMENT/runs/RUN
##   REMOTE-TAR-CMD  shell snippet executed remotely inside the run directory,
##                   once per fetched object; its stdout is unpacked locally
##                   with 'tar x --zstd'.  The snippet may refer to the name
##                   of the object being fetched as $obj.
##   MACHINE         optional: fetch this single host instead of every host
##                   listed under .hostname in the remote meta.json
##   DEPLOYMENT      remote deployment directory (default: bench-1)
##   ENV             ssh destination (default: bench)
##
## Globals read: global_rundir
run_aws_get() {
    local usage='USAGE: run_aws_get RUN REMOTE-TAR-CMD [MACHINE] [DEPLOYMENT] [ENV]'
    local run=${1:?$usage}
    local remote_tar_cmd=${2:?$usage}
    local mach=${3:-}
    local depl=${4:-bench-1}
    local env=${5:-bench}

    progress "aws-get" "mach $(yellow $mach) depl $(yellow $depl) run $(white $run)"
    progress "aws-get" "selector $(green $remote_tar_cmd)"

    ## Declaration is kept apart from the assignment so that 'local' does not
    ## hide the ssh status; a failed fetch leaves $meta empty and is reported
    ## by the check below ('jq .' alone would accept empty input).
    local meta
    meta=$(ssh $env -- sh -c "'jq . $depl/runs/$run/meta.json'") || true
    if test -z "$meta" || ! jq . <<<"$meta" >/dev/null
    then fail "aws-get: malformed $(yellow meta.json) in $(white $run) on $(white $depl)@$(white $env)"; fi

    ## Minor validation passed, create & populate run with remote data:
    local dir=$global_rundir/$run
    mkdir -p "$dir"
    jq . <<<"$meta" > "$dir"/meta.json

    local hosts=($(if test -n "$mach"; then echo $mach
                   else jq -r '.hostname | keys | .[]' <<<"$meta"; fi))
    local objects=(
        ${hosts[*]}
        genesis-alonzo.json
        genesis-shelley.json
        machines.json
        network-latency-matrix.json
        profile.json
    )

    local count=${#objects[*]}
    progress "run | aws-get $(white $run)" "objects to fetch: $(white $count) total: $(yellow ${objects[*]})"

    ## At most $max_batch transfers run concurrently; 'wait' is the barrier
    ## between batches.
    local max_batch=9 base=0 batch obj
    while test $base -lt $count
    do batch=("${objects[@]:$base:$max_batch}")
       progress_ne "run | aws-get $(white $run)" "fetching batch: "
       for obj in "${batch[@]}"
       do { ## The snippet reaches the remote shell single-quoted, so a $obj
            ## inside it is expanded remotely: define it there first.
            ## Unpack only if the local run directory can be entered.
            ssh $env -- \
                sh -c "'cd $depl/runs/$run && obj=$obj && ${remote_tar_cmd}'" 2>/dev/null |
                (cd "$dir" && tar x --zstd)
            echo -ne " $(yellow $obj)" >&2
          } &
       done
       wait
       echo >&2
       base=$((base + max_batch))
    done
}

0 comments on commit 39d1f16

Please sign in to comment.