CAD-944 bench: change log storage scheme
deepfire committed Jul 2, 2020
1 parent 2be9cf8 commit 1727cd5
Showing 7 changed files with 140 additions and 58 deletions.
6 changes: 3 additions & 3 deletions nix/sources.json
@@ -5,10 +5,10 @@
"homepage": null,
"owner": "input-output-hk",
"repo": "cardano-benchmarking",
"rev": "156fe6d09053c139f286e1b3e0483ee7b0de521b",
"sha256": "0j8a9w1xkfwh094hkx171xhic59bazyxgs7s28cbw5fv1wmxklqr",
"rev": "435eb75084d2dabaf7165c359c6345b0a4d32a79",
"sha256": "1642gpvl3v0svva1ifvkq75xvc8ssxnkfqiszzc9ihvhlamxs25r",
"type": "tarball",
"url": "https://github.com/input-output-hk/cardano-benchmarking/archive/156fe6d09053c139f286e1b3e0483ee7b0de521b.tar.gz",
"url": "https://github.com/input-output-hk/cardano-benchmarking/archive/435eb75084d2dabaf7165c359c6345b0a4d32a79.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"cardano-byron-proxy": {
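The pin bump above points sources.json at a newer cardano-benchmarking revision. A quick way to sanity-check the recorded hash before building — a sketch, assuming nix-prefetch-url is available on the path — is to prefetch the same tarball and compare:

    nix-prefetch-url --unpack \
      https://github.com/input-output-hk/cardano-benchmarking/archive/435eb75084d2dabaf7165c359c6345b0a4d32a79.tar.gz
    # should print the sha256 recorded above:
    # 1642gpvl3v0svva1ifvkq75xvc8ssxnkfqiszzc9ihvhlamxs25r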
91 changes: 60 additions & 31 deletions scripts/bench.sh
@@ -56,9 +56,12 @@ usage_extra() {
--fast-unsafe Ignore safety, go fast. Deploys won't be made,
unprocessed logs will be lost.
--deploy Force redeployment, even if benchmarking
--force-deploy Force redeployment, even if benchmarking
a single profile.
--cls Clear screen.
--force-genesis Force genesis regeneration, even if not required
by profile settings and deployment state.
--watch-deploy Do not hide the Nixops deploy log.
--cls Clear screen, before acting further.
Other OPs:
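Illustrative invocations of the new flags (a sketch — the operation names are taken from the dispatch table further down; the exact positional arguments depend on parts of bench.sh that are collapsed here):

    ./scripts/bench.sh --force-deploy --force-genesis ...   # redeploy everything and regenerate genesis
    ./scripts/bench.sh --watch-deploy ...                   # stream the nixops deploy log instead of hiding it
    ./scripts/bench.sh grep MsgBlock                        # run grep over node logs on every machine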
@@ -90,6 +93,8 @@ EOF
verbose= debug= trace=
no_deploy=
force_deploy=
force_genesis=
watch_deploy=

self=$(realpath "$0")

@@ -99,7 +104,12 @@ main() {
while test $# -ge 1
do case "$1" in
--fast-unsafe | --fu ) no_deploy=t;;
--deploy ) force_deploy=t;;
--deploy | --force-deploy )
force_deploy=t;;
--genesis | --force-genesis )
force_genesis=t;;
--watch | --watch-deploy )
watch_deploy=t;;
--select ) jq_select="jq 'select ($2)'"; shift;;

--cls ) echo -en "\ec">&2;;
@@ -183,14 +193,24 @@ main() {
jq-generator | jqg ) op_jq_generator "$@" | ${jq_select};;
jq-nodes | jqn ) op_jq_nodes "$@" | ${jq_select};;

pgrep ) op_pgrep "$@";;
md5sum | md5 | hash ) op_hash "$@";;
pgrep ) nixops ssh-for-each --parallel -- pgrep -fal "$@";;
grep ) local exp=$1; shift
nixops ssh-for-each --parallel -- grep "'$exp'" "/var/lib/cardano-node/logs/*" "$@";;
md5sum | md5 | hash ) nixops ssh-for-each --parallel -- md5sum "$@";;

blocks ) op_blocks;;

eval ) eval "${@@Q}";;
* ) usage; exit 1;; esac
}
trap atexit EXIT
trap atexit SIGHUP
trap atexit SIGINT
trap atexit SIGTERM
trap atexit SIGQUIT
atexit() {
pkill -f "tee ${batch_log}"
}
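The traps added above make sure the background `tee` that feeds the batch log is killed on every exit path, not just a clean one. A minimal reproduction of the pattern, assuming `batch_log` is set elsewhere in the script to the current run's log file and `long_running_op` stands in for the batch driver:

    batch_log=bench-batch.log                     # assumed name; the real value is set elsewhere
    long_running_op 2>&1 | tee "${batch_log}" &   # background logger, as started by the batch driver
    trap atexit EXIT SIGHUP SIGINT SIGTERM SIGQUIT
    atexit() { pkill -f "tee ${batch_log}"; }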

###
### Top-level operations
@@ -293,18 +313,20 @@ fetch_systemd_unit_startup_logs() {

pushd "${dir}" >/dev/null || return 1

mkdir -p 'logs/startup/'
nixops ssh explorer \
"journalctl --boot 0 -u tx-generator | head -n 100" \
> 'logs/log-unit-startup-generator.log'
> 'logs/startup/unit-startup-generator.log'
nixops ssh explorer "journalctl --boot 0 -u cardano-node | head -n 100" \
> 'logs/log-unit-startup-explorer.log'
> 'logs/startup/unit-startup-explorer.log'
nixops ssh explorer "journalctl --boot 0 -u cardano-db-sync | head -n 100" \
> 'logs/log-unit-startup-db-sync.log'
> 'logs/startup/unit-startup-db-sync.log'

for node in $(params producers)
do nixops ssh "${node}" "journalctl --boot 0 -u cardano-node | head -n 100" \
> "logs/log-unit-startup-${node}.log"
> "logs/startup/unit-startup-${node}.log"
done

popd >/dev/null
}
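With this change the systemd startup excerpts move from flat logs/log-unit-startup-*.log files into a dedicated logs/startup/ directory. The resulting layout looks roughly like this (node names illustrative):

    logs/startup/unit-startup-generator.log
    logs/startup/unit-startup-explorer.log
    logs/startup/unit-startup-db-sync.log
    logs/startup/unit-startup-node-0.log    # one per producer returned by 'params producers'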

@@ -374,8 +396,8 @@ EOF
\"logs/block-arrivals.gauge\",
\"logs/db-analysis.log\",
\"logs/db-analysis.tar.xz\",
\"logs/log-explorer-generator.tar.xz\",
\"logs/log-nodes.tar.xz\",
\"logs/logs-explorer.tar.xz\",
\"logs/logs-nodes.tar.xz\",
\"tools/*.sql\",
\"tools/*.sh\" ]
@@ -420,13 +442,15 @@ op_wait_for_nonempty_block() {
"'tac /var/lib/cardano-node/logs/node.json | grep -F MsgBlock | jq --slurp \"map (.data.msg.txids | select(. == [])) | length\"'")
echo -ne "\b\b"; printf "%02d" "$e"
sleep 5; done
echo " patience ran out."

echo " patience ran out, collecting logs from the botched run."
op_bench_fetch
analyse_tag "${tag}"
fail "\nNo non-empty blocks reached the explorer in ${patience} seconds -- is the cluster dead (genesis mismatch?)?"
}

op_wait_for_empty_blocks() {
local slot_length=20
local block_propagation_tolerance=1
local full_patience="$1"
local oneshot_action="${2:-true}"
local patience=${full_patience}
@@ -454,7 +478,7 @@ op_wait_for_empty_blocks() {
jq -cj '.blks_txs | join(",") | "["+.+"]"'
fi <<<$news
## A reasonable delay to see a new block.
sleep $((slot_length + block_propagation_tolerance))
sleep $slot_length
jqtest '.txcounts
| length == 0
or (all (. == 0) | not)' <<<$news
@@ -511,28 +535,33 @@ EOF

for mach in 'explorer' ${producers[*]}
do nixops ssh "${mach}" -- \
"cd /var/lib/cardano-node/logs &&
cat node-*.json > log-\${HOSTNAME}.json &&
rm node-*.json node.json &&
tar c --xz *.json" |
tar x --xz; done

echo "--( Packing logs.."
local explorer_logs=(
generator.json log-unit-startup-generator.log
log-explorer.json log-unit-startup-explorer.log
log-unit-startup-db-sync.log
"cd /var/lib/cardano-node &&
{ find logs -type l | xargs rm -f; } &&
rm -f logs-${mach} &&
ln -sf logs logs-${mach} &&
tar c --dereference --xz logs-${mach}
" | tar x --xz; done

oprint "packing logs.."
local explorer_extra_logs=(
unit-startup-generator.log
unit-startup-explorer.log
unit-startup-db-sync.log
)
tar cf log-explorer-generator.tar.xz --xz -- ${explorer_logs[*]}
rm -- ${explorer_logs[*]}
tar cf logs-explorer.tar.xz --xz -- \
logs-explorer \
${explorer_extra_logs[*]/#/startup\/}

tar cf logs-nodes.tar.xz --xz -- \
logs-node-* \
startup/unit-startup-node-*.log

## The rest must be node logs..
tar cf log-nodes.tar.xz --xz -- *.json log-unit-startup-*.log
rm -f -- *.json log-unit-startup-*.log
rm -f -- logs-*/* startup/*
rmdir -- logs-*/ startup/

popd >/dev/null

echo "--( logs collected from run: ${tag}"
oprint "logs collected from run: ${tag}"
}
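After the fetch, each machine's logs land in a per-host logs-<machine>/ directory (the remote `ln -sf logs logs-${mach}` plus `tar --dereference` trick is what stamps the host name onto the directory), and the packing step collapses them into two archives. Per the manifest earlier in this file, the run's logs/ directory ends up roughly as follows (a sketch):

    logs/
      logs-explorer.tar.xz    # logs-explorer/ plus startup/unit-startup-{generator,explorer,db-sync}.log
      logs-nodes.tar.xz       # logs-node-*/ plus startup/unit-startup-node-*.log
      block-arrivals.gauge
      ...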

# Keep this at the very end, so bash reads the entire file before execution starts.
38 changes: 23 additions & 15 deletions scripts/lib-analysis.sh
@@ -41,41 +41,49 @@ analyse_tag() {
meta=$(tmjq "$tag" .)

oprint "running log analyses: "
tar xaf '../logs/log-explorer-generator.tar.xz'
tar xaf '../logs/log-nodes.tar.xz'
tar xaf '../logs/logs-explorer.tar.xz'
tar xaf '../logs/logs-nodes.tar.xz'

echo " timetoblock.csv"
../tools/analyse.sh generator log-explorer "last-run/analysis/"
../tools/analyse.sh \
'logs-explorer/generator' \
'logs-explorer/node' \
'last-run/analysis/'
cp analysis/timetoblock.csv .

local blocks
blocks="$(../tools/blocksizes.sh log-explorer.json |
echo -n "--( running log analyses: blocksizes"
blocks="$(../tools/blocksizes.sh logs-explorer/node-*.json |
jq . --slurp)"

declare -A msgtys
local mach msgtys=() producers tnum msgtys_generator sub_tids
local mach msgtys=() producers tnum sub_tids
producers=($(jq '.nixops.benchmarkingTopology.coreNodes
| map(.name) | join(" ")' --raw-output <<<$meta))

for mach in explorer ${producers[*]}
do echo -n " msgtys:${mach}"
msgtys[${mach}]="$(../tools/msgtypes.sh log-explorer.json |
msgtys[${mach}]="$(../tools/msgtypes.sh logs-explorer/node-*.json |
jq . --slurp)"; done
echo -n " msgtys:generator"
msgtys_generator="$(../tools/msgtypes.sh generator.json |
jq . --slurp)"
## NOTE: This is a bit too costly, and we know the generator pretty well.
# echo -n " msgtys:generator"
# msgtys_generator="$(../tools/msgtypes.sh logs-explorer/generator.json |
# jq . --slurp)"
msgtys_generator='[]'

echo -n " node-to-node-submission-tids"
sub_tids="$(../tools/generator-logs.sh log-tids generator.json)"
sub_tids="$(../tools/generator-logs.sh log-tids \
logs-explorer/generator.json)"
for tnum in $(seq 0 $(($(echo "$sub_tids" | wc -w) - 1)))
do echo -n " node-to-node-submission:${tnum}"
../tools/generator-logs.sh tid-trace "${tnum}" generator.json \
../tools/generator-logs.sh tid-trace "${tnum}" \
logs-explorer/generator.json \
> generator.submission-thread-trace."${tnum}".json; done

echo -n " added-to-current-chain"
../tools/added-to-current-chain.sh log-explorer.json \
> explorer.added-to-current-chain.csv
for p in ${producers[*]}
do echo -n " added-to-current-chain:$p"
../tools/added-to-current-chain.sh logs-node-*/node-*.json \
> $p.added-to-current-chain.csv; done

jq '{ tx_stats: $txstats[0]
, submission_tids: '"$(jq --slurp <<<$sub_tids)"'
@@ -100,7 +108,7 @@ analyse_tag() {
fi
patch_local_tag "$tag"

rm -rf analysis log-node-*.json log-explorer.json generator.json
rm -rf analysis/ logs-node-*/ logs-explorer/

popd >/dev/null

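To re-run the analysis by hand on an already-fetched run, the same steps can be reproduced from the run's working directory (a sketch — the directory the script pushd's into is not shown in this hunk, so the relative paths are assumptions carried over from the code above):

    tar xaf ../logs/logs-explorer.tar.xz
    tar xaf ../logs/logs-nodes.tar.xz
    ../tools/analyse.sh 'logs-explorer/generator' 'logs-explorer/node' 'last-run/analysis/'
    ../tools/blocksizes.sh logs-explorer/node-*.json | jq . --slurp
    ../tools/msgtypes.sh   logs-explorer/node-*.json | jq . --slurp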
22 changes: 19 additions & 3 deletions scripts/lib-deploy.sh
@@ -124,15 +124,31 @@ nixops_deploy() {
--( benchmarking: ${benchmarking_rev}
--( ops: ${ops_rev} / ${ops_branch} ${ops_checkout_state}
EOF
local cmd=( nixops deploy
--max-concurrent-copy 50 --cores 0 -j 4
${include:+--include $include}
)

local watcher_pid=
if test -n "${watch_deploy}"
then oprint "nixops deploy log:"
{ sleep 0.3; tail -f "$deploylog"; } &
watcher_pid=$!; fi

ln -sf "$deploylog" 'last-deploy.log'
if export BENCHMARKING_PROFILE=${prof}; ! nixops deploy --max-concurrent-copy 50 -j 4 ${include:+--include $include} \
if export BENCHMARKING_PROFILE=${prof}; ! "${cmd[@]}" \
>"$deploylog" 2>&1
then echo "FATAL: deployment failed, full log in ${deploylog}"
echo -e "FATAL: here are the last 200 lines:\n"
tail -n200 "$deploylog"
if test -n "$watcher_pid"
then kill "$watcher_pid" >/dev/null 2>&1 || true
else echo -e "FATAL: here are the last 200 lines:\n"
tail -n200 "$deploylog"; fi
return 1
fi >&2

if test -n "$watcher_pid"
then kill "$watcher_pid" >/dev/null 2>&1 || true; fi

update_deployfiles "$prof" "$deploylog" "$include"
}
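The watcher added above streams the deploy log to the terminal while nixops writes it, and is torn down on both the success and failure paths. The pattern in isolation (a sketch):

    deploylog=last-deploy.log
    { sleep 0.3; tail -f "$deploylog"; } &     # start streaming slightly after the writer opens the file
    watcher_pid=$!
    nixops deploy --max-concurrent-copy 50 --cores 0 -j 4 >"$deploylog" 2>&1
    kill "$watcher_pid" >/dev/null 2>&1 || true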

9 changes: 6 additions & 3 deletions scripts/lib-params.sh
@@ -49,15 +49,18 @@ def profile_name($gtor; $gsis):
}
, { name: "small"
, txs: 1000, add_tx_size: 100, io_arity: 1, tps: 100
, init_cooldown: 20, finish_patience: 3 }
, init_cooldown: 25, finish_patience: 5 }
, { name: "small-32k"
, txs: 1000, add_tx_size: 100, io_arity: 1, tps: 100
, init_cooldown: 20, finish_patience: 3
, init_cooldown: 25, finish_patience: 5
, genesis_profile: 6
}
, { name: "edgesmoke"
, txs: 100, add_tx_size: 100, io_arity: 1, tps: 100
, init_cooldown: 25, finish_patience: 2 }
, { name: "smoke"
, txs: 100, add_tx_size: 100, io_arity: 1, tps: 100
, init_cooldown: 0, finish_patience: 3 }
, init_cooldown: 25, finish_patience: 5 }
] as $generator_aux_profiles
| [ { max_block_size: 2000000 }
8 changes: 8 additions & 0 deletions scripts/lib-profile.sh
@@ -107,6 +107,9 @@ profile_deploy() {
## 2. profile incompatible?
regenesis_causes=()

if test -n "${force_genesis}"
then regenesis_causes+=('--genesis'); fi

if ! genesisjq . >/dev/null 2>&1
then regenesis_causes+=('missing-or-malformed-genesis-metadata')
else
@@ -146,6 +149,7 @@ profile_deploy() {
then redeploy_causes+=(genesis-hash-explorer)
include+=('explorer'); fi


if test ! -f "${deployfile['producers']}"
then redeploy_causes+=(missing-producers-deployfile)
include+=($(params producers))
@@ -163,6 +167,10 @@ profile_deploy() {
then redeploy_causes+=(genesis-hash-producers)
include+=($(params producers)); fi

if test -n "${force_deploy}"
then redeploy_causes+=('--deploy')
include=('explorer' $(params producers)); fi

local final_include
if test "${include[0]}" = "${include[1]:-}"
then final_include=$(echo "${include[*]}" | sed 's/explorer explorer/explorer/g')
24 changes: 21 additions & 3 deletions shell.nix
@@ -72,9 +72,27 @@ let
cardano-cli --version
'';
in mkShell {
buildInputs = [ iohkNix.niv nivOverrides nixops nix cardano-cli telnet dnsutils mkDevGenesis nix-diff migrate-keys pandoc
renew-kes-keys create-shelley-genesis-and-keys test-cronjob-script kes-rotation relay-update
] ++ (with cardano-sl-pkgs.nix-tools.exes; lib.optionals (globals.topology.legacyCoreNodes != []) [ cardano-sl-auxx cardano-sl-tools ]);
buildInputs = [
cardano-cli
create-shelley-genesis-and-keys
dnsutils
iohkNix.niv
kes-rotation
migrate-keys
mkDevGenesis
nivOverrides
nix
nix-diff
nixops
pandoc
pstree
relay-update
renew-kes-keys
telnet
test-cronjob-script
] ++ (with cardano-sl-pkgs.nix-tools.exes;
lib.optionals (globals.topology.legacyCoreNodes != [])
[ cardano-sl-auxx cardano-sl-tools ]);
NIX_PATH = "nixpkgs=${path}";
NIXOPS_DEPLOYMENT = "${globals.deploymentName}";
passthru = {
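The reformatted buildInputs list sorts the tools alphabetically; pstree is the only new entry. Entering the environment is unchanged (a sketch, assuming the repository checkout root):

    nix-shell            # picks up shell.nix, exposing cardano-cli, nixops, pstree, etc.
    pstree -p $$         # newly available inside the shell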
