diff --git a/agent/bench-scripts/pbench-run-benchmark b/agent/bench-scripts/pbench-run-benchmark index 577930e810..e1727bb2f4 100755 --- a/agent/bench-scripts/pbench-run-benchmark +++ b/agent/bench-scripts/pbench-run-benchmark @@ -298,6 +298,10 @@ while (scalar @param_sets > 0) { push(@iterations_labels, $iteration_label); } + if (! $pp_only) { + system("pbench-init-tools --group=" . $tool_group . " --dir=" . $base_bench_dir); + } + for (my $index=0; $index<@iterations; $index++) { my $iteration_params = $iterations[$index]; my $iteration_label = $iterations_labels[$index]; @@ -389,6 +393,8 @@ while (scalar @param_sets > 0) { print BULK_FH "echo Sample processing complete!\n"; close(BULK_FH); system(". ./bulk-sample.sh"); + } else { + system("pbench-end-tools --group=" . $tool_group . " --dir=" . $base_bench_dir); } $run_doc{'run'}{'end'} = int time * 1000; # time in milliseconds put_json_file(\%run_doc, $es_dir . "/run/run" . $run_part . "-" . $run_doc{'run'}{'id'} . ".json"); diff --git a/agent/bench-scripts/pbench-user-benchmark b/agent/bench-scripts/pbench-user-benchmark index 5e22560927..4f6c3d503e 100755 --- a/agent/bench-scripts/pbench-user-benchmark +++ b/agent/bench-scripts/pbench-user-benchmark @@ -300,6 +300,9 @@ declare -a parts # the while loop sub-shell does not share FD 0 (stdin) with the executed # benchmark script. Otherwise, benchmarks messing around with FD 0 can lead to # problems. + +pbench-init-tools --group=${tool_group} --dir=${benchmark_run_dir} + while read -u 3 line; do # Current line number, starting from 1 (not zero) (( lineno++ )) @@ -334,6 +337,8 @@ if [[ ${iter_num} -eq 1 ]]; then warn_log "[${script_name}]: iteration file did not contain any iterations!" fi +pbench-end-tools --group=${tool_group} --dir=${benchmark_run_dir} + # Now that we have finished running all the iterations, create the # reference-result symlinks. 
result_dirs="$(ls -1d ${benchmark_run_dir}/*/${sample_name} 2> /dev/null)" diff --git a/agent/bench-scripts/test-bin/pbench-end-tools b/agent/bench-scripts/test-bin/pbench-end-tools new file mode 120000 index 0000000000..039bec18e9 --- /dev/null +++ b/agent/bench-scripts/test-bin/pbench-end-tools @@ -0,0 +1 @@ +mock-cmd \ No newline at end of file diff --git a/agent/bench-scripts/test-bin/pbench-init-tools b/agent/bench-scripts/test-bin/pbench-init-tools new file mode 120000 index 0000000000..039bec18e9 --- /dev/null +++ b/agent/bench-scripts/test-bin/pbench-init-tools @@ -0,0 +1 @@ +mock-cmd \ No newline at end of file diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt index 588aefec0f..b7108d287a 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 beg 
/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt index 12dcaf82f2..2cd7b1d23d 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-duration for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 end 
/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt index 8c1ce15b48..786cacd82f 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt @@ -29,6 +29,8 @@ Running user-benchmark-script with-duration for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00/1-default/sample1 diff --git 
a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt index fde7c56846..9a6cb62938 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt @@ -30,6 +30,8 @@ WARNING:root:Unable to load JSON data from /var/tmp/pbench-test-bench/pbench-age +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt index 0b8a15f7a9..69bb725d12 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt +++ 
b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt @@ -58,6 +58,8 @@ sud +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt index 1cc47a9478..195bc87b00 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt index 7dfa6dafeb..88ee5dfedb 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check 
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt index 110cd3f104..3352fcb50f 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt @@ -23,6 +23,8 @@ +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-tool-meister-start default diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt index f2d6beb069..387229bd44 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt @@ -48,6 +48,8 @@ Running bm arg1 arg2 for iteration 3-iter-three /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/bm arg1 arg2 arg3 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 beg 
/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00/1-iter-one/sample1 diff --git a/agent/tool-scripts/dcgm b/agent/tool-scripts/dcgm new file mode 100755 index 0000000000..fdee80860f --- /dev/null +++ b/agent/tool-scripts/dcgm @@ -0,0 +1,24 @@ +#!/usr/bin/python3 +# -*- mode: python -*- + +import sys +import os +import logging + +PROG = os.path.basename(sys.argv[0]) +logger = logging.getLogger(PROG) +logger.setLevel(logging.DEBUG) +sh = logging.StreamHandler() +sh.setLevel(logging.DEBUG) +shf = logging.Formatter("%(message)s") +sh.setFormatter(shf) +logger.addHandler(sh) + +if len(sys.argv) != 2 or sys.argv[1] != "--help": + logger.info("This script is deprecated, please run it with --help for info on registering the tool.") + logger.info("Run /opt/pbench-agent/tool-scripts/dcgm --help for more info.") + sys.exit(0)  # sys.exit, not the site-injected exit() helper meant for interactive use + +if sys.argv[1] == "--help": + logger.info("Options:") + logger.info("--inst= (required)") diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json new file mode 100644 index 0000000000..b9b265b4d1 --- /dev/null +++ b/agent/tool-scripts/meta.json @@ -0,0 +1,48 @@ +{ + "transient":{ + "blktrace": null, + "bpftrace": null, + "cpuacct": null, + "disk": null, + "dm-cache": null, + "docker": null, + "docker-info": null, + "external-data-source": null, + "haproxy-ocp": null, + "iostat": null, + "jmap": null, + "jstack": null, + "kvm-spinlock": null, + "kvmstat": null, + "kvmtrace": null, + "lockstat": null, + "mpstat": null, + "numastat": null, + "oc": null, + "openvswitch": null, + "pcp": null, + "perf": null, + "pidstat": null, + "pprof": null, + "proc-interrupts": null, + "proc-sched_debug": null, + 
"proc-vmstat": null, + "prometheus-metrics": null, + "qemu-migrate": null, + "rabbit": null, + "sar": null, + "strace": null, + "sysfs": null, + "systemtap": null, + "tcpdump": null, + "turbostat": null, + "user-tool": null, + "virsh-migrate": null, + "vmstat": null + }, + + "persistent":{ + "node-exporter": {"collector": "prometheus", "port": "9100"}, + "dcgm": {"collector": "prometheus", "port": "8000"} + } +} diff --git a/agent/tool-scripts/node-exporter b/agent/tool-scripts/node-exporter new file mode 100755 index 0000000000..c835a99fb6 --- /dev/null +++ b/agent/tool-scripts/node-exporter @@ -0,0 +1,32 @@ +#!/usr/bin/python3 +# -*- mode: python -*- + +import os +import sys +import logging + +PROG = os.path.basename(sys.argv[0]) +logger = logging.getLogger(PROG) +logger.setLevel(logging.DEBUG) +sh = logging.StreamHandler() +sh.setLevel(logging.DEBUG) +shf = logging.Formatter("%(message)s") +sh.setFormatter(shf) +logger.addHandler(sh) + +if len(sys.argv) != 2 or sys.argv[1] != "--help": + logger.info( + "This script is deprecated, please run it with --help for info on registering the tool." + ) + logger.info( + "Run /opt/pbench-agent/tool-scripts/node-exporter --help for more info." 
+ ) + sys.exit(0)  # sys.exit, not the site-injected exit() helper meant for interactive use + +if sys.argv[1] == "--help": + logger.info("Options:") + logger.info( + "--inst= (required; to create the path of the executable, '/node-exporter' will be appended)" + ) + logger.info("Installation Guide: github.com/prometheus/node_exporter") + logger.info("Soon to come: metric enabling/disabling") diff --git a/agent/util-scripts/gold/pbench-end-tools/test-62.txt b/agent/util-scripts/gold/pbench-end-tools/test-62.txt new file mode 100644 index 0000000000..0f4f170071 --- /dev/null +++ b/agent/util-scripts/gold/pbench-end-tools/test-62.txt @@ -0,0 +1,16 @@ ++++ Running test-62 pbench-end-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run +--- Finished test-62 pbench-end-tools (status=0) ++++ pbench tree state +/var/tmp/pbench-test-utils/pbench +/var/tmp/pbench-test-utils/pbench/mock-run +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default +/var/tmp/pbench-test-utils/pbench/tmp +/var/tmp/pbench-test-utils/pbench/tools-v1-default +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat +=== /var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat: +--interval=3 +--- pbench tree state ++++ test-execution.log file contents +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default end +--- test-execution.log file contents diff --git a/agent/util-scripts/gold/pbench-init-tools/test-61.txt b/agent/util-scripts/gold/pbench-init-tools/test-61.txt new file mode 100644 index 0000000000..6e0db73c47 --- /dev/null +++ b/agent/util-scripts/gold/pbench-init-tools/test-61.txt @@ -0,0 +1,16 @@ ++++ Running test-61 pbench-init-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run +--- Finished test-61 pbench-init-tools (status=0) ++++ pbench tree state +/var/tmp/pbench-test-utils/pbench 
+/var/tmp/pbench-test-utils/pbench/mock-run +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default +/var/tmp/pbench-test-utils/pbench/tmp +/var/tmp/pbench-test-utils/pbench/tools-v1-default +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat +=== /var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat: +--interval=3 +--- pbench tree state ++++ test-execution.log file contents +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default init +--- test-execution.log file contents diff --git a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt index 501c71c0ea..7d91023ed3 100644 --- a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt +++ b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt @@ -1,5 +1,12 @@ +++ Running test-09 pbench-kill-tools --group=barfoo ERROR: required directory argument missing. 
+The following are required: + + -g str --group=str, str = a tool group used in a benchmark + (the default group is 'default') + + -d str --dir=str, str = a directory where pbench-kill-tools + will store and process data --- Finished test-09 pbench-kill-tools (status=1) +++ pbench tree state /var/tmp/pbench-test-utils/pbench diff --git a/agent/util-scripts/gold/pbench-register-tool/test-44.txt b/agent/util-scripts/gold/pbench-register-tool/test-44.txt index c983f20aae..4b15e6a57b 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-44.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-44.txt @@ -55,6 +55,8 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-46.txt b/agent/util-scripts/gold/pbench-register-tool/test-46.txt index d93abb1004..605883fab7 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-46.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-46.txt @@ -55,6 +55,8 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-47.txt b/agent/util-scripts/gold/pbench-register-tool/test-47.txt index 6fa4518a13..f43f893c7c 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-47.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-47.txt @@ -55,6 +55,8 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt index fe87040328..3b442f345b 100644 --- 
a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt @@ -52,6 +52,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default /var/tmp/pbench-test-utils/pbench/pbench.log /var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-default diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt index 0576cb1a63..055080ff08 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt @@ -124,6 +124,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite /var/tmp/pbench-test-utils/pbench/pbench.log /var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-lite diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt index 6b4fd3c2d8..293d8515b8 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt @@ -124,6 +124,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite /var/tmp/pbench-test-utils/pbench/pbench.log 
/var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-lite diff --git a/agent/util-scripts/pbench-end-tools b/agent/util-scripts/pbench-end-tools new file mode 120000 index 0000000000..eb101607d9 --- /dev/null +++ b/agent/util-scripts/pbench-end-tools @@ -0,0 +1 @@ +pbench-start-tools \ No newline at end of file diff --git a/agent/util-scripts/pbench-init-tools b/agent/util-scripts/pbench-init-tools new file mode 120000 index 0000000000..eb101607d9 --- /dev/null +++ b/agent/util-scripts/pbench-init-tools @@ -0,0 +1 @@ +pbench-start-tools \ No newline at end of file diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools index e19ccbf8b3..fb904bbf8a 100755 --- a/agent/util-scripts/pbench-postprocess-tools +++ b/agent/util-scripts/pbench-postprocess-tools @@ -17,16 +17,20 @@ def_group="default" group="${def_group}" dir="" -# Process options and arguments - -opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") -if [[ ${?} -ne 0 ]]; then - printf "\n%s: you specified an invalid option\n\n" "${script_name}" +function usage { printf "The following are required:\n\n" printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n" printf -- "\t (the default group is '%s')\n\n" "${def_group}" printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}" printf -- "\t will store and process data\n" +} + +# Process options and arguments + +opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") +if [[ ${?} -ne 0 ]]; then + printf "\n%s: you specified an invalid option\n\n" "${script_name}" + usage >&2 exit 1 fi eval set -- "${opts}" @@ -54,11 +58,13 @@ while true; do done if [[ -z "${group}" ]]; then - printf -- "ERROR: required tool group parameter missing.\n" >&2 + printf -- "ERROR: required tool group parameter missing.\n\n" >&2 + usage >&2 exit 1 fi if [[ -z "${dir}" ]]; then - printf -- "ERROR: required directory argument 
missing.\n" >&2 + printf -- "ERROR: required directory argument missing.\n\n" >&2 + usage >&2 exit 1 fi diff --git a/agent/util-scripts/pbench-register-tool b/agent/util-scripts/pbench-register-tool index 8b647c733a..994fd40d4a 100755 --- a/agent/util-scripts/pbench-register-tool +++ b/agent/util-scripts/pbench-register-tool @@ -116,9 +116,7 @@ function usage() { printf -- "\tdenoted by a leading hash, or pound (\"#\"), character.\n" printf -- "\nAvailable tools:\n" local tool="" - for tool in $(find ${pbench_bin}/tool-scripts -maxdepth 1 ! -type d ! -name '*README*' ! -name base-tool ! -name unittests -printf "%P\n" 2> /dev/null | sort); do - printf -- "\t${tool}\n" - done + python3 -c "import sys, json; meta = json.load(open(sys.argv[1])); [print(f'\t{tool}') for tool in (*meta['transient'].keys(), *meta['persistent'].keys()) ]" ${pbench_bin}/tool-scripts/meta.json # 1 2 3 4 5 6 7 8 # (no tab) 12345678901234567890123456789012345678901234567890123456789012345678901234567890 printf -- "\nFor a list of tool specific options, run:\n" diff --git a/agent/util-scripts/pbench-start-tools b/agent/util-scripts/pbench-start-tools index 5d54c5212b..a57d624cc8 100755 --- a/agent/util-scripts/pbench-start-tools +++ b/agent/util-scripts/pbench-start-tools @@ -10,7 +10,7 @@ action=${_suffix%%-*} # source the base script . 
"${pbench_bin}"/base -if [[ "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then +if [[ "${action}" != "end" && "${action}" != "init" && "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then error_log "[${script_name}] action \"${action}\" is not supported" exit 1 fi @@ -24,16 +24,20 @@ def_group="default" group="${def_group}" dir="" -# Process options and arguments - -opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") -if [[ ${?} -ne 0 ]]; then - printf "\n%s: you specified an invalid option\n\n" "${script_name}" +function usage { printf "The following are required:\n\n" printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n" printf -- "\t (the default group is '%s')\n\n" "${def_group}" printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}" printf -- "\t will store and process data\n" +} + +# Process options and arguments + +opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") +if [[ ${?} -ne 0 ]]; then + printf "\n%s: you specified an invalid option\n\n" "${script_name}" + usage >&2 exit 1 fi eval set -- "${opts}" @@ -62,10 +66,12 @@ done if [[ -z "${group}" ]]; then printf -- "ERROR: required tool group parameter missing.\n" >&2 + usage >&2 exit 1 fi if [[ -z "${dir}" ]]; then printf -- "ERROR: required directory argument missing.\n" >&2 + usage >&2 exit 1 fi @@ -86,7 +92,7 @@ fi # The tool group's directory which stores tool output for all hosts. 
tool_output_dir="${dir}/tools-${group}" -if [[ "${action}" == "start" ]]; then +if [[ "${action}" == "start" || "${action}" == "init" ]]; then mkdir -p ${tool_output_dir} if [[ ${?} -ne 0 ]]; then error_log "[${script_name}] failed to create tool output directory, \"${tool_output_dir}\"" diff --git a/agent/util-scripts/pbench-tool-meister-client b/agent/util-scripts/pbench-tool-meister-client index dc0ae9019c..fa52b2c142 100755 --- a/agent/util-scripts/pbench-tool-meister-client +++ b/agent/util-scripts/pbench-tool-meister-client @@ -25,7 +25,7 @@ tm_channel = "tool-meister-chan" cl_channel = "tool-meister-client" # List of allowed actions -allowed_actions = ("start", "stop", "send", "kill") +allowed_actions = ("end", "init", "send", "start", "stop", "kill") def main(argv): diff --git a/agent/util-scripts/pbench-tool-meister-start b/agent/util-scripts/pbench-tool-meister-start index e0a7f9067b..75403b9b7e 100755 --- a/agent/util-scripts/pbench-tool-meister-start +++ b/agent/util-scripts/pbench-tool-meister-start @@ -32,6 +32,8 @@ import redis from pbench.agent.tool_data_sink import main as tds_main from pbench.agent.tool_meister import main as tm_main +from pbench.agent import PbenchAgentConfig +import pbench.agent.toolmetadata as toolmetadata # Port number is "One Tool" in hex 0x17001 @@ -369,10 +371,32 @@ def main(argv): ), f"bad channel: {resp!r}" assert resp["data"] == 1, f"bad data: {resp!r}" + # 2.5. 
Add tool metadata json to redis + try: + inst_dir = PbenchAgentConfig(os.environ["_PBENCH_AGENT_CONFIG"]).pbench_install_dir + except BadConfig as exc: + logger.error("%s", exc) + return 1 + except Exception: + logger.error("Unexpected error encountered logging pbench agent configuration: '%s'", exc) + return 1 + + try: + tm_start_path = Path(inst_dir).resolve(strict=True) + except FileNotFoundError: + logger.error("Unable to determine proper installation directory, '%s' not found", inst_dir) + return 1 + except Exception as exc: + logger.exception("Unexpected error encountered resolving installation directory: '%s'", exc) + return 1 + tool_metadata = toolmetadata.ToolMetadata("json", tm_start_path, logger) + tool_metadata.loadIntoRedis(redis_server) + + # 3. Start the tool-data-sink process # - leave a PID file for the tool data sink process tds_param_key = "tds-{}".format(group) - tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir) + tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir, group=group) try: redis_server.set(tds_param_key, json.dumps(tds, sort_keys=True)) except Exception: diff --git a/agent/util-scripts/samples/pbench-end-tools/test-62 b/agent/util-scripts/samples/pbench-end-tools/test-62 new file mode 120000 index 0000000000..b8de329238 --- /dev/null +++ b/agent/util-scripts/samples/pbench-end-tools/test-62 @@ -0,0 +1 @@ +../pbench-start-tools/test-05 \ No newline at end of file diff --git a/agent/util-scripts/samples/pbench-init-tools/test-61 b/agent/util-scripts/samples/pbench-init-tools/test-61 new file mode 120000 index 0000000000..b8de329238 --- /dev/null +++ b/agent/util-scripts/samples/pbench-init-tools/test-61 @@ -0,0 +1 @@ +../pbench-start-tools/test-05 \ No newline at end of file diff --git a/agent/util-scripts/test-bin/test-client-tool-meister b/agent/util-scripts/test-bin/test-client-tool-meister index 3f46ab28a4..cf65d8c26f 100755 --- a/agent/util-scripts/test-bin/test-client-tool-meister +++ 
b/agent/util-scripts/test-bin/test-client-tool-meister @@ -68,6 +68,13 @@ if [[ ${status} -ne 0 ]]; then exit 1 fi +_timeout pbench-init-tools --group="${group}" --dir="${benchmark_run_dir}" +status=${?} +if [[ ${status} -ne 0 ]]; then + printf -- "ERROR - \"pbench-init-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2 + exit 1 +fi + sample="sample42" iterations="0-iter-zero 1-iter-one" @@ -113,6 +120,13 @@ if [[ "${3}" == "delayed-send" ]]; then done fi +_timeout pbench-end-tools --group="${group}" --dir="${benchmark_run_dir}" +status=${?} +if [[ ${status} -ne 0 ]]; then + printf -- "ERROR - \"pbench-end-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2 + exit 1 +fi + _timeout pbench-tool-meister-stop status=${?} if [[ ${status} -ne 0 ]]; then diff --git a/agent/util-scripts/unittests b/agent/util-scripts/unittests index c97128e020..990256dddc 100755 --- a/agent/util-scripts/unittests +++ b/agent/util-scripts/unittests @@ -367,6 +367,8 @@ declare -A tools=( [test-58]="pbench-stop-tools" [test-59]="pbench-postprocess-tools" [test-60]="pbench-send-tools" + [test-61]="pbench-init-tools" + [test-62]="pbench-end-tools" ) declare -A options=( @@ -441,6 +443,8 @@ declare -A options=( [test-58]="--group=default --dir=42-iter/sample42" [test-59]="--group=foobar --dir=42-iter/sample42" [test-60]="--group=default --dir=42-iter/sample42" + [test-61]="--group=default --dir=${_testdir}/mock-run" + [test-62]="--group=default --dir=${_testdir}/mock-run" ) declare -A expected_status=( @@ -478,6 +482,8 @@ declare -A pre_hooks=( [test-47]='mkdir ${_testdir}/tmp; printf -- "# good list with no labels\none.example.com\ntwo.example.com\nthree.example.com\n" > ${_testdir}/tmp/remotes.lis' [test-48]='mkdir ${_testdir}/tmp; printf -- "%s\n" "30%" > ${_testdir}/tmp/foo.txt' [test-55]='pbench-register-tool --name=mpstat --remote=localhost > /dev/null; mkdir ${_testdir}/mock-run' + [test-61]='ln -s mock-pbench-tool-meister-client 
${_testopt}/unittest-scripts/pbench-tool-meister-client' + [test-62]='ln -s mock-pbench-tool-meister-client ${_testopt}/unittest-scripts/pbench-tool-meister-client; mkdir -p ${_testdir}/mock-run/tools-default' ) declare -A post_hooks=( @@ -487,6 +493,8 @@ declare -A post_hooks=( [test-19]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' [test-56]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}' [test-57]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}' + [test-61]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' + [test-62]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' ) tests="${*}" diff --git a/lib/pbench/agent/tool_data_sink.py b/lib/pbench/agent/tool_data_sink.py index c1812945e2..e2edf94b7a 100644 --- a/lib/pbench/agent/tool_data_sink.py +++ b/lib/pbench/agent/tool_data_sink.py @@ -16,6 +16,7 @@ import json import logging import os +import socket import subprocess import sys import tempfile @@ -32,6 +33,8 @@ from bottle import Bottle, ServerAdapter, request, abort +import pbench.agent.toolmetadata as toolmetadata + # Read in 64 KB chunks off the wire for HTTP PUT requests. 
_BUFFER_SIZE = 65536 @@ -68,8 +71,7 @@ class DataSinkWsgiRequestHandler(WSGIRequestHandler): _logger = logger def log_error(self, format_str, *args): - """log_error - log the error message with the client address - """ + """log_error - log the error message with the client address""" self._logger.error( "%s - - %s", self.address_string(), format_str % args ) @@ -83,8 +85,7 @@ def log_message(self, format_str, *args): ) def log_request(self, code="-", size="-"): - """log_request - log the request as an informational message. - """ + """log_request - log the request as an informational message.""" if isinstance(code, HTTPStatus): code = code.value self._logger.info( @@ -111,6 +112,128 @@ def stop(self): self._server.shutdown() +class BaseCollector: + allowed_tools = {"noop-collector": None} + + def __init__( + self, benchmark_run_dir, tool_group, host_tools_dict, logger, tool_metadata + ): + self.run = None + self.benchmark_run_dir = benchmark_run_dir + self.tool_group = tool_group + self.host_tools_dict = host_tools_dict + self.logger = logger + self.tool_metadata = tool_metadata + self.tool_group_dir = self.benchmark_run_dir / f"tools-{self.tool_group}" + self.abort_launch = True + + def launch(self): + pass + + def terminate(self): + if self.abort_launch: + return 0 + + self.run.terminate() + self.run.wait() + return 1 + + +class PromCollector(BaseCollector): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.volume = self.tool_group_dir / "prometheus" + + def launch(self): + + if self.host_tools_dict: + self.abort_launch = False + else: + return 0 + + config = open("prometheus.yml", "w") + + config.write("global:\n scrape_interval: 1s\n evaluation_interval: 1s\n\n") + # config.write("alerting:\n alertmanagers:\n - static_configs:\n - targets:\n\nrule_files:\n\n") + config.write( + "scrape_configs:\n - job_name: 'prometheus'\n static_configs:\n - targets: ['localhost:9090']\n\n" + ) + + for host in self.host_tools_dict: + if 
host.startswith("local"): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) + host_ip = str(s.getsockname()[0]) + s.close() + else: + host_ip = host + + for tool in self.host_tools_dict[host]: + port = self.tool_metadata.getProperties(tool)["port"] + config.write( + " - job_name: '{}_{}'\n static_configs:\n - targets: ['{}:{}']\n\n".format( + host_ip, tool, host_ip, port + ) + ) + + config.close() + + prom_logs = open("prom.log", "w") + + if self.abort_launch: + prom_logs.write("Prometheus launch aborted, no persistent tools registered") + prom_logs.close() + return 0 + + args = ["podman", "pull", "prom/prometheus"] + prom_pull = subprocess.Popen(args, stdout=prom_logs, stderr=prom_logs) + prom_pull.wait() + + os.mkdir(self.volume) + args = ["chmod", "777", self.volume] + volume_dir = subprocess.Popen(args) + volume_dir.wait() + + args = [ + "podman", + "run", + "-p", + "9090:9090", + "-v", + f"{self.volume}:/prometheus:Z", + "-v", + f"{self.benchmark_run_dir}/tm/prometheus.yml:/etc/prometheus/prometheus.yml:Z", + "prom/prometheus", + ] + self.run = subprocess.Popen(args, stdout=prom_logs, stderr=prom_logs) + + prom_logs.close() + + return 1 + + def terminate(self): + if super().terminate() == 0: + return 0 + + self.logger.debug("PROM TERMINATED") + + args = [ + "tar", + "--remove-files", + "--exclude", + "prometheus/prometheus_data.tar.gz", + "-zcvf", + f"{self.volume}/prometheus_data.tar.gz", + "-C", + f"{self.tool_group_dir}/", + "prometheus", + ] + data_store = subprocess.Popen(args) + data_store.wait() + + return 1 + + class ToolDataSink(Bottle): """ToolDataSink - sub-class of Bottle representing state for tracking data sent from tool meisters via an HTTP PUT method. 
@@ -119,19 +242,22 @@ class ToolDataSink(Bottle): class Terminate(Exception): pass - def __init__(self, redis_server, channel, benchmark_run_dir, logger): + def __init__(self, redis_server, channel, benchmark_run_dir, tool_group, logger): super(ToolDataSink, self).__init__() # Save external state self.redis_server = redis_server self.channel = channel self.benchmark_run_dir = benchmark_run_dir + self.tool_group = tool_group self.logger = logger # Initialize internal state self._hostname = os.environ["full_hostname"] self.state = None self.tool_data_ctx = None self.directory = None + self.tool_metadata = toolmetadata.ToolMetadata("redis", redis_server, logger) self._data = None + self._prom_server = None self._tm_tracking = None self._lock = Lock() self._cv = Condition(lock=self._lock) @@ -168,8 +294,7 @@ def __init__(self, redis_server, channel, benchmark_run_dir, logger): self.web_server_thread = None def run(self): - """run - Start the Bottle web server running and the watcher thread. - """ + """run - Start the Bottle web server running and the watcher thread.""" self.logger.info("Running Bottle web server ...") try: super().run(server=self._server) @@ -179,8 +304,7 @@ def run(self): self.logger.info("Bottle web server exited") def execute(self): - """execute - Start the Bottle web server running and the watcher thread. - """ + """execute - Start the Bottle web server running and the watcher thread.""" self.web_server_thread = Thread(target=self.run) self.web_server_thread.start() self.logger.debug("web server 'run' thread started, processing payloads ...") @@ -280,14 +404,45 @@ def _fetch_tms(self): assert pids["ds"]["hostname"] == self._hostname, f"what? {pids['ds']!r}" for tm in pids["tm"]: assert tm["kind"] == "tm", f"what? {tm!r}" + # Fetch all the tool data for this Tool Meister. 
+ tm_name = tm["hostname"] + tools_json_str_raw = self.redis_server.get( + f"tm-{self.tool_group}-{tm_name}" + ) + tools_json_str = tools_json_str_raw.decode("utf-8") + tools = json.loads(tools_json_str)["tools"] + noop_tools = [] + persistent_tools = [] + transient_tools = [] + for tool_name in tools.keys(): + if tool_name in self.tool_metadata.getPersistentTools(): + persistent_tools.append(tool_name) + elif tool_name in BaseCollector.allowed_tools: + noop_tools.append(tool_name) + elif tool_name in self.tool_metadata.getTransientTools(): + transient_tools.append(tool_name) + else: + self.logger.error( + f"Registered tool {tool_name} is not recognized in tool metadata" + ) + tm["noop_tools"] = noop_tools + tm["persistent_tools"] = persistent_tools + tm["transient_tools"] = transient_tools + if tm["hostname"] == self._hostname: # The "localhost" tool meister instance does not send data # to the tool data sink, it just writes it locally. - continue - # The `posted` field is "dormant" to start (as set below), - # "waiting" when we transition to the "send" state, "dormant" - # when we receive data from the target Tool Meister host. - tm["posted"] = "dormant" + tm["posted"] = None + elif not transient_tools: + # Only Tool Meisters with at least one transient tool will + # send data to a data sink, so ignore those Tool Meisters + # without any. + tm["posted"] = None + else: + # The `posted` field is "dormant" to start (as set below), + # "waiting" when we transition to the "send" state, "dormant" + # when we receive data from the target Tool Meister host. + tm["posted"] = "dormant" tms[tm["hostname"]] = tm return tms @@ -304,6 +459,8 @@ def _wait_for_all_data(self): done = False while not done: for hostname, tm in self._tm_tracking.items(): + if tm["posted"] is None: + continue if tm["posted"] == "waiting": # Don't bother checking any other Tool Meister when we # have at least one that has not sent any data. 
@@ -331,6 +488,8 @@ def _change_tm_tracking(self, curr, new): if self._tm_tracking is None: return for hostname, tm in self._tm_tracking.items(): + if tm["posted"] is None: + continue assert ( tm["posted"] == curr ), f"_change_tm_tracking unexpected tm posted value, {tm!r}" @@ -398,7 +557,32 @@ def state_change(self, data): # Transition to "send" state should reset self._tm_tracking with self._lock: - if self.state == "send": + if self.state == "init": + prom_tool_dict = {} + for tm in self._tm_tracking: + prom_tools = [] + persist_tools = self._tm_tracking[tm]["persistent_tools"] + for tool in persist_tools: + tool_data = self.tool_metadata.getProperties(tool) + if tool_data["collector"] == "prometheus": + prom_tools.append(tool) + if len(prom_tools) > 0: + prom_tool_dict[self._tm_tracking[tm]["hostname"]] = prom_tools + self.logger.debug(prom_tool_dict) + + if prom_tool_dict: + self._prom_server = PromCollector( + self.benchmark_run_dir, + self.tool_group, + prom_tool_dict, + self.logger, + self.tool_metadata, + ) + self._prom_server.launch() + elif self.state == "end": + if self._prom_server: + self._prom_server.terminate() + elif self.state == "send": self._change_tm_tracking("dormant", "waiting") # The Tool Data Sink cannot send success until all the Tool # Meisters have sent their collected data, so wait for all the @@ -687,6 +871,7 @@ def main(argv): params = json.loads(params_str) channel = params["channel"] benchmark_run_dir = Path(params["benchmark_run_dir"]).resolve(strict=True) + tool_group = params["group"] except Exception as ex: logger.error("Unable to fetch and decode parameter key, %s: %s", param_key, ex) return 5 @@ -723,6 +908,7 @@ def main(argv): logger.debug("constructing Redis() object") try: redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) + except Exception as e: logger.error( "Unable to connect to redis server, %s:%s: %s", @@ -734,7 +920,9 @@ def main(argv): else: logger.debug("constructed Redis() object") - tds_app = 
ToolDataSink(redis_server, channel, benchmark_run_dir, logger) + tds_app = ToolDataSink( + redis_server, channel, benchmark_run_dir, tool_group, logger + ) tds_app.execute() except OSError as exc: if exc.errno == errno.EADDRINUSE: diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py index d2322696ad..988d8db64f 100644 --- a/lib/pbench/agent/tool_meister.py +++ b/lib/pbench/agent/tool_meister.py @@ -59,6 +59,7 @@ import redis from pbench.server.utils import md5sum +import pbench.agent.toolmetadata as toolmetadata # Path to external tar executable. @@ -72,6 +73,83 @@ class ToolException(Exception): pass +class PersistentTool: + def __init__(self, name, tool_opts, logger): + self.name = name + self.tool_opts = tool_opts.split(" ") + self.logger = logger + self.install_path = None + + # Looking for required --inst option + # Reformatting appropriately if found + for opt in self.tool_opts: + if opt.startswith("--inst="): + if opt[len(opt) - 1] == "\n": + self.install_path = opt[7 : len(opt) - 1] + else: + self.install_path = opt[7:] + self.logger.debug("FOUND") + else: + self.logger.debug("NOT FOUND SOMEHOW") + + self.process = None + self.failure = False + + def start(self): + if self.install_path is None: + self.failure = True + self.logger.error( + "NO INSTALL PATH PROPERLY GIVEN AS PERSISTENT TOOL OPTION, see /opt/pbench-agent/nodexporter --help" + ) + return + + if self.name == "node-exporter": + self.logger.debug(self.install_path) + + if not os.path.isfile(self.install_path + "/node_exporter"): + self.logger.info( + self.install_path + "/node_exporter" + " does not exist" + ) + self.failure = True + return 0 + + args = [self.install_path + "/node_exporter"] + self.process = subprocess.Popen( + args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT + ) + elif self.name == "dcgm": + os.environ["PYTHONPATH"] = ( + self.install_path + + "/bindings:" + + self.install_path + + "/bindings/common" + ) + + script_path = self.install_path + 
"/samples/scripts/dcgm_prometheus.py" + if not os.path.isfile(script_path): + self.logger.info(script_path + " does not exist") + self.failure = True + return 0 + + args = [f"python2 {script_path}"] + self.process = subprocess.Popen(args, shell=True) + else: + self.logger.error("INVALID PERSISTENT TOOL NAME") + self.failure = True + return 0 + + return 1 + + def stop(self): + if not self.failure: + self.process.terminate() + self.process.wait() + return 1 + + self.logger.error("Nothing to terminate") + return 0 + + class Tool(object): """Encapsulates all the state needed to manage a tool running as a background process. @@ -284,6 +362,10 @@ def fetch_params(params): def __init__(self, pbench_bin, params, redis_server, logger): self.logger = logger + self.tool_metadata = toolmetadata.ToolMetadata( + "redis", redis_server, self.logger + ) + self.persist_tools = self.tool_metadata.getPersistentTools() self.pbench_bin = pbench_bin ret_val = self.fetch_params(params) ( @@ -295,6 +377,7 @@ def __init__(self, pbench_bin, params, redis_server, logger): self._tools, ) = ret_val self._running_tools = dict() + self._persistent_tools = dict() self._rs = redis_server logger.debug("pubsub") self._pubsub = self._rs.pubsub() @@ -315,14 +398,18 @@ def __init__(self, pbench_bin, params, redis_server, logger): ), f"Unexpected 'channel': {resp!r}" assert resp["data"] == 1, f"Unexpected 'data': {resp!r}" logger.debug("next done") - # We start in the "idle" state. - self.state = "idle" - self._valid_states = frozenset(["idle", "running"]) + # We start in the "startup" state, waiting for first "init" action. 
+ self.state = "startup" + self._valid_states = frozenset(["startup", "idle", "running", "shutdown"]) self._state_trans = { + "end": {"curr": "idle", "next": "shutdown", "action": self.end_tools}, + "init": {"curr": "startup", "next": "idle", "action": self.init_tools}, "start": {"curr": "idle", "next": "running", "action": self.start_tools}, "stop": {"curr": "running", "next": "idle", "action": self.stop_tools}, } - self._valid_actions = frozenset(["start", "stop", "send", "terminate"]) + self._valid_actions = frozenset( + ["end", "init", "send", "start", "stop", "terminate"] + ) for key in self._state_trans.keys(): assert ( key in self._valid_actions @@ -476,6 +563,38 @@ def _send_client_status(self, status): ret_val = 0 return ret_val + def init_tools(self, data): + """init_tools - setup all registered tools which have data collectors. + + The Tool Data Sink will be setting up the actual processes which + collect data from these tools. + """ + failures = 0 + tool_cnt = 0 + for name, tool_opts in self._tools.items(): + if name not in self.persist_tools: + continue + tool_cnt += 1 + try: + persistent_tool = PersistentTool(name, tool_opts, self.logger) + persistent_tool.start() + + self.logger.debug("NAME: " + name + " TOOL OPTS: " + tool_opts) + except Exception: + self.logger.exception( + "Failed to init PersistentTool %s running in background", name + ) + failures += 1 + continue + else: + self._persistent_tools[name] = persistent_tool + if failures > 0: + msg = f"{failures} of {tool_cnt} persistent tools failed to start" + self._send_client_status(msg) + else: + self._send_client_status("success") + return failures + def start_tools(self, data): """start_tools - start all registered tools executing in the background @@ -552,6 +671,8 @@ def start_tools(self, data): failures = 0 tool_cnt = 0 for name, tool_opts in sorted(self._tools.items()): + if name in self.persist_tools: + continue tool_cnt += 1 try: tool = Tool( @@ -572,10 +693,7 @@ def start_tools(self, 
data): else: self._running_tools[name] = tool if failures > 0: - if failures == tool_cnt: - msg = "failure" - else: - msg = f"{failures} of {tool_cnt} tools failed to start" + msg = f"{failures} of {tool_cnt} tools failed to start" self._send_client_status(msg) else: self._send_client_status("success") @@ -590,11 +708,13 @@ def _wait_for_tools(self): """ failures = 0 for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue try: tool = self._running_tools[name] except KeyError: self.logger.error( - "INTERNAL ERROR - tool %s not found in list of running tools", name, + "INTERNAL ERROR - tool %s not found in list of running tools", name ) failures += 1 continue @@ -628,12 +748,16 @@ def stop_tools(self, data): return False failures = 0 + tool_cnt = 0 for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue + tool_cnt += 1 try: tool = self._running_tools[name] except KeyError: self.logger.error( - "INTERNAL ERROR - tool %s not found in list of running tools", name, + "INTERNAL ERROR - tool %s not found in list of running tools", name ) failures += 1 continue @@ -649,6 +773,8 @@ def stop_tools(self, data): # Clean up the running tools data structure explicitly ahead of # potentially receiving another start tools. for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue try: del self._running_tools[name] except KeyError: @@ -664,9 +790,11 @@ def stop_tools(self, data): self._directory = None self._tool_dir = None - self._send_client_status( - "success" if failures == 0 else "failures stopping tools" - ) + if failures > 0: + msg = f"{failures} of {tool_cnt} failed stopping tools" + self._send_client_status(msg) + else: + self._send_client_status("success") return failures def send_tools(self, data): @@ -679,6 +807,11 @@ def send_tools(self, data): payload matches what was previously provided to a "start tools" action. 
""" + + if len(set(self._tools.keys()) - set(self.persist_tools)) == 0: + self._send_client_status("success") + return 0 + directory = data["directory"] try: tool_dir = self.directories[directory] @@ -768,7 +901,10 @@ def send_tools(self, data): headers = {"md5sum": tar_md5} directory_bytes = data["directory"].encode("utf-8") tool_data_ctx = hashlib.md5(directory_bytes).hexdigest() - url = f"http://{self._controller}:8080/tool-data/{tool_data_ctx}/{self._hostname}" + url = ( + f"http://{self._controller}:8080/tool-data" + f"/{tool_data_ctx}/{self._hostname}" + ) sent = False retries = 200 while not sent: @@ -808,7 +944,8 @@ def send_tools(self, data): shutil.rmtree(parent_dir) except Exception: self.logger.exception( - "Failed to remove tool data hierarchy, '%s'", + "Failed to remove tool data" + " hierarchy, '%s'", parent_dir, ) failures += 1 @@ -838,7 +975,7 @@ def send_tools(self, data): ) except Exception as exc: self.logger.warning( - "unexpected error removing tools tar ball, '%s': %s", tar_file, exc, + "unexpected error removing tools tar ball, '%s': %s", tar_file, exc ) self._send_client_status( @@ -846,6 +983,38 @@ def send_tools(self, data): ) return failures + def end_tools(self, data): + """end_tools - stop all the persistent data collection tools. 
+ """ + failures = 0 + tool_cnt = 0 + for name in self._tools.keys(): + if name not in self.persist_tools: + continue + tool_cnt += 1 + try: + persistent_tool = self._persistent_tools[name] + except KeyError: + self.logger.error( + "INTERNAL ERROR - tool %s not in list of persistent tools", name, + ) + failures += 1 + continue + try: + persistent_tool.stop() + except Exception: + self.logger.exception( + "Failed to stop persistent tool %s running in background", name + ) + failures += 1 + + if failures > 0: + msg = f"{failures} of {tool_cnt} failed stopping persistent tools" + self._send_client_status(msg) + else: + self._send_client_status("success") + return failures + def main(argv): """Main program for the Tool Meister. diff --git a/lib/pbench/agent/toolmetadata.py b/lib/pbench/agent/toolmetadata.py new file mode 100644 index 0000000000..6f32c6ead9 --- /dev/null +++ b/lib/pbench/agent/toolmetadata.py @@ -0,0 +1,115 @@ +from pathlib import Path +import json +import os + + +class ToolMetadataExc(Exception): + pass + + +class ToolMetadata: + def __init__(self, mode, context, logger): + self.logger = logger + assert mode in ( + "redis", + "json", + ), f"Logic bomb! Unexpected mode, {mode}, encountered constructing tool meta data" + assert ( + context + ), "Logic bomb! 
No context given on ToolMetadata object construction" + self.mode = mode + if mode == "redis": + self.redis_server = context + self.json_file = None + else: + self.redis_server = None + json_path = Path(context, "tool-scripts", "meta.json") + try: + self.json = json_path.resolve(strict=True) + except FileNotFoundError: + raise ToolMetadataExc(f"missing {json_path}") + except Exception: + raise + self.data = self.__getInitialData() + + def __getInitialData(self): + if self.mode == "json": + if not os.path.isfile(self.json): + self.logger.error( + "There is no tool-scripts/meta.json in given install dir" + ) + return None + with self.json.open("r") as json_file: + metadata = json.load(json_file) + elif self.mode == "redis": + try: + meta_raw = self.redis_server.get("tool-metadata") + except Exception: + self.logger.exception( + "Failure to fetch tool metadata from the Redis server" + ) + raise + else: + if meta_raw is None: + self.logger.error("Metadata has not been loaded into redis yet") + return None + try: + metadata = json.loads(meta_raw.decode("utf-8")) + except Exception as exc: + self.logger.error( + "Bad metadata loaded into Redis server, '%s', json=%r", + exc, + meta_raw, + ) + return None + return metadata + + def __dataCheck(self): + """Check for existing/loadable data, return True if retreival possible, False otherwise""" + if not self.data: + self.data == self.__getInitialData() + if not self.data: + self.logger.error(f"Unable to access data through {self.mode}") + return False + return True + + def getFullData(self): + if self.__dataCheck(): + return self.data + return None + + def getPersistentTools(self): + if self.__dataCheck(): + return list(self.data["persistent"].keys()) + return None + + def getTransientTools(self): + if self.__dataCheck(): + return list(self.data["transient"].keys()) + return None + + def getProperties(self, tool): + if tool in self.data["persistent"].keys(): + return self.data["persistent"][tool] + elif tool in 
self.data["transient"].keys(): + return self.data["transient"][tool] + + def loadIntoRedis(self, info): + if self.mode == "redis": + try: + self.json = Path(info).resolve(strict=True) + except FileNotFoundError: + raise ToolMetadataExc(f"missing {info}") + except Exception: + raise + elif self.mode == "json": + self.redis_server = info + + try: + with self.json.open("r") as json_file: + metadata = json.load(json_file) + self.redis_server.set("tool-metadata", json.dumps(metadata)) + except Exception: + self.logger.error("Failed to load the data into redis") + raise + return None