From 0380a8fec97c7ab0a35994e03acf8cc83d258b63 Mon Sep 17 00:00:00 2001
From: maxusmusti
Date: Wed, 15 Jul 2020 17:17:36 -0400
Subject: [PATCH 1/2] Prometheus/Node_Exporter Full V1 Integration Commit

This work adds the notion of a "collector" to the Tool Data Sink, and of "persistent tools", which run continuously without cycling through the "start", "stop", and "send" phases. Persistent tools are started during the new "init" phase and stopped during the new "end" phase; the collector is responsible for continuously pulling data from them while they run.

The first actual implementation of this kind of collector is for the Prometheus data collection environment, where a `node-exporter` "persistent tool" is run to provide an end-point from which a prometheus server "collector" pulls data, storing it locally under the run directory (`${benchmark_run_dir}`, e.g. `${benchmark_run_dir}/collector/prometheus`).

This work also adds the `tool-scripts/meta.json` file, which is used to describe which tools are persistent and which are transient (the default).
--- agent/bench-scripts/pbench-run-benchmark | 6 + agent/bench-scripts/pbench-user-benchmark | 5 + agent/bench-scripts/test-bin/pbench-end-tools | 1 + .../bench-scripts/test-bin/pbench-init-tools | 1 + .../tests/pbench-user-benchmark/test-09.txt | 2 + .../tests/pbench-user-benchmark/test-10.txt | 2 + .../tests/pbench-user-benchmark/test-11.txt | 2 + .../tests/pbench-user-benchmark/test-12.txt | 2 + .../tests/pbench-user-benchmark/test-23.txt | 2 + .../tests/pbench-user-benchmark/test-24.txt | 2 + .../tests/pbench-user-benchmark/test-25.txt | 2 + .../tests/pbench-user-benchmark/test-37.txt | 2 + .../tests/pbench-user-benchmark/test-38.txt | 2 + agent/tool-scripts/meta.json | 47 ++++ agent/tool-scripts/node-exporter | 32 +++ .../gold/pbench-end-tools/test-62.txt | 16 ++ .../gold/pbench-init-tools/test-61.txt | 16 ++ .../gold/pbench-kill-tools/test-09.txt | 7 + .../gold/pbench-register-tool/test-44.txt | 1 + .../gold/pbench-register-tool/test-46.txt | 1 + .../gold/pbench-register-tool/test-47.txt | 1 + .../gold/test-client-tool-meister/test-53.txt | 1 + .../gold/test-client-tool-meister/test-56.txt | 1 + .../gold/test-client-tool-meister/test-57.txt | 1 + agent/util-scripts/pbench-end-tools | 1 + agent/util-scripts/pbench-init-tools | 1 + agent/util-scripts/pbench-postprocess-tools | 20 +- agent/util-scripts/pbench-register-tool | 4 +- agent/util-scripts/pbench-start-tools | 20 +- agent/util-scripts/pbench-tool-meister-client | 2 +- agent/util-scripts/pbench-tool-meister-start | 26 ++- .../samples/pbench-end-tools/test-62 | 1 + .../samples/pbench-init-tools/test-61 | 1 + .../test-bin/test-client-tool-meister | 14 ++ agent/util-scripts/unittests | 8 + lib/pbench/agent/tool_data_sink.py | 220 ++++++++++++++++-- lib/pbench/agent/tool_meister.py | 185 +++++++++++++-- lib/pbench/agent/toolmetadata.py | 115 +++++++++ 38 files changed, 722 insertions(+), 51 deletions(-) create mode 120000 agent/bench-scripts/test-bin/pbench-end-tools create mode 120000 
agent/bench-scripts/test-bin/pbench-init-tools create mode 100644 agent/tool-scripts/meta.json create mode 100755 agent/tool-scripts/node-exporter create mode 100644 agent/util-scripts/gold/pbench-end-tools/test-62.txt create mode 100644 agent/util-scripts/gold/pbench-init-tools/test-61.txt create mode 120000 agent/util-scripts/pbench-end-tools create mode 120000 agent/util-scripts/pbench-init-tools create mode 120000 agent/util-scripts/samples/pbench-end-tools/test-62 create mode 120000 agent/util-scripts/samples/pbench-init-tools/test-61 create mode 100644 lib/pbench/agent/toolmetadata.py diff --git a/agent/bench-scripts/pbench-run-benchmark b/agent/bench-scripts/pbench-run-benchmark index 577930e810..e1727bb2f4 100755 --- a/agent/bench-scripts/pbench-run-benchmark +++ b/agent/bench-scripts/pbench-run-benchmark @@ -298,6 +298,10 @@ while (scalar @param_sets > 0) { push(@iterations_labels, $iteration_label); } + if (! $pp_only) { + system("pbench-init-tools --group=" . $tool_group . " --dir=" . $base_bench_dir); + } + for (my $index=0; $index<@iterations; $index++) { my $iteration_params = $iterations[$index]; my $iteration_label = $iterations_labels[$index]; @@ -389,6 +393,8 @@ while (scalar @param_sets > 0) { print BULK_FH "echo Sample processing complete!\n"; close(BULK_FH); system(". ./bulk-sample.sh"); + } else { + system("pbench-end-tools --group=" . $tool_group . " --dir=" . $base_bench_dir); } $run_doc{'run'}{'end'} = int time * 1000; # time in milliseconds put_json_file(\%run_doc, $es_dir . "/run/run" . $run_part . "-" . $run_doc{'run'}{'id'} . ".json"); diff --git a/agent/bench-scripts/pbench-user-benchmark b/agent/bench-scripts/pbench-user-benchmark index 5e22560927..4f6c3d503e 100755 --- a/agent/bench-scripts/pbench-user-benchmark +++ b/agent/bench-scripts/pbench-user-benchmark @@ -300,6 +300,9 @@ declare -a parts # the while loop sub-shell does not share FD 0 (stdin) with the executed # benchmark script. 
Otherwise, benchmarks messing around with FD 0 can lead to # problems. + +pbench-init-tools --group=${tool_group} --dir=${benchmark_run_dir} + while read -u 3 line; do # Current line number, starting from 1 (not zero) (( lineno++ )) @@ -334,6 +337,8 @@ if [[ ${iter_num} -eq 1 ]]; then warn_log "[${script_name}]: iteration file did not contain any iterations!" fi +pbench-end-tools --group=${tool_group} --dir=${benchmark_run_dir} + # Now that we have finished running all the iterations, create the # reference-result symlinks. result_dirs="$(ls -1d ${benchmark_run_dir}/*/${sample_name} 2> /dev/null)" diff --git a/agent/bench-scripts/test-bin/pbench-end-tools b/agent/bench-scripts/test-bin/pbench-end-tools new file mode 120000 index 0000000000..039bec18e9 --- /dev/null +++ b/agent/bench-scripts/test-bin/pbench-end-tools @@ -0,0 +1 @@ +mock-cmd \ No newline at end of file diff --git a/agent/bench-scripts/test-bin/pbench-init-tools b/agent/bench-scripts/test-bin/pbench-init-tools new file mode 120000 index 0000000000..039bec18e9 --- /dev/null +++ b/agent/bench-scripts/test-bin/pbench-init-tools @@ -0,0 +1 @@ +mock-cmd \ No newline at end of file diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt index 588aefec0f..b7108d287a 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools 
--group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt index 12dcaf82f2..2cd7b1d23d 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-duration for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt index 8c1ce15b48..786cacd82f 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt @@ -29,6 +29,8 @@ Running user-benchmark-script with-duration for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt index fde7c56846..9a6cb62938 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt @@ -30,6 +30,8 @@ WARNING:root:Unable to load JSON data from /var/tmp/pbench-test-bench/pbench-age +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt index 0b8a15f7a9..69bb725d12 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt @@ -58,6 +58,8 @@ sud +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00/1-default/sample1 
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt index 1cc47a9478..195bc87b00 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt index 7dfa6dafeb..88ee5dfedb 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt +++ 
b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt @@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00/1-default/sample1 diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt index 110cd3f104..3352fcb50f 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt @@ -23,6 +23,8 @@ +++ test-execution.log file contents /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-tool-meister-start default diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt index f2d6beb069..387229bd44 100644 --- a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt +++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt @@ -48,6 +48,8 @@ Running bm arg1 arg2 for iteration 3-iter-three /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/bm arg1 arg2 arg3 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 --sysinfo=default end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default 
--dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 +/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 beg /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 end /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00/1-iter-one/sample1 diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json new file mode 100644 index 0000000000..3de7b5032a --- /dev/null +++ b/agent/tool-scripts/meta.json @@ -0,0 +1,47 @@ +{ + "transient":{ + "blktrace": null, + "bpftrace": null, + "cpuacct": null, + "disk": null, + "dm-cache": null, + "docker": null, + "docker-info": null, + "external-data-source": null, + "haproxy-ocp": null, + "iostat": null, + "jmap": null, + "jstack": null, + "kvm-spinlock": null, + "kvmstat": null, + "kvmtrace": null, + "lockstat": null, + "mpstat": null, + "numastat": null, + "oc": null, + "openvswitch": null, + "pcp": null, + "perf": null, + "pidstat": null, + "pprof": null, + "proc-interrupts": null, + "proc-sched_debug": null, + "proc-vmstat": null, + "prometheus-metrics": null, + "qemu-migrate": null, + "rabbit": null, + "sar": null, + "strace": null, + "sysfs": null, + "systemtap": null, + "tcpdump": null, + "turbostat": null, + "user-tool": null, + "virsh-migrate": null, + "vmstat": null + }, + + "persistent":{ + "node-exporter": {"collector": "prometheus", "port": "9100"} + } +} diff 
--git a/agent/tool-scripts/node-exporter b/agent/tool-scripts/node-exporter new file mode 100755 index 0000000000..c835a99fb6 --- /dev/null +++ b/agent/tool-scripts/node-exporter @@ -0,0 +1,32 @@ +#!/usr/bin/python3 +# -*- mode: python -*- + +import os +import sys +import logging + +PROG = os.path.basename(sys.argv[0]) +logger = logging.getLogger(PROG) +logger.setLevel(logging.DEBUG) +sh = logging.StreamHandler() +sh.setLevel(logging.DEBUG) +shf = logging.Formatter("%(message)s") +sh.setFormatter(shf) +logger.addHandler(sh) + +if len(sys.argv) != 2 or sys.argv[1] != "--help": + logger.info( + "This script is deprecated, please run it with --help for info on registering the tool." + ) + logger.info( + "Run /opt/pbench-agent/tool-scripts/node-exporter --help for more info." + ) + exit(0) + +if sys.argv[1] == "--help": + logger.info("Options:") + logger.info( + "--inst= (required; to create the path of the executable, '/node-exporter' will be appended)" + ) + logger.info("Installation Guide: github.com/prometheus/node_exporter") + logger.info("Soon to come: metric enabling/disabling") diff --git a/agent/util-scripts/gold/pbench-end-tools/test-62.txt b/agent/util-scripts/gold/pbench-end-tools/test-62.txt new file mode 100644 index 0000000000..0f4f170071 --- /dev/null +++ b/agent/util-scripts/gold/pbench-end-tools/test-62.txt @@ -0,0 +1,16 @@ ++++ Running test-62 pbench-end-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run +--- Finished test-62 pbench-end-tools (status=0) ++++ pbench tree state +/var/tmp/pbench-test-utils/pbench +/var/tmp/pbench-test-utils/pbench/mock-run +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default +/var/tmp/pbench-test-utils/pbench/tmp +/var/tmp/pbench-test-utils/pbench/tools-v1-default +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat +=== 
/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat: +--interval=3 +--- pbench tree state ++++ test-execution.log file contents +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default end +--- test-execution.log file contents diff --git a/agent/util-scripts/gold/pbench-init-tools/test-61.txt b/agent/util-scripts/gold/pbench-init-tools/test-61.txt new file mode 100644 index 0000000000..6e0db73c47 --- /dev/null +++ b/agent/util-scripts/gold/pbench-init-tools/test-61.txt @@ -0,0 +1,16 @@ ++++ Running test-61 pbench-init-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run +--- Finished test-61 pbench-init-tools (status=0) ++++ pbench tree state +/var/tmp/pbench-test-utils/pbench +/var/tmp/pbench-test-utils/pbench/mock-run +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default +/var/tmp/pbench-test-utils/pbench/tmp +/var/tmp/pbench-test-utils/pbench/tools-v1-default +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com +/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat +=== /var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat: +--interval=3 +--- pbench tree state ++++ test-execution.log file contents +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default init +--- test-execution.log file contents diff --git a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt index 501c71c0ea..7d91023ed3 100644 --- a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt +++ b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt @@ -1,5 +1,12 @@ +++ Running test-09 pbench-kill-tools --group=barfoo ERROR: required directory argument missing. 
+The following are required: + + -g str --group=str, str = a tool group used in a benchmark + (the default group is 'default') + + -d str --dir=str, str = a directory where pbench-kill-tools + will store and process data --- Finished test-09 pbench-kill-tools (status=1) +++ pbench tree state /var/tmp/pbench-test-utils/pbench diff --git a/agent/util-scripts/gold/pbench-register-tool/test-44.txt b/agent/util-scripts/gold/pbench-register-tool/test-44.txt index c983f20aae..34a86880f9 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-44.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-44.txt @@ -55,6 +55,7 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-46.txt b/agent/util-scripts/gold/pbench-register-tool/test-46.txt index d93abb1004..a459815044 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-46.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-46.txt @@ -55,6 +55,7 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-47.txt b/agent/util-scripts/gold/pbench-register-tool/test-47.txt index 6fa4518a13..e60db9fcf9 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-47.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-47.txt @@ -55,6 +55,7 @@ Available tools: user-tool virsh-migrate vmstat + node-exporter For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt index fe87040328..3b442f345b 100644 --- 
a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt @@ -52,6 +52,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-default /var/tmp/pbench-test-utils/pbench/pbench.log /var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-default diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt index 0576cb1a63..055080ff08 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt @@ -124,6 +124,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite /var/tmp/pbench-test-utils/pbench/pbench.log /var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-lite diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt index 6b4fd3c2d8..293d8515b8 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt @@ -124,6 +124,7 @@ /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out +/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite /var/tmp/pbench-test-utils/pbench/pbench.log 
/var/tmp/pbench-test-utils/pbench/tmp /var/tmp/pbench-test-utils/pbench/tools-v1-lite diff --git a/agent/util-scripts/pbench-end-tools b/agent/util-scripts/pbench-end-tools new file mode 120000 index 0000000000..eb101607d9 --- /dev/null +++ b/agent/util-scripts/pbench-end-tools @@ -0,0 +1 @@ +pbench-start-tools \ No newline at end of file diff --git a/agent/util-scripts/pbench-init-tools b/agent/util-scripts/pbench-init-tools new file mode 120000 index 0000000000..eb101607d9 --- /dev/null +++ b/agent/util-scripts/pbench-init-tools @@ -0,0 +1 @@ +pbench-start-tools \ No newline at end of file diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools index e19ccbf8b3..fb904bbf8a 100755 --- a/agent/util-scripts/pbench-postprocess-tools +++ b/agent/util-scripts/pbench-postprocess-tools @@ -17,16 +17,20 @@ def_group="default" group="${def_group}" dir="" -# Process options and arguments - -opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") -if [[ ${?} -ne 0 ]]; then - printf "\n%s: you specified an invalid option\n\n" "${script_name}" +function usage { printf "The following are required:\n\n" printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n" printf -- "\t (the default group is '%s')\n\n" "${def_group}" printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}" printf -- "\t will store and process data\n" +} + +# Process options and arguments + +opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") +if [[ ${?} -ne 0 ]]; then + printf "\n%s: you specified an invalid option\n\n" "${script_name}" + usage >&2 exit 1 fi eval set -- "${opts}" @@ -54,11 +58,13 @@ while true; do done if [[ -z "${group}" ]]; then - printf -- "ERROR: required tool group parameter missing.\n" >&2 + printf -- "ERROR: required tool group parameter missing.\n\n" >&2 + usage >&2 exit 1 fi if [[ -z "${dir}" ]]; then - printf -- "ERROR: required directory argument 
missing.\n" >&2 + printf -- "ERROR: required directory argument missing.\n\n" >&2 + usage >&2 exit 1 fi diff --git a/agent/util-scripts/pbench-register-tool b/agent/util-scripts/pbench-register-tool index 8b647c733a..994fd40d4a 100755 --- a/agent/util-scripts/pbench-register-tool +++ b/agent/util-scripts/pbench-register-tool @@ -116,9 +116,7 @@ function usage() { printf -- "\tdenoted by a leading hash, or pound (\"#\"), character.\n" printf -- "\nAvailable tools:\n" local tool="" - for tool in $(find ${pbench_bin}/tool-scripts -maxdepth 1 ! -type d ! -name '*README*' ! -name base-tool ! -name unittests -printf "%P\n" 2> /dev/null | sort); do - printf -- "\t${tool}\n" - done + python3 -c "import sys, json; meta = json.load(open(sys.argv[1])); [print(f'\t{tool}') for tool in (*meta['transient'].keys(), *meta['persistent'].keys()) ]" ${pbench_bin}/tool-scripts/meta.json # 1 2 3 4 5 6 7 8 # (no tab) 12345678901234567890123456789012345678901234567890123456789012345678901234567890 printf -- "\nFor a list of tool specific options, run:\n" diff --git a/agent/util-scripts/pbench-start-tools b/agent/util-scripts/pbench-start-tools index 5d54c5212b..a57d624cc8 100755 --- a/agent/util-scripts/pbench-start-tools +++ b/agent/util-scripts/pbench-start-tools @@ -10,7 +10,7 @@ action=${_suffix%%-*} # source the base script . 
"${pbench_bin}"/base -if [[ "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then +if [[ "${action}" != "end" && "${action}" != "init" && "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then error_log "[${script_name}] action \"${action}\" is not supported" exit 1 fi @@ -24,16 +24,20 @@ def_group="default" group="${def_group}" dir="" -# Process options and arguments - -opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") -if [[ ${?} -ne 0 ]]; then - printf "\n%s: you specified an invalid option\n\n" "${script_name}" +function usage { printf "The following are required:\n\n" printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n" printf -- "\t (the default group is '%s')\n\n" "${def_group}" printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}" printf -- "\t will store and process data\n" +} + +# Process options and arguments + +opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}") +if [[ ${?} -ne 0 ]]; then + printf "\n%s: you specified an invalid option\n\n" "${script_name}" + usage >&2 exit 1 fi eval set -- "${opts}" @@ -62,10 +66,12 @@ done if [[ -z "${group}" ]]; then printf -- "ERROR: required tool group parameter missing.\n" >&2 + usage >&2 exit 1 fi if [[ -z "${dir}" ]]; then printf -- "ERROR: required directory argument missing.\n" >&2 + usage >&2 exit 1 fi @@ -86,7 +92,7 @@ fi # The tool group's directory which stores tool output for all hosts. 
tool_output_dir="${dir}/tools-${group}" -if [[ "${action}" == "start" ]]; then +if [[ "${action}" == "start" || "${action}" == "init" ]]; then mkdir -p ${tool_output_dir} if [[ ${?} -ne 0 ]]; then error_log "[${script_name}] failed to create tool output directory, \"${tool_output_dir}\"" diff --git a/agent/util-scripts/pbench-tool-meister-client b/agent/util-scripts/pbench-tool-meister-client index dc0ae9019c..fa52b2c142 100755 --- a/agent/util-scripts/pbench-tool-meister-client +++ b/agent/util-scripts/pbench-tool-meister-client @@ -25,7 +25,7 @@ tm_channel = "tool-meister-chan" cl_channel = "tool-meister-client" # List of allowed actions -allowed_actions = ("start", "stop", "send", "kill") +allowed_actions = ("end", "init", "send", "start", "stop", "kill") def main(argv): diff --git a/agent/util-scripts/pbench-tool-meister-start b/agent/util-scripts/pbench-tool-meister-start index e0a7f9067b..75403b9b7e 100755 --- a/agent/util-scripts/pbench-tool-meister-start +++ b/agent/util-scripts/pbench-tool-meister-start @@ -32,6 +32,8 @@ import redis from pbench.agent.tool_data_sink import main as tds_main from pbench.agent.tool_meister import main as tm_main +from pbench.agent import PbenchAgentConfig +import pbench.agent.toolmetadata as toolmetadata # Port number is "One Tool" in hex 0x17001 @@ -369,10 +371,32 @@ def main(argv): ), f"bad channel: {resp!r}" assert resp["data"] == 1, f"bad data: {resp!r}" + # 2.5. 
Add tool metadata json to redis + try: + inst_dir = PbenchAgentConfig(os.environ["_PBENCH_AGENT_CONFIG"]).pbench_install_dir + except BadConfig as exc: + logger.error("%s", exc) + return 1 + except Exception as exc: + logger.error("Unexpected error encountered logging pbench agent configuration: '%s'", exc) + return 1 + + try: + tm_start_path = Path(inst_dir).resolve(strict=True) + except FileNotFoundError: + logger.error("Unable to determine proper installation directory, '%s' not found", inst_dir) + return 1 + except Exception as exc: + logger.exception("Unexpected error encountered resolving installation directory: '%s'", exc) + return 1 + tool_metadata = toolmetadata.ToolMetadata("json", tm_start_path, logger) + tool_metadata.loadIntoRedis(redis_server) + + # 3. Start the tool-data-sink process # - leave a PID file for the tool data sink process tds_param_key = "tds-{}".format(group) - tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir) + tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir, group=group) try: redis_server.set(tds_param_key, json.dumps(tds, sort_keys=True)) except Exception: diff --git a/agent/util-scripts/samples/pbench-end-tools/test-62 b/agent/util-scripts/samples/pbench-end-tools/test-62 new file mode 120000 index 0000000000..b8de329238 --- /dev/null +++ b/agent/util-scripts/samples/pbench-end-tools/test-62 @@ -0,0 +1 @@ +../pbench-start-tools/test-05 \ No newline at end of file diff --git a/agent/util-scripts/samples/pbench-init-tools/test-61 b/agent/util-scripts/samples/pbench-init-tools/test-61 new file mode 120000 index 0000000000..b8de329238 --- /dev/null +++ b/agent/util-scripts/samples/pbench-init-tools/test-61 @@ -0,0 +1 @@ +../pbench-start-tools/test-05 \ No newline at end of file diff --git a/agent/util-scripts/test-bin/test-client-tool-meister b/agent/util-scripts/test-bin/test-client-tool-meister index 3f46ab28a4..cf65d8c26f 100755 --- a/agent/util-scripts/test-bin/test-client-tool-meister +++ 
b/agent/util-scripts/test-bin/test-client-tool-meister @@ -68,6 +68,13 @@ if [[ ${status} -ne 0 ]]; then exit 1 fi +_timeout pbench-init-tools --group="${group}" --dir="${benchmark_run_dir}" +status=${?} +if [[ ${status} -ne 0 ]]; then + printf -- "ERROR - \"pbench-init-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2 + exit 1 +fi + sample="sample42" iterations="0-iter-zero 1-iter-one" @@ -113,6 +120,13 @@ if [[ "${3}" == "delayed-send" ]]; then done fi +_timeout pbench-end-tools --group="${group}" --dir="${benchmark_run_dir}" +status=${?} +if [[ ${status} -ne 0 ]]; then + printf -- "ERROR - \"pbench-end-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2 + exit 1 +fi + _timeout pbench-tool-meister-stop status=${?} if [[ ${status} -ne 0 ]]; then diff --git a/agent/util-scripts/unittests b/agent/util-scripts/unittests index c97128e020..990256dddc 100755 --- a/agent/util-scripts/unittests +++ b/agent/util-scripts/unittests @@ -367,6 +367,8 @@ declare -A tools=( [test-58]="pbench-stop-tools" [test-59]="pbench-postprocess-tools" [test-60]="pbench-send-tools" + [test-61]="pbench-init-tools" + [test-62]="pbench-end-tools" ) declare -A options=( @@ -441,6 +443,8 @@ declare -A options=( [test-58]="--group=default --dir=42-iter/sample42" [test-59]="--group=foobar --dir=42-iter/sample42" [test-60]="--group=default --dir=42-iter/sample42" + [test-61]="--group=default --dir=${_testdir}/mock-run" + [test-62]="--group=default --dir=${_testdir}/mock-run" ) declare -A expected_status=( @@ -478,6 +482,8 @@ declare -A pre_hooks=( [test-47]='mkdir ${_testdir}/tmp; printf -- "# good list with no labels\none.example.com\ntwo.example.com\nthree.example.com\n" > ${_testdir}/tmp/remotes.lis' [test-48]='mkdir ${_testdir}/tmp; printf -- "%s\n" "30%" > ${_testdir}/tmp/foo.txt' [test-55]='pbench-register-tool --name=mpstat --remote=localhost > /dev/null; mkdir ${_testdir}/mock-run' + [test-61]='ln -s mock-pbench-tool-meister-client 
${_testopt}/unittest-scripts/pbench-tool-meister-client' + [test-62]='ln -s mock-pbench-tool-meister-client ${_testopt}/unittest-scripts/pbench-tool-meister-client; mkdir -p ${_testdir}/mock-run/tools-default' ) declare -A post_hooks=( @@ -487,6 +493,8 @@ declare -A post_hooks=( [test-19]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' [test-56]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}' [test-57]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}' + [test-61]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' + [test-62]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client' ) tests="${*}" diff --git a/lib/pbench/agent/tool_data_sink.py b/lib/pbench/agent/tool_data_sink.py index c1812945e2..e2edf94b7a 100644 --- a/lib/pbench/agent/tool_data_sink.py +++ b/lib/pbench/agent/tool_data_sink.py @@ -16,6 +16,7 @@ import json import logging import os +import socket import subprocess import sys import tempfile @@ -32,6 +33,8 @@ from bottle import Bottle, ServerAdapter, request, abort +import pbench.agent.toolmetadata as toolmetadata + # Read in 64 KB chunks off the wire for HTTP PUT requests. 
_BUFFER_SIZE = 65536 @@ -68,8 +71,7 @@ class DataSinkWsgiRequestHandler(WSGIRequestHandler): _logger = logger def log_error(self, format_str, *args): - """log_error - log the error message with the client address - """ + """log_error - log the error message with the client address""" self._logger.error( "%s - - %s", self.address_string(), format_str % args ) @@ -83,8 +85,7 @@ def log_message(self, format_str, *args): ) def log_request(self, code="-", size="-"): - """log_request - log the request as an informational message. - """ + """log_request - log the request as an informational message.""" if isinstance(code, HTTPStatus): code = code.value self._logger.info( @@ -111,6 +112,128 @@ def stop(self): self._server.shutdown() +class BaseCollector: + allowed_tools = {"noop-collector": None} + + def __init__( + self, benchmark_run_dir, tool_group, host_tools_dict, logger, tool_metadata + ): + self.run = None + self.benchmark_run_dir = benchmark_run_dir + self.tool_group = tool_group + self.host_tools_dict = host_tools_dict + self.logger = logger + self.tool_metadata = tool_metadata + self.tool_group_dir = self.benchmark_run_dir / f"tools-{self.tool_group}" + self.abort_launch = True + + def launch(self): + pass + + def terminate(self): + if self.abort_launch: + return 0 + + self.run.terminate() + self.run.wait() + return 1 + + +class PromCollector(BaseCollector): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.volume = self.tool_group_dir / "prometheus" + + def launch(self): + + if self.host_tools_dict: + self.abort_launch = False + else: + return 0 + + config = open("prometheus.yml", "w") + + config.write("global:\n scrape_interval: 1s\n evaluation_interval: 1s\n\n") + # config.write("alerting:\n alertmanagers:\n - static_configs:\n - targets:\n\nrule_files:\n\n") + config.write( + "scrape_configs:\n - job_name: 'prometheus'\n static_configs:\n - targets: ['localhost:9090']\n\n" + ) + + for host in self.host_tools_dict: + if 
host.startswith("local"): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) + host_ip = str(s.getsockname()[0]) + s.close() + else: + host_ip = host + + for tool in self.host_tools_dict[host]: + port = self.tool_metadata.getProperties(tool)["port"] + config.write( + " - job_name: '{}_{}'\n static_configs:\n - targets: ['{}:{}']\n\n".format( + host_ip, tool, host_ip, port + ) + ) + + config.close() + + prom_logs = open("prom.log", "w") + + if self.abort_launch: + prom_logs.write("Prometheus launch aborted, no persistent tools registered") + prom_logs.close() + return 0 + + args = ["podman", "pull", "prom/prometheus"] + prom_pull = subprocess.Popen(args, stdout=prom_logs, stderr=prom_logs) + prom_pull.wait() + + os.mkdir(self.volume) + args = ["chmod", "777", self.volume] + volume_dir = subprocess.Popen(args) + volume_dir.wait() + + args = [ + "podman", + "run", + "-p", + "9090:9090", + "-v", + f"{self.volume}:/prometheus:Z", + "-v", + f"{self.benchmark_run_dir}/tm/prometheus.yml:/etc/prometheus/prometheus.yml:Z", + "prom/prometheus", + ] + self.run = subprocess.Popen(args, stdout=prom_logs, stderr=prom_logs) + + prom_logs.close() + + return 1 + + def terminate(self): + if super().terminate() == 0: + return 0 + + self.logger.debug("PROM TERMINATED") + + args = [ + "tar", + "--remove-files", + "--exclude", + "prometheus/prometheus_data.tar.gz", + "-zcvf", + f"{self.volume}/prometheus_data.tar.gz", + "-C", + f"{self.tool_group_dir}/", + "prometheus", + ] + data_store = subprocess.Popen(args) + data_store.wait() + + return 1 + + class ToolDataSink(Bottle): """ToolDataSink - sub-class of Bottle representing state for tracking data sent from tool meisters via an HTTP PUT method. 
@@ -119,19 +242,22 @@ class ToolDataSink(Bottle): class Terminate(Exception): pass - def __init__(self, redis_server, channel, benchmark_run_dir, logger): + def __init__(self, redis_server, channel, benchmark_run_dir, tool_group, logger): super(ToolDataSink, self).__init__() # Save external state self.redis_server = redis_server self.channel = channel self.benchmark_run_dir = benchmark_run_dir + self.tool_group = tool_group self.logger = logger # Initialize internal state self._hostname = os.environ["full_hostname"] self.state = None self.tool_data_ctx = None self.directory = None + self.tool_metadata = toolmetadata.ToolMetadata("redis", redis_server, logger) self._data = None + self._prom_server = None self._tm_tracking = None self._lock = Lock() self._cv = Condition(lock=self._lock) @@ -168,8 +294,7 @@ def __init__(self, redis_server, channel, benchmark_run_dir, logger): self.web_server_thread = None def run(self): - """run - Start the Bottle web server running and the watcher thread. - """ + """run - Start the Bottle web server running and the watcher thread.""" self.logger.info("Running Bottle web server ...") try: super().run(server=self._server) @@ -179,8 +304,7 @@ def run(self): self.logger.info("Bottle web server exited") def execute(self): - """execute - Start the Bottle web server running and the watcher thread. - """ + """execute - Start the Bottle web server running and the watcher thread.""" self.web_server_thread = Thread(target=self.run) self.web_server_thread.start() self.logger.debug("web server 'run' thread started, processing payloads ...") @@ -280,14 +404,45 @@ def _fetch_tms(self): assert pids["ds"]["hostname"] == self._hostname, f"what? {pids['ds']!r}" for tm in pids["tm"]: assert tm["kind"] == "tm", f"what? {tm!r}" + # Fetch all the tool data for this Tool Meister. 
+ tm_name = tm["hostname"] + tools_json_str_raw = self.redis_server.get( + f"tm-{self.tool_group}-{tm_name}" + ) + tools_json_str = tools_json_str_raw.decode("utf-8") + tools = json.loads(tools_json_str)["tools"] + noop_tools = [] + persistent_tools = [] + transient_tools = [] + for tool_name in tools.keys(): + if tool_name in self.tool_metadata.getPersistentTools(): + persistent_tools.append(tool_name) + elif tool_name in BaseCollector.allowed_tools: + noop_tools.append(tool_name) + elif tool_name in self.tool_metadata.getTransientTools(): + transient_tools.append(tool_name) + else: + self.logger.error( + f"Registered tool {tool_name} is not recognized in tool metadata" + ) + tm["noop_tools"] = noop_tools + tm["persistent_tools"] = persistent_tools + tm["transient_tools"] = transient_tools + if tm["hostname"] == self._hostname: # The "localhost" tool meister instance does not send data # to the tool data sink, it just writes it locally. - continue - # The `posted` field is "dormant" to start (as set below), - # "waiting" when we transition to the "send" state, "dormant" - # when we receive data from the target Tool Meister host. - tm["posted"] = "dormant" + tm["posted"] = None + elif not transient_tools: + # Only Tool Meisters with at least one transient tool will + # send data to a data sink, so ignore those Tool Meisters + # without any. + tm["posted"] = None + else: + # The `posted` field is "dormant" to start (as set below), + # "waiting" when we transition to the "send" state, "dormant" + # when we receive data from the target Tool Meister host. + tm["posted"] = "dormant" tms[tm["hostname"]] = tm return tms @@ -304,6 +459,8 @@ def _wait_for_all_data(self): done = False while not done: for hostname, tm in self._tm_tracking.items(): + if tm["posted"] is None: + continue if tm["posted"] == "waiting": # Don't bother checking any other Tool Meister when we # have at least one that has not sent any data. 
@@ -331,6 +488,8 @@ def _change_tm_tracking(self, curr, new): if self._tm_tracking is None: return for hostname, tm in self._tm_tracking.items(): + if tm["posted"] is None: + continue assert ( tm["posted"] == curr ), f"_change_tm_tracking unexpected tm posted value, {tm!r}" @@ -398,7 +557,32 @@ def state_change(self, data): # Transition to "send" state should reset self._tm_tracking with self._lock: - if self.state == "send": + if self.state == "init": + prom_tool_dict = {} + for tm in self._tm_tracking: + prom_tools = [] + persist_tools = self._tm_tracking[tm]["persistent_tools"] + for tool in persist_tools: + tool_data = self.tool_metadata.getProperties(tool) + if tool_data["collector"] == "prometheus": + prom_tools.append(tool) + if len(prom_tools) > 0: + prom_tool_dict[self._tm_tracking[tm]["hostname"]] = prom_tools + self.logger.debug(prom_tool_dict) + + if prom_tool_dict: + self._prom_server = PromCollector( + self.benchmark_run_dir, + self.tool_group, + prom_tool_dict, + self.logger, + self.tool_metadata, + ) + self._prom_server.launch() + elif self.state == "end": + if self._prom_server: + self._prom_server.terminate() + elif self.state == "send": self._change_tm_tracking("dormant", "waiting") # The Tool Data Sink cannot send success until all the Tool # Meisters have sent their collected data, so wait for all the @@ -687,6 +871,7 @@ def main(argv): params = json.loads(params_str) channel = params["channel"] benchmark_run_dir = Path(params["benchmark_run_dir"]).resolve(strict=True) + tool_group = params["group"] except Exception as ex: logger.error("Unable to fetch and decode parameter key, %s: %s", param_key, ex) return 5 @@ -723,6 +908,7 @@ def main(argv): logger.debug("constructing Redis() object") try: redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) + except Exception as e: logger.error( "Unable to connect to redis server, %s:%s: %s", @@ -734,7 +920,9 @@ def main(argv): else: logger.debug("constructed Redis() object") - tds_app = 
ToolDataSink(redis_server, channel, benchmark_run_dir, logger) + tds_app = ToolDataSink( + redis_server, channel, benchmark_run_dir, tool_group, logger + ) tds_app.execute() except OSError as exc: if exc.errno == errno.EADDRINUSE: diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py index d2322696ad..12f0f9f22f 100644 --- a/lib/pbench/agent/tool_meister.py +++ b/lib/pbench/agent/tool_meister.py @@ -59,6 +59,7 @@ import redis from pbench.server.utils import md5sum +import pbench.agent.toolmetadata as toolmetadata # Path to external tar executable. @@ -72,6 +73,67 @@ class ToolException(Exception): pass +class PersistentTool: + def __init__(self, name, tool_opts, logger): + self.name = name + self.tool_opts = tool_opts.split(" ") + self.logger = logger + self.install_path = None + + # Looking for required --inst option + # Reformatting appropriately if found + for opt in self.tool_opts: + if opt.startswith("--inst="): + if opt[len(opt) - 1] == "\n": + self.install_path = opt[7 : len(opt) - 1] + else: + self.install_path = opt[7:] + self.logger.debug("FOUND") + else: + self.logger.debug("NOT FOUND SOMEHOW") + + self.process = None + self.failure = False + + def start(self): + if self.install_path is None: + self.failure = True + self.logger.error( + "NO INSTALL PATH PROPERLY GIVEN AS PERSISTENT TOOL OPTION, see /opt/pbench-agent/nodexporter --help" + ) + return + + if self.name == "node-exporter": + self.logger.debug(self.install_path) + + if not os.path.isfile(self.install_path + "/node_exporter"): + self.logger.info( + self.install_path + "/node_exporter" + " does not exist" + ) + self.failure = True + return 0 + + args = [self.install_path + "/node_exporter"] + self.process = subprocess.Popen( + args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT + ) + else: + self.logger.error("INVALID PERSISTENT TOOL NAME") + self.failure = True + return 0 + + return 1 + + def stop(self): + if not self.failure: + self.process.terminate() + 
self.process.wait() + return 1 + + self.logger.error("Nothing to terminate") + return 0 + + class Tool(object): """Encapsulates all the state needed to manage a tool running as a background process. @@ -284,6 +346,10 @@ def fetch_params(params): def __init__(self, pbench_bin, params, redis_server, logger): self.logger = logger + self.tool_metadata = toolmetadata.ToolMetadata( + "redis", redis_server, self.logger + ) + self.persist_tools = self.tool_metadata.getPersistentTools() self.pbench_bin = pbench_bin ret_val = self.fetch_params(params) ( @@ -295,6 +361,7 @@ def __init__(self, pbench_bin, params, redis_server, logger): self._tools, ) = ret_val self._running_tools = dict() + self._persistent_tools = dict() self._rs = redis_server logger.debug("pubsub") self._pubsub = self._rs.pubsub() @@ -315,14 +382,18 @@ def __init__(self, pbench_bin, params, redis_server, logger): ), f"Unexpected 'channel': {resp!r}" assert resp["data"] == 1, f"Unexpected 'data': {resp!r}" logger.debug("next done") - # We start in the "idle" state. - self.state = "idle" - self._valid_states = frozenset(["idle", "running"]) + # We start in the "startup" state, waiting for first "init" action. 
+ self.state = "startup" + self._valid_states = frozenset(["startup", "idle", "running", "shutdown"]) self._state_trans = { + "end": {"curr": "idle", "next": "shutdown", "action": self.end_tools}, + "init": {"curr": "startup", "next": "idle", "action": self.init_tools}, "start": {"curr": "idle", "next": "running", "action": self.start_tools}, "stop": {"curr": "running", "next": "idle", "action": self.stop_tools}, } - self._valid_actions = frozenset(["start", "stop", "send", "terminate"]) + self._valid_actions = frozenset( + ["end", "init", "send", "start", "stop", "terminate"] + ) for key in self._state_trans.keys(): assert ( key in self._valid_actions @@ -476,6 +547,38 @@ def _send_client_status(self, status): ret_val = 0 return ret_val + def init_tools(self, data): + """init_tools - setup all registered tools which have data collectors. + + The Tool Data Sink will be setting up the actual processes which + collect data from these tools. + """ + failures = 0 + tool_cnt = 0 + for name, tool_opts in self._tools.items(): + if name not in self.persist_tools: + continue + tool_cnt += 1 + try: + persistent_tool = PersistentTool(name, tool_opts, self.logger) + persistent_tool.start() + + self.logger.debug("NAME: " + name + " TOOL OPTS: " + tool_opts) + except Exception: + self.logger.exception( + "Failed to init PersistentTool %s running in background", name + ) + failures += 1 + continue + else: + self._persistent_tools[name] = persistent_tool + if failures > 0: + msg = f"{failures} of {tool_cnt} persistent tools failed to start" + self._send_client_status(msg) + else: + self._send_client_status("success") + return failures + def start_tools(self, data): """start_tools - start all registered tools executing in the background @@ -552,6 +655,8 @@ def start_tools(self, data): failures = 0 tool_cnt = 0 for name, tool_opts in sorted(self._tools.items()): + if name in self.persist_tools: + continue tool_cnt += 1 try: tool = Tool( @@ -572,10 +677,7 @@ def start_tools(self, 
data): else: self._running_tools[name] = tool if failures > 0: - if failures == tool_cnt: - msg = "failure" - else: - msg = f"{failures} of {tool_cnt} tools failed to start" + msg = f"{failures} of {tool_cnt} tools failed to start" self._send_client_status(msg) else: self._send_client_status("success") @@ -590,11 +692,13 @@ def _wait_for_tools(self): """ failures = 0 for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue try: tool = self._running_tools[name] except KeyError: self.logger.error( - "INTERNAL ERROR - tool %s not found in list of running tools", name, + "INTERNAL ERROR - tool %s not found in list of running tools", name ) failures += 1 continue @@ -628,12 +732,16 @@ def stop_tools(self, data): return False failures = 0 + tool_cnt = 0 for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue + tool_cnt += 1 try: tool = self._running_tools[name] except KeyError: self.logger.error( - "INTERNAL ERROR - tool %s not found in list of running tools", name, + "INTERNAL ERROR - tool %s not found in list of running tools", name ) failures += 1 continue @@ -649,6 +757,8 @@ def stop_tools(self, data): # Clean up the running tools data structure explicitly ahead of # potentially receiving another start tools. for name in sorted(self._tools.keys()): + if name in self.persist_tools: + continue try: del self._running_tools[name] except KeyError: @@ -664,9 +774,11 @@ def stop_tools(self, data): self._directory = None self._tool_dir = None - self._send_client_status( - "success" if failures == 0 else "failures stopping tools" - ) + if failures > 0: + msg = f"{failures} of {tool_cnt} failed stopping tools" + self._send_client_status(msg) + else: + self._send_client_status("success") return failures def send_tools(self, data): @@ -679,6 +791,11 @@ def send_tools(self, data): payload matches what was previously provided to a "start tools" action. 
""" + + if len(set(self._tools.keys()) - set(self.persist_tools)) == 0: + self._send_client_status("success") + return 0 + directory = data["directory"] try: tool_dir = self.directories[directory] @@ -768,7 +885,10 @@ def send_tools(self, data): headers = {"md5sum": tar_md5} directory_bytes = data["directory"].encode("utf-8") tool_data_ctx = hashlib.md5(directory_bytes).hexdigest() - url = f"http://{self._controller}:8080/tool-data/{tool_data_ctx}/{self._hostname}" + url = ( + f"http://{self._controller}:8080/tool-data" + f"/{tool_data_ctx}/{self._hostname}" + ) sent = False retries = 200 while not sent: @@ -808,7 +928,8 @@ def send_tools(self, data): shutil.rmtree(parent_dir) except Exception: self.logger.exception( - "Failed to remove tool data hierarchy, '%s'", + "Failed to remove tool data" + " hierarchy, '%s'", parent_dir, ) failures += 1 @@ -838,7 +959,7 @@ def send_tools(self, data): ) except Exception as exc: self.logger.warning( - "unexpected error removing tools tar ball, '%s': %s", tar_file, exc, + "unexpected error removing tools tar ball, '%s': %s", tar_file, exc ) self._send_client_status( @@ -846,6 +967,38 @@ def send_tools(self, data): ) return failures + def end_tools(self, data): + """end_tools - stop all the persistent data collection tools. 
+ """ + failures = 0 + tool_cnt = 0 + for name in self._tools.keys(): + if name not in self.persist_tools: + continue + tool_cnt += 1 + try: + persistent_tool = self._persistent_tools[name] + except KeyError: + self.logger.error( + "INTERNAL ERROR - tool %s not in list of persistent tools", name, + ) + failures += 1 + continue + try: + persistent_tool.stop() + except Exception: + self.logger.exception( + "Failed to stop persistent tool %s running in background", name + ) + failures += 1 + + if failures > 0: + msg = f"{failures} of {tool_cnt} failed stopping persistent tools" + self._send_client_status(msg) + else: + self._send_client_status("success") + return failures + def main(argv): """Main program for the Tool Meister. diff --git a/lib/pbench/agent/toolmetadata.py b/lib/pbench/agent/toolmetadata.py new file mode 100644 index 0000000000..6f32c6ead9 --- /dev/null +++ b/lib/pbench/agent/toolmetadata.py @@ -0,0 +1,115 @@ +from pathlib import Path +import json +import os + + +class ToolMetadataExc(Exception): + pass + + +class ToolMetadata: + def __init__(self, mode, context, logger): + self.logger = logger + assert mode in ( + "redis", + "json", + ), f"Logic bomb! Unexpected mode, {mode}, encountered constructing tool meta data" + assert ( + context + ), "Logic bomb! 
No context given on ToolMetadata object construction" + self.mode = mode + if mode == "redis": + self.redis_server = context + self.json_file = None + else: + self.redis_server = None + json_path = Path(context, "tool-scripts", "meta.json") + try: + self.json = json_path.resolve(strict=True) + except FileNotFoundError: + raise ToolMetadataExc(f"missing {json_path}") + except Exception: + raise + self.data = self.__getInitialData() + + def __getInitialData(self): + if self.mode == "json": + if not os.path.isfile(self.json): + self.logger.error( + "There is no tool-scripts/meta.json in given install dir" + ) + return None + with self.json.open("r") as json_file: + metadata = json.load(json_file) + elif self.mode == "redis": + try: + meta_raw = self.redis_server.get("tool-metadata") + except Exception: + self.logger.exception( + "Failure to fetch tool metadata from the Redis server" + ) + raise + else: + if meta_raw is None: + self.logger.error("Metadata has not been loaded into redis yet") + return None + try: + metadata = json.loads(meta_raw.decode("utf-8")) + except Exception as exc: + self.logger.error( + "Bad metadata loaded into Redis server, '%s', json=%r", + exc, + meta_raw, + ) + return None + return metadata + + def __dataCheck(self): + """Check for existing/loadable data, return True if retrieval possible, False otherwise""" + if not self.data: + self.data = self.__getInitialData() + if not self.data: + self.logger.error(f"Unable to access data through {self.mode}") + return False + return True + + def getFullData(self): + if self.__dataCheck(): + return self.data + return None + + def getPersistentTools(self): + if self.__dataCheck(): + return list(self.data["persistent"].keys()) + return None + + def getTransientTools(self): + if self.__dataCheck(): + return list(self.data["transient"].keys()) + return None + + def getProperties(self, tool): + if tool in self.data["persistent"].keys(): + return self.data["persistent"][tool] + elif tool in 
self.data["transient"].keys(): + return self.data["transient"][tool] + + def loadIntoRedis(self, info): + if self.mode == "redis": + try: + self.json = Path(info).resolve(strict=True) + except FileNotFoundError: + raise ToolMetadataExc(f"missing {info}") + except Exception: + raise + elif self.mode == "json": + self.redis_server = info + + try: + with self.json.open("r") as json_file: + metadata = json.load(json_file) + self.redis_server.set("tool-metadata", json.dumps(metadata)) + except Exception: + self.logger.error("Failed to load the data into redis") + raise + return None From 0b88e17ac268d4ca117265c22d1ad7573466a30a Mon Sep 17 00:00:00 2001 From: Keshav Maheshwari Date: Mon, 27 Jul 2020 13:08:57 -0400 Subject: [PATCH 2/2] Added DCGM Tool to pbench-agent (keshavm02) Co-authored-by: maxusmusti --- agent/tool-scripts/dcgm | 24 +++++++++++++++++++ agent/tool-scripts/meta.json | 3 ++- .../gold/pbench-register-tool/test-44.txt | 1 + .../gold/pbench-register-tool/test-46.txt | 1 + .../gold/pbench-register-tool/test-47.txt | 1 + lib/pbench/agent/tool_meister.py | 16 +++++++++++++ 6 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 agent/tool-scripts/dcgm diff --git a/agent/tool-scripts/dcgm b/agent/tool-scripts/dcgm new file mode 100755 index 0000000000..fdee80860f --- /dev/null +++ b/agent/tool-scripts/dcgm @@ -0,0 +1,24 @@ +#!/usr/bin/python3 +# -*- mode: python -*- + +import sys +import os +import logging + +PROG = os.path.basename(sys.argv[0]) +logger = logging.getLogger(PROG) +logger.setLevel(logging.DEBUG) +sh = logging.StreamHandler() +sh.setLevel(logging.DEBUG) +shf = logging.Formatter("%(message)s") +sh.setFormatter(shf) +logger.addHandler(sh) + +if len(sys.argv) != 2 or sys.argv[1] != "--help": + logger.info("This script is deprecated, please run it with --help for info on registering the tool.") + logger.info("Run /opt/pbench-agent/tool-scripts/dcgm --help for more info.") + exit(0) + +if sys.argv[1] == "--help": + logger.info("Options:") 
+ logger.info("--inst= (required)") diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json index 3de7b5032a..b9b265b4d1 100644 --- a/agent/tool-scripts/meta.json +++ b/agent/tool-scripts/meta.json @@ -42,6 +42,7 @@ }, "persistent":{ - "node-exporter": {"collector": "prometheus", "port": "9100"} + "node-exporter": {"collector": "prometheus", "port": "9100"}, + "dcgm": {"collector": "prometheus", "port": "8000"} } } diff --git a/agent/util-scripts/gold/pbench-register-tool/test-44.txt b/agent/util-scripts/gold/pbench-register-tool/test-44.txt index 34a86880f9..4b15e6a57b 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-44.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-44.txt @@ -56,6 +56,7 @@ Available tools: virsh-migrate vmstat node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-46.txt b/agent/util-scripts/gold/pbench-register-tool/test-46.txt index a459815044..605883fab7 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-46.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-46.txt @@ -56,6 +56,7 @@ Available tools: virsh-migrate vmstat node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/agent/util-scripts/gold/pbench-register-tool/test-47.txt b/agent/util-scripts/gold/pbench-register-tool/test-47.txt index e60db9fcf9..f43f893c7c 100644 --- a/agent/util-scripts/gold/pbench-register-tool/test-47.txt +++ b/agent/util-scripts/gold/pbench-register-tool/test-47.txt @@ -56,6 +56,7 @@ Available tools: virsh-migrate vmstat node-exporter + dcgm For a list of tool specific options, run: /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/ --help diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py index 12f0f9f22f..988d8db64f 100644 --- 
a/lib/pbench/agent/tool_meister.py +++ b/lib/pbench/agent/tool_meister.py @@ -117,6 +117,22 @@ def start(self): self.process = subprocess.Popen( args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT ) + elif self.name == "dcgm": + os.environ["PYTHONPATH"] = ( + self.install_path + + "/bindings:" + + self.install_path + + "/bindings/common" + ) + + script_path = self.install_path + "/samples/scripts/dcgm_prometheus.py" + if not os.path.isfile(script_path): + self.logger.info(script_path + " does not exist") + self.failure = True + return 0 + + args = [f"python2 {script_path}"] + self.process = subprocess.Popen(args, shell=True) else: self.logger.error("INVALID PERSISTENT TOOL NAME") self.failure = True