From 0380a8fec97c7ab0a35994e03acf8cc83d258b63 Mon Sep 17 00:00:00 2001
From: maxusmusti <meyceoz@redhat.com>
Date: Wed, 15 Jul 2020 17:17:36 -0400
Subject: [PATCH 1/2] Prometheus/Node_Exporter Full V1 Integration Commit

This work adds the notion of a "collector" to the Tool Data Sink, and
"persistent tools" which run continuously without cycling through the
"start", "stop", and "send" phases.  The collector is responsible for
continuously pulling data from those tools which are now started during
the new "init" phase, and stopped during the new "end" phase.

The first actual implementation of this kind of collector is for the
Prometheus data collection environment, where a `node-exporter`
"persistent tool" is run to provide an end-point from which a Prometheus
server "collector" pulls data and stores it locally under the run
directory (`${benchmark_run_dir}`,
e.g. `${benchmark_run_dir}/collector/prometheus`).

This work adds the `tool-scripts/meta.json` file which is used to
describe which tools are persistent and which are transient (default).
---
 agent/bench-scripts/pbench-run-benchmark      |   6 +
 agent/bench-scripts/pbench-user-benchmark     |   5 +
 agent/bench-scripts/test-bin/pbench-end-tools |   1 +
 .../bench-scripts/test-bin/pbench-init-tools  |   1 +
 .../tests/pbench-user-benchmark/test-09.txt   |   2 +
 .../tests/pbench-user-benchmark/test-10.txt   |   2 +
 .../tests/pbench-user-benchmark/test-11.txt   |   2 +
 .../tests/pbench-user-benchmark/test-12.txt   |   2 +
 .../tests/pbench-user-benchmark/test-23.txt   |   2 +
 .../tests/pbench-user-benchmark/test-24.txt   |   2 +
 .../tests/pbench-user-benchmark/test-25.txt   |   2 +
 .../tests/pbench-user-benchmark/test-37.txt   |   2 +
 .../tests/pbench-user-benchmark/test-38.txt   |   2 +
 agent/tool-scripts/meta.json                  |  47 ++++
 agent/tool-scripts/node-exporter              |  32 +++
 .../gold/pbench-end-tools/test-62.txt         |  16 ++
 .../gold/pbench-init-tools/test-61.txt        |  16 ++
 .../gold/pbench-kill-tools/test-09.txt        |   7 +
 .../gold/pbench-register-tool/test-44.txt     |   1 +
 .../gold/pbench-register-tool/test-46.txt     |   1 +
 .../gold/pbench-register-tool/test-47.txt     |   1 +
 .../gold/test-client-tool-meister/test-53.txt |   1 +
 .../gold/test-client-tool-meister/test-56.txt |   1 +
 .../gold/test-client-tool-meister/test-57.txt |   1 +
 agent/util-scripts/pbench-end-tools           |   1 +
 agent/util-scripts/pbench-init-tools          |   1 +
 agent/util-scripts/pbench-postprocess-tools   |  20 +-
 agent/util-scripts/pbench-register-tool       |   4 +-
 agent/util-scripts/pbench-start-tools         |  20 +-
 agent/util-scripts/pbench-tool-meister-client |   2 +-
 agent/util-scripts/pbench-tool-meister-start  |  26 ++-
 .../samples/pbench-end-tools/test-62          |   1 +
 .../samples/pbench-init-tools/test-61         |   1 +
 .../test-bin/test-client-tool-meister         |  14 ++
 agent/util-scripts/unittests                  |   8 +
 lib/pbench/agent/tool_data_sink.py            | 220 ++++++++++++++++--
 lib/pbench/agent/tool_meister.py              | 185 +++++++++++++--
 lib/pbench/agent/toolmetadata.py              | 115 +++++++++
 38 files changed, 722 insertions(+), 51 deletions(-)
 create mode 120000 agent/bench-scripts/test-bin/pbench-end-tools
 create mode 120000 agent/bench-scripts/test-bin/pbench-init-tools
 create mode 100644 agent/tool-scripts/meta.json
 create mode 100755 agent/tool-scripts/node-exporter
 create mode 100644 agent/util-scripts/gold/pbench-end-tools/test-62.txt
 create mode 100644 agent/util-scripts/gold/pbench-init-tools/test-61.txt
 create mode 120000 agent/util-scripts/pbench-end-tools
 create mode 120000 agent/util-scripts/pbench-init-tools
 create mode 120000 agent/util-scripts/samples/pbench-end-tools/test-62
 create mode 120000 agent/util-scripts/samples/pbench-init-tools/test-61
 create mode 100644 lib/pbench/agent/toolmetadata.py

diff --git a/agent/bench-scripts/pbench-run-benchmark b/agent/bench-scripts/pbench-run-benchmark
index 577930e810..e1727bb2f4 100755
--- a/agent/bench-scripts/pbench-run-benchmark
+++ b/agent/bench-scripts/pbench-run-benchmark
@@ -298,6 +298,10 @@ while (scalar @param_sets > 0) {
         push(@iterations_labels, $iteration_label);
     }
 
+    if (! $pp_only) {
+        system("pbench-init-tools", "--group=" . $tool_group, "--dir=" . $base_bench_dir);  # list form: no shell, so values are never word-split
+    }
+
     for (my $index=0; $index<@iterations; $index++) {
         my $iteration_params = $iterations[$index];
         my $iteration_label = $iterations_labels[$index];
@@ -389,6 +393,8 @@ while (scalar @param_sets > 0) {
         print BULK_FH "echo Sample processing complete!\n";
         close(BULK_FH);
         system(". ./bulk-sample.sh");
+    } else {
+        system("pbench-end-tools", "--group=" . $tool_group, "--dir=" . $base_bench_dir);  # list form: no shell, so values are never word-split
     }
     $run_doc{'run'}{'end'} = int time * 1000; # time in milliseconds
     put_json_file(\%run_doc, $es_dir . "/run/run" . $run_part . "-" . $run_doc{'run'}{'id'} . ".json");
diff --git a/agent/bench-scripts/pbench-user-benchmark b/agent/bench-scripts/pbench-user-benchmark
index 5e22560927..4f6c3d503e 100755
--- a/agent/bench-scripts/pbench-user-benchmark
+++ b/agent/bench-scripts/pbench-user-benchmark
@@ -300,6 +300,9 @@ declare -a parts
 # the while loop sub-shell does not share FD 0 (stdin) with the executed
 # benchmark script. Otherwise, benchmarks messing around with FD 0 can lead to
 # problems.
+
+pbench-init-tools --group=${tool_group} --dir=${benchmark_run_dir}
+
 while read -u 3 line; do
 	# Current line number, starting from 1 (not zero)
 	(( lineno++ ))
@@ -334,6 +337,8 @@ if [[ ${iter_num} -eq 1 ]]; then
 	warn_log "[${script_name}]: iteration file did not contain any iterations!"
 fi
 
+pbench-end-tools --group=${tool_group} --dir=${benchmark_run_dir}
+
 # Now that we have finished running all the iterations, create the
 # reference-result symlinks.
 result_dirs="$(ls -1d ${benchmark_run_dir}/*/${sample_name} 2> /dev/null)"
diff --git a/agent/bench-scripts/test-bin/pbench-end-tools b/agent/bench-scripts/test-bin/pbench-end-tools
new file mode 120000
index 0000000000..039bec18e9
--- /dev/null
+++ b/agent/bench-scripts/test-bin/pbench-end-tools
@@ -0,0 +1 @@
+mock-cmd
\ No newline at end of file
diff --git a/agent/bench-scripts/test-bin/pbench-init-tools b/agent/bench-scripts/test-bin/pbench-init-tools
new file mode 120000
index 0000000000..039bec18e9
--- /dev/null
+++ b/agent/bench-scripts/test-bin/pbench-init-tools
@@ -0,0 +1 @@
+mock-cmd
\ No newline at end of file
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt
index 588aefec0f..b7108d287a 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-09.txt
@@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-09_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt
index 12dcaf82f2..2cd7b1d23d 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-10.txt
@@ -29,6 +29,8 @@ Running user-benchmark-script no-duration for iteration 1-default
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-10_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt
index 8c1ce15b48..786cacd82f 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-11.txt
@@ -29,6 +29,8 @@ Running user-benchmark-script with-duration for iteration 1-default
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-11_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt
index fde7c56846..9a6cb62938 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-12.txt
@@ -30,6 +30,8 @@ WARNING:root:Unable to load JSON data from /var/tmp/pbench-test-bench/pbench-age
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-12_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt
index 0b8a15f7a9..69bb725d12 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-23.txt
@@ -58,6 +58,8 @@ sud
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-23_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt
index 1cc47a9478..195bc87b00 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-24.txt
@@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-24_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt
index 7dfa6dafeb..88ee5dfedb 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-25.txt
@@ -29,6 +29,8 @@ Running user-benchmark-script no-file for iteration 1-default
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-25_1900.01.01T00.00.00/1-default/sample1
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt
index 110cd3f104..3352fcb50f 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-37.txt
@@ -23,6 +23,8 @@
 +++ test-execution.log file contents
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-37_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-tool-meister-start default
diff --git a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt
index f2d6beb069..387229bd44 100644
--- a/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt
+++ b/agent/bench-scripts/tests/pbench-user-benchmark/test-38.txt
@@ -48,6 +48,8 @@ Running bm arg1 arg2 for iteration 3-iter-three
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/bm arg1 arg2 arg3
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 --sysinfo=default end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-collect-sysinfo --sysinfo=default --check
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-end-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00
+/var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-init-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 beg
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-metadata-log --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00 end
 /var/tmp/pbench-test-bench/opt/pbench-agent/unittest-scripts/pbench-postprocess-tools --group=default --dir=/var/tmp/pbench-test-bench/pbench-agent/pbench-user-benchmark_test-38_1900.01.01T00.00.00/1-iter-one/sample1
diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json
new file mode 100644
index 0000000000..3de7b5032a
--- /dev/null
+++ b/agent/tool-scripts/meta.json
@@ -0,0 +1,47 @@
+{
+	"transient":{
+		"blktrace": null,
+		"bpftrace": null,
+		"cpuacct": null,
+		"disk": null,
+		"dm-cache": null,
+		"docker": null,
+		"docker-info": null,
+		"external-data-source": null,
+		"haproxy-ocp": null,
+		"iostat": null,
+		"jmap": null,
+		"jstack": null,
+		"kvm-spinlock": null,
+		"kvmstat": null,
+		"kvmtrace": null,
+		"lockstat": null,
+		"mpstat": null,
+		"numastat": null,
+		"oc": null,
+		"openvswitch": null,
+		"pcp": null,
+		"perf": null,
+		"pidstat": null,
+		"pprof": null,
+		"proc-interrupts": null,
+		"proc-sched_debug": null,
+		"proc-vmstat": null,
+		"prometheus-metrics": null,
+		"qemu-migrate": null,
+		"rabbit": null,
+		"sar": null,
+		"strace": null,
+		"sysfs": null,
+		"systemtap": null,
+		"tcpdump": null,
+		"turbostat": null,
+		"user-tool": null,
+		"virsh-migrate": null,
+		"vmstat": null
+	},
+
+	"persistent":{
+		"node-exporter": {"collector": "prometheus", "port": "9100"}
+	}
+}
diff --git a/agent/tool-scripts/node-exporter b/agent/tool-scripts/node-exporter
new file mode 100755
index 0000000000..c835a99fb6
--- /dev/null
+++ b/agent/tool-scripts/node-exporter
@@ -0,0 +1,32 @@
+#!/usr/bin/python3
+# -*- mode: python -*-
+
+import os
+import sys
+import logging
+
+PROG = os.path.basename(sys.argv[0])  # the logger is named after the invoked script
+logger = logging.getLogger(PROG)
+logger.setLevel(logging.DEBUG)
+sh = logging.StreamHandler()  # no stream argument: messages go to sys.stderr
+sh.setLevel(logging.DEBUG)
+shf = logging.Formatter("%(message)s")  # emit only the bare message text
+sh.setFormatter(shf)
+logger.addHandler(sh)
+
+if len(sys.argv) != 2 or sys.argv[1] != "--help":  # anything but a lone --help
+    logger.info(
+        "This script is deprecated, please run it with --help for info on registering the tool."
+    )
+    logger.info(
+        "Run /opt/pbench-agent/tool-scripts/node-exporter --help for more info."
+    )
+    sys.exit(0)  # sys.exit rather than the site-injected interactive exit() helper
+
+# Only the "--help" invocation reaches this point; describe the tool options.
+logger.info("Options:")
+logger.info(
+    "--inst=</path/to/node_exporter/dir> (required; to create the path of the executable, '/node-exporter' will be appended)"
+)
+logger.info("Installation Guide: github.com/prometheus/node_exporter")
+logger.info("Soon to come: metric enabling/disabling")
diff --git a/agent/util-scripts/gold/pbench-end-tools/test-62.txt b/agent/util-scripts/gold/pbench-end-tools/test-62.txt
new file mode 100644
index 0000000000..0f4f170071
--- /dev/null
+++ b/agent/util-scripts/gold/pbench-end-tools/test-62.txt
@@ -0,0 +1,16 @@
++++ Running test-62 pbench-end-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run
+--- Finished test-62 pbench-end-tools (status=0)
++++ pbench tree state
+/var/tmp/pbench-test-utils/pbench
+/var/tmp/pbench-test-utils/pbench/mock-run
+/var/tmp/pbench-test-utils/pbench/mock-run/tools-default
+/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/tools-v1-default
+/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com
+/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat
+=== /var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat:
+--interval=3
+--- pbench tree state
++++ test-execution.log file contents
+/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default end
+--- test-execution.log file contents
diff --git a/agent/util-scripts/gold/pbench-init-tools/test-61.txt b/agent/util-scripts/gold/pbench-init-tools/test-61.txt
new file mode 100644
index 0000000000..6e0db73c47
--- /dev/null
+++ b/agent/util-scripts/gold/pbench-init-tools/test-61.txt
@@ -0,0 +1,16 @@
++++ Running test-61 pbench-init-tools --group=default --dir=/var/tmp/pbench-test-utils/pbench/mock-run
+--- Finished test-61 pbench-init-tools (status=0)
++++ pbench tree state
+/var/tmp/pbench-test-utils/pbench
+/var/tmp/pbench-test-utils/pbench/mock-run
+/var/tmp/pbench-test-utils/pbench/mock-run/tools-default
+/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/tools-v1-default
+/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com
+/var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat
+=== /var/tmp/pbench-test-utils/pbench/tools-v1-default/testhost.example.com/mpstat:
+--interval=3
+--- pbench tree state
++++ test-execution.log file contents
+/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-tool-meister-client default /var/tmp/pbench-test-utils/pbench/mock-run/tools-default init
+--- test-execution.log file contents
diff --git a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt
index 501c71c0ea..7d91023ed3 100644
--- a/agent/util-scripts/gold/pbench-kill-tools/test-09.txt
+++ b/agent/util-scripts/gold/pbench-kill-tools/test-09.txt
@@ -1,5 +1,12 @@
 +++ Running test-09 pbench-kill-tools --group=barfoo
 ERROR: required directory argument missing.
+The following are required:
+
+	-g str --group=str, str = a tool group used in a benchmark
+	                          (the default group is 'default')
+
+	-d str --dir=str, str = a directory where pbench-kill-tools
+	                        will store and process data
 --- Finished test-09 pbench-kill-tools (status=1)
 +++ pbench tree state
 /var/tmp/pbench-test-utils/pbench
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-44.txt b/agent/util-scripts/gold/pbench-register-tool/test-44.txt
index c983f20aae..34a86880f9 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-44.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-44.txt
@@ -55,6 +55,7 @@ Available tools:
 	user-tool
 	virsh-migrate
 	vmstat
+	node-exporter
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-46.txt b/agent/util-scripts/gold/pbench-register-tool/test-46.txt
index d93abb1004..a459815044 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-46.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-46.txt
@@ -55,6 +55,7 @@ Available tools:
 	user-tool
 	virsh-migrate
 	vmstat
+	node-exporter
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-47.txt b/agent/util-scripts/gold/pbench-register-tool/test-47.txt
index 6fa4518a13..e60db9fcf9 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-47.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-47.txt
@@ -55,6 +55,7 @@ Available tools:
 	user-tool
 	virsh-migrate
 	vmstat
+	node-exporter
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt
index fe87040328..3b442f345b 100644
--- a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt
+++ b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt
@@ -52,6 +52,7 @@
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.err
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.log
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-default-testhost.example.com.out
+/var/tmp/pbench-test-utils/pbench/mock-run/tools-default
 /var/tmp/pbench-test-utils/pbench/pbench.log
 /var/tmp/pbench-test-utils/pbench/tmp
 /var/tmp/pbench-test-utils/pbench/tools-v1-default
diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt
index 0576cb1a63..055080ff08 100644
--- a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt
+++ b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt
@@ -124,6 +124,7 @@
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out
+/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite
 /var/tmp/pbench-test-utils/pbench/pbench.log
 /var/tmp/pbench-test-utils/pbench/tmp
 /var/tmp/pbench-test-utils/pbench/tools-v1-lite
diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt
index 6b4fd3c2d8..293d8515b8 100644
--- a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt
+++ b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt
@@ -124,6 +124,7 @@
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.err
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.log
 /var/tmp/pbench-test-utils/pbench/mock-run/tm/tm-lite-testhost.example.com.out
+/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite
 /var/tmp/pbench-test-utils/pbench/pbench.log
 /var/tmp/pbench-test-utils/pbench/tmp
 /var/tmp/pbench-test-utils/pbench/tools-v1-lite
diff --git a/agent/util-scripts/pbench-end-tools b/agent/util-scripts/pbench-end-tools
new file mode 120000
index 0000000000..eb101607d9
--- /dev/null
+++ b/agent/util-scripts/pbench-end-tools
@@ -0,0 +1 @@
+pbench-start-tools
\ No newline at end of file
diff --git a/agent/util-scripts/pbench-init-tools b/agent/util-scripts/pbench-init-tools
new file mode 120000
index 0000000000..eb101607d9
--- /dev/null
+++ b/agent/util-scripts/pbench-init-tools
@@ -0,0 +1 @@
+pbench-start-tools
\ No newline at end of file
diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools
index e19ccbf8b3..fb904bbf8a 100755
--- a/agent/util-scripts/pbench-postprocess-tools
+++ b/agent/util-scripts/pbench-postprocess-tools
@@ -17,16 +17,20 @@ def_group="default"
 group="${def_group}"
 dir=""
 
-# Process options and arguments
-
-opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}")
-if [[ ${?} -ne 0 ]]; then
-	printf "\n%s: you specified an invalid option\n\n" "${script_name}"
+function usage {
 	printf "The following are required:\n\n"
 	printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n"
 	printf -- "\t                          (the default group is '%s')\n\n" "${def_group}"
 	printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}"
 	printf -- "\t                        will store and process data\n"
+}
+
+# Process options and arguments
+
+opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}")
+if [[ ${?} -ne 0 ]]; then
+	printf "\n%s: you specified an invalid option\n\n" "${script_name}"
+	usage >&2
 	exit 1
 fi
 eval set -- "${opts}"
@@ -54,11 +58,13 @@ while true; do
 done
 
 if [[ -z "${group}" ]]; then
-	printf -- "ERROR: required tool group parameter missing.\n" >&2
+	printf -- "ERROR: required tool group parameter missing.\n\n" >&2
+	usage >&2
 	exit 1
 fi
 if [[ -z "${dir}" ]]; then
-	printf -- "ERROR: required directory argument missing.\n" >&2
+	printf -- "ERROR: required directory argument missing.\n\n" >&2
+	usage >&2
 	exit 1
 fi
 
diff --git a/agent/util-scripts/pbench-register-tool b/agent/util-scripts/pbench-register-tool
index 8b647c733a..994fd40d4a 100755
--- a/agent/util-scripts/pbench-register-tool
+++ b/agent/util-scripts/pbench-register-tool
@@ -116,9 +116,7 @@ function usage() {
 	printf -- "\tdenoted by a leading hash, or pound (\"#\"), character.\n"
 	printf -- "\nAvailable tools:\n"
 	local tool=""
-	for tool in $(find ${pbench_bin}/tool-scripts -maxdepth 1 ! -type d ! -name '*README*' ! -name base-tool ! -name unittests -printf "%P\n" 2> /dev/null | sort); do
-		printf -- "\t${tool}\n"
-	done
+	python3 -c "import sys, json; meta = json.load(open(sys.argv[1])); [print(f'\t{tool}') for tool in (*meta['transient'].keys(), *meta['persistent'].keys()) ]" ${pbench_bin}/tool-scripts/meta.json
 	#                     1         2         3         4         5         6         7         8
 	# (no tab)   12345678901234567890123456789012345678901234567890123456789012345678901234567890
 	printf -- "\nFor a list of tool specific options, run:\n"
diff --git a/agent/util-scripts/pbench-start-tools b/agent/util-scripts/pbench-start-tools
index 5d54c5212b..a57d624cc8 100755
--- a/agent/util-scripts/pbench-start-tools
+++ b/agent/util-scripts/pbench-start-tools
@@ -10,7 +10,7 @@ action=${_suffix%%-*}
 # source the base script
 . "${pbench_bin}"/base
 
-if [[ "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then
+if [[ "${action}" != "end" && "${action}" != "init" && "${action}" != "kill" && "${action}" != "send" && "${action}" != "start" && "${action}" != "stop" ]]; then
 	error_log "[${script_name}] action \"${action}\" is not supported"
 	exit 1
 fi
@@ -24,16 +24,20 @@ def_group="default"
 group="${def_group}"
 dir=""
 
-# Process options and arguments
-
-opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}")
-if [[ ${?} -ne 0 ]]; then
-	printf "\n%s: you specified an invalid option\n\n" "${script_name}"
+function usage {
 	printf "The following are required:\n\n"
 	printf -- "\t-g str --group=str, str = a tool group used in a benchmark\n"
 	printf -- "\t                          (the default group is '%s')\n\n" "${def_group}"
 	printf -- "\t-d str --dir=str, str = a directory where %s\n" "${script_name}"
 	printf -- "\t                        will store and process data\n"
+}
+
+# Process options and arguments
+
+opts=$(getopt -q -o d:g: --longoptions "dir:,group:" -n "getopt.sh" -- "${@}")
+if [[ ${?} -ne 0 ]]; then
+	printf "\n%s: you specified an invalid option\n\n" "${script_name}"
+	usage >&2
 	exit 1
 fi
 eval set -- "${opts}"
@@ -62,10 +66,12 @@ done
 
 if [[ -z "${group}" ]]; then
 	printf -- "ERROR: required tool group parameter missing.\n" >&2
+	usage >&2
 	exit 1
 fi
 if [[ -z "${dir}" ]]; then
 	printf -- "ERROR: required directory argument missing.\n" >&2
+	usage >&2
 	exit 1
 fi
 
@@ -86,7 +92,7 @@ fi
 # The tool group's directory which stores tool output for all hosts.
 tool_output_dir="${dir}/tools-${group}"
 
-if [[ "${action}" == "start" ]]; then
+if [[ "${action}" == "start" || "${action}" == "init" ]]; then
 	mkdir -p ${tool_output_dir}
 	if [[ ${?} -ne 0 ]]; then
 		error_log "[${script_name}] failed to create tool output directory, \"${tool_output_dir}\""
diff --git a/agent/util-scripts/pbench-tool-meister-client b/agent/util-scripts/pbench-tool-meister-client
index dc0ae9019c..fa52b2c142 100755
--- a/agent/util-scripts/pbench-tool-meister-client
+++ b/agent/util-scripts/pbench-tool-meister-client
@@ -25,7 +25,7 @@ tm_channel = "tool-meister-chan"
 cl_channel = "tool-meister-client"
 
 # List of allowed actions
-allowed_actions = ("start", "stop", "send", "kill")
+allowed_actions = ("end", "init", "send", "start", "stop", "kill")
 
 
 def main(argv):
diff --git a/agent/util-scripts/pbench-tool-meister-start b/agent/util-scripts/pbench-tool-meister-start
index e0a7f9067b..75403b9b7e 100755
--- a/agent/util-scripts/pbench-tool-meister-start
+++ b/agent/util-scripts/pbench-tool-meister-start
@@ -32,6 +32,8 @@ import redis
 
 from pbench.agent.tool_data_sink import main as tds_main
 from pbench.agent.tool_meister import main as tm_main
+from pbench.agent import PbenchAgentConfig
+import pbench.agent.toolmetadata as toolmetadata
 
 
 # Port number is "One Tool" in hex 0x17001
@@ -369,10 +371,32 @@ def main(argv):
         ), f"bad channel: {resp!r}"
         assert resp["data"] == 1, f"bad data: {resp!r}"
 
+    # 2.5. Add tool metadata json to redis
+    # NOTE(review): BadConfig must be in scope in this script -- confirm import.
+    try:
+        inst_dir = PbenchAgentConfig(os.environ["_PBENCH_AGENT_CONFIG"]).pbench_install_dir
+    except BadConfig as exc:
+        logger.error("%s", exc)
+        return 1
+    except Exception as exc:
+        logger.error("Unexpected error encountered logging pbench agent configuration: '%s'", exc)
+        return 1
+
+    try:
+        tm_start_path = Path(inst_dir).resolve(strict=True)
+    except FileNotFoundError:
+        logger.error("Unable to determine proper installation directory, '%s' not found", inst_dir)
+        return 1
+    except Exception as exc:
+        logger.exception("Unexpected error encountered resolving installation directory: '%s'", exc)
+        return 1
+    tool_metadata = toolmetadata.ToolMetadata("json", tm_start_path, logger)
+    tool_metadata.loadIntoRedis(redis_server)
+
     # 3. Start the tool-data-sink process
     #   - leave a PID file for the tool data sink process
     tds_param_key = "tds-{}".format(group)
-    tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir)
+    tds = dict(channel=channel, benchmark_run_dir=benchmark_run_dir, group=group)
     try:
         redis_server.set(tds_param_key, json.dumps(tds, sort_keys=True))
     except Exception:
diff --git a/agent/util-scripts/samples/pbench-end-tools/test-62 b/agent/util-scripts/samples/pbench-end-tools/test-62
new file mode 120000
index 0000000000..b8de329238
--- /dev/null
+++ b/agent/util-scripts/samples/pbench-end-tools/test-62
@@ -0,0 +1 @@
+../pbench-start-tools/test-05
\ No newline at end of file
diff --git a/agent/util-scripts/samples/pbench-init-tools/test-61 b/agent/util-scripts/samples/pbench-init-tools/test-61
new file mode 120000
index 0000000000..b8de329238
--- /dev/null
+++ b/agent/util-scripts/samples/pbench-init-tools/test-61
@@ -0,0 +1 @@
+../pbench-start-tools/test-05
\ No newline at end of file
diff --git a/agent/util-scripts/test-bin/test-client-tool-meister b/agent/util-scripts/test-bin/test-client-tool-meister
index 3f46ab28a4..cf65d8c26f 100755
--- a/agent/util-scripts/test-bin/test-client-tool-meister
+++ b/agent/util-scripts/test-bin/test-client-tool-meister
@@ -68,6 +68,13 @@ if [[ ${status} -ne 0 ]]; then
     exit 1
 fi
 
+_timeout pbench-init-tools --group="${group}" --dir="${benchmark_run_dir}"
+status=${?}
+if [[ ${status} -ne 0 ]]; then
+    printf -- "ERROR - \"pbench-init-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2
+    exit 1
+fi
+
 sample="sample42"
 iterations="0-iter-zero 1-iter-one"
 
@@ -113,6 +120,13 @@ if [[ "${3}" == "delayed-send" ]]; then
     done
 fi
 
+_timeout pbench-end-tools --group="${group}" --dir="${benchmark_run_dir}"
+status=${?}
+if [[ ${status} -ne 0 ]]; then
+    printf -- "ERROR - \"pbench-end-tools\" failed to execute successfully (exit code: %s)\n" "${status}" >&2
+    exit 1
+fi
+
 _timeout pbench-tool-meister-stop
 status=${?}
 if [[ ${status} -ne 0 ]]; then
diff --git a/agent/util-scripts/unittests b/agent/util-scripts/unittests
index c97128e020..990256dddc 100755
--- a/agent/util-scripts/unittests
+++ b/agent/util-scripts/unittests
@@ -367,6 +367,8 @@ declare -A tools=(
     [test-58]="pbench-stop-tools"
     [test-59]="pbench-postprocess-tools"
     [test-60]="pbench-send-tools"
+    [test-61]="pbench-init-tools"
+    [test-62]="pbench-end-tools"
 )
 
 declare -A options=(
@@ -441,6 +443,8 @@ declare -A options=(
     [test-58]="--group=default --dir=42-iter/sample42"
     [test-59]="--group=foobar --dir=42-iter/sample42"
     [test-60]="--group=default --dir=42-iter/sample42"
+    [test-61]="--group=default --dir=${_testdir}/mock-run"
+    [test-62]="--group=default --dir=${_testdir}/mock-run"
 )
 
 declare -A expected_status=(
@@ -478,6 +482,8 @@ declare -A pre_hooks=(
     [test-47]='mkdir ${_testdir}/tmp; printf -- "# good list with no labels\none.example.com\ntwo.example.com\nthree.example.com\n" > ${_testdir}/tmp/remotes.lis'
     [test-48]='mkdir ${_testdir}/tmp; printf -- "%s\n" "30%" > ${_testdir}/tmp/foo.txt'
     [test-55]='pbench-register-tool --name=mpstat --remote=localhost > /dev/null; mkdir ${_testdir}/mock-run'
+    [test-61]='ln -s mock-pbench-tool-meister-client ${_testopt}/unittest-scripts/pbench-tool-meister-client'
+    [test-62]='ln -s mock-pbench-tool-meister-client ${_testopt}/unittest-scripts/pbench-tool-meister-client; mkdir -p ${_testdir}/mock-run/tools-default'
 )
 
 declare -A post_hooks=(
@@ -487,6 +493,8 @@ declare -A post_hooks=(
     [test-19]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client'
     [test-56]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}'
     [test-57]='sort ${_testdir}/mock-run/tm/pbench-tool-data-sink.log > ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted; mv ${_testdir}/mock-run/tm/pbench-tool-data-sink.log.sorted ${_testdir}/mock-run/tm/pbench-tool-data-sink.log; sort ${_testlog} > ${_testlog}.sorted; mv ${_testlog}.sorted ${_testlog}'
+    [test-61]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client'
+    [test-62]='rm ${_testopt}/unittest-scripts/pbench-tool-meister-client'
 )
 
 tests="${*}"
diff --git a/lib/pbench/agent/tool_data_sink.py b/lib/pbench/agent/tool_data_sink.py
index c1812945e2..e2edf94b7a 100644
--- a/lib/pbench/agent/tool_data_sink.py
+++ b/lib/pbench/agent/tool_data_sink.py
@@ -16,6 +16,7 @@
 import json
 import logging
 import os
+import socket
 import subprocess
 import sys
 import tempfile
@@ -32,6 +33,8 @@
 
 from bottle import Bottle, ServerAdapter, request, abort
 
+import pbench.agent.toolmetadata as toolmetadata
+
 
 # Read in 64 KB chunks off the wire for HTTP PUT requests.
 _BUFFER_SIZE = 65536
@@ -68,8 +71,7 @@ class DataSinkWsgiRequestHandler(WSGIRequestHandler):
             _logger = logger
 
             def log_error(self, format_str, *args):
-                """log_error - log the error message with the client address
-                """
+                """log_error - log the error message with the client address"""
                 self._logger.error(
                     "%s - - %s", self.address_string(), format_str % args
                 )
@@ -83,8 +85,7 @@ def log_message(self, format_str, *args):
                 )
 
             def log_request(self, code="-", size="-"):
-                """log_request - log the request as an informational message.
-                """
+                """log_request - log the request as an informational message."""
                 if isinstance(code, HTTPStatus):
                     code = code.value
                 self._logger.info(
@@ -111,6 +112,128 @@ def stop(self):
             self._server.shutdown()
 
 
+class BaseCollector:
+    """Base class for the processes which collect data from persistent tools."""
+    allowed_tools = {"noop-collector": None}
+
+    def __init__(
+        self, benchmark_run_dir, tool_group, host_tools_dict, logger, tool_metadata
+    ):
+        self.run = None  # Popen object of the collector process once launched
+        self.benchmark_run_dir = benchmark_run_dir
+        self.tool_group = tool_group
+        self.host_tools_dict = host_tools_dict
+        self.logger = logger
+        self.tool_metadata = tool_metadata
+        self.tool_group_dir = self.benchmark_run_dir / f"tools-{self.tool_group}"
+        self.abort_launch = True  # sub-classes clear this when they launch
+
+    def launch(self):
+        """Start the collector; the base class has nothing to start."""
+
+    def terminate(self):
+        if self.abort_launch or self.run is None:
+            return 0  # no process was ever spawned, so nothing to stop
+        self.run.terminate()
+        self.run.wait()
+        return 1  # the collector process was stopped
+
+
+class PromCollector(BaseCollector):
+    """Collector which runs a Prometheus server in a podman container."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Host directory mounted into the container as the /prometheus volume.
+        self.volume = self.tool_group_dir / "prometheus"
+
+    def launch(self):
+        """Write the scrape configuration and start the Prometheus container.
+
+        Returns 1 once the container process has been spawned, or 0 when no
+        hosts with tools to scrape were given.
+        """
+        if not self.host_tools_dict:
+            return 0
+        self.abort_launch = False
+
+        # Write the configuration to the exact path mounted into the container
+        # below instead of relying on the current working directory.
+        config_path = self.benchmark_run_dir / "tm" / "prometheus.yml"
+        local_ip = None
+        with open(config_path, "w") as config:
+            config.write(
+                "global:\n  scrape_interval: 1s\n  evaluation_interval: 1s\n\n"
+            )
+            config.write(
+                "scrape_configs:\n  - job_name: 'prometheus'\n    static_configs:\n    - targets: ['localhost:9090']\n\n"
+            )
+            for host, tools in self.host_tools_dict.items():
+                host_ip = host
+                if host.startswith("local"):
+                    if local_ip is None:
+                        # Connecting a UDP socket sends nothing, it only picks
+                        # the local address used to reach an outside host.
+                        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+                            s.connect(("8.8.8.8", 80))
+                            local_ip = str(s.getsockname()[0])
+                    host_ip = local_ip
+                for tool in tools:
+                    port = self.tool_metadata.getProperties(tool)["port"]
+                    config.write(
+                        f"  - job_name: '{host_ip}_{tool}'\n    static_configs:\n"
+                        f"    - targets: ['{host_ip}:{port}']\n\n"
+                    )
+
+        with open("prom.log", "w") as prom_logs:
+            args = ["podman", "pull", "prom/prometheus"]
+            pull = subprocess.run(args, stdout=prom_logs, stderr=prom_logs)
+            if pull.returncode != 0:
+                # Keep going, the image may already be available locally.
+                self.logger.warning("podman pull failed, exit code %d", pull.returncode)
+
+            os.mkdir(self.volume)
+            # World-writable, presumably so the container's user can write to it.
+            os.chmod(self.volume, 0o777)
+
+            args = [
+                "podman",
+                "run",
+                "-p",
+                "9090:9090",
+                "-v",
+                f"{self.volume}:/prometheus:Z",
+                "-v",
+                f"{config_path}:/etc/prometheus/prometheus.yml:Z",
+                "prom/prometheus",
+            ]
+            self.run = subprocess.Popen(args, stdout=prom_logs, stderr=prom_logs)
+
+        return 1
+
+    def terminate(self):
+        """Stop Prometheus and archive its data; return 1 if stopped, else 0."""
+        if super().terminate() == 0:
+            return 0
+        self.logger.debug("PROM TERMINATED")
+
+        # Archive the volume directory (the archive is written inside it, hence
+        # the --exclude); --remove-files deletes the files once archived.
+        args = [
+            "tar",
+            "--remove-files",
+            "--exclude",
+            "prometheus/prometheus_data.tar.gz",
+            "-zcvf",
+            f"{self.volume}/prometheus_data.tar.gz",
+            "-C",
+            f"{self.tool_group_dir}/",
+            "prometheus",
+        ]
+        subprocess.run(args)
+        return 1
+
+
 class ToolDataSink(Bottle):
     """ToolDataSink - sub-class of Bottle representing state for tracking data
     sent from tool meisters via an HTTP PUT method.
@@ -119,19 +242,22 @@ class ToolDataSink(Bottle):
     class Terminate(Exception):
         pass
 
-    def __init__(self, redis_server, channel, benchmark_run_dir, logger):
+    def __init__(self, redis_server, channel, benchmark_run_dir, tool_group, logger):
         super(ToolDataSink, self).__init__()
         # Save external state
         self.redis_server = redis_server
         self.channel = channel
         self.benchmark_run_dir = benchmark_run_dir
+        self.tool_group = tool_group
         self.logger = logger
         # Initialize internal state
         self._hostname = os.environ["full_hostname"]
         self.state = None
         self.tool_data_ctx = None
         self.directory = None
+        self.tool_metadata = toolmetadata.ToolMetadata("redis", redis_server, logger)
         self._data = None
+        self._prom_server = None
         self._tm_tracking = None
         self._lock = Lock()
         self._cv = Condition(lock=self._lock)
@@ -168,8 +294,7 @@ def __init__(self, redis_server, channel, benchmark_run_dir, logger):
         self.web_server_thread = None
 
     def run(self):
-        """run - Start the Bottle web server running and the watcher thread.
-        """
+        """run - Start the Bottle web server running and the watcher thread."""
         self.logger.info("Running Bottle web server ...")
         try:
             super().run(server=self._server)
@@ -179,8 +304,7 @@ def run(self):
             self.logger.info("Bottle web server exited")
 
     def execute(self):
-        """execute - Start the Bottle web server running and the watcher thread.
-        """
+        """execute - Start the Bottle web server running and the watcher thread."""
         self.web_server_thread = Thread(target=self.run)
         self.web_server_thread.start()
         self.logger.debug("web server 'run' thread started, processing payloads ...")
@@ -280,14 +404,45 @@ def _fetch_tms(self):
             assert pids["ds"]["hostname"] == self._hostname, f"what? {pids['ds']!r}"
             for tm in pids["tm"]:
                 assert tm["kind"] == "tm", f"what? {tm!r}"
+                # Fetch all the tool data for this Tool Meister.
+                tm_name = tm["hostname"]
+                tools_json_str_raw = self.redis_server.get(
+                    f"tm-{self.tool_group}-{tm_name}"
+                )
+                tools_json_str = tools_json_str_raw.decode("utf-8")
+                tools = json.loads(tools_json_str)["tools"]
+                noop_tools = []
+                persistent_tools = []
+                transient_tools = []
+                for tool_name in tools.keys():
+                    if tool_name in self.tool_metadata.getPersistentTools():
+                        persistent_tools.append(tool_name)
+                    elif tool_name in BaseCollector.allowed_tools:
+                        noop_tools.append(tool_name)
+                    elif tool_name in self.tool_metadata.getTransientTools():
+                        transient_tools.append(tool_name)
+                    else:
+                        self.logger.error(
+                            f"Registered tool {tool_name} is not recognized in tool metadata"
+                        )
+                tm["noop_tools"] = noop_tools
+                tm["persistent_tools"] = persistent_tools
+                tm["transient_tools"] = transient_tools
+
                 if tm["hostname"] == self._hostname:
                     # The "localhost" tool meister instance does not send data
                     # to the tool data sink, it just writes it locally.
-                    continue
-                # The `posted` field is "dormant" to start (as set below),
-                # "waiting" when we transition to the "send" state, "dormant"
-                # when we receive data from the target Tool Meister host.
-                tm["posted"] = "dormant"
+                    tm["posted"] = None
+                elif not transient_tools:
+                    # Only Tool Meisters with at least one transient tool will
+                    # send data to a data sink, so ignore those Tool Meisters
+                    # without any.
+                    tm["posted"] = None
+                else:
+                    # The `posted` field is "dormant" to start (as set below),
+                    # "waiting" when we transition to the "send" state, "dormant"
+                    # when we receive data from the target Tool Meister host.
+                    tm["posted"] = "dormant"
                 tms[tm["hostname"]] = tm
         return tms
 
@@ -304,6 +459,8 @@ def _wait_for_all_data(self):
         done = False
         while not done:
             for hostname, tm in self._tm_tracking.items():
+                if tm["posted"] is None:
+                    continue
                 if tm["posted"] == "waiting":
                     # Don't bother checking any other Tool Meister when we
                     # have at least one that has not sent any data.
@@ -331,6 +488,8 @@ def _change_tm_tracking(self, curr, new):
         if self._tm_tracking is None:
             return
         for hostname, tm in self._tm_tracking.items():
+            if tm["posted"] is None:
+                continue
             assert (
                 tm["posted"] == curr
             ), f"_change_tm_tracking unexpected tm posted value, {tm!r}"
@@ -398,7 +557,32 @@ def state_change(self, data):
 
         # Transition to "send" state should reset self._tm_tracking
         with self._lock:
-            if self.state == "send":
+            if self.state == "init":
+                prom_tool_dict = {}
+                for tm in self._tm_tracking:
+                    prom_tools = []
+                    persist_tools = self._tm_tracking[tm]["persistent_tools"]
+                    for tool in persist_tools:
+                        tool_data = self.tool_metadata.getProperties(tool)
+                        if tool_data["collector"] == "prometheus":
+                            prom_tools.append(tool)
+                    if len(prom_tools) > 0:
+                        prom_tool_dict[self._tm_tracking[tm]["hostname"]] = prom_tools
+                self.logger.debug(prom_tool_dict)
+
+                if prom_tool_dict:
+                    self._prom_server = PromCollector(
+                        self.benchmark_run_dir,
+                        self.tool_group,
+                        prom_tool_dict,
+                        self.logger,
+                        self.tool_metadata,
+                    )
+                    self._prom_server.launch()
+            elif self.state == "end":
+                if self._prom_server:
+                    self._prom_server.terminate()
+            elif self.state == "send":
                 self._change_tm_tracking("dormant", "waiting")
                 # The Tool Data Sink cannot send success until all the Tool
                 # Meisters have sent their collected data, so wait for all the
@@ -687,6 +871,7 @@ def main(argv):
         params = json.loads(params_str)
         channel = params["channel"]
         benchmark_run_dir = Path(params["benchmark_run_dir"]).resolve(strict=True)
+        tool_group = params["group"]
     except Exception as ex:
         logger.error("Unable to fetch and decode parameter key, %s: %s", param_key, ex)
         return 5
@@ -723,6 +908,7 @@ def main(argv):
             logger.debug("constructing Redis() object")
             try:
                 redis_server = redis.Redis(host=redis_host, port=redis_port, db=0)
+
             except Exception as e:
                 logger.error(
                     "Unable to connect to redis server, %s:%s: %s",
@@ -734,7 +920,9 @@ def main(argv):
             else:
                 logger.debug("constructed Redis() object")
 
-            tds_app = ToolDataSink(redis_server, channel, benchmark_run_dir, logger)
+            tds_app = ToolDataSink(
+                redis_server, channel, benchmark_run_dir, tool_group, logger
+            )
             tds_app.execute()
         except OSError as exc:
             if exc.errno == errno.EADDRINUSE:
diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py
index d2322696ad..12f0f9f22f 100644
--- a/lib/pbench/agent/tool_meister.py
+++ b/lib/pbench/agent/tool_meister.py
@@ -59,6 +59,7 @@
 import redis
 
 from pbench.server.utils import md5sum
+import pbench.agent.toolmetadata as toolmetadata
 
 
 # Path to external tar executable.
@@ -72,6 +73,67 @@ class ToolException(Exception):
     pass
 
 
+class PersistentTool:
+    """A tool which runs continuously between the "init" and "end" actions.
+
+    The `failure` attribute is set when start() could not launch the tool.
+    """
+
+    def __init__(self, name, tool_opts, logger):
+        self.name = name
+        self.tool_opts = tool_opts.split(" ")
+        self.logger = logger
+        self.install_path = None
+        self.process = None
+        self.failure = False
+
+        # Look for the required --inst option (the last one given wins),
+        # dropping any newline left at the end of the option string.
+        prefix = "--inst="
+        for opt in self.tool_opts:
+            if opt.startswith(prefix):
+                self.install_path = opt[len(prefix) :].rstrip("\n")
+        if self.install_path is None:
+            self.logger.debug("%s: no --inst option given", self.name)
+
+    def start(self):
+        """Start the tool running in the background.
+
+        Returns 1 on success; on failure sets `failure` and returns 0.
+        """
+        if self.install_path is None:
+            self.failure = True
+            self.logger.error(
+                "NO INSTALL PATH PROPERLY GIVEN AS PERSISTENT TOOL OPTION, see /opt/pbench-agent/nodexporter --help"
+            )
+            return 0
+        if self.name != "node-exporter":
+            self.logger.error("INVALID PERSISTENT TOOL NAME")
+            self.failure = True
+            return 0
+
+        self.logger.debug(self.install_path)
+        executable = self.install_path + "/node_exporter"
+        if not os.path.isfile(executable):
+            self.logger.info(executable + " does not exist")
+            self.failure = True
+            return 0
+        self.process = subprocess.Popen(
+            [executable], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
+        )
+        return 1
+
+    def stop(self):
+        """Terminate the tool process; return 1 if one was stopped, else 0."""
+        if self.process is None:
+            # Either start() failed or it was never called.
+            self.logger.error("Nothing to terminate")
+            return 0
+        self.process.terminate()
+        self.process.wait()
+        return 1
+
+
 class Tool(object):
     """Encapsulates all the state needed to manage a tool running as a background
     process.
@@ -284,6 +346,10 @@ def fetch_params(params):
 
     def __init__(self, pbench_bin, params, redis_server, logger):
         self.logger = logger
+        self.tool_metadata = toolmetadata.ToolMetadata(
+            "redis", redis_server, self.logger
+        )
+        self.persist_tools = self.tool_metadata.getPersistentTools()
         self.pbench_bin = pbench_bin
         ret_val = self.fetch_params(params)
         (
@@ -295,6 +361,7 @@ def __init__(self, pbench_bin, params, redis_server, logger):
             self._tools,
         ) = ret_val
         self._running_tools = dict()
+        self._persistent_tools = dict()
         self._rs = redis_server
         logger.debug("pubsub")
         self._pubsub = self._rs.pubsub()
@@ -315,14 +382,18 @@ def __init__(self, pbench_bin, params, redis_server, logger):
         ), f"Unexpected 'channel': {resp!r}"
         assert resp["data"] == 1, f"Unexpected 'data': {resp!r}"
         logger.debug("next done")
-        # We start in the "idle" state.
-        self.state = "idle"
-        self._valid_states = frozenset(["idle", "running"])
+        # We start in the "startup" state, waiting for first "init" action.
+        self.state = "startup"
+        self._valid_states = frozenset(["startup", "idle", "running", "shutdown"])
         self._state_trans = {
+            "end": {"curr": "idle", "next": "shutdown", "action": self.end_tools},
+            "init": {"curr": "startup", "next": "idle", "action": self.init_tools},
             "start": {"curr": "idle", "next": "running", "action": self.start_tools},
             "stop": {"curr": "running", "next": "idle", "action": self.stop_tools},
         }
-        self._valid_actions = frozenset(["start", "stop", "send", "terminate"])
+        self._valid_actions = frozenset(
+            ["end", "init", "send", "start", "stop", "terminate"]
+        )
         for key in self._state_trans.keys():
             assert (
                 key in self._valid_actions
@@ -476,6 +547,38 @@ def _send_client_status(self, status):
                 ret_val = 0
         return ret_val
 
+    def init_tools(self, data):
+        """init_tools - start all registered persistent tools in the background.
+
+        The Tool Data Sink sets up the processes which collect data from these
+        tools.  Returns the number of persistent tools that failed to start.
+        """
+        failures = 0
+        tool_cnt = 0
+        for name, tool_opts in self._tools.items():
+            if name not in self.persist_tools:
+                continue
+            tool_cnt += 1
+            self.logger.debug("NAME: %s  TOOL OPTS: %s", name, tool_opts)
+            try:
+                persistent_tool = PersistentTool(name, tool_opts, self.logger)
+                persistent_tool.start()
+            except Exception:
+                self.logger.exception(
+                    "Failed to init PersistentTool %s running in background", name
+                )
+                failures += 1
+                continue
+            # start() reports problems via `failure` instead of raising.
+            if persistent_tool.failure:
+                failures += 1
+            self._persistent_tools[name] = persistent_tool
+        status = "success"
+        if failures > 0:
+            status = f"{failures} of {tool_cnt} persistent tools failed to start"
+        self._send_client_status(status)
+        return failures
+
     def start_tools(self, data):
         """start_tools - start all registered tools executing in the background
 
@@ -552,6 +655,8 @@ def start_tools(self, data):
         failures = 0
         tool_cnt = 0
         for name, tool_opts in sorted(self._tools.items()):
+            if name in self.persist_tools:
+                continue
             tool_cnt += 1
             try:
                 tool = Tool(
@@ -572,10 +677,7 @@ def start_tools(self, data):
             else:
                 self._running_tools[name] = tool
         if failures > 0:
-            if failures == tool_cnt:
-                msg = "failure"
-            else:
-                msg = f"{failures} of {tool_cnt} tools failed to start"
+            msg = f"{failures} of {tool_cnt} tools failed to start"
             self._send_client_status(msg)
         else:
             self._send_client_status("success")
@@ -590,11 +692,13 @@ def _wait_for_tools(self):
         """
         failures = 0
         for name in sorted(self._tools.keys()):
+            if name in self.persist_tools:
+                continue
             try:
                 tool = self._running_tools[name]
             except KeyError:
                 self.logger.error(
-                    "INTERNAL ERROR - tool %s not found in list of running tools", name,
+                    "INTERNAL ERROR - tool %s not found in list of running tools", name
                 )
                 failures += 1
                 continue
@@ -628,12 +732,16 @@ def stop_tools(self, data):
             return False
 
         failures = 0
+        tool_cnt = 0
         for name in sorted(self._tools.keys()):
+            if name in self.persist_tools:
+                continue
+            tool_cnt += 1
             try:
                 tool = self._running_tools[name]
             except KeyError:
                 self.logger.error(
-                    "INTERNAL ERROR - tool %s not found in list of running tools", name,
+                    "INTERNAL ERROR - tool %s not found in list of running tools", name
                 )
                 failures += 1
                 continue
@@ -649,6 +757,8 @@ def stop_tools(self, data):
         # Clean up the running tools data structure explicitly ahead of
         # potentially receiving another start tools.
         for name in sorted(self._tools.keys()):
+            if name in self.persist_tools:
+                continue
             try:
                 del self._running_tools[name]
             except KeyError:
@@ -664,9 +774,11 @@ def stop_tools(self, data):
         self._directory = None
         self._tool_dir = None
 
-        self._send_client_status(
-            "success" if failures == 0 else "failures stopping tools"
-        )
+        if failures > 0:
+            msg = f"{failures} of {tool_cnt} failed stopping tools"
+            self._send_client_status(msg)
+        else:
+            self._send_client_status("success")
         return failures
 
     def send_tools(self, data):
@@ -679,6 +791,11 @@ def send_tools(self, data):
         payload matches what was previously provided to a "start tools"
         action.
         """
+
+        if len(set(self._tools.keys()) - set(self.persist_tools)) == 0:
+            self._send_client_status("success")
+            return 0
+
         directory = data["directory"]
         try:
             tool_dir = self.directories[directory]
@@ -768,7 +885,10 @@ def send_tools(self, data):
                         headers = {"md5sum": tar_md5}
                         directory_bytes = data["directory"].encode("utf-8")
                         tool_data_ctx = hashlib.md5(directory_bytes).hexdigest()
-                        url = f"http://{self._controller}:8080/tool-data/{tool_data_ctx}/{self._hostname}"
+                        url = (
+                            f"http://{self._controller}:8080/tool-data"
+                            f"/{tool_data_ctx}/{self._hostname}"
+                        )
                         sent = False
                         retries = 200
                         while not sent:
@@ -808,7 +928,8 @@ def send_tools(self, data):
                                         shutil.rmtree(parent_dir)
                                     except Exception:
                                         self.logger.exception(
-                                            "Failed to remove tool data hierarchy, '%s'",
+                                            "Failed to remove tool data"
+                                            " hierarchy, '%s'",
                                             parent_dir,
                                         )
                                         failures += 1
@@ -838,7 +959,7 @@ def send_tools(self, data):
                     )
             except Exception as exc:
                 self.logger.warning(
-                    "unexpected error removing tools tar ball, '%s': %s", tar_file, exc,
+                    "unexpected error removing tools tar ball, '%s': %s", tar_file, exc
                 )
 
         self._send_client_status(
@@ -846,6 +967,38 @@ def send_tools(self, data):
         )
         return failures
 
+    def end_tools(self, data):
+        """end_tools - stop each registered persistent tool, report the
+        outcome to the client, and return the number of failures.
+        """
+        persistent_names = [
+            name for name in self._tools.keys() if name in self.persist_tools
+        ]
+        failures, tool_cnt = 0, len(persistent_names)
+        for name in persistent_names:
+            try:
+                ptool = self._persistent_tools[name]
+            except KeyError:
+                self.logger.error(
+                    "INTERNAL ERROR - tool %s not in list of persistent tools", name
+                )
+                failures += 1
+                continue
+            try:
+                ptool.stop()
+            except Exception:
+                self.logger.exception(
+                    "Failed to stop persistent tool %s running in background", name
+                )
+                failures += 1
+
+        if failures == 0:
+            status = "success"
+        else:
+            status = f"{failures} of {tool_cnt} failed stopping persistent tools"
+        self._send_client_status(status)
+        return failures
+
 
 def main(argv):
     """Main program for the Tool Meister.
diff --git a/lib/pbench/agent/toolmetadata.py b/lib/pbench/agent/toolmetadata.py
new file mode 100644
index 0000000000..6f32c6ead9
--- /dev/null
+++ b/lib/pbench/agent/toolmetadata.py
@@ -0,0 +1,154 @@
+from pathlib import Path
+import json
+import os
+
+
+class ToolMetadataExc(Exception):
+    """Raised when the tool meta data JSON file cannot be found."""
+
+
+class ToolMetadata:
+    """Access to the tool meta data (the "persistent" and "transient" tool
+    tables), loaded either from a JSON file or from a Redis server.
+
+    In "json" mode the context is the install directory containing
+    tool-scripts/meta.json; in "redis" mode the context is the Redis
+    server object whose "tool-metadata" key holds the JSON document.
+    """
+
+    def __init__(self, mode, context, logger):
+        """Record the data source for the given mode and attempt an initial
+        load of the meta data.
+
+        Raises ToolMetadataExc in "json" mode when meta.json is missing.
+        """
+        self.logger = logger
+        assert mode in (
+            "redis",
+            "json",
+        ), f"Logic bomb! Unexpected mode, {mode}, encountered constructing tool meta data"
+        assert (
+            context
+        ), "Logic bomb! No context given on ToolMetadata object construction"
+        self.mode = mode
+        if mode == "redis":
+            self.redis_server = context
+            # No JSON file is known until loadIntoRedis() is given one.
+            self.json = None
+        else:
+            self.redis_server = None
+            json_path = Path(context, "tool-scripts", "meta.json")
+            try:
+                self.json = json_path.resolve(strict=True)
+            except FileNotFoundError:
+                raise ToolMetadataExc(f"missing {json_path}")
+        self.data = self.__getInitialData()
+
+    def __getInitialData(self):
+        """Load the meta data from the configured source.
+
+        Returns the decoded meta data, or None when it is not available
+        (JSON file gone, Redis key not set, or undecodable Redis payload).
+        Errors fetching from Redis or parsing the JSON file are raised.
+        """
+        if self.mode == "json":
+            if not os.path.isfile(self.json):
+                self.logger.error(
+                    "There is no tool-scripts/meta.json in given install dir"
+                )
+                return None
+            with self.json.open("r") as json_file:
+                metadata = json.load(json_file)
+        elif self.mode == "redis":
+            try:
+                meta_raw = self.redis_server.get("tool-metadata")
+            except Exception:
+                self.logger.exception(
+                    "Failure to fetch tool metadata from the Redis server"
+                )
+                raise
+            if meta_raw is None:
+                self.logger.error("Metadata has not been loaded into redis yet")
+                return None
+            try:
+                metadata = json.loads(meta_raw.decode("utf-8"))
+            except Exception as exc:
+                self.logger.error(
+                    "Bad metadata loaded into Redis server, '%s', json=%r",
+                    exc,
+                    meta_raw,
+                )
+                return None
+        return metadata
+
+    def __dataCheck(self):
+        """Check for existing/loadable data, return True if retrieval is
+        possible, False otherwise.
+        """
+        if not self.data:
+            # Retry the load; the data may have become available since the
+            # previous attempt (e.g. it has since been loaded into Redis).
+            self.data = self.__getInitialData()
+            if not self.data:
+                self.logger.error("Unable to access data through %s", self.mode)
+                return False
+        return True
+
+    def getFullData(self):
+        """Return the entire meta data dictionary, or None if unavailable."""
+        if self.__dataCheck():
+            return self.data
+        return None
+
+    def getPersistentTools(self):
+        """Return the list of persistent tool names, or None if unavailable."""
+        if self.__dataCheck():
+            return list(self.data["persistent"].keys())
+        return None
+
+    def getTransientTools(self):
+        """Return the list of transient tool names, or None if unavailable."""
+        if self.__dataCheck():
+            return list(self.data["transient"].keys())
+        return None
+
+    def getProperties(self, tool):
+        """Return the properties recorded for the named tool, looking in the
+        persistent table first and then the transient one.
+
+        Returns None for an unknown tool or when the data is unavailable.
+        """
+        if not self.__dataCheck():
+            return None
+        for kind in ("persistent", "transient"):
+            tools = self.data[kind]
+            if tool in tools:
+                return tools[tool]
+        return None
+
+    def loadIntoRedis(self, info):
+        """Copy the meta data JSON file into the Redis server's
+        "tool-metadata" key.
+
+        In "redis" mode, info is the path of the JSON file to load; in
+        "json" mode, info is the Redis server object to load into.
+
+        Raises ToolMetadataExc when the given JSON file does not exist; a
+        failure reading the file or storing it is logged and re-raised.
+        """
+        if self.mode == "redis":
+            try:
+                self.json = Path(info).resolve(strict=True)
+            except FileNotFoundError:
+                raise ToolMetadataExc(f"missing {info}")
+        elif self.mode == "json":
+            self.redis_server = info
+
+        try:
+            with self.json.open("r") as json_file:
+                metadata = json.load(json_file)
+                self.redis_server.set("tool-metadata", json.dumps(metadata))
+        except Exception:
+            self.logger.error("Failed to load the data into redis")
+            raise
+        return None

From 0b88e17ac268d4ca117265c22d1ad7573466a30a Mon Sep 17 00:00:00 2001
From: Keshav Maheshwari <keshavm02@gmail.com>
Date: Mon, 27 Jul 2020 13:08:57 -0400
Subject: [PATCH 2/2] Added DCGM Tool to pbench-agent (keshavm02)

Co-authored-by: maxusmusti <meyceoz@redhat.com>
---
 agent/tool-scripts/dcgm                       | 24 +++++++++++++++++++
 agent/tool-scripts/meta.json                  |  3 ++-
 .../gold/pbench-register-tool/test-44.txt     |  1 +
 .../gold/pbench-register-tool/test-46.txt     |  1 +
 .../gold/pbench-register-tool/test-47.txt     |  1 +
 lib/pbench/agent/tool_meister.py              | 16 +++++++++++++
 6 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100755 agent/tool-scripts/dcgm

diff --git a/agent/tool-scripts/dcgm b/agent/tool-scripts/dcgm
new file mode 100755
index 0000000000..fdee80860f
--- /dev/null
+++ b/agent/tool-scripts/dcgm
@@ -0,0 +1,34 @@
+#!/usr/bin/python3
+# -*- mode: python -*-
+
+"""dcgm - print registration help for the "dcgm" tool.
+
+Invoked with exactly "--help", the tool's registration options are listed;
+any other invocation only reports that running this script is deprecated.
+"""
+
+import logging
+import os
+import sys
+
+PROG = os.path.basename(sys.argv[0])
+logger = logging.getLogger(PROG)
+logger.setLevel(logging.DEBUG)
+sh = logging.StreamHandler()
+sh.setLevel(logging.DEBUG)
+shf = logging.Formatter("%(message)s")
+sh.setFormatter(shf)
+logger.addHandler(sh)
+
+if sys.argv[1:] != ["--help"]:
+    logger.info(
+        "This script is deprecated, please run it with --help for info on"
+        " registering the tool."
+    )
+    logger.info("Run /opt/pbench-agent/tool-scripts/dcgm --help for more info.")
+    # Use sys.exit(): the bare exit() helper is only injected by the site
+    # module and is not guaranteed to exist (e.g. under "python3 -S").
+    sys.exit(0)
+
+logger.info("Options:")
+logger.info("--inst=<LOCATION OF dcgm INSTALL> (required)")
diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json
index 3de7b5032a..b9b265b4d1 100644
--- a/agent/tool-scripts/meta.json
+++ b/agent/tool-scripts/meta.json
@@ -42,6 +42,7 @@
 	},
 
 	"persistent":{
-		"node-exporter": {"collector": "prometheus", "port": "9100"}
+		"node-exporter": {"collector": "prometheus", "port": "9100"},
+		"dcgm": {"collector": "prometheus", "port": "8000"}
 	}
 }
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-44.txt b/agent/util-scripts/gold/pbench-register-tool/test-44.txt
index 34a86880f9..4b15e6a57b 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-44.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-44.txt
@@ -56,6 +56,7 @@ Available tools:
 	virsh-migrate
 	vmstat
 	node-exporter
+	dcgm
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-46.txt b/agent/util-scripts/gold/pbench-register-tool/test-46.txt
index a459815044..605883fab7 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-46.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-46.txt
@@ -56,6 +56,7 @@ Available tools:
 	virsh-migrate
 	vmstat
 	node-exporter
+	dcgm
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/agent/util-scripts/gold/pbench-register-tool/test-47.txt b/agent/util-scripts/gold/pbench-register-tool/test-47.txt
index e60db9fcf9..f43f893c7c 100644
--- a/agent/util-scripts/gold/pbench-register-tool/test-47.txt
+++ b/agent/util-scripts/gold/pbench-register-tool/test-47.txt
@@ -56,6 +56,7 @@ Available tools:
 	virsh-migrate
 	vmstat
 	node-exporter
+	dcgm
 
 For a list of tool specific options, run:
 	/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/<tool-name> --help
diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py
index 12f0f9f22f..988d8db64f 100644
--- a/lib/pbench/agent/tool_meister.py
+++ b/lib/pbench/agent/tool_meister.py
@@ -117,6 +117,22 @@ def start(self):
             self.process = subprocess.Popen(
                 args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
             )
+        elif self.name == "dcgm":
+            os.environ["PYTHONPATH"] = (
+                self.install_path
+                + "/bindings:"
+                + self.install_path
+                + "/bindings/common"
+            )
+
+            script_path = self.install_path + "/samples/scripts/dcgm_prometheus.py"
+            if not os.path.isfile(script_path):
+                self.logger.info(script_path + " does not exist")
+                self.failure = True
+                return 0
+
+            args = [f"python2 {script_path}"]
+            self.process = subprocess.Popen(args, shell=True)
         else:
             self.logger.error("INVALID PERSISTENT TOOL NAME")
             self.failure = True