From fe15a81e2ad5422abf5f8faca79693f603b9cc70 Mon Sep 17 00:00:00 2001
From: Nick Dokos <ndokos@redhat.com>
Date: Mon, 16 May 2016 12:43:00 -0400
Subject: [PATCH 1/3] Catch pipeline failures and return proper error status.

Scripts that use pipelines of the form "ssh remote command | process
output" do not deal with ssh errors properly: by default, a pipeline
returns the status of its last command, so an ssh failure is masked.

Set the pipefail option in base: that way pipelines will return the
status of the last failed command in the pipeline, not that of the
last command.

o pbench-register-tool now exits with the correct status of the
  pipeline.  pbench-register-tool-set counts pbench-register-tool
  failures and exits with status equal to the number of failed
  pbench-register-tool calls.

o Extend ssh status checking to pbench-clear-tools.

  There are two cases to consider: pbench-clear-tools is called
  with or without a --name=foo option.

  In the first case, the intent is to clear a single
  tool. pbench-clear-tools will try to ssh to all the remotes and
  clear the tool; it will also try to count how many tools are left
  (with another ssh) and if none, it will delete the local @remote
  entry.  If the first ssh fails, we do not continue to the second: we
  just count it as a failure, but since we cannot find out the state
  of the remote, we assume that it still has tools left and we do not
  remove the local entry for the remote.

  In the second case, the intent is to clear all tools, so even if the
  ssh fails, we remove the local entry for the remote. In other words,
  pbench-clear-tools with no --name argument might leave junk lying
  around on the remote if we can't get to it, but it clears everything
  locally.

  In either case, we return the number of ssh failures as the exit
  status of the command.

o pbench-start-tools now checks its ssh pipeline for errors and returns
  status properly.

  In addition, pbench-start-tools is modified to call
  pbench-kill-tools before starting a new run. It has often been the
  case that tools from earlier runs are not cleaned up properly,
  particularly when the run is interrupted. Although that is a problem
  that should be resolved in the calling script, using "trap
  'pbench-kill-tools' INT QUIT EXIT", this is an attempt to make tool
  handling more robust and avoid the most common failure scenarios.
---
 agent/base                                  |  4 ++++
 agent/util-scripts/pbench-clear-tools       | 15 +++++++++++++++
 agent/util-scripts/pbench-postprocess-tools | 13 +++++++++++++
 agent/util-scripts/pbench-register-tool     |  1 +
 agent/util-scripts/pbench-register-tool-set | 11 +++++++++++
 5 files changed, 44 insertions(+)

diff --git a/agent/base b/agent/base
index ad491adb7b..772cf1a982 100755
--- a/agent/base
+++ b/agent/base
@@ -1,6 +1,10 @@
 #!/bin/bash
 
 export PBENCH_debug_mode=0
+# pipeline status is set to the status of the last command that *failed*
+# in the pipeline (or 0 if all succeed): this way "ssh foo | sed '...' "
+# will catch any ssh failure
+set -o pipefail
 
 # very first thing to do is figure out which pbench
 # we are
diff --git a/agent/util-scripts/pbench-clear-tools b/agent/util-scripts/pbench-clear-tools
index 932b944316..9b9a255d44 100755
--- a/agent/util-scripts/pbench-clear-tools
+++ b/agent/util-scripts/pbench-clear-tools
@@ -1,4 +1,5 @@
 #!/bin/bash
+# -*- mode: shell-script; indent-tabs-mode: t; sh-basic-offset: 8; sh-indentation: 8; sh-indent-for-case-alt: + -*-
 
 script_path=`dirname $0`
 script_name=`basename $0`
@@ -58,6 +59,8 @@ pushd $pbench_run >/dev/null
 # this tool group's directory which stores options, etc.
 tool_group_dir="tools-$group"
 if [ -d "$tool_group_dir" ]; then
+        typeset -i nerrs=0
+        ssh_opts="$ssh_opts -o ConnectTimeout=1"
 	for this_tool_file in `/bin/ls $tool_group_dir`; do
 		if [ "$this_tool_file" == "label" ]; then
 			continue;
@@ -72,6 +75,17 @@ if [ -d "$tool_group_dir" ]; then
 			remote_hostname=`echo $this_tool_file | awk -F@ '{print $2}'`
 			echo running ". ${pbench_install_dir}/profile; pbench-clear-tools $group_opt $name_opt" on $remote_hostname
 			ssh $ssh_opts -n $remote_hostname ". ${pbench_install_dir}/profile; pbench-clear-tools $group_opt $name_opt" | sed -e 's/\(.*\)/['$remote_hostname']\1/g'
+                        rc=$?
+                        if [ $rc != 0 ] ;then
+				nerrs=$nerrs+1
+				# we can't get to the remote but if the intent is to clear all tools
+				# then we remove the local entry for the remote
+				if [ -z "$name" ] ;then
+					echo "Removing the remote $this_tool_file"
+					/bin/rm -f "$tool_group_dir/$this_tool_file"
+				fi
+				continue
+			fi
 			remaining_remote_tools=`ssh $ssh_opts -n $remote_hostname ". ${pbench_install_dir}/profile; pbench-list-tools $group_opt"`
 			if [ -z "$remaining_remote_tools" ]; then
 				echo "The remote host $remote_hostname no longer has tools registered for $group group, so the "remote@$remote" entry in the local $tool_group_dir directory will be removed"
@@ -96,3 +110,4 @@ if [ -d "$tool_group_dir" ]; then
 	fi
 fi
 popd >/dev/null
+exit $nerrs
diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools
index aad39cd9cc..bdbbeb9037 100755
--- a/agent/util-scripts/pbench-postprocess-tools
+++ b/agent/util-scripts/pbench-postprocess-tools
@@ -92,6 +92,13 @@ fi
 tool_output_dir="$dir/tools-$group"
 mkdir -p $tool_output_dir
 
+# Try to prevent a cascade of tools running.
+if [ "$action" == "start" ] ;then
+	# Kill any tools running from a previous incantation before
+	# starting this one.
+	pbench-kill-tools --group $group
+fi
+
 function move_tool_data {
 	local remote_host=$1
 	local remote_label=$2
@@ -100,6 +107,12 @@ function move_tool_data {
 	tool_data_size=`ssh $ssh_opts -n $remote_host du -sm $tool_output_dir | awk '{print $1}'`
 	debug_log "[$script_name]started: copying tool data ($tool_data_size MB) from $remote_host"
 	ssh $ssh_opts -n $remote_host "cd $tool_output_dir && tar cf - *" | tar mxf -
+	rc=$?
+	if [ $rc != 0 ] ;then
+		debug_log "[$script_name]copying tool data failed for remote $remote_host"
+		popd > /dev/null
+		return $rc
+	fi
 	# if the full hostname was used in pbench-register-tool --remote, make sure that is preserved in the directory name
 	if [ $remote_host != "$remote_shost" ]; then
 		if [ -e "$remote_label:$remote_shost" ]; then
diff --git a/agent/util-scripts/pbench-register-tool b/agent/util-scripts/pbench-register-tool
index d462c30c5b..21fcbee787 100755
--- a/agent/util-scripts/pbench-register-tool
+++ b/agent/util-scripts/pbench-register-tool
@@ -179,3 +179,4 @@ else    # register this tool on the remote host
 		echo "$label" >"$this_tool_file"
 	fi
 fi
+exit $rc
diff --git a/agent/util-scripts/pbench-register-tool-set b/agent/util-scripts/pbench-register-tool-set
index 2b810bb4dd..9cd5a3299f 100755
--- a/agent/util-scripts/pbench-register-tool-set
+++ b/agent/util-scripts/pbench-register-tool-set
@@ -90,13 +90,24 @@ if [ -z "$interval" ] ;then
 	interval=3
 fi
 
+typeset -i nerrs=0
 default_tool_set=$(getconf.py -l default-tool-set pbench/tools)
 case "$toolset" in
 	default)
 	for i in $default_tool_set; do
 		pbench-register-tool --name=$i $remote $label $group -- --interval="$interval"
+		rc=$?
+		if [ $rc != 0 ] ;then
+			nerrs=$nerrs+1
+		fi
 	done
 	# low overhead perf
 	pbench-register-tool --name=perf $remote $label $group -- --record-opts="record -a --freq=100"
+	rc=$?
+	if [ $rc != 0 ] ;then
+		nerrs=$nerrs+1
+	fi
 	;;
 esac
+
+exit $nerrs

From cf59a069bf67ba17d7d3d32b7eb27149a9eca942 Mon Sep 17 00:00:00 2001
From: Nick Dokos <ndokos@redhat.com>
Date: Tue, 12 Jul 2016 14:40:02 -0400
Subject: [PATCH 2/3] Check status of backgrounded commands in
 pbench-postprocess-tools.

Replace wait with a loop of "wait $pid" for each background process
created, so we can get the exit status of the process. Each non-zero
status counts as an error. The total number of errors is then returned
as the status of pbench-postprocess-tools.
---
 agent/util-scripts/pbench-postprocess-tools | 25 +++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools
index bdbbeb9037..28c3459e2f 100755
--- a/agent/util-scripts/pbench-postprocess-tools
+++ b/agent/util-scripts/pbench-postprocess-tools
@@ -74,6 +74,7 @@ while true; do
 	esac
 done
 
+typeset -i nerrs=0
 iteration_num=`echo $iteration | awk -F- '{print $1}'`
 # this tool group's directory which stores options, etc.
 if [ -d "$pbench_run/tools-$group" ]; then
@@ -125,11 +126,14 @@ function move_tool_data {
 	debug_log "[$script_name]completed: copying of tool data on $remote_host"
 	debug_log "[$script_name]started: deleting tool data on $remote_host"
 	ssh $ssh_opts -n $remote_host "cd $tool_output_dir && /bin/rm -rf *"
+	rc=$?
 	debug_log "[$script_name]completed: deleting tool data on $remote_host"
 	popd >/dev/null
+	return $rc
 }
 
 ### phase 1: for each tool, call the tool script with --$action (start, stop, or postprocess)
+pids=""
 for this_tool_file in `/bin/ls $tool_group_dir`; do
 	if [ "$this_tool_file" == "label" ]; then
 		continue;
@@ -141,6 +145,7 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
 		# tool options are stored on the remote host's tool file, so no need to pass it here
 		debug_log "[$script_name]running this tool on $remote: ssh $ssh_opts -n $remote pbench-$action-tools --iteration=$iteration --group=$group --dir=$dir"
 		ssh $ssh_opts -n $remote pbench-$action-tools --iteration=$iteration --group=$group --dir=$dir &
+		pids="$pids $!"
 	else
 		# tool is local
 		# assemble the tool options in to an array
@@ -172,12 +177,19 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
 				fi
 			else
 				$pbench_bin/tool-scripts/$name --$action --iteration=$iteration --group=$group --dir=$dir "${tool_opts[@]}" &
+				pids="$pids $!"
 			fi
 		fi
 		
 	fi
 done
-wait
+for p in $pids ;do
+	wait $p
+	rc=$?
+	if [[ $rc -ne 0 ]] ;then
+		nerrs=$nerrs+1
+	fi
+done
 if [ "$action" == "postprocess" ]; then
 	# phase 2: now that the local results are ready, move them
 	# down to $tool_output_dir/[$label:]$hostname.
@@ -198,12 +210,21 @@ if [ "$action" == "postprocess" ]; then
 	
 	### phase 3: copy over data from remote hosts
 	# for the remote tools, copy over the postprocess data
+	pids=""
 	for this_tool_file in `/bin/ls $tool_group_dir | grep "^remote"`; do
 		remote_hostname=`echo "$this_tool_file" | awk -F@ '{print $2}'`
 		label=`cat $tool_group_dir/$this_tool_file`
 		# copy over the data from postprocessing
 		move_tool_data $remote_hostname $label &
+		pids="$pids $!"
+	done
+	for p in $pids ;do
+		wait $p
+		rc=$?
+		if [[ $rc -ne 0 ]] ;then
+			nerrs=$nerrs+1
+		fi
 	done
-	wait
 fi
 debug_log "[$script_name]completed: $@"
+exit $nerrs

From f4f5618456dc41cb1fc41e480e0f71622123516a Mon Sep 17 00:00:00 2001
From: Nick Dokos <ndokos@redhat.com>
Date: Mon, 11 Jul 2016 18:12:16 -0400
Subject: [PATCH 3/3] Add unit tests.

Modify the mock ssh to return a failure for a given host name ("fubar").
Modify unittests so that a test passes when it is expected to fail
and does so with the expected non-zero status code.
---
 agent/bench-scripts/test-bin/ssh              | 21 ++++
 .../gold/pbench-clear-tools/test-12.txt       | 16 ++++
 .../gold/pbench-postprocess-tools/test-13.txt | 25 +++++
 .../gold/pbench-register-tool-set/test-11.txt | 26 +++++
 .../gold/pbench-stop-tools/test-05.txt        |  2 +-
 .../gold/pbench-stop-tools/test-06.txt        |  2 +-
 agent/util-scripts/pbench-postprocess-tools   |  2 +-
 agent/util-scripts/samples/pbench-agent.cfg   |  2 +
 .../test-12/tools-default/mpstat              |  1 +
 .../test-12/tools-default/remote@fubar        |  0
 .../test-13/tools-default/remote@fubar        |  0
 agent/util-scripts/unittests                  | 96 ++++++++++++-------
 12 files changed, 158 insertions(+), 35 deletions(-)
 create mode 100644 agent/util-scripts/gold/pbench-clear-tools/test-12.txt
 create mode 100644 agent/util-scripts/gold/pbench-postprocess-tools/test-13.txt
 create mode 100644 agent/util-scripts/gold/pbench-register-tool-set/test-11.txt
 create mode 100644 agent/util-scripts/samples/pbench-agent.cfg
 create mode 100644 agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/mpstat
 create mode 100644 agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/remote@fubar
 create mode 100644 agent/util-scripts/samples/pbench-postprocess-tools/test-13/tools-default/remote@fubar

diff --git a/agent/bench-scripts/test-bin/ssh b/agent/bench-scripts/test-bin/ssh
index 8a0a0c0d7f..5970ea4657 100755
--- a/agent/bench-scripts/test-bin/ssh
+++ b/agent/bench-scripts/test-bin/ssh
@@ -5,3 +5,24 @@ echo "$0 $*" >> $_testlog
 if [[ "$4" == "netstat" && "$5" == "-tlpn" ]]; then
     echo "tcp        0      0 0.0.0.0:21000               0.0.0.0:*                   LISTEN      5830/uperf"
 fi
+
+while true ;do
+    case $1 in
+        -o)
+            shift 2
+            ;;
+        -n)
+            shift 1
+            ;;
+        *)
+            break
+            ;;
+    esac
+done
+remote=$1
+
+if [[ "$remote" == "fubar" ]] ;then
+    exit 255
+else
+    exit 0
+fi
diff --git a/agent/util-scripts/gold/pbench-clear-tools/test-12.txt b/agent/util-scripts/gold/pbench-clear-tools/test-12.txt
new file mode 100644
index 0000000000..7841bcd507
--- /dev/null
+++ b/agent/util-scripts/gold/pbench-clear-tools/test-12.txt
@@ -0,0 +1,16 @@
++++ Running test-12 pbench-clear-tools
+removing tools-default/mpstat
+running . /opt/pbench-agent/profile; pbench-clear-tools --group default  on fubar
+Removing the remote remote@fubar
+--- Finished test-12 pbench-clear-tools (status=1}
++++ pbench tree state
+/var/tmp/pbench-test-utils/pbench
+/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/tools-default
+--- pbench tree state
++++ pbench.log file contents
+grep: /var/tmp/pbench-test-utils/pbench/pbench.log: No such file or directory
+--- pbench.log file contents
++++ test-execution.log file contents
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -o ConnectTimeout=1 -n fubar . /opt/pbench-agent/profile; pbench-clear-tools --group default 
+--- test-execution.log file contents
diff --git a/agent/util-scripts/gold/pbench-postprocess-tools/test-13.txt b/agent/util-scripts/gold/pbench-postprocess-tools/test-13.txt
new file mode 100644
index 0000000000..07c896c15b
--- /dev/null
+++ b/agent/util-scripts/gold/pbench-postprocess-tools/test-13.txt
@@ -0,0 +1,25 @@
++++ Running test-13 pbench-postprocess-tools
+tar: This does not look like a tar archive
+tar: Exiting with failure status due to previous errors
+--- Finished test-13 pbench-postprocess-tools (status=2}
++++ pbench tree state
+/var/tmp/pbench-test-utils/pbench
+/var/tmp/pbench-test-utils/pbench/pbench.log
+/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/tmp/tools-default
+/var/tmp/pbench-test-utils/pbench/tools-default
+/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar
+/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar:
+--- pbench tree state
++++ pbench.log file contents
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]started: --dir=/var/tmp/pbench-test-utils/pbench/tmp --group=default --iteration=1
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]running this tool on fubar: ssh -o StrictHostKeyChecking=no -n fubar pbench-postprocess-tools --iteration=1 --group=default --dir=/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]started: copying tool data ( MB) from fubar
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]copying tool data failed for remote fubar
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]completed: 
+--- pbench.log file contents
++++ test-execution.log file contents
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar pbench-postprocess-tools --iteration=1 --group=default --dir=/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar du -sm /var/tmp/pbench-test-utils/pbench/tmp/tools-default
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar cd /var/tmp/pbench-test-utils/pbench/tmp/tools-default && tar cf - *
+--- test-execution.log file contents
diff --git a/agent/util-scripts/gold/pbench-register-tool-set/test-11.txt b/agent/util-scripts/gold/pbench-register-tool-set/test-11.txt
new file mode 100644
index 0000000000..f67a6feee1
--- /dev/null
+++ b/agent/util-scripts/gold/pbench-register-tool-set/test-11.txt
@@ -0,0 +1,26 @@
++++ Running test-11 pbench-register-tool-set
+--- Finished test-11 pbench-register-tool-set (status=4}
++++ pbench tree state
+/var/tmp/pbench-test-utils/pbench
+/var/tmp/pbench-test-utils/pbench/pbench.log
+/var/tmp/pbench-test-utils/pbench/tmp
+/var/tmp/pbench-test-utils/pbench/tools-default
+/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar
+/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar:
+--- pbench tree state
++++ pbench.log file contents
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=mpstat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=vmstat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=iostat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --record-opts=record -a --freq=100
+/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--record-opts=record -a --freq=100"; pbench-register-tool --name=perf --group=default $label_opt -- "${tool_opts[@]}" 2>&1
+--- pbench.log file contents
++++ test-execution.log file contents
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=mpstat --group=default  -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=vmstat --group=default  -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=iostat --group=default  -- "${tool_opts[@]}" 2>&1
+/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--record-opts=record -a --freq=100"; pbench-register-tool --name=perf --group=default  -- "${tool_opts[@]}" 2>&1
+--- test-execution.log file contents
diff --git a/agent/util-scripts/gold/pbench-stop-tools/test-05.txt b/agent/util-scripts/gold/pbench-stop-tools/test-05.txt
index 655afbdbc3..7df719af50 100644
--- a/agent/util-scripts/gold/pbench-stop-tools/test-05.txt
+++ b/agent/util-scripts/gold/pbench-stop-tools/test-05.txt
@@ -1,6 +1,6 @@
 +++ Running test-05 pbench-stop-tools
 [warn][1900-01-01T00:00:00.000000] Too many pids for turbostat: 123463 123464 123465 123466 123467 -- maybe old tools running? Use pbench-kill-tools.
---- Finished test-05 pbench-stop-tools (status=0}
+--- Finished test-05 pbench-stop-tools (status=1}
 +++ pbench tree state
 /var/tmp/pbench-test-utils/pbench
 /var/tmp/pbench-test-utils/pbench/pbench.log
diff --git a/agent/util-scripts/gold/pbench-stop-tools/test-06.txt b/agent/util-scripts/gold/pbench-stop-tools/test-06.txt
index be444125e8..4283ce4e57 100644
--- a/agent/util-scripts/gold/pbench-stop-tools/test-06.txt
+++ b/agent/util-scripts/gold/pbench-stop-tools/test-06.txt
@@ -1,5 +1,5 @@
 +++ Running test-06 pbench-stop-tools
---- Finished test-06 pbench-stop-tools (status=0}
+--- Finished test-06 pbench-stop-tools (status=1}
 +++ pbench tree state
 /var/tmp/pbench-test-utils/pbench
 /var/tmp/pbench-test-utils/pbench/pbench.log
diff --git a/agent/util-scripts/pbench-postprocess-tools b/agent/util-scripts/pbench-postprocess-tools
index 28c3459e2f..594ce3c284 100755
--- a/agent/util-scripts/pbench-postprocess-tools
+++ b/agent/util-scripts/pbench-postprocess-tools
@@ -40,7 +40,7 @@ if [ $? -ne 0 ]; then
 	printf -- "\t                        will store and process data\n"
 	printf "\n"
 	printf -- "\t-i str --iteration=num, num = a number representing the\n"
-	printf -- "\t                              iteration data was collected for"
+	printf -- "\t                              iteration data was collected for\n"
 	exit 1
 fi
 eval set -- "$opts";
diff --git a/agent/util-scripts/samples/pbench-agent.cfg b/agent/util-scripts/samples/pbench-agent.cfg
new file mode 100644
index 0000000000..7028ea832e
--- /dev/null
+++ b/agent/util-scripts/samples/pbench-agent.cfg
@@ -0,0 +1,2 @@
+[pbench/tools]
+default-tool-set = mpstat, vmstat, iostat
diff --git a/agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/mpstat b/agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/mpstat
new file mode 100644
index 0000000000..160ac3d855
--- /dev/null
+++ b/agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/mpstat
@@ -0,0 +1 @@
+--interval: 34
diff --git a/agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/remote@fubar b/agent/util-scripts/samples/pbench-clear-tools/test-12/tools-default/remote@fubar
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/agent/util-scripts/samples/pbench-postprocess-tools/test-13/tools-default/remote@fubar b/agent/util-scripts/samples/pbench-postprocess-tools/test-13/tools-default/remote@fubar
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/agent/util-scripts/unittests b/agent/util-scripts/unittests
index bb251703de..3e2d1e11a9 100755
--- a/agent/util-scripts/unittests
+++ b/agent/util-scripts/unittests
@@ -25,12 +25,14 @@ let res=res+$?
 mkdir -p $_testopt/util-scripts
 let res=res+$?
 cp $_tdir/../base $_testopt/
+let res=res+$?	# record the status of copying base before $? is overwritten below
+mkdir -p $_testopt/config && cp $_tdir/samples/pbench-agent.cfg $_testopt/config
 let res=res+$?
 mkdir -p $_testopt/tool-scripts
 let res=res+$?
 cp $_tdir/../tool-scripts/{iostat,kvmstat,mpstat,numastat,perf,pidstat,proc-interrupts,proc-vmstat,sar,turbostat,vmstat} $_testopt/tool-scripts
 let res=res+$?
-scripts="$_tdir/pbench-register-tool $_tdir/pbench-metadata-log"
+# scripts="$_tdir/pbench-register-tool $_tdir/pbench-metadata-log"
 scripts="$_tdir/pbench-*"
 for script in $scripts ; do
     cp $script $_testopt/util-scripts/
@@ -49,11 +51,14 @@ fi
 # Fixed timestamp output
 export _PBENCH_BENCH_TESTS=1
 # Allows us to intercept scp, ssh, rsync, etc.
-export PATH=$_testopt/tool-scripts:$_testopt/unittest-scripts:$_tconfigtoolsbin:$PATH
+export PATH=$_testopt/util-scripts:$_testopt/tool-scripts:$_testopt/unittest-scripts:$PATH
+export CONFIG=$_testopt/config/pbench-agent.cfg
 
 res=0
 
 function _run {
+    local sts
+    
     tname=$1
     shift
     tscrpt=$1
@@ -70,10 +75,12 @@ function _save_tree {
     echo "+++ pbench tree state" >> $_testout
     find $_testdir | sort >> $_testout
     if [ -d $_testdir/tools-default ] ;then
-        for x in $_testdir/tools-default/* ;do
-            echo $x:
+        pushd $_testdir/tools-default >/dev/null
+        for x in $(ls) ;do
+            echo $_testdir/tools-default/$x:
             cat $x
         done
+        popd >/dev/null
     elif [ -f $_testdir/tools.default ]; then
         echo $_testdir/tools.default: 
         cat $_testdir/tools.default
@@ -95,9 +102,11 @@ function _dump_logs {
     rm -f $_testroot/test-execution.log
 }
 function _verify_output {
+    local sts tname tscrpt expected_status
     sts=$1
     tname=$2
     tscrpt=$3
+    expected_status=${4:-0}	# local scalar: must not write into the global expected_status map
     diff -cw $_tdir/gold/${tscrpt}/${tname}.txt $_testout
     if [[ $? -gt 0 ]]; then
         echo "FAIL - $tname"
@@ -107,6 +116,10 @@ function _verify_output {
         if [[ $sts -eq 0 ]]; then
             echo "PASS - $tname"
             rm $_testout
+        elif [[ $sts -eq $expected_status ]] ; then
+            echo "PASS - $tname failed with expected exit status: $sts"
+            rm $_testout
+            sts=0
         else
             echo "FAIL - $tname: PASS output but execution returned non-zero exit status"
         fi
@@ -142,45 +155,64 @@ let errs=0
 
 tests="$*"
 if [ -z "$tests" ] ;then
-    tests="$(seq -f "test-%02g" 0 10) test-14"
+
+    tests="$(seq -f "test-%02g" 0 14)"
 fi
 
-declare -A tools=([test-00]="pbench-register-tool"
-                  [test-01]="pbench-metadata-log"
-                  [test-02]="pbench-metadata-log"
-                  [test-03]="pbench-metadata-log"
-                  [test-04]="pbench-metadata-log"
-                  [test-05]="pbench-stop-tools"
-                  [test-06]="pbench-stop-tools"
-                  [test-07]="pbench-stop-tools"
-                  [test-08]="pbench-stop-tools"
-                  [test-09]="pbench-stop-tools"
-                  [test-10]="pbench-stop-tools"
-                  [test-14]="pbench-agent-config-activate"
-                 )
-declare -A options=([test-00]="--name=mpstat --group=default -- --interval=10"
-                    [test-01]="--dir=$_testdir/tmp beg"
-                    [test-02]="--dir=$_testdir/tmp beg"
-                    [test-03]="--dir=$_testdir/tmp beg"
-                    [test-04]="--dir=$_testdir/tmp beg"
-                    [test-05]="--dir=$_testdir/tmp"
-                    [test-06]="--dir=$_testdir/tmp"
-                    [test-07]="--dir=$_testdir/tmp"
-                    [test-08]="--dir=$_testdir/tmp"
-                    [test-09]="--dir=$_testdir/tmp"
-                    [test-10]="--dir=$_testdir/tmp"
-                    [test-14]="$_testdir/tmp/pbench-agent.cfg"
-                   )
+declare -A tools=(
+    [test-00]="pbench-register-tool"
+    [test-01]="pbench-metadata-log"
+    [test-02]="pbench-metadata-log"
+    [test-03]="pbench-metadata-log"
+    [test-04]="pbench-metadata-log"
+    [test-05]="pbench-stop-tools"
+    [test-06]="pbench-stop-tools"
+    [test-07]="pbench-stop-tools"
+    [test-08]="pbench-stop-tools"
+    [test-09]="pbench-stop-tools"
+    [test-10]="pbench-stop-tools"
+    [test-11]="pbench-register-tool-set"
+    [test-12]="pbench-clear-tools"
+    [test-13]="pbench-postprocess-tools"
+    [test-14]="pbench-agent-config-activate"
+)
+declare -A options=(
+    [test-00]="--name=mpstat --group=default -- --interval=10"
+    [test-01]="--dir=$_testdir/tmp beg"
+    [test-02]="--dir=$_testdir/tmp beg"
+    [test-03]="--dir=$_testdir/tmp beg"
+    [test-04]="--dir=$_testdir/tmp beg"
+    [test-05]="--dir=$_testdir/tmp"
+    [test-06]="--dir=$_testdir/tmp"
+    [test-07]="--dir=$_testdir/tmp"
+    [test-08]="--dir=$_testdir/tmp"
+    [test-09]="--dir=$_testdir/tmp"
+    [test-10]="--dir=$_testdir/tmp"
+    # fubar is assumed to *NOT* exist
+    [test-11]="--remote=fubar"
+    [test-13]="--dir=$_testdir/tmp --group=default --iteration=1"
+    [test-14]="$_testdir/tmp/pbench-agent.cfg"
+)
+
+declare -A expected_status=(
+    [test-05]=1
+    [test-06]=1
+    [test-11]=4
+    [test-12]=1
+    [test-13]=2
+)
 
 for tst in $tests; do
     tool=${tools[$tst]}
     opts=${options[$tst]}
+    status=${expected_status[$tst]}
+
     _setup_state $tst $tool
     _run $tst $tool $opts
     res=$?
     _save_tree
     _dump_logs
-    _verify_output $res $tst $tool
+    _verify_output $res $tst $tool $status
     res=$?
     let errs=$errs+$res
     _reset_state