From 75a6578333ad12d7c85efcc14aef5063594a8564 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 8 May 2018 14:47:47 -0700 Subject: [PATCH 1/7] wreck: add new completing state --- src/common/libjsc/jstatctl.c | 1 + src/common/libjsc/jstatctl.h | 1 + src/modules/wreck/wrexecd.c | 3 +++ t/t2000-wreck.t | 3 ++- t/t2001-jsc.t | 6 ++++-- 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/common/libjsc/jstatctl.c b/src/common/libjsc/jstatctl.c index 9f5c5777c43a..774b0454a4bc 100644 --- a/src/common/libjsc/jstatctl.c +++ b/src/common/libjsc/jstatctl.c @@ -78,6 +78,7 @@ static stab_t job_state_tab[] = { { J_STOPPED, "stopped" }, { J_RUNNING, "running" }, { J_CANCELLED, "cancelled" }, + { J_COMPLETING, "completing" }, { J_COMPLETE, "complete" }, { J_REAPED, "reaped" }, { J_FAILED, "failed" }, diff --git a/src/common/libjsc/jstatctl.h b/src/common/libjsc/jstatctl.h index f35108afce0d..8b8993bea1e4 100644 --- a/src/common/libjsc/jstatctl.h +++ b/src/common/libjsc/jstatctl.h @@ -51,6 +51,7 @@ typedef enum { J_STOPPED, /*!< Stopped *including init barrier hit for a tool) */ J_RUNNING, /*!< Running */ J_CANCELLED, /*!< Cancelled */ + J_COMPLETING,/*!< Completing */ J_COMPLETE, /*!< Completed */ J_REAPED, /*!< Reaped */ J_FAILED, /*!< Failed */ diff --git a/src/modules/wreck/wrexecd.c b/src/modules/wreck/wrexecd.c index fdfbb1e6bbd2..604598b91139 100644 --- a/src/modules/wreck/wrexecd.c +++ b/src/modules/wreck/wrexecd.c @@ -2408,6 +2408,9 @@ int main (int ac, char **av) } if (exec_rc == 0) { + rexec_state_change (ctx, "completing"); + lua_stack_call (ctx->lua_stack, "rexecd_complete"); + rexec_state_change (ctx, "complete"); wlog_msg (ctx, "job complete. 
exiting..."); diff --git a/t/t2000-wreck.t b/t/t2000-wreck.t index f27576cfbb8a..34b113026401 100755 --- a/t/t2000-wreck.t +++ b/t/t2000-wreck.t @@ -129,11 +129,12 @@ test_expect_success 'wreck: job state events emitted' ' $SHARNESS_TEST_SRCDIR/scripts/event-trace.lua \ wreck.state wreck.state.complete \ flux wreckrun -n${SIZE} /bin/true > output && - tail -4 output > output_states && # only care about last 4 + tail -5 output > output_states && # only care about last 5 cat >expected_states <<-EOF && wreck.state.reserved wreck.state.starting wreck.state.running + wreck.state.completing wreck.state.complete EOF test_cmp expected_states output_states diff --git a/t/t2001-jsc.t b/t/t2001-jsc.t index 279a5879e490..70c2cb0c323e 100755 --- a/t/t2001-jsc.t +++ b/t/t2001-jsc.t @@ -14,11 +14,13 @@ fi tr1="null->reserved" tr2="reserved->starting" tr3="starting->running" -tr4="running->complete" +tr4="running->completing" +tr5="completing->complete" trans="$tr1 $tr2 $tr3 -$tr4" +$tr4 +$tr5" # Return previous job path in kvs last_job_path() { From 99415148ad970f5508525578fd563222ec8ab43c Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 8 May 2018 19:54:09 -0700 Subject: [PATCH 2/7] wreck: add support for epilog.pre and epilog.post scripts Add support for user-specific scripts run after all tasks have exited via a new epilog.lua wrexecd plugin. Two types of epilog scripts are supported: - An "epilog.pre" script is run after all tasks have exited but before the job is placed in the "complete" state. During this time, the job will be in the "completing" state. - An "epilog.post" script is run after the job state has been set to "complete". Note, this script may run after resources associated with the job have been returned to the system. Similar to the wreck environment variables, global `lwj.epilog.pre` and `lwj.epilog.post` may be set and will be inherited by all jobs run within the current instance. 
Per-job epilogs set in the kvs directory of the job will override the global settings. --- src/modules/wreck/Makefile.am | 1 + src/modules/wreck/lua.d/epilog.lua | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 src/modules/wreck/lua.d/epilog.lua diff --git a/src/modules/wreck/Makefile.am b/src/modules/wreck/Makefile.am index fac2b4e86956..35e541ece866 100644 --- a/src/modules/wreck/Makefile.am +++ b/src/modules/wreck/Makefile.am @@ -76,6 +76,7 @@ dist_wreckscripts_SCRIPTS = \ lua.d/02-affinity.lua \ lua.d/timeout.lua \ lua.d/output.lua \ + lua.d/epilog.lua \ lua.d/input.lua \ lua.d/mvapich.lua \ lua.d/pmi-mapping.lua \ diff --git a/src/modules/wreck/lua.d/epilog.lua b/src/modules/wreck/lua.d/epilog.lua new file mode 100644 index 000000000000..c7a32b41b8a8 --- /dev/null +++ b/src/modules/wreck/lua.d/epilog.lua @@ -0,0 +1,21 @@ +local posix = require 'flux.posix' + +-- execute a path from kvs `key` +local function run_kvs (key) + local epilog = wreck.kvsdir [key] or wreck.flux:kvs_get ("lwj."..key) + if not epilog then return end + return os.execute (epilog) +end + +function rexecd_complete () + local rc, err = run_kvs ("epilog.pre") + if not rc then wreck:log_msg ("error: epilog: %s", err) end +end + +-- rexecd_exit callback happens after the job is in the complete state +function rexecd_exit () + local rc, err = run_kvs ("epilog.post") + if not rc then wreck:log_msg ("error: epilog.post: %s", err) end +end + +-- vi: ts=4 sw=4 expandtab From 92e0c31ce60b5465c8450b9688bf11964eaf3424 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 8 May 2018 20:24:26 -0700 Subject: [PATCH 3/7] wreck: allow wreckrun to exit at completing state Once a job hits completing state and all task io has completed, wreckrun can exit. There is no reason to wait for the job epilog to finish. 
--- src/cmd/flux-wreckrun | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/flux-wreckrun b/src/cmd/flux-wreckrun index 397419ca18de..09211ef9b783 100755 --- a/src/cmd/flux-wreckrun +++ b/src/cmd/flux-wreckrun @@ -189,7 +189,7 @@ local function check_job_completed () wreck:die ("job %d failed\n", jobid) end if (not taskio or taskio:complete()) and - (state == "complete" or state == "reaped") then + (state == "completing" or state == "complete" or state == "reaped") then local rc = lwj_return_code (f, wreck, jobid) if rc == 0 then wreck:verbose ("%.3fs: All tasks completed successfully.\n", From 51b1056e6a57b74b03d9b17e5cd20a8bc90edbd5 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 8 May 2018 20:26:54 -0700 Subject: [PATCH 4/7] wreck: add options to wreckrun and submit for epilogs Add -x, --epilog=script and -p, --postscript=SCRIPT convenience options for wreckrun and flux-submit to set per-job epilog.pre and epilog.post. --- src/bindings/lua/wreck.lua | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/bindings/lua/wreck.lua b/src/bindings/lua/wreck.lua index 583d328185bb..312d4352b1ce 100644 --- a/src/bindings/lua/wreck.lua +++ b/src/bindings/lua/wreck.lua @@ -54,6 +54,9 @@ local default_opts = { ['input'] = { char = "i", arg = "HOW" }, ['label-io'] = { char = "l", }, ['skip-env'] = { char = "S", }, + ['epilog'] = { char = "x", arg = "SCRIPT" }, + ['postscript'] = + { char = "p", arg = "SCRIPT" }, ['options'] = { char = 'o', arg = "OPTIONS.." }, } @@ -123,6 +126,9 @@ function wreck:usage() -E, --error=FILENAME Send stderr to a different location than stdout. -l, --labelio Prefix lines of output with task id -S, --skip-env Skip export of environment to job + -x, --epilog=PATH Execute a script after all tasks exit but before + the job state is set to "complete" + -p, --postscript=PATH Execute a script after job state is "complete" ]]) for _,v in pairs (self.extra_options) do local optstr = v.name .. 
(v.arg and "="..v.arg or "") @@ -378,6 +384,8 @@ function wreck:jobreq () ["opts.cores-per-task"] = self.opts.c, ["opts.gpus-per-task"] = self.opts.g, ["opts.tasks-per-node"] = self.opts.t, + ["epilog.pre"] = self.opts.x, + ["epilog.post"] = self.opts.p, } if self.opts.o then for opt in self.opts.o:gmatch ('[^,]+') do From b5f6ddd22b209e379ef3fe74ee0e289a3dd767ca Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Wed, 9 May 2018 10:49:11 -0700 Subject: [PATCH 5/7] testsuite: add t2000-wreck-epilog.t Add t2000-wreck-epilog.t to verify epilog.pre and post functionality. --- t/Makefile.am | 2 ++ t/t2000-wreck-epilog.t | 67 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100755 t/t2000-wreck-epilog.t diff --git a/t/Makefile.am b/t/Makefile.am index fefc53166355..26f6f4b9933b 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -64,6 +64,7 @@ TESTS = \ t2000-wreck.t \ t2000-wreck-env.t \ t2000-wreck-dummy-sched.t \ + t2000-wreck-epilog.t \ t2001-jsc.t \ t2002-pmi.t \ t2003-recurse.t \ @@ -146,6 +147,7 @@ check_SCRIPTS = \ t2000-wreck.t \ t2000-wreck-env.t \ t2000-wreck-dummy-sched.t \ + t2000-wreck-epilog.t \ t2001-jsc.t \ t2002-pmi.t \ t2003-recurse.t \ diff --git a/t/t2000-wreck-epilog.t b/t/t2000-wreck-epilog.t new file mode 100755 index 000000000000..d471af261c27 --- /dev/null +++ b/t/t2000-wreck-epilog.t @@ -0,0 +1,67 @@ +#!/bin/sh +# + +test_description='Test basic wreck epilog functionality +' +. 
`dirname $0`/sharness.sh +SIZE=${FLUX_TEST_SIZE:-4} +test_under_flux ${SIZE} wreck + +# Return previous job path in kvs +last_job_path() { + flux wreck last-jobid -p +} + +epilog_path="$(pwd)/epilog.wait.sh" +kvswait=${SHARNESS_TEST_SRCDIR}/scripts/kvs-watch-until.lua +eventtrace=${SHARNESS_TEST_SRCDIR}/scripts/event-trace.lua + +# Create epilog test that will block until an 'epilog.test' event +cat <${epilog_path} +#!/bin/sh +flux event sub -c 1 epilog.test +flux event pub epilog.test.done +EOF +chmod +x ${epilog_path} + +wait_for_complete() { + $kvswait -vt 5 $1.state 'v == "complete"' +} + +test_expect_success 'flux-wreck: global epilog.pre' ' + flux kvs put --json lwj.epilog.pre="$epilog_path" && + flux wreckrun /bin/true && + LWJ=$(last_job_path) && + STATE=$(flux kvs get --json ${LWJ}.state) && + test_debug "echo job state is now ${STATE}" && + test "$STATE" = "completing" && + flux event pub epilog.test && + wait_for_complete $LWJ +' +test_expect_success 'flux-wreck: per-job epilog.pre' ' + flux kvs unlink lwj.epilog.pre && + flux wreckrun -x ${epilog_path} /bin/true && + LWJ=$(last_job_path) && + test $(flux kvs get --json ${LWJ}.epilog.pre) = "$epilog_path" && + STATE=$(flux kvs get --json ${LWJ}.state) && + test_debug "echo job state is now ${STATE}" && + test "$STATE" = "completing" && + flux event pub epilog.test && + wait_for_complete $LWJ +' +test_expect_success 'flux-wreck: global epilog.post' ' + flux kvs put --json lwj.epilog.post="$epilog_path" && + flux wreckrun /bin/true && + wait_for_complete $LWJ && + ${eventtrace} -t 5 epilog.test epilog.test.done \ + flux event pub epilog.test +' +test_expect_success 'flux-wreck: per-job epilog.post' ' + flux kvs unlink lwj.epilog.post && + flux wreckrun -p "$epilog_path" /bin/true && + wait_for_complete $LWJ && + ${eventtrace} -t 5 epilog.test epilog.test.done \ + flux event pub epilog.test +' + +test_done From 5d08cc45d13b3aa1abd7fd1345567cc7c97b76f4 Mon Sep 17 00:00:00 2001 From: "Mark A. 
Grondona" Date: Thu, 10 May 2018 10:02:27 -0700 Subject: [PATCH 6/7] libjsc: hard-code enumeration values for job_state_t Problem: Insertion of new job states into the job_state_t enumeration may inadvertently cause re-enumeration of other state values, triggering hard to find bugs and confusion for developers. Also, the states encoded in job_state_t are a mixture of scheduler and execution system states. Add explicit integers for job states encoded in the enumeration, and regroup values so that it is easy for developers to see which states are scheduler specific vs WRECK execution system states. Leave space between groups of states for near-term additions. --- src/common/libjsc/jstatctl.h | 49 +++++++++++++++++++++++------------- t/t2001-jsc.t | 2 +- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/common/libjsc/jstatctl.h b/src/common/libjsc/jstatctl.h index 8b8993bea1e4..3874ccf68186 100644 --- a/src/common/libjsc/jstatctl.h +++ b/src/common/libjsc/jstatctl.h @@ -39,23 +39,38 @@ extern "C" { * please refer to README.md */ typedef enum { - J_NULL = 1, /*!< The state has yet to be assigned */ - J_RESERVED, /*!< Reserved by the program execution service */ - J_SUBMITTED, /*!< Submitted to the system */ - J_PENDING, /*!< Pending */ - J_SCHEDREQ, /*!< Resources requested to be selected */ - J_SELECTED, /*!< Assigned to requested resource in RDL */ - J_ALLOCATED, /*!< Got allocated/contained by the program executoin service */ - J_RUNREQUEST,/*!< Requested to be executed */ - J_STARTING, /*!< Starting */ - J_STOPPED, /*!< Stopped *including init barrier hit for a tool) */ - J_RUNNING, /*!< Running */ - J_CANCELLED, /*!< Cancelled */ - J_COMPLETING,/*!< Completing */ - J_COMPLETE, /*!< Completed */ - J_REAPED, /*!< Reaped */ - J_FAILED, /*!< Failed */ - J_FOR_RENT /*!< Space For Rent */ + J_NULL = 0, /*!< The state has yet to be assigned */ + + /* WRECK job initial condition states: + */ + J_RESERVED = 1, /*!< Reserved by the program execution 
service */ + J_SUBMITTED = 2, /*!< Submitted to the system */ + + /* Scheduler internal states: + */ + J_PENDING = 11, /*!< Pending */ + J_SCHEDREQ = 12, /*!< Resources requested to be selected */ + J_SELECTED = 13, /*!< Assigned to requested resource in RDL */ + J_ALLOCATED = 14, /*!< Got alloc/contained by the program exec service */ + + /* WRECK job execution states: + */ + J_RUNREQUEST= 21, /*!< Requested to be executed */ + J_STARTING = 22, /*!< Starting */ + J_STOPPED = 23, /*!< Stopped (including init barrier hit for a tool) */ + J_RUNNING = 24, /*!< Running */ + J_COMPLETING= 26, /*!< Completing (all tasks exited, epilog running) */ + + /* WRECK job terminal states: + */ + J_CANCELLED = 51, /*!< Cancelled (before execution) */ + J_COMPLETE = 52, /*!< Completed */ + J_FAILED = 53, /*!< Failed (before exec) */ + + /* Scheduler post exec states: + */ + J_REAPED = 101, /*!< Reaped */ + J_FOR_RENT = 102, /*!< Space For Rent */ } job_state_t; typedef int (*jsc_handler_f)(const char *base_jcb, void *arg, int errnum); diff --git a/t/t2001-jsc.t b/t/t2001-jsc.t index 70c2cb0c323e..ea2109c2aa4e 100755 --- a/t/t2001-jsc.t +++ b/t/t2001-jsc.t @@ -207,7 +207,7 @@ test_expect_success 'jstat 8: query detects bad inputs' ' ' test_expect_success 'jstat 9: update state-pair' " - flux jstat update 1 state-pair '{\"state-pair\": {\"ostate\": 13, \"nstate\": 12}}' && + flux jstat update 1 state-pair '{\"state-pair\": {\"ostate\": 24, \"nstate\": 51}}' && flux kvs get --json $(flux wreck kvs-path 1).state > output.9.1 && cat >expected.9.1 <<-EOF && cancelled From ee7e54fce8dba4d6613df715c1b919ebb7489d28 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Thu, 10 May 2018 10:29:03 -0700 Subject: [PATCH 7/7] libjsc: rename J_STOPPED to J_SYNC Problem: The libjsc J_STOPPED state ("stopped") is not actually set by the wreck execution system. It is presumed this was meant to be the "sync" state set by wreck when tasks are all running but stopped in exec(2). 
Update JSC to use "sync" and J_SYNC instead of "stopped"/J_STOPPED. --- src/common/libjsc/jstatctl.c | 2 +- src/common/libjsc/jstatctl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/libjsc/jstatctl.c b/src/common/libjsc/jstatctl.c index 774b0454a4bc..8dd7b49f96d9 100644 --- a/src/common/libjsc/jstatctl.c +++ b/src/common/libjsc/jstatctl.c @@ -75,7 +75,7 @@ static stab_t job_state_tab[] = { { J_ALLOCATED, "allocated" }, { J_RUNREQUEST, "runrequest" }, { J_STARTING, "starting" }, - { J_STOPPED, "stopped" }, + { J_SYNC, "sync" }, { J_RUNNING, "running" }, { J_CANCELLED, "cancelled" }, { J_COMPLETING, "completing" }, diff --git a/src/common/libjsc/jstatctl.h b/src/common/libjsc/jstatctl.h index 3874ccf68186..dc1b622a17ce 100644 --- a/src/common/libjsc/jstatctl.h +++ b/src/common/libjsc/jstatctl.h @@ -57,7 +57,7 @@ typedef enum { */ J_RUNREQUEST= 21, /*!< Requested to be executed */ J_STARTING = 22, /*!< Starting */ - J_STOPPED = 23, /*!< Stopped (including init barrier hit for a tool) */ + J_SYNC = 23, /*!< Tasks stopped in exec waiting for a tool */ J_RUNNING = 24, /*!< Running */ J_COMPLETING= 26, /*!< Completing (all tasks exited, epilog running) */