From 38235697b0cbf9fd9febc2844d01b1b21a58c4f5 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Fri, 28 Apr 2023 10:55:45 -0700 Subject: [PATCH 1/8] job-list: remove list_inactive_cb prototype Problem: The function list_inactive_cb() was removed a long time ago but the prototype for it is still in list.h. Remove the function prototype. --- src/modules/job-list/list.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/modules/job-list/list.h b/src/modules/job-list/list.h index cf4b435bd97d..d052c22237e0 100644 --- a/src/modules/job-list/list.h +++ b/src/modules/job-list/list.h @@ -16,9 +16,6 @@ void list_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *arg); -void list_inactive_cb (flux_t *h, flux_msg_handler_t *mh, - const flux_msg_t *msg, void *arg); - void list_id_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *arg); From 628220b01c8da4533b2b1963d2b2387b0cd452ee Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Sun, 23 Apr 2023 11:13:41 -0700 Subject: [PATCH 2/8] job-list: clarify comment Problem: There is a comment describing job list filtering with the 'since' filtering. The comment is a little confusing because it does not match the code, doesn't describe how a list is sorted, and happens to use the word "since" in the description. Clarify the comment with more details. --- src/modules/job-list/list.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/modules/job-list/list.c b/src/modules/job-list/list.c index 55b63bd1bc04..8e03395001a8 100644 --- a/src/modules/job-list/list.c +++ b/src/modules/job-list/list.c @@ -79,9 +79,13 @@ int get_jobs_from_list (json_t *jobs, job = zlistx_first (list); while (job) { - /* If job->t_inactive > 0. (we're on the inactive jobs list), - * and job->t_inactive > since, then we're done since inactive - * jobs are sorted by inactive time. + /* If job->t_inactive > 0. we're on the inactive jobs list and jobs are + * sorted on the inactive list, larger t_inactive first. + * + * If job->t_inactive > since, this is a job that could potentially be returned + * + * So if job->t_inactive <= since, then we're done b/c the rest of the inactive + * jobs cannot be returned. */ if (job->t_inactive > 0. && job->t_inactive <= since) break; From e353520f8f05bd43cb16fd6dd17452279437e460 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Mon, 1 May 2023 15:29:43 -0700 Subject: [PATCH 3/8] flux-job: fix invalid tabbing Problem: Some tabbing was incorrect. Make tabbing good! --- src/cmd/flux-job.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/cmd/flux-job.c b/src/cmd/flux-job.c index fd6b6a7c68c6..035bc067d4cb 100644 --- a/src/cmd/flux-job.c +++ b/src/cmd/flux-job.c @@ -1381,7 +1381,7 @@ int cmd_list (optparse_t *p, int argc, char **argv) flux_future_destroy (f); flux_close (h); - return (0); + return (0); } int cmd_list_inactive (optparse_t *p, int argc, char **argv) @@ -2566,7 +2566,7 @@ void attach_event_continuation (flux_future_t *f, void *arg) else if (streq (name, "submit")) { if (!(ctx->exec_eventlog_f = flux_job_event_watch (ctx->h, ctx->id, - "guest.exec.eventlog", + "guest.exec.eventlog", 0))) log_err_exit ("flux_job_event_watch"); if (flux_future_then (ctx->exec_eventlog_f, @@ -2855,12 +2855,12 @@ int cmd_status (optparse_t *p, int argc, char **argv) else if (verbose > 1 || exitcode != 0) { if (!exception) log_msg ("%s: exited with exit code %d", - jobid, - exitcode); + jobid, + exitcode); else log_msg ("%s: exception type=%s", - jobid, - exc_type); + jobid, + exc_type); } } flux_future_destroy (futures[i]); @@ -3601,10 +3601,10 @@ int cmd_memo (optparse_t *p, int argc, char **argv) "id", id, "volatile", optparse_hasopt (p, "volatile"), "memo", memo))) - log_err_exit ("flux_rpc_pack"); + log_err_exit ("flux_rpc_pack"); if (flux_rpc_get (f, NULL) < 0) - log_msg_exit ("memo: %s", future_strerror (f, errno)); + log_msg_exit ("memo: %s", future_strerror (f, errno)); flux_future_destroy (f); flux_close (h); @@ -3782,10 +3782,10 @@ static char *flux_job_nodeid_to_hostname (flux_jobid_t id, int nodeid) log_err_exit ("flux_open"); if (!(f = flux_rpc_pack (h, "job-info.lookup", FLUX_NODEID_ANY, 0, - "{s:I s:[s] s:i}", - "id", id, - "keys", "R", - "flags", 0))) + "{s:I s:[s] s:i}", + "id", id, + "keys", "R", + "flags", 0))) log_err_exit ("flux_rpc_pack"); if (flux_rpc_get_unpack (f, "{s:s}", "R", &R) < 0 || !(rl = rlist_from_R (R)) From c730f0a23b2dc4653f8f514e5f4d60951908a27d Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Tue, 2 May 2023 16:25:54 -0700 Subject: [PATCH 4/8] doc: clarify --since in flux-jobs(1) Problem: The wording in the --since option in flux-jobs(1) is a bit misleading, suggesting that the --since option may apply only to completed/inactive jobs. Solution: Note that the --since option applies to jobs that were active after the given timestamp, which makes it clear that --since also will list pending and running jobs. --- doc/man1/flux-jobs.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/man1/flux-jobs.rst b/doc/man1/flux-jobs.rst index b073de6e4300..3b9db958482d 100644 --- a/doc/man1/flux-jobs.rst +++ b/doc/man1/flux-jobs.rst @@ -46,10 +46,11 @@ OPTIONS Limit output to N jobs (default 1000) **--since**\ *WHEN* - Limit output to jobs that completed or have become inactive since a - given timestamp. This option implies ``-a`` if no other ``--filter`` - options are specified. If *WHEN* begins with ``-`` character, then - the remainder is considered to be a an offset in Flux standard duration + Limit output to jobs that have been active since a given timestamp. In other + words, jobs that are currently pending, currently running, or became inactive + since the given timestamp. This option implies ``-a`` if no other + ``--filter`` options are specified. If *WHEN* begins with ``-`` character, + then the remainder is considered to be a an offset in Flux standard duration (RFC 23). Otherwise, any datetime expression accepted by the Python `parsedatetime `_ module is accepted. Examples: "-6h", "-1d", "yesterday", "2021-06-21 6am", From c60a2ebb0028ac624d18bef0085d4e1920845fff Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Wed, 3 May 2023 14:41:09 -0700 Subject: [PATCH 5/8] testsuite: fix invalid option in test Problem: In a test in t2800-jobs-cmd.t, both the --filter and --all option are specified, leading to unexpected output. The test happens to pass, but may not behave as intended. Remove the -a option in the test. --- t/t2800-jobs-cmd.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t2800-jobs-cmd.t b/t/t2800-jobs-cmd.t index 62755145e0b2..56dccbbe08a3 100755 --- a/t/t2800-jobs-cmd.t +++ b/t/t2800-jobs-cmd.t @@ -182,7 +182,7 @@ test_expect_success 'flux-jobs: custom format with numeric spec works' ' test_expect_success 'flux-jobs: collapsible fields work' ' flux jobs -ao "{id.f58:<12} ?:{exception.type:>8}" >nocollapse.out && flux jobs -f running,completed \ - -ao "{id.f58:<12} ?:{exception.type:>8}" >collapsed.out && + -o "{id.f58:<12} ?:{exception.type:>8}" >collapsed.out && test_debug "head -n1 nocollapse.out" && test_debug "head -n1 collapsed.out" && grep EXCEPTION-TYPE nocollapse.out && From 8fbd157ea69d86911d8a9bdabdc2b053371c082b Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Wed, 3 May 2023 15:53:56 -0700 Subject: [PATCH 6/8] testsuite: update out of date comment Problem: A comment in t2260-job-list.t is out of date. flux_job_list() has been deprecated and is no longer used. Update the comment. --- t/t2260-job-list.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t2260-job-list.t b/t/t2260-job-list.t index 8954ef907aa5..cd26bbb9dfa0 100755 --- a/t/t2260-job-list.t +++ b/t/t2260-job-list.t @@ -185,8 +185,8 @@ test_expect_success 'flux job list inactive jobs results are correct' ' test_cmp list_result_I.out list_result_I.exp ' -# Hard code results values for these tests, as we did not add a results -# option to flux_job_list() or the flux-job command. +# flux job list does not take results as an option, test via direct +# call to job-list.list test_expect_success 'flux job list only canceled jobs' ' id=$(id -u) && From 976b089f2b5d370f5d59042eae2ea13e7701bac0 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Wed, 3 May 2023 16:15:54 -0700 Subject: [PATCH 7/8] testsuite: add job-archive active job test Problem: The t2250-job-archive.t test does not test that active jobs are not incorrectly archived in the archive database. Add an additional test to double check for this. --- t/t2250-job-archive.t | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/t/t2250-job-archive.t b/t/t2250-job-archive.t index 2940b3d53550..6c39e98ced55 100755 --- a/t/t2250-job-archive.t +++ b/t/t2250-job-archive.t @@ -171,6 +171,11 @@ test_expect_success 'job-archive: load module' ' flux module load job-archive ' +test_expect_success 'job-archive: launch a running job' ' + jobid=`flux submit sleep inf` && + echo $jobid > long_running_job.id +' + test_expect_success 'job-archive: stores inactive job info (job good)' ' jobid=`flux submit hostname` && fj_wait_event $jobid clean && @@ -189,6 +194,27 @@ test_expect_success 'job-archive: stores inactive job info (job fail)' ' db_check_values_run $jobid ${ARCHIVEDB} ' +# ensure long running job wasn't stored +test_expect_success 'job-archive: check 2 jobs stored' ' + count=`db_count_entries ${ARCHIVEDB}` && + test $count -eq 2 +' + +test_expect_success 'job-archive: cancel long running job' ' + jobid=$(cat long_running_job.id) && + flux cancel $jobid && + fj_wait_event $jobid clean && + wait_jobid_state $jobid inactive && + wait_db $jobid ${ARCHIVEDB} && + db_check_entries $jobid ${ARCHIVEDB} && + db_check_values_run $jobid ${ARCHIVEDB} +' + +test_expect_success 'job-archive: check 3 jobs stored' ' + count=`db_count_entries ${ARCHIVEDB}` && + test $count -eq 3 +' + # to ensure job canceled before we run, we submit a job to eat up all # resources first. test_expect_success 'job-archive: stores inactive job info (job cancel)' ' @@ -217,7 +243,7 @@ test_expect_success 'job-archive: stores inactive job info (resources)' ' test_expect_success 'job-archive: all jobs stored' ' count=`db_count_entries ${ARCHIVEDB}` && - test $count -eq 5 + test $count -eq 6 ' test_expect_success 'job-archive: reload module' ' @@ -226,7 +252,7 @@ test_expect_success 'job-archive: reload module' ' test_expect_success 'job-archive: doesnt restore old data' ' count=`db_count_entries ${ARCHIVEDB}` && - test $count -eq 5 + test $count -eq 6 ' test_expect_success 'job-archive: stores more inactive job info' ' @@ -246,7 +272,7 @@ test_expect_success 'job-archive: stores more inactive job info' ' test_expect_success 'job-archive: all jobs stored' ' count=`db_count_entries ${ARCHIVEDB}` && - test $count -eq 7 + test $count -eq 8 ' # we don't check values in module stats b/c it can be racy w/ polling @@ -260,7 +286,7 @@ test_expect_success 'job-archive: unload module' ' test_expect_success 'job-archive: db exists after module unloaded' ' count=`db_count_entries ${ARCHIVEDB}` && - test $count -eq 7 + test $count -eq 8 ' test_expect_success 'job-archive: setup config file without dbpath' ' From d0597ee8f38fb29bbd2a9f0e8c878c3ca7b20cf8 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Thu, 4 May 2023 22:01:56 -0700 Subject: [PATCH 8/8] python: remove "all" filter name in add_filter() Problem: The JobList class's add_filter() function checks for the special filter name "all" and sets the filter to pending and running. As far as I can tell, the special filter name "all" has never been supported or documented. In addition, it doesn't make much sense since it should set pending, running, and inactive to get all jobs. My suspicion is this may be left over code from development that did not get removed. Remove check for the filter name "all". --- src/bindings/python/flux/job/list.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/bindings/python/flux/job/list.py b/src/bindings/python/flux/job/list.py index 43f1496001b0..b0ff9710fefa 100644 --- a/src/bindings/python/flux/job/list.py +++ b/src/bindings/python/flux/job/list.py @@ -236,11 +236,6 @@ def set_user(self, user): def add_filter(self, fname): """Append a state or result filter to JobList query""" fname = fname.lower() - if fname == "all": - self.states |= self.STATES["pending"] - self.states |= self.STATES["running"] - return - if fname in self.STATES: self.states |= self.STATES[fname] elif fname in self.RESULTS: