Skip to content

Commit

Permalink
Merge pull request #3072 from garlick/priority_bug
Browse files Browse the repository at this point in the history
job-manager: avoid segfault on priority change with pending alloc
  • Loading branch information
mergify[bot] committed Jul 26, 2020
2 parents 53cf4b1 + b7eb47e commit 6cc90fc
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
11 changes: 11 additions & 0 deletions src/modules/job-manager/priority.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ void priority_handle_request (flux_t *h,
errno = EPERM;
goto error;
}
/* RFC 27 does not yet handle priority changes after an alloc request
* has been sent to the scheduler. Also, alloc_queue_reorder() will
* segfault if job->handle is NULL, which is the case if the job is
* no longer in alloc->queue.
*/
if (job->alloc_pending) {
errstr = "job has made an alloc request to scheduler, "
"priority cannot be changed";
errno = EINVAL;
goto error;
}
/* Post event, change job's queue position, and respond.
*/
if (event_job_post_pack (ctx->event, job,
Expand Down
27 changes: 26 additions & 1 deletion t/t2210-job-manager-bugs.t
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,35 @@ test_expect_success 'issue2664: cancel job 3' '
test_expect_success 'issue2664: submit job 4' '
flux mini submit hostname >job4.out
'
# Hangs here (hitting timeout)
# Hangs here (hitting timeout) when bug is present
test_expect_success 'issue2664: cancel job 1 and drain (cleanup)' '
flux job cancel $(cat job1.out) &&
run_timeout 5 flux queue drain
'

#
# Issue 3051 job-manager: segfault on priority change with pending alloc
#
# Regression sequence: get a job to the point where its alloc request has
# been issued, then check that a priority change on that job fails with an
# error message instead of succeeding.
#

# Submit a long-running (sleep 3600) job sized to the number of cores
# reported as "up", saving its job id for later steps.  Presumably this
# occupies all resources so the next job's alloc request stays pending.
test_expect_success 'issue3051: submit full system job' '
ncores=$(flux resource list -s up -no {ncores}) &&
flux mini submit -n ${ncores} sleep 3600 >issue3051.job1
'
# Submit a second job with --flags=debug and wait (-t 5 timeout) for its
# debug.alloc-request event before continuing.
test_expect_success 'issue3051: submit one more job and wait for alloc' '
flux mini submit --flags=debug /bin/true >issue3051.job2 &&
flux job wait-event -t 5 $(cat issue3051.job2) debug.alloc-request
'
# The priority change on the second job must fail; stderr is captured in
# issue3051.err for the message check in the next test.
test_expect_success 'issue3051: cannot reprioritize job with pending alloc' '
test_must_fail flux job priority $(cat issue3051.job2) 0 2>issue3051.err
'
# The captured error output is expected to mention "alloc".
test_expect_success 'issue3051: human message is reasonable' '
grep alloc issue3051.err
'
# Cancel both jobs (second one first), then drain the queue.
# NOTE(review): unlike the issue2664 cleanup, this drain is not wrapped in
# run_timeout, so a hang here would not be bounded by the test itself.
test_expect_success 'issue3051: clean up' '
flux job cancel $(cat issue3051.job2) &&
flux job cancel $(cat issue3051.job1) &&
flux queue drain
'


test_done

0 comments on commit 6cc90fc

Please sign in to comment.