Skip to content

Commit

Permalink
t: Add guest namespace wait-event tests
Browse files Browse the repository at this point in the history
Add tests for job wait-event to read guest namespace eventlogs.  Most
notably, add tests for jobs that are still running and have not completed,
to ensure that the guest eventlog is being read from the guest namespace.
  • Loading branch information
chu11 committed Aug 15, 2019
1 parent a45bf32 commit 3fc8bee
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 12 deletions.
143 changes: 138 additions & 5 deletions t/t2204-job-info.t
Expand Up @@ -20,10 +20,26 @@ submit_job() {
echo $jobid
}

# Usage: submit_job_live jobspec
# Submit the given jobspec file, block until the job has posted its
# "start" event, then print the jobid on stdout.  Unlike submit_job
# above, the job is NOT cancelled here; the calling test must cancel it.
# Returns non-zero, printing nothing, if the submission itself fails.
submit_job_live() {
    jobspec=$1
    # Stop right away on a failed submit rather than waiting on (and
    # echoing) an empty jobid.
    jobid=$(flux job submit "$jobspec") || return 1
    flux job wait-event "$jobid" start >/dev/null
    echo "$jobid"
}

# Usage: submit_job_wait [jobspec]
# Submit a jobspec file (default: test.json), block until the job has
# posted its "depend" event, then print the jobid on stdout.  The job is
# assumed not to start running immediately, and it is NOT cancelled here;
# the calling test must cancel it.
# Returns non-zero, printing nothing, if the submission itself fails.
submit_job_wait() {
    jobid=$(flux job submit "${1:-test.json}") || return 1
    flux job wait-event "$jobid" depend >/dev/null
    echo "$jobid"
}

wait_watchers_nonzero() {
str=$1
i=0
while (! flux module stats --parse watchers job-info > /dev/null 2>&1 \
|| [ "$(flux module stats --parse watchers job-info 2> /dev/null)" = "0" ]) \
while (! flux module stats --parse $str job-info > /dev/null 2>&1 \
|| [ "$(flux module stats --parse $str job-info 2> /dev/null)" = "0" ]) \
&& [ $i -lt 50 ]
do
sleep 0.1
Expand All @@ -36,6 +52,25 @@ wait_watchers_nonzero() {
return 0
}

# Usage: wait_guest_watcherscount_nonzero namespace
# arg1 - namespace
# Poll the kvs-watch module stats until the watcher count reported for
# the given namespace is greater than zero, sleeping 0.1s between polls
# and giving up after KVS_WAIT_ITERS polls.
#
# Special version of wait_watcherscount_nonzero(), that can deal with
# namespaces with periods in them.  We use 'jq' to parse instead.  See
# issue #2306.
#
# A failing stats request, or a namespace that has no entry in the stats
# yet (jq yields null), counts as "not ready" just like a count of 0.
wait_guest_watcherscount_nonzero() {
    i=0
    # The namespace is handed to jq with --arg, so it is never exposed to
    # shell word splitting/globbing nor interpreted as jq filter syntax.
    # jq -e exits non-zero when the comparison is false or there is no
    # usable input, which keeps the loop polling.
    while ! flux module stats kvs-watch 2>/dev/null \
            | jq -e --arg ns "$1" '.namespaces[$ns].watchers > 0' \
                >/dev/null 2>&1 \
          && [ "$i" -lt "${KVS_WAIT_ITERS}" ]
    do
        sleep 0.1
        i=$((i + 1))
    done
    # Kept in its original unquoted form on purpose: how loophandlereturn
    # reports its verdict (printed number vs. exit status) is not visible
    # here, and this form works with either.
    return $(loophandlereturn $i)
}

get_timestamp_field() {
field=$1
file=$2
Expand All @@ -46,11 +81,12 @@ test_expect_success 'job-info: generate jobspec for simple test job' '
flux jobspec --format json srun -N1 sleep inf > test.json
'

hwloc_fake_config='{"0-1":{"Core":2,"cpuset":"0-1"}}'
hwloc_fake_config='{"0-3":{"Core":2,"cpuset":"0-1"}}'

# Setup: store the fake hwloc by_rank configuration in the KVS, then load
# the barrier module (-r all) followed by sched-simple and job-exec (-r 0).
# Note that barrier is loaded here even though the title does not name it.
test_expect_success 'load job-exec,sched-simple modules' '
# Add fake by_rank configuration to kvs:
flux kvs put resource.hwloc.by_rank="$hwloc_fake_config" &&
flux module load -r all barrier &&
flux module load -r 0 sched-simple &&
flux module load -r 0 job-exec
'
Expand Down Expand Up @@ -152,7 +188,7 @@ test_expect_success NO_CHAIN_LINT 'flux job wait-event works, event is later' '
jobid=$(submit_job)
flux job wait-event $jobid foobar > wait_event3.out &
waitpid=$! &&
wait_watchers_nonzero &&
wait_watchers_nonzero "watchers" &&
wait_watcherscount_nonzero primary &&
kvsdir=$(flux job id --to=kvs $jobid) &&
flux kvs eventlog append ${kvsdir}.eventlog foobar &&
Expand Down Expand Up @@ -298,11 +334,103 @@ test_expect_success 'flux job wait-event -p fails on invalid path' '
! flux job wait-event -p "foobar" $jobid submit
'

# Expects wait-event to exit non-zero when -p is given the bare path
# "guest." for a job obtained from submit_job.
test_expect_success 'flux job wait-event -p fails on path "guest."' '
jobid=$(submit_job) &&
! flux job wait-event -p "guest." $jobid submit
'

# Waits on guest.exec.eventlog for an event name ("foobar") that this test
# never posts and expects the command, run under run_timeout 0.2, not to
# succeed.  run_timeout is presumably a harness helper that kills the
# command after the given number of seconds -- TODO confirm.
test_expect_success 'flux job wait-event -p hangs on no event' '
jobid=$(submit_job) &&
! run_timeout 0.2 flux job wait-event -p "guest.exec.eventlog" $jobid foobar
'

# Live-job case: while the job from submit_job_live is still running,
# start a background wait-event on guest.exec.eventlog for "done".  The
# test then waits until the job-info stats "watchers" and "guest_watchers"
# are non-zero and until kvs-watch reports a watcher on the namespace
# obtained from "flux job id -f dec -t hex", cancels the job, reaps the
# background command and expects "done" in its output.
#
# The jobid assignment is intentionally not followed by "&&": the next
# command ends in "&", and chaining the two would make the assignment
# part of the backgrounded list, leaving jobid unset for the later lines.
test_expect_success NO_CHAIN_LINT 'flux job wait-event -p guest.exec.eventlog works (live job)' '
jobid=$(submit_job_live test.json)
flux job wait-event -p "guest.exec.eventlog" $jobid done > wait_event_path3.out &
waitpid=$! &&
wait_watchers_nonzero "watchers" &&
wait_watchers_nonzero "guest_watchers" &&
guestns=$(flux job id -f dec -t hex $jobid) &&
wait_guest_watcherscount_nonzero $guestns &&
flux job cancel $jobid &&
wait $waitpid &&
grep done wait_event_path3.out
'

# Same setup as the live-job test, but the background wait-event waits for
# an event named "foo".  That event is appended by this test to
# <kvsdir>.guest.exec.eventlog only after the job has been cancelled; the
# background command must then finish and its output must contain "foo".
# As in the other NO_CHAIN_LINT tests, the first line is not chained with
# "&&" because the following command is backgrounded with "&".
test_expect_success NO_CHAIN_LINT 'flux job wait-event -p guest.exec.eventlog works (live job / after main)' '
jobid=$(submit_job_live test.json)
flux job wait-event -p "guest.exec.eventlog" $jobid foo > wait_event_path4.out &
waitpid=$! &&
wait_watchers_nonzero "watchers" &&
wait_watchers_nonzero "guest_watchers" &&
guestns=$(flux job id -f dec -t hex $jobid) &&
wait_guest_watcherscount_nonzero $guestns &&
flux job cancel $jobid &&
kvsdir=$(flux job id --to=kvs $jobid) &&
flux kvs eventlog append ${kvsdir}.guest.exec.eventlog foo &&
wait $waitpid &&
grep foo wait_event_path4.out
'

# Waiting on "foo" is to make sure that the main namespace guest
# eventlog is fully read before the output is checked: each of the
# init, starting, running and done events must then appear on exactly
# one line of the --verbose output.
#
# The jobid assignment is intentionally not followed by "&&": the next
# command ends in "&", and chaining the two would make the assignment
# part of the backgrounded list, leaving jobid unset for the later lines.
test_expect_success NO_CHAIN_LINT 'flux job wait-event -p guest.exec.eventlog check no duplicates (live job)' '
    jobid=$(submit_job_live test.json)
    flux job wait-event -p "guest.exec.eventlog" --verbose $jobid foo > wait_event_path5.out &
    waitpid=$! &&
    wait_watchers_nonzero "watchers" &&
    wait_watchers_nonzero "guest_watchers" &&
    guestns=$(flux job id -f dec -t hex $jobid) &&
    wait_guest_watcherscount_nonzero $guestns &&
    flux job cancel $jobid &&
    kvsdir=$(flux job id --to=kvs $jobid) &&
    flux kvs eventlog append ${kvsdir}.guest.exec.eventlog foo &&
    wait $waitpid &&
    test "$(grep -c init wait_event_path5.out)" -eq 1 &&
    test "$(grep -c starting wait_event_path5.out)" -eq 1 &&
    test "$(grep -c running wait_event_path5.out)" -eq 1 &&
    test "$(grep -c done wait_event_path5.out)" -eq 1
'

# With the job from submit_job_live still running, waits on
# guest.exec.eventlog for an event name ("foobar") that this test never
# posts and expects the command, run under run_timeout 0.2, not to
# succeed.  The job is cancelled afterwards.
test_expect_success 'flux job wait-event -p hangs on no event (live job)' '
jobid=$(submit_job_live test.json) &&
! run_timeout 0.2 flux job wait-event -p "guest.exec.eventlog" $jobid foobar &&
flux job cancel $jobid
'

# In order to test watching a guest event log that does not yet exist,
# we will start a job that will take up all resources. Then start
# another job, which we will watch and know it hasn't started running
# yet. Then we cancel the initial job to get the new one running.

# Writes test-all.json: a jobspec for "srun -n4 -c2 sleep inf".  The
# "wait job" tests submit it first so that a second job cannot start
# until this one is cancelled.
test_expect_success 'job-info: generate jobspec to consume all resources' '
flux jobspec --format json srun -n4 -c2 sleep inf > test-all.json
'

# Watches the guest eventlog of a job that has not started yet.
# jobidall (test-all.json) is submitted first and waited on until it has
# started; jobid (test.json, via submit_job_wait) is then submitted and a
# background wait-event -v waits for "done" on its guest.exec.eventlog.
# Once the job-info and kvs-watch watcher counts are non-zero, jobidall is
# cancelled, the test waits for jobid to post "start", cancels jobid, reaps
# the background command and expects "done" in its output.
#
# The two assignments at the top are intentionally not chained with "&&":
# the command after them ends in "&", and chaining would make the
# assignments part of the backgrounded list.
test_expect_success NO_CHAIN_LINT 'flux job wait-event -p guest.exec.eventlog works (wait job)' '
jobidall=$(submit_job_live test-all.json)
jobid=$(submit_job_wait)
flux job wait-event -v -p "guest.exec.eventlog" ${jobid} done > wait_event_path6.out &
waitpid=$! &&
wait_watchers_nonzero "watchers" &&
wait_watchers_nonzero "guest_watchers" &&
guestns=$(flux job id -f dec -t hex ${jobid}) &&
wait_guest_watcherscount_nonzero $guestns &&
flux job cancel ${jobidall} &&
flux job wait-event ${jobid} start &&
flux job cancel ${jobid} &&
wait $waitpid &&
grep done wait_event_path6.out
'

# With jobidall (test-all.json) running and jobid submitted behind it via
# submit_job_wait, waits on jobid's guest.exec.eventlog for an event name
# ("foobar") that this test never posts and expects the command, run under
# run_timeout 0.2, not to succeed.  Both jobs are cancelled afterwards.
test_expect_success 'flux job wait-event -p hangs on no event (wait job)' '
jobidall=$(submit_job_live test-all.json) &&
jobid=$(submit_job_wait) &&
! run_timeout 0.2 flux job wait-event -p "guest.exec.eventlog" $jobid foobar &&
flux job cancel $jobidall &&
flux job cancel $jobid
'

#
# job info tests
#
Expand Down Expand Up @@ -362,7 +490,8 @@ test_expect_success 'flux job info multiple keys fails on 1 bad entry (no eventl

# The job-info module stats must mention the "lookups", "watchers" and
# "guest_watchers" counters.  Each check is chained with && so that a
# missing counter fails the test.
test_expect_success 'job-info stats works' '
    flux module stats job-info | grep "lookups" &&
    flux module stats job-info | grep "watchers" &&
    flux module stats job-info | grep "guest_watchers"
'

test_expect_success 'lookup request with empty payload fails with EPROTO(71)' '
Expand All @@ -371,11 +500,15 @@ test_expect_success 'lookup request with empty payload fails with EPROTO(71)' '
# Each of the next two tests runs the ${RPC} helper against a job-info
# watch method with the argument 71 and stdin redirected from /dev/null.
# Per the test titles, 71 is the EPROTO error expected for an empty
# payload -- NOTE(review): ${RPC} is defined outside this view; confirm
# that its second argument is the expected errnum.
test_expect_success 'eventlog-watch request with empty payload fails with EPROTO(71)' '
${RPC} job-info.eventlog-watch 71 </dev/null
'
test_expect_success 'guest-eventlog-watch request with empty payload fails with EPROTO(71)' '
${RPC} job-info.guest-eventlog-watch 71 </dev/null
'

#
# cleanup
#
# Teardown: remove the barrier module (-r all) followed by sched-simple
# and job-exec (-r 0).  Note that barrier is removed here even though the
# title does not name it.
test_expect_success 'remove sched-simple,job-exec modules' '
flux module remove -r all barrier &&
flux module remove -r 0 sched-simple &&
flux module remove -r 0 job-exec
'
Expand Down
48 changes: 41 additions & 7 deletions t/t2205-job-info-security.t
Expand Up @@ -9,21 +9,33 @@ test_under_flux 4 job
# We have to fake a job submission by a guest into the KVS.
# This method of editing the eventlog preserves newline separators.

# Usage: update_job_userid [userid]
# Edit the KVS eventlog of the job named by the caller-set variable
# $jobid: on every line, the digits following the first "userid": key are
# replaced with the given userid.  The eventlog is streamed through sed
# and written back with --raw, which preserves its newline separators.
# With an empty or missing userid nothing is touched and 0 is returned.
update_job_userid() {
    userid=$1
    test -n "$userid" || return 0
    kvsdir=$(flux job id --to=kvs "$jobid")
    flux kvs get --raw "${kvsdir}.eventlog" \
        | sed -e 's/\("userid":\)[0-9]*/\1'"${userid}"/ \
        | flux kvs put --raw "${kvsdir}.eventlog=-"
}

# Usage: submit_job [userid]
# Submit test.json and print the jobid on stdout.
# To ensure robustness of tests despite future job manager changes,
# cancel the job, and wait for the clean event.  Optionally, edit the
# userid recorded in the job eventlog; that rewrite is delegated to
# update_job_userid so it happens exactly once.
# Returns non-zero, printing nothing, if the submission itself fails.
submit_job() {
    jobid=$(flux job submit test.json) || return 1
    flux job cancel "$jobid"
    flux job wait-event "$jobid" clean >/dev/null
    # update_job_userid reads $jobid and does nothing for an empty userid.
    update_job_userid "$1"
    echo "$jobid"
}

# Usage: submit_job_live [userid]
# Submit test.json, block until the job has posted its "start" event,
# optionally edit the userid recorded in the job eventlog (through
# update_job_userid), then print the jobid on stdout.  Unlike submit_job
# above, the job is NOT cancelled here; the calling test must cancel it.
# Returns non-zero, printing nothing, if the submission itself fails.
submit_job_live() {
    jobid=$(flux job submit test.json) || return 1
    flux job wait-event "$jobid" start >/dev/null
    # update_job_userid reads $jobid and does nothing for an empty userid.
    update_job_userid "$1"
    echo "$jobid"
}

Expand Down Expand Up @@ -154,6 +166,28 @@ test_expect_success 'flux job wait-event guest.exec.eventlog fails via -p (wrong
unset_userid
'

# Without any userid change, wait-event must see the "init" event in the
# guest.exec.eventlog of a job that is still running; the job is
# cancelled afterwards.
test_expect_success 'flux job wait-event guest.exec.eventlog works via -p (live job, owner)' '
jobid=$(submit_job_live) &&
flux job wait-event -p guest.exec.eventlog $jobid init &&
flux job cancel $jobid
'

# The running job's eventlog is edited to carry userid 9000, and the
# request is made after set_userid 9000; wait-event must then see "init"
# in guest.exec.eventlog.
# NOTE(review): if the wait-event step fails, the && chain skips
# unset_userid and the cancel -- confirm that this cannot leak into
# later tests.
test_expect_success 'flux job wait-event guest.exec.eventlog works via -p (live job, user)' '
jobid=$(submit_job_live 9000) &&
set_userid 9000 &&
flux job wait-event -p guest.exec.eventlog $jobid init &&
unset_userid &&
flux job cancel $jobid
'

# The running job's eventlog is edited to carry userid 9000, but the
# request is made after set_userid 9999; wait-event on guest.exec.eventlog
# is expected to exit non-zero.
# NOTE(review): if wait-event unexpectedly succeeds, the && chain skips
# unset_userid and the cancel -- confirm that this cannot leak into
# later tests.
test_expect_success 'flux job wait-event guest.exec.eventlog fails via -p (live job, wrong user)' '
jobid=$(submit_job_live 9000) &&
set_userid 9999 &&
! flux job wait-event -p guest.exec.eventlog $jobid init &&
unset_userid &&
flux job cancel $jobid
'

#
# job info
#
Expand Down

0 comments on commit 3fc8bee

Please sign in to comment.