Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Problem: the job manager purge function has no test coverage. Add a new sharness test script.
- Loading branch information
Showing 2 changed files with 165 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
#!/bin/sh

test_description='Test flux job purge'

# Load the sharness test library that sits next to this script.  The path is
# quoted so a checkout location containing whitespace does not get split.
. "$(dirname "$0")/sharness.sh"

# Config directory passed to the instance via --config-path; tests in this
# script write config/system.toml into it.
mkdir -p config

test_under_flux 1 full -o,--config-path="$(pwd)/config"

# Print the job-manager module's "inactive_jobs" stat on stdout.
inactive_count() {
	flux module stats \
		--parse=inactive_jobs \
		job-manager
}

# Poll until the number of inactive jobs equals a target value.
# Usage: wait_inactive_count target tries
#   target - inactive job count to wait for
#   tries  - maximum number of polls; polls are 0.25s apart
# Each observed count is logged to stderr ("?" when the query failed).
# Returns 0 as soon as the count matches, 1 once all tries are used up.
wait_inactive_count() {
	local target="$1"
	local tries="$2"
	local count
	while test "$tries" -gt 0; do
		# A failed query leaves count empty: treat that as "no match yet"
		# and retry rather than handing test(1) an empty operand.
		count=$(inactive_count) || count=
		echo "${count:-?} inactive jobs" >&2
		test -n "$count" && test "$count" -eq "$target" && return 0
		sleep 0.25
		tries=$((tries-1))
	done
	return 1
}

# Reload the heartbeat module with a 0.1s period so heartbeat-driven purge
# results show up sooner and the test runs faster.
test_expect_success 'reload heartbeat module with fast rate' '
	flux module reload heartbeat period=0.1s
'

# Submit ten /bin/true jobs, saving their ids in the file "jobids", then run
# "flux queue drain" (presumably blocks until the queue is empty).
test_expect_success 'create 10 inactive jobs' '
	flux mini submit --cc=1-10 /bin/true >jobids &&
	flux queue drain
'
# Every job id recorded in "jobids" must have a readable eventlog.
# A for loop's exit status is that of its last iteration only, so each
# iteration bails out explicitly; otherwise a failure on any job but the
# last one would be silently ignored.
test_expect_success 'verify job KVS eventlogs exist' '
	for id in $(cat jobids); do
		flux job eventlog $id >/dev/null || return 1
	done
'
# Without any limit option the command is expected to report zero purged jobs.
test_expect_success 'flux job purge with no args purges 0' '
	flux job purge >noargs.out &&
	grep "purged 0 inactive jobs" noargs.out
'

# A batch of 10000 must be rejected with a "batch must be" diagnostic.
test_expect_success 'flux job purge --batch=10000 fails' '
	test_must_fail flux job purge --batch=10000 2>bigbatch.err &&
	grep "batch must be" bigbatch.err
'

# A negative num limit must be rejected with a "num limit must be" diagnostic.
test_expect_success 'flux job purge --num-limit=-42 fails' '
	test_must_fail flux job purge --num-limit=-42 2>negnum.err &&
	grep "num limit must be" negnum.err
'

# A stray positional argument must fail and print the usage text on stderr.
test_expect_success 'flux job purge with extra free argument fails' '
	test_must_fail flux job purge xyz 2>freearg.err &&
	grep "Usage:" freearg.err
'
# Limit of 8: the command must report that 2 jobs were purged.
test_expect_success 'flux job purge --num-limit=8 purges 2' '
	flux job purge --num-limit=8 >num8.out &&
	grep "purged 2 inactive jobs" num8.out
'

# Repeating the same limit must be a no-op.
test_expect_success 'flux job purge --num-limit=8 purges 0' '
	flux job purge --num-limit=8 >num8_again.out &&
	grep "purged 0 inactive jobs" num8_again.out
'

# Limit of 6 with --batch=1: 2 more jobs must be reported as purged.
test_expect_success 'flux job purge --num-limit=6 purges 2' '
	flux job purge --num-limit=6 --batch=1 >num6.out &&
	grep "purged 2 inactive jobs" num6.out
'
# Exercise both limits in a single invocation, as the test title states.
# The num limit (1000) is far above the job count, so the 6 purged jobs are
# expected to be selected by the 1ms age limit.
# NOTE(review): assumes a job is purged when EITHER limit is exceeded --
# confirm against flux-job(1) before merging.
test_expect_success 'flux job purge --num-limit=1000 --age-limit=1ms purges 6' '
	flux job purge --num-limit=1000 --age-limit=1ms >both.out &&
	grep "purged 6 inactive jobs" both.out
'
# Limit of 1: the command must report that nothing was purged.
test_expect_success 'flux job purge --num-limit=1 purges 0' '
	flux job purge --num-limit=1 >num1.out &&
	grep "purged 0 inactive jobs" num1.out
'
# Looking up the eventlog of any job id recorded in "jobids" must now fail.
# A for loop's exit status is that of its last iteration only, so each
# iteration bails out explicitly; otherwise an eventlog that is still present
# for any job but the last one would go undetected.
test_expect_success 'verify job KVS eventlogs do not exist' '
	for id in $(cat jobids); do
		test_must_fail flux job eventlog $id || return 1
	done
'
# Run two jobs strictly one after the other: the second is only submitted
# once the first has posted its "clean" event.  Ids go to jobid1 and jobid2.
test_expect_success 'create 2 inactive jobs with known completion order' '
	flux mini submit /bin/true >jobid1 &&
	flux job wait-event $(cat jobid1) clean &&
	flux mini submit /bin/true >jobid2 &&
	flux job wait-event $(cat jobid2) clean
'

# With a limit of 1 the second job's eventlog must still be readable.
test_expect_success 'purge the oldest job - youngest is still there' '
	flux job purge --num-limit=1 &&
	flux job eventlog $(cat jobid2) >/dev/null
'

# A limit of 0 must bring the inactive count down to zero (polled, 30 tries).
test_expect_success 'purge the last job' '
	flux job purge --num-limit=0 &&
	wait_inactive_count 0 30
'
# Submit another ten /bin/true jobs and run "flux queue drain".
test_expect_success 'create 10 inactive jobs' '
	flux mini submit --cc=1-10 /bin/true &&
	flux queue drain
'

# Write a job-manager config with inactive-num-limit = 5 and reload it.
test_expect_success 'reconfigure job manager with inactive-num-limit=5' '
	cat >config/system.toml <<-EOT &&
	[job-manager]
	inactive-num-limit = 5
	EOT
	flux config reload
'

# Poll (up to 30 tries) until the inactive job count is 5.
test_expect_success 'wait for inactive job count to reach 5' '
	wait_inactive_count 5 30
'
# Start one purge in the background and run a second in the foreground, then
# collect the background one's exit status with wait.
# NOTE(review): NO_CHAIN_LINT is presumably needed because the "&" line
# cannot be part of the && chain -- confirm.
test_expect_success NO_CHAIN_LINT 'run multiple flux-job purges concurrently' '
	flux job purge --num-limit=4 &
	pid=$! &&
	flux job purge --num-limit=3 &&
	wait $pid
'

# Poll (up to 30 tries) until the inactive job count is 3.
test_expect_success 'wait for inactive job count to reach 3' '
	wait_inactive_count 3 30
'
# Write a job-manager config with inactive-age-limit = "1ms" and reload it.
test_expect_success 'reconfigure job manager with inactive-age-limit=1ms' '
	cat >config/system.toml <<-EOT &&
	[job-manager]
	inactive-age-limit = "1ms"
	EOT
	flux config reload
'

# Poll (up to 30 tries) until the inactive job count is 0.
test_expect_success 'wait for inactive job count to reach 0' '
	wait_inactive_count 0 30
'
# An integer where a string is required: reload must fail with
# "Expected string" on stderr.
test_expect_success 'reconfigure job manager with incorrect type limit' '
	cat >config/system.toml <<-EOT &&
	[job-manager]
	inactive-age-limit = 42
	EOT
	test_must_fail flux config reload 2>badtype.err &&
	grep "Expected string" badtype.err
'

# A string that is not a valid duration: reload must fail with "invalid FSD".
test_expect_success 'reconfigure job manager with bad age-limit fsd' '
	cat >config/system.toml <<-EOT &&
	[job-manager]
	inactive-age-limit = "notfsd"
	EOT
	test_must_fail flux config reload 2>badfsd.err &&
	grep "invalid FSD" badfsd.err
'

# A negative num limit: reload must fail with "must be >= 0".
test_expect_success 'reconfigure job manager with invalid num-limit' '
	cat >config/system.toml <<-EOT &&
	[job-manager]
	inactive-num-limit = -42
	EOT
	test_must_fail flux config reload 2>badnum.err &&
	grep "must be >= 0" badnum.err
'
# config/system.toml still holds the negative inactive-num-limit written by
# the previous test; starting a fresh instance on that directory must fail
# with the same "must be >= 0" diagnostic.
test_expect_success 'new instance with bad config fails to start' '
	test_must_fail flux start -o,--config-path=$(pwd)/config \
		/bin/true 2>badnum2.err &&
	grep "must be >= 0" badnum2.err
'

test_done