Skip to content

Commit

Permalink
Merge pull request #1038 from trws/fix-segfault
Browse files Browse the repository at this point in the history
planner: ensure result in planner_avail_resources_at
  • Loading branch information
mergify[bot] committed Jun 23, 2023
2 parents c65026c + cf4de65 commit 32f74d6
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 1 deletion.
2 changes: 1 addition & 1 deletion resource/planner/planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ extern "C" int64_t planner_avail_resources_during (planner_t *ctx,
extern "C" int64_t planner_avail_resources_at (planner_t *ctx, int64_t at)
{
scheduled_point_t *state = nullptr;
if (!ctx || at > ctx->plan_end) {
if (!ctx || at > ctx->plan_end || at < ctx->plan_start) {
errno = EINVAL;
return -1;
}
Expand Down
4 changes: 4 additions & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ TESTS = \
t4010-match-conf.t \
t4011-match-duration.t \
t5000-valgrind.t \
t5100-issues-test-driver.t \
t6000-graph-size.t \
t6001-match-formats.t \
t6002-graph-hwloc.t \
Expand All @@ -105,6 +106,9 @@ TESTS = \

check_SCRIPTS = $(TESTS)

dist_check_SCRIPTS = \
issues/t1035-fluxion-reload.sh

EXTRA_DIST= \
$(check_SCRIPTS) \
data \
Expand Down
69 changes: 69 additions & 0 deletions t/issues/t1035-fluxion-reload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
#
# Ensure fluxion modules can recover running jobs with rv1 match format.
#
# Print a diagnostic line on stderr, prefixed with the issue tag.
# Arguments: the message words; they are joined by the first character of
#            IFS (a space by default) and printed verbatim.
# The message is passed to printf as data, never as the format string, so
# '%' or backslashes in a message cannot be misinterpreted and a
# multi-argument call still ends with a newline.
log() { printf 'issue#1035: %s\n' "$*" >&2; }
# Report a fatal condition through log, then terminate the script with
# exit status 1.
# Arguments: forwarded unchanged to log.
die() {
    log "$@"
    exit 1
}
# Run the sharness helper scripts/run_timeout.py with the given arguments.
# Globals:   PYTHON (read; interpreter to use, python3 when unset or empty)
#            SHARNESS_TEST_SRCDIR (read; directory containing scripts/)
# Arguments: passed through unchanged to run_timeout.py
# Returns:   the exit status of the helper invocation
run_timeout() {
    local interpreter="${PYTHON:-python3}"
    local helper="${SHARNESS_TEST_SRCDIR}/scripts/run_timeout.py"
    "$interpreter" "$helper" "$@"
}

# Re-exec this script under `flux start -s 4` unless that has already
# happened.  ISSUE_1035_TEST_ACTIVE is exported before the exec so the
# relaunched copy skips this branch.
if test -z "$ISSUE_1035_TEST_ACTIVE"; then
    export ISSUE_1035_TEST_ACTIVE=t
    log "relaunching under test instance of size 4..."
    # "$0" is quoted so a script path containing whitespace or glob
    # characters reaches flux start as a single word.
    exec flux start -s 4 "$0" "$@"
fi
# Abort unless the instance reports exactly 4 nodes.
test "$(flux resource list -no {nnodes})" -eq 4 || die "test requires 4 nodes"

log "Unloading modules..."
# Remove the modules one at a time, in the order listed.
for module in sched-fluxion-qmanager sched-fluxion-resource resource; do
    flux module remove "$module"
done

log "Amending instance resource set with properties: batch, debug..."
# Read resource.R from the KVS, tag 0-1 with "batch" and 2-3 with "debug"
# (presumably broker ranks -- confirm against flux-R documentation), then
# write the result back under the same key.
flux kvs get resource.R |
    flux R set-property batch:0-1 debug:2-3 |
    flux kvs put -r resource.R=-
# Debugging aid, left disabled:
#flux kvs get resource.R | jq

log "Loading config with queues and match-format=\"rv1\"..."
# The config text contains no shell expansions, so the heredoc delimiter
# is quoted and the body is passed through literally.
flux config load <<'EOF'
[queues.debug]
requires = ["debug"]
[queues.batch]
requires = ["batch"]
[sched-fluxion-resource]
match-format = "rv1"
EOF
# Confirm the setting took effect; jq -e exits non-zero when the
# comparison is false, which triggers die.
flux config get \
    | jq -e '."sched-fluxion-resource"."match-format" == "rv1"' \
    || die "failed to set sched-fluxion-resource.match-format = rv1"

log "Reloading modules..."
# Load in the reverse of the removal order used earlier in this script.
flux module load resource noverify
flux module load sched-fluxion-resource
flux module load sched-fluxion-qmanager
# Show the last two broker log lines that mention "version".
flux dmesg -HL \
    | grep version \
    | tail -n 2

log "Starting all queues..."
flux queue start --all --quiet
# Informational output only: queue state and currently free resources.
flux queue status
flux resource list -s free

log "Submitting two sleep jobs..."
# One 2-node job per queue, debug first.  Each submit is bounded by
# run_timeout (10) and returns once the job's start event is seen.
for queue in debug batch; do
    run_timeout 10 flux submit -N2 --wait-event=start --queue="$queue" sleep inf
done

log "Reloading fluxion..."
# qmanager is unloaded before sched-fluxion-resource is reloaded and is
# loaded again afterwards.
flux module unload sched-fluxion-qmanager
flux module reload sched-fluxion-resource
flux module load sched-fluxion-qmanager

log "Checking that running jobs were recovered..."
# Human-readable listing for the test log.
flux jobs -ano "{id.f58:>12} {status_abbrev:>2} {name}"
# Count the jobs that `flux jobs` lists by default (one id per line);
# both sleep jobs must still be there after the reload.
active_jobs=$(flux jobs -no {id} | wc -l)
test "$active_jobs" -eq 2 \
    || die "Expected 2 jobs still running"
# Clean up the sleep jobs; this is the script's final command, so its
# status becomes the script's exit status.
flux cancel --all

31 changes: 31 additions & 0 deletions t/t5100-issues-test-driver.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/sh
#
# test_description is assigned before sharness.sh is sourced.
test_description='Verify that fixed issues remain fixed'

# Source sharness.sh from the directory holding this script.  $(...) with
# quoting replaces the unquoted backtick form so a path containing
# whitespace still resolves to a single file name.
. "$(dirname "$0")/sharness.sh"

# Skip the whole file when the ASAN prerequisite is set.
if test_have_prereq ASAN; then
    skip_all='skipping issues tests under AddressSanitizer'
    test_done
fi
# The issue scripts use jq; skip everything when it is unavailable.
skip_all_unless_have jq

# Size of the flux session the remainder of this file runs under.
SIZE=2
test_under_flux ${SIZE}
echo "# $0: flux session size will be ${SIZE}"

# Glob selecting which scripts under issues/ are run; defaults to all of
# them when T5100_ISSUES_GLOB is unset or empty.
: "${T5100_ISSUES_GLOB:=*}"

# Submit each matching issue script as its own waitable job, run under a
# nested `flux start`, with a 10m time limit.  The job name comes from the
# {./%} template (presumably the script's basename without extension --
# confirm against flux-bulksubmit(1)).
# The directory prefix is quoted so a source path containing whitespace is
# not split; the glob itself must stay unquoted so the shell expands it.
flux bulksubmit -n1 -o pty --job-name={./%} -t 10m \
    --flags=waitable \
    --quiet --watch \
    flux start {} \
    ::: "${SHARNESS_TEST_SRCDIR}"/issues/${T5100_ISSUES_GLOB}

# Register one sharness test per job: the test is named after the job and
# passes when `flux job attach` on that job succeeds.
# The job-name lookup is quoted so test_expect_success always receives
# exactly two arguments, even if a name is empty or contains whitespace.
for id in $(flux jobs -ano {id}); do
    test_expect_success "$(flux jobs -no {name} "$id")" "flux job attach $id"
done

test_done

0 comments on commit 32f74d6

Please sign in to comment.