Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix for incorrectly handling implicit exclusivity #502

Merged
merged 3 commits into from Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 18 additions & 3 deletions resource/traversers/dfu_impl.cpp
Expand Up @@ -115,14 +115,27 @@ int dfu_impl_t::by_avail (const jobmeta_t &meta, const std::string &s, vtx_t u,
}

int dfu_impl_t::by_excl (const jobmeta_t &meta, const std::string &s, vtx_t u,
const Jobspec::Resource &resource)
bool exclusive_in, const Jobspec::Resource &resource)
{
int rc = -1;
planner_t *p = NULL;
int64_t at = meta.at;
int64_t njobs = -1;
uint64_t duration = meta.duration;
if (resource.exclusive == Jobspec::tristate_t::TRUE) {

// If a non-exclusive resource request is explicitly given on a
// resource that lies under slot, this spec is invalid.
if (exclusive_in && resource.exclusive == Jobspec::tristate_t::FALSE) {
errno = EINVAL;
m_err_msg += "by_excl: exclusivity conflicts at jobspec=";
m_err_msg += resource.label + " : vertex=" + (*m_graph)[u].name;
goto done;
}

// If a resource request is under slot or an explicit exclusivity is
// requested, we check the validity of the visiting vertex using
// its x_checker planner.
if (exclusive_in || resource.exclusive == Jobspec::tristate_t::TRUE) {
p = (*m_graph)[u].schedule.x_checker;
njobs = planner_avail_resources_during (p, at, duration);
if (njobs == -1) {
Expand All @@ -137,6 +150,8 @@ int dfu_impl_t::by_excl (const jobmeta_t &meta, const std::string &s, vtx_t u,
goto done;
}
}

// All cases reached this point indicate further walk is needed.
rc = 0;

done:
Expand Down Expand Up @@ -187,7 +202,7 @@ int dfu_impl_t::prune (const jobmeta_t &meta, bool exclusive,
if ((*m_graph)[u].type != resource.type)
continue;
// Prune by exclusivity checker
if ( (rc = by_excl (meta, s, u, resource)) == -1)
if ( (rc = by_excl (meta, s, u, exclusive, resource)) == -1)
break;
// Prune by the subtree planner quantities
if ( (rc = by_subplan (meta, s, u, resource)) == -1)
Expand Down
2 changes: 1 addition & 1 deletion resource/traversers/dfu_impl.hpp
Expand Up @@ -211,7 +211,7 @@ class dfu_impl_t {
int by_avail (const jobmeta_t &meta, const std::string &s, vtx_t u,
const std::vector<Jobspec::Resource> &resources);
int by_excl (const jobmeta_t &meta, const std::string &s, vtx_t u,
const Jobspec::Resource &resource);
bool exclusive_in, const Jobspec::Resource &resource);
int by_subplan (const jobmeta_t &meta, const std::string &s, vtx_t u,
const Jobspec::Resource &resource);
int prune (const jobmeta_t &meta, bool excl, const std::string &subsystem,
Expand Down
21 changes: 21 additions & 0 deletions t/data/resource/commands/basics/cmds40.in
@@ -0,0 +1,21 @@
# 4x cluster[1]->rack[1]->node[1]->slot[1]->socket[1]->core[1]
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml

# 4x cluster[1]->rack[1]->node[1]->slot[1]->socket[1]->core[1]
# match must fail for all of them
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test001.yaml

# 4x slot[1]->core[1]
# match must fail for all of them
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test008.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test008.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test008.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test008.yaml

quit
72 changes: 72 additions & 0 deletions t/data/resource/expected/basics/040.R.out
@@ -0,0 +1,72 @@
---------------core0[1:x]
------------socket0[1:x]
---------node0[1:s]
------rack0[1:s]
---tiny0[1:s]
INFO: =============================
INFO: JOBID=1
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
---------------core18[1:x]
------------socket1[1:x]
---------node0[1:s]
------rack0[1:s]
---tiny0[1:s]
INFO: =============================
INFO: JOBID=2
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
---------------core0[1:x]
------------socket0[1:x]
---------node1[1:s]
------rack0[1:s]
---tiny0[1:s]
INFO: =============================
INFO: JOBID=3
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
---------------core18[1:x]
------------socket1[1:x]
---------node1[1:s]
------rack0[1:s]
---tiny0[1:s]
INFO: =============================
INFO: JOBID=4
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=5
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=6
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=7
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=8
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=9
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=10
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=11
INFO: =============================
INFO: =============================
INFO: No matching resources found
INFO: JOBID=12
INFO: =============================
6 changes: 3 additions & 3 deletions t/t1001-qmanager-basic.t
Expand Up @@ -55,10 +55,10 @@ test_expect_success 'qmanager: canceling job during execution works' '
flux job wait-event -vt 2.5 ${jobid} start &&
flux job cancel ${jobid} &&
flux job wait-event -t 2.5 ${jobid} exception &&
flux job wait-event -t 2.5 ${jobid} finish | grep status=9 &&
flux job wait-event -t 2.5 ${jobid} finish | grep status=15 &&
flux job wait-event -t 2.5 ${jobid} release &&
flux job wait-event -t 2.5 ${jobid} clean &&
exec_eventlog $jobid | grep "complete" | grep "\"status\":9"
exec_eventlog $jobid | grep "complete" | grep "\"status\":15"
'

test_expect_success 'qmanager: exception during initialization is supported' '
Expand All @@ -83,7 +83,7 @@ test_expect_success 'qmanager: exception during run is supported' '
grep "mock run exception generated" exception.2.out &&
flux job wait-event -qt 2.5 ${jobid} clean &&
flux job eventlog ${jobid} > eventlog.${jobid}.out &&
grep "finish status=9" eventlog.${jobid}.out
grep "finish status=15" eventlog.${jobid}.out
'

test_expect_success 'removing resource and qmanager modules' '
Expand Down
8 changes: 8 additions & 0 deletions t/t3001-resource-basic.t
Expand Up @@ -152,4 +152,12 @@ test_expect_success "${test016_desc}" '
test_cmp 016.R.out ${exp_dir}/016.R.out
'

cmds040="${cmd_dir}/cmds40.in"
test040_desc="Once all sockets are exclusively allocated, no jobs can match"
test_expect_success "${test040_desc}" '
sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds040} > cmds040 &&
${query} -G ${grugs} -S CA -P low -t 040.R.out < cmds040 &&
test_cmp 040.R.out ${exp_dir}/040.R.out
'

test_done