From 1ea3bcea2d401e7c803816b25f0c30b6587bada2 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Tue, 26 Nov 2019 18:26:57 -0800 Subject: [PATCH 1/4] resource: fix buffer overflow from issue #509 Copying from Dong's comment: We treat slot in a different way than any other resource type because it is the only non-physical resource type. How we handle is this is : 1) perform a DFV subtree walk for a slot type, 2) divide the resource set discovered from this DFV walk equally according to the slot shape, 3) check if the given number of slots can be satisfied. There turns out to be a bug in 2). For example, if your jobspec is socket[1]->slot[1]->core[2] and your machine is socket[1]->core[8], then at the end of the DFV walk on the subtree rooted at a socket vertex, we will give you core: 8. Then, we divide 8 by 2 which means that we can have 4 slots. So clearly this satisfies slot[2]. Now, there is an issue in the current code. The code doesn't factor into account the granularity in which a resource pool vertex is modeled/constructed for this. Say, each memory pool granularity is 64GB and your machine has 4 of those. In terms of quantity, your machines has socket[1]->memory[256]. But in terms of scheduleable units you only have 4 (i.e., 4 x 64GB). Now if you jobspec is socket[1]->memory[32], we will currently say you have 8 slots when in fact you only can fit 4 slots because of this granularity constraint! This leads to a buffer overflow. These changes fix the resulting segfault. --- resource/evaluators/edge_eval_api.cpp | 5 +++++ resource/evaluators/edge_eval_api.hpp | 1 + resource/evaluators/scoring_api.cpp | 8 ++++++++ resource/evaluators/scoring_api.hpp | 1 + resource/traversers/dfu_impl.cpp | 12 ++++++++++++ 5 files changed, 27 insertions(+) diff --git a/resource/evaluators/edge_eval_api.cpp b/resource/evaluators/edge_eval_api.cpp index daacc68a5..4732fb359 100644 --- a/resource/evaluators/edge_eval_api.cpp +++ b/resource/evaluators/edge_eval_api.cpp @@ -149,6 +149,11 @@ unsigned int evals_t::qualified_count () const return m_qual_count; } +unsigned int evals_t::qualified_granules () const +{ + return m_eval_egroups.size (); +} + unsigned int evals_t::total_count () const { return m_total_count; diff --git a/resource/evaluators/edge_eval_api.hpp b/resource/evaluators/edge_eval_api.hpp index f4b391449..b0a28ecb0 100644 --- a/resource/evaluators/edge_eval_api.hpp +++ b/resource/evaluators/edge_eval_api.hpp @@ -73,6 +73,7 @@ class evals_t { // This can throw out_of_range exception const eval_egroup_t &at (unsigned int i) const; unsigned int qualified_count () const; + unsigned int qualified_granules () const; unsigned int total_count () const; int64_t cutline () const; int64_t set_cutline (int64_t cutline); diff --git a/resource/evaluators/scoring_api.cpp b/resource/evaluators/scoring_api.cpp index e04ce62c8..019480e7a 100644 --- a/resource/evaluators/scoring_api.cpp +++ b/resource/evaluators/scoring_api.cpp @@ -180,6 +180,14 @@ unsigned int scoring_api_t::qualified_count (const subsystem_t &s, return res_evals->qualified_count (); } +unsigned int scoring_api_t::qualified_granules (const subsystem_t &s, + const std::string &r) +{ + handle_new_keys (s, r); + auto res_evals = (*m_ssys_map[s])[r]; + return res_evals->qualified_granules (); +} + unsigned int scoring_api_t::total_count (const subsystem_t &s, const std::string &r) { diff --git a/resource/evaluators/scoring_api.hpp b/resource/evaluators/scoring_api.hpp index 2689c8daa..34113db43 100644 --- a/resource/evaluators/scoring_api.hpp +++ b/resource/evaluators/scoring_api.hpp @@ -58,6 +58,7 @@ class scoring_api_t { const eval_egroup_t &at (const subsystem_t &s, const std::string &r, unsigned int i); unsigned int qualified_count (const subsystem_t &s, const std::string &r); + unsigned int qualified_granules (const subsystem_t &s, const std::string &r); unsigned int total_count (const subsystem_t &s, const std::string &r); unsigned int best_k (const subsystem_t &s, const std::string &r); unsigned int best_i (const subsystem_t &s, const std::string &r); diff --git a/resource/traversers/dfu_impl.cpp b/resource/traversers/dfu_impl.cpp index c0fed4efb..36d33dd85 100644 --- a/resource/traversers/dfu_impl.cpp +++ b/resource/traversers/dfu_impl.cpp @@ -449,17 +449,29 @@ int dfu_impl_t::cnt_slot (const std::vector &slot_shape, scoring_api_t &dfu_slot) { unsigned int qc = 0; + unsigned int qg = 0; unsigned int fit = 0; unsigned int count = 0; unsigned int qual_num_slots = UINT_MAX; const subsystem_t &dom = m_match->dom_subsystem (); // qualifed slot count is determined by the most constrained resource type + // both in terms of the amounts available as well as the number of edges into + // that resource because that represent the match granularity. + // Say you have 128 units of memory available across two memory resource + // vertices each with 64 units of memory and you request 1 unit of memory. + // In this case, you don't have 128 slots available because the match + // granularity is 64 units. Instead, you have only 2 slots available each + // with 64 units, and your request will get 1 whole resource vertex. qual_num_slots = UINT_MAX; for (auto &slot_elem : slot_shape) { qc = dfu_slot.qualified_count (dom, slot_elem.type); + qg = dfu_slot.qualified_granules (dom, slot_elem.type); count = m_match->calc_count (slot_elem, qc); + // constraint check against qualified amounts fit = (count == 0)? count : (qc / count); + // constraint check against qualified granules + fit = (fit > qg)? qg : fit; qual_num_slots = (qual_num_slots > fit)? fit : qual_num_slots; dfu_slot.rewind_iter_cur (dom, slot_elem.type); } From 3c1fb72398377e40db43842ca231c03d20842a56 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Wed, 4 Dec 2019 14:01:48 -0800 Subject: [PATCH 2/4] test: input/output files for granularity correctness test Add them in preparation for the upcoming t3017-resource-granule.t sharness test. --- t/data/resource/commands/granule/cmds01.in | 8 +++ t/data/resource/expected/granule/001.R.out | 72 +++++++++++++++++++ t/data/resource/expected/granule/002.R.out | 72 +++++++++++++++++++ t/data/resource/jobspecs/granule/test001.yaml | 26 +++++++ 4 files changed, 178 insertions(+) create mode 100644 t/data/resource/commands/granule/cmds01.in create mode 100644 t/data/resource/expected/granule/001.R.out create mode 100644 t/data/resource/expected/granule/002.R.out create mode 100644 t/data/resource/jobspecs/granule/test001.yaml diff --git a/t/data/resource/commands/granule/cmds01.in b/t/data/resource/commands/granule/cmds01.in new file mode 100644 index 000000000..b8cc6e20d --- /dev/null +++ b/t/data/resource/commands/granule/cmds01.in @@ -0,0 +1,8 @@ +# 5x cluster[1]->rack[1]->node[1]->socket[1]->slot[4]->core[1] +# ->memory[1] +match allocate @TEST_SRCDIR@/data/resource/jobspecs/granule/test001.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/granule/test001.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/granule/test001.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/granule/test001.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/granule/test001.yaml +quit diff --git a/t/data/resource/expected/granule/001.R.out b/t/data/resource/expected/granule/001.R.out new file mode 100644 index 000000000..3b2bfa769 --- /dev/null +++ b/t/data/resource/expected/granule/001.R.out @@ -0,0 +1,72 @@ + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:s] + ---------node1[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:s] + ---------node1[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:s] + ---------node0[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=3 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:s] + ---------node0[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=4 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= +INFO: ============================= +INFO: No matching resources found +INFO: JOBID=5 +INFO: ============================= diff --git a/t/data/resource/expected/granule/002.R.out b/t/data/resource/expected/granule/002.R.out new file mode 100644 index 000000000..2b7548e8c --- /dev/null +++ b/t/data/resource/expected/granule/002.R.out @@ -0,0 +1,72 @@ + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:s] + ---------node0[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:s] + ---------node0[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:s] + ---------node1[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=3 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:s] + ---------node1[1:s] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=4 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= +INFO: ============================= +INFO: No matching resources found +INFO: JOBID=5 +INFO: ============================= diff --git a/t/data/resource/jobspecs/granule/test001.yaml b/t/data/resource/jobspecs/granule/test001.yaml new file mode 100644 index 000000000..50381f8c7 --- /dev/null +++ b/t/data/resource/jobspecs/granule/test001.yaml @@ -0,0 +1,26 @@ +version: 1 +resources: + - type: node + count: 1 + with: + - type: socket + count: 1 + with: + - type: slot + count: 4 + label: default + with: + - type: core + count: 1 + - type: memory + count: 1 + +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: app + slot: default + count: + per_slot: 1 From 02063103ffcafa4829aa68dd57d5c9b218806fb1 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Wed, 4 Dec 2019 14:05:02 -0800 Subject: [PATCH 3/4] test: add t3017-resource-granule.t Add two subtests within t3017-resource-granule.t that checks the correctness of matching when each memory resource vertice is modeled as a pool (of size 2GBs). Even though each jobspec requests 1GB of memory, because the finest granularity of memory to be scheduled is 2GBs, you can schedule only 4 jobspecs. --- t/t3017-resource-granule.t | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 t/t3017-resource-granule.t diff --git a/t/t3017-resource-granule.t b/t/t3017-resource-granule.t new file mode 100755 index 000000000..e81bc840b --- /dev/null +++ b/t/t3017-resource-granule.t @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='Test Scheduling Correctness on Vertex Granularity' + +. $(dirname $0)/sharness.sh + +cmd_dir="${SHARNESS_TEST_SRCDIR}/data/resource/commands/granule" +exp_dir="${SHARNESS_TEST_SRCDIR}/data/resource/expected/granule" +grugs="${SHARNESS_TEST_SRCDIR}/data/resource/grugs/tiny.graphml" +query="../../resource/utilities/resource-query" + +# +# Selection Policy -- High ID first (-P high) +# The resource vertex with higher ID is preferred among its kind +# (e.g., node1 is preferred over node0 if available) +# + +cmds001="${cmd_dir}/cmds01.in" +test001_desc="memory granularity keeps last jobspec from being matched (pol=hi)" +test_expect_success "${test001_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds001} > cmds001 && + ${query} -L ${grugs} -S CA -P high -t 001.R.out < cmds001 && + test_cmp 001.R.out ${exp_dir}/001.R.out +' + +## Selection Policy -- Low ID first (-P low) +## The resource vertex with lower ID is preferred among its kind +## (e.g., node0 is preferred over node1 if available) +## + +cmds002="${cmd_dir}/cmds01.in" +test002_desc="memory granularity keeps last jobspec from being matched (pol=hi)" +test_expect_success "${test002_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds002} > cmds002 && + ${query} -L ${grugs} -S CA -P low -t 002.R.out < cmds002 && + test_cmp 002.R.out ${exp_dir}/002.R.out +' + +test_done From 7818411b784351b1c083e1bce1210dc99ce9ca2a Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Wed, 4 Dec 2019 14:09:03 -0800 Subject: [PATCH 4/4] build: integrate t3017-resource-granule.t into build system --- t/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/t/Makefile.am b/t/Makefile.am index 12e25f0e8..e416ffaa2 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -51,6 +51,7 @@ TESTS = \ t3014-resource-var-aware.t \ t3015-resource-basic-jgf.t \ t3016-resource-power-jgf.t \ + t3017-resource-granule.t \ t4000-match-params.t \ t4001-match-allocate.t \ t4002-match-reserve.t \