Skip to content

Commit

Permalink
Merge 7ab10d7 into ce8c530
Browse files Browse the repository at this point in the history
  • Loading branch information
dongahn committed May 8, 2018
2 parents ce8c530 + 7ab10d7 commit c8635f1
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 99 deletions.
1 change: 1 addition & 0 deletions resrc/test/tresrc.c
Expand Up @@ -361,6 +361,7 @@ static int test_a_resrc (resrc_api_ctx_t *rsapi, resrc_t *resrc, bool rdl)
resrc_api_map_put (gather_map, "node", (void *)(intptr_t)REDUCE_UNDER_ME);
resrc_api_map_t *reduce_map = resrc_api_map_new ();
resrc_api_map_put (reduce_map, "core", (void *)(intptr_t)NONE_UNDER_ME);
resrc_api_map_put (reduce_map, "gpu", (void *)(intptr_t)NONE_UNDER_ME);

init_time ();
rc = resrc_tree_serialize_lite (gather, reduce, found_tree,
Expand Down
98 changes: 1 addition & 97 deletions sched/sched.c
Expand Up @@ -1250,87 +1250,6 @@ static inline int bridge_rs2rank_tab_query (ssrvctx_t *ctx, const char *name,
* *
*******************************************************************************/

/* Append one per-node containment entry to the JSON array rarr:
 * { JSC_RDL_ALLOC_CONTAINED: { rank, ncores, ngpus } }.
 * Both json objects are handed off via *_set_new/*_append_new, so no
 * references leak on this path.
 */
static void inline build_contain_1node_req (int64_t nc, int64_t ng, int64_t rank,
                                            json_t *rarr)
{
    json_t *entry = Jnew ();
    json_t *contained = Jnew ();

    Jadd_int64 (contained, JSC_RDL_ALLOC_CONTAINING_RANK, rank);
    Jadd_int64 (contained, JSC_RDL_ALLOC_CONTAINED_NCORES, nc);
    Jadd_int64 (contained, JSC_RDL_ALLOC_CONTAINED_NGPUS, ng);

    json_object_set_new (entry, JSC_RDL_ALLOC_CONTAINED, contained);
    json_array_append_new (rarr, entry);
}

/* Count resources of the given type in the subtree rooted at rt.
 * A vertex whose own type matches counts as exactly 1 and its subtree is
 * not descended further; otherwise the counts of all children are summed.
 * Returns 0 for a NULL tree.
 */
static int n_resources_of_type (resrc_tree_t *rt, const char *type)
{
    if (!rt)
        return 0;

    if (!strcmp (resrc_type (resrc_tree_resrc (rt)), type))
        return 1;

    int total = 0;
    if (resrc_tree_num_children (rt)) {
        resrc_tree_list_t *kids = resrc_tree_children (rt);
        if (kids) {
            resrc_tree_t *child = resrc_tree_list_first (kids);
            while (child) {
                total += n_resources_of_type (child, type);
                child = resrc_tree_list_next (kids);
            }
        }
    }
    return total;
}


/*
 * Because the job's rdl should only contain what's allocated to the job,
 * we traverse the entire tree in the post-order walk fashion.
 * For every "node" vertex found, resolve its broker rank and append a
 * containment request (cores/gpus on that rank) to arr.
 * Returns 0 on success, -1 if a rank lookup fails at this level.
 */
static int build_contain_req (ssrvctx_t *ctx, flux_lwj_t *job, resrc_tree_t *rt,
                              json_t *arr)
{
    int rc = -1;
    uint32_t rank = 0;
    resrc_t *r = NULL;

    if (rt) {
        r = resrc_tree_resrc (rt);
        if (strcmp (resrc_type (r), "node")) {
            /* Not a node vertex: descend looking for nodes.
             * NOTE(review): the recursive call's return value is ignored,
             * so a rank-lookup failure inside a subtree does not propagate
             * to the caller -- confirm this is intentional. */
            if (resrc_tree_num_children (rt)) {
                resrc_tree_list_t *children = resrc_tree_children (rt);
                if (children) {
                    resrc_tree_t *child = resrc_tree_list_first (children);
                    while (child) {
                        build_contain_req (ctx, job, child, arr);
                        child = resrc_tree_list_next (children);
                    }
                }
            }
        } else {
            /* Node vertex: map its name/digest to a broker rank. */
            if (bridge_rs2rank_tab_query (ctx, resrc_name (r), resrc_digest (r), &rank))
                goto done;
            else {
                /* Prefer explicit per-node counts from the request; fall
                 * back to counting allocated resources in this subtree. */
                int cores = job->req->corespernode ? job->req->corespernode :
                    n_resources_of_type(rt, "core");
                int gpus = job->req->gpuspernode ? job->req->gpuspernode :
                    n_resources_of_type(rt, "gpu");
                if (cores) {
                    build_contain_1node_req (cores, gpus, rank, arr);
                }
            }
        }
    }
    rc = 0;
done:
    return rc;
}

static int resolve_rank (ssrvctx_t *ctx, json_t *o)
{
int rc = -1;
Expand Down Expand Up @@ -1371,7 +1290,6 @@ static int req_tpexec_allocate (ssrvctx_t *ctx, flux_lwj_t *job)
int rc = -1;
flux_t *h = ctx->h;
json_t *jcb = Jnew ();
json_t *arr = Jnew_ar ();
json_t *gat = Jnew_ar ();
json_t *red = Jnew ();
resrc_api_map_t *gmap = resrc_api_map_new ();
Expand All @@ -1385,6 +1303,7 @@ static int req_tpexec_allocate (ssrvctx_t *ctx, flux_lwj_t *job)

resrc_api_map_put (gmap, "node", (void *)(intptr_t)REDUCE_UNDER_ME);
resrc_api_map_put (rmap, "core", (void *)(intptr_t)NONE_UNDER_ME);
resrc_api_map_put (rmap, "gpu", (void *)(intptr_t)NONE_UNDER_ME);
if (resrc_tree_serialize_lite (gat, red, job->resrc_tree, gmap, rmap)) {
flux_log (h, LOG_ERR, "job (%"PRId64") resource serialization failed",
job->lwj_id);
Expand All @@ -1403,21 +1322,6 @@ static int req_tpexec_allocate (ssrvctx_t *ctx, flux_lwj_t *job)
goto done;
}
free (jcbstr);
Jput (jcb);

jcb = Jnew ();
if (build_contain_req (ctx, job, job->resrc_tree, arr) != 0) {
flux_log (h, LOG_ERR, "error requesting containment for job");
goto done;
}
json_object_set_new (jcb, JSC_RDL_ALLOC, arr);
jcbstr = Jtostr (jcb);
if (jsc_update_jcb (h, job->lwj_id, JSC_RDL_ALLOC, jcbstr) != 0) {
flux_log (h, LOG_ERR, "error updating jcb");
free (jcbstr);
goto done;
}
free (jcbstr);

if ((update_state (h, job->lwj_id, job->state, J_ALLOCATED)) != 0) {
flux_log (h, LOG_ERR, "failed to update the state of job %"PRId64"",
Expand Down
82 changes: 82 additions & 0 deletions t/scripts/R_lite.lua
@@ -0,0 +1,82 @@
#!/usr/bin/env lua

-- R_lite: print per-rank R_lite resource information for a wreck job.
-- Invocation: R_lite Jobid [Rank Resource Format]

local cpuset = require 'flux.cpuset'
local id_to_kvs_path = require 'wreck'.id_to_path
local f = assert (require 'flux'.new())   -- flux broker handle
local jobid = tonumber (arg[1])           -- nil when missing or non-numeric
local rank = arg[2] or "all"              -- a specific broker rank, or "all"
local resource_type = arg[3] or "all"     -- "core", "gpu", or "all"
local format = arg[4] or "all"            -- "count", "id", or "all" (both)

-- Format a message (string.format style), write it to stderr, and
-- terminate the script with exit status 1.
local function die (...)
    local msg = string.format (...)
    io.stderr:write (msg)
    os.exit (1)
end

-- Print the command-line usage/help text to stderr.
local function usage ()
    io.stderr:write ('Usage: R_lite Jobid [Rank Resource Format]\n')
    io.stderr:write ([[
Print R_lite information on a job.
Jobid jobid.
Optional Arguments
Rank a specific rank for which to print resource info;
if "all" is given, print for all ranks.
Resource type of the resource for which to print the info:
all (default), core, or gpu.
Format if "count" is given, print only the count on the resource(s)
if "id", print only the ID of the resource(s)
if omitted, print both the count and ID.
]])
end

-- Render one rank's resource children as a display string.
-- resources: table mapping resource type name (e.g. "core", "gpu") to an
--            ID-set string parseable by flux.cpuset.
-- count/id:  booleans selecting which pieces to emit (both may be true).
-- Also reads the script-level upvalues 'resource_type' and 'format':
--   * resource_type == "all", or a matching type with format == "all":
--     labeled output, e.g. 'count=4 core=0-3'
--   * matching type with format ~= "all": the bare count or bare ID string.
local function r_string (resources, count, id)
    local s = ""

    for k,v in pairs (resources) do
        if resource_type == "all" or (resource_type == k and format == "all") then
            -- #cpuset.new(ids) yields the number of IDs in the set string
            s = s..(count and "count="..#cpuset.new (resources[k]).." " or "")
            s = s..(id and k.."="..resources[k] or "")
        elseif resource_type == k then
            s = s..(count and #cpuset.new (resources[k]) or "")
            s = s..(id and resources[k] or "")
        end
    end
    return s
end

-- Fetch the job's R_lite entry from the KVS and print resource info for
-- the requested rank(s) via r_string.  Dies (exit 1) when the entry lacks
-- the requested resource type or the requested rank is not present.
-- count/id: booleans selecting which pieces of info to print.
local function run (count, id)
    local key = id_to_kvs_path{ flux = f, jobid = jobid }..".R_lite"
    local R_lite = assert (f:kvs_get (key))
    local hit = false

    for _,r in pairs (R_lite) do
        -- A specific resource type was requested but this rank entry has
        -- no such resource: nothing sensible to print.
        if resource_type ~= "all" and not r.children[resource_type] then
            die ("No info in R_lite for %s\n", resource_type)
        end

        if rank == "all" then
            hit = true
            print ("rank"..r.rank..": "..r_string (r.children, count, id))
        elseif r.rank == tonumber (rank) then
            hit = true
            print (r_string (r.children, count, id))
        end
    end

    if hit == false then
        -- %s, not %d: rank is a string and may be non-numeric (e.g. the
        -- default "all"); string.format ("%d", "all") raises an error and
        -- would mask this message with a stack traceback.
        die ("No info in R_lite for rank %s\n", rank)
    end
end

-- Interpret the Format argument: "all" selects both count and ID output;
-- an unrecognized format leaves both false and triggers usage below.
local count = (format == "all" or format == "count")
local id = (format == "all" or format == "id")
-- Validate: jobid must parse as a number and format must be recognized.
if not jobid or #arg < 1 or (count == false and id == false) then
    usage ()
    os.exit ()
end

run (count, id)

-- vi: ts=4 sw=4 expandtab
4 changes: 2 additions & 2 deletions t/sharness.d/sched-sharness.sh
Expand Up @@ -124,8 +124,8 @@ verify_1N_sleep_jobs () {
local rank=0
for i in `seq $sched_start_jobid $sched_end_jobid`
do
flux kvs get $(job_kvs_path $i).rank.$rank.cores \
> $sched_test_session.$i.out
$SHARNESS_TEST_SRCDIR/scripts/R_lite.lua ${i} ${rank} core count \
> ${sched_test_session}.${i}.out
grep $cores $sched_test_session.$i.out
if [ $? -ne 0 ]
then
Expand Down

0 comments on commit c8635f1

Please sign in to comment.