From f452ec161f6adaf6ea49301676d3fc64b4f88a71 Mon Sep 17 00:00:00 2001 From: Chuck Yount Date: Tue, 8 May 2018 10:52:11 -0700 Subject: [PATCH] Hide scratch bundles as children under parent bundles. Fixes #116. --- src/common/common_utils.cpp | 2 +- src/compiler/lib/YaskKernel.cpp | 41 ++++++++++++++++++++++----------- src/kernel/lib/context.cpp | 40 +++++++++----------------------- src/kernel/lib/context.hpp | 7 +++--- src/kernel/lib/setup.cpp | 23 ++++++++++++------ src/kernel/lib/stencil_calc.cpp | 2 +- src/kernel/lib/stencil_calc.hpp | 17 ++++++++++---- 7 files changed, 72 insertions(+), 60 deletions(-) diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index fa7f2f17..e1a9d327 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -41,7 +41,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". - const string version = "2.08.03"; + const string version = "2.08.04"; string yask_get_version_string() { return version; diff --git a/src/compiler/lib/YaskKernel.cpp b/src/compiler/lib/YaskKernel.cpp index b0aa9b83..89202a32 100644 --- a/src/compiler/lib/YaskKernel.cpp +++ b/src/compiler/lib/YaskKernel.cpp @@ -679,20 +679,12 @@ namespace yask { for (auto& eg : _eqBundles) { string egName = eg.getName(); string sgName = "stencilBundle_" + egName; - os << " stBundles.push_back(&" << sgName << ");\n"; - // Add other-bundle deps. - for (DepType dt = DepType(0); dt < num_deps; dt = DepType(dt+1)) { - for (auto& dep : eg.getDeps(dt)) { - string depName = "stencilBundle_" + dep; - string dtName = (dt == cur_step_dep) ? "cur_step_dep" : - (dt == prev_step_dep) ? "prev_step_dep" : - "internal_error"; - os << " " << sgName << - ".add_dep(yask::" << dtName << - ", &" << depName << ");\n"; - } - } + // Only want non-scratch bundles in stBundles. + // Each scratch bundle will be added to its + // parent bundle. 
+ if (!eg.isScratch()) + os << " stBundles.push_back(&" << sgName << ");\n"; // Add scratch-bundle deps in proper order. auto& sdeps = eg.getScratchDeps(); @@ -701,10 +693,31 @@ namespace yask { string sg2Name = "stencilBundle_" + eg2Name; if (sdeps.count(eg2Name)) os << " " << sgName << - ".add_scratch_dep(&" << sg2Name << ");\n"; + ".add_scratch_child(&" << sg2Name << ");\n"; } } // eq-bundles. + + // Deps. + os << "\n // Stencil bundle inter-dependencies.\n"; + for (auto& eg : _eqBundles) { + string egName = eg.getName(); + string sgName = "stencilBundle_" + egName; + + // Add deps between bundles. + for (DepType dt = DepType(0); dt < num_deps; dt = DepType(dt+1)) { + for (auto& dep : eg.getDeps(dt)) { + string depName = "stencilBundle_" + dep; + string dtName = (dt == cur_step_dep) ? "cur_step_dep" : + (dt == prev_step_dep) ? "prev_step_dep" : + "internal_error"; + os << " " << sgName << + ".add_dep(yask::" << dtName << + ", &" << depName << ");\n"; + } + } + } // bundles. + os << " } // Ctor.\n"; // Dims creator. diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index df4bbe5a..6a7c6420 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -231,10 +231,6 @@ namespace yask { // Loop thru bundles. for (auto* asg : stBundles) { - // Don't do scratch updates here. - if (asg->is_scratch()) - continue; - // Scan through n-D space. TRACE_MSG("calc_rank_ref: step " << start_t << " in non-scratch group '" << asg->get_name()); @@ -245,8 +241,7 @@ namespace yask { // Find the groups that need to be processed. // This will be the prerequisite scratch-grid // groups plus this non-scratch group. - auto sg_list = asg->get_scratch_deps(); - sg_list.push_back(asg); + auto sg_list = asg->get_reqd_bundles(); // Loop through all the needed bundles. for (auto* sg : sg_list) { @@ -421,10 +416,6 @@ namespace yask { for (auto* sg : stBundles) { - // Don't do scratch updates here. 
- if (sg->is_scratch()) - continue; - // Exchange halo(s) needed for this bundle. exchange_halos(start_t, stop_t, sg); @@ -552,10 +543,6 @@ namespace yask { // Stencil bundles to evaluate at this time step. for (auto* sg : stBundles) { - // Don't do scratch updates here. - if (sg->is_scratch()) - continue; - // Bundle selected? if (stBundle_set && !stBundle_set->count(sg)) continue; @@ -1060,21 +1047,21 @@ namespace yask { domain_pts_ps = writes_ps = flops = 0.; if (steps_done > 0) { os << - "num-points-per-step: " << makeNumStr(tot_domain_1t) << endl << - "num-writes-per-step: " << makeNumStr(tot_numWrites_1t) << endl << - "num-est-FP-ops-per-step: " << makeNumStr(tot_numFpOps_1t) << endl << - "num-steps-done: " << makeNumStr(steps_done) << endl << - "elapsed-time (sec): " << makeNumStr(rtime) << endl; + "num-points-per-step: " << makeNumStr(tot_domain_1t) << endl << + "num-writes-per-step: " << makeNumStr(tot_numWrites_1t) << endl << + "num-est-FP-ops-per-step: " << makeNumStr(tot_numFpOps_1t) << endl << + "num-steps-done: " << makeNumStr(steps_done) << endl << + "elapsed-time (sec): " << makeNumStr(rtime) << endl; #ifdef USE_MPI os << - "time in halo exch (sec): " << makeNumStr(mtime); + "time in halo exch (sec): " << makeNumStr(mtime); float pct = 100. * mtime / rtime; os << " (" << pct << "%)" << endl; #endif os << - "throughput (num-writes/sec): " << makeNumStr(writes_ps) << endl << - "throughput (est-FLOPS): " << makeNumStr(flops) << endl << - "throughput (num-points/sec): " << makeNumStr(domain_pts_ps) << endl; + "throughput (num-writes/sec): " << makeNumStr(writes_ps) << endl << + "throughput (est-FLOPS): " << makeNumStr(flops) << endl << + "throughput (num-points/sec): " << makeNumStr(domain_pts_ps) << endl; } // Fill in return object. @@ -1164,10 +1151,6 @@ namespace yask { // Loop thru all stencil bundles. for (auto* sg : stBundles) { - // Don't exchange for scratch groups. - if (sg->is_scratch()) - continue; - // Bundle selected? 
if (sgp && sgp != sg) continue; @@ -1178,8 +1161,7 @@ namespace yask { // We need to loop thru the scratch-grid // bundles so we can consider the inputs // to them for exchanges. - auto sg_list = sg->get_scratch_deps(); - sg_list.push_back(sg); + auto sg_list = sg->get_reqd_bundles(); // Loop through all the needed bundles. for (auto* csg : sg_list) { diff --git a/src/kernel/lib/context.hpp b/src/kernel/lib/context.hpp index 9af09ef4..94db05cf 100644 --- a/src/kernel/lib/context.hpp +++ b/src/kernel/lib/context.hpp @@ -154,12 +154,12 @@ namespace yask { // If WFs are not used, this is the same as rank_bb; BoundingBox ext_bb; - // List of all stencil bundles in the order in which + // List of all non-scratch stencil bundles in the order in which // they should be evaluated within a step. // TODO: use dependency info, allowing more parallelism. StencilBundleList stBundles; - // All grids. + // All non-scratch grids. GridPtrs gridPtrs; GridPtrMap gridMap; @@ -167,7 +167,8 @@ namespace yask { GridPtrs outputGridPtrs; GridPtrMap outputGridMap; - // Scratch grids. + // Scratch-grid vectors. + // Each vector contains a grid for each thread. ScratchVecs scratchVecs; // Some calculated domain sizes. diff --git a/src/kernel/lib/setup.cpp b/src/kernel/lib/setup.cpp index b65dd0e8..7d308406 100644 --- a/src/kernel/lib/setup.cpp +++ b/src/kernel/lib/setup.cpp @@ -920,10 +920,6 @@ namespace yask { // Need to shift for each non-scratch bundle. for (auto* asg : stBundles) { - // Don't do scratch updates here. - if (asg->is_scratch()) - continue; - // Each bundle is shifted 'wf_steps' times. num_wf_shifts += wf_steps; } @@ -1137,16 +1133,29 @@ namespace yask { rank_numFpOps_1t = 0; os << "Num stencil bundles: " << stBundles.size() << endl; for (auto* sg : stBundles) { - idx_t updates1 = sg->get_scalar_points_written(); + + idx_t updates1 = 0, reads1 = 0, fpops1 = 0; + + // Loop through all the needed bundles to + // count stats for scratch bundles. 
+ // Does not count extra ops needed in scratch halos + // since this varies depending on block size. + auto sg_list = sg->get_reqd_bundles(); + for (auto* rsg : sg_list) { + updates1 += rsg->get_scalar_points_written(); + reads1 += rsg->get_scalar_points_read(); + fpops1 += rsg->get_scalar_fp_ops(); + } + idx_t updates_domain = updates1 * sg->bb_num_points; rank_numWrites_1t += updates_domain; - idx_t reads1 = sg->get_scalar_points_read(); idx_t reads_domain = reads1 * sg->bb_num_points; rank_reads_1t += reads_domain; - idx_t fpops1 = sg->get_scalar_fp_ops(); idx_t fpops_domain = fpops1 * sg->bb_num_points; rank_numFpOps_1t += fpops_domain; + os << "Stats for bundle '" << sg->get_name() << "':\n" << + " scratch bundles: " << (sg_list.size() - 1) << endl << " sub-domain: " << sg->bb_begin.makeDimValStr() << " ... " << sg->bb_end.subElements(1).makeDimValStr() << endl << " sub-domain size: " << sg->bb_len.makeDimValStr(" * ") << endl << diff --git a/src/kernel/lib/stencil_calc.cpp b/src/kernel/lib/stencil_calc.cpp index 6fe49639..2a16ec60 100644 --- a/src/kernel/lib/stencil_calc.cpp +++ b/src/kernel/lib/stencil_calc.cpp @@ -63,7 +63,7 @@ namespace yask { // Define the bundles that need to be processed in // this block. This will be the prerequisite scratch-grid // bundles plus this non-scratch bundle. - auto sg_list = get_scratch_deps(); + auto sg_list = get_scratch_children(); sg_list.push_back(this); // Set number of threads for a block. diff --git a/src/kernel/lib/stencil_calc.hpp b/src/kernel/lib/stencil_calc.hpp index 816e70dd..40e6352f 100644 --- a/src/kernel/lib/stencil_calc.hpp +++ b/src/kernel/lib/stencil_calc.hpp @@ -47,7 +47,7 @@ namespace yask { // List of scratch-grid bundles that need to be evaluated // before this bundle. Listed in eval order first-to-last. 
- StencilBundleList _scratch_deps; + StencilBundleList _scratch_children; // Whether this updates scratch grid(s); bool _is_scratch = false; @@ -126,15 +126,22 @@ namespace yask { } // Add needed scratch-bundle. - virtual void add_scratch_dep(StencilBundleBase* eg) { - _scratch_deps.push_back(eg); + virtual void add_scratch_child(StencilBundleBase* eg) { + _scratch_children.push_back(eg); } // Get needed scratch-bundle(s). - virtual const StencilBundleList& get_scratch_deps() const { - return _scratch_deps; + virtual const StencilBundleList& get_scratch_children() const { + return _scratch_children; } + // Get scratch children plus self. + virtual StencilBundleList get_reqd_bundles() { + auto sg_list = get_scratch_children(); + sg_list.push_back(this); + return sg_list; + } + // If this bundle is updating scratch grid(s), // expand indices to calculate values in halo. // Adjust offsets in grids based on original idxs.