From b9d4e15d0864260fbd30aba8af7029a3a522915e Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Thu, 31 Jan 2019 16:09:24 -0800 Subject: [PATCH] Improve the step-allocation algorithm so it handles staggered-grids and other solutions with multiple packs better. Closes #192. --- src/common/common_utils.cpp | 2 +- src/compiler/lib/Eqs.cpp | 279 ++++++++++++++++++++---------------- src/compiler/lib/Eqs.hpp | 9 +- src/compiler/lib/Grid.cpp | 157 ++++++++++++-------- src/compiler/lib/Grid.hpp | 38 ++--- src/compiler/lib/Soln.cpp | 3 + src/compiler/main.cpp | 12 +- src/kernel/Makefile | 12 +- 8 files changed, 287 insertions(+), 225 deletions(-) diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index c857b320..3645886a 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -46,7 +46,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". - const string version = "2.16.04"; + const string version = "2.16.05"; string yask_get_version_string() { return version; diff --git a/src/compiler/lib/Eqs.cpp b/src/compiler/lib/Eqs.cpp index c4c84425..5b054a45 100644 --- a/src/compiler/lib/Eqs.cpp +++ b/src/compiler/lib/Eqs.cpp @@ -682,7 +682,9 @@ namespace yask { visitEqs(&slv); } - // Update access stats for the grids, i.e., halos and const indices. + // Update access stats for the grids. + // For now, this is just const indices. + // Halos are updated later, after packs are established. void Eqs::updateGridStats() { // Find all LHS and RHS points and grids for all eqs. @@ -698,7 +700,6 @@ namespace yask { // Update stats of each grid accessed in 'eq'. for (auto ap : allPts1) { auto* g = ap->getGrid(); // grid for point 'ap'. - g->updateHalo(ap->getArgOffsets()); g->updateConstIndices(ap->getArgConsts()); } } @@ -1048,8 +1049,35 @@ namespace yask { return newBundle; } - // Adjust scratch-grid halos as needed. - void EqBundles::adjustScratchHalos() { + // Find halos needed for each grid. 
+ void EqBundlePacks::calcHalos(EqBundles& allBundles) { + + // Find all LHS and RHS points and grids for all eqs. + PointVisitor pv; + visitEqs(&pv); + +#ifdef DEBUG_SCRATCH + cout << "* cH: analyzing " << getAll().size() << " eqs...\n"; +#endif + + // First, set halos based only on immediate accesses. + for (auto& bp : getAll()) { + auto pname = bp->getName(); + + for (auto& eq : bp->getEqs()) { + + // Get all grid points touched by this eq. + auto& allPts1 = pv.getAllPts().at(eq.get()); + + // Update stats of each grid accessed in 'eq'. + for (auto ap : allPts1) { + auto* g = ap->getGrid(); // grid for point 'ap'. + g->updateHalo(pname, ap->getArgOffsets()); + } + } + } + + // Next, propagate halos through scratch grids as needed. // Example: // eq1: scr(x) EQUALS u(t,x+1); <-- orig halo of u = 1. @@ -1086,149 +1114,151 @@ namespace yask { // from the shadows. vector< map> shadows; - // Find all LHS and RHS points and grids for all eqs. - PointVisitor pv; - visitEqs(&pv); + // Packs. + for (auto& bp : getAll()) { + auto pname = bp->getName(); + auto& pbundles = bp->getBundles(); // list of bundles. -#ifdef DEBUG_SCRATCH - cout << "* uSH: analyzing " << getAll().size() << " eqs...\n"; -#endif - - // Bundles. - for (auto& b1 : getAll()) { + // Bundles with their dependency info. + for (auto& b1 : allBundles.getAll()) { - // Only need to look at dep paths starting from non-scratch bundles. - if (b1->isScratch()) - continue; + // Only need bundles in this pack. + if (pbundles.count(b1) == 0) + continue; - // We start with each non-scratch bundle and walk the dep - // tree to find all dependent scratch bundles. It's - // important to then visit them in dep order using 'path' to - // get only unbroken chains of scratch bundles. + // Only need to look at dep paths starting from non-scratch bundles. + if (b1->isScratch()) + continue; + + // We start with each non-scratch bundle and walk the dep + // tree to find all dependent scratch bundles. 
It's + // important to then visit them in dep order using 'path' to + // get only unbroken chains of scratch bundles. #ifdef DEBUG_SCRATCH - cout << "* uSH: visiting deps of " << b1->getDescr() << endl; + cout << "* cH: visiting deps of " << b1->getDescr() << endl; #endif - getDeps().visitDeps + allBundles.getDeps().visitDeps - // For each 'bn', 'b1' is 'bn' or depends on 'bn', - // immediately or indirectly; 'path' leads from - // 'b1' to 'bn'. - (b1, [&](Tp bn, TpList& path) { + // For each 'bn', 'b1' is 'bn' or depends on 'bn', + // immediately or indirectly; 'path' leads from + // 'b1' to 'bn'. + (b1, [&](EqBundlePtr bn, EqBundleList& path) { - // Create a new empty map of shadow grids for this path. - shadows.resize(shadows.size() + 1); - auto& shadow_map = shadows.back(); + // Create a new empty map of shadow grids for this path. + shadows.resize(shadows.size() + 1); + auto& shadow_map = shadows.back(); - // Walk path from 'b1', stopping at end of scratch - // chain. - for (auto b2 : path) { + // Walk path from 'b1', stopping at end of scratch + // chain. + for (auto b2 : path) { - // Don't process 'b1', the initial non-scratch bundle. - if (b2 == b1) - continue; + // Don't process 'b1', the initial non-scratch bundle. + if (b2 == b1) + continue; - // If this isn't a scratch bundle, we are done - // w/this path because we only want the bundles - // from 'b1' through an *unbroken* chain of - // scratch bundles. - if (!b2->isScratch()) - break; + // If this isn't a scratch bundle, we are done + // w/this path because we only want the bundles + // from 'b1' through an *unbroken* chain of + // scratch bundles. + if (!b2->isScratch()) + break; - // Make shadow copies of all grids touched by 'eq2'. - // All changes will be applied to these shadow grids - // for the current 'path'. - for (auto& eq : b2->getEqs()) { - - // Output grid. - auto* og = pv.getOutputGrids().at(eq.get()); - if (shadow_map.count(og) == 0) - shadow_map[og] = new Grid(*og); - - // Input grids. 
- auto& inPts = pv.getInputPts().at(eq.get()); - for (auto* ip : inPts) { - auto* ig = ip->getGrid(); - if (shadow_map.count(ig) == 0) - shadow_map[ig] = new Grid(*ig); + // Make shadow copies of all grids touched by 'eq2'. + // All changes will be applied to these shadow grids + // for the current 'path'. + for (auto& eq : b2->getEqs()) { + + // Output grid. + auto* og = pv.getOutputGrids().at(eq.get()); + if (shadow_map.count(og) == 0) + shadow_map[og] = new Grid(*og); + + // Input grids. + auto& inPts = pv.getInputPts().at(eq.get()); + for (auto* ip : inPts) { + auto* ig = ip->getGrid(); + if (shadow_map.count(ig) == 0) + shadow_map[ig] = new Grid(*ig); + } } - } - // For each scratch bundle, set the size of all its - // output grids' halos to the max across its - // halos. We need to do this because halos are - // written in a scratch grid. Since they are - // bundled, all the writes must be over the same - // area. - - // First, set first eq halo the max of all. - auto& eq1 = b2->getEqs().front(); - auto* og1 = shadow_map[eq1->getGrid()]; - for (auto& eq2 : b2->getEqs()) { - if (eq1 == eq2) - continue; - - // Adjust g1 to max(g1, g2). - auto* og2 = shadow_map[eq2->getGrid()]; - og1->updateHalo(*og2); - } + // For each scratch bundle, set the size of all its + // output grids' halos to the max across its + // halos. We need to do this because halos are + // written in a scratch grid. Since they are + // bundled, all the writes must be over the same + // area. + + // First, set first eq halo the max of all. + auto& eq1 = b2->getEqs().front(); + auto* og1 = shadow_map[eq1->getGrid()]; + for (auto& eq2 : b2->getEqs()) { + if (eq1 == eq2) + continue; + + // Adjust g1 to max(g1, g2). + auto* og2 = shadow_map[eq2->getGrid()]; + og1->updateHalo(*og2); + } - // Then, update all others based on first. - for (auto& eq2 : b2->getEqs()) { - if (eq1 == eq2) - continue; + // Then, update all others based on first. 
+ for (auto& eq2 : b2->getEqs()) { + if (eq1 == eq2) + continue; - // Adjust g2 to g1. - auto* og2 = shadow_map[eq2->getGrid()]; - og2->updateHalo(*og1); - } + // Adjust g2 to g1. + auto* og2 = shadow_map[eq2->getGrid()]; + og2->updateHalo(*og1); + } - // Get updated halos from the scratch bundle. These - // are the points that are read from the dependent - // eq(s). For scratch grids, the halo areas must - // also be written to. - auto left_ohalo = og1->getHaloSizes(true); - auto right_ohalo = og1->getHaloSizes(false); + // Get updated halos from the scratch bundle. These + // are the points that are read from the dependent + // eq(s). For scratch grids, the halo areas must + // also be written to. + auto left_ohalo = og1->getHaloSizes(pname, true); + auto right_ohalo = og1->getHaloSizes(pname, false); #ifdef DEBUG_SCRATCH - cout << "** uSH: processing " << b2->getDescr() << "...\n" - "*** uSH: LHS halos: " << left_ohalo.makeDimValStr() << - " & " << right_ohalo.makeDimValStr() << endl; + cout << "** cH: processing " << b2->getDescr() << "...\n" + "*** cH: LHS halos: " << left_ohalo.makeDimValStr() << + " & " << right_ohalo.makeDimValStr() << endl; #endif - // Recalc min halos of all input grids of all - // scratch eqs in this bundle by adding size of - // output-grid halos. - for (auto& eq : b2->getEqs()) { - auto& inPts = pv.getInputPts().at(eq.get()); - - // Input points. - for (auto ip : inPts) { - auto* ig = shadow_map[ip->getGrid()]; - auto& ao = ip->getArgOffsets(); // e.g., '2' for 'x+2'. - - // Increase range by subtracting left halos and - // adding right halos. - auto left_ihalo = ao.subElements(left_ohalo, false); - ig->updateHalo(left_ihalo); - auto right_ihalo = ao.addElements(right_ohalo, false); - ig->updateHalo(right_ihalo); + // Recalc min halos of all input grids of all + // scratch eqs in this bundle by adding size of + // output-grid halos. + for (auto& eq : b2->getEqs()) { + auto& inPts = pv.getInputPts().at(eq.get()); + + // Input points. 
+ for (auto ip : inPts) { + auto* ig = shadow_map[ip->getGrid()]; + auto& ao = ip->getArgOffsets(); // e.g., '2' for 'x+2'. + + // Increase range by subtracting left halos and + // adding right halos. + auto left_ihalo = ao.subElements(left_ohalo, false); + ig->updateHalo(pname, left_ihalo); + auto right_ihalo = ao.addElements(right_ohalo, false); + ig->updateHalo(pname, right_ihalo); #ifdef DEBUG_SCRATCH - cout << "*** uSH: updated min halos of '" << ig->get_name() << "' to " << - left_ihalo.makeDimValStr() << - " & " << right_ihalo.makeDimValStr() << endl; + cout << "*** cH: updated min halos of '" << ig->get_name() << "' to " << + left_ihalo.makeDimValStr() << + " & " << right_ihalo.makeDimValStr() << endl; #endif - } // input pts. - } // eqs in bundle. - } // path. - }); // lambda fn. - } // bundles. + } // input pts. + } // eqs in bundle. + } // path. + }); // lambda fn. + } // bundles. + } // packs. // Apply the changes from the shadow grids. // This will result in the grids containing the max // of the shadow halos. for (auto& shadow_map : shadows) { #ifdef DEBUG_SCRATCH - cout << "* uSH: applying changes from a shadow map...\n"; + cout << "* cH: applying changes from a shadow map...\n"; #endif for (auto& si : shadow_map) { auto* orig_gp = si.first; @@ -1239,7 +1269,7 @@ namespace yask { // Update the original. orig_gp->updateHalo(*shadow_gp); #ifdef DEBUG_SCRATCH - cout << "** uSH: updated '" << orig_gp->get_name() << "'.\n"; + cout << "** cH: updated '" << orig_gp->get_name() << "'.\n"; #endif // Release the shadow grid. @@ -1247,7 +1277,7 @@ namespace yask { shadow_map.at(orig_gp) = NULL; } } - } + } // calcHalos(). // Divide all equations into eqBundles. // Only process updates to grids in 'gridRegex'. @@ -1328,12 +1358,9 @@ namespace yask { os << "Topologically ordering bundles...\n"; topo_sort(); - os << "Adjusting scratch halos...\n"; - adjustScratchHalos(); - // Dump info. 
os << "Created " << getNum() << " equation bundle(s):\n"; - for (auto& eg1 : _all) { + for (auto& eg1 : getAll()) { os << " " << eg1->getDescr() << ":\n" " Contains " << eg1->getNumEqs() << " equation(s).\n" " Updates the following grid(s): "; @@ -1529,7 +1556,7 @@ namespace yask { // Dump info. os << "Created " << getNum() << " equation bundle pack(s):\n"; - for (auto& bp1 : _all) { + for (auto& bp1 : getAll()) { os << " " << bp1->getDescr() << ":\n" " Contains " << bp1->getBundles().size() << " bundle(s): "; int i = 0; diff --git a/src/compiler/lib/Eqs.hpp b/src/compiler/lib/Eqs.hpp index b9dfb34f..fa90180b 100644 --- a/src/compiler/lib/Eqs.hpp +++ b/src/compiler/lib/Eqs.hpp @@ -590,9 +590,6 @@ namespace yask { const string& descr, bool printSets, ostream& os); - - // Adjust scratch-grid halos as needed. - virtual void adjustScratchHalos(); }; typedef shared_ptr EqBundlePtr; @@ -695,7 +692,11 @@ namespace yask { for (auto& bp : _all) bp->visitEqs(ev); } - }; + + // Find halos needed for each grid. + virtual void calcHalos(EqBundles& allBundles); + + }; // EqBundlePacks. } // namespace yask. diff --git a/src/compiler/lib/Grid.cpp b/src/compiler/lib/Grid.cpp index 561488f2..7259289d 100644 --- a/src/compiler/lib/Grid.cpp +++ b/src/compiler/lib/Grid.cpp @@ -238,29 +238,33 @@ namespace yask { assert(areDimsSame(other)); // Loop thru other grid's halo values. - for (auto& i0 : other._halos) { - auto& left = i0.first; - auto& m1 = i0.second; - for (auto& i1 : m1) { - auto& step = i1.first; - const IntTuple& ohalos = i1.second; - for (auto& dim : ohalos.getDims()) { - auto& dname = dim.getName(); - auto& val = dim.getVal(); - - // Any existing value? - auto& halos = _halos[left][step]; - auto* p = halos.lookup(dname); - - // If not, add this one. - if (!p) - halos.addDimBack(dname, val); - - // Keep larger value. - else if (val > *p) - *p = val; - - // Else, current value is larger than val, so don't update. 
+ for (auto& hi : other._halos) { + auto& pname = hi.first; + auto& h2 = hi.second; + for (auto& i0 : h2) { + auto& left = i0.first; + auto& m1 = i0.second; + for (auto& i1 : m1) { + auto& step = i1.first; + const IntTuple& ohalos = i1.second; + for (auto& dim : ohalos.getDims()) { + auto& dname = dim.getName(); + auto& val = dim.getVal(); + + // Any existing value? + auto& halos = _halos[pname][left][step]; + auto* p = halos.lookup(dname); + + // If not, add this one. + if (!p) + halos.addDimBack(dname, val); + + // Keep larger value. + else if (val > *p) + *p = val; + + // Else, current value is larger than val, so don't update. + } } } } @@ -269,7 +273,7 @@ namespace yask { // Update halos based on each value in 'offsets' in some // read or write to this grid. // This grid's halos can only be increased. - void Grid::updateHalo(const IntTuple& offsets) { + void Grid::updateHalo(const string& packName, const IntTuple& offsets) { // Find step value or use 0 if none. int stepVal = 0; @@ -285,7 +289,8 @@ namespace yask { auto& dname = dim.getName(); int val = dim.getVal(); bool left = val <= 0; - auto& halos = _halos[left][stepVal]; + auto& halos = _halos[packName][left][stepVal]; + // Don't keep halo in step dim. if (stepDim && dname == stepDim->getName()) continue; @@ -332,8 +337,6 @@ namespace yask { } // Determine how many values in step-dim are needed. - // TODO: fix this for staggered grids; it currently does not - // understand the per-pack reuse. int Grid::getStepDimSize() const { // Specified by API. @@ -353,49 +356,81 @@ namespace yask { if (_halos.size() == 0) return 1; - // First and last step-dim. - int first_ofs = 0, last_ofs = 0; + // Need the max across all packs. + int max_sz = 1; + + // Loop thru each pack w/halos. + for (auto& hi : _halos) { +#ifdef DEBUG_HALOS + auto& pname = hi.first; +#endif + auto& h2 = hi.second; + + // First and last step-dim. + int first_ofs = -1, last_ofs = -1; - // left and right. 
- for (auto& i : _halos) { - //auto left = i.first; - auto& h2 = i.second; // map of step-dims to halos. + // left and right. + for (auto& i : h2) { + //auto left = i.first; + auto& h3 = i.second; // map of step-dims to halos. - // Step-dim ofs. - for (auto& j : h2) { - auto ofs = j.first; - //auto& halo = j.second; // halo tuple at step-val 'ofs'. + // Step-dim ofs. + for (auto& j : h3) { + auto ofs = j.first; + auto& halo = j.second; // halo tuple at step-val 'ofs'. - // Update vars. - first_ofs = min(first_ofs, ofs); - last_ofs = max(last_ofs, ofs); + // Any existing value? + if (halo.size()) { +#ifdef DEBUG_HALOS + cout << "** grid " << _name << " has halo " << halo.makeDimValStr() << + " at ofs " << ofs << " in pack " << pname << endl; +#endif + + // Update vars. + if (first_ofs < 0) + first_ofs = last_ofs = ofs; + else { + first_ofs = min(first_ofs, ofs); + last_ofs = max(last_ofs, ofs); + } + } + } } - } +#ifdef DEBUG_HALOS + cout << "** grid " << _name << " has halos from " << first_ofs << + " to " << last_ofs << " in pack " << pname << endl; +#endif - // First and last largest halos. - int first_max_halo = 0, last_max_halo = 0; - for (auto& i : _halos) { - //auto left = i.first; - auto& h2 = i.second; // map of step-dims to halos. + // Only need to process if >1 offset. + if (last_ofs >= 0 && first_ofs >= 0 && last_ofs > first_ofs) { - if (h2.count(first_ofs)) - first_max_halo = max(first_max_halo, h2.at(first_ofs).max()); - if (h2.count(last_ofs)) - last_max_halo = max(last_max_halo, h2.at(last_ofs).max()); - } + // Default step-dim size is range of step offsets. + int sz = last_ofs - first_ofs + 1; - // Default step-dim size is range of offsets. - assert(last_ofs >= first_ofs); - int sz = last_ofs - first_ofs + 1; + // First and last largest halos. + int first_max_halo = 0, last_max_halo = 0; + for (auto& i : h2) { + //auto left = i.first; + auto& h3 = i.second; // map of step-dims to halos. 
+ + if (h3.count(first_ofs) && h3.at(first_ofs).size()) + first_max_halo = max(first_max_halo, h3.at(first_ofs).max()); + if (h3.count(last_ofs) && h3.at(last_ofs).size()) + last_max_halo = max(last_max_halo, h3.at(last_ofs).max()); + } + + // If first and last halos are zero, we can further optimize storage by + // immediately reusing memory location. + if (sz > 1 && first_max_halo == 0 && last_max_halo == 0) + sz--; + + // Keep max so far. + max_sz = max(max_sz, sz); + } - // If first and last halos are zero, we can further optimize storage by - // immediately reusing memory location. - // TODO: recognize that reading in one pack and then writing in - // another can also reuse storage. - if (sz > 1 && first_max_halo == 0 && last_max_halo == 0) - sz--; + } // packs. - return sz; + return max_sz; } // Description of this grid. diff --git a/src/compiler/lib/Grid.hpp b/src/compiler/lib/Grid.hpp index fd1e3bd9..2dfe3a14 100644 --- a/src/compiler/lib/Grid.hpp +++ b/src/compiler/lib/Grid.hpp @@ -68,10 +68,10 @@ namespace yask { // Max abs-value of domain-index halos required by all eqs at // various step-index values. + // string key: name of pack. // bool key: true=left, false=right. // int key: step-dim offset or 0 if no step-dim. - // TODO: keep separate halos for each equation bundle. - map> _halos; + map>> _halos; public: // Ctors. @@ -129,28 +129,32 @@ namespace yask { virtual const IntTuple& getMinIndices() const { return _minIndices; } virtual const IntTuple& getMaxIndices() const { return _maxIndices; } - // Get the max sizes of halo across all steps. - virtual IntTuple getHaloSizes(bool left) const { + // Get the max sizes of halo across all steps for given pack. + virtual IntTuple getHaloSizes(const string& packName, bool left) const { IntTuple halo; - if (_halos.count(left)) { - for (auto i : _halos.at(left)) { - auto& right = i.second; // halo at step-val 'i'. 
- halo = halo.makeUnionWith(right); - halo = halo.maxElements(right, false); + if (_halos.count(packName) && _halos.at(packName).count(left)) { + for (auto i : _halos.at(packName).at(left)) { + auto& hs = i.second; // halo at step-val 'i'. + halo = halo.makeUnionWith(hs); + halo = halo.maxElements(hs, false); } } return halo; } - // Get the max size in 'dim' of halo across all steps. + // Get the max size in 'dim' of halo across all packs and steps. virtual int getHaloSize(const string& dim, bool left) const { int h = 0; - if (_halos.count(left)) { - for (auto i : _halos.at(left)) { - auto& hs = i.second; // halo at step-val 'i'. - auto* p = hs.lookup(dim); - if (p) - h = std::max(h, *p); + for (auto& hi : _halos) { + //auto& pname = hi.first; + auto& h2 = hi.second; + if (h2.count(left)) { + for (auto i : h2.at(left)) { + auto& hs = i.second; // halo at step-val 'i'. + auto* p = hs.lookup(dim); + if (p) + h = std::max(h, *p); + } } } return h; @@ -183,7 +187,7 @@ namespace yask { virtual void updateHalo(const Grid& other); // Update halos based on each value in 'offsets'. - virtual void updateHalo(const IntTuple& offsets); + virtual void updateHalo(const string& packName, const IntTuple& offsets); // Update const indices based on 'indices'. virtual void updateConstIndices(const IntTuple& indices); diff --git a/src/compiler/lib/Soln.cpp b/src/compiler/lib/Soln.cpp index e2059ed0..8f49f32d 100644 --- a/src/compiler/lib/Soln.cpp +++ b/src/compiler/lib/Soln.cpp @@ -105,6 +105,9 @@ namespace yask { // Separate bundles into packs. _eqBundlePacks.makePacks(_eqBundles, *_dos); + // Compute halos. + _eqBundlePacks.calcHalos(_eqBundles); + // Make a copy of each equation at each cluster offset. // We will use these for inter-cluster optimizations and code generation. 
// NB: these cluster bundles do not maintain dependencies, so cannot be used diff --git a/src/compiler/main.cpp b/src/compiler/main.cpp index 710f37c8..a7d200bb 100644 --- a/src/compiler/main.cpp +++ b/src/compiler/main.cpp @@ -72,6 +72,7 @@ void usage(const string& cmd) { "\n" " -elem-bytes " " Set number of bytes in each FP element (default=" << settings._elem_bytes << ").\n" + " Currently, only 4 (single-precision) and 8 (double) are allowed.\n" " -fold =,...\n" " Set number of elements in each given dimension in a vector block.\n" " Default depends on -elem-bytes setting and print format (below).\n" @@ -101,12 +102,12 @@ void usage(const string& cmd) { " [-no]-bundle-scratch\n" " Bundle scratch equations even if the sizes of their scratch grids must be increased\n" " to do so (default=" << settings._bundleScratch << ").\n" - " -step-alloc \n" - " Specify the size of the step-dimension memory allocation.\n" - " By default, allocations are calculated automatically for each grid.\n" " -halo \n" - " Specify the sizes of the halos.\n" + " Specify the size of the halos on all grids.\n" " By default, halos are calculated automatically for each grid.\n" + " -step-alloc \n" + " Specify the size of the step-dimension memory allocation on all grids.\n" + " By default, allocations are calculated automatically for each grid.\n" " [-no]-interleave-misc\n" " Allocate grid vars with the 'misc' dims as the inner-most dims (default=" << settings._innerMisc << ").\n" " This disallows dynamcally changing the 'misc' dim sizes during run-time.\n" @@ -117,8 +118,7 @@ void usage(const string& cmd) { " Make last dimension of fold unit stride (default=" << (!settings._firstInner) << ").\n" " This controls the intra-vector memory layout.\n" " [-no]-ul\n" - " Do [not] generate simple unaligned loads (default=" << - settings._allowUnalignedLoads << ").\n" + " Do [not] generate simple unaligned loads (default=" << settings._allowUnalignedLoads << ").\n" " [Advanced] To use this correctly, 
only 1D folds are allowed, and\n" " the memory layout used by YASK must have that same dimension in unit stride.\n" " [-no]-opt-comb\n" diff --git a/src/kernel/Makefile b/src/kernel/Makefile index d3e07e43..5250076c 100644 --- a/src/kernel/Makefile +++ b/src/kernel/Makefile @@ -57,9 +57,6 @@ else ifeq ($(stencil),cube) MACROS += MAX_EXCH_DIST=3 radius := 2 -else ifneq ($(findstring test_dep,$(stencil)),) - time_alloc := 1 - else ifneq ($(findstring iso3dfd,$(stencil)),) MACROS += MAX_EXCH_DIST=1 radius := 8 @@ -90,9 +87,6 @@ else ifneq ($(findstring iso3dfd,$(stencil)),) endif else ifneq ($(findstring awp,$(stencil)),) - ifeq ($(stencil),awp) - time_alloc := 1 # only allowed w/o surface conditions. - endif def_block_args := -b 32 YC_FLAGS += -min-es 1 def_rank_args := -d 1024 -dz 128 @@ -120,7 +114,6 @@ else ifneq ($(findstring awp,$(stencil)),) endif else ifneq ($(findstring ssg,$(stencil)),) - time_alloc := 1 def_rank_args := -d 512 ifneq ($(filter $(arch),skx skl clx),) def_rank_args := -d 640 -dx 320 @@ -130,7 +123,6 @@ else ifneq ($(findstring ssg,$(stencil)),) endif else ifneq ($(findstring fsg,$(stencil)),) - time_alloc := 1 def_rank_args := -d 256 ifeq ($(arch),knl) omp_region_schedule := guided @@ -291,8 +283,8 @@ endif ifneq ($(halo),) YC_FLAGS += -halo $(halo) endif -ifneq ($(time_alloc),) - YC_FLAGS += -step-alloc $(time_alloc) +ifneq ($(step_alloc),) + YC_FLAGS += -step-alloc $(step_alloc) endif # Kernel base names.