Skip to content

Commit

Permalink
Improve the step-allocation algorithm so it handles staggered-grids and other solutions with multiple packs better.
Browse files Browse the repository at this point in the history

Closes #192.
  • Loading branch information
chuck.yount committed Feb 1, 2019
1 parent cd3a484 commit b9d4e15
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 225 deletions.
2 changes: 1 addition & 1 deletion src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace yask {
// for numbers above 9 (at least up to 99).

// Format: "major.minor.patch".
const string version = "2.16.04";
const string version = "2.16.05";

string yask_get_version_string() {
return version;
Expand Down
279 changes: 153 additions & 126 deletions src/compiler/lib/Eqs.cpp

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions src/compiler/lib/Eqs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,9 +590,6 @@ namespace yask {
const string& descr,
bool printSets,
ostream& os);

// Adjust scratch-grid halos as needed.
virtual void adjustScratchHalos();
};

typedef shared_ptr<EqBundle> EqBundlePtr;
Expand Down Expand Up @@ -695,7 +692,11 @@ namespace yask {
for (auto& bp : _all)
bp->visitEqs(ev);
}
};

// Find halos needed for each grid.
virtual void calcHalos(EqBundles& allBundles);

}; // EqBundlePacks.

} // namespace yask.

Expand Down
157 changes: 96 additions & 61 deletions src/compiler/lib/Grid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,29 +238,33 @@ namespace yask {
assert(areDimsSame(other));

// Loop thru other grid's halo values.
for (auto& i0 : other._halos) {
auto& left = i0.first;
auto& m1 = i0.second;
for (auto& i1 : m1) {
auto& step = i1.first;
const IntTuple& ohalos = i1.second;
for (auto& dim : ohalos.getDims()) {
auto& dname = dim.getName();
auto& val = dim.getVal();

// Any existing value?
auto& halos = _halos[left][step];
auto* p = halos.lookup(dname);

// If not, add this one.
if (!p)
halos.addDimBack(dname, val);

// Keep larger value.
else if (val > *p)
*p = val;

// Else, current value is larger than val, so don't update.
for (auto& hi : other._halos) {
auto& pname = hi.first;
auto& h2 = hi.second;
for (auto& i0 : h2) {
auto& left = i0.first;
auto& m1 = i0.second;
for (auto& i1 : m1) {
auto& step = i1.first;
const IntTuple& ohalos = i1.second;
for (auto& dim : ohalos.getDims()) {
auto& dname = dim.getName();
auto& val = dim.getVal();

// Any existing value?
auto& halos = _halos[pname][left][step];
auto* p = halos.lookup(dname);

// If not, add this one.
if (!p)
halos.addDimBack(dname, val);

// Keep larger value.
else if (val > *p)
*p = val;

// Else, current value is larger than val, so don't update.
}
}
}
}
Expand All @@ -269,7 +273,7 @@ namespace yask {
// Update halos based on each value in 'offsets' in some
// read or write to this grid.
// This grid's halos can only be increased.
void Grid::updateHalo(const IntTuple& offsets) {
void Grid::updateHalo(const string& packName, const IntTuple& offsets) {

// Find step value or use 0 if none.
int stepVal = 0;
Expand All @@ -285,7 +289,8 @@ namespace yask {
auto& dname = dim.getName();
int val = dim.getVal();
bool left = val <= 0;
auto& halos = _halos[left][stepVal];
auto& halos = _halos[packName][left][stepVal];

// Don't keep halo in step dim.
if (stepDim && dname == stepDim->getName())
continue;
Expand Down Expand Up @@ -332,8 +337,6 @@ namespace yask {
}

// Determine how many values in step-dim are needed.
// TODO: fix this for staggered grids; it currently does not
// understand the per-pack reuse.
int Grid::getStepDimSize() const
{
// Specified by API.
Expand All @@ -353,49 +356,81 @@ namespace yask {
if (_halos.size() == 0)
return 1;

// First and last step-dim.
int first_ofs = 0, last_ofs = 0;
// Need the max across all packs.
int max_sz = 1;

// Loop thru each pack w/halos.
for (auto& hi : _halos) {
#ifdef DEBUG_HALOS
auto& pname = hi.first;
#endif
auto& h2 = hi.second;

// First and last step-dim.
int first_ofs = -1, last_ofs = -1;

// left and right.
for (auto& i : _halos) {
//auto left = i.first;
auto& h2 = i.second; // map of step-dims to halos.
// left and right.
for (auto& i : h2) {
//auto left = i.first;
auto& h3 = i.second; // map of step-dims to halos.

// Step-dim ofs.
for (auto& j : h2) {
auto ofs = j.first;
//auto& halo = j.second; // halo tuple at step-val 'ofs'.
// Step-dim ofs.
for (auto& j : h3) {
auto ofs = j.first;
auto& halo = j.second; // halo tuple at step-val 'ofs'.

// Update vars.
first_ofs = min(first_ofs, ofs);
last_ofs = max(last_ofs, ofs);
// Any existing value?
if (halo.size()) {
#ifdef DEBUG_HALOS
cout << "** grid " << _name << " has halo " << halo.makeDimValStr() <<
" at ofs " << ofs << " in pack " << pname << endl;
#endif

// Update vars.
if (first_ofs < 0)
first_ofs = last_ofs = ofs;
else {
first_ofs = min(first_ofs, ofs);
last_ofs = max(last_ofs, ofs);
}
}
}
}
}
#ifdef DEBUG_HALOS
cout << "** grid " << _name << " has halos from " << first_ofs <<
" to " << last_ofs << " in pack " << pname << endl;
#endif

// First and last largest halos.
int first_max_halo = 0, last_max_halo = 0;
for (auto& i : _halos) {
//auto left = i.first;
auto& h2 = i.second; // map of step-dims to halos.
// Only need to process if >1 offset.
if (last_ofs >= 0 && first_ofs >= 0 && last_ofs > first_ofs) {

if (h2.count(first_ofs))
first_max_halo = max(first_max_halo, h2.at(first_ofs).max());
if (h2.count(last_ofs))
last_max_halo = max(last_max_halo, h2.at(last_ofs).max());
}
// Default step-dim size is range of step offsets.
int sz = last_ofs - first_ofs + 1;

// Default step-dim size is range of offsets.
assert(last_ofs >= first_ofs);
int sz = last_ofs - first_ofs + 1;
// First and last largest halos.
int first_max_halo = 0, last_max_halo = 0;
for (auto& i : h2) {
//auto left = i.first;
auto& h3 = i.second; // map of step-dims to halos.

if (h3.count(first_ofs) && h3.at(first_ofs).size())
first_max_halo = max(first_max_halo, h3.at(first_ofs).max());
if (h3.count(last_ofs) && h3.at(last_ofs).size())
last_max_halo = max(last_max_halo, h3.at(last_ofs).max());
}

// If first and last halos are zero, we can further optimize storage by
// immediately reusing memory location.
if (sz > 1 && first_max_halo == 0 && last_max_halo == 0)
sz--;

// Keep max so far.
max_sz = max(max_sz, sz);
}

// If first and last halos are zero, we can further optimize storage by
// immediately reusing memory location.
// TODO: recognize that reading in one pack and then writing in
// another can also reuse storage.
if (sz > 1 && first_max_halo == 0 && last_max_halo == 0)
sz--;
} // packs.

return sz;
return max_sz;
}

// Description of this grid.
Expand Down
38 changes: 21 additions & 17 deletions src/compiler/lib/Grid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ namespace yask {

// Max abs-value of domain-index halos required by all eqs at
// various step-index values.
// string key: name of pack.
// bool key: true=left, false=right.
// int key: step-dim offset or 0 if no step-dim.
// TODO: keep separate halos for each equation bundle.
map<bool, map<int, IntTuple>> _halos;
map<string, map<bool, map<int, IntTuple>>> _halos;

public:
// Ctors.
Expand Down Expand Up @@ -129,28 +129,32 @@ namespace yask {
virtual const IntTuple& getMinIndices() const { return _minIndices; }
virtual const IntTuple& getMaxIndices() const { return _maxIndices; }

// Get the max sizes of halo across all steps.
virtual IntTuple getHaloSizes(bool left) const {
// Get the max sizes of halo across all steps for given pack.
virtual IntTuple getHaloSizes(const string& packName, bool left) const {
IntTuple halo;
if (_halos.count(left)) {
for (auto i : _halos.at(left)) {
auto& right = i.second; // halo at step-val 'i'.
halo = halo.makeUnionWith(right);
halo = halo.maxElements(right, false);
if (_halos.count(packName) && _halos.at(packName).count(left)) {
for (auto i : _halos.at(packName).at(left)) {
auto& hs = i.second; // halo at step-val 'i'.
halo = halo.makeUnionWith(hs);
halo = halo.maxElements(hs, false);
}
}
return halo;
}

// Get the max size in 'dim' of halo across all steps.
// Get the max size in 'dim' of halo across all packs and steps.
virtual int getHaloSize(const string& dim, bool left) const {
int h = 0;
if (_halos.count(left)) {
for (auto i : _halos.at(left)) {
auto& hs = i.second; // halo at step-val 'i'.
auto* p = hs.lookup(dim);
if (p)
h = std::max(h, *p);
for (auto& hi : _halos) {
//auto& pname = hi.first;
auto& h2 = hi.second;
if (h2.count(left)) {
for (auto i : h2.at(left)) {
auto& hs = i.second; // halo at step-val 'i'.
auto* p = hs.lookup(dim);
if (p)
h = std::max(h, *p);
}
}
}
return h;
Expand Down Expand Up @@ -183,7 +187,7 @@ namespace yask {
virtual void updateHalo(const Grid& other);

// Update halos based on each value in 'offsets'.
virtual void updateHalo(const IntTuple& offsets);
virtual void updateHalo(const string& packName, const IntTuple& offsets);

// Update const indices based on 'indices'.
virtual void updateConstIndices(const IntTuple& indices);
Expand Down
3 changes: 3 additions & 0 deletions src/compiler/lib/Soln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ namespace yask {
// Separate bundles into packs.
_eqBundlePacks.makePacks(_eqBundles, *_dos);

// Compute halos.
_eqBundlePacks.calcHalos(_eqBundles);

// Make a copy of each equation at each cluster offset.
// We will use these for inter-cluster optimizations and code generation.
// NB: these cluster bundles do not maintain dependencies, so cannot be used
Expand Down
12 changes: 6 additions & 6 deletions src/compiler/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ void usage(const string& cmd) {
"\n"
" -elem-bytes <n>"
" Set number of bytes in each FP element (default=" << settings._elem_bytes << ").\n"
" Currently, only 4 (single-precision) and 8 (double) are allowed.\n"
" -fold <dim>=<size>,...\n"
" Set number of elements in each given dimension in a vector block.\n"
" Default depends on -elem-bytes setting and print format (below).\n"
Expand Down Expand Up @@ -101,12 +102,12 @@ void usage(const string& cmd) {
" [-no]-bundle-scratch\n"
" Bundle scratch equations even if the sizes of their scratch grids must be increased\n"
" to do so (default=" << settings._bundleScratch << ").\n"
" -step-alloc <size>\n"
" Specify the size of the step-dimension memory allocation.\n"
" By default, allocations are calculated automatically for each grid.\n"
" -halo <size>\n"
" Specify the sizes of the halos.\n"
" Specify the size of the halos on all grids.\n"
" By default, halos are calculated automatically for each grid.\n"
" -step-alloc <size>\n"
" Specify the size of the step-dimension memory allocation on all grids.\n"
" By default, allocations are calculated automatically for each grid.\n"
" [-no]-interleave-misc\n"
" Allocate grid vars with the 'misc' dims as the inner-most dims (default=" << settings._innerMisc << ").\n"
" This disallows dynamcally changing the 'misc' dim sizes during run-time.\n"
Expand All @@ -117,8 +118,7 @@ void usage(const string& cmd) {
" Make last dimension of fold unit stride (default=" << (!settings._firstInner) << ").\n"
" This controls the intra-vector memory layout.\n"
" [-no]-ul\n"
" Do [not] generate simple unaligned loads (default=" <<
settings._allowUnalignedLoads << ").\n"
" Do [not] generate simple unaligned loads (default=" << settings._allowUnalignedLoads << ").\n"
" [Advanced] To use this correctly, only 1D folds are allowed, and\n"
" the memory layout used by YASK must have that same dimension in unit stride.\n"
" [-no]-opt-comb\n"
Expand Down
12 changes: 2 additions & 10 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ else ifeq ($(stencil),cube)
MACROS += MAX_EXCH_DIST=3
radius := 2

else ifneq ($(findstring test_dep,$(stencil)),)
time_alloc := 1

else ifneq ($(findstring iso3dfd,$(stencil)),)
MACROS += MAX_EXCH_DIST=1
radius := 8
Expand Down Expand Up @@ -90,9 +87,6 @@ else ifneq ($(findstring iso3dfd,$(stencil)),)
endif

else ifneq ($(findstring awp,$(stencil)),)
ifeq ($(stencil),awp)
time_alloc := 1 # only allowed w/o surface conditions.
endif
def_block_args := -b 32
YC_FLAGS += -min-es 1
def_rank_args := -d 1024 -dz 128
Expand Down Expand Up @@ -120,7 +114,6 @@ else ifneq ($(findstring awp,$(stencil)),)
endif

else ifneq ($(findstring ssg,$(stencil)),)
time_alloc := 1
def_rank_args := -d 512
ifneq ($(filter $(arch),skx skl clx),)
def_rank_args := -d 640 -dx 320
Expand All @@ -130,7 +123,6 @@ else ifneq ($(findstring ssg,$(stencil)),)
endif

else ifneq ($(findstring fsg,$(stencil)),)
time_alloc := 1
def_rank_args := -d 256
ifeq ($(arch),knl)
omp_region_schedule := guided
Expand Down Expand Up @@ -291,8 +283,8 @@ endif
ifneq ($(halo),)
YC_FLAGS += -halo $(halo)
endif
ifneq ($(time_alloc),)
YC_FLAGS += -step-alloc $(time_alloc)
ifneq ($(step_alloc),)
YC_FLAGS += -step-alloc $(step_alloc)
endif

# Kernel base names.
Expand Down

0 comments on commit b9d4e15

Please sign in to comment.