Skip to content

Commit

Permalink
CPU version of TagBox::buffer
Browse files Browse the repository at this point in the history
Add a CPU version of TagBox::buffer. @MSABuschman reported in AMReX-Codes#1951 that
TagBox::buffer has been very slow since commit AMReX-Codes#1258 when the error buffer
size is large. The function was rewritten in AMReX-Codes#1258 to do the work on the GPU.
This PR reintroduces the old version for CPU runs.

Note that the current implementation is expected to have poor performance on
GPU if it has a very large error buffer.  It's still not clear how we should
implement this function for GPU if a large error buffer is used.
  • Loading branch information
WeiqunZhang committed Apr 19, 2021
1 parent 4320729 commit 37ac8bf
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 20 deletions.
5 changes: 4 additions & 1 deletion Src/AmrCore/AMReX_TagBox.H
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@ public:

/**
* \brief Mark neighbors of every tagged cell a distance nbuf away.
* Only the interior is searched for initial tagged points, where nwid
* is given as the width of the bndry region.
*
* \param nbuf distance from a tagged cell within which neighbors are marked
* \param nwid width of the bndry region that is left out of the search for
*             initial tagged points
*/
void buffer (const IntVect& nbuf) noexcept;
void buffer (const IntVect& nbuf, const IntVect& nwid) noexcept;

/**
* \brief Returns Vector\<int\> of size domain.numPts() suitable for calling
Expand Down
58 changes: 39 additions & 19 deletions Src/AmrCore/AMReX_TagBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,30 +75,50 @@ TagBox::coarsen (const IntVect& ratio, const Box& cbox) noexcept
}

// Buffers the tags held by this TagBox: cells that are CLEAR and close enough
// to a SET cell are changed to BUF. SET cells are never modified here.
// NOTE(review): this span reads as a rendered diff -- the one-argument
// signature and the unguarded loop body further down appear to be the
// pre-change lines shown next to their replacements.
void
TagBox::buffer (const IntVect& a_nbuff) noexcept
TagBox::buffer (const IntVect& a_nbuff, const IntVect& a_nwid) noexcept
{
Array4<char> const& a = this->array();
Dim3 nbuf = a_nbuff.dim3();
// lo/hi are used below to clamp each cell's search window.
const auto lo = amrex::lbound(domain);
const auto hi = amrex::ubound(domain);
AMREX_HOST_DEVICE_FOR_3D(domain, i, j, k,
#ifdef AMREX_USE_GPU
// Taken when Gpu::inLaunchRegion() is true: every CLEAR cell scans its own
// clamped window of half-width nbuf for a SET cell. Each iteration writes
// only its own cell (i,j,k).
if (Gpu::inLaunchRegion()) {
AMREX_HOST_DEVICE_FOR_3D(domain, i, j, k,
{
if (a(i,j,k) == TagBox::CLEAR) {
bool to_buf = false;
// Search window [i-nbuf.x, i+nbuf.x] etc., clamped to [lo, hi].
int imin = amrex::max(i-nbuf.x, lo.x);
int jmin = amrex::max(j-nbuf.y, lo.y);
int kmin = amrex::max(k-nbuf.z, lo.z);
int imax = amrex::min(i+nbuf.x, hi.x);
int jmax = amrex::min(j+nbuf.y, hi.y);
int kmax = amrex::min(k+nbuf.z, hi.z);
// xxxxx TODO: If nbuf is large, this is not efficient.
// We need to find another better way.
// The `&& !to_buf` conditions end all three loops at the first SET cell found.
for (int kk = kmin; kk <= kmax && !to_buf; ++kk) {
for (int jj = jmin; jj <= jmax && !to_buf; ++jj) {
for (int ii = imin; ii <= imax && !to_buf; ++ii) {
if (a(ii,jj,kk) == TagBox::SET) to_buf = true;
}}}
if (to_buf) a(i,j,k) = TagBox::BUF;
}
});
} else
#endif
{
// Same per-cell window search as in the GPU branch above: a CLEAR cell
// becomes BUF if any cell in its clamped nbuf window is SET.
if (a(i,j,k) == TagBox::CLEAR) {
bool to_buf = false;
int imin = amrex::max(i-nbuf.x, lo.x);
int jmin = amrex::max(j-nbuf.y, lo.y);
int kmin = amrex::max(k-nbuf.z, lo.z);
int imax = amrex::min(i+nbuf.x, hi.x);
int jmax = amrex::min(j+nbuf.y, hi.y);
int kmax = amrex::min(k+nbuf.z, hi.z);
for (int kk = kmin; kk <= kmax && !to_buf; ++kk) {
for (int jj = jmin; jj <= jmax && !to_buf; ++jj) {
for (int ii = imin; ii <= imax && !to_buf; ++ii) {
if (a(ii,jj,kk) == TagBox::SET) to_buf = true;
}}}
if (to_buf) a(i,j,k) = TagBox::BUF;
}
});
// CPU path: loop over SET cells instead of CLEAR cells. Each SET cell found
// in `interior` (domain grown by -a_nwid) turns the CLEAR cells in the window
// of half-width nwid around it into BUF.
// NOTE(review): the window here uses nwid, not nbuf, and nbuf/lo/hi are not
// referenced in this branch -- confirm callers pass an nwid that matches the
// intended buffer distance.
Dim3 nwid = a_nwid.dim3();
Box const& interior = amrex::grow(domain, -a_nwid);
AMREX_LOOP_3D(interior, i, j, k,
{
if (a(i,j,k) == TagBox::SET) {
// No clamping is applied to this window; presumably it stays inside
// domain because interior was built with the same a_nwid -- TODO confirm.
for (int kk = k-nwid.z; kk <= k+nwid.z; ++kk) {
for (int jj = j-nwid.y; jj <= j+nwid.y; ++jj) {
for (int ii = i-nwid.x; ii <= i+nwid.x; ++ii) {
if (a(ii,jj,kk) == TagBox::CLEAR) { a(ii,jj,kk) = TagBox::BUF; }
}}}
}
});
}
}

// DEPRECATED
Expand Down Expand Up @@ -295,7 +315,7 @@ TagBoxArray::buffer (const IntVect& nbuf)
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(*this); mfi.isValid(); ++mfi) {
get(mfi).buffer(nbuf);
get(mfi).buffer(nbuf, n_grow);
}
}
}
Expand Down

0 comments on commit 37ac8bf

Please sign in to comment.