Skip to content

Commit

Permalink
Optimize self-communication
Browse files: browse the repository at this point in the history
  • Loading branch information
masterleinad committed Oct 8, 2019
1 parent 6b03a84 commit c8d2048
Showing 1 changed file with 33 additions and 20 deletions.
53 changes: 33 additions & 20 deletions src/details/ArborX_DetailsDistributor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,33 +185,46 @@ class Distributor
std::size_t const total_message_size =
_src_counts[i] * num_packets * sizeof(ValueType);
int const n_chunks = (total_message_size + chunk_size - 1) / chunk_size;
for (int chunk = 0; chunk < n_chunks; ++chunk)
{
requests.emplace_back();
int const this_chunk_size = std::min<std::size_t>(
chunk_size, total_message_size - chunk * chunk_size);
MPI_Irecv(reinterpret_cast<char *>(src_buffer.data() +
_src_offsets[i] * num_packets) +
chunk * chunk_size,
this_chunk_size, MPI_BYTE, _sources[i], 123 + chunk, _comm,
&requests.back());
}
if (_sources[i] != comm_rank)
for (int chunk = 0; chunk < n_chunks; ++chunk)
{
requests.emplace_back();
int const this_chunk_size = std::min<std::size_t>(
chunk_size, total_message_size - chunk * chunk_size);
MPI_Irecv(reinterpret_cast<char *>(src_buffer.data() +
_src_offsets[i] * num_packets) +
chunk * chunk_size,
this_chunk_size, MPI_BYTE, _sources[i], 123 + chunk, _comm,
&requests.back());
}
}
for (int i = 0; i < outdegrees; ++i)
{
std::size_t const total_message_size =
_dest_counts[i] * num_packets * sizeof(ValueType);
int const n_chunks = (total_message_size + chunk_size - 1) / chunk_size;
for (int chunk = 0; chunk < n_chunks; ++chunk)
if (_destinations[i] == comm_rank)
{
auto const it = std::find(_sources.begin(), _sources.end(), comm_rank);
ARBORX_ASSERT(it != _sources.end());
auto const position = it - _sources.begin();
std::memcpy(src_buffer.data() + _src_offsets[position] * num_packets,
dest_buffer.data() + _dest_offsets[i] * num_packets,
total_message_size);
}
else
{
requests.emplace_back();
int const this_chunk_size = std::min<std::size_t>(
chunk_size, total_message_size - chunk * chunk_size);
MPI_Isend(reinterpret_cast<char *>(dest_buffer.data() +
_dest_offsets[i] * num_packets) +
chunk * chunk_size,
this_chunk_size, MPI_BYTE, _destinations[i], 123 + chunk,
_comm, &requests.back());
for (int chunk = 0; chunk < n_chunks; ++chunk)
{
requests.emplace_back();
int const this_chunk_size = std::min<std::size_t>(
chunk_size, total_message_size - chunk * chunk_size);
MPI_Isend(reinterpret_cast<char *>(dest_buffer.data() +
_dest_offsets[i] * num_packets) +
chunk * chunk_size,
this_chunk_size, MPI_BYTE, _destinations[i], 123 + chunk,
_comm, &requests.back());
}
}
}
if (!requests.empty())
Expand Down

0 comments on commit c8d2048

Please sign in to comment.