Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 21 additions & 35 deletions dart-impl/mpi/src/dart_communication.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,16 @@ static dart_ret_t get_shared_mem(
int16_t seg_id = gptr.segid;
uint64_t offset = gptr.addr_or_offs.offset;
DART_LOG_DEBUG("dart_get: shared windows enabled");
dart_team_unit_t luid = DART_TEAM_UNIT_ID(gptr.unitid);
dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid];
char * baseptr;
/*
* Use memcpy if the target is in the same node as the calling unit:
*/
DART_LOG_DEBUG("dart_get: shared memory segment, seg_id:%d",
seg_id);
if (seg_id) {
if (dart_segment_get_baseptr(&team_data->segdata, seg_id, luid, &baseptr) != DART_OK) {
if (dart_segment_get_baseptr(
&team_data->segdata, seg_id, luid, &baseptr) != DART_OK) {
DART_LOG_ERROR("dart_get ! "
"dart_adapt_transtable_get_baseptr failed");
return DART_ERR_INVAL;
Expand All @@ -74,7 +75,8 @@ static dart_ret_t get_shared_mem(
baseptr = dart_sharedmem_local_baseptr_set[luid.id];
}
baseptr += offset;
DART_LOG_DEBUG("dart_get: memcpy %zu bytes", nelem * dart__mpi__datatype_sizeof(dtype));
DART_LOG_DEBUG(
"dart_get: memcpy %zu bytes", nelem * dart__mpi__datatype_sizeof(dtype));
memcpy((char*)dest, baseptr, nelem * dart__mpi__datatype_sizeof(dtype));
return DART_OK;
}
Expand Down Expand Up @@ -1276,40 +1278,33 @@ dart_ret_t dart_waitall_local(
* copy MPI requests back to DART handles:
*/
DART_LOG_TRACE("dart_waitall_local: "
"copying MPI requests back to DART handles");
"releasing DART handles");
r_n = 0;
for (i = 0; i < num_handles; i++) {
if (handle[i]) {
DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_SOURCE:"
" %d",
r_n, mpi_sta[r_n].MPI_SOURCE);
DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_ERROR:"
" %d:%s",
r_n,
mpi_sta[r_n].MPI_ERROR,
DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
if (mpi_req[r_n] != MPI_REQUEST_NULL) {
if (mpi_sta[r_n].MPI_ERROR == MPI_SUCCESS) {
DART_LOG_TRACE("dart_waitall_local: -- MPI_Request_free");
if (MPI_Request_free(&mpi_req[r_n]) != MPI_SUCCESS) {
DART_LOG_TRACE("dart_waitall_local ! MPI_Request_free failed");
ret = DART_ERR_INVAL;
break;
}
} else {
DART_LOG_TRACE("dart_waitall_local: cannot free request %zu "
"mpi_sta[%zu] = %d (%s)",
r_n,
if (handle[i]->request) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am surprised that this even worked in the past...

DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_SOURCE:"
" %d",
r_n, mpi_sta[r_n].MPI_SOURCE);
DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_ERROR:"
" %d:%s",
r_n,
mpi_sta[r_n].MPI_ERROR,
DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
if (mpi_sta[r_n].MPI_ERROR != MPI_SUCCESS) {
DART_LOG_ERROR("dart_waitall_local: detected unsuccesful request "
"%zu mpi_sta[%zu] = %d (%s)",
i,
r_n,
mpi_sta[r_n].MPI_ERROR,
DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
}
r_n++;
}
DART_LOG_DEBUG("dart_waitall_local: free handle[%zu] %p",
DART_LOG_TRACE("dart_waitall_local: free handle[%zu] %p",
i, (void*)(handle[i]));
free(handle[i]);
handle[i] = NULL;
r_n++;
}
}
DART_LOG_TRACE("dart_waitall_local: free MPI_Request temporaries");
Expand Down Expand Up @@ -1443,15 +1438,6 @@ dart_ret_t dart_waitall(
free(mpi_sta);
return DART_ERR_INVAL;
}
DART_LOG_TRACE("dart_waitall: -- MPI_Request_free");
if (MPI_Request_free(&handle[i]->request) != MPI_SUCCESS) {
DART_LOG_ERROR("dart_waitall: MPI_Request_free failed");
DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
free(mpi_req);
DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
free(mpi_sta);
return DART_ERR_INVAL;
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion dart-impl/mpi/src/dart_globmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ dart_team_memalloc_aligned(
MPI_Comm_rank(sharedmem_comm, &sharedmem_unitid);
// re-use previously allocated memory
if (segment->baseptr == NULL) {
segment->baseptr = malloc(sizeof(char *) * team_data->sharedmem_nodesize);
segment->baseptr = calloc(team_data->sharedmem_nodesize, sizeof(char *));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

calloc should not be necessary here, but who cares ;)

}
baseptr_set = segment->baseptr;

Expand Down
2 changes: 1 addition & 1 deletion dash/include/dash/algorithm/ForEach.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ void for_each_with_index(
if (lbegin_index != lend_index) {
// Pattern from global begin iterator:
auto & pattern = first.pattern();
auto first_offset = first.pos();
// Iterate local index range:
for (auto lindex = lbegin_index;
lindex != lend_index;
++lindex) {
auto gindex = pattern.global(lindex);
auto first_offset = first.pos();
auto element_it = first + (gindex - first_offset);
func(*(element_it.local()), gindex);
}
Expand Down
51 changes: 50 additions & 1 deletion dash/include/dash/algorithm/Generate.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace dash {
* \tparam ElementType Type of the elements in the sequence
* invoke, deduced from parameter \c gen
* \tparam UnaryFunction Unary function with signature
* \c void(ElementType &)
* \c ElementType(void)
*
* \complexity O(d) + O(nl), with \c d dimensions in the global iterators'
* pattern and \c nl local elements within the global range
Expand All @@ -48,6 +48,55 @@ void generate (
std::generate(lfirst, llast, gen);
}

/**
* Assigns each element in range [first, last) a value generated by the
* given function object g. The index passed to the function is
* a global index.
*
* Being a collaborative operation, each unit will invoke the given
* function on its local elements only.
*
* \tparam ElementType Type of the elements in the sequence
* invoke, deduced from parameter \c gen
* \tparam UnaryFunction Unary function with signature
* \c ElementType(index_t)
*
* \complexity O(d) + O(nl), with \c d dimensions in the global iterators'
* pattern and \c nl local elements within the global range
*
* \ingroup DashAlgorithms
*/
template <
typename ElementType,
class PatternType,
class UnaryFunction >
void generate_with_index(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, I missed this function quite often.

/// Iterator to the initial position in the sequence
GlobIter<ElementType, PatternType> first,
/// Iterator to the final position in the sequence
GlobIter<ElementType, PatternType> last,
/// Generator function
UnaryFunction gen) {
/// Global iterators to local index range:
auto index_range = dash::local_index_range(first, last);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better use the new range based views, as this does not work with multiple local ranges AFAIK. Maybe @fuchsto can assist.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have any pointer on how to use them? I'm not sure range based views and I have crossed paths so far...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, I even introduced you personally last time in Munich!
Anyways: yes, I will add documentation.
For usage you can refer to quite an abundance of tests in dash/test/view.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be honest, I don't even know where to start looking. I guess I could crawl through a host of undocumented code, still not knowing where to start or how to put the pieces together...

auto lbegin_index = index_range.begin;
auto lend_index = index_range.end;

if (lbegin_index != lend_index) {
// Pattern from global begin iterator:
auto & pattern = first.pattern();
auto first_offset = first.pos();
// Iterate local index range:
for (auto lindex = lbegin_index;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same problem here.

lindex != lend_index;
++lindex) {
auto gindex = pattern.global(lindex);
auto element_it = first + (gindex - first_offset);
*element_it = gen(gindex);
}
}
}

} // namespace dash

#endif // DASH__ALGORITHM__GENERATE_H__
2 changes: 1 addition & 1 deletion dash/include/dash/memory/GlobStaticMem.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ class GlobStaticMem
/**
* Synchronize all units associated with this global memory instance.
*/
void barrier() const noexcept
void barrier() const
{
DASH_ASSERT_RETURNS(
dart_barrier(_teamid),
Expand Down
25 changes: 24 additions & 1 deletion dash/test/algorithm/GenerateTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <dash/algorithm/LocalRange.h>


TEST_F(GenerateTest, TestAllItemsGenerated)
TEST_F(GenerateTest, TestGenerate)
{
typedef typename Array_t::value_type value_t;

Expand All @@ -32,3 +32,26 @@ TEST_F(GenerateTest, TestAllItemsGenerated)
ASSERT_EQ_U(17, static_cast<value_t>(*lbegin));
}
}

TEST_F(GenerateTest, TestGenerateWithIndex)
{
typedef typename Array_t::value_type value_t;

// Initialize global array:
Array_t array(_num_elem);
// Generator function
auto f = [](Array_t::index_type idx){ return 2*idx; };
// Fill Array with given generator function
dash::generate_with_index(array.begin(), array.end(), f);
// Wait for all units
array.barrier();

// check global index range
if (dash::myid() == 0) {
for (size_t idx = 0;
idx != array.size();
++idx) {
ASSERT_EQ_U(idx * 2.0, array[idx]);
}
}
}
18 changes: 12 additions & 6 deletions dash/test/algorithm/MinElementTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "MinElementTest.h"

#include <dash/algorithm/MinMax.h>
#include <dash/algorithm/Generate.h>
#include <dash/Array.h>
#include <dash/Matrix.h>

Expand Down Expand Up @@ -218,6 +219,7 @@ TEST_F(MinElementTest, TestShrinkRange)

TEST_F(MinElementTest, TestFindMatrixDefault)
{
typedef typename dash::Matrix<Element_t, 2>::index_type index_t;
Element_t min_value = 11;
size_t num_units = dash::Team::All().size();
size_t tilesize_x = 13;
Expand All @@ -239,13 +241,17 @@ TEST_F(MinElementTest, TestFindMatrixDefault)
ASSERT_EQ(extent_rows, matrix.extent(1));
LOG_MESSAGE("Matrix size: %d", static_cast<int>(matrix_size));
// Fill matrix
if (dash::myid() == 0) {
LOG_MESSAGE("Assigning matrix values");
for(int i = 0; i < matrix.extent(0); ++i) {
for(int k = 0; k < matrix.extent(1); ++k) {
matrix[i][k] = 20 + (i * 11) + (k * 97);
}
LOG_MESSAGE("Assigning matrix values");
auto &pattern = matrix.pattern();
dash::generate_with_index(
matrix.begin(), matrix.end(),
[&pattern](index_t gidx){
auto coords = pattern.coords(gidx);
return 20 + (coords[0] * 11) + (coords[1] * 97);
}
);
matrix.barrier();
if (dash::myid() == 0) {
LOG_MESSAGE("Setting matrix[%d][%d] = %d (min)",
min_pos_x, min_pos_y, static_cast<int>(min_value));
matrix[min_pos_x][min_pos_y] = min_value;
Expand Down
29 changes: 16 additions & 13 deletions dash/test/algorithm/TransformTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "TransformTest.h"

#include <dash/algorithm/Transform.h>
#include <dash/algorithm/Generate.h>

#include <dash/Array.h>
#include <dash/Matrix.h>
Expand Down Expand Up @@ -156,21 +157,22 @@ TEST_F(TransformTest, ArrayGlobalPlusGlobalBlocking)
TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking)
{
// Block-wise addition (a += b) of two matrices
typedef typename dash::Matrix<int, 2>::index_type index_t;
using value_t = int64_t;
typedef typename dash::Matrix<value_t, 2>::index_type index_t;
dash::global_unit_t myid = dash::myid();
size_t num_units = dash::Team::All().size();
size_t tilesize_x = 7;
size_t tilesize_y = 3;
size_t extent_cols = tilesize_x * num_units * 2;
size_t extent_rows = tilesize_y * num_units * 2;
dash::Matrix<int, 2> matrix_a(
dash::Matrix<value_t, 2> matrix_a(
dash::SizeSpec<2>(
extent_cols,
extent_rows),
dash::DistributionSpec<2>(
dash::TILE(tilesize_x),
dash::TILE(tilesize_y)));
dash::Matrix<int, 2> matrix_b(
dash::Matrix<value_t, 2> matrix_b(
dash::SizeSpec<2>(
extent_cols,
extent_rows),
Expand All @@ -185,16 +187,17 @@ TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking)
LOG_MESSAGE("Matrix size: %zu", matrix_size);

// Fill matrix
if(myid == 0) {
LOG_MESSAGE("Assigning matrix values");
for(size_t i = 0; i < matrix_a.extent(0); ++i) {
for(size_t k = 0; k < matrix_a.extent(1); ++k) {
auto value = (i * 1000) + (k * 1);
matrix_a[i][k] = value * 100000;
matrix_b[i][k] = value;
}
}
}
LOG_MESSAGE("Assigning matrix values");
auto &pattern = matrix_a.pattern();
auto gen = [&pattern](index_t gidx){
auto coords = pattern.coords(gidx);
return 100000 * ((coords[0] * 1000) + coords[1]);
};
dash::generate_with_index(matrix_a.begin(), matrix_a.end(), gen);
dash::generate_with_index(matrix_b.begin(), matrix_b.end(), gen);
matrix_a.barrier();


LOG_MESSAGE("Wait for team barrier ...");
dash::Team::All().barrier();
LOG_MESSAGE("Team barrier passed");
Expand Down