diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index cf46ecc60..68f34a4b0 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -57,7 +57,7 @@ static dart_ret_t get_shared_mem( int16_t seg_id = gptr.segid; uint64_t offset = gptr.addr_or_offs.offset; DART_LOG_DEBUG("dart_get: shared windows enabled"); - dart_team_unit_t luid = DART_TEAM_UNIT_ID(gptr.unitid); + dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid]; char * baseptr; /* * Use memcpy if the target is in the same node as the calling unit: @@ -65,7 +65,8 @@ static dart_ret_t get_shared_mem( DART_LOG_DEBUG("dart_get: shared memory segment, seg_id:%d", seg_id); if (seg_id) { - if (dart_segment_get_baseptr(&team_data->segdata, seg_id, luid, &baseptr) != DART_OK) { + if (dart_segment_get_baseptr( + &team_data->segdata, seg_id, luid, &baseptr) != DART_OK) { DART_LOG_ERROR("dart_get ! " "dart_adapt_transtable_get_baseptr failed"); return DART_ERR_INVAL; @@ -74,7 +75,8 @@ static dart_ret_t get_shared_mem( baseptr = dart_sharedmem_local_baseptr_set[luid.id]; } baseptr += offset; - DART_LOG_DEBUG("dart_get: memcpy %zu bytes", nelem * dart__mpi__datatype_sizeof(dtype)); + DART_LOG_DEBUG( + "dart_get: memcpy %zu bytes", nelem * dart__mpi__datatype_sizeof(dtype)); memcpy((char*)dest, baseptr, nelem * dart__mpi__datatype_sizeof(dtype)); return DART_OK; } @@ -1276,40 +1278,33 @@ dart_ret_t dart_waitall_local( * copy MPI requests back to DART handles: */ DART_LOG_TRACE("dart_waitall_local: " - "copying MPI requests back to DART handles"); + "releasing DART handles"); r_n = 0; for (i = 0; i < num_handles; i++) { if (handle[i]) { - DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_SOURCE:" - " %d", - r_n, mpi_sta[r_n].MPI_SOURCE); - DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_ERROR:" - " %d:%s", - r_n, - mpi_sta[r_n].MPI_ERROR, - DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR)); - if (mpi_req[r_n] != MPI_REQUEST_NULL) { - if (mpi_sta[r_n].MPI_ERROR == MPI_SUCCESS) { - DART_LOG_TRACE("dart_waitall_local: -- MPI_Request_free"); - if (MPI_Request_free(&mpi_req[r_n]) != MPI_SUCCESS) { - DART_LOG_TRACE("dart_waitall_local ! MPI_Request_free failed"); - ret = DART_ERR_INVAL; - break; - } - } else { - DART_LOG_TRACE("dart_waitall_local: cannot free request %zu " - "mpi_sta[%zu] = %d (%s)", - r_n, + if (handle[i]->request) { + DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_SOURCE:" + " %d", + r_n, mpi_sta[r_n].MPI_SOURCE); + DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%"PRIu64"].MPI_ERROR:" + " %d:%s", + r_n, + mpi_sta[r_n].MPI_ERROR, + DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR)); + if (mpi_sta[r_n].MPI_ERROR != MPI_SUCCESS) { + DART_LOG_ERROR("dart_waitall_local: detected unsuccesful request " + "%zu mpi_sta[%zu] = %d (%s)", + i, r_n, mpi_sta[r_n].MPI_ERROR, DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR)); } + r_n++; } - DART_LOG_DEBUG("dart_waitall_local: free handle[%zu] %p", + DART_LOG_TRACE("dart_waitall_local: free handle[%zu] %p", i, (void*)(handle[i])); free(handle[i]); handle[i] = NULL; - r_n++; } } DART_LOG_TRACE("dart_waitall_local: free MPI_Request temporaries"); @@ -1443,15 +1438,6 @@ dart_ret_t dart_waitall( free(mpi_sta); return DART_ERR_INVAL; } - DART_LOG_TRACE("dart_waitall: -- MPI_Request_free"); - if (MPI_Request_free(&handle[i]->request) != MPI_SUCCESS) { - DART_LOG_ERROR("dart_waitall: MPI_Request_free failed"); - DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries"); - free(mpi_req); - DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries"); - free(mpi_sta); - return DART_ERR_INVAL; - } } } } diff --git a/dart-impl/mpi/src/dart_globmem.c b/dart-impl/mpi/src/dart_globmem.c index 12294aca6..4dc9657f1 100644 --- a/dart-impl/mpi/src/dart_globmem.c +++ b/dart-impl/mpi/src/dart_globmem.c @@ -283,7 +283,7 @@ dart_team_memalloc_aligned( MPI_Comm_rank(sharedmem_comm, &sharedmem_unitid); // re-use previously allocated memory if (segment->baseptr == NULL) { - segment->baseptr = malloc(sizeof(char *) * team_data->sharedmem_nodesize); + segment->baseptr = calloc(team_data->sharedmem_nodesize, sizeof(char *)); } baseptr_set = segment->baseptr; diff --git a/dash/include/dash/algorithm/ForEach.h b/dash/include/dash/algorithm/ForEach.h index a56510caa..b95b4f322 100644 --- a/dash/include/dash/algorithm/ForEach.h +++ b/dash/include/dash/algorithm/ForEach.h @@ -95,12 +95,12 @@ void for_each_with_index( if (lbegin_index != lend_index) { // Pattern from global begin iterator: auto & pattern = first.pattern(); + auto first_offset = first.pos(); // Iterate local index range: for (auto lindex = lbegin_index; lindex != lend_index; ++lindex) { auto gindex = pattern.global(lindex); - auto first_offset = first.pos(); auto element_it = first + (gindex - first_offset); func(*(element_it.local()), gindex); } diff --git a/dash/include/dash/algorithm/Generate.h b/dash/include/dash/algorithm/Generate.h index 74906088a..e3ab6d42c 100644 --- a/dash/include/dash/algorithm/Generate.h +++ b/dash/include/dash/algorithm/Generate.h @@ -22,7 +22,7 @@ namespace dash { * \tparam ElementType Type of the elements in the sequence * invoke, deduced from parameter \c gen * \tparam UnaryFunction Unary function with signature - * \c void(ElementType &) + * \c ElementType(void) * * \complexity O(d) + O(nl), with \c d dimensions in the global iterators' * pattern and \c nl local elements within the global range @@ -48,6 +48,55 @@ void generate ( std::generate(lfirst, llast, gen); } +/** + * Assigns each element in range [first, last) a value generated by the + * given function object g. The index passed to the function is + * a global index. + * + * Being a collaborative operation, each unit will invoke the given + * function on its local elements only. + * + * \tparam ElementType Type of the elements in the sequence + * invoke, deduced from parameter \c gen + * \tparam UnaryFunction Unary function with signature + * \c ElementType(index_t) + * + * \complexity O(d) + O(nl), with \c d dimensions in the global iterators' + * pattern and \c nl local elements within the global range + * + * \ingroup DashAlgorithms + */ +template < + typename ElementType, + class PatternType, + class UnaryFunction > +void generate_with_index( + /// Iterator to the initial position in the sequence + GlobIter first, + /// Iterator to the final position in the sequence + GlobIter last, + /// Generator function + UnaryFunction gen) { + /// Global iterators to local index range: + auto index_range = dash::local_index_range(first, last); + auto lbegin_index = index_range.begin; + auto lend_index = index_range.end; + + if (lbegin_index != lend_index) { + // Pattern from global begin iterator: + auto & pattern = first.pattern(); + auto first_offset = first.pos(); + // Iterate local index range: + for (auto lindex = lbegin_index; + lindex != lend_index; + ++lindex) { + auto gindex = pattern.global(lindex); + auto element_it = first + (gindex - first_offset); + *element_it = gen(gindex); + } + } +} + } // namespace dash #endif // DASH__ALGORITHM__GENERATE_H__ diff --git a/dash/include/dash/memory/GlobStaticMem.h b/dash/include/dash/memory/GlobStaticMem.h index 9435ee735..996005db4 100644 --- a/dash/include/dash/memory/GlobStaticMem.h +++ b/dash/include/dash/memory/GlobStaticMem.h @@ -393,7 +393,7 @@ class GlobStaticMem /** * Synchronize all units associated with this global memory instance. */ - void barrier() const noexcept + void barrier() const { DASH_ASSERT_RETURNS( dart_barrier(_teamid), diff --git a/dash/test/algorithm/GenerateTest.cc b/dash/test/algorithm/GenerateTest.cc index 82ea9ca60..5740aed95 100644 --- a/dash/test/algorithm/GenerateTest.cc +++ b/dash/test/algorithm/GenerateTest.cc @@ -6,7 +6,7 @@ #include -TEST_F(GenerateTest, TestAllItemsGenerated) +TEST_F(GenerateTest, TestGenerate) { typedef typename Array_t::value_type value_t; @@ -32,3 +32,26 @@ TEST_F(GenerateTest, TestAllItemsGenerated) ASSERT_EQ_U(17, static_cast(*lbegin)); } } + +TEST_F(GenerateTest, TestGenerateWithIndex) +{ + typedef typename Array_t::value_type value_t; + + // Initialize global array: + Array_t array(_num_elem); + // Generator function + auto f = [](Array_t::index_type idx){ return 2*idx; }; + // Fill Array with given generator function + dash::generate_with_index(array.begin(), array.end(), f); + // Wait for all units + array.barrier(); + + // check global index range + if (dash::myid() == 0) { + for (size_t idx = 0; + idx != array.size(); + ++idx) { + ASSERT_EQ_U(idx * 2.0, array[idx]); + } + } +} diff --git a/dash/test/algorithm/MinElementTest.cc b/dash/test/algorithm/MinElementTest.cc index c605cd6d5..e3edcf2c6 100644 --- a/dash/test/algorithm/MinElementTest.cc +++ b/dash/test/algorithm/MinElementTest.cc @@ -2,6 +2,7 @@ #include "MinElementTest.h" #include +#include #include #include @@ -218,6 +219,7 @@ TEST_F(MinElementTest, TestShrinkRange) TEST_F(MinElementTest, TestFindMatrixDefault) { + typedef typename dash::Matrix::index_type index_t; Element_t min_value = 11; size_t num_units = dash::Team::All().size(); size_t tilesize_x = 13; @@ -239,13 +241,17 @@ TEST_F(MinElementTest, TestFindMatrixDefault) ASSERT_EQ(extent_rows, matrix.extent(1)); LOG_MESSAGE("Matrix size: %d", static_cast(matrix_size)); // Fill matrix - if (dash::myid() == 0) { - LOG_MESSAGE("Assigning matrix values"); - for(int i = 0; i < matrix.extent(0); ++i) { - for(int k = 0; k < matrix.extent(1); ++k) { - matrix[i][k] = 20 + (i * 11) + (k * 97); - } + LOG_MESSAGE("Assigning matrix values"); + auto &pattern = matrix.pattern(); + dash::generate_with_index( + matrix.begin(), matrix.end(), + [&pattern](index_t gidx){ + auto coords = pattern.coords(gidx); + return 20 + (coords[0] * 11) + (coords[1] * 97); } + ); + matrix.barrier(); + if (dash::myid() == 0) { LOG_MESSAGE("Setting matrix[%d][%d] = %d (min)", min_pos_x, min_pos_y, static_cast(min_value)); matrix[min_pos_x][min_pos_y] = min_value; diff --git a/dash/test/algorithm/TransformTest.cc b/dash/test/algorithm/TransformTest.cc index a6480b31d..4a9d6e4c9 100644 --- a/dash/test/algorithm/TransformTest.cc +++ b/dash/test/algorithm/TransformTest.cc @@ -2,6 +2,7 @@ #include "TransformTest.h" #include +#include #include #include @@ -156,21 +157,22 @@ TEST_F(TransformTest, ArrayGlobalPlusGlobalBlocking) TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking) { // Block-wise addition (a += b) of two matrices - typedef typename dash::Matrix::index_type index_t; + using value_t = int64_t; + typedef typename dash::Matrix::index_type index_t; dash::global_unit_t myid = dash::myid(); size_t num_units = dash::Team::All().size(); size_t tilesize_x = 7; size_t tilesize_y = 3; size_t extent_cols = tilesize_x * num_units * 2; size_t extent_rows = tilesize_y * num_units * 2; - dash::Matrix matrix_a( + dash::Matrix matrix_a( dash::SizeSpec<2>( extent_cols, extent_rows), dash::DistributionSpec<2>( dash::TILE(tilesize_x), dash::TILE(tilesize_y))); - dash::Matrix matrix_b( + dash::Matrix matrix_b( dash::SizeSpec<2>( extent_cols, extent_rows), @@ -185,16 +187,17 @@ TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking) LOG_MESSAGE("Matrix size: %zu", matrix_size); // Fill matrix - if(myid == 0) { - LOG_MESSAGE("Assigning matrix values"); - for(size_t i = 0; i < matrix_a.extent(0); ++i) { - for(size_t k = 0; k < matrix_a.extent(1); ++k) { - auto value = (i * 1000) + (k * 1); - matrix_a[i][k] = value * 100000; - matrix_b[i][k] = value; - } - } - } + LOG_MESSAGE("Assigning matrix values"); + auto &pattern = matrix_a.pattern(); + auto gen = [&pattern](index_t gidx){ + auto coords = pattern.coords(gidx); + return 100000 * ((coords[0] * 1000) + coords[1]); + }; + dash::generate_with_index(matrix_a.begin(), matrix_a.end(), gen); + dash::generate_with_index(matrix_b.begin(), matrix_b.end(), gen); + matrix_a.barrier(); + + LOG_MESSAGE("Wait for team barrier ..."); dash::Team::All().barrier(); LOG_MESSAGE("Team barrier passed");