From de2ec2b6178badb64879ddd0dbf6c1b56ef14b57 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 27 May 2021 21:33:08 -0400 Subject: [PATCH 1/5] Updates for revised madness serialization --- external/versions.cmake | 4 ++-- src/TiledArray/cuda/cpu_cuda_vector.h | 8 ++------ src/TiledArray/cuda/um_storage.h | 8 ++------ src/TiledArray/dist_array.h | 4 ++-- src/TiledArray/external/eigen.h | 10 +--------- src/TiledArray/permutation.h | 2 +- src/TiledArray/range.h | 4 ++-- src/TiledArray/sparse_shape.h | 8 ++++---- src/TiledArray/tensor/tensor.h | 10 +++++----- src/TiledArray/tile.h | 8 ++++---- src/TiledArray/tiled_range.h | 8 ++++---- src/TiledArray/tiled_range1.h | 21 +++++---------------- src/TiledArray/val_array.h | 4 ++-- tests/sparse_tile.h | 8 ++++---- 14 files changed, 40 insertions(+), 67 deletions(-) diff --git a/external/versions.cmake b/external/versions.cmake index ef8f9816dc..f609a31476 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH b9e98a200d2455f06db9c661c5610496) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG a3f3dce8c9d81262cf9fd7b29f97fcdafc7372a5) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG b22ee85059e6ccc9a6e803ba0550652ece8d9df1) +set(TA_TRACKED_MADNESS_TAG 56fbd23b809aa77408201b093ca5593f048d1ec8) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG a3f3dce8c9d81262cf9fd7b29f97fcdafc7372a5) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) diff --git a/src/TiledArray/cuda/cpu_cuda_vector.h b/src/TiledArray/cuda/cpu_cuda_vector.h index 91c6a699d9..7370eeaa2e 100644 --- a/src/TiledArray/cuda/cpu_cuda_vector.h +++ b/src/TiledArray/cuda/cpu_cuda_vector.h @@ -7,6 +7,8 @@ #include #include +#include + namespace TiledArray { /// \brief a vector that lives on either host or device side, or both @@ -204,12 +206,6 @@ const T* device_data(const cpu_cuda_vector& storage) { namespace madness { namespace archive { -// forward decls -template -struct ArchiveLoadImpl; -template -struct ArchiveStoreImpl; - template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, diff --git a/src/TiledArray/cuda/um_storage.h b/src/TiledArray/cuda/um_storage.h index aff455640a..4b3781185c 100644 --- a/src/TiledArray/cuda/um_storage.h +++ b/src/TiledArray/cuda/um_storage.h @@ -35,6 +35,8 @@ #include #include +#include + namespace TiledArray { template @@ -124,12 +126,6 @@ const typename Storage::value_type* device_data(const Storage& storage) { namespace madness { namespace archive { -// forward decls -template -struct ArchiveLoadImpl; -template -struct ArchiveStoreImpl; - template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 7931db27ed..6cd156ba5d 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1214,7 +1214,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { template ::value>> + madness::is_output_archive_v>> void serialize(const Archive& ar) const { // serialize array type, world size, rank, and pmap type to be able // to ensure same data type and same data distribution expected @@ -1234,7 +1234,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { template ::value>> + madness::is_input_archive_v>> void serialize(const Archive& ar) { auto& world = TiledArray::get_default_world(); diff --git a/src/TiledArray/external/eigen.h b/src/TiledArray/external/eigen.h index bcde7a649a..df1f42b8f0 100644 --- a/src/TiledArray/external/eigen.h +++ b/src/TiledArray/external/eigen.h @@ -33,6 +33,7 @@ #include #include +#include TILEDARRAY_PRAGMA_GCC(diagnostic push) TILEDARRAY_PRAGMA_GCC(system_header) @@ -58,15 +59,6 @@ TILEDARRAY_PRAGMA_GCC(diagnostic pop) namespace madness { namespace archive { -template -class archive_array; -template -inline archive_array wrap(const T*, unsigned int); -template -struct ArchiveStoreImpl; -template -struct ArchiveLoadImpl; - template diff --git a/src/TiledArray/permutation.h b/src/TiledArray/permutation.h index cb7610a6a0..3674e62e41 100644 --- a/src/TiledArray/permutation.h +++ b/src/TiledArray/permutation.h @@ -757,7 +757,7 @@ class BipartitePermutation { template void serialize(Archive& ar) { ar& base_& second_size_; - if constexpr (madness::archive::is_input_archive::value) { + if constexpr (madness::is_input_archive_v) { first_ = {}; second_ = {}; } diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index c6cb472951..cad65bf229 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -1123,10 +1123,10 @@ class Range { // read via madness::archive::wrap to be able to // - avoid having to serialize datavec_'s size // - read old archives that represented datavec_ by bare ptr - if constexpr (madness::archive::is_input_archive::value) { + if constexpr (madness::is_input_archive_v) { datavec_.resize(four_x_rank); ar >> madness::archive::wrap(datavec_.data(), four_x_rank); - } else if constexpr (madness::archive::is_output_archive::value) { + } else if constexpr (madness::is_output_archive_v) { ar << madness::archive::wrap(datavec_.data(), four_x_rank); } else abort(); // unreachable diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h index 502620ca8a..e43fc1f345 100644 --- a/src/TiledArray/sparse_shape.h +++ b/src/TiledArray/sparse_shape.h @@ -1565,8 +1565,8 @@ class SparseShape { } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) { ar& tile_norms_; const unsigned int dim = tile_norms_.range().rank(); @@ -1578,8 +1578,8 @@ class SparseShape { } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) const { ar& tile_norms_; const unsigned int dim = tile_norms_.range().rank(); diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index fe76b07bd0..e2f2dbfaf5 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -625,9 +625,9 @@ class Tensor { /// \tparam Archive The output archive type /// \param[out] ar The output archive template ::value>::type* = nullptr> - void serialize(Archive& ar) { + typename std::enable_if>::type* = nullptr> + void serialize(const Archive& ar) { if (pimpl_) { ar & pimpl_->range_.volume(); ar& madness::archive::wrap(pimpl_->data_, pimpl_->range_.volume()); @@ -643,8 +643,8 @@ class Tensor { /// \tparam Archive The input archive type /// \param[out] ar The input archive template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(Archive& ar) { ordinal_type n = 0ul; ar& n; diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 44fcbd71f9..57e1c1173f 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -569,8 +569,8 @@ class Tile { // Serialization ----------------------------------------------------------- template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(Archive& ar) const { // Serialize data for empty tile check bool empty = !static_cast(pimpl_); @@ -582,8 +582,8 @@ class Tile { } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(Archive& ar) { // Check for empty tile bool empty = false; diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h index 0aeb3ddf4f..e0c234b09d 100644 --- a/src/TiledArray/tiled_range.h +++ b/src/TiledArray/tiled_range.h @@ -298,15 +298,15 @@ class TiledRange { } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) { ar& range_& elements_range_& ranges_; } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) const { ar& range_& elements_range_& ranges_; } diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index ed61147205..4e0990f440 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -23,23 +23,12 @@ #include #include #include +#include #include #include #include #include -// Forward declaration of MADNESS archive type traits -namespace madness { -namespace archive { - -template -struct is_output_archive; -template -struct is_input_archive; - -} // namespace archive -} // namespace madness - namespace TiledArray { /// TiledRange1 class defines a non-uniformly-tiled, contiguous, one-dimensional @@ -216,15 +205,15 @@ class TiledRange1 { } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) { ar& range_& elements_range_& tiles_ranges_& elem2tile_; } template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(const Archive& ar) const { ar& range_& elements_range_& tiles_ranges_& elem2tile_; } diff --git a/src/TiledArray/val_array.h b/src/TiledArray/val_array.h index f1c4029e29..65c2b72785 100644 --- a/src/TiledArray/val_array.h +++ b/src/TiledArray/val_array.h @@ -454,7 +454,7 @@ class ValArray : private SizeArray { /// \param[out] ar an Archive object template ::value>> + madness::is_output_archive_v>> void serialize(Archive& ar) const { // need to write size first to be able to init when deserializing ar& size() & madness::archive::wrap(data(), size()); @@ -466,7 +466,7 @@ class ValArray : private SizeArray { /// \param[out] ar an Archive object template ::value>> + madness::is_input_archive_v>> void serialize(Archive& ar) { size_t sz = 0; ar& sz; diff --git a/tests/sparse_tile.h b/tests/sparse_tile.h index d268b55cc4..6c365334fa 100644 --- a/tests/sparse_tile.h +++ b/tests/sparse_tile.h @@ -138,8 +138,8 @@ class EigenSparseTile { // output template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(Archive& ar) { if (impl_) { ar & true; @@ -159,8 +159,8 @@ class EigenSparseTile { // output template ::value>::type* = nullptr> + typename std::enable_if>::type* = nullptr> void serialize(Archive& ar) { bool have_impl = false; ar& have_impl; From 0067f44b132ddb4256e28f38b5b83ebb8d2d9bca Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 28 May 2021 08:55:44 -0400 Subject: [PATCH 2/5] CUDA callback tasks no longer need to deal with pre-buffer MADNESS runtime takes care of this automatically as of https://github.com/m-a-d-n-e-s-s/madness/pull/376/commits/56fbd23b809aa77408201b093ca5593f048d1ec8 --- src/TiledArray/cuda/cuda_task_fn.h | 4 +--- src/TiledArray/reduce_task.h | 8 ++++++++ tests/cutt.cpp | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/cuda/cuda_task_fn.h b/src/TiledArray/cuda/cuda_task_fn.h index f11df63285..8de133b3bd 100644 --- a/src/TiledArray/cuda/cuda_task_fn.h +++ b/src/TiledArray/cuda/cuda_task_fn.h @@ -123,6 +123,7 @@ struct cudaTaskFn : public TaskInterface { private: static void CUDART_CB cuda_callback(void* userData) { + TA_ASSERT(!madness::is_madness_thread()); const auto t0 = TiledArray::now(); // convert void * to AsyncTaskInterface* auto* callback = static_cast(userData); @@ -131,9 +132,6 @@ struct cudaTaskFn : public TaskInterface { // std::string message = "callback on cudaTaskFn: " + address.str() + // '\n'; std::cout << message; callback->notify(); - // must flush the prebuf (if any) to make sure this cleanup task will be - // actually submitted - ThreadPool::instance()->flush_prebuf(); const auto t1 = TiledArray::now(); TiledArray::detail::cuda_taskfn_callback_duration_ns() += diff --git a/src/TiledArray/reduce_task.h b/src/TiledArray/reduce_task.h index 5b084d0bcd..03abd9c818 100644 --- a/src/TiledArray/reduce_task.h +++ b/src/TiledArray/reduce_task.h @@ -307,6 +307,8 @@ class ReduceTask { #ifdef TILEDARRAY_HAS_CUDA static void CUDART_CB cuda_reduceobject_delete_callback(void* userData) { + TA_ASSERT(!madness::is_madness_thread()); + const auto t0 = TiledArray::now(); std::vector* objects = static_cast*>(userData); @@ -341,6 +343,8 @@ class ReduceTask { } static void CUDART_CB cuda_dependency_dec_callback(void* userData) { + TA_ASSERT(!madness::is_madness_thread()); + const auto t0 = TiledArray::now(); std::vector* objects = static_cast*>(userData); @@ -363,6 +367,8 @@ class ReduceTask { static void CUDART_CB cuda_dependency_dec_reduceobject_delete_callback(void* userData) { + TA_ASSERT(!madness::is_madness_thread()); + const auto t0 = TiledArray::now(); std::vector* objects = static_cast*>(userData); @@ -398,6 +404,8 @@ class ReduceTask { } static void CUDART_CB cuda_readyresult_reset_callback(void* userData) { + TA_ASSERT(!madness::is_madness_thread()); + const auto t0 = TiledArray::now(); std::vector* objects = static_cast*>(userData); diff --git a/tests/cutt.cpp b/tests/cutt.cpp index 41f9ecb56a..8a6b1af539 100644 --- a/tests/cutt.cpp +++ b/tests/cutt.cpp @@ -693,4 +693,4 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_six) { } BOOST_AUTO_TEST_SUITE_END() -#endif +#endif // TILEDARRAY_HAS_CUDA From 1ec86d0d2beec2340cbd4b0b6a7d0c9ae7927c48 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 28 May 2021 13:31:22 -0400 Subject: [PATCH 3/5] bump MADNESS tag to fix MPI archive serialization --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 0a1e0e0337..986ed941c0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Range: header-only, *only used for unit testing* - [BTAS](http://github.com/ValeevGroup/BTAS), tag d7794799e4510cf66844081dd8f1f5b648112d33 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. - - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag a3f3dce8c9d81262cf9fd7b29f97fcdafc7372a5 . + - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag ce21aa7723c5c94ecc2e459c22891053eddd5a95 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. diff --git a/external/versions.cmake b/external/versions.cmake index f609a31476..15c19377d5 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH b9e98a200d2455f06db9c661c5610496) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 56fbd23b809aa77408201b093ca5593f048d1ec8) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG a3f3dce8c9d81262cf9fd7b29f97fcdafc7372a5) +set(TA_TRACKED_MADNESS_TAG ce21aa7723c5c94ecc2e459c22891053eddd5a95) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 56fbd23b809aa77408201b093ca5593f048d1ec8) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) From ff24ea17822e44af56797745975aa5c242ca6eac Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 28 May 2021 13:32:07 -0400 Subject: [PATCH 4/5] cleanup --- src/TiledArray/tensor/tensor.h | 2 +- src/TiledArray/tile.h | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index e2f2dbfaf5..b3e3fdbe9c 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -627,7 +627,7 @@ class Tensor { template >::type* = nullptr> - void serialize(const Archive& ar) { + void serialize(Archive& ar) { if (pimpl_) { ar & pimpl_->range_.volume(); ar& madness::archive::wrap(pimpl_->data_, pimpl_->range_.volume()); diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 57e1c1173f..99be71851c 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -25,18 +25,6 @@ #include #include -// Forward declaration of MADNESS archive type traits -namespace madness { -namespace archive { - -template -struct is_output_archive; -template -struct is_input_archive; - -} // namespace archive -} // namespace madness - namespace TiledArray { /** From f445977feee125fd4439b09be34e1a83a723cbd7 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 29 May 2021 09:44:47 -0400 Subject: [PATCH 5/5] misc type conversion fixes --- .../math/linalg/scalapack/block_cyclic.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/TiledArray/math/linalg/scalapack/block_cyclic.h b/src/TiledArray/math/linalg/scalapack/block_cyclic.h index 4ca4e31c4e..902312788b 100644 --- a/src/TiledArray/math/linalg/scalapack/block_cyclic.h +++ b/src/TiledArray/math/linalg/scalapack/block_cyclic.h @@ -226,11 +226,11 @@ class BlockCyclicMatrix : public madness::WorldObject> { auto tile_map = eigen_map(tile); // Extract distribution information - const auto mb = bc_dist_.mb(); - const auto nb = bc_dist_.nb(); + const size_t mb = bc_dist_.mb(); + const size_t nb = bc_dist_.nb(); - const auto m = dims_.first; - const auto n = dims_.second; + decltype(mb) m = dims_.first; + decltype(mb) n = dims_.second; // Loop over 2D BC compatible blocks size_t i_extent, j_extent; @@ -239,16 +239,16 @@ class BlockCyclicMatrix : public madness::WorldObject> { for (size_t j = lo[1], j_t = 0ul; j < up[1]; j += j_extent, j_t += j_extent) { // Determine indices of start of BC owning block - const decltype(m) i_block_begin = (i / mb) * mb; - const decltype(n) j_block_begin = (j / nb) * nb; + decltype(m) i_block_begin = (i / mb) * mb; + decltype(m) j_block_begin = (j / nb) * nb; // Determine indices of end of BC owning block const auto i_block_end = std::min(m, i_block_begin + mb); const auto j_block_end = std::min(n, j_block_begin + nb); - // Cut block if necessacary to adhere to tile dimensions - const auto i_last = std::min(i_block_end, static_cast(up[0])); - const auto j_last = std::min(j_block_end, static_cast(up[1])); + // Cut block if necessary to adhere to tile dimensions + const auto i_last = std::min(i_block_end, static_cast(up[0])); + const auto j_last = std::min(j_block_end, static_cast(up[1])); // Calculate extents of the block to be copied i_extent = i_last - i;