diff --git a/INSTALL.md b/INSTALL.md
index 6060c4bd29..6517dc86c6 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
 - Boost.Range: header-only, *only used for unit testing*
 - [BTAS](http://github.com/ValeevGroup/BTAS), tag 561fe1bff7f3374814111a15e28c7a141ab9b67a . If a usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*.
-- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 91fff76deba20c751d0646c54f2f1c1e07bd6156 .
+- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 58b3e2c623d772f6e4a2e9cf5758073de32ecc50 .
 Only the MADworld runtime and BLAS/LAPACK C API components of MADNESS are used by TiledArray. If a usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
diff --git a/examples/demo/demo2.cpp b/examples/demo/demo2.cpp
index 5818fae22d..7ef5ca45c8 100644
--- a/examples/demo/demo2.cpp
+++ b/examples/demo/demo2.cpp
@@ -56,6 +56,10 @@ int main(int argc, char* argv[]) {
   assert((ρ.upbound() == Index{11, 9}));
   // extent of $\mathbb{Z}_{1,11} \otimes \mathbb{Z}_{-1,9}$
   assert((ρ.extent() == Index{10, 10}));
+  // 1st dimension of ρ is $\mathbb{Z}_{1,11}$
+  assert((ρ.dim(0) == Range1{1, 11}));
+  // 2nd dimension of ρ is $\mathbb{Z}_{-1,9}$
+  assert((ρ.dim(1) == Range1{-1, 9}));
   // the number of elements in $\mathbb{Z}_{1,11} \otimes \mathbb{Z}_{-1,9}$
   assert(ρ.volume() == 100);
   // row-major order
@@ -100,12 +104,51 @@ int main(int argc, char* argv[]) {
   auto tile_0_0 = τ.tile({0, 0});
   assert((tile_0_0 == Range{{1, 5}, {-1, 5}}));

-  // default instance of $\code{DistArray}$ is a dense array of $\code{double}$s
-  DistArray array0(world, τ);
-  array0.fill(1.0);  // fill $\code{array0}$ with 1's
-
-  // grab a tile NB this returns a ${\bf future}$ to a tile; see Section 3.2.
-  auto t00 = array0.find({0, 0});
+  // clang-format off
+
+  // 2-d array of $\code{double}$ 0s, indexed by ρ
+  Tensor<double> t0(ρ, 0.);
+  // same as $\code{t0}$ but filled with ordinals
+  TensorD t1(ρ, [&ρ](auto&& idx) {
+    return ρ.ordinal(idx);
+  });
+  // print out "0 1 .. 99 "
+  for (auto&& v : t1) cout << v << " ";
+  // same as $\code{t0}$, using existing buffer
+  shared_ptr<double[]> v(new double[ρ.volume()]);
+  TensorD t2(ρ, v);  // t2 and v co-manage buffer lifetime
+  v[0] = 1.;
+  assert(t2(1, -1) == 1.);
+  // same as $\code{t0}$, using existing (unmanaged) buffer
+  auto t3 = make_map(v.get(), ρ);
+  v[0] = 2.;
+  assert(t3(1, -1) == 2.);
+  // Tensor has shallow-copy semantics
+  auto t4 = t0;
+  t0(1, -1) = 3.;
+  assert(t4(1, -1) == 3.);
+
+  // clang-format on
+
+  // default instance of $\code{DistArray}$ is
+  // a {\em dense} array of $\code{double}$s
+  // NB can use TArrayD instead of DistArray<>
+  DistArray<> a0(τ);
+  a0.fill(1.);  // fill $\code{a0}$ with 1s
+  // every tile exists in a dense array
+  assert(!a0.is_zero({0, 0}));
+  // grab a ${\em future}$ to the {0,0} tile
+  auto t00 = a0.find({0, 0});
+
+  // shape of a {\em sparse} array over τ
+  // tiles with even ordinals ({0,0}, {0,2}, {1,1}) are zero
+  SparseShape<float> s(TensorF(τ.tiles_range(), {0, 1, 0, 1, 0, 1}), τ);
+  // a sparse array of $\code{double}$s
+  // TSpArrayX $\equiv$ DistArray<Tensor<X>, SparsePolicy>
+  TSpArrayD a1(τ, s);
+  // only some tiles are nonzero in sparse array
+  assert(a1.is_zero({0, 0}));
+  assert(!a1.is_zero({0, 1}));
 #endif  // defined(TILEDARRAY_CXX_COMPILER_SUPPORT_UNICODE_VARIABLES)
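Reviewer note: the demo relies on `Tensor`'s shallow-copy semantics (`t4` aliases `t0`'s buffer). For contrast, a deep copy requires an explicit `clone()`; a minimal sketch of the distinction (variable names and the 3x3 range are ours, not from the demo):

```cpp
TA::TensorD a(TA::Range{3, 3}, 0.0);
auto b = a;          // shallow copy: b shares a's buffer
auto c = a.clone();  // deep copy: c owns a freshly allocated buffer
a(0, 0) = 42.0;
assert(b(0, 0) == 42.0);  // b observes writes made through a
assert(c(0, 0) == 0.0);   // c is unaffected
```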
99 " + for (auto&& v : t1) cout << v << " "; + // same as $\code{t0}$, using existing buffer + shared_ptr v(new double[ρ.volume()]); + TensorD t2(ρ, v); // t2 and v co-manage buffer lifetime + v[0] = 1.; + assert(t2(1, -1) == 1.); + // same as $\code{t0}$, using existing (unmanaged) buffer + auto t3 = make_map(v.get(), ρ); + v[0] = 2.; + assert(t3(1, -1) == 2.); + // Tensor has shallow-copy semantics + auto t4 = t0; + t0(1, -1) = 3.; + assert(t4(1, -1) == 3.); + + // clang-format on + + // default instance of $\code{DistArray}$ is + // a {\em dense} array of $\code{double}$s + // NB can use TArrayD instead of DistArray<> + DistArray<> a0(τ); + a0.fill(1.); // fill $\code{da}$ with 1s + // every tile exists in a dense array + assert(!a0.is_zero({0, 0})); + // grab a ${\em future}$ to the {0,0} tile + auto t00 = a0.find({0, 0}); + + // shape of a {\em sparse} array over τ + // tiles with even ordinals ({0,0}, {0,2}, {1,1}) are zero + SparseShape s(TensorF(τ.tiles_range(), {0, 1, 0, 1, 0, 1}), τ); + // a sparse array of $\code{double}$s + // TSpArrayX $\equiv$ DistArray + TSpArrayD a1(τ, s); + // only some tiles are nonzero in sparse array + assert(a1.is_zero({0, 0})); + assert(!a1.is_zero({0, 1})); #endif // defined(TILEDARRAY_CXX_COMPILER_SUPPORT_UNICODE_VARIABLES) diff --git a/external/versions.cmake b/external/versions.cmake index e9cfb45375..313b8d5d11 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 91fff76deba20c751d0646c54f2f1c1e07bd6156) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG 0b44ef319643cb9721fbe17d294987c146e6460e) +set(TA_TRACKED_MADNESS_TAG 58b3e2c623d772f6e4a2e9cf5758073de32ecc50) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG dc3294160209cbd683bfb57cb2b933bd5f86e07e) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 09e99eda86..fd0450ed8e 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -158,7 +158,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { using rebind_numeric_t = typename rebind_numeric::type; private: - pimpl_type pimpl_; ///< managed ptr to Array implementation + pimpl_type pimpl_ = {}; ///< managed ptr to Array implementation bool defer_deleter_to_next_fence_ = false; ///< if true, the impl object is scheduled to be destroyed in the ///< next fence @@ -277,7 +277,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// array is uninitialized, but these arrays may be assign via a tensor /// expression assignment or the copy construction. - DistArray() : pimpl_() {} + DistArray() = default; /// Copy constructor @@ -298,6 +298,19 @@ class DistArray : public madness::archive::ParallelSerializableObject { const std::shared_ptr& pmap = {}) : pimpl_(init(world, trange, shape_type(1, trange), pmap)) {} + /// Dense array constructor + + /// Constructs an array with the given meta data in default World. + /// This constructor only initializes the array meta data; + /// the array tiles are empty and must be assigned by the user. + /// \param trange The tiled range object that will be used to set the array + /// tiling. 
diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h
index 09e99eda86..fd0450ed8e 100644
--- a/src/TiledArray/dist_array.h
+++ b/src/TiledArray/dist_array.h
@@ -158,7 +158,7 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   using rebind_numeric_t = typename rebind_numeric::type;

 private:
-  pimpl_type pimpl_;  ///< managed ptr to Array implementation
+  pimpl_type pimpl_ = {};  ///< managed ptr to Array implementation
   bool defer_deleter_to_next_fence_ =
       false;  ///< if true, the impl object is scheduled to be destroyed in the
               ///< next fence
@@ -277,7 +277,7 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   /// array is uninitialized, but these arrays may be assigned via a tensor
   /// expression assignment or the copy construction.

-  DistArray() : pimpl_() {}
+  DistArray() = default;

   /// Copy constructor
@@ -298,6 +298,19 @@ class DistArray : public madness::archive::ParallelSerializableObject {
             const std::shared_ptr<pmap_interface>& pmap = {})
       : pimpl_(init(world, trange, shape_type(1, trange), pmap)) {}

+  /// Dense array constructor
+
+  /// Constructs an array with the given meta data in default World.
+  /// This constructor only initializes the array meta data;
+  /// the array tiles are empty and must be assigned by the user.
+  /// \param trange The tiled range object that will be used to set the array
+  /// tiling.
+  /// \param pmap The tile index -> process map
+  explicit DistArray(const trange_type& trange,
+                     const std::shared_ptr<pmap_interface>& pmap = {})
+      : pimpl_(init(get_default_world(), trange, shape_type(1, trange), pmap)) {
+  }
+
   /// Sparse array constructor

   /// Constructs an array with the given meta data. This constructor only
@@ -312,6 +325,19 @@ class DistArray : public madness::archive::ParallelSerializableObject {
                 std::shared_ptr<pmap_interface>())
       : pimpl_(init(world, trange, shape, pmap)) {}

+  /// Sparse array constructor
+
+  /// Constructs an array with the given meta data in default World.
+  /// This constructor only initializes the array meta data; the array tiles
+  /// are empty and must be assigned by the user.
+  /// \param trange The tiled range object that will be used to set the array
+  /// tiling.
+  /// \param shape The array shape that defines zero and non-zero tiles
+  /// \param pmap The tile index -> process map
+  DistArray(const trange_type& trange, const shape_type& shape,
+            const std::shared_ptr<pmap_interface>& pmap =
+                std::shared_ptr<pmap_interface>())
+      : pimpl_(init(get_default_world(), trange, shape, pmap)) {}
+
   /// \name Initializer list constructors
   /// \brief Creates a new tensor containing the elements in the provided
   /// `std::initializer_list`.
@@ -374,6 +400,41 @@ class DistArray : public madness::archive::ParallelSerializableObject {
           std::initializer_list<std::initializer_list<std::initializer_list<
               std::initializer_list<std::initializer_list<T>>>>>> il)
       : DistArray(array_from_il<DistArray_>(world, il)) {}
+
+  template <typename T>
+  explicit DistArray(std::initializer_list<T> il)  // N.B. clang does not like
+                                                   // detail::vector_il<T> here
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
+
+  template <typename T>
+  explicit DistArray(std::initializer_list<std::initializer_list<T>> il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
+
+  template <typename T>
+  explicit DistArray(
+      std::initializer_list<std::initializer_list<std::initializer_list<T>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
+
+  template <typename T>
+  explicit DistArray(
+      std::initializer_list<std::initializer_list<
+          std::initializer_list<std::initializer_list<T>>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
+
+  template <typename T>
+  explicit DistArray(
+      std::initializer_list<std::initializer_list<std::initializer_list<
+          std::initializer_list<std::initializer_list<T>>>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
+
+  template <typename T>
+  explicit DistArray(
+      std::initializer_list<std::initializer_list<std::initializer_list<
+          std::initializer_list<std::initializer_list<std::initializer_list<T>>>>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), il)) {}
   ///@}

   /// \name Tiling initializer list constructors
@@ -440,6 +501,44 @@ class DistArray : public madness::archive::ParallelSerializableObject {
           std::initializer_list<std::initializer_list<std::initializer_list<
               std::initializer_list<std::initializer_list<T>>>>>> il)
       : DistArray(array_from_il<DistArray_>(world, trange, il)) {}
+
+  template <typename T>
+  DistArray(const trange_type& trange, std::initializer_list<T> il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
+
+  template <typename T>
+  DistArray(const trange_type& trange,
+            std::initializer_list<std::initializer_list<T>> il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
+
+  template <typename T>
+  DistArray(
+      const trange_type& trange,
+      std::initializer_list<std::initializer_list<std::initializer_list<T>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
+
+  template <typename T>
+  DistArray(const trange_type& trange,
+            std::initializer_list<std::initializer_list<
+                std::initializer_list<std::initializer_list<T>>>>
+                il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
+
+  template <typename T>
+  DistArray(const trange_type& trange,
+            std::initializer_list<std::initializer_list<std::initializer_list<
+                std::initializer_list<std::initializer_list<T>>>>>
+                il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
+
+  template <typename T>
+  DistArray(
+      const trange_type& trange,
+      std::initializer_list<std::initializer_list<std::initializer_list<
+          std::initializer_list<std::initializer_list<std::initializer_list<T>>>>>>
+          il)
+      : DistArray(array_from_il<DistArray_>(get_default_world(), trange, il)) {}
   /// @}

   /// converting copy constructor
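Taken together, the new default-`World` constructors let arrays be created without threading a `World&` through every call site. A hedged usage sketch (the tiling and fill values are illustrative, not from the patch):

```cpp
#include <tiledarray.h>

int main(int argc, char* argv[]) {
  TA::World& world = TA::initialize(argc, argv);  // becomes the default World

  TA::TiledRange trange{{0, 2, 4}, {0, 2, 4}};  // 2x2 tiles over a 4x4 grid

  TA::TArrayD a(trange);  // dense array, constructed in the default World
  a.fill(1.0);

  // nested-initializer-list + tiling constructor, also in the default World
  TA::TArrayD b(trange, {{1.0, 2.0, 3.0, 4.0},
                         {2.0, 3.0, 4.0, 5.0},
                         {3.0, 4.0, 5.0, 6.0},
                         {4.0, 5.0, 6.0, 7.0}});

  world.gop.fence();
  TA::finalize();
  return 0;
}
```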
diff --git a/src/TiledArray/external/btas.h b/src/TiledArray/external/btas.h
index 7dbd115d4d..11971c269e 100644
--- a/src/TiledArray/external/btas.h
+++ b/src/TiledArray/external/btas.h
@@ -109,6 +109,34 @@ inline bool is_congruent(const btas::RangeNd<Order, Args...>& r1,
                     r2.extent_data());
 }

+/// Test if a BTAS range and a TA range are congruent
+
+/// This function tests that the rank and extent of
+/// \c r1 are equal to those of \c r2.
+/// \param r1 The first Range to compare
+/// \param r2 The second Range to compare
+template <CBLAS_ORDER Order, typename... Args>
+inline bool is_congruent(const btas::RangeNd<Order, Args...>& r1,
+                         const TiledArray::Range& r2) {
+  return (r1.rank() == r2.rank()) &&
+         std::equal(r1.extent_data(), r1.extent_data() + r1.rank(),
+                    r2.extent_data());
+}
+
+/// Test if a TA range and a BTAS range are congruent
+
+/// This function tests that the rank and extent of
+/// \c r1 are equal to those of \c r2.
+/// \param r1 The first Range to compare
+/// \param r2 The second Range to compare
+template <CBLAS_ORDER Order, typename... Args>
+inline bool is_congruent(const TiledArray::Range& r1,
+                         const btas::RangeNd<Order, Args...>& r2) {
+  return (r1.rank() == r2.rank()) &&
+         std::equal(r1.extent_data(), r1.extent_data() + r1.rank(),
+                    r2.extent_data());
+}
+
 template <typename T, typename... Args>
 decltype(auto) make_ti(const btas::Tensor<T, Args...>& arg) {
   return TiledArray::detail::TensorInterface
 #include
 #include
diff --git a/src/TiledArray/external/umpire.h b/src/TiledArray/external/umpire.h
index 644039abe7..ac42f3bf1c 100644
--- a/src/TiledArray/external/umpire.h
+++ b/src/TiledArray/external/umpire.h
@@ -91,14 +91,16 @@ class umpire_allocator_impl {
       : umpalloc_(umpalloc) {}

   template <typename U>
-  umpire_allocator_impl(const umpire_allocator_impl<U>& rhs) noexcept
+  umpire_allocator_impl(
+      const umpire_allocator_impl<U>& rhs) noexcept
       : umpalloc_(rhs.umpalloc_) {}

   /// allocates memory using umpire dynamic pool
   pointer allocate(size_t n) {
     TA_ASSERT(umpalloc_);

-    size_t nbytes = n * sizeof(T);
+    // QuickPool::allocate_internal does not handle zero-size allocations
+    size_t nbytes = n == 0 ? 1 : n * sizeof(T);
     pointer result = nullptr;

     auto* allocation_strategy = umpalloc_->getAllocationStrategy();
@@ -117,7 +119,8 @@
   void deallocate(pointer ptr, size_t n) {
     TA_ASSERT(umpalloc_);

-    const auto nbytes = n * sizeof(T);
+    // QuickPool::allocate_internal does not handle zero-size allocations
+    const auto nbytes = n == 0 ? 1 : n * sizeof(T);
     auto* allocation_strategy = umpalloc_->getAllocationStrategy();
    // N.B. with multiple threads would have to do this test in
@@ -137,15 +140,15 @@ class umpire_allocator_impl {
   umpire::Allocator* umpalloc_;
 };  // class umpire_allocator

-template <class T>
-bool operator==(const umpire_allocator_impl<T>& lhs,
-                const umpire_allocator_impl<T>& rhs) noexcept {
+template <class T1, class T2>
+bool operator==(const umpire_allocator_impl<T1>& lhs,
+                const umpire_allocator_impl<T2>& rhs) noexcept {
   return lhs.umpire_allocator() == rhs.umpire_allocator();
 }

-template <class T>
-bool operator!=(const umpire_allocator_impl<T>& lhs,
-                const umpire_allocator_impl<T>& rhs) noexcept {
+template <class T1, class T2>
+bool operator!=(const umpire_allocator_impl<T1>& lhs,
+                const umpire_allocator_impl<T2>& rhs) noexcept {
   return !(lhs == rhs);
 }
diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h
index 87af0e6115..f09a98c0e5 100644
--- a/src/TiledArray/fwd.h
+++ b/src/TiledArray/fwd.h
@@ -117,6 +117,11 @@ namespace symmetry {
 class Permutation;
 }

+// shapes
+class DenseShape;
+template <typename T>
+class SparseShape;
+
 // TiledArray Arrays
 template <typename Tile, typename Policy>
 class DistArray;
diff --git a/src/TiledArray/host/allocator.h b/src/TiledArray/host/allocator.h
index 9e221c42d7..dbb8f53b55 100644
--- a/src/TiledArray/host/allocator.h
+++ b/src/TiledArray/host/allocator.h
@@ -53,7 +53,9 @@ class host_allocator_impl
   template <class U>
   host_allocator_impl(const host_allocator_impl<U>& rhs) noexcept
-      : base_type(static_cast<const umpire_allocator_impl<U>&>(rhs)) {}
+      : base_type(static_cast<
+                  const umpire_allocator_impl<std::decay_t<U>>&>(
+            rhs)) {}

   template <class U>
   friend bool operator==(const host_allocator_impl<U>& lhs,
diff --git a/src/TiledArray/math/linalg/non-distributed/cholesky.h b/src/TiledArray/math/linalg/non-distributed/cholesky.h
index 4196002533..fc96a6bf1c 100644
--- a/src/TiledArray/math/linalg/non-distributed/cholesky.h
+++ b/src/TiledArray/math/linalg/non-distributed/cholesky.h
@@ -42,9 +42,7 @@ auto rank_local_cholesky(const DistArray& A) {
   World& world = A.world();
   auto A_eig = detail::make_matrix(A);
-  if (world.rank() == 0) {
-    linalg::rank_local::cholesky(A_eig);
-  }
+  TA_LAPACK_ON_RANK_ZERO(cholesky, world, A_eig);
   world.gop.broadcast_serializable(A_eig, 0);
   return A_eig;
 }
@@ -140,11 +138,20 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) {

   // if need to return L use its copy to compute inverse
   decltype(L_eig) L_inv_eig;
+  std::optional<lapack::Error> error_opt;
   if (world.rank() == 0) {
-    if (Both) L_inv_eig = L_eig;
-    auto& L_inv_eig_ref = Both ? L_inv_eig : L_eig;
-    linalg::rank_local::cholesky_linv(L_inv_eig_ref);
-    detail::zero_out_upper_triangle(L_inv_eig_ref);
+    try {
+      if (Both) L_inv_eig = L_eig;
+      auto& L_inv_eig_ref = Both ? L_inv_eig : L_eig;
+      linalg::rank_local::cholesky_linv(L_inv_eig_ref);
+      detail::zero_out_upper_triangle(L_inv_eig_ref);
+    } catch (lapack::Error& err) {
+      error_opt = err;
+    }
+  }
+  world.gop.broadcast_serializable(error_opt, 0);
+  if (error_opt) {
+    throw error_opt.value();
   }
   world.gop.broadcast_serializable(Both ? L_inv_eig : L_eig, 0);
@@ -169,9 +176,7 @@ auto cholesky_solve(const Array& A, const Array& B,
   auto A_eig = detail::make_matrix(A);
   auto X_eig = detail::make_matrix(B);
   World& world = A.world();
-  if (world.rank() == 0) {
-    linalg::rank_local::cholesky_solve(A_eig, X_eig);
-  }
+  TA_LAPACK_ON_RANK_ZERO(cholesky_solve, world, A_eig, X_eig);
   world.gop.broadcast_serializable(X_eig, 0);
   if (x_trange.rank() == 0) x_trange = B.trange();
   return eigen_to_array<Array>(world, x_trange, X_eig);
@@ -192,9 +197,7 @@ auto cholesky_lsolve(Op transpose, const Array& A, const Array& B,
                 "scalar types");

   auto X_eig = detail::make_matrix(B);
-  if (world.rank() == 0) {
-    linalg::rank_local::cholesky_lsolve(transpose, L_eig, X_eig);
-  }
+  TA_LAPACK_ON_RANK_ZERO(cholesky_lsolve, world, transpose, L_eig, X_eig);
   world.gop.broadcast_serializable(X_eig, 0);
   if (l_trange.rank() == 0) l_trange = A.trange();
   if (x_trange.rank() == 0) x_trange = B.trange();
diff --git a/src/TiledArray/math/linalg/non-distributed/heig.h b/src/TiledArray/math/linalg/non-distributed/heig.h
index 5490b6b757..85079f356c 100644
--- a/src/TiledArray/math/linalg/non-distributed/heig.h
+++ b/src/TiledArray/math/linalg/non-distributed/heig.h
@@ -56,9 +56,7 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange()) {
   World& world = A.world();
   auto A_eig = detail::make_matrix(A);
   std::vector evals;
-  if (world.rank() == 0) {
-    linalg::rank_local::heig(A_eig, evals);
-  }
+  TA_LAPACK_ON_RANK_ZERO(heig, world, A_eig, evals);
   world.gop.broadcast_serializable(A_eig, 0);
   world.gop.broadcast_serializable(evals, 0);
   if (evec_trange.rank() == 0) evec_trange = A.trange();
@@ -99,9 +97,7 @@ auto heig(const ArrayA& A, const ArrayB& B,
   auto A_eig = detail::make_matrix(A);
   auto B_eig = detail::make_matrix(B);
   std::vector evals;
-  if (world.rank() == 0) {
-    linalg::rank_local::heig(A_eig, B_eig, evals);
-  }
+  TA_LAPACK_ON_RANK_ZERO(heig, world, A_eig, B_eig, evals);
   world.gop.broadcast_serializable(A_eig, 0);
   world.gop.broadcast_serializable(evals, 0);
   if (evec_trange.rank() == 0) evec_trange = A.trange();
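From the caller's perspective the macro-based error propagation is transparent. Assuming `heig` still returns a tuple of eigenvalues and the eigenvector array (as the broadcast/return structure above suggests), a hedged usage sketch:

```cpp
// A is a symmetric TA array; this line executes on every rank.
// LAPACK runs on rank 0 only; with this patch, a lapack::Error thrown
// there is broadcast and rethrown on all ranks instead of deadlocking
// the ranks that never entered the rank-0 branch.
auto [evals, evecs] = TiledArray::math::linalg::non_distributed::heig(A);
```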
diff --git a/src/TiledArray/math/linalg/non-distributed/lu.h b/src/TiledArray/math/linalg/non-distributed/lu.h
index d1b06bbb1c..6a3e1ea424 100644
--- a/src/TiledArray/math/linalg/non-distributed/lu.h
+++ b/src/TiledArray/math/linalg/non-distributed/lu.h
@@ -27,9 +27,9 @@

 #include

-#include
-#include
 #include
+#include
+#include

 namespace TiledArray::math::linalg::non_distributed {

@@ -37,15 +37,14 @@
 /**
  * @brief Solve a linear system via LU factorization
  */
 template <typename ArrayA, typename ArrayB>
-auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) {
+auto lu_solve(const ArrayA& A, const ArrayB& B,
+              TiledRange x_trange = TiledRange()) {
   (void)detail::array_traits<ArrayA>{};
   (void)detail::array_traits<ArrayB>{};
   auto& world = A.world();
   auto A_eig = detail::make_matrix(A);
   auto B_eig = detail::make_matrix(B);
-  if (world.rank() == 0) {
-    linalg::rank_local::lu_solve(A_eig, B_eig);
-  }
+  TA_LAPACK_ON_RANK_ZERO(lu_solve, world, A_eig, B_eig);
   world.gop.broadcast_serializable(B_eig, 0);
   if (x_trange.rank() == 0) x_trange = B.trange();
   return eigen_to_array<ArrayB>(world, x_trange, B_eig);
 }
@@ -59,14 +58,12 @@ auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) {
   (void)detail::array_traits<Array>{};
   auto& world = A.world();
   auto A_eig = detail::make_matrix(A);
-  if (world.rank() == 0) {
-    linalg::rank_local::lu_inv(A_eig);
-  }
+  TA_LAPACK_ON_RANK_ZERO(lu_inv, world, A_eig);
   world.gop.broadcast_serializable(A_eig, 0);
   if (ainv_trange.rank() == 0) ainv_trange = A.trange();
   return eigen_to_array<Array>(A.world(), ainv_trange, A_eig);
 }

-}  // namespace TiledArray::math::linalg::lapack
+}  // namespace TiledArray::math::linalg::non_distributed

 #endif  // TILEDARRAY_MATH_LINALG_NON_DISTRIBUTED_LU_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/non-distributed/qr.h b/src/TiledArray/math/linalg/non-distributed/qr.h
index e43cec632d..b66ee222ea 100644
--- a/src/TiledArray/math/linalg/non-distributed/qr.h
+++ b/src/TiledArray/math/linalg/non-distributed/qr.h
@@ -3,35 +3,32 @@

 #include

-#include
-#include
 #include
+#include
+#include

 namespace TiledArray::math::linalg::non_distributed {

 template <bool QOnly, typename ArrayV>
-auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(),
-                     TiledRange r_trange = TiledRange() ) {
-
+auto householder_qr(const ArrayV& V, TiledRange q_trange = TiledRange(),
+                    TiledRange r_trange = TiledRange()) {
   (void)detail::array_traits<ArrayV>{};
   auto& world = V.world();
   auto V_eig = detail::make_matrix(V);
   decltype(V_eig) R_eig;

-  if( !world.rank() ) {
-    linalg::rank_local::householder_qr( V_eig, R_eig );
-  }
-  world.gop.broadcast_serializable( V_eig, 0 );
-  if(q_trange.rank() == 0) q_trange = V.trange();
-  auto Q = eigen_to_array<ArrayV>( world, q_trange, V_eig );
+  TA_LAPACK_ON_RANK_ZERO(householder_qr, world, V_eig, R_eig);
+  world.gop.broadcast_serializable(V_eig, 0);
+  if (q_trange.rank() == 0) q_trange = V.trange();
+  auto Q = eigen_to_array<ArrayV>(world, q_trange, V_eig);
   if constexpr (not QOnly) {
-    world.gop.broadcast_serializable( R_eig, 0 );
+    world.gop.broadcast_serializable(R_eig, 0);
     if (r_trange.rank() == 0) {
       // Generate a TRange based on column tiling of V
       auto col_tiling = V.trange().dim(1);
-      r_trange = TiledRange( {col_tiling, col_tiling} );
+      r_trange = TiledRange({col_tiling, col_tiling});
     }
-    auto R = eigen_to_array<ArrayV>( world, r_trange, R_eig );
-    return std::make_tuple( Q, R );
+    auto R = eigen_to_array<ArrayV>(world, r_trange, R_eig);
+    return std::make_tuple(Q, R);
   } else {
     return Q;
   }
diff --git a/src/TiledArray/math/linalg/non-distributed/svd.h b/src/TiledArray/math/linalg/non-distributed/svd.h
index e6ea5ef1da..e0094ef906 100644
--- a/src/TiledArray/math/linalg/non-distributed/svd.h
+++ b/src/TiledArray/math/linalg/non-distributed/svd.h
@@ -75,9 +75,7 @@ auto svd(const Array& A, TiledRange u_trange = TiledRange(),

   if constexpr (need_u) U = std::make_unique<matrix_type>();
   if constexpr (need_vt) VT = std::make_unique<matrix_type>();

-  if (world.rank() == 0) {
-    linalg::rank_local::svd(A_eig, S, U.get(), VT.get());
-  }
+  TA_LAPACK_ON_RANK_ZERO(svd, world, A_eig, S, U.get(), VT.get());

   world.gop.broadcast_serializable(S, 0);
   if (U) world.gop.broadcast_serializable(*U, 0);
diff --git a/src/TiledArray/math/linalg/rank-local.cpp b/src/TiledArray/math/linalg/rank-local.cpp
index 74e1aac526..d23f3b4e3f 100644
--- a/src/TiledArray/math/linalg/rank-local.cpp
+++ b/src/TiledArray/math/linalg/rank-local.cpp
@@ -40,7 +40,7 @@ inline int ta_lapack_fortran_call(F f, Args... args) {
   return info;
 }

-#define TA_LAPACK_ERROR(F) throw std::runtime_error("lapack::" #F " failed")
+#define TA_LAPACK_ERROR(F) throw lapack::Error("lapack::" #F " failed")

 #define TA_LAPACK_FORTRAN_CALL(F, ARGS...) \
  ((ta_lapack_fortran_call(F, ARGS) == 0) || (TA_LAPACK_ERROR(F), 0))
diff --git a/src/TiledArray/math/linalg/rank-local.h b/src/TiledArray/math/linalg/rank-local.h
index 5c46550bd3..f7db4abd01 100644
--- a/src/TiledArray/math/linalg/rank-local.h
+++ b/src/TiledArray/math/linalg/rank-local.h
@@ -71,4 +71,42 @@ void householder_qr(Matrix &V, Matrix &R);

 }  // namespace TiledArray::math::linalg::rank_local

+namespace madness::archive {
+
+/// Serialize (deserialize) a lapack::Error
+
+/// \tparam Archive The archive type.
+template <class Archive>
+struct ArchiveSerializeImpl<Archive, lapack::Error> {
+  static inline void serialize(const Archive &ar, lapack::Error &e) {
+    MAD_ARCHIVE_DEBUG(std::cout << "(de)serialize lapack::Error" << std::endl);
+    if constexpr (is_output_archive_v<Archive>) {  // serialize
+      const std::string msg = e.what();
+      ar &msg;
+    } else {
+      std::string msg;
+      ar &msg;
+      e = lapack::Error(msg);
+    }
+  }
+};
+
+}  // namespace madness::archive
+
+/// TA_LAPACK_ON_RANK_ZERO(fn,args...) invokes linalg::rank_local::fn(args...)
+/// on rank 0 and broadcasts/rethrows the exception, if any
+#define TA_LAPACK_ON_RANK_ZERO(fn, world, args...)  \
+  std::optional<lapack::Error> error_opt;           \
+  if (world.rank() == 0) {                          \
+    try {                                           \
+      linalg::rank_local::fn(args);                 \
+    } catch (lapack::Error & err) {                 \
+      error_opt = err;                              \
+    }                                               \
+  }                                                 \
+  world.gop.broadcast_serializable(error_opt, 0);   \
+  if (error_opt) {                                  \
+    throw error_opt.value();                        \
+  }
+
 #endif  // TILEDARRAY_MATH_LINALG_RANK_LOCAL_H__INCLUDED
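For reference, this is the boilerplate the macro collapses in each solver above; a hedged sketch of a wrapper in the style of `rank_local_cholesky` (the function name is ours):

```cpp
namespace TiledArray::math::linalg::non_distributed {

// hypothetical wrapper, mirroring rank_local_cholesky in this patch
template <typename Array>
auto cholesky_factor(const Array& A) {
  auto& world = A.world();
  auto A_eig = detail::make_matrix(A);
  // runs LAPACK on rank 0 only; a lapack::Error thrown there is serialized
  // (via the ArchiveSerializeImpl specialization above), broadcast, and
  // rethrown on every rank, so all ranks fail in lockstep
  TA_LAPACK_ON_RANK_ZERO(cholesky, world, A_eig);
  world.gop.broadcast_serializable(A_eig, 0);  // replicate the factor
  return A_eig;
}

}  // namespace TiledArray::math::linalg::non_distributed
```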
diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h
index b7a38d38b0..b4f2a0d48f 100644
--- a/src/TiledArray/range.h
+++ b/src/TiledArray/range.h
@@ -49,9 +49,12 @@ class Range {
   typedef Range Range_;                ///< This object type
   typedef TA_1INDEX_TYPE index1_type;  ///< 1-index type, to conform to
                                        ///< Tensor Working Group (TWG) spec
+  typedef std::make_signed_t<index1_type>
+      index1_difference_type;  ///< type representing difference of 1-indices
   typedef container::svector<index1_type>
-      index_type;  ///< Coordinate index type, to conform to
-                   ///< TWG spec
+      index_type;  ///< Coordinate index type, to conform to
+                   ///< TWG spec
+  typedef container::svector<index1_difference_type> index_difference_type;
   typedef index_type index;  ///< Coordinate index type (deprecated)
   typedef detail::SizeArray<const index1_type>
       index_view_type;  ///< Non-owning variant of index_type
diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h
index 7346f45d1c..bf51487922 100644
--- a/src/TiledArray/sparse_shape.h
+++ b/src/TiledArray/sparse_shape.h
@@ -26,6 +26,8 @@
 #ifndef TILEDARRAY_SPARSE_SHAPE_H__INCLUDED
 #define TILEDARRAY_SPARSE_SHAPE_H__INCLUDED

+#include
+
 #include
 #include
 #include
diff --git a/src/TiledArray/special/diagonal_array.h b/src/TiledArray/special/diagonal_array.h
index 825d66fd98..dd62db1498 100644
--- a/src/TiledArray/special/diagonal_array.h
+++ b/src/TiledArray/special/diagonal_array.h
@@ -267,7 +267,7 @@ Array diagonal_array(World &world, TiledRange const &trange, T val = 1) {
 /// \param[in] diagonals_begin the begin iterator of the range of the diagonals
 /// \param[in] diagonals_end the end iterator of the range of the diagonals;
 /// if not given, default initialized and thus will not be checked
-/// \return a constant diagonal DistArray
+/// \return a diagonal DistArray
 template <typename Array, typename RandomAccessIterator>
 std::enable_if_t<detail::is_iterator<RandomAccessIterator>::value, Array>
 diagonal_array(World &world, TiledRange const &trange,
diff --git a/src/TiledArray/tensor/kernels.h b/src/TiledArray/tensor/kernels.h
index 81141f4982..2cd2d46fe3 100644
--- a/src/TiledArray/tensor/kernels.h
+++ b/src/TiledArray/tensor/kernels.h
@@ -28,6 +28,7 @@

 #include
 #include
+#include

 namespace TiledArray {

@@ -107,12 +108,14 @@ inline TR tensor_op(Op&& op, const Permutation& perm, const T1& tensor1,
 }

 /// provides transform functionality to class \p T, useful for nonintrusive
-/// extension of a tensor type \p T to be usable as element type \p T in \c
-/// Tensor \tparam T a tensor type \note The default implementation
+/// extension of a tensor type \p T to be usable as element type \p T in
+/// \c Tensor
+/// \tparam T a tensor type
+/// \note The default implementation
 /// constructs T, then computes it by coiterating over elements of the argument
 /// tensors and transforming with the transform \c Op .
-/// This should be specialized for classes like TiledArray::Tensor that
-/// already include the appropriate transform constructors already
+/// This should be specialized for classes like TiledArray::Tensor that
+/// already include the appropriate transform constructors
 template <typename T>
 struct transform {
   /// creates a result tensor in which element \c i is obtained by \c
@@ -283,29 +286,23 @@ inline void inplace_tensor_op(InputOp&& input_op, OutputOp&& output_op,
 /// \endcode
 /// The expected signature of the output
 /// operations is:
-/// \code void op(TR::value_type::value_type*, const
+/// \code
+/// void op(TR::value_type::value_type*, const
 /// TR::value_type::value_type)
 /// \endcode
-/// \tparam InputOp The input operation
-/// type
+/// \tparam InputOp The input operation type
 /// \tparam OutputOp The output operation type
-/// \tparam TR The result tensor
-/// type
+/// \tparam TR The result tensor type
 /// \tparam T1 The first argument tensor type
-/// \tparam Ts The remaining
-/// argument tensor types
+/// \tparam Ts The remaining argument tensor types
 /// \param[in] input_op The operation that is used to
 /// generate the output value from the input arguments
-/// \param[in] output_op The
-/// operation that is used to set the value of the result tensor given the
-/// element pointer and the result value
-/// \param[in] perm The permutation applied
-/// to the argument tensors
+/// \param[in] output_op The operation that is used to set the value
+/// of the result tensor given the element pointer and the result value
+/// \param[in] perm The permutation applied to the argument tensors
 /// \param[in,out] result The result tensor
-/// \param[in]
-/// tensor1 The first argument tensor
-/// \param[in] tensors The remaining argument
-/// tensors
+/// \param[in] tensor1 The first argument tensor
+/// \param[in] tensors The remaining argument tensors
 template <typename Op, typename TR, typename... Ts,
           typename std::enable_if<(is_tensor<TR, Ts...>::value ||
                                    is_tensor_of_tensor<TR, Ts...>::value) &&
                                   is_contiguous_tensor<TR, Ts...>::value>::type* = nullptr>
 inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
   TA_ASSERT(!empty(result, tensors...));
   TA_ASSERT(is_range_set_congruent(result, tensors...));

   const auto volume = result.range().volume();

-  const auto stride = inner_size(result, tensors...);
-  for (decltype(result.range().volume()) i = 0ul; i < volume; i += stride)
-    math::inplace_vector_op(std::forward<Op>(op), stride,
-                            result.data() + result.range().ordinal(i),
-                            (tensors.data() + tensors.range().ordinal(i))...);
+  if constexpr (detail::has_member_function_data_anyreturn_v<TR> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(result, tensors...);
+    for (std::decay_t<decltype(volume)> i = 0ul; i < volume; i += stride)
+      math::inplace_vector_op(std::forward<Op>(op), stride,
+                              result.data() + result.range().ordinal(i),
+                              (tensors.data() + tensors.range().ordinal(i))...);
+  } else {  // if 1+ tensor lacks data() must iterate over individual elements
+    auto& result_rng = result.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto result_lobound = signed_idx_t(result_rng.lobound());
+    for (auto&& idx : result_rng) {
+      using namespace container::operators;
+      std::forward<Op>(op)(
+          result[idx], (tensors[idx - result_lobound +
+                                signed_idx_t(tensors.range().lobound())])...);
+    }
+  }
 }

 /// In-place tensor of tensors operations with non-contiguous data

@@ -384,20 +394,33 @@ inline
 void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
   TA_ASSERT(!empty(result, tensors...));
   TA_ASSERT(is_range_set_congruent(result, tensors...));

-  const auto stride = inner_size(result, tensors...);
   const auto volume = result.range().volume();

-  auto inplace_tensor_range =
-      [&op, stride](
-          typename TR::pointer MADNESS_RESTRICT const result_data,
-          typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
-        for (decltype(result.range().volume()) i = 0ul; i < stride; ++i)
-          inplace_tensor_op(op, result_data[i], tensors_data[i]...);
-      };
-
-  for (decltype(result.range().volume()) ord = 0ul; ord < volume; ord += stride)
-    inplace_tensor_range(result.data() + result.range().ordinal(ord),
-                         (tensors.data() + tensors.range().ordinal(ord))...);
+  if constexpr (detail::has_member_function_data_anyreturn_v<TR> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(result, tensors...);
+    auto inplace_tensor_range =
+        [&op, stride](
+            typename TR::pointer MADNESS_RESTRICT const result_data,
+            typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
+          for (decltype(result.range().volume()) i = 0ul; i < stride; ++i)
+            inplace_tensor_op(op, result_data[i], tensors_data[i]...);
+        };
+
+    for (std::decay_t<decltype(volume)> ord = 0ul; ord < volume; ord += stride)
+      inplace_tensor_range(result.data() + result.range().ordinal(ord),
+                           (tensors.data() + tensors.range().ordinal(ord))...);
+  } else {  // if 1+ tensor lacks data() must iterate over individual elements
+    auto& result_rng = result.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto result_lobound = signed_idx_t(result_rng.lobound());
+    for (auto&& idx : result_rng) {
+      using namespace container::operators;
+      std::forward<Op>(op)(
+          result[idx], (tensors[idx - result_lobound +
+                                signed_idx_t(tensors.range().lobound())])...);
+    }
+  }
 }

 // -------------------------------------------------------------------------
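The recurring `if constexpr` dispatch above is worth isolating: contiguous kernels run over the raw buffer when every argument exposes `data()`, and fall back to index-based access (translated into each tensor's own, possibly non-zero, lower bound via the new signed `index_difference_type`) otherwise. A toy sketch of the pattern, assuming the `has_member_function_data_anyreturn_v` trait is reachable from the TA type-traits header (the function is ours):

```cpp
#include <tiledarray.h>

// fast path over the contiguous buffer when data() exists,
// generic indexed fallback otherwise -- the same shape as the kernels above
template <typename T>
auto sum_all(const T& t) {
  typename T::value_type s{};
  if constexpr (TiledArray::detail::has_member_function_data_anyreturn_v<T>) {
    const auto volume = t.range().volume();
    for (std::decay_t<decltype(volume)> i = 0ul; i < volume; ++i)
      s += t.data()[i];
  } else {
    for (auto&& idx : t.range()) s += t[idx];  // element-wise access
  }
  return s;
}
```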
-/// \tparam Op The element initialization -/// operation type +/// \tparam Op The element initialization operation type /// \tparam TR The result tensor type -/// \tparam T1 The first -/// argument tensor type +/// \tparam T1 The first argument tensor type /// \tparam Ts The argument tensor types -/// \param[in] op The -/// result tensor element initialization operation -/// \param[in] perm The -/// permutation that will be applied to tensor2 -/// \param[out] result The result -/// tensor +/// \param[in] op The result tensor element initialization operation +/// \param[in] perm The permutation that will be applied to tensor2 +/// \param[out] result The result tensor /// \param[in] tensor1 The first argument tensor -/// \param[in] tensors The -/// argument tensors +/// \param[in] tensors The argument tensors template < typename Op, typename TR, typename T1, typename... Ts, typename std::enable_if::value>::type* = nullptr> @@ -505,8 +522,7 @@ inline void tensor_init(Op&& op, const Permutation& perm, TR& result, /// This function initializes the \c i -th element of \c result with the result /// of \c op(tensor1[i], tensors[i]...) -/// \pre The memory of \c result has been -/// allocated but not initialized. +/// \pre The memory of \c result has been allocated but not initialized. /// \tparam Op The element initialization operation type /// \tparam Perm A permutation type /// \tparam TR The result tensor type @@ -546,18 +562,13 @@ inline void tensor_init(Op&& op, const Permutation& perm, TR& result, /// This function initializes the \c i -th element of \c result with the result /// of \c op(tensor1[i], tensors[i]...) -/// \pre The memory of \c tensor1 has been -/// allocated but not initialized. -/// \tparam Op The element initialization -/// operation type +/// \pre The memory of \c tensor1 has been allocated but not initialized. +/// \tparam Op The element initialization operation type /// \tparam T1 The result tensor type -/// \tparam Ts The argument -/// tensor types -/// \param[in] op The result tensor element initialization -/// operation +/// \tparam Ts The argument tensor types +/// \param[in] op The result tensor element initialization operation /// \param[out] result The result tensor -/// \param[in] tensor1 The first -/// argument tensor +/// \param[in] tensor1 The first argument tensor /// \param[in] tensors The argument tensors template < typename Op, typename TR, typename T1, typename... 
     typename Op, typename TR, typename T1, typename... Ts,
@@ -569,7 +580,6 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
   TA_ASSERT(!empty(result, tensor1, tensors...));
   TA_ASSERT(is_range_set_congruent(result, tensor1, tensors...));

-  const auto stride = inner_size(tensor1, tensors...);
   const auto volume = tensor1.range().volume();

   auto wrapper_op = [&op](typename TR::pointer MADNESS_RESTRICT result_ptr,
@@ -578,11 +588,27 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
     new (result_ptr) typename T1::value_type(op(value1, values...));
   };

-  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
-       ord += stride)
-    math::vector_ptr_op(wrapper_op, stride, result.data() + ord,
-                        (tensor1.data() + tensor1.range().ordinal(ord)),
-                        (tensors.data() + tensors.range().ordinal(ord))...);
+  if constexpr (detail::has_member_function_data_anyreturn_v<T1> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(tensor1, tensors...);
+    for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
+         ord += stride)
+      math::vector_ptr_op(wrapper_op, stride, result.data() + ord,
+                          (tensor1.data() + tensor1.range().ordinal(ord)),
+                          (tensors.data() + tensors.range().ordinal(ord))...);
+  } else {  // if 1+ tensor lacks data() must iterate over individual elements
+    auto& result_rng = result.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto result_lobound = signed_idx_t(result_rng.lobound());
+    for (auto&& idx : result_rng) {
+      using namespace container::operators;
+      const signed_idx_t relidx = idx - result_lobound;
+      wrapper_op(
+          &(result[idx]),
+          tensor1[relidx + signed_idx_t(tensor1.range().lobound())],
+          (tensors[relidx + signed_idx_t(tensors.range().lobound())])...);
+    }
+  }
 }

 /// Initialize tensor with one or more non-contiguous tensor arguments

@@ -591,13 +617,10 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
 /// This function initializes the \c i -th element of \c result with the result
 /// of \c op(tensor1[i],tensors[i]...)
 /// \pre The memory of \c tensor1 has been
 /// allocated but not initialized.
-/// \tparam Op The element initialization
-/// operation type
+/// \tparam Op The element initialization operation type
 /// \tparam T1 The result tensor type
-/// \tparam Ts The argument
-/// tensor types
-/// \param[in] op The result tensor element initialization
-/// operation
+/// \tparam Ts The argument tensor types
+/// \param[in] op The result tensor element initialization operation
 /// \param[out] result The result tensor
 /// \param[in] tensor1 The first
 /// argument tensor
@@ -612,24 +635,40 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
   TA_ASSERT(!empty(result, tensor1, tensors...));
   TA_ASSERT(is_range_set_congruent(result, tensor1, tensors...));

-  const auto stride = inner_size(tensor1, tensors...);
   const auto volume = tensor1.range().volume();

-  auto inplace_tensor_range =
-      [&op, stride](
-          typename TR::pointer MADNESS_RESTRICT const result_data,
-          typename T1::const_pointer MADNESS_RESTRICT const tensor1_data,
-          typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
-        for (decltype(result.range().volume()) i = 0ul; i < stride; ++i)
-          new (result_data + i)
-              typename TR::value_type(tensor_op<typename TR::value_type>(
-                  op, tensor1_data[i], tensors_data[i]...));
-      };
-
-  for (decltype(volume) ord = 0ul; ord < volume; ord += stride)
-    inplace_tensor_range(result.data() + ord,
-                         (tensor1.data() + tensor1.range().ordinal(ord)),
-                         (tensors.data() + tensors.range().ordinal(ord))...);
+  if constexpr (detail::has_member_function_data_anyreturn_v<T1> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(tensor1, tensors...);
+    auto inplace_tensor_range =
+        [&op, stride](
+            typename TR::pointer MADNESS_RESTRICT const result_data,
+            typename T1::const_pointer MADNESS_RESTRICT const tensor1_data,
+            typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
+          for (std::decay_t<decltype(stride)> i = 0ul; i < stride; ++i)
+            new (result_data + i)
+                typename TR::value_type(tensor_op<typename TR::value_type>(
+                    op, tensor1_data[i], tensors_data[i]...));
+        };
+
+    for (std::decay_t<decltype(volume)> ord = 0ul; ord < volume; ord += stride)
+      inplace_tensor_range(result.data() + ord,
+                           (tensor1.data() + tensor1.range().ordinal(ord)),
+                           (tensors.data() + tensors.range().ordinal(ord))...);
+  } else {
+    auto& result_rng = result.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto result_lobound = signed_idx_t(result_rng.lobound());
+    for (auto&& idx : result_rng) {
+      using namespace container::operators;
+      const signed_idx_t relidx = idx - result_lobound;
+
+      new (&(result[idx]))
+          typename TR::value_type(tensor_op<typename TR::value_type>(
+              op, tensor1[relidx + signed_idx_t(tensor1.range().lobound())],
+              (tensors[relidx + signed_idx_t(tensors.range().lobound())])...));
+    }
+  }
 }

 // -------------------------------------------------------------------------

@@ -639,12 +678,11 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
 /// Perform an element-wise reduction of the tensors by
 /// executing join_op(result, reduce_op(result, &tensor1[i],
-/// &tensors[i]...)) for each \c i in the index range of \c tensor1 . \c
-/// result is initialized to \c identity . If HAVE_INTEL_TBB is defined, the
-/// reduction will be executed in an undefined order, otherwise will execute in
-/// the order of increasing \c i .
-/// \tparam ReduceOp The element-wise reduction
-/// operation type
+/// &tensors[i]...)) for each \c i in the index range of \c tensor1 .
+/// \c result is initialized to \c identity . If `HAVE_INTEL_TBB` is defined,
+/// the reduction will be executed in an undefined order; otherwise it will
+/// execute in the order of increasing \c i .
+/// \tparam ReduceOp The element-wise reduction operation type
 /// \tparam JoinOp The result operation type
 /// \tparam Identity A type that can be used as an argument to ReduceOp
 /// \tparam T1 The first argument tensor type
@@ -708,10 +746,10 @@ auto tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op, Scalar identity,

 /// Perform reduction of the tensor-of-tensors' elements by
 /// executing join_op(result, reduce_op(tensor1[i], tensors[i]...)) for
-/// each \c i in the index range of \c tensor1 . \c result is initialized to \c
-/// identity . This will execute serially, in the order of increasing \c i (each
-/// element's reduction can however be executed in parallel, depending on the
-/// element type).
+/// each \c i in the index range of \c tensor1 . \c result is initialized to
+/// \c identity . This will execute serially, in the order of increasing
+/// \c i (each element's reduction can however be executed in parallel,
+/// depending on the element type).
 /// \tparam ReduceOp The tensor-wise reduction operation type
 /// \tparam JoinOp The result operation type
 /// \tparam Scalar A scalar type
@@ -751,10 +789,10 @@ auto tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,

 /// Perform an element-wise reduction of the tensors by
 /// executing join_op(result, reduce_op(tensor1[i], tensors[i]...)) for
-/// each \c i in the index range of \c tensor1 . \c result is initialized to \c
-/// identity . This will execute serially, in the order of increasing \c i (each
-/// element-wise reduction can however be executed in parallel, depending on the
-/// element type).
+/// each \c i in the index range of \c tensor1 . \c result is initialized to
+/// \c identity . This will execute serially, in the order of increasing
+/// \c i (each element-wise reduction can however be executed in parallel,
+/// depending on the element type).
 /// \tparam ReduceOp The element-wise reduction operation type
 /// \tparam JoinOp The result operation type
 /// \tparam Scalar A scalar type
@@ -777,17 +815,31 @@ auto tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,
   TA_ASSERT(!empty(tensor1, tensors...));
   TA_ASSERT(is_range_set_congruent(tensor1, tensors...));

-  const auto stride = inner_size(tensor1, tensors...);
   const auto volume = tensor1.range().volume();

   auto result = identity;
-  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
-       ord += stride) {
-    auto temp = identity;
-    math::reduce_op(reduce_op, join_op, identity, stride, temp,
-                    tensor1.data() + tensor1.range().ordinal(ord),
-                    (tensors.data() + tensors.range().ordinal(ord))...);
-    join_op(result, temp);
+  if constexpr (detail::has_member_function_data_anyreturn_v<T1> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(tensor1, tensors...);
+    for (std::decay_t<decltype(volume)> ord = 0ul; ord < volume;
+         ord += stride) {
+      auto temp = identity;
+      math::reduce_op(reduce_op, join_op, identity, stride, temp,
+                      tensor1.data() + tensor1.range().ordinal(ord),
+                      (tensors.data() + tensors.range().ordinal(ord))...);
+      join_op(result, temp);
+    }
+  } else {  // if 1+ tensor lacks data() must iterate over individual elements
+    auto& t1_rng = tensor1.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto t1_lobound = signed_idx_t(t1_rng.lobound());
+    for (auto&& idx : t1_rng) {
+      using namespace container::operators;
+      const signed_idx_t relidx = idx - t1_lobound;
+      reduce_op(result, tensor1[idx],
+                (tensors[relidx +
+                         signed_idx_t(tensors.range().lobound())])...);
+    }
   }

   return result;
@@ -797,10 +849,11 @@ auto tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,

 /// Perform an element-wise reduction of the tensors by
 /// executing join_op(result, reduce_op(tensor1[i], tensors[i]...)) for
-/// each \c i in the index range of \c tensor1 . \c result is initialized to \c
-/// identity . This will execute serially, in the order of increasing \c i (each
-/// element-wise reduction can however be executed in parallel, depending on the
-/// element type). \tparam ReduceOp The element-wise reduction operation type
+/// each \c i in the index range of \c tensor1 . \c result is initialized to
+/// \c identity . This will execute serially, in the order of increasing
+/// \c i (each element-wise reduction can however be executed in parallel,
+/// depending on the element type).
+/// \tparam ReduceOp The element-wise reduction operation type
 /// \tparam JoinOp The result operation type
 /// \tparam Scalar A scalar type
 /// \tparam T1 The first argument tensor type
@@ -822,31 +875,49 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,
   TA_ASSERT(!empty(tensor1, tensors...));
   TA_ASSERT(is_range_set_congruent(tensor1, tensors...));

-  const auto stride = inner_size(tensor1, tensors...);
   const auto volume = tensor1.range().volume();

-  auto tensor_reduce_range =
-      [&reduce_op, &join_op, &identity, stride](
-          Scalar& MADNESS_RESTRICT result,
-          typename T1::const_pointer MADNESS_RESTRICT const tensor1_data,
-          typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
-        for (decltype(result.range().volume()) i = 0ul; i < stride; ++i) {
-          Scalar temp = tensor_reduce(reduce_op, join_op, identity,
-                                      tensor1_data[i], tensors_data[i]...);
-          join_op(result, temp);
-        }
-      };
-
   Scalar result = identity;
-  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
-       ord += stride) {
-    Scalar temp = tensor_reduce_range(
-        result, tensor1.data() + tensor1.range().ordinal(ord),
-        (tensors.data() + tensors.range().ordinal(ord))...);
-    join_op(result, temp);
+
+  if constexpr (detail::has_member_function_data_anyreturn_v<T1> &&
+                (detail::has_member_function_data_anyreturn_v<Ts> && ...)) {
+    const auto stride = inner_size(tensor1, tensors...);
+    auto tensor_reduce_range =
+        [&reduce_op, &join_op, &identity, stride](
+            Scalar& MADNESS_RESTRICT result,
+            typename T1::const_pointer MADNESS_RESTRICT const tensor1_data,
+            typename Ts::const_pointer MADNESS_RESTRICT const... tensors_data) {
+          for (decltype(result.range().volume()) i = 0ul; i < stride; ++i) {
+            Scalar temp = tensor_reduce(reduce_op, join_op, identity,
+                                        tensor1_data[i], tensors_data[i]...);
+            join_op(result, temp);
+          }
+        };
+
+    for (std::decay_t<decltype(volume)> ord = 0ul; ord < volume;
+         ord += stride) {
+      Scalar temp = tensor_reduce_range(
+          result, tensor1.data() + tensor1.range().ordinal(ord),
+          (tensors.data() + tensors.range().ordinal(ord))...);
+      join_op(result, temp);
+    }
+  } else {  // if 1+ tensor lacks data() must iterate over individual elements
+    auto& t1_rng = tensor1.range();
+    using signed_idx_t = Range::index_difference_type;
+    auto t1_lobound = signed_idx_t(t1_rng.lobound());
+    for (auto&& idx : t1_rng) {
+      using namespace container::operators;
+      const signed_idx_t relidx = idx - t1_lobound;
+
+      Scalar temp =
+          tensor_reduce(reduce_op, join_op, identity, tensor1[idx],
+                        (tensors[relidx +
+                                 signed_idx_t(tensors.range().lobound())])...);
+      join_op(result, temp);
+    }
   }

-  return identity;
+  return result;
 }

 }  // namespace detail
diff --git a/src/TiledArray/tensor/operators.h b/src/TiledArray/tensor/operators.h
index f7c7a5f2ae..4be515d3b3 100644
--- a/src/TiledArray/tensor/operators.h
+++ b/src/TiledArray/tensor/operators.h
@@ -41,11 +41,13 @@ namespace TiledArray {
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to left[i] + right[i]
 template <typename T1, typename T2,
-          typename std::enable_if<detail::is_tensor<T1, T2>::value ||
-                                  detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator+(const T1& left, const T2& right) {
-  return add(left, right);
+          typename = std::enable_if_t<
+              detail::is_tensor<detail::remove_cvr_t<T1>,
+                                detail::remove_cvr_t<T2>>::value ||
+              detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                          detail::remove_cvr_t<T2>>::value>>
+inline decltype(auto) operator+(T1&& left, T2&& right) {
+  return add(std::forward<T1>(left), std::forward<T2>(right));
 }

 /// Tensor minus operator
 /// \param left The left-hand tensor argument
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to left[i] - right[i]
-template <typename T1, typename T2,
-          typename std::enable_if<detail::is_tensor<T1, T2>::value ||
-                                  detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator-(const T1& left, const T2& right) {
-  return subt(left, right);
+template <
+    typename T1, typename T2,
+    typename std::enable_if<
+        detail::is_tensor<detail::remove_cvr_t<T1>,
+                          detail::remove_cvr_t<T2>>::value ||
+        detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                    detail::remove_cvr_t<T2>>::value>::type* =
+        nullptr>
+inline decltype(auto) operator-(T1&& left, T2&& right) {
+  return subt(std::forward<T1>(left), std::forward<T2>(right));
 }

 /// Tensor multiplication operator

 /// \param left The left-hand tensor argument
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to left[i] * right[i]
-template <typename T1, typename T2,
-          typename std::enable_if<detail::is_tensor<T1, T2>::value ||
-                                  detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator*(const T1& left, const T2& right) {
-  return mult(left, right);
+template <
+    typename T1, typename T2,
+    typename std::enable_if<
+        detail::is_tensor<detail::remove_cvr_t<T1>,
+                          detail::remove_cvr_t<T2>>::value ||
+        detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                    detail::remove_cvr_t<T2>>::value>::type* =
+        nullptr>
+inline decltype(auto) operator*(T1&& left, T2&& right) {
+  return mult(std::forward<T1>(left), std::forward<T2>(right));
 }

 /// Create a copy of \c left that is scaled by \c right

 /// \param left The left-hand tensor argument
 /// \param right The right-hand scalar argument
 /// \return A tensor where element \c i is equal to left[i] * right
 template <typename T, typename N,
-          typename std::enable_if<(detail::is_tensor<T>::value ||
-                                   detail::is_tensor_of_tensor<T>::value) &&
-                                  detail::is_numeric_v<N>>::type* = nullptr>
-inline auto operator*(const T& left, N right) {
-  return scale(left, right);
+          typename std::enable_if<
+              (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+               detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
+              detail::is_numeric_v<N>>::type* = nullptr>
+inline decltype(auto) operator*(T&& left, N right) {
+  return scale(std::forward<T>(left), right);
 }

 /// Create a copy of \c right that is scaled by \c left

 /// \param left The left-hand scalar argument
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to left * right[i]
-template <typename N, typename T,
-          typename std::enable_if<
-              detail::is_numeric_v<N> &&
-              (detail::is_tensor<T>::value ||
-               detail::is_tensor_of_tensor<T>::value)>::type* = nullptr>
-inline auto operator*(N left, const T& right) {
-  return scale(right, left);
+template <
+    typename N, typename T,
+    typename std::enable_if<
+        detail::is_numeric_v<N> &&
+        (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+         detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value)>::type* =
+        nullptr>
+inline decltype(auto) operator*(N left, T&& right) {
+  return scale(std::forward<T>(right), left);
 }

 /// Create a negated copy of \c arg

 /// \tparam T The element type of \c arg
 /// \param arg The argument tensor
 /// \return A tensor where element \c i is equal to \c -arg[i]
-template <typename T,
-          typename std::enable_if<detail::is_tensor<T>::value ||
-                                  detail::is_tensor_of_tensor<
-                                      T>::value>::type* = nullptr>
-inline auto operator-(const T& arg) -> decltype(arg.neg()) {
-  return neg(arg);
+template <typename T,
+          typename std::enable_if<
+              detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+              detail::is_tensor_of_tensor<
+                  detail::remove_cvr_t<T>>::value>::type* = nullptr>
+inline decltype(auto) operator-(T&& arg) {
+  return neg(std::forward<T>(arg));
 }

 /// Create a permuted copy of \c arg
 /// \tparam T The argument tensor type
 /// \param perm The permutation to be applied to \c arg
 /// \param arg The argument tensor to be permuted
-template <typename T,
-          typename std::enable_if<detail::is_tensor<T>::value ||
-                                  detail::is_tensor_of_tensor<
-                                      T>::value>::type* = nullptr>
-inline auto operator*(const Permutation& perm, const T& arg) {
-  return permute(arg, perm);
+template <typename T,
+          typename std::enable_if<
+              detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+              detail::is_tensor_of_tensor<
+                  detail::remove_cvr_t<T>>::value>::type* = nullptr>
+inline decltype(auto) operator*(const Permutation& perm, T&& arg) {
+  return permute(std::forward<T>(arg), perm);
 }

 /// Tensor plus operator

 /// \return A tensor where element \c i is equal to left[i] + right[i]
 template <typename T1, typename T2,
           typename std::enable_if<
-              detail::is_tensor<T1, T2>::value ||
-              detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator+=(T1& left, const T2& right) {
-  return add_to(left, right);
+              detail::is_tensor<detail::remove_cvr_t<T1>, T2>::value ||
+              detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                          T2>::value>::type* = nullptr>
+inline decltype(auto) operator+=(T1&& left, const T2& right) {
+  return add_to(std::forward<T1>(left), right);
 }

 /// Tensor minus operator

 /// \return A reference to \c left
 template <typename T1, typename T2,
           typename std::enable_if<
-              detail::is_tensor<T1, T2>::value ||
-              detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator-=(T1& left, const T2& right) {
-  return sub_to(left, right);
+              detail::is_tensor<detail::remove_cvr_t<T1>, T2>::value ||
+              detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                          T2>::value>::type* = nullptr>
+inline decltype(auto) operator-=(T1&& left, const T2& right) {
+  return subt_to(std::forward<T1>(left), right);
 }

 /// In place tensor multiplication

 /// \return A reference to \c left
 template <typename T1, typename T2,
           typename std::enable_if<
-              detail::is_tensor<T1, T2>::value ||
-              detail::is_tensor_of_tensor<T1, T2>::value>::type* = nullptr>
-inline auto operator*=(T1& left, const T2& right) {
-  return mult_to(left, right);
+              detail::is_tensor<detail::remove_cvr_t<T1>, T2>::value ||
+              detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
+                                          T2>::value>::type* = nullptr>
+inline decltype(auto) operator*=(T1&& left, const T2& right) {
+  return mult_to(std::forward<T1>(left), right);
 }

 /// In place tensor add constant

 /// \param right The right-hand scalar argument
 /// \return A reference to \c left
 template <typename T, typename N,
-          typename std::enable_if<(detail::is_tensor<T>::value ||
-                                   detail::is_tensor_of_tensor<T>::value) &&
-                                  detail::is_numeric_v<N>>::type* = nullptr>
-inline auto operator+=(T& left, N right) {
-  return add_to(left, right);
+          typename std::enable_if<
+              (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+               detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
+              detail::is_numeric_v<N>>::type* = nullptr>
+inline decltype(auto) operator+=(T&& left, N right) {
+  return add_to(std::forward<T>(left), right);
 }

 /// In place tensor subtract constant

 /// \param right The right-hand scalar argument
 /// \return A reference to \c left
 template <typename T, typename N,
-          typename std::enable_if<(detail::is_tensor<T>::value ||
-                                   detail::is_tensor_of_tensor<T>::value) &&
-                                  detail::is_numeric_v<N>>::type* = nullptr>
-inline auto operator-=(T& left, N right) {
-  return subt_to(left, right);
+          typename std::enable_if<
+              (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+               detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
+              detail::is_numeric_v<N>>::type* = nullptr>
+inline decltype(auto) operator-=(T&& left, N right) {
+  return subt_to(std::forward<T>(left), right);
 }

 /// In place tensor scale

 /// \param right The right-hand scalar argument
 /// \return A reference to \c left
 template <typename T, typename N,
-          typename std::enable_if<(detail::is_tensor<T>::value ||
-                                   detail::is_tensor_of_tensor<T>::value) &&
-                                  detail::is_numeric_v<N>>::type* = nullptr>
-inline auto operator*=(T& left, N right) {
-  return scale_to(left, right);
+          typename std::enable_if<
+              (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
+               detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
+              detail::is_numeric_v<N>>::type* = nullptr>
+inline decltype(auto) operator*=(T&& left, N right) {
+  return scale_to(std::forward<T>(left), right);
 }

 }  // namespace TiledArray
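The net effect of the forwarding rewrite: operators now preserve the value category of their arguments, so a temporary produced mid-expression can hand its buffer to the next operation (via the rvalue-qualified `Tensor::add` added below) instead of being copied. A short sketch (shapes are illustrative):

```cpp
TA::TensorD x(TA::Range{2, 2}, 1.0);
TA::TensorD y(TA::Range{2, 2}, 2.0);
TA::TensorD z(TA::Range{2, 2}, 3.0);

// (x + y) yields a temporary; the forwarding operator+ passes it to add()
// as an rvalue, so its buffer can be reused for the final result
auto w = x + y + z;
```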
@@ -537,6 +552,10 @@ class Tensor {
       result = detail::tensor_op<Tensor>(
           [](const numeric_type value) -> numeric_type { return value; },
           *this);
+    } else if (range_) {  // corner case: null data_ implies
+                          // range_.volume() == 0
+      TA_ASSERT(range_.volume() == 0);
+      result = Tensor(range_);
     }
     return result;
   }
@@ -962,12 +981,14 @@ class Tensor {
 
   /// Read-only shared_ptr to the data
 
   /// \return A const shared_ptr to the tensor data
-  std::shared_ptr<const value_type> data_shared() const { return this->data_; }
+  std::shared_ptr<const value_type[]> data_shared() const {
+    return this->data_;
+  }
 
   /// Mutable shared_ptr to the data
 
   /// \return A mutable shared_ptr to the tensor data
-  std::shared_ptr<value_type> data_shared() { return this->data_; }
+  std::shared_ptr<value_type[]> data_shared() { return this->data_; }
 
   /// Test if the tensor is empty
 
@@ -1478,7 +1499,7 @@ class Tensor {
 
   // Addition operations
 
-  /// Add this and \c other to construct a new tensors
+  /// Add this and \c other to construct a new tensor
 
   /// \tparam Right The right-hand tensor type
   /// \param right The tensor that will be added to this tensor
@@ -1486,7 +1507,7 @@ class Tensor {
   /// \c this and \c other
   template <typename Right,
             typename std::enable_if<detail::is_tensor<Right>::value>::type* = nullptr>
-  Tensor add(const Right& right) const {
+  Tensor add(const Right& right) const& {
     return binary(
         right,
         [](const numeric_type l, const numeric_t<Right> r) -> numeric_type {
@@ -1494,6 +1515,19 @@ class Tensor {
           return l + r;
         });
   }
 
+  /// Add this and \c other to construct a new tensor
+
+  /// \tparam Right The right-hand tensor type
+  /// \param right The tensor that will be added to this tensor
+  /// \return A new tensor where the elements are the sum of the elements of
+  /// \c this and \c other
+  template <typename Right,
+            typename std::enable_if<detail::is_tensor<Right>::value>::type* = nullptr>
+  Tensor add(const Right& right) && {
+    add_to(right);
+    return std::move(*this);
+  }
+
   /// Add this and \c other to construct a new, permuted tensor
 
   /// \tparam Right The right-hand tensor type
diff --git a/src/TiledArray/tile_interface/add.h b/src/TiledArray/tile_interface/add.h
index 9c0e02e558..879d2ed9d2 100644
--- a/src/TiledArray/tile_interface/add.h
+++ b/src/TiledArray/tile_interface/add.h
@@ -39,10 +39,21 @@ namespace TiledArray {
 /// \param left The left-hand argument to be added
 /// \param right The right-hand argument to be added
 /// \return A tile that is equal to (left + right)
-template <typename Left, typename Right>
-inline auto add(const Left& left, const Right& right)
-    -> decltype(left.add(right)) {
-  return left.add(right);
+template <typename Left, typename Right,
+          typename = std::enable_if_t<
+              detail::has_member_function_add_anyreturn_v<Left&&, Right&&> ||
+              detail::has_member_function_add_anyreturn_v<Right&&, Left&&>>>
+inline decltype(auto) add(Left&& left, Right&& right) {
+  constexpr auto left_right =
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&> &&
+       detail::has_member_function_add_anyreturn_v<Right&&, Left&&> &&
+       !std::is_reference_v<Left> && std::is_reference_v<Right>) ||
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&> &&
+       !detail::has_member_function_add_anyreturn_v<Right&&, Left&&>);
+  if constexpr (left_right)
+    return std::forward<Left>(left).add(std::forward<Right>(right));
+  else
+    return std::forward<Right>(right).add(std::forward<Left>(left));
 }
 
 /// Add and scale tile arguments
@@ -56,9 +67,26 @@ inline auto add(const Left& left, const Right& right)
 /// \return A tile that is equal to (left + right) * factor
 template <
     typename Left, typename Right, typename Scalar,
-    typename std::enable_if<detail::is_numeric_v<Scalar>>::type* = nullptr>
-inline auto add(const Left& left, const Right& right, const Scalar factor) {
-  return left.add(right, factor);
+    typename = std::enable_if_t<detail::is_numeric_v<Scalar> &&
+                                (detail::has_member_function_add_anyreturn_v<
+                                     Left&&, Right&&, const Scalar> ||
+                                 detail::has_member_function_add_anyreturn_v<
+                                     Right&&, Left&&, const Scalar>)>>
+inline decltype(auto) add(Left&& left, Right&& right, const Scalar factor) {
+  constexpr auto left_right =
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&,
+                                                   const Scalar> &&
+       detail::has_member_function_add_anyreturn_v<Right&&, Left&&,
+                                                   const Scalar> &&
+       !std::is_reference_v<Left> && std::is_reference_v<Right>) ||
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&,
+                                                   const Scalar> &&
+       !detail::has_member_function_add_anyreturn_v<Right&&, Left&&,
+                                                    const Scalar>);
+  if constexpr (left_right)
+    return std::forward<Left>(left).add(std::forward<Right>(right), factor);
+  else
+    return std::forward<Right>(right).add(std::forward<Left>(left), factor);
 }
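[editor's sketch, not part of the diff] The `left_right` dispatch above prefers the operand whose `add` can consume its own storage: an rvalue operand becomes `*this` so the new `&&`-qualified `Tensor::add` can recycle its buffer. A sketch of the observable difference, assuming `TiledArray::TensorD` and the dispatch as reconstructed above; names are illustrative only:

    #include <utility>

    #include <tiledarray.h>

    void add_dispatch_sketch() {
      using TiledArray::Range;
      using TiledArray::TensorD;
      TensorD a(Range{4}, 1.0);
      TensorD b(Range{4}, 2.0);
      // both lvalues: the const&-qualified Tensor::add runs and
      // allocates a fresh result tensor
      auto c = TiledArray::add(a, b);
      // rvalue left operand: the &&-qualified overload runs, adds in
      // place, and moves a's buffer into the result
      auto d = TiledArray::add(std::move(a), b);
    }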
 
 /// Add and permute tile arguments
@@ -72,10 +100,25 @@ inline auto add(const Left& left, const Right& right, const Scalar factor) {
 /// \return A tile that is equal to perm ^ (left + right)
 template <
     typename Left, typename Right, typename Perm,
     typename = std::enable_if_t<detail::is_permutation_v<Perm> &&
-                                detail::has_member_function_add_anyreturn_v<
-                                    const Left, const Right&, const Perm&>>>
-inline auto add(const Left& left, const Right& right, const Perm& perm) {
-  return left.add(right, perm);
+                                (detail::has_member_function_add_anyreturn_v<
+                                     Left&&, Right&&, const Perm&> ||
+                                 detail::has_member_function_add_anyreturn_v<
+                                     Right&&, Left&&, const Perm&>)>>
+inline decltype(auto) add(Left&& left, Right&& right, const Perm& perm) {
+  constexpr auto left_right =
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&,
+                                                   const Perm&> &&
+       detail::has_member_function_add_anyreturn_v<Right&&, Left&&,
+                                                   const Perm&> &&
+       !std::is_reference_v<Left> && std::is_reference_v<Right>) ||
+      (detail::has_member_function_add_anyreturn_v<Left&&, Right&&,
+                                                   const Perm&> &&
+       !detail::has_member_function_add_anyreturn_v<Right&&, Left&&,
+                                                    const Perm&>);
+  if constexpr (left_right)
+    return std::forward<Left>(left).add(std::forward<Right>(right), perm);
+  else
+    return std::forward<Right>(right).add(std::forward<Left>(left), perm);
 }
 
 /// Add, scale, and permute tile arguments
@@ -88,13 +131,31 @@ inline auto add(const Left& left, const Right& right, const Perm& perm) {
 /// \param factor The scaling factor
 /// \param perm The permutation to be applied to the result
 /// \return A tile that is equal to perm ^ (left + right) * factor
-template <
-    typename Left, typename Right, typename Scalar, typename Perm,
-    typename std::enable_if<detail::is_numeric_v<Scalar> &&
-                            detail::is_permutation_v<Perm>>::type* = nullptr>
-inline auto add(const Left& left, const Right& right, const Scalar factor,
-                const Perm& perm) {
-  return left.add(right, factor, perm);
+template <typename Left, typename Right, typename Scalar, typename Perm,
+          typename = std::enable_if_t<
+              detail::is_numeric_v<Scalar> && detail::is_permutation_v<Perm> &&
+              (detail::has_member_function_add_anyreturn_v<
+                   Left&&, Right&&, const Scalar, const Perm&> ||
+               detail::has_member_function_add_anyreturn_v<
+                   Right&&, Left&&, const Scalar, const Perm&>)>>
+inline decltype(auto) add(Left&& left, Right&& right, const Scalar factor,
+                          const Perm& perm) {
+  constexpr auto left_right =
+      (detail::has_member_function_add_anyreturn_v<
+           Left&&, Right&&, const Scalar, const Perm&> &&
+       detail::has_member_function_add_anyreturn_v<
+           Right&&, Left&&, const Scalar, const Perm&> &&
+       !std::is_reference_v<Left> && std::is_reference_v<Right>) ||
+      (detail::has_member_function_add_anyreturn_v<
+           Left&&, Right&&, const Scalar, const Perm&> &&
+       !detail::has_member_function_add_anyreturn_v<
+           Right&&, Left&&, const Scalar, const Perm&>);
+  if constexpr (left_right)
+    return std::forward<Left>(left).add(std::forward<Right>(right), factor,
+                                        perm);
+  else
+    return std::forward<Right>(right).add(std::forward<Left>(left), factor,
+                                          perm);
 }
 
 /// Add to the result tile
@@ -104,9 +165,12 @@ inline auto add(const Left& left, const Right& right, const Scalar factor,
 /// \param result The result tile
 /// \param arg The argument to be added to the result
 /// \return A tile that is equal to result[i] += arg[i]
-template <typename Result, typename Arg>
-inline Result& add_to(Result& result, const Arg& arg) {
-  return result.add_to(arg);
+template <
+    typename Result, typename Arg,
+    typename = std::enable_if_t<
+        detail::has_member_function_add_to_anyreturn_v<Result&&, const Arg&>>>
+inline decltype(auto) add_to(Result&& result, const Arg& arg) {
+  return std::forward<Result>(result).add_to(arg);
 }
 
 /// Add and scale to the result tile
@@ -118,11 +182,14 @@ inline Result& add_to(Result& result, const Arg& arg) {
 /// \param arg The argument to be added to \c result
 /// \param factor The scaling factor
 /// \return A tile that is equal to (result[i] += arg[i]) *= factor
-template <
-    typename Result, typename Arg, typename Scalar,
-    typename std::enable_if<detail::is_numeric_v<Scalar>>::type* = nullptr>
-inline Result& add_to(Result& result, const Arg& arg, const Scalar factor) {
-  return result.add_to(arg, factor);
+template <typename Result, typename Arg, typename Scalar,
+          typename std::enable_if<
+              detail::is_numeric_v<Scalar> &&
+              detail::has_member_function_add_to_anyreturn_v<
+                  Result&&, const Arg&, const Scalar>>::type* = nullptr>
+inline decltype(auto) add_to(Result&& result, const Arg& arg,
+                             const Scalar factor) {
+  return std::forward<Result>(result).add_to(arg, factor);
 }
 
 namespace tile_interface {
diff --git a/src/TiledArray/tile_op/tile_interface.h b/src/TiledArray/tile_op/tile_interface.h
index 65d970ebeb..ee8c1093a2 100644
--- a/src/TiledArray/tile_op/tile_interface.h
+++ b/src/TiledArray/tile_op/tile_interface.h
@@ -372,8 +372,8 @@ inline auto subt(const Arg& arg, const Scalar value, const Perm& perm) {
 /// \param arg The argument to be subtracted from the result
 /// \return A tile that is equal to result[i] -= arg[i]
 template <typename Result, typename Arg>
-inline Result& subt_to(Result& result, const Arg& arg) {
-  return result.subt_to(arg);
+inline decltype(auto) subt_to(Result&& result, const Arg& arg) {
+  return std::forward<Result>(result).subt_to(arg);
 }
 
 /// Subtract and scale from the result tile
diff --git a/src/TiledArray/util/vector.h b/src/TiledArray/util/vector.h
index 12c5d0dfcd..6e69f523f4 100644
--- a/src/TiledArray/util/vector.h
+++ b/src/TiledArray/util/vector.h
@@ -27,13 +27,16 @@
 #define TILEDARRAY_UTIL_VECTOR_H
 
 #include <algorithm>
+#include <vector>
 
 // Boost.Container 1.75 and earlier uses standard exception classes, 1.76+ use
-// Boost.Container exceptions, unless BOOST_CONTAINER_USE_STD_EXCEPTIONS is defined:
+// Boost.Container exceptions, unless BOOST_CONTAINER_USE_STD_EXCEPTIONS is
+// defined:
 // https://www.boost.org/doc/libs/master/doc/html/container/release_notes.html#container.release_notes.release_notes_boost_1_76_00
-// Define BOOST_CONTAINER_USE_STD_EXCEPTIONS for Boost <1.76 so that exception checking can use this macro with all versions of Boost
+// Define BOOST_CONTAINER_USE_STD_EXCEPTIONS for Boost <1.76 so that exception
+// checking can use this macro with all versions of Boost
 #if BOOST_VERSION < 107600 && !defined(BOOST_CONTAINER_USE_STD_EXCEPTIONS)
-#  define BOOST_CONTAINER_USE_STD_EXCEPTIONS 1
+#define BOOST_CONTAINER_USE_STD_EXCEPTIONS 1
 #endif
 
 #include <boost/container/small_vector.hpp>
@@ -41,6 +44,7 @@
 #include <madness/world/archive.h>
 
 #include <TiledArray/utility.h>
+#include "TiledArray/error.h"
 
 namespace TiledArray {
 
@@ -90,6 +94,32 @@ constexpr auto iv(Int i0, Ints... rest) {
   return result;
 }
 
+namespace operators {
+
+template <typename T1, std::size_t N1, typename T2, std::size_t N2>
+decltype(auto) operator+(const boost::container::small_vector<T1, N1>& v1,
+                         const boost::container::small_vector<T2, N2>& v2) {
+  TA_ASSERT(v1.size() == v2.size());
+  boost::container::small_vector<std::common_type_t<T1, T2>, std::max(N1, N2)>
+      result(v1.size());
+  std::transform(v1.begin(), v1.end(), v2.begin(), result.begin(),
+                 [](auto&& a, auto&& b) { return a + b; });
+  return result;
+}
+
+template <typename T1, std::size_t N1, typename T2, std::size_t N2>
+decltype(auto) operator-(const boost::container::small_vector<T1, N1>& v1,
+                         const boost::container::small_vector<T2, N2>& v2) {
+  TA_ASSERT(v1.size() == v2.size());
+  boost::container::small_vector<std::common_type_t<T1, T2>, std::max(N1, N2)>
+      result(v1.size());
+  std::transform(v1.begin(), v1.end(), v2.begin(), result.begin(),
+                 [](auto&& a, auto&& b) { return a - b; });
+  return result;
+}
+
+}  // namespace operators
+
 }  // namespace container
 
 }  // namespace TiledArray
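[editor's sketch, not part of the diff] The new `operators` namespace is opt-in: element-wise `small_vector` arithmetic becomes visible only where a using-directive pulls it in, and mixed element types/preallocated sizes promote via `std::common_type_t`/`std::max`. A short usage sketch; `small_vector_ops_sketch` is illustrative only:

    #include <boost/container/small_vector.hpp>

    #include <TiledArray/util/vector.h>

    void small_vector_ops_sketch() {
      using namespace TiledArray::container::operators;
      boost::container::small_vector<int, 4> x{1, 2, 3};
      boost::container::small_vector<long, 8> y{10, 20, 30};
      auto s = x + y;  // small_vector<long, 8> holding {11, 22, 33}
      auto d = y - x;  // small_vector<long, 8> holding {9, 18, 27}
    }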
diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp
index 061c5fdd17..c2ac8262d0 100644
--- a/tests/dist_array.cpp
+++ b/tests/dist_array.cpp
@@ -76,6 +76,13 @@ BOOST_AUTO_TEST_CASE(constructors) {
   for (ArrayN::const_iterator it = ad.begin(); it != ad.end(); ++it)
     BOOST_CHECK(!it->probe());
 
+  // Construct a dense array in default world
+  {
+    BOOST_REQUIRE_NO_THROW(ArrayN ad(tr));
+    ArrayN ad(tr);
+    BOOST_CHECK_EQUAL(ad.world().id(), get_default_world().id());
+  }
+
   // Construct a sparse array
   BOOST_REQUIRE_NO_THROW(
       SpArrayN as(world, tr, TiledArray::SparseShape<float>(shape_tensor, tr)));
@@ -88,6 +95,14 @@ BOOST_AUTO_TEST_CASE(constructors) {
   // now fill it
   BOOST_REQUIRE_NO_THROW(as.fill(1));
 
+  // Construct a sparse array in default world
+  {
+    BOOST_REQUIRE_NO_THROW(
+        SpArrayN as(tr, TiledArray::SparseShape<float>(shape_tensor, tr)));
+    SpArrayN as(tr, TiledArray::SparseShape<float>(shape_tensor, tr));
+    BOOST_CHECK_EQUAL(as.world().id(), get_default_world().id());
+  }
+
   // Construct a sparse array from another sparse array
   {
     auto op = [](auto& result, const auto& input) { result = input.clone(); };
@@ -107,6 +122,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         ++itr;
       }
     }
+
+    // now with default world
+    {
+      TArray<double> a_vector(il);
+      BOOST_CHECK_EQUAL(a_vector.world().id(), get_default_world().id());
+    }
   }
 
   // Create a matrix with an initializer list
@@ -122,6 +143,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         }
       }
    }
+
+    // now with default world
+    {
+      TArray<double> a_matrix(il);
+      BOOST_CHECK_EQUAL(a_matrix.world().id(), get_default_world().id());
+    }
   }
 
   // Create a rank 3 tensor with an initializer list
@@ -144,6 +171,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         }
      }
    }
+
+    // now with default world
+    {
+      TArray<double> a_tensor3(il);
+      BOOST_CHECK_EQUAL(a_tensor3.world().id(), get_default_world().id());
+    }
   }
 
   // Create a rank 4 tensor with an initializer list
@@ -168,6 +201,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         }
      }
    }
+
+    // now with default world
+    {
+      TArray<double> a_tensor4(il);
+      BOOST_CHECK_EQUAL(a_tensor4.world().id(), get_default_world().id());
+    }
   }
 
   // Create a rank 5 tensor with an initializer list
@@ -194,6 +233,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         }
      }
    }
+
+    // now with default world
+    {
+      TArray<double> a_tensor5(il);
+      BOOST_CHECK_EQUAL(a_tensor5.world().id(), get_default_world().id());
+    }
   }
 
   // Create a rank 6 tensor with an initializer list
@@ -222,6 +267,12 @@ BOOST_AUTO_TEST_CASE(single_tile_initializer_list_ctors) {
         }
      }
    }
+
+    // now with default world
+    {
+      TArray<double> a_tensor6(il);
+      BOOST_CHECK_EQUAL(a_tensor6.world().id(), get_default_world().id());
+    }
   }
 }
@@ -232,6 +283,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
     TiledRange tr{{0, 1, 3}};
     TArray<double> a_vector(world, tr, il);
     BOOST_CHECK_EQUAL(a_vector.size(), 2);
+
+    // now with default world
+    {
+      TArray<double> a_vector(tr, il);
+      BOOST_CHECK_EQUAL(a_vector.world().id(), get_default_world().id());
+    }
   }
 
   {
@@ -239,6 +296,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
     TiledRange tr{{0, 1, 2}, {0, 1, 3}};
     TArray<double> a_matrix(world, tr, il);
     BOOST_CHECK_EQUAL(a_matrix.size(), 4);
+
+    // now with default world
+    {
+      TArray<double> a_matrix(tr, il);
+      BOOST_CHECK_EQUAL(a_matrix.world().id(), get_default_world().id());
+    }
   }
 
   {
@@ -247,6 +310,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
     TiledRange tr{{0, 1, 2}, {0, 1, 2}, {0, 1, 3}};
     TArray<double> a_tensor(world, tr, il);
     BOOST_CHECK_EQUAL(a_tensor.size(), 8);
+
+    // now with default world
+    {
+      TArray<double> a_tensor(tr, il);
+      BOOST_CHECK_EQUAL(a_tensor.world().id(), get_default_world().id());
+    }
   }
 
   {
@@ -257,6 +326,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
     TiledRange tr{{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 3}};
     TArray<double> a_tensor(world, tr, il);
     BOOST_CHECK_EQUAL(a_tensor.size(), 16);
+
+    // now with default world
+    {
+      TArray<double> a_tensor(tr, il);
+      BOOST_CHECK_EQUAL(a_tensor.world().id(), get_default_world().id());
+    }
   }
 
   {
@@ -269,6 +344,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
     TiledRange tr{{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 3}};
     TArray<double> a_tensor(world, tr, il);
     BOOST_CHECK_EQUAL(a_tensor.size(), 32);
+
+    // now with default world
+    {
+      TArray<double> a_tensor(tr, il);
+      BOOST_CHECK_EQUAL(a_tensor.world().id(), get_default_world().id());
+    }
  }
 
   {
@@ -286,6 +367,12 @@ BOOST_AUTO_TEST_CASE(multi_tile_initializer_list_ctors) {
                   {0, 1, 2}, {0, 1, 2}, {0, 1, 3}};
     TArray<double> a_tensor(world, tr, il);
     BOOST_CHECK_EQUAL(a_tensor.size(), 64);
+
+    // now with default world
+    {
+      TArray<double> a_tensor(tr, il);
+      BOOST_CHECK_EQUAL(a_tensor.world().id(), get_default_world().id());
+    }
   }
 }
diff --git a/tests/eigen.cpp b/tests/eigen.cpp
index bfa4f1a0db..d577804417 100644
--- a/tests/eigen.cpp
+++ b/tests/eigen.cpp
@@ -421,7 +421,12 @@ BOOST_AUTO_TEST_CASE(tensor_to_array) {
   decltype(tensor) tensor_copy;
   if (GlobalFixture::world->rank() == 1) tensor_copy = tensor;
   GlobalFixture::world->gop.broadcast_serializable(tensor_copy, 1);
+// Eigen::TensorBase::operator== is ambiguously defined in C++20
+#if __cplusplus >= 202002L
+  Eigen::Tensor<bool, 0> eq = ((tensor - tensor_copy).abs() == 0).all();
+#else
   Eigen::Tensor<bool, 0> eq = (tensor == tensor_copy).all();
+#endif
   BOOST_CHECK(eq() == true);
 }
 
diff --git a/tests/einsum.cpp b/tests/einsum.cpp
index 1c0172e554..0fcb71f072 100644
--- a/tests/einsum.cpp
+++ b/tests/einsum.cpp
@@ -764,7 +764,12 @@ bool isApprox(const Eigen::TensorBase<T1>& A,
   Eigen::Tensor<bool, 0> r;
   if constexpr (std::is_integral_v<typename T1::Scalar> &&
                 std::is_integral_v<typename T2::Scalar>) {
+// Eigen::TensorBase::operator== is ambiguously defined in C++20
+#if __cplusplus >= 202002L
+    r = ((derived(A) - derived(B)).abs() == 0).all();
+#else
     r = (derived(A) == derived(B)).all();
+#endif
   } else {  // soft floating-point comparison
     r = ((derived(A) - derived(B)).abs() <= abs_comparison_threshold).all();
   }
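[editor's sketch, not part of the diff] Both test fixes above guard the same pattern: under C++20, `Eigen::TensorBase::operator==` participates ambiguously in overload resolution, so integer tensors are compared as `|A - B| == 0` instead. A self-contained sketch of the guarded helper, assuming equal-shaped arguments; `tensors_equal` is illustrative only:

    #include <unsupported/Eigen/CXX11/Tensor>

    bool tensors_equal(const Eigen::Tensor<int, 2>& A,
                       const Eigen::Tensor<int, 2>& B) {
    #if __cplusplus >= 202002L
      // avoid the ambiguous TensorBase::operator== under C++20
      Eigen::Tensor<bool, 0> eq = ((A - B).abs() == 0).all();
    #else
      Eigen::Tensor<bool, 0> eq = (A == B).all();
    #endif
      return eq();
    }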
diff --git a/tests/sparse_tile.h b/tests/sparse_tile.h
index 6c365334fa..1b7cdd07e1 100644
--- a/tests/sparse_tile.h
+++ b/tests/sparse_tile.h
@@ -122,10 +122,36 @@ class EigenSparseTile {
   matrix_type& matrix() { return std::get<0>(*impl_); }
 
   /// data read-write accessor
-  template <typename Index>
+  template <typename Index,
+            typename = std::enable_if_t<detail::is_integral_range_v<Index>>>
   value_type& operator[](const Index& idx) {
     auto start = range().lobound_data();
-    return std::get<0>(*impl_).coeffRef(idx[0] - start[0], idx[1] - start[1]);
+    return matrix().coeffRef(idx[0] - start[0], idx[1] - start[1]);
   }
 
+  /// data read-write accessor
+  template <typename Ordinal,
+            std::enable_if_t<std::is_integral_v<Ordinal>>* = nullptr>
+  value_type& operator[](const Ordinal& ord) {
+    auto idx = range().idx(ord);
+    auto start = range().lobound_data();
+    return matrix().coeffRef(idx[0] - start[0], idx[1] - start[1]);
+  }
+
+  /// data read-only accessor
+  template <typename Index,
+            typename = std::enable_if_t<detail::is_integral_range_v<Index>>>
+  value_type operator[](const Index& idx) const {
+    auto start = range().lobound_data();
+    return matrix().coeff(idx[0] - start[0], idx[1] - start[1]);
+  }
+
+  /// data read-only accessor
+  template <typename Ordinal,
+            typename = std::enable_if_t<std::is_integral_v<Ordinal>>>
+  value_type operator[](const Ordinal& ord) const {
+    auto idx = range().idx(ord);
+    auto start = range().lobound_data();
+    return matrix().coeff(idx[0] - start[0], idx[1] - start[1]);
+  }
+
   /// Maximum # of elements in the tile
@@ -138,8 +164,8 @@ class EigenSparseTile {
 
   // output
   template <typename Archive,
-            typename std::enable_if<madness::archive::is_output_archive<
-                Archive>::value>::type* = nullptr>
+            typename std::enable_if<
+                madness::is_output_archive_v<Archive>>::type* = nullptr>
   void serialize(Archive& ar) {
     if (impl_) {
       ar & true;
@@ -151,7 +177,7 @@ class EigenSparseTile {
       for (typename matrix_type::InnerIterator it(mat, k); it; ++it) {
         datavec.push_back(
            Eigen::Triplet<value_type>(it.row(), it.col(), it.value()));
       }
-      ar& datavec & this->range();
+      ar& datavec& this->range();
     } else {
       ar & false;
     }
@@ -159,8 +185,8 @@ class EigenSparseTile {
 
   // input
   template <typename Archive,
-            typename std::enable_if<madness::archive::is_input_archive<
-                Archive>::value>::type* = nullptr>
+            typename std::enable_if<
+                madness::is_input_archive_v<Archive>>::type* = nullptr>
   void serialize(Archive& ar) {
     bool have_impl = false;
     ar& have_impl;
@@ -229,22 +255,22 @@ EigenSparseTile<T> add(const EigenSparseTile<T>& arg1,
                       arg1.range());
 }
 
-// dense_result[i] = dense_arg1[i] + sparse_arg2[i]
-template <typename T>
-TiledArray::Tensor<T> add(const TiledArray::Tensor<T>& arg1,
-                          const EigenSparseTile<T>& arg2) {
-  TA_ASSERT(arg1.range() == arg2.range());
-
-  // this could be done better ...
-  return TiledArray::add(arg1, static_cast<TiledArray::Tensor<T>>(arg2));
-}
-
-// dense_result[i] = sparse_arg1[i] + dense_arg2[i]
-template <typename T>
-TiledArray::Tensor<T> add(const EigenSparseTile<T>& arg1,
-                          const TiledArray::Tensor<T>& arg2) {
-  return TiledArray::add(arg2, arg1);
-}
+//// dense_result[i] = dense_arg1[i] + sparse_arg2[i]
+// template <typename T>
+// TiledArray::Tensor<T> add(const TiledArray::Tensor<T>& arg1,
+//                           const EigenSparseTile<T>& arg2) {
+//   TA_ASSERT(arg1.range() == arg2.range());
+//
+//   // this could be done better ...
+//   return TiledArray::add(arg1, static_cast<TiledArray::Tensor<T>>(arg2));
+// }
+//
+//// dense_result[i] = sparse_arg1[i] + dense_arg2[i]
+// template <typename T>
+// TiledArray::Tensor<T> add(const EigenSparseTile<T>& arg1,
+//                           const TiledArray::Tensor<T>& arg2) {
+//   return TiledArray::add(arg2, static_cast<TiledArray::Tensor<T>>(arg1));
+// }
 
 // dense_result[perm ^ i] = dense_arg1[i] + sparse_arg2[i]
 template <
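[editor's sketch, not part of the diff] The four `operator[]` overloads above split element access into coordinate-index and ordinal forms; the ordinal form recovers the coordinate via `Range::idx()` before indexing the tile's local Eigen sparse matrix. A sketch under the enable_if predicates as reconstructed above, assuming the tile's Range contains the index {1, 2}; `sparse_tile_access_sketch` is illustrative only:

    #include <array>

    #include "sparse_tile.h"  // test-local header defining EigenSparseTile

    void sparse_tile_access_sketch(EigenSparseTile<double>& tile) {
      // coordinate access: any integral range (here std::array) is accepted
      tile[std::array<int, 2>{{1, 2}}] = 3.0;
      // ordinal access: a single integer, mapped back to {1, 2} internally
      const auto ord = tile.range().ordinal(std::array<int, 2>{{1, 2}});
      const double v = tile[ord];  // reads back 3.0
    }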
diff --git a/tests/tile_op_add.cpp b/tests/tile_op_add.cpp
index b264ae15bf..c2e08c170c 100644
--- a/tests/tile_op_add.cpp
+++ b/tests/tile_op_add.cpp
@@ -26,6 +26,7 @@
 #include "../src/TiledArray/tile_op/add.h"
 #include "../src/tiledarray.h"
 #include "range_fixture.h"
+#include "sparse_tile.h"
 #include "unit_test_config.h"
 
 // using TiledArray::detail::Add;
@@ -49,8 +50,7 @@ struct AddFixture : public RangeFixture {
 
 };  // AddFixture
 
-BOOST_FIXTURE_TEST_SUITE(tile_op_add_suite, AddFixture,
-                         TA_UT_LABEL_SERIAL)
+BOOST_FIXTURE_TEST_SUITE(tile_op_add_suite, AddFixture, TA_UT_LABEL_SERIAL)
 
 BOOST_AUTO_TEST_CASE(constructor) {
   // Check that the constructors can be called without throwing exceptions
@@ -398,4 +398,84 @@ BOOST_AUTO_TEST_CASE(binary_add_right_zero_perm_consume_right) {
   }
 }
 
+BOOST_AUTO_TEST_CASE(binary_add_heterogeneous) {
+  TensorD a(RangeFixture::r, [](auto&) { return TiledArray::drand(); });
+  EigenSparseTile<double> b(RangeFixture::r);
+
+  /////////////////
+  // dense + sparse
+  /////////////////
+  {
+    {  // a is persistent
+      auto c = add(a, b);
+
+      // Check that the result range is correct
+      BOOST_CHECK_EQUAL(c.range(), a.range());
+
+      // Check that neither a nor b was consumed
+      BOOST_CHECK_NE(a.data(), nullptr);
+      BOOST_CHECK_NE(c.data(), a.data());
+
+      // Check that the data in the new tile is correct
+      for (std::size_t i = 0ul; i < r.volume(); ++i) {
+        BOOST_CHECK_EQUAL(c[i], a[i] + b[i]);
+      }
+    }
+    {  // a is consumed
+      auto a_copy = a.clone();
+      if (r.rank() == 3) a.shift({-7, 7, 0});
+      auto c = add(std::move(a), std::move(b));
+
+      // Check that the result range is correct
+      BOOST_CHECK_EQUAL(c.range(), b.range());
+
+      // Check that a was consumed
+      BOOST_CHECK_EQUAL(a.data(), nullptr);
+
+      // Check that the data in the new tile is correct
+      for (std::size_t i = 0ul; i < r.volume(); ++i) {
+        BOOST_CHECK_EQUAL(c[i], a_copy[i] + b[i]);
+      }
+      a = a_copy;
+    }
+  }
+
+  /////////////////
+  // sparse + dense
+  /////////////////
+  {
+    {  // a is persistent
+      auto c = add(b, a);
+
+      // Check that the result range is correct
+      BOOST_CHECK_EQUAL(c.range(), b.range());
+
+      // Check that a was not consumed
+      BOOST_CHECK_NE(a.data(), nullptr);
+      BOOST_CHECK_NE(c.data(), a.data());
+
+      // Check that the data in the new tile is correct
+      for (std::size_t i = 0ul; i < r.volume(); ++i) {
+        BOOST_CHECK_EQUAL(c[i], b[i] + a[i]);
+      }
+    }
+    {  // a is consumed
+      auto a_copy = a.clone();
+      if (r.rank() == 3) a.shift({-7, 7, 0});
+      auto c = add(std::move(b), std::move(a));
+
+      // Check that the result range is correct
+      BOOST_CHECK_EQUAL(c.range(), b.range());
+
+      // Check that a was consumed
+      BOOST_CHECK_EQUAL(a.data(), nullptr);
+
+      // Check that the data in the new tile is correct
+      for (std::size_t i = 0ul; i < r.volume(); ++i) {
+        BOOST_CHECK_EQUAL(c[i], b[i] + a_copy[i]);
+      }
+      a = a_copy;
+    }
+  }
+}
+
 BOOST_AUTO_TEST_SUITE_END()