diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0167aab636..c426d1ffbe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -193,6 +193,7 @@ TiledArray/util/backtrace.h TiledArray/util/bug.h TiledArray/util/function.h TiledArray/util/initializer_list.h +TiledArray/util/invoke.h TiledArray/util/logger.h TiledArray/util/ptr_registry.h TiledArray/util/random.h @@ -258,7 +259,7 @@ set_source_files_properties( # the list of libraries on which TiledArray depends on, will be cached later # when FetchContent umpire: set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers umpire) -set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers TiledArray_UMPIRE) +set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers TiledArray_UMPIRE range-v3::range-v3) if(CUDA_FOUND OR HIP_FOUND) diff --git a/src/TiledArray/array_impl.h b/src/TiledArray/array_impl.h index e5ad9d5db9..df7138a9e7 100644 --- a/src/TiledArray/array_impl.h +++ b/src/TiledArray/array_impl.h @@ -30,6 +30,7 @@ #include #include #include +#include namespace TiledArray { namespace detail { @@ -407,7 +408,8 @@ class ArrayIterator { /// \note It is the users responsibility to ensure the process maps on all /// nodes are identical. template -class ArrayImpl : public TensorImpl { +class ArrayImpl : public TensorImpl, + public std::enable_shared_from_this> { public: typedef ArrayImpl ArrayImpl_; ///< This object type typedef TensorImpl TensorImpl_; ///< The base class of this object @@ -440,6 +442,68 @@ class ArrayImpl : public TensorImpl { private: storage_type data_; ///< Tile container + public: + static madness::AtomicInt cleanup_counter_; + + /// Array deleter function + + /// This function schedules a task for lazy cleanup. Array objects are + /// deleted only after the object has been deleted in all processes. + /// \param pimpl The implementation pointer to be deleted. + static void lazy_deleter(const ArrayImpl_* const pimpl) { + if (pimpl) { + if (madness::initialized()) { + World& world = pimpl->world(); + const madness::uniqueidT id = pimpl->id(); + cleanup_counter_++; + + // wait for all DelayedSet's to vanish + world.await([&]() { return (pimpl->num_live_ds() == 0); }, true); + + try { + world.gop.lazy_sync(id, [pimpl]() { + delete pimpl; + ArrayImpl_::cleanup_counter_--; + }); + } catch (madness::MadnessException& e) { + fprintf(stderr, + "!! ERROR TiledArray: madness::MadnessException thrown in " + "DistArray::lazy_deleter().\n" + "%s\n" + "!! ERROR TiledArray: The exception has been absorbed.\n" + "!! ERROR TiledArray: rank=%i\n", + e.what(), world.rank()); + + cleanup_counter_--; + delete pimpl; + } catch (std::exception& e) { + fprintf(stderr, + "!! ERROR TiledArray: std::exception thrown in " + "DistArray::lazy_deleter().\n" + "%s\n" + "!! ERROR TiledArray: The exception has been absorbed.\n" + "!! ERROR TiledArray: rank=%i\n", + e.what(), world.rank()); + + cleanup_counter_--; + delete pimpl; + } catch (...) { + fprintf(stderr, + "!! ERROR TiledArray: An unknown exception was thrown in " + "DistArray::lazy_deleter().\n" + "!! ERROR TiledArray: The exception has been absorbed.\n" + "!! ERROR TiledArray: rank=%i\n", + world.rank()); + + cleanup_counter_--; + delete pimpl; + } + } else { + delete pimpl; + } + } + } + public: /// Constructor @@ -453,7 +517,32 @@ class ArrayImpl : public TensorImpl { ArrayImpl(World& world, const trange_type& trange, const shape_type& shape, const std::shared_ptr& pmap) : TensorImpl_(world, trange, shape, pmap), - data_(world, trange.tiles_range().volume(), pmap) {} + data_(world, trange.tiles_range().volume(), pmap) { + // Validate the process map + TA_ASSERT(pmap->size() == trange.tiles_range().volume() && + "TiledArray::DistArray::DistArray() -- The size of the process " + "map is not " + "equal to the number of tiles in the TiledRange object."); + TA_ASSERT(pmap->rank() == + typename pmap_interface::size_type(world.rank()) && + "TiledArray::DistArray::DistArray() -- The rank of the process " + "map is not equal to that " + "of the world object."); + TA_ASSERT(pmap->procs() == + typename pmap_interface::size_type(world.size()) && + "TiledArray::DistArray::DistArray() -- The number of processes " + "in the process map is not " + "equal to that of the world object."); + + // Validate the shape + TA_ASSERT( + !shape.empty() && + "TiledArray::DistArray::DistArray() -- The shape is not initialized."); + TA_ASSERT(shape.validate(trange.tiles_range()) && + "TiledArray::DistArray::DistArray() -- The range of the shape is " + "not equal to " + "the tiles range."); + } /// Virtual destructor virtual ~ArrayImpl() {} @@ -649,8 +738,80 @@ class ArrayImpl : public TensorImpl { return data_.num_live_df(); } + /// Initialize (local) tiles with a user provided functor + + /// This function is used to initialize the local, non-zero tiles of the array + /// via a function (or functor). The work is done in parallel, therefore \c op + /// must be a thread safe function/functor. The signature of the functor + /// should be: + /// \code + /// value_type op(const range_type&) + /// \endcode + /// For example, in the following code, the array tiles are initialized with + /// random numbers from 0 to 1: + /// \code + /// array.init_tiles([] (const TiledArray::Range& range) -> + /// TiledArray::Tensor + /// { + /// // Initialize the tile with the given range object + /// TiledArray::Tensor tile(range); + /// + /// // Initialize the random number generator + /// std::default_random_engine generator; + /// std::uniform_real_distribution distribution(0.0,1.0); + /// + /// // Fill the tile with random numbers + /// for(auto& value : tile) + /// value = distribution(generator); + /// + /// return tile; + /// }); + /// \endcode + /// \tparam Op The type of the functor/function + /// \param[in] op The operation used to generate tiles + /// \param[in] skip_set If false, will throw if any tiles are already set + /// \throw TiledArray::Exception if the PIMPL is not set. Strong throw + /// guarantee. + /// \throw TiledArray::Exception if a tile is already set and skip_set is + /// false. Weak throw guarantee. + template + void init_tiles(Op&& op, bool skip_set = false) { + // lifetime management of op depends on whether it is a lvalue ref (i.e. has + // an external owner) or an rvalue ref + // - if op is an lvalue ref: pass op to tasks + // - if op is an rvalue ref pass make_shared_function(op) to tasks + auto op_shared_handle = make_op_shared_handle(std::forward(op)); + + auto it = this->pmap()->begin(); + const auto end = this->pmap()->end(); + for (; it != end; ++it) { + const auto& index = *it; + if (!this->is_zero(index)) { + if (skip_set) { + auto& fut = this->get_local(index); + if (fut.probe()) continue; + } + if constexpr (Exec == HostExecutor::MADWorld) { + Future tile = this->world().taskq.add( + [this_sptr = this->shared_from_this(), + index = ordinal_type(index), op_shared_handle]() -> value_type { + return op_shared_handle( + this_sptr->trange().make_tile_range(index)); + }); + set(index, std::move(tile)); + } else { + static_assert(Exec == HostExecutor::Thread); + set(index, op_shared_handle(this->trange().make_tile_range(index))); + } + } + } + } + }; // class ArrayImpl +template +madness::AtomicInt ArrayImpl::cleanup_counter_; + #ifndef TILEDARRAY_HEADER_ONLY extern template class ArrayImpl, DensePolicy>; @@ -673,6 +834,167 @@ extern template class ArrayImpl>, SparsePolicy>; #endif // TILEDARRAY_HEADER_ONLY +template +void write_tile_block(madness::uniqueidT target_array_id, + std::size_t target_tile_ord, + const Tile& target_tile_contribution) { + auto* world_ptr = World::world_from_id(target_array_id.get_world_id()); + auto target_array_ptr_opt = + world_ptr->ptr_from_id::storage_type>( + target_array_id); + TA_ASSERT(target_array_ptr_opt); + TA_ASSERT((*target_array_ptr_opt)->is_local(target_tile_ord)); + (*target_array_ptr_opt) + ->get_local(target_tile_ord) + .get() + .block(target_tile_contribution.range()) = target_tile_contribution; +} + +template +std::shared_ptr> make_with_new_trange( + const std::shared_ptr>& source_array_sptr, + const TiledRange& target_trange, + typename ArrayImpl::numeric_type new_value_fill = + typename ArrayImpl::numeric_type{0}) { + TA_ASSERT(source_array_sptr); + auto& source_array = *source_array_sptr; + auto& world = source_array.world(); + const auto rank = source_array.trange().rank(); + TA_ASSERT(rank == target_trange.rank()); + + // compute metadata + // - list of target tile indices and the corresponding Range1 for each 1-d + // source tile + using target_tiles_t = std::vector>; + using mode_target_tiles_t = std::vector; + using all_target_tiles_t = std::vector; + + all_target_tiles_t all_target_tiles(target_trange.rank()); + // for each mode ... + for (auto d = 0; d != target_trange.rank(); ++d) { + mode_target_tiles_t& mode_target_tiles = all_target_tiles[d]; + auto& target_tr1 = target_trange.dim(d); + auto& target_element_range = target_tr1.elements_range(); + // ... and each tile in that mode ... + for (auto&& source_tile : source_array.trange().dim(d)) { + mode_target_tiles.emplace_back(); + auto& target_tiles = mode_target_tiles.back(); + auto source_tile_lo = source_tile.lobound(); + auto source_tile_up = source_tile.upbound(); + auto source_element_idx = source_tile_lo; + // ... find all target tiles what overlap with it + if (target_element_range.overlaps_with(source_tile)) { + while (source_element_idx < source_tile_up) { + if (target_element_range.includes(source_element_idx)) { + auto target_tile_idx = + target_tr1.element_to_tile(source_element_idx); + auto target_tile = target_tr1.tile(target_tile_idx); + auto target_lo = + std::max(source_element_idx, target_tile.lobound()); + auto target_up = std::min(source_tile_up, target_tile.upbound()); + target_tiles.emplace_back(target_tile_idx, + Range1(target_lo, target_up)); + source_element_idx = target_up; + } else if (source_element_idx < target_element_range.lobound()) { + source_element_idx = target_element_range.lobound(); + } else if (source_element_idx >= target_element_range.upbound()) + break; + } + } + } + } + + // estimate the shape, if sparse + // use max value for each nonzero tile, then will recompute after tiles are + // assigned + using shape_type = typename Policy::shape_type; + shape_type target_shape; + const auto& target_tiles_range = target_trange.tiles_range(); + if constexpr (!is_dense_v) { + // each rank computes contributions to the shape norms from its local tiles + Tensor target_shape_norms(target_tiles_range, 0); + auto& source_trange = source_array.trange(); + const auto e = source_array.cend(); + for (auto it = source_array.cbegin(); it != e; ++it) { + auto source_tile_idx = it.index(); + + // make range for iterating over all possible target tile idx combinations + TA::Index target_tile_ord_extent_range(rank); + for (auto d = 0; d != rank; ++d) { + target_tile_ord_extent_range[d] = + all_target_tiles[d][source_tile_idx[d]].size(); + } + + // loop over every target tile combination + TA::Range target_tile_ord_extent(target_tile_ord_extent_range); + for (auto& target_tile_ord : target_tile_ord_extent) { + TA::Index target_tile_idx(rank); + for (auto d = 0; d != rank; ++d) { + target_tile_idx[d] = + all_target_tiles[d][source_tile_idx[d]][target_tile_ord[d]].first; + } + target_shape_norms(target_tile_idx) = std::numeric_limits::max(); + } + } + world.gop.max(target_shape_norms.data(), target_shape_norms.size()); + target_shape = SparseShape(target_shape_norms, target_trange); + } + + using Array = ArrayImpl; + auto target_array_sptr = std::shared_ptr( + new Array( + source_array.world(), target_trange, target_shape, + Policy::default_pmap(world, target_trange.tiles_range().volume())), + Array::lazy_deleter); + auto& target_array = *target_array_sptr; + target_array.init_tiles([value = new_value_fill](const Range& range) { + return typename Array::value_type(range, value); + }); + target_array.world().gop.fence(); + + // loop over local tile and sends its contributions to the targets + { + auto& source_trange = source_array.trange(); + const auto e = source_array.cend(); + auto& target_tiles_range = target_trange.tiles_range(); + for (auto it = source_array.cbegin(); it != e; ++it) { + const auto& source_tile = *it; + auto source_tile_idx = it.index(); + + // make range for iterating over all possible target tile idx combinations + TA::Index target_tile_ord_extent_range(rank); + for (auto d = 0; d != rank; ++d) { + target_tile_ord_extent_range[d] = + all_target_tiles[d][source_tile_idx[d]].size(); + } + + // loop over every target tile combination + TA::Range target_tile_ord_extent(target_tile_ord_extent_range); + for (auto& target_tile_ord : target_tile_ord_extent) { + TA::Index target_tile_idx(rank); + container::svector target_tile_rngs1(rank); + for (auto d = 0; d != rank; ++d) { + std::tie(target_tile_idx[d], target_tile_rngs1[d]) = + all_target_tiles[d][source_tile_idx[d]][target_tile_ord[d]]; + } + TA_ASSERT(source_tile.future().probe()); + Tile target_tile_contribution( + source_tile.get().block(target_tile_rngs1)); + auto target_tile_idx_ord = target_tiles_range.ordinal(target_tile_idx); + auto target_proc = target_array.pmap()->owner(target_tile_idx_ord); + world.taskq.add(target_proc, &write_tile_block, + target_array.id(), target_tile_idx_ord, + target_tile_contribution); + } + } + } + // data is mutated in place, so must wait for all tasks to complete + target_array.world().gop.fence(); + // WARNING!! need to truncate in DistArray ctor + + return target_array_sptr; +} + } // namespace detail } // namespace TiledArray diff --git a/src/TiledArray/conversions/retile.h b/src/TiledArray/conversions/retile.h index 26440166c4..9f0f4cab4a 100644 --- a/src/TiledArray/conversions/retile.h +++ b/src/TiledArray/conversions/retile.h @@ -22,8 +22,9 @@ #ifndef TILEDARRAY_RETILE_H #define TILEDARRAY_RETILE_H -#include "TiledArray/util/annotation.h" #include "TiledArray/special/diagonal_array.h" +#include "TiledArray/special/kronecker_delta.h" +#include "TiledArray/util/annotation.h" /// \name Retile function /// \brief Retiles a tensor with a provided TiledRange @@ -38,9 +39,11 @@ namespace TiledArray { -template -auto retile(const DistArray& tensor, - const TiledRange& new_trange) { +namespace detail { + +template +auto retile_v0(const DistArray& tensor, + const TiledRange& new_trange) { // Make sure ranks match auto rank = new_trange.rank(); auto tensor_rank = tensor.trange().rank(); @@ -67,11 +70,13 @@ auto retile(const DistArray& tensor, }; // Check the different dimensions and contract when needed - using tensor_type = DistArray; + using tensor_type = DistArray; auto start = detail::dummy_annotation(rank); tensor_type output_tensor; for (auto i = 0; i < rank; ++i) { - if (i == 0) { output_tensor(start) = tensor(start); } + if (i == 0) { + output_tensor(start) = tensor(start); + } if (new_trange.dim(i) != tensor.trange().dim(i)) { // Make identity for contraction TiledRange retiler{tensor.trange().dim(i), new_trange.dim(i)}; @@ -88,7 +93,80 @@ auto retile(const DistArray& tensor, return output_tensor; } -} // namespace TiledArray +template +auto retile_v1(const DistArray& tensor, + const TiledRange& new_trange) { + // Make sure ranks match + auto rank = new_trange.rank(); + auto tensor_rank = tensor.trange().rank(); + assert((rank == tensor_rank) && "TiledRanges are of different ranks"); + + // Makes the annotations for the contraction step + auto annotations = [&]() -> std::tuple { + std::ostringstream final, switcher; + final << "j0"; + switcher << "j0"; + for (unsigned int d = 1; d < rank; ++d) { + final << ",j" << d; + switcher << ",j" << d; + } + for (unsigned int d = 0; d < rank; ++d) { + switcher << ",i" << d; + } + return {final.str(), switcher.str()}; + }; + + // Check the different dimensions and contract when needed + using Array = DistArray; + container::svector retiler_ranges; + for (auto i = 0; i < rank; ++i) { + retiler_ranges.emplace_back(new_trange.dim(i)); + } + for (auto i = 0; i < rank; ++i) { + retiler_ranges.emplace_back(tensor.trange().dim(i)); + } + TA::TiledRange retiler_range(retiler_ranges); + TA::DistArray retiler( + tensor.world(), retiler_range, + SparseShape(kronecker_shape(retiler_range), retiler_range), + std::make_shared( + tensor.world(), retiler_range.tiles_range().volume())); + retiler.init_tiles([=](const TiledArray::Range& range) { + return KroneckerDeltaTile(range); + }); + + // Make indices for contraction + + // Retile + Array output; + auto start = detail::dummy_annotation(rank); + auto [finish, change] = annotations(); + output(finish) = retiler(change) * tensor(start); + + return output; +} + +template +auto retile_v2(const DistArray& source_array, + const TiledRange& target_trange) { + return DistArray(source_array, target_trange); +} + +} // namespace detail + +/// Creates a new DistArray with the same data as the input tensor, but with a +/// different trange. The primary use-case is to change tiling while keeping the +/// element range the same, but it can be used to select blocks of the data as +/// well as increasing the element range (with the new elements initialized to +/// zero) +/// \param array The DistArray whose data is to be retiled +/// \param target_trange The desired TiledRange of the output tensor +template +auto retile(const DistArray& array, + const TiledRange& target_trange) { + return detail::retile_v2(array, target_trange); +} +} // namespace TiledArray #endif // TILEDARRAY_RETILE_H diff --git a/src/TiledArray/conversions/vector_of_arrays.h b/src/TiledArray/conversions/vector_of_arrays.h index 9de3bf8d09..8b3f5ea8a4 100644 --- a/src/TiledArray/conversions/vector_of_arrays.h +++ b/src/TiledArray/conversions/vector_of_arrays.h @@ -5,8 +5,6 @@ #ifndef TILEDARRAY_CONVERSIONS_VECTOR_OF_ARRAYS_H_ #define TILEDARRAY_CONVERSIONS_VECTOR_OF_ARRAYS_H_ -#include - namespace TiledArray { namespace detail { diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 167464ccfb..c2645dd7ce 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -163,67 +163,6 @@ class DistArray : public madness::archive::ParallelSerializableObject { false; ///< if true, the impl object is scheduled to be destroyed in the ///< next fence - static madness::AtomicInt cleanup_counter_; - - /// Array deleter function - - /// This function schedules a task for lazy cleanup. Array objects are - /// deleted only after the object has been deleted in all processes. - /// \param pimpl The implementation pointer to be deleted. - static void lazy_deleter(const impl_type* const pimpl) { - if (pimpl) { - if (madness::initialized()) { - World& world = pimpl->world(); - const madness::uniqueidT id = pimpl->id(); - cleanup_counter_++; - - // wait for all DelayedSet's to vanish - world.await([&]() { return (pimpl->num_live_ds() == 0); }, true); - - try { - world.gop.lazy_sync(id, [pimpl]() { - delete pimpl; - DistArray::cleanup_counter_--; - }); - } catch (madness::MadnessException& e) { - fprintf(stderr, - "!! ERROR TiledArray: madness::MadnessException thrown in " - "Array::lazy_deleter().\n" - "%s\n" - "!! ERROR TiledArray: The exception has been absorbed.\n" - "!! ERROR TiledArray: rank=%i\n", - e.what(), world.rank()); - - cleanup_counter_--; - delete pimpl; - } catch (std::exception& e) { - fprintf(stderr, - "!! ERROR TiledArray: std::exception thrown in " - "Array::lazy_deleter().\n" - "%s\n" - "!! ERROR TiledArray: The exception has been absorbed.\n" - "!! ERROR TiledArray: rank=%i\n", - e.what(), world.rank()); - - cleanup_counter_--; - delete pimpl; - } catch (...) { - fprintf(stderr, - "!! ERROR TiledArray: An unknown exception was thrown in " - "Array::lazy_deleter().\n" - "!! ERROR TiledArray: The exception has been absorbed.\n" - "!! ERROR TiledArray: rank=%i\n", - world.rank()); - - cleanup_counter_--; - delete pimpl; - } - } else { - delete pimpl; - } - } - } - /// Sparse array initialization /// \param world The world where the array will live. @@ -239,34 +178,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { if (!pmap) { // Construct a default process map pmap = Policy::default_pmap(world, trange.tiles_range().volume()); - } else { - // Validate the process map - TA_ASSERT(pmap->size() == trange.tiles_range().volume() && - "TiledArray::DistArray::DistArray() -- The size of the process " - "map is not " - "equal to the number of tiles in the TiledRange object."); - TA_ASSERT(pmap->rank() == - typename pmap_interface::size_type(world.rank()) && - "TiledArray::DistArray::DistArray() -- The rank of the process " - "map is not equal to that " - "of the world object."); - TA_ASSERT(pmap->procs() == - typename pmap_interface::size_type(world.size()) && - "TiledArray::DistArray::DistArray() -- The number of processes " - "in the process map is not " - "equal to that of the world object."); } - // Validate the shape - TA_ASSERT( - !shape.empty() && - "TiledArray::DistArray::DistArray() -- The shape is not initialized."); - TA_ASSERT(shape.validate(trange.tiles_range()) && - "TiledArray::DistArray::DistArray() -- The range of the shape is " - "not equal to " - "the tiles range."); - - return pimpl_type(new impl_type(world, trange, shape, pmap), lazy_deleter); + return pimpl_type(new impl_type(world, trange, shape, pmap), + impl_type::lazy_deleter); } public: @@ -541,6 +456,17 @@ class DistArray : public madness::archive::ParallelSerializableObject { : DistArray(array_from_il(get_default_world(), trange, il)) {} /// @} + /// "copy" constructor that replaces the TiledRange + + /// This constructor remaps the data of \p other according to \p new_trange , + /// with \p new_value_fill used to fill the new elements, if any + DistArray(const DistArray& other, const trange_type& new_trange, + numeric_type new_value_fill = numeric_type{0}) + : pimpl_( + make_with_new_trange(other.pimpl(), new_trange, new_value_fill)) { + this->truncate(); + } + /// converting copy constructor /// This constructor uses the meta data of `other` to initialize the meta @@ -647,10 +573,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \throw madness::MadnessException When timeout has been exceeded. static void wait_for_lazy_cleanup(World& world, const double = 60.0) { try { - world.await([&]() { return (cleanup_counter_ == 0); }, true); + world.await([&]() { return (impl_type::cleanup_counter_ == 0); }, true); } catch (...) { printf("%i: Array lazy cleanup timeout with %i pending cleanup(s)\n", - world.rank(), int(cleanup_counter_)); + world.rank(), int(impl_type::cleanup_counter_)); throw; } } @@ -869,10 +795,9 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// first minimally contains the same number of elements as /// the tile. /// \throw TiledArray::Exception if the tile is already initialized. - template < - typename Integer, typename InIter, - typename = std::enable_if_t<(std::is_integral_v) && - detail::is_input_iterator::value>> + template )&&detail:: + is_input_iterator::value>> typename std::enable_if::value>::type set( const std::initializer_list& i, InIter first) { set>(i, first); @@ -965,9 +890,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \throw TiledArray::Exception if index \c i has the wrong rank. Strong /// throw guarantee. /// \throw TiledArray::Exception if tile \c i is already set. - template ) && - is_value_or_future_to_value_v>> + template < + typename Index, typename Value, + typename = std::enable_if_t< + (std::is_integral_v)&&is_value_or_future_to_value_v>> void set(const std::initializer_list& i, Value&& v) { set>(i, std::forward(v)); } @@ -1061,34 +987,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// false. Weak throw guarantee. template void init_tiles(Op&& op, bool skip_set = false) { - // lifetime management of op depends on whether it is a lvalue ref (i.e. has - // an external owner) or an rvalue ref - // - if op is an lvalue ref: pass op to tasks - // - if op is an rvalue ref pass make_shared_function(op) to tasks - auto op_shared_handle = make_op_shared_handle(std::forward(op)); - - auto it = impl_ref().pmap()->begin(); - const auto end = pimpl_->pmap()->end(); - for (; it != end; ++it) { - const auto& index = *it; - if (!pimpl_->is_zero(index)) { - if (skip_set) { - auto fut = find_local(index); - if (fut.probe()) continue; - } - if constexpr (Exec == HostExecutor::MADWorld) { - Future tile = pimpl_->world().taskq.add( - [pimpl = pimpl_, index = ordinal_type(index), - op_shared_handle]() -> value_type { - return op_shared_handle(pimpl->trange().make_tile_range(index)); - }); - set(index, std::move(tile)); - } else { - static_assert(Exec == HostExecutor::Thread); - set(index, op_shared_handle(trange().make_tile_range(index))); - } - } - } + impl_ref().template init_tiles(std::forward(op), skip_set); } /// Initialize elements of local, non-zero tiles with a user provided functor @@ -1827,9 +1726,6 @@ class DistArray : public madness::archive::ParallelSerializableObject { }; // class DistArray -template -madness::AtomicInt DistArray::cleanup_counter_; - #ifndef TILEDARRAY_HEADER_ONLY extern template class DistArray, DensePolicy>; diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 7e6e4251b3..41797efafa 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -361,7 +361,8 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { container::svector ix1s = rng.lobound(); { - auto dlo = delta_trange.make_tile_range(r).lobound(); + auto d = delta_trange.make_tile_range(r); + auto dlo = d.lobound(); std::copy(dlo.begin(), dlo.end(), std::back_inserter(ix1s)); } diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 7f411eaeba..073e8bacd3 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -147,6 +147,23 @@ class SparseShape; template class DistArray; +/// Type trait to detect dense shape types +template +struct is_dense : public std::false_type {}; + +template <> +struct is_dense : public std::true_type {}; + +template <> +struct is_dense : public std::true_type {}; + +template +struct is_dense> + : public is_dense::shape_type> {}; + +template +constexpr const bool is_dense_v = is_dense::value; + // Dense Array Typedefs template using TArray = DistArray, DensePolicy>; diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index 8c82485211..9065776211 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -28,7 +28,6 @@ #include #include -#include namespace TiledArray::math::cp { diff --git a/src/TiledArray/math/solvers/cp/cp_reconstruct.h b/src/TiledArray/math/solvers/cp/cp_reconstruct.h index b09165d335..283f96bb76 100644 --- a/src/TiledArray/math/solvers/cp/cp_reconstruct.h +++ b/src/TiledArray/math/solvers/cp/cp_reconstruct.h @@ -29,7 +29,6 @@ #include #include #include -#include namespace TiledArray::math::cp { diff --git a/src/TiledArray/meta.h b/src/TiledArray/meta.h index 9dc4ac9f55..18a1bf69f9 100644 --- a/src/TiledArray/meta.h +++ b/src/TiledArray/meta.h @@ -1,70 +1,9 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2017 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Eduard Valeyev - * Department of Chemistry, Virginia Tech - * - * meta.h - * April 11, 2017 - * - */ - #ifndef SRC_TILEDARRAY_META_H_ #define SRC_TILEDARRAY_META_H_ -#include -#include -#include -#include - -namespace TiledArray { -namespace meta { - -/// ||'s bools -template -struct or_reduce { - static constexpr bool value = head || or_reduce::value; -}; - -template -struct or_reduce { - static constexpr bool value = b; -}; - -// is any argument a Future? -// - yes: async launch -// - no: direct launch -template -auto invoke(Function&& fn, Args&&... args) -> typename std::enable_if< - !or_reduce>::value...>::value, - decltype(fn(args...))>::type { - return fn(std::forward(args)...); -} - -template < - typename Function, typename... Args, - typename = typename std::enable_if>::value...>::value>::type> -auto invoke(Function&& fn, Args&&... args) { - return TiledArray::get_default_world().taskq.add(std::forward(fn), - std::forward(args)...); -} +#pragma message( \ + "Header `TiledArray/meta.h` is deprecated, use `TiledArray/util/invoke.h` instead.") -} // namespace meta -} // namespace TiledArray +#include #endif // SRC_TILEDARRAY_META_H_ diff --git a/src/TiledArray/range1.h b/src/TiledArray/range1.h index ef6d422dcc..dbb4b05a67 100644 --- a/src/TiledArray/range1.h +++ b/src/TiledArray/range1.h @@ -74,6 +74,9 @@ struct Range1 { /// @return the extent of this range, i.e. second - first auto extent() const noexcept { return second - first; } + /// @return the volume of this range, i.e. second - first + auto volume() const noexcept { return second - first; } + /// swaps `*this` with @p other /// @p other a Range1 object void swap(Range1& other) noexcept { @@ -87,6 +90,21 @@ struct Range1 { return std::make_pair(first, second); } + /// Checks if a given index is within this range + /// @return true if \p i is within this range + template + typename std::enable_if::value, bool>::type includes( + const I& i) const { + return first <= i && i < second; + } + + /// Checks if a given range overlaps with this range + + /// @return true if \p r overlaps with this range + bool overlaps_with(const Range1& rng) const { + return lobound() < rng.upbound() && upbound() > rng.lobound(); + } + /// \brief Range1 iterator type /// /// Iterates over Range1 @@ -150,14 +168,14 @@ struct Range1 { typename std::enable_if>>::type* = nullptr> void serialize(Archive& ar) { - ar& first& second; + ar & first & second; } template >>::type* = nullptr> void serialize(Archive& ar) const { - ar& first& second; + ar & first & second; } }; diff --git a/src/TiledArray/shape.h b/src/TiledArray/shape.h index b630d7e019..9b8de8f6ef 100644 --- a/src/TiledArray/shape.h +++ b/src/TiledArray/shape.h @@ -23,29 +23,4 @@ #include #include -namespace TiledArray { - -template -class DistArray; -class DensePolicy; - -/// Type trait to detect dense shape types -template -struct is_dense : public std::false_type {}; - -template <> -struct is_dense : public std::true_type {}; - -template <> -struct is_dense : public std::true_type {}; - -template -struct is_dense > - : public is_dense::shape_type> {}; - -template -constexpr const bool is_dense_v = is_dense::value; - -} // namespace TiledArray - #endif // TILEDARRAY_SHAPE_H__INCLUDED diff --git a/src/TiledArray/special/diagonal_array.h b/src/TiledArray/special/diagonal_array.h index dd62db1498..d60b23db94 100644 --- a/src/TiledArray/special/diagonal_array.h +++ b/src/TiledArray/special/diagonal_array.h @@ -31,6 +31,8 @@ #include #include +#include + #include namespace TiledArray { @@ -43,7 +45,7 @@ namespace detail { /// empty Range /// \param[in] rng an input (rank-d) Range /// \return the range of diagonal elements, as a rank-1 Range -inline Range diagonal_range(Range const &rng) { +inline Range1 diagonal_range(Range const &rng) { const auto rank = rng.rank(); TA_ASSERT(rng.rank() > 0); auto lo = rng.lobound_data(); @@ -56,92 +58,64 @@ inline Range diagonal_range(Range const &rng) { // If the max small elem is less than the min large elem then a diagonal // elem is in this tile; if (max_low < min_up) { - return Range({max_low}, {min_up}); + return Range1{max_low, min_up}; } else { - return Range(); + return Range1{}; } } -/// \brief computes shape data (i.e. Frobenius norms of the tiles) for a -/// constant diagonal tensor -/// \tparam T a numeric type -/// \param trange a TiledRange of the result -/// \param val value of the diagonal elements -/// \return a Tensor containing the Frobenius norms of -/// the tiles of a DistArray with \p val on the diagonal and -/// zeroes elsewhere -template -Tensor diagonal_shape(TiledRange const &trange, T val) { - Tensor shape(trange.tiles_range(), 0.0); - - auto ext = trange.elements_range().extent(); - auto diag_extent = *std::min_element(std::begin(ext), std::end(ext)); - - auto ndim = trange.rank(); - auto diag_elem = 0ul; - // the diagonal elements will never be larger than the length of the - // shortest dimension - while (diag_elem < diag_extent) { - // Get the tile index corresponding to the current diagonal_elem - auto tile_idx = - trange.element_to_tile(container::svector(ndim, diag_elem)); - auto tile_range = trange.make_tile_range(tile_idx); - - // Compute the range of diagonal elements in the tile - auto d_range = diagonal_range(tile_range); - - // Since each diag elem has the same value the norm of the tile is - // \sqrt{\sum_{diag} val^2} = \sqrt{ndiags * val^2} - float t_norm = std::sqrt(val * val * d_range.volume()); - shape(tile_idx) = t_norm; - - // Update diag_elem to the next elem not in this tile - diag_elem = d_range.upbound_data()[0]; - } - - return shape; -} - /// \brief computes shape data (i.e. Frobenius norms of the tiles) for a /// non-constant diagonal tensor /// \tparam RandomAccessIterator an iterator over /// the range of diagonal elements +/// \tparam Sentinel sentinel type for the range of diagonal elements /// \param[in] trange a TiledRange of the result /// \param[in] diagonals_begin the begin iterator of the range of the diagonals /// \param[in] diagonals_end the end iterator of the range of the diagonals; if /// not given, default initialized and thus will not be checked /// \return a Tensor containing the Frobenius norms of the tiles of /// a DistArray with \p val on the diagonal and zeroes elsewhere -template +template std::enable_if_t::value, Tensor> diagonal_shape(TiledRange const &trange, RandomAccessIterator diagonals_begin, - RandomAccessIterator diagonals_end = {}) { - const bool have_end = diagonals_end == RandomAccessIterator{}; + Sentinel diagonals_end = {}) { + bool have_end = false; + if constexpr (detail::is_equality_comparable_v) { + have_end = diagonals_end != Sentinel{}; + } Tensor shape(trange.tiles_range(), 0.0); const auto rank = trange.rank(); - auto ext = trange.elements_range().extent_data(); - auto diag_extent = *std::min_element(ext, ext + rank); + TA_ASSERT(rank > 0); + const auto *lobound = trange.elements_range().lobound_data(); + const auto diag_lobound = *std::max_element(lobound, lobound + rank); + const auto *upbound = trange.elements_range().upbound_data(); + const auto diag_upbound = *std::min_element(upbound, upbound + rank); - auto ndim = trange.rank(); - auto diag_elem = 0ul; + auto diag_elem = diag_lobound; // the diagonal elements will never be larger than the length of the // shortest dimension - while (diag_elem < diag_extent) { + while (diag_elem < diag_upbound) { // Get the tile index corresponding to the current diagonal_elem - auto tile_idx = trange.element_to_tile(std::vector(ndim, diag_elem)); + auto tile_idx = trange.element_to_tile(Index(rank, diag_elem)); auto tile_range = trange.make_tile_range(tile_idx); // Compute the range of diagonal elements in the tile auto d_range = diagonal_range(tile_range); - TA_ASSERT(d_range != Range{}); - TA_ASSERT(diag_elem == d_range.lobound_data()[0]); - const auto beg = diag_elem; - const auto end = d_range.upbound_data()[0]; + TA_ASSERT(d_range != Range1{}); + TA_ASSERT(diag_elem == d_range.lobound()); + const auto beg = d_range.lobound(); + const auto end = d_range.upbound(); if (have_end) { - TA_ASSERT(diagonals_begin + beg < diagonals_end); - TA_ASSERT(diagonals_begin + end <= diagonals_end); + if constexpr (detail::are_less_than_comparable_v) { + TA_ASSERT(diagonals_begin + beg < diagonals_end); + } + if constexpr (detail::are_less_than_or_equal_comparable_v< + RandomAccessIterator, Sentinel>) { + TA_ASSERT(diagonals_begin + end <= diagonals_end); + } } auto t_norm = std::accumulate(diagonals_begin + beg, diagonals_begin + end, @@ -149,7 +123,7 @@ diagonal_shape(TiledRange const &trange, RandomAccessIterator diagonals_begin, const auto abs_val = std::abs(val); return sum + abs_val * abs_val; }); - shape(tile_idx) = static_cast(t_norm); + shape(tile_idx) = std::sqrt(static_cast(t_norm)); // Update diag_elem to the next elem not in this tile diag_elem = end; @@ -158,36 +132,18 @@ diagonal_shape(TiledRange const &trange, RandomAccessIterator diagonals_begin, return shape; } -/// \brief Writes tiles of a constant diagonal array - -/// \tparam Array a DistArray type +/// \brief computes shape data (i.e. Frobenius norms of the tiles) for a +/// constant diagonal tensor /// \tparam T a numeric type -/// \param[in] A an Array object -/// \param[in] val the value of the diagonal elements of A -template -void write_diag_tiles_to_array_val(Array &A, T val) { - using Tile = typename Array::value_type; - - // Task to create each tile - A.init_tiles([val](const Range &rng) { - // Compute range of diagonal elements in the tile - auto diags = detail::diagonal_range(rng); - const auto rank = rng.rank(); - - Tile tile(rng, 0.0); - - if (diags.volume() > 0) { // If the tile has diagonal elems - - // Loop over the elements and write val into them - auto diag_lo = diags.lobound_data()[0]; - auto diag_hi = diags.upbound_data()[0]; - for (auto elem = diag_lo; elem < diag_hi; ++elem) { - tile(std::vector(rank, elem)) = val; - } - } - - return tile; - }); +/// \param trange a TiledRange of the result +/// \param val value of the diagonal elements +/// \return a Tensor containing the Frobenius norms of +/// the tiles of a DistArray with \p val on the diagonal and +/// zeroes elsewhere +template +Tensor diagonal_shape(TiledRange const &trange, T val) { + auto val_range = ranges::views::repeat(val); + return diagonal_shape(trange, val_range.begin(), val_range.end()); } /// \brief Writes tiles of a nonconstant diagonal array @@ -212,10 +168,11 @@ write_diag_tiles_to_array_rng(Array &A, RandomAccessIterator diagonals_begin) { if (diags.volume() > 0) { // If the tile has diagonal elems // Loop over the elements and write val into them - auto diag_lo = diags.lobound_data()[0]; - auto diag_hi = diags.upbound_data()[0]; - for (auto elem = diag_lo; elem < diag_hi; ++elem) { - tile(std::vector(rank, elem)) = *(diagonals_begin + elem); + auto diag_lo = diags.lobound(); + auto diag_hi = diags.upbound(); + auto elem_it = diagonals_begin + diag_lo; + for (auto elem = diag_lo; elem < diag_hi; ++elem, ++elem_it) { + tile(Index(rank, elem)) = *elem_it; } } @@ -225,36 +182,6 @@ write_diag_tiles_to_array_rng(Array &A, RandomAccessIterator diagonals_begin) { } // namespace detail -/// \brief Creates a constant diagonal DistArray - -/// Creates an array whose only nonzero values are the (hyper)diagonal elements -/// (i.e. (n,n,n, ..., n) ), and they are all have the same value -/// \tparam Policy the policy type of the resulting DistArray -/// \tparam T a numeric type -/// \param world The world for the array -/// \param[in] trange The trange for the array -/// \param[in] val The value of the diagonal elements -/// \return a constant diagonal DistArray -template -Array diagonal_array(World &world, TiledRange const &trange, T val = 1) { - using Policy = typename Array::policy_type; - // Init the array - if constexpr (is_dense_v) { - Array A(world, trange); - detail::write_diag_tiles_to_array_val(A, val); - return A; - } else { - // Compute shape and init the Array - auto shape_norm = detail::diagonal_shape(trange, val); - using ShapeType = typename Policy::shape_type; - ShapeType shape(shape_norm, trange); - Array A(world, trange, shape); - detail::write_diag_tiles_to_array_val(A, val); - return A; - } - abort(); // unreachable -} - /// \brief Creates a non-constant diagonal DistArray /// Creates an array whose only nonzero values are the (hyper)diagonal elements @@ -262,30 +189,39 @@ Array diagonal_array(World &world, TiledRange const &trange, T val = 1) { /// input range /// \tparam Array a DistArray type /// \tparam RandomAccessIterator an iterator over the range of diagonal elements +/// \tparam Sentinel sentinel type for the range of diagonal elements /// \param world The world for the array /// \param[in] trange The trange for the array /// \param[in] diagonals_begin the begin iterator of the range of the diagonals /// \param[in] diagonals_end the end iterator of the range of the diagonals; /// if not given, default initialized and thus will not be checked /// \return a diagonal DistArray -template +template std::enable_if_t::value, Array> diagonal_array(World &world, TiledRange const &trange, RandomAccessIterator diagonals_begin, - RandomAccessIterator diagonals_end = {}) { + Sentinel diagonals_end = {}) { using Policy = typename Array::policy_type; - if (diagonals_end != RandomAccessIterator{}) { - const auto rank = trange.rank(); - auto ext = trange.elements_range().extent_data(); - [[maybe_unused]] auto diag_extent = *std::min_element(ext, ext + rank); - TA_ASSERT(diagonals_begin + diag_extent <= diagonals_end); + if constexpr (detail::is_equality_comparable_v) { + if (diagonals_end != Sentinel{}) { + auto diagonals_range = detail::diagonal_range(trange.elements_range()); + if constexpr (detail::are_less_than_comparable_v) { + TA_ASSERT(diagonals_begin + diagonals_range.lobound() < diagonals_end); + } + if constexpr (detail::are_less_than_or_equal_comparable_v< + RandomAccessIterator, Sentinel>) { + TA_ASSERT(diagonals_begin + diagonals_range.upbound() <= diagonals_end); + } + } } // Init the array if constexpr (is_dense_v) { Array A(world, trange); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); + A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } else { // Compute shape and init the Array @@ -295,11 +231,29 @@ diagonal_array(World &world, TiledRange const &trange, ShapeType shape(shape_norm, trange); Array A(world, trange, shape); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); + A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } abort(); // unreachable } +/// \brief Creates a constant diagonal DistArray + +/// Creates an array whose only nonzero values are the (hyper)diagonal elements +/// (i.e. (n,n,n, ..., n) ), and they are all have the same value +/// \tparam Policy the policy type of the resulting DistArray +/// \tparam T a numeric type +/// \param world The world for the array +/// \param[in] trange The trange for the array +/// \param[in] val The value of the diagonal elements +/// \return a constant diagonal DistArray +template +Array diagonal_array(World &world, TiledRange const &trange, T val = 1) { + auto val_range = ranges::views::repeat(val); + return diagonal_array(world, trange, val_range.begin(), + val_range.end()); +} + } // namespace TiledArray #endif // TILEDARRAY_SPECIALARRAYS_DIAGONAL_ARRAY_H__INCLUDED diff --git a/src/TiledArray/special/kronecker_delta.h b/src/TiledArray/special/kronecker_delta.h index 2b1df03294..35a8da6e57 100644 --- a/src/TiledArray/special/kronecker_delta.h +++ b/src/TiledArray/special/kronecker_delta.h @@ -37,31 +37,28 @@ #include #include +namespace TiledArray { + +// clang-format off /// *generalized* (asymmetric) Kronecker delta -/// *generalized* (asymmetric) Kronecker delta is a product of \c _N ordinary -/// Kronecker deltas Definition: KroneckerDeltaTile(b,k) = (b==k) ? 1 : 0 -/// KroneckerDeltaTile(b0,k0,b1,k1,b2,k2...bN,kN) = KroneckerDeltaTile(b0,k0) -/// KroneckerDeltaTile(b1,k1) ...`KroneckerDeltaTile(bN,kN) -/// -/// \note This is a stateful data tile. Meant to be generated by its (stateless) -/// lazy generator, \c LazyKroneckerDeltaTile. -/// -/// \tparam _N the number of ordinal Kronecker deltas in this product -template +/// *generalized* (asymmetric) Kronecker delta is a product of `N` ordinary +/// Kronecker deltas +/// Definition: `KroneckerDeltaTile(b,k) = (b==k) ? 1 : 0` and +/// `KroneckerDeltaTile(b1,b2,...bN,k1,k2,...kN) = KroneckerDeltaTile(b0,k0) KroneckerDeltaTile(b1,k1) ... KroneckerDeltaTile(bN,kN)`. +/// The implicit layout is hardwired to `b0,b1,b2,...,bN,k0,k1,k2,...,kN` since the intended use is for taking slices. +// clang-format on class KroneckerDeltaTile { public: - // Constants - static constexpr unsigned N = _N; // Concept typedefs - typedef TiledArray::Range range_type; // range type - typedef int value_type; // Element type + typedef Range range_type; // range type + typedef int value_type; // Element type typedef value_type numeric_type; // The scalar type that is compatible with value_type typedef size_t size_type; // Size type private: - range_type range_; + range_type range_; // range_.rank() = 2*N bool empty_; public: @@ -69,8 +66,13 @@ class KroneckerDeltaTile { KroneckerDeltaTile() : empty_(true) {} /// Productive ctor 1 + /// \param[in] range the range of the tile, by definition must be even-order + /// such that the number of Kronecker deltas `N` is `range.rank() / 2` \pre + /// `range.rank() % 2 == 1` KroneckerDeltaTile(const range_type& range) - : range_(range), empty_(is_empty(range_)) {} + : range_(range), empty_(is_empty(range_)) { + TA_ASSERT(range.rank() % 2 == 0); + } /// copy constructor (= deep copy) KroneckerDeltaTile(const KroneckerDeltaTile&) = default; @@ -88,6 +90,9 @@ class KroneckerDeltaTile { bool empty() const { return empty_; } + /// \return the number of Kronecker deltas in the product + unsigned int N() const { return range_.rank() / 2; } + /// MADNESS compliant serialization template void serialize(Archive& ar) { @@ -100,13 +105,15 @@ class KroneckerDeltaTile { /// @return false if contains any nonzeros static bool is_empty(const range_type& range) { bool empty = false; - TA_ASSERT(range.rank() == 2 * N); + TA_ASSERT(range.rank() % 2 == 0); + const auto N = range.rank() / 2; auto lobound = range.lobound_data(); auto upbound = range.upbound_data(); - for (auto i = 0; i != 2 * N && not empty; i += 2) - empty = (upbound[i] > lobound[i + 1] && upbound[i + 1] > lobound[i]) - ? true - : false; // assumes extents > 0 + for (auto i = 0; i != N && not empty; ++i) { + const auto lo = std::max(lobound[i], lobound[i + N]); + const auto up = std::min(upbound[i], upbound[i + N]); + empty = lo >= up; + } return empty; } @@ -115,155 +122,191 @@ class KroneckerDeltaTile { // these are to satisfy interfaces, but not needed, actually // Sum of hyper diagonal elements -template -typename KroneckerDeltaTile<_N>::numeric_type trace( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type trace(const KroneckerDeltaTile& arg); // foreach(i) result += arg[i] -template -typename KroneckerDeltaTile<_N>::numeric_type sum( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type sum(const KroneckerDeltaTile& arg); // foreach(i) result *= arg[i] -template -typename KroneckerDeltaTile<_N>::numeric_type product( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type product( + const KroneckerDeltaTile& arg); // foreach(i) result += arg[i] * arg[i] -template -typename KroneckerDeltaTile<_N>::numeric_type squared_norm( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type squared_norm( + const KroneckerDeltaTile& arg); // foreach(i) result = min(result, arg[i]) -template -typename KroneckerDeltaTile<_N>::numeric_type min( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type min(const KroneckerDeltaTile& arg); // foreach(i) result = max(result, arg[i]) -template -typename KroneckerDeltaTile<_N>::numeric_type max( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type max(const KroneckerDeltaTile& arg); // foreach(i) result = abs_min(result, arg[i]) -template -typename KroneckerDeltaTile<_N>::numeric_type abs_min( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type abs_min( + const KroneckerDeltaTile& arg); // foreach(i) result = abs_max(result, arg[i]) -template -typename KroneckerDeltaTile<_N>::numeric_type abs_max( - const KroneckerDeltaTile<_N>& arg); +typename KroneckerDeltaTile::numeric_type abs_max( + const KroneckerDeltaTile& arg); // Permutation operation // returns a tile for which result[perm ^ i] = tile[i] -template < - unsigned N, typename Perm, - typename = std::enable_if_t>> -KroneckerDeltaTile permute(const KroneckerDeltaTile& tile, - const Perm& perm) { +template >> +KroneckerDeltaTile permute(const KroneckerDeltaTile& tile, const Perm& perm) { abort(); } // dense_result[i] = dense_arg1[i] * sparse_arg2[i] -template -TiledArray::Tensor mult(const KroneckerDeltaTile<_N>& arg1, - const TiledArray::Tensor& arg2) { +template +Tensor mult(const KroneckerDeltaTile& arg1, const Tensor& arg2) { abort(); } // dense_result[perm ^ i] = dense_arg1[i] * sparse_arg2[i] -template < - typename T, unsigned _N, typename Perm, - typename = std::enable_if_t>> -TiledArray::Tensor mult(const KroneckerDeltaTile<_N>& arg1, - const TiledArray::Tensor& arg2, - const Perm& perm) { +template >> +Tensor mult(const KroneckerDeltaTile& arg1, const Tensor& arg2, + const Perm& perm) { abort(); } // dense_result[i] *= sparse_arg1[i] -template -TiledArray::Tensor& mult_to(TiledArray::Tensor& result, - const KroneckerDeltaTile& arg1) { +template +Tensor& mult_to(Tensor& result, const KroneckerDeltaTile& arg1) { abort(); return result; } // dense_result[i] = binary(dense_arg1[i], sparse_arg2[i], op) -template -TiledArray::Tensor binary(const KroneckerDeltaTile<_N>& arg1, - const TiledArray::Tensor& arg2, Op&& op) { +template +Tensor binary(const KroneckerDeltaTile& arg1, const Tensor& arg2, + Op&& op) { abort(); } // dense_result[perm ^ i] = binary(dense_arg1[i], sparse_arg2[i], op) -template < - typename T, unsigned _N, typename Op, typename Perm, - typename = std::enable_if_t>> -TiledArray::Tensor binary(const KroneckerDeltaTile<_N>& arg1, - const TiledArray::Tensor& arg2, Op&& op, - const Perm& perm) { +template >> +Tensor binary(const KroneckerDeltaTile& arg1, const Tensor& arg2, Op&& op, + const Perm& perm) { abort(); } -// Contraction operation +// Contraction operations // GEMM operation with fused indices as defined by gemm_config: -// dense_result[i,j] = dense_arg1[i,k] * sparse_arg2[k,j] -template -TiledArray::Tensor gemm( - const KroneckerDeltaTile& arg1, const TiledArray::Tensor& arg2, - const typename TiledArray::Tensor::numeric_type factor, - const TiledArray::math::GemmHelper& gemm_config) { +// dense_result[i,j] += dense_arg1[i,k] * sparse_arg2[k,j] +template +void gemm(Tensor& result, const KroneckerDeltaTile& arg1, + const Tensor& arg2, const typename Tensor::numeric_type factor, + const math::GemmHelper& gemm_config) { // preconditions: - // 1. implemented only outer product - assert(gemm_config.result_rank() == - gemm_config.left_rank() + gemm_config.right_rank()); + // 1. implemented only kronecker transform (every mode of arg2 is contracted + // with the matching mode of arg1) + TA_ASSERT((gemm_config.result_rank() == gemm_config.right_rank() && + gemm_config.left_rank() == + gemm_config.result_rank() + gemm_config.right_rank())); auto arg1_range = arg1.range(); auto arg2_range = arg2.range(); - auto result_range = - gemm_config.make_result_range(arg1_range, arg2_range); - TiledArray::Tensor result(result_range, 0); + // if result is empty, initialize it + const auto& result_range = + result.empty() + ? gemm_config.make_result_range(arg1_range, arg2_range) + : result.range(); + if (result.empty()) result = Tensor(result_range, 0); auto result_data = result.data(); auto arg1_extents = arg1_range.extent_data(); auto arg2_data = arg2.data(); auto arg2_volume = arg2_range.volume(); - if (not arg1.empty()) { - switch (N) { - case 1: { - auto i0_range = std::min(arg1_extents[0], arg1_extents[1]); - for (decltype(i0_range) i0 = 0; i0 != i0_range; ++i0) { - auto result_i0i0_ptr = - result_data + (i0 * arg1_extents[1] + i0) * arg2_volume; - std::copy(arg2_data, arg2_data + arg2_volume, result_i0i0_ptr); - } - } break; - case 2: { - auto i0_range = std::min(arg1_extents[0], arg1_extents[1]); - auto i1_range = std::min(arg1_extents[2], arg1_extents[3]); - auto ndim23 = arg1_extents[2] * arg1_extents[3]; - for (decltype(i0_range) i0 = 0; i0 != i0_range; ++i0) { - auto result_i0i0i1i1_ptr_offset = - result_data + (i0 * arg1_extents[1] + i0) * ndim23 * arg2_volume; - for (decltype(i1_range) i1 = 0; i1 != i1_range; ++i1) { - auto result_i0i0i1i1_ptr = - result_i0i0i1i1_ptr_offset + - (i1 * arg1_extents[3] + i1) * arg2_volume; - std::copy(arg2_data, arg2_data + arg2_volume, result_i0i0i1i1_ptr); - } - } - } break; - - default: - abort(); // not implemented - } - } + TA_ASSERT(!arg1.empty()); + const auto N = arg1.N(); + auto max = [&](const auto* v1, const auto* v2) { + TA::Index result(N); + for (auto i = 0; i != N; ++i) result[i] = std::max(v1[i], v2[i]); + return result; + }; + auto min = [&](const auto* v1, const auto* v2) { + TA::Index result(N); + for (auto i = 0; i != N; ++i) result[i] = std::min(v1[i], v2[i]); + return result; + }; + const auto read_lobound = + max(result_range.lobound_data(), arg2_range.lobound_data()); + const auto read_upbound = + min(result_range.upbound_data(), arg2_range.upbound_data()); + result.block(read_lobound, read_upbound) = + arg2.block(read_lobound, read_upbound); +} +// GEMM operation with fused indices as defined by gemm_config: +// dense_result[b0,..bN] = kronecker_arg1[b1,...bN,k1,...kN] * +// dense_arg2[k1,...kN] +template +Tensor gemm(const KroneckerDeltaTile& arg1, const Tensor& arg2, + const typename Tensor::numeric_type factor, + const math::GemmHelper& gemm_config) { + Tensor result; + gemm(result, arg1, arg2, factor, gemm_config); return result; } -// GEMM operation with fused indices as defined by gemm_config: -// dense_result[i,j] += dense_arg1[i,k] * sparse_arg2[k,j] -template -void gemm(TiledArray::Tensor& result, const KroneckerDeltaTile& arg1, - const TiledArray::Tensor& arg2, - const typename TiledArray::Tensor::numeric_type factor, - const TiledArray::math::GemmHelper& gemm_config) { - abort(); + +namespace detail { + +/// \brief computes shape data (i.e. Frobenius norms of the tiles) for a +/// DistArray of KroneckerDeltaTile +/// \param trange a TiledRange of the result +/// \return a Tensor containing the Frobenius norms of +/// the tiles of a DistArray of KroneckerDeltaTile's +/// \note Unlike diagonal_shape() which works for hyperdiagonal tensor with +/// `N` modes (`t(i,i,...i) = 1`), this works for product of `N` +/// Kroneckers (`t(i1,...iN,i1,...iN) = 1`, with `N` = `trange.rank() / 2`). +inline Tensor kronecker_shape(TiledRange const& trange) { + // preconditions + TA_ASSERT(trange.rank() % 2 == 0); + + Tensor shape(trange.tiles_range(), 0.0); + const auto N = trange.rank() / 2; + + // for every bra-ket pair of modes compute list of + // {bra tile index, ket tile index, number of nonzeros} + using bkn_type = std::tuple; + std::vector> bkns(N); + for (auto d = 0; d != N; ++d) { + auto& bkn = bkns[d]; + auto& bra_tr1 = trange.dim(d); + auto& ket_tr1 = trange.dim(d + N); + auto eidx = std::max(bra_tr1.elements_range().lobound(), + ket_tr1.elements_range().lobound()); + const auto eidx_fence = std::min(bra_tr1.elements_range().upbound(), + ket_tr1.elements_range().upbound()); + while (eidx < eidx_fence) { + const auto bra_tile_idx = bra_tr1.element_to_tile(eidx); + const auto& bra_tile = bra_tr1.tile(bra_tile_idx); + auto ket_tile_idx = ket_tr1.element_to_tile(eidx); + const auto& ket_tile = ket_tr1.tile(ket_tile_idx); + // closest tile boundary + const auto next_eidx = std::min(bra_tile.upbound(), ket_tile.upbound()); + bkn.emplace_back(bra_tile_idx, ket_tile_idx, next_eidx - eidx); + eidx = next_eidx; + } + } + + // number of nonzero tiles per mode + TA::Index nnz_tiles(N); + for (auto d = 0; d != N; ++d) nnz_tiles[d] = bkns[d].size(); + TA::Range nztiles_range(nnz_tiles); + TA::Index tile_idx(2 * N); + for (auto&& nztile : nztiles_range) { + std::size_t nnz_elements = 1; + for (auto d = 0; d != N; ++d) { + tile_idx[d] = std::get<0>(bkns[d][nztile[d]]); + tile_idx[d + N] = std::get<1>(bkns[d][nztile[d]]); + nnz_elements *= std::get<2>(bkns[d][nztile[d]]); + } + shape(tile_idx) = std::sqrt(nnz_elements); + } + + return shape; } +} // namespace detail + +} // namespace TiledArray + #endif // TILEDARRAY_TEST_SPARSE_TILE_H__INCLUDED diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 38f0e65ff9..12479ef53c 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -1242,7 +1242,8 @@ class Tensor { // clang-format on /// @{ template >> + typename = std::enable_if_t && + !std::is_same_v>> detail::TensorInterface block( const PairRange& bounds) const { return detail::TensorInterface( @@ -1250,7 +1251,8 @@ class Tensor { } template >> + typename = std::enable_if_t && + !std::is_same_v>> detail::TensorInterface block(const PairRange& bounds) { return detail::TensorInterface( BlockRange(this->range_, bounds), this->data()); @@ -1289,6 +1291,38 @@ class Tensor { } /// @} + // clang-format off + /// Constructs a view of the block defined by a TiledArray::Range . + + /// Examples of using this: + /// \code + /// std::vector lobounds = {0, 1, 2}; + /// std::vector upbounds = {4, 6, 8}; + /// + /// auto tview = t.block(TiledArray::Range(lobounds, upbounds)); + /// \endcode + /// \tparam PairRange Type representing a range of generalized pairs (see TiledArray::detail::is_gpair_v ) + /// \param bounds The block bounds + /// \return a {const,mutable} view of the block defined by its \p bounds + /// \throw TiledArray::Exception When the size of \p lower_bound is not + /// equal to that of \p upper_bound. + /// \throw TiledArray::Exception When `get<0>(bounds[i]) >= get<1>(bounds[i])` + // clang-format on + /// @{ + detail::TensorInterface block( + const Range& bounds) const { + return detail::TensorInterface( + BlockRange(this->range_, bounds.lobound(), bounds.upbound()), + this->data()); + } + + detail::TensorInterface block(const Range& bounds) { + return detail::TensorInterface( + BlockRange(this->range_, bounds.lobound(), bounds.upbound()), + this->data()); + } + /// @} + /// Create a permuted copy of this tensor /// \tparam Perm A permutation tile @@ -2373,7 +2407,6 @@ class Tensor { /// \return The vector norm of this tensor scalar_type squared_norm() const { - if constexpr (detail::is_tensor_v) { // If uninitialized tensor of tensor return zero. // All elements of this->data() are empty tensors in this case, diff --git a/src/TiledArray/tile_interface/cast.h b/src/TiledArray/tile_interface/cast.h index c22b97b051..52c7a550be 100644 --- a/src/TiledArray/tile_interface/cast.h +++ b/src/TiledArray/tile_interface/cast.h @@ -26,8 +26,8 @@ #ifndef TILEDARRAY_TILE_INTERFACE_CAST_H__INCLUDED #define TILEDARRAY_TILE_INTERFACE_CAST_H__INCLUDED -#include "../meta.h" -#include "../type_traits.h" +#include "TiledArray/type_traits.h" +#include "TiledArray/util/invoke.h" namespace TiledArray { @@ -80,7 +80,7 @@ class Cast(std::forward(arg)); }; - return TiledArray::meta::invoke(exec, arg); + return TiledArray::detail::invoke(exec, arg); } template static auto invoker( @@ -93,7 +93,7 @@ class Cast>(std::forward(arg)); }; - return TiledArray::meta::invoke(exec, std::forward(arg)); + return TiledArray::detail::invoke(exec, std::forward(arg)); } public: @@ -151,7 +151,7 @@ class Cast>::type> auto invoke_cast(Arg&& arg) { Cast> cast; - return TiledArray::meta::invoke(cast, std::forward(arg)); + return TiledArray::detail::invoke(cast, std::forward(arg)); } } // namespace TiledArray diff --git a/src/TiledArray/tile_op/binary_wrapper.h b/src/TiledArray/tile_op/binary_wrapper.h index 33d021f2b0..07dd9d19fd 100644 --- a/src/TiledArray/tile_op/binary_wrapper.h +++ b/src/TiledArray/tile_op/binary_wrapper.h @@ -224,7 +224,7 @@ class BinaryWrapper { madness::future_to_ref_t r) { return BinaryWrapper_::operator()(l, r); }; - return meta::invoke(continuation, eval_left, eval_right); + return detail::invoke(continuation, eval_left, eval_right); } /// Evaluate lazy and non-lazy tiles @@ -249,7 +249,7 @@ class BinaryWrapper { R&& r) { return BinaryWrapper_::operator()(l, std::forward(r)); }; - return meta::invoke(continuation, eval_left, std::forward(right)); + return detail::invoke(continuation, eval_left, std::forward(right)); } /// Evaluate non-lazy and lazy tiles @@ -273,7 +273,7 @@ class BinaryWrapper { [this](L&& l, madness::future_to_ref_t r) { return BinaryWrapper_::operator()(std::forward(l), r); }; - return meta::invoke(continuation, std::forward(left), eval_right); + return detail::invoke(continuation, std::forward(left), eval_right); } /// Evaluate two lazy-array tiles @@ -294,7 +294,7 @@ class BinaryWrapper { auto eval_left = invoke_cast(std::forward(left)); auto eval_right = invoke_cast(std::forward(right)); - if (perm_) return meta::invoke(op_, eval_left, eval_right, perm_); + if (perm_) return detail::invoke(op_, eval_left, eval_right, perm_); auto op_left = [this](eval_t& _left, eval_t& _right) { return op_.consume_left(_left, _right); @@ -304,11 +304,11 @@ class BinaryWrapper { }; // Override consumable if (is_consumable_tile>::value && left.is_consumable()) - return meta::invoke(op_left, eval_left, eval_right); + return detail::invoke(op_left, eval_left, eval_right); if (is_consumable_tile>::value && right.is_consumable()) - return meta::invoke(op_right, eval_left, eval_right); + return detail::invoke(op_right, eval_left, eval_right); - return meta::invoke(op_, eval_left, eval_right); + return detail::invoke(op_, eval_left, eval_right); } template < diff --git a/src/TiledArray/tile_op/unary_wrapper.h b/src/TiledArray/tile_op/unary_wrapper.h index 3712aca4f1..e1b89e02a7 100644 --- a/src/TiledArray/tile_op/unary_wrapper.h +++ b/src/TiledArray/tile_op/unary_wrapper.h @@ -152,8 +152,9 @@ class UnaryWrapper { /// `arg`. template >* = nullptr> auto operator()(A&& arg) const { - return (perm_ ? meta::invoke(op_, invoke_cast(std::forward(arg)), perm_) - : meta::invoke(op_, invoke_cast(std::forward(arg)))); + return (perm_ + ? detail::invoke(op_, invoke_cast(std::forward(arg)), perm_) + : detail::invoke(op_, invoke_cast(std::forward(arg)))); } /// Evaluate a lazy array tile @@ -176,10 +177,10 @@ class UnaryWrapper { // return op_.consume(std::forward(arg)); // }; auto op_consume = [this](eval_t& arg) { return op_.consume(arg); }; - return (perm_ ? meta::invoke(op_, std::move(cast_arg), perm_) + return (perm_ ? detail::invoke(op_, std::move(cast_arg), perm_) : (arg.is_consumable() - ? meta::invoke(op_consume, cast_arg) - : meta::invoke(op_, std::move(cast_arg)))); + ? detail::invoke(op_consume, cast_arg) + : detail::invoke(op_, std::move(cast_arg)))); } /// Consume a lazy tile @@ -196,8 +197,8 @@ class UnaryWrapper { // return op_.consume(std::forward(arg)); // }; auto op_consume = [this](eval_t& arg) { return op_.consume(arg); }; - return (perm_ ? meta::invoke(op_, std::move(cast_arg), perm_) - : meta::invoke(op_consume, cast_arg)); + return (perm_ ? detail::invoke(op_, std::move(cast_arg), perm_) + : detail::invoke(op_consume, cast_arg)); } template >* = nullptr> diff --git a/src/TiledArray/type_traits.h b/src/TiledArray/type_traits.h index 1bddff446d..5c3d066e9c 100644 --- a/src/TiledArray/type_traits.h +++ b/src/TiledArray/type_traits.h @@ -108,9 +108,9 @@ class LazyArrayTile; struct Derived : T, Fallback {}; \ \ template \ - static No& test(decltype(U::Member)*); \ + static No &test(decltype(U::Member) *); \ template \ - static Yes& test(U*); \ + static Yes &test(U *); \ \ public: \ static constexpr bool value = \ @@ -141,9 +141,9 @@ class LazyArrayTile; struct Derived : T, Fallback {}; \ \ template \ - static No& test(typename U::Type*); \ + static No &test(typename U::Type *); \ template \ - static Yes& test(U*); \ + static Yes &test(U *); \ \ public: \ static constexpr bool value = \ @@ -177,11 +177,11 @@ class LazyArrayTile; template \ struct CheckConst; \ template \ - static Yes test_const(CheckConst*); \ + static Yes test_const(CheckConst *); \ template \ static No test_const(...); \ template \ - static Yes test_nonconst(Check*); \ + static Yes test_nonconst(Check *); \ template \ static No test_nonconst(...); \ \ @@ -215,7 +215,7 @@ class LazyArrayTile; using Yes = char; \ using No = int; \ template \ - static auto func(void*) \ + static auto func(void *) \ -> decltype(std::add_pointer_t().Member( \ std::declval()...))>{}, \ Yes{}); \ @@ -248,7 +248,7 @@ class LazyArrayTile; using Yes = char; \ using No = int; \ template \ - static auto func(void*) \ + static auto func(void *) \ -> decltype(std::add_pointer_t< \ decltype(Function(std::declval()...))>{}, \ Yes{}); \ @@ -278,7 +278,7 @@ class LazyArrayTile; using Yes = char; \ using No = int; \ template \ - static auto func(void*) \ + static auto func(void *) \ -> decltype(std::add_pointer_t()...))>{}, \ Yes{}); \ @@ -451,7 +451,7 @@ template struct has_conversion_operator< From, To, typename std::enable_if< - is_type().operator To&())>::value>::type> + is_type().operator To &())>::value>::type> : std::true_type {}; #else template @@ -473,7 +473,7 @@ struct has_conversion_operator { /* operator exists */ template static decltype(test(&A::operator To)) test(decltype(&A::operator To), - void*) { + void *) { /* Operator exists. What about sig? */ typedef decltype(test(&A::operator To)) return_type; return return_type(); @@ -846,7 +846,7 @@ struct is_strictly_ordered_helper { using Yes = char; using No = int; template - static auto test(void*) + static auto test(void *) -> decltype(std::add_pointer_t() < std::declval())>{}, Yes{}); @@ -857,6 +857,160 @@ struct is_strictly_ordered_helper { static constexpr const bool value = sizeof(test(0)) == sizeof(Yes); }; +///////// is_less_than_comparable ///////// + +template > +struct is_less_than_comparable : public std::false_type {}; + +template +struct is_less_than_comparable() < + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool is_less_than_comparable_v = + is_less_than_comparable::value; + +///////// are_less_than_comparable ///////// + +template > +struct are_less_than_comparable : public std::false_type {}; + +template +struct are_less_than_comparable< + T, U, + std::void_t() < + std::declval())>> : public std::true_type { +}; + +template +static constexpr bool are_less_than_comparable_v = + are_less_than_comparable::value; + +///////// is_less_than_or_equal_comparable ///////// + +template > +struct is_less_than_or_equal_comparable : public std::false_type {}; + +template +struct is_less_than_or_equal_comparable< + T, std::void_t() <= + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool is_less_than_or_equal_comparable_v = + is_less_than_or_equal_comparable::value; + +///////// are_less_than_comparable ///////// + +template > +struct are_less_than_or_equal_comparable : public std::false_type {}; + +template +struct are_less_than_or_equal_comparable< + T, U, + std::void_t() <= + std::declval())>> : public std::true_type { +}; + +template +static constexpr bool are_less_than_or_equal_comparable_v = + are_less_than_or_equal_comparable::value; + +///////// is_greater_than_comparable ///////// + +template > +struct is_greater_than_comparable : public std::false_type {}; + +template +struct is_greater_than_comparable< + T, std::void_t() > + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool is_greater_than_comparable_v = + is_greater_than_comparable::value; + +///////// are_greater_than_comparable ///////// + +template > +struct are_greater_than_comparable : public std::false_type {}; + +template +struct are_greater_than_comparable< + T, U, + std::void_t() > + std::declval())>> : public std::true_type { +}; + +template +static constexpr bool are_greater_than_comparable_v = + are_greater_than_comparable::value; + +///////// is_greater_than_or_equal_comparable ///////// + +template > +struct is_greater_than_or_equal_comparable : public std::false_type {}; + +template +struct is_greater_than_or_equal_comparable< + T, std::void_t() >= + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool is_greater_than_or_equal_comparable_v = + is_greater_than_or_equal_comparable::value; + +///////// are_greater_than_comparable ///////// + +template > +struct are_greater_than_or_equal_comparable : public std::false_type {}; + +template +struct are_greater_than_or_equal_comparable< + T, U, + std::void_t() >= + std::declval())>> : public std::true_type { +}; + +template +static constexpr bool are_greater_than_or_equal_comparable_v = + are_greater_than_or_equal_comparable::value; + +///////// is_equality_comparable ///////// + +template > +struct is_equality_comparable : public std::false_type {}; + +template +struct is_equality_comparable() == + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool is_equality_comparable_v = + is_equality_comparable::value; + +///////// are_equality_comparable ///////// + +template > +struct are_equality_comparable : public std::false_type {}; + +template +struct are_equality_comparable() == + std::declval())>> + : public std::true_type {}; + +template +static constexpr bool are_equality_comparable_v = + are_equality_comparable::value; + /// \c is_strictly_ordered::value is true if strict order is defined for T, /// i.e. "T < T" is defined template @@ -916,7 +1070,7 @@ struct is_std_gettable : std::false_type {}; template struct is_std_gettable< - I, T, std::void_t(std::declval()))>> + I, T, std::void_t(std::declval()))>> : std::true_type {}; template @@ -927,7 +1081,7 @@ struct is_boost_gettable : std::false_type {}; template struct is_boost_gettable< - I, T, std::void_t(std::declval()))>> + I, T, std::void_t(std::declval()))>> : std::true_type {}; template @@ -938,7 +1092,7 @@ constexpr const bool is_gettable_v = is_std_gettable_v || is_boost_gettable_v; template -auto get(T&& t) { +auto get(T &&t) { using boost::get; using std::get; return get(std::forward(t)); @@ -1099,22 +1253,22 @@ struct is_iterator -struct is_iterator : std::true_type { +struct is_iterator : std::true_type { typedef std::random_access_iterator_tag iterator_category; }; template -struct is_iterator : std::true_type { +struct is_iterator : std::true_type { typedef std::random_access_iterator_tag iterator_category; }; template -struct is_iterator : std::true_type { +struct is_iterator : std::true_type { typedef std::random_access_iterator_tag iterator_category; }; template -struct is_iterator : std::true_type { +struct is_iterator : std::true_type { typedef std::random_access_iterator_tag iterator_category; }; @@ -1153,8 +1307,8 @@ template struct is_range : std::false_type {}; template -struct is_range()), - std::end(std::declval()))>> +struct is_range()), + std::end(std::declval()))>> : std::true_type {}; /// \c is_range_v is an alias for \c is_range::value @@ -1169,7 +1323,7 @@ template struct is_sized_range : std::false_type {}; template -struct is_sized_range()))>> +struct is_sized_range()))>> : is_range {}; /// `is_sized_range_v` is an alias for `is_sized_range::value` @@ -1184,9 +1338,8 @@ template struct is_contiguous_range : std::false_type {}; template -struct is_contiguous_range()))>> - : is_range {}; +struct is_contiguous_range< + T, std::void_t()))>> : is_range {}; /// `is_contiguous_range_v` is an alias for `is_contiguous_range::value` template @@ -1197,14 +1350,14 @@ static constexpr bool is_contiguous_range_v = is_contiguous_range::value; /// std::begin(T&) /// @warning will be replaced by C++20 ranges::iterator_t template -using iterator_t = decltype(std::begin(std::declval())); +using iterator_t = decltype(std::begin(std::declval())); /// @tparam T a range type /// @c value_t is the value type, i.e. the type to which @c std::begin(T&) /// dereferences to /// @warning will be replaced by C++20 ranges::value_t template -using value_t = remove_cvr_t()))>; +using value_t = remove_cvr_t()))>; /// @tparam T a type /// `is_integral_range::value` is true if @p T is a range type that @@ -1361,7 +1514,7 @@ static constexpr bool is_gpair_range_v = is_gpair_range::value; template >>> -decltype(auto) at(GeneralizedPair&& v, std::size_t idx) { +decltype(auto) at(GeneralizedPair &&v, std::size_t idx) { assert(idx == 0 || idx == 1); if constexpr (is_gettable_pair_v>) { #if __cplusplus <= 201703L diff --git a/src/TiledArray/util/invoke.h b/src/TiledArray/util/invoke.h new file mode 100644 index 0000000000..ff8bbed191 --- /dev/null +++ b/src/TiledArray/util/invoke.h @@ -0,0 +1,70 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2017 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * meta.h + * April 11, 2017 + * + */ + +#ifndef TILEDARRAY_UTIL_INVOKE_H +#define TILEDARRAY_UTIL_INVOKE_H + +#include +#include +#include +#include + +namespace TiledArray { +namespace detail { + +/// ||'s bools +template +struct or_reduce { + static constexpr bool value = head || or_reduce::value; +}; + +template +struct or_reduce { + static constexpr bool value = b; +}; + +// is any argument a Future? +// - yes: async launch +// - no: direct launch +template +auto invoke(Function&& fn, Args&&... args) -> typename std::enable_if< + !or_reduce>::value...>::value, + decltype(fn(args...))>::type { + return fn(std::forward(args)...); +} + +template < + typename Function, typename... Args, + typename = typename std::enable_if>::value...>::value>::type> +auto invoke(Function&& fn, Args&&... args) { + return TiledArray::get_default_world().taskq.add(std::forward(fn), + std::forward(args)...); +} + +} // namespace detail +} // namespace TiledArray + +#endif // TILEDARRAY_UTIL_INVOKE_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index afb1e1c6a6..823e13bec8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -148,8 +148,6 @@ target_include_directories(${executable} PRIVATE # is too late to do this here; must set TA_ERROR=throw if want to run unit tests target_compile_definitions(${executable} PRIVATE TILEDARRAY_NO_USER_ERROR_MESSAGES=1 MADNESS_DISPLAY_EXCEPTION_BREAK_MESSAGE=0) -# always test range-v3 -target_link_libraries(${executable} PRIVATE range-v3::range-v3) # Add targets add_test(tiledarray/unit/build "${CMAKE_COMMAND}" --build ${PROJECT_BINARY_DIR} --target ${executable}) diff --git a/tests/expressions_mixed.cpp b/tests/expressions_mixed.cpp index 6b92cec695..bf79d86fc1 100644 --- a/tests/expressions_mixed.cpp +++ b/tests/expressions_mixed.cpp @@ -23,6 +23,7 @@ * */ +#include "TiledArray/special/diagonal_array.h" #include "TiledArray/special/kronecker_delta.h" #include "range_fixture.h" #include "sparse_tile.h" @@ -37,8 +38,8 @@ struct tag {}; struct MixedExpressionsFixture : public TiledRangeFixture { typedef DistArray>, DensePolicy> TArrayDS1; typedef DistArray>, DensePolicy> TArrayDS2; - typedef DistArray, DensePolicy> - ArrayKronDelta1; // will be turned into SparsePolicy next + typedef DistArray ArrayKronDelta; + typedef DistArray SpArrayKronDelta; MixedExpressionsFixture() : u(*GlobalFixture::world, trange2), @@ -46,25 +47,17 @@ struct MixedExpressionsFixture : public TiledRangeFixture { e2(*GlobalFixture::world, trange2e), e4(*GlobalFixture::world, trange4e), v(*GlobalFixture::world, trange2), - w(*GlobalFixture::world, trange2), - delta1(*GlobalFixture::world, trange2, DenseShape(), - std::make_shared( - *GlobalFixture::world, trange2.tiles_range().volume())), - delta1e(*GlobalFixture::world, trange2e, DenseShape(), - std::make_shared( - *GlobalFixture::world, trange2e.tiles_range().volume())) { + w(*GlobalFixture::world, trange2) { random_fill(u); random_fill(v); u2.fill(0); random_fill(e2); e4.fill(0); - init_kronecker_delta(delta1); - init_kronecker_delta(delta1e); GlobalFixture::world->gop.fence(); } - template - static void random_fill(DistArray& array) { + template + static void random_fill(DistArray& array) { array.fill_random(); } @@ -110,10 +103,12 @@ struct MixedExpressionsFixture : public TiledRangeFixture { return matrix; } - template - static void init_kronecker_delta(DistArray& array) { - array.init_tiles( - [=](const TiledArray::Range& range) { return Tile(range); }); + template + static void init_kronecker_delta( + DistArray& array) { + array.init_tiles([=](const TiledArray::Range& range) { + return KroneckerDeltaTile(range); + }); } ~MixedExpressionsFixture() { GlobalFixture::world->gop.fence(); } @@ -132,8 +127,6 @@ struct MixedExpressionsFixture : public TiledRangeFixture { TArrayDS1 v; TArrayDS1 v1; TArrayDS2 w; - ArrayKronDelta1 delta1; - ArrayKronDelta1 delta1e; }; // MixedExpressionsFixture // Instantiate static variables for fixture @@ -183,21 +176,40 @@ BOOST_AUTO_TEST_CASE(mult_factories) { // BOOST_CHECK_NO_THROW(w("a,b") = u("a,b") * v("a,b")); } -BOOST_AUTO_TEST_CASE(outer_product_factories) { +BOOST_AUTO_TEST_CASE(kronecker) { #if !MULT_DENSE_SPARSE_TO_SPARSE // ok BOOST_CHECK_NO_THROW(u2("a,b,c,d") += u("a,b") * v("c,d")); #endif - // these can only work if nproc == 1 since KroneckerDelta does not travel, and - // SUMMA does not support replicated args - if (GlobalFixture::world->nproc() == 1) { - // ok - BOOST_CHECK_NO_THROW(u2("a,b,c,d") += delta1("a,b") * u("c,d")); + // retile test + TSpArrayD x(*GlobalFixture::world, trange2); + random_fill(x); - // ok - BOOST_CHECK_NO_THROW(e4("a,c,b,d") += delta1e("a,b") * e2("c,d")); - } + // includes target tiles that receive contributions from multiple source + // tiles, tiny target tiles with single contribution, and tiles partially and + // completely outside the source range +#ifdef TA_SIGNED_1INDEX_TYPE + TA::TiledRange yrange{{-1, 18, 20, 45, 47}, {-1, 20, 22, 45, 47}}; +#else + TA::TiledRange yrange{{5, 18, 20, 45, 47}, {7, 20, 22, 45, 47}}; +#endif + TA::TSpArrayD y1; + // identical to y1 = TA::detail::retile_v1(x, yrange); + TA::TiledRange retiler_range{yrange.dim(0), yrange.dim(1), trange2.dim(0), + trange2.dim(1)}; + SpArrayKronDelta retiler( + *GlobalFixture::world, retiler_range, + SparseShape(detail::kronecker_shape(retiler_range), retiler_range), + std::make_shared( + *GlobalFixture::world, retiler_range.tiles_range().volume())); + init_kronecker_delta(retiler); + y1("d1,d2") = retiler("d1,d2,s1,s2") * x("s1,s2"); + // std::cout << "y1 = " << y1 << std::endl; + + auto y_ref = TA::retile(x, yrange); + // std::cout << "y_ref = " << y_ref << std::endl; + BOOST_CHECK((y1("d1,d2") - y_ref("d1,d2")).norm().get() == 0.); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/sparse_tile.h b/tests/sparse_tile.h index 888c39811f..70897d7ca1 100644 --- a/tests/sparse_tile.h +++ b/tests/sparse_tile.h @@ -24,8 +24,6 @@ #include #include -#include - #include // Array class @@ -37,6 +35,8 @@ #include #include +#include + // sparse 2-dimensional matrix type, with tag type thrown in to make expression // engine work harder template > @@ -192,7 +192,7 @@ class EigenSparseTile { for (typename matrix_type::InnerIterator it(mat, k); it; ++it) { datavec.push_back(Eigen::Triplet(it.row(), it.col(), it.value())); } - ar& datavec& this->range(); + ar & datavec& this->range(); } else { ar & false; } @@ -204,11 +204,11 @@ class EigenSparseTile { madness::is_input_archive_v>::type* = nullptr> void serialize(Archive& ar) { bool have_impl = false; - ar& have_impl; + ar & have_impl; if (have_impl) { std::vector> datavec; range_type range; - ar& datavec& range; + ar & datavec & range; auto extents = range.extent(); matrix_type mat(extents[0], extents[1]); mat.setFromTriplets(datavec.begin(), datavec.end()); @@ -700,7 +700,7 @@ struct ArchiveLoadImpl> { static inline void load(const Archive& ar, Eigen::Triplet& obj) { int row, col; T value; - ar& row& col& value; + ar & row & col & value; obj = Eigen::Triplet(row, col, value); } }; @@ -708,7 +708,7 @@ struct ArchiveLoadImpl> { template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const Eigen::Triplet& obj) { - ar& obj.row() & obj.col() & obj.value(); + ar & obj.row() & obj.col() & obj.value(); } }; } // namespace archive diff --git a/tests/tot_array_fixture.h b/tests/tot_array_fixture.h index 5d0a0ce4dd..2c27824961 100644 --- a/tests/tot_array_fixture.h +++ b/tests/tot_array_fixture.h @@ -19,8 +19,10 @@ #ifndef TILEDARRAY_TEST_TOT_ARRAY_FIXTURE_H__INCLUDED #define TILEDARRAY_TEST_TOT_ARRAY_FIXTURE_H__INCLUDED +#include +#include #include -#include +#include #include "unit_test_config.h" #ifdef TILEDARRAY_HAS_BTAS #include @@ -621,8 +623,7 @@ Result general_product(TensorA const& A, TensorB const& B, } else { temp += temp_; } - } - else { + } else { TA_ASSERT(!(ix_A.empty() || ix_B.empty())); temp += A(ix_A) * B(ix_B); }