diff --git a/INSTALL.md b/INSTALL.md index 1ad512a07f..ad33841e44 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -40,7 +40,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Container: header-only - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 2917aa21465a93ae6f399874f247b5fe31d6b693 . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS), tag 6fcb6451bc7ca46a00534a30c51dc5c230c39ac3 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 0b44ef319643cb9721fbe17d294987c146e6460e . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. diff --git a/external/versions.cmake b/external/versions.cmake index ea25d3b1c6..676ff2ee4f 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -24,8 +24,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 29a2bf3d3c2670c608b7bfdf2299d76fbc20e041) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 2917aa21465a93ae6f399874f247b5fe31d6b693) -set(TA_TRACKED_BTAS_PREVIOUS_TAG fba66ad9881ab29ea8df49ac6a6006cab3fb3ce5) +set(TA_TRACKED_BTAS_TAG 6fcb6451bc7ca46a00534a30c51dc5c230c39ac3) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 474ddc095cbea12a1d28aca5435703dd9f69b166) set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1b3763c65f..e796d72626 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -195,10 +195,12 @@ TiledArray/util/initializer_list.h TiledArray/util/logger.h TiledArray/util/ptr_registry.cpp TiledArray/util/ptr_registry.h +TiledArray/util/random.cpp TiledArray/util/random.h TiledArray/util/singleton.h TiledArray/util/threads.h TiledArray/util/threads.cpp +TiledArray/util/thread_specific.h TiledArray/util/time.h TiledArray/util/vector.h ) diff --git a/src/TiledArray/block_range.h b/src/TiledArray/block_range.h index 807dee1f1a..08096c96ea 100644 --- a/src/TiledArray/block_range.h +++ b/src/TiledArray/block_range.h @@ -30,9 +30,15 @@ namespace TiledArray { -/// Range that references a subblock of another range +/// a Range that references a subblock of another Range class BlockRange : public Range { private: + /// datavec_ contains same data as if this were an ordinary Range, except: + /// - stride describes the host range + /// - offset_ corrects ordinals so that the first element in this (sub)Range + /// has ordinal of zero ... + /// - block_offset_ is the distance from the first element of this Range to + /// the first element of the host Range using Range::datavec_; using Range::offset_; using Range::rank_; @@ -300,37 +306,59 @@ class BlockRange : public Range { init>>(range, bounds); } - /// calculate the ordinal index of \c i + /// Converts coordinate index \c i to its ordinal index in the host Range - /// Convert a coordinate index to an ordinal index. /// \tparam Index An integral range type - /// \param index The index to be converted to an ordinal index - /// \return The ordinal index of \c index + /// \param index The index to be converted to the ordinal index + /// \return The ordinal index of \c index in the host Range /// \throw When \c index is not included in this range. - template >* = nullptr> + template >* = nullptr> ordinal_type ordinal(const Index& index) const { return Range::ordinal(index); } - template 1ul)>::type* = nullptr> + /// Converts coordinate index \c i to its ordinal index in the host Range + + /// \tparam Index An integral range type + /// \param index The index to be converted to the ordinal index + /// \return The ordinal index of \c index in the host Range + /// \throw When \c index is not included in this range. + template >* = nullptr> + ordinal_type ordinal(const std::initializer_list& index) const { + return Range::ordinal(index); + } + + /// Converts coordinate index \c i to its ordinal index in the host Range + + /// \tparam Index An integral range type + /// \param index The index to be converted to the ordinal index + /// \return The ordinal index of \c index in the host Range + /// \throw When \c index is not included in this range. + template < + typename... Index, + typename std::enable_if_t<(sizeof...(Index) > 1ul) && + (std::is_integral_v && ...)>* = nullptr> ordinal_type ordinal(const Index&... index) const { return Range::ordinal(index...); } - /// calculate the coordinate index of the ordinal index, \c index. + /// Converts the ordinal index within this block range to the ordinal index of + /// the host range - /// Convert an ordinal index to a coordinate index. - /// \param index Ordinal index - /// \return The index of the ordinal index + /// \warning this overloads base method that did not perform any conversion + /// of \p ord + /// \param index the ordinal index for the host Range + /// \return The ordinal index in the /// \throw TiledArray::Exception When \c index is not included in this range - ordinal_type ordinal(ordinal_type index) const { - // Check that index is contained by range. - TA_ASSERT(includes(index)); + ordinal_type ordinal(ordinal_type ord) const { + // Check that ord is contained by this range. + TA_ASSERT(Range::includes_ordinal(ord)); // Construct result coordinate index object and allocate its memory. - ordinal_type result = 0ul; + ordinal_type host_ord = 0ul; // Get pointers to the data const auto* MADNESS_RESTRICT const size = extent_data(); @@ -342,11 +370,11 @@ class BlockRange : public Range { const auto stride_i = stride[i]; // Compute result index element i - result += (index % size_i) * stride_i; - index /= size_i; + host_ord += (ord % size_i) * stride_i; + ord /= size_i; } - return result + block_offset_ - offset_; + return host_ord + block_offset_ - offset_; } /// Resize of block range is not supported diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index 2ca19c67fd..816a8bfe24 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -963,10 +963,10 @@ DistArray_ eigen_tensor_to_array( /// replicated TiledArray::DistArray. Usage: /// \code /// TiledArray::TArrayD -/// array(world, trange); +/// array(world, trange_3d); /// // Set tiles of array ... /// -/// auto t = array_to_eigen_tensor(array); +/// auto t = array_to_eigen_tensor(array); /// \endcode /// \tparam Tile the tile type of \c src /// \tparam Policy the policy type of \c src @@ -980,13 +980,11 @@ DistArray_ eigen_tensor_to_array( /// create the Eigen::Tensor on every rank (this requires /// that \c src.is_replicated()==true ) /// \return Eigen::Tensor object containing the data of \c src , if my rank -/// equals -/// \c target_rank or \c target_rank==-1 , +/// equals \c target_rank or \c target_rank==-1 , /// default-initialized Eigen::Tensor otherwise. template Tensor array_to_eigen_tensor(const TiledArray::DistArray& src, int target_rank = -1) { - TA_ASSERT(src.tiles_range().rank() == Tensor::NumDimensions); // Test preconditions diff --git a/src/TiledArray/conversions/foreach.h b/src/TiledArray/conversions/foreach.h index c001a1ee66..9d219ac191 100644 --- a/src/TiledArray/conversions/foreach.h +++ b/src/TiledArray/conversions/foreach.h @@ -288,12 +288,13 @@ inline std:: int task_count = 0; auto op_shared_handle = make_op_shared_handle(std::forward(op)); const auto task = [op_shared_handle, &counter, &tile_norms]( - const ordinal_type index, + const ordinal_type ord, const_if_t& arg_tile, const ArgTiles&... arg_tiles) -> result_value_type { op_helper op_caller; - auto result_tile = op_caller(std::move(op_shared_handle), tile_norms[index], - arg_tile, arg_tiles...); + auto result_tile = + op_caller(std::move(op_shared_handle), tile_norms.at_ordinal(ord), + arg_tile, arg_tiles...); ++counter; return result_tile; }; @@ -304,32 +305,32 @@ inline std:: switch (shape_reduction) { case ShapeReductionMethod::Intersect: // Get local tile index iterator - for (auto index : *(arg.pmap())) { - if (is_zero_intersection({arg.is_zero(index), args.is_zero(index)...})) + for (auto ord : *(arg.pmap())) { + if (is_zero_intersection({arg.is_zero(ord), args.is_zero(ord)...})) continue; - auto result_tile = world.taskq.add(task, index, arg.find_local(index), - args.find(index)...); + auto result_tile = + world.taskq.add(task, ord, arg.find_local(ord), args.find(ord)...); ++task_count; - tiles.emplace_back(index, std::move(result_tile)); + tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, use the (scaled) // norms of the first arg - tile_norms[index] = arg_shape_data[index]; + tile_norms.at_ordinal(ord) = arg_shape_data.at_ordinal(ord); } break; case ShapeReductionMethod::Union: // Get local tile index iterator - for (auto index : *(arg.pmap())) { - if (is_zero_union({arg.is_zero(index), args.is_zero(index)...})) - continue; + for (auto ord : *(arg.pmap())) { + if (is_zero_union({arg.is_zero(ord), args.is_zero(ord)...})) continue; auto result_tile = - world.taskq.add(task, index, detail::get_sparse_tile(index, arg), - detail::get_sparse_tile(index, args)...); + world.taskq.add(task, ord, detail::get_sparse_tile(ord, arg), + detail::get_sparse_tile(ord, args)...); ++task_count; - tiles.emplace_back(index, std::move(result_tile)); + tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, find max // (scaled) norms of all args - tile_norms[index] = - std::max({arg_shape_data[index], args.shape().data()[index]...}); + tile_norms.at_ordinal(ord) = + std::max({arg_shape_data.at_ordinal(ord), + args.shape().data().at_ordinal(ord)...}); } break; default: diff --git a/src/TiledArray/conversions/vector_of_arrays.h b/src/TiledArray/conversions/vector_of_arrays.h index f54be33bd2..9de3bf8d09 100644 --- a/src/TiledArray/conversions/vector_of_arrays.h +++ b/src/TiledArray/conversions/vector_of_arrays.h @@ -23,21 +23,25 @@ namespace detail { /// @return TiledRange of fused Array object inline TiledArray::TiledRange prepend_dim_to_trange( std::size_t array_rank, const TiledArray::TiledRange& array_trange, - std::size_t nblocks, std::size_t avg_block_size, std::size_t num_avg_plus_one) { + std::size_t nblocks, std::size_t avg_block_size, + std::size_t num_avg_plus_one) { /// make the new TiledRange1 for new dimension TiledArray::TiledRange1 new_trange1; { std::vector new_trange1_v; new_trange1_v.reserve(nblocks + 1); auto block_counter = 0; - for(auto i = 0; i < num_avg_plus_one; ++i, block_counter += avg_block_size + 1){ + for (auto i = 0; i < num_avg_plus_one; + ++i, block_counter += avg_block_size + 1) { new_trange1_v.emplace_back(block_counter); } - for (auto i = num_avg_plus_one; i < nblocks; ++i, block_counter+= avg_block_size) { + for (auto i = num_avg_plus_one; i < nblocks; + ++i, block_counter += avg_block_size) { new_trange1_v.emplace_back(block_counter); } new_trange1_v.emplace_back(array_rank); - new_trange1 = TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end()); + new_trange1 = + TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end()); } /// make the new range for N+1 Array @@ -70,14 +74,16 @@ inline TiledArray::TiledRange prepend_dim_to_trange( /// (the size of @c arrays on each rank will depend on world.size) /// @param[in] fused_trange the TiledRange of the fused @c arrays /// @param[in] avg_block_size The average number of elements per block -/// @param[in] num_avg_plus_one Number of tiles with one more than the average block size. +/// @param[in] num_avg_plus_one Number of tiles with one more than the average +/// block size. /// @return SparseShape of fused Array object template TiledArray::SparseShape fuse_tilewise_vector_of_shapes( - madness::World& global_world, - const std::vector>& arrays, - const std::size_t array_rank, const TiledArray::TiledRange& fused_trange, - const size_t avg_block_size, const size_t num_avg_plus_one) { + madness::World& global_world, + const std::vector>& + arrays, + const std::size_t array_rank, const TiledArray::TiledRange& fused_trange, + const size_t avg_block_size, const size_t num_avg_plus_one) { if (arrays.size() == 0) { TiledArray::Tensor fused_tile_norms(fused_trange.tiles_range(), 0.f); return TiledArray::SparseShape(global_world, fused_tile_norms, @@ -95,12 +101,14 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( const auto& tiles_range = arrays[0].trange().tiles_range(); for (auto&& tile_idx : tiles_range) { const auto tile_ord = tiles_range.ordinal(tile_idx); - tile_volumes.emplace(tile_volumes_ptr + tile_ord, arrays[0].trange().make_tile_range(tile_idx).volume()); + tile_volumes.emplace( + tile_volumes_ptr + tile_ord, + arrays[0].trange().make_tile_range(tile_idx).volume()); } } TiledArray::Tensor fused_tile_norms( - fused_trange.tiles_range(), 0.f); // use nonzeroes for local tiles only + fused_trange.tiles_range(), 0.f); // use nonzeroes for local tiles only // compute norms of fused tiles // N.B. tile norms are stored in scaled format, unscale in order to compute @@ -108,7 +116,8 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( std::size_t narrays = array_rank; size_t fused_tile_ord = 0; auto element_offset_in_owner = 0; - // need to do this loop twice once for tiles with avg + 1 and once for tiles with avg + // need to do this loop twice once for tiles with avg + 1 and once for tiles + // with avg auto block_size = avg_block_size + 1; size_t vidx = 0, fused_vidx = 0; for (; fused_vidx < num_avg_plus_one; vidx += block_size, ++fused_vidx) { @@ -123,13 +132,13 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( const auto tile_volume = tile_volumes[tile_ord]; for (size_t v = 0, vv = vidx; v != block_size; ++v, ++vv) { const auto unscaled_tile_norm = - (*(array_ptr)).shape().data()[tile_ord] * tile_volume; + (*(array_ptr)).shape().data().at_ordinal(tile_ord) * tile_volume; unscaled_fused_tile_norm2 += unscaled_tile_norm * unscaled_tile_norm; ++array_ptr; } const auto fused_tile_volume = tile_volume * block_size; const auto fused_tile_norm = - std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume; + std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume; *(fused_tile_norms.data() + fused_tile_ord) = fused_tile_norm; } @@ -140,7 +149,7 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( } block_size = avg_block_size; - for (;vidx < narrays; vidx += block_size, ++fused_vidx) { + for (; vidx < narrays; vidx += block_size, ++fused_vidx) { bool have_rank = (rank == fused_vidx % size); // how many arrays actually constribute to this fused tile ... last fused // tile may have fewer than block_size @@ -152,13 +161,13 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( const auto tile_volume = tile_volumes[tile_ord]; for (size_t v = 0, vv = vidx; v != block_size; ++v, ++vv) { const auto unscaled_tile_norm = - (*(array_ptr)).shape().data()[tile_ord] * tile_volume; + (*(array_ptr)).shape().data().at_ordinal(tile_ord) * tile_volume; unscaled_fused_tile_norm2 += unscaled_tile_norm * unscaled_tile_norm; ++array_ptr; } const auto fused_tile_volume = tile_volume * block_size; const auto fused_tile_norm = - std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume; + std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume; *(fused_tile_norms.data() + fused_tile_ord) = fused_tile_norm; } @@ -167,8 +176,8 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( fused_tile_ord += ntiles_per_array; } } - auto fused_shapes = TiledArray::SparseShape(global_world, fused_tile_norms, - fused_trange, true); + auto fused_shapes = TiledArray::SparseShape( + global_world, fused_tile_norms, fused_trange, true); return fused_shapes; } @@ -187,12 +196,14 @@ TiledArray::SparseShape fuse_tilewise_vector_of_shapes( /// (the size of @c arrays on each rank will depend on world.size) /// @param[in] fused_trange the TiledRange of the fused @c arrays /// @param[in] avg_block_size The average number of elements per block -/// @param[in] num_avg_plus_one Number of tiles with one more than the average block size. +/// @param[in] num_avg_plus_one Number of tiles with one more than the average +/// block size. /// @return DenseShape of fused Array object template TiledArray::DenseShape fuse_tilewise_vector_of_shapes( madness::World&, - const std::vector>& arrays, + const std::vector>& + arrays, const std::size_t array_rank, const TiledArray::TiledRange& fused_trange, const size_t avg_block_size, const size_t num_avg_plus_one) { return TiledArray::DenseShape(1, fused_trange); @@ -213,8 +224,9 @@ TiledArray::DenseShape fuse_tilewise_vector_of_shapes( /// @return the Shape of the @c i -th subarray inline TiledArray::SparseShape tilewise_slice_of_fused_shape( const TiledArray::TiledRange& split_trange, - const TiledArray::SparsePolicy::shape_type& shape, const std::size_t tile_idx, - const std::size_t split_ntiles, const std::size_t tile_size) { + const TiledArray::SparsePolicy::shape_type& shape, + const std::size_t tile_idx, const std::size_t split_ntiles, + const std::size_t tile_size) { TA_ASSERT(split_ntiles == split_trange.tiles_range().volume()); TiledArray::Tensor split_tile_norms(split_trange.tiles_range()); @@ -292,8 +304,9 @@ class dist_subarray_vec /// @return number of Array in @c split_array unsigned long size() const { return rank_; } - void delete_arrays(int begin_index, int end_index){ - for(auto array_ptr = split_array.begin() + begin_index; array_ptr() != split_array.begin() + end_index; ++array_ptr()) { + void delete_arrays(int begin_index, int end_index) { + for (auto array_ptr = split_array.begin() + begin_index; + array_ptr() != split_array.begin() + end_index; ++array_ptr()) { (*array_ptr) = Array(); } Array::wait_for_lazy_cleanup(); @@ -328,108 +341,113 @@ class dist_subarray_vec /// @sa detail::fuse_tilewise_vector_of_shapes template TiledArray::DistArray fuse_tilewise_vector_of_arrays( - madness::World& global_world, - const std::vector>& array_vec, - const std::size_t fused_dim_extent, - const TiledArray::TiledRange& array_trange, std::size_t target_block_size = 1) { - auto nproc = global_world.size(); - - // make instances of array_vec globally accessible - using Array = TiledArray::DistArray; - detail::dist_subarray_vec arrays(global_world, array_vec, - fused_dim_extent); - - std::size_t nblocks = - (fused_dim_extent + target_block_size - 1) / target_block_size; - auto dv = std::div((int) (fused_dim_extent + nblocks - 1), (int) nblocks); - auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1; - auto dv1 = std::div(num_avg_plus_one, nproc); - - // make fused tiledrange - auto fused_trange = - detail::prepend_dim_to_trange(fused_dim_extent, array_trange, - nblocks, avg_block_size, num_avg_plus_one); - std::size_t ntiles_per_array = array_trange.tiles_range().volume(); - - // make fused shape - auto fused_shape = detail::fuse_tilewise_vector_of_shapes( - global_world, arrays.array_accessor(), fused_dim_extent, fused_trange, - avg_block_size, num_avg_plus_one); - - // make fused array - TiledArray::DistArray fused_array(global_world, fused_trange, - fused_shape); - - /// copy the data from a sequence of tiles - auto make_tile = [](const TiledArray::Range& range, - const std::vector>& tiles) { - TA_ASSERT(range.extent(0) == tiles.size()); - Tile result(range); - const auto volume = range.volume(); - size_t result_volume = 0; - auto* result_ptr = result.data(); - for (auto&& fut_of_tile : tiles) { - TA_ASSERT(fut_of_tile.probe()); - const auto& tile = fut_of_tile.get(); - const auto* tile_data = tile.data(); - const auto tile_volume = tile.size(); - std::copy(tile_data, tile_data + tile_volume, result_ptr); - result_ptr += tile_volume; - result_volume += tile_volume; - } - TA_ASSERT(volume == result_volume); - return result; - }; - - /// write to blocks of fused_array - for (auto&& fused_tile_ord : *fused_array.pmap()) { - if (!fused_array.is_zero(fused_tile_ord)) { - // convert ordinal of the fused tile to the ordinals of its constituent - // tiles - const auto div0 = std::ldiv(fused_tile_ord, ntiles_per_array); - // index of the 0th mode of this tile - const auto tile_idx_mode0 = div0.quot; - // ordinal of the corresponding tile in the unfused array - const auto tile_ord_array = div0.rem; - - const auto div1 = std::ldiv(tile_idx_mode0, nproc); - const auto tile_idx_on_owner = div1.quot; - auto block_size = (tile_idx_mode0 < num_avg_plus_one ? - avg_block_size + 1 : avg_block_size); - - const auto owner_rank = div1.rem; - - auto num_avg_plusone_on_owner = (owner_rank < dv1.rem ? dv1.quot + 1 : dv1.quot); - const auto vector_idx_offset_on_owner = (tile_idx_mode0 < num_avg_plus_one ? - tile_idx_on_owner * block_size : - (num_avg_plusone_on_owner) * (block_size + 1) + (tile_idx_on_owner - num_avg_plusone_on_owner) * block_size); - - auto fused_tile_range = - fused_array.trange().make_tile_range(fused_tile_ord); - // make a vector of Futures to the input tiles - std::vector> input_tiles; - input_tiles.reserve(fused_tile_range.extent(0)); - for (size_t v = 0, vidx = tile_idx_mode0 * block_size; - v != block_size && vidx < fused_dim_extent; ++v, ++vidx) { - using Index = decltype(tile_ord_array); - input_tiles.emplace_back( - arrays.task(owner_rank, - &detail::dist_subarray_vec< - DistArray>::template get_tile, - vector_idx_offset_on_owner + v, tile_ord_array)); - } - fused_array.set( - fused_tile_ord, - global_world.taskq.add(make_tile, std::move(fused_tile_range), - std::move(input_tiles))); + madness::World& global_world, + const std::vector>& array_vec, + const std::size_t fused_dim_extent, + const TiledArray::TiledRange& array_trange, + std::size_t target_block_size = 1) { + auto nproc = global_world.size(); + + // make instances of array_vec globally accessible + using Array = TiledArray::DistArray; + detail::dist_subarray_vec arrays(global_world, array_vec, + fused_dim_extent); + + std::size_t nblocks = + (fused_dim_extent + target_block_size - 1) / target_block_size; + auto dv = std::div((int)(fused_dim_extent + nblocks - 1), (int)nblocks); + auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1; + auto dv1 = std::div(num_avg_plus_one, nproc); + + // make fused tiledrange + auto fused_trange = + detail::prepend_dim_to_trange(fused_dim_extent, array_trange, nblocks, + avg_block_size, num_avg_plus_one); + std::size_t ntiles_per_array = array_trange.tiles_range().volume(); + + // make fused shape + auto fused_shape = detail::fuse_tilewise_vector_of_shapes( + global_world, arrays.array_accessor(), fused_dim_extent, fused_trange, + avg_block_size, num_avg_plus_one); + + // make fused array + TiledArray::DistArray fused_array(global_world, fused_trange, + fused_shape); + + /// copy the data from a sequence of tiles + auto make_tile = [](const TiledArray::Range& range, + const std::vector>& tiles) { + TA_ASSERT(range.extent(0) == tiles.size()); + Tile result(range); + const auto volume = range.volume(); + size_t result_volume = 0; + auto* result_ptr = result.data(); + for (auto&& fut_of_tile : tiles) { + TA_ASSERT(fut_of_tile.probe()); + const auto& tile = fut_of_tile.get(); + const auto* tile_data = tile.data(); + const auto tile_volume = tile.size(); + std::copy(tile_data, tile_data + tile_volume, result_ptr); + result_ptr += tile_volume; + result_volume += tile_volume; + } + TA_ASSERT(volume == result_volume); + return result; + }; + + /// write to blocks of fused_array + for (auto&& fused_tile_ord : *fused_array.pmap()) { + if (!fused_array.is_zero(fused_tile_ord)) { + // convert ordinal of the fused tile to the ordinals of its constituent + // tiles + const auto div0 = std::ldiv(fused_tile_ord, ntiles_per_array); + // index of the 0th mode of this tile + const auto tile_idx_mode0 = div0.quot; + // ordinal of the corresponding tile in the unfused array + const auto tile_ord_array = div0.rem; + + const auto div1 = std::ldiv(tile_idx_mode0, nproc); + const auto tile_idx_on_owner = div1.quot; + auto block_size = (tile_idx_mode0 < num_avg_plus_one ? avg_block_size + 1 + : avg_block_size); + + const auto owner_rank = div1.rem; + + auto num_avg_plusone_on_owner = + (owner_rank < dv1.rem ? dv1.quot + 1 : dv1.quot); + const auto vector_idx_offset_on_owner = + (tile_idx_mode0 < num_avg_plus_one + ? tile_idx_on_owner * block_size + : (num_avg_plusone_on_owner) * (block_size + 1) + + (tile_idx_on_owner - num_avg_plusone_on_owner) * + block_size); + + auto fused_tile_range = + fused_array.trange().make_tile_range(fused_tile_ord); + // make a vector of Futures to the input tiles + std::vector> input_tiles; + input_tiles.reserve(fused_tile_range.extent(0)); + for (size_t v = 0, vidx = tile_idx_mode0 * block_size; + v != block_size && vidx < fused_dim_extent; ++v, ++vidx) { + using Index = decltype(tile_ord_array); + input_tiles.emplace_back( + arrays.task(owner_rank, + &detail::dist_subarray_vec< + DistArray>::template get_tile, + vector_idx_offset_on_owner + v, tile_ord_array)); } + fused_array.set( + fused_tile_ord, + global_world.taskq.add(make_tile, std::move(fused_tile_range), + std::move(input_tiles))); } + } - // keep arrays around until everyone is done - global_world.gop.fence(); + // keep arrays around until everyone is done + global_world.gop.fence(); - return fused_array; - } + return fused_array; +} /// @brief extracts a subarray of a fused array created with /// fuse_vector_of_arrays and creates the array in @c local_world. @@ -446,7 +464,8 @@ TiledArray::DistArray fuse_tilewise_vector_of_arrays( /// @sa detail::tilewise_slice_of_fused_shape template void split_tilewise_fused_array( - madness::World& local_world, const TiledArray::DistArray& fused_array, + madness::World& local_world, + const TiledArray::DistArray& fused_array, std::size_t tile_idx, std::vector>& split_arrays, const TiledArray::TiledRange& split_trange) { @@ -465,8 +484,8 @@ void split_tilewise_fused_array( split_trange, shape, tile_idx, split_ntiles, tile_size); // create split Array object TiledArray::DistArray split_array(local_world, split_trange, - split_shape); - //split_arrays.push_back(split_array); + split_shape); + // split_arrays.push_back(split_array); split_arrays.emplace_back(std::move(split_array)); } diff --git a/src/TiledArray/cp/btas_cp.h b/src/TiledArray/cp/btas_cp.h index e6f0109e85..69d0b097d1 100644 --- a/src/TiledArray/cp/btas_cp.h +++ b/src/TiledArray/cp/btas_cp.h @@ -31,7 +31,7 @@ template auto btas_cp_als(madness::World& world, const DistArray Reference, long btas_cp_rank, TA::TiledRange1 rank_trange1, std::size_t decomp_world_rank = 0, double als_threshold = 1e-3, - bool verbose = true) { + bool verbose = false) { using tile_type = typename DistArray::value_type::value_type; using BTAS_Tensor = btas::Tensor>; @@ -114,7 +114,7 @@ template auto btas_cp_rals(madness::World& world, DistArray Reference, long btas_cp_rank, TA::TiledRange1 rank_trange1, std::size_t decomp_world_rank = 0, - double als_threshold = 1e-3, bool verbose = true) { + double als_threshold = 1e-3, bool verbose = false) { using BTAS_Tensor = btas::Tensor>; diff --git a/src/TiledArray/cp/cp.h b/src/TiledArray/cp/cp.h index 7917e6b01f..d676f8fdd8 100644 --- a/src/TiledArray/cp/cp.h +++ b/src/TiledArray/cp/cp.h @@ -8,42 +8,10 @@ #include #include #include -#include namespace TiledArray::cp { + namespace detail { -// A seed for the random number generator. -static inline unsigned int& random_seed_accessor() { - static unsigned int value = 3; - return value; -} - -// given a rank and block size, this computes a -// trange for the rank dimension to be used to make the CP factors. -static inline TiledRange1 compute_trange1(size_t rank, size_t rank_block_size) { - std::size_t nblocks = (rank + rank_block_size - 1) / rank_block_size; - auto dv = std::div((int)(rank + nblocks - 1), (int)nblocks); - auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1; - - TiledArray::TiledRange1 new_trange1; - { - std::vector new_trange1_v; - new_trange1_v.reserve(nblocks + 1); - auto block_counter = 0; - for (auto i = 0; i < num_avg_plus_one; - ++i, block_counter += avg_block_size + 1) { - new_trange1_v.emplace_back(block_counter); - } - for (auto i = num_avg_plus_one; i < nblocks; - ++i, block_counter += avg_block_size) { - new_trange1_v.emplace_back(block_counter); - } - new_trange1_v.emplace_back(rank); - new_trange1 = - TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end()); - } - return new_trange1; -} static inline char intToAlphabet(int i) { return static_cast('a' + i); } @@ -94,9 +62,8 @@ class CP { /// moving to @c rank else builds an efficient /// random guess with rank @c rank /// \param[in] rank Rank of the CP deccomposition - /// \param[in] rank_block_size 0; What is the size of the blocks - /// in the rank mode's TiledRange, will compute TiledRange1 inline. - /// if 0 : rank_blocck_size = rank. + /// \param[in] rank_block_size The target tile size of + /// rank mode's range; if 0 will use \p rank as \p rank_block_size . /// \param[in] build_rank should CP approximation be built from rank 1 /// or set. /// \param[in] epsilonALS 1e-3; the stopping condition for the ALS solver @@ -112,13 +79,13 @@ class CP { if (build_rank) { size_t cur_rank = 1; do { - rank_trange = detail::compute_trange1(cur_rank, rank_block_size); + rank_trange = TiledArray::compute_trange1(cur_rank, rank_block_size); build_guess(cur_rank, rank_trange); ALS(cur_rank, 100, verbose); ++cur_rank; } while (cur_rank < rank); } else { - rank_trange = detail::compute_trange1(rank, rank_block_size); + rank_trange = TiledArray::compute_trange1(rank, rank_block_size); build_guess(rank, rank_trange); ALS(rank, 100, verbose); } @@ -144,7 +111,7 @@ class CP { double epsilon = 1.0; fit_tol = epsilonALS; do { - auto rank_trange = detail::compute_trange1(cur_rank, rank_block_size); + auto rank_trange = compute_trange1(cur_rank, rank_block_size); build_guess(cur_rank, rank_trange); ALS(cur_rank, 100, verbose); ++cur_rank; @@ -197,9 +164,10 @@ class CP { final_fit, // The final fit of the ALS // optimization at fixed rank. fit_tol, // Tolerance for the ALS solver - converged_num, // How many times the ALS solver - // has changed less than the tolerance norm_reference; // used in determining the CP fit. + std::size_t converged_num = + 0; // How many times the ALS solver + // has changed less than the tolerance in a row /// This function is determined by the specific CP solver. /// builds the rank @c rank CP approximation and stores @@ -228,14 +196,12 @@ class CP { auto lambda = std::vector( rank, (typename Tile::value_type)0); if (world.rank() == 0) { - std::mt19937 generator(detail::random_seed_accessor()); - std::uniform_real_distribution<> distribution(-1.0, 1.0); auto factor_ptr = factor.data(); size_t offset = 0; for (auto r = 0; r < rank; ++r, offset += mode_size) { auto lam_ptr = lambda.data() + r; for (auto m = offset; m < offset + mode_size; ++m) { - auto val = distribution(generator); + auto val = TiledArray::drand() * 2 - 1; // random number in [-1,1] *(factor_ptr + m) = val; *lam_ptr += val * val; } @@ -326,9 +292,9 @@ class CP { auto lo = tile.range().lobound_data(); auto up = tile.range().upbound_data(); for (auto R = lo[0]; R < up[0]; ++R) { - const auto norm_squared_RR = tile(R); + const auto norm_squared_RR = tile({R}); using std::sqrt; - tile(R) = sqrt(norm_squared_RR); + tile({R}) = sqrt(norm_squared_RR); } }, /* fence = */ true); @@ -365,7 +331,7 @@ class CP { /// \returns bool : is the change in fit less than the ALS tolerance? virtual bool check_fit(bool verbose = false) { // Compute the inner product T * T_CP - double inner_prod = MTtKRP("r,n").dot(unNormalized_Factor("r,n")); + const auto ref_dot_cp = MTtKRP("r,n").dot(unNormalized_Factor("r,n")); // compute the square of the CP tensor (can use the grammian) auto factor_norm = [&]() { auto gram_ptr = partial_grammian.begin(); @@ -381,27 +347,39 @@ class CP { return result; }; // compute the error in the loss function and find the fit - double normFactors = factor_norm(), - normResidual = - sqrt(abs(norm_reference * norm_reference + - normFactors * normFactors - 2.0 * inner_prod)), - fit = 1.0 - (normResidual / norm_reference), - fit_change = abs(prev_fit - fit); + const auto norm_cp = factor_norm(); // ||T_CP||_2 + const auto squared_norm_error = norm_reference * norm_reference + + norm_cp * norm_cp - + 2.0 * ref_dot_cp; // ||T - T_CP||_2^2 + // N.B. squared_norm_error is very noisy + // TA_ASSERT(squared_norm_error >= - 1e-8); + const auto norm_error = sqrt(abs(squared_norm_error)); + const auto fit = 1.0 - (norm_error / norm_reference); + const auto fit_change = fit - prev_fit; prev_fit = fit; // print fit data if required if (verbose) { - std::cout << fit << "\t" << fit_change << std::endl; + std::cout << MTtKRP.world().rank() << ": fit=" << fit + << " fit_change=" << fit_change << std::endl; } // if the change in fit is less than the tolerance try to return true. - if (fit_change < fit_tol) { + if (abs(fit_change) < fit_tol) { converged_num++; if (converged_num == 2) { converged_num = 0; final_fit = prev_fit; prev_fit = 1.0; + if (verbose) + std::cout << MTtKRP.world().rank() << ": converged" << std::endl; return true; + } else { + TA_ASSERT(converged_num == 1); + if (verbose) + std::cout << MTtKRP.world().rank() << ": pre-converged" << std::endl; } + } else { + converged_num = 0; } return false; } diff --git a/src/TiledArray/cp/cp_als.h b/src/TiledArray/cp/cp_als.h index 1bcbaf5b0f..197900b5b2 100644 --- a/src/TiledArray/cp/cp_als.h +++ b/src/TiledArray/cp/cp_als.h @@ -12,7 +12,7 @@ * takes a reference order-N tensor and decomposes it into a * set of order-2 tensors all coupled by a hyperdimension called the rank. * These factors are optimized using an alternating least squares - * algorithm. This class is derived form the base CP class + * algorithm. * * @tparam Tile typing for the DistArray tiles * @tparam Policy policy of the DistArray @@ -152,7 +152,6 @@ class CP_ALS : public CP { final = mixed_contractions; W("r,rp") *= this->partial_grammian[contracted_index]("r,rp"); - world.gop.fence(); } if (mode == ndim - 1) this->MTtKRP = An; diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 217c14ed46..ea6a066441 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -890,20 +890,16 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// /// \tparam T The type of random value to generate. Defaults to /// element_type. - /// \tparam A template type parameter which will be deduced as - /// void only if MakeRandom knows how to generate random - /// values of type T. If MakeRandom does not know how to - /// generate random values of type T, SFINAE will disable - /// this function. /// \param[in] skip_set If false, will throw if any tiles are already set /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarantee. /// \throw TiledArray::Exception if skip_set is false and a local tile is /// already initialized. Weak throw guarantee. - template > void fill_random(bool skip_set = false) { - init_elements( + init_elements( [](const auto&) { return detail::MakeRandom::generate_value(); }); } @@ -943,7 +939,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. /// \throw TiledArray::Exception if a tile is already set and skip_set is /// false. Weak throw guarantee. - template + template void init_tiles(Op&& op, bool skip_set = false) { // lifetime management of op depends on whether it is a lvalue ref (i.e. has // an external owner) or an rvalue ref @@ -957,15 +953,20 @@ class DistArray : public madness::archive::ParallelSerializableObject { const auto& index = *it; if (!pimpl_->is_zero(index)) { if (skip_set) { - auto fut = find(index); + auto fut = find_local(index); if (fut.probe()) continue; } - Future tile = pimpl_->world().taskq.add( - [pimpl = pimpl_, index = ordinal_type(index), - op_shared_handle]() -> value_type { - return op_shared_handle(pimpl->trange().make_tile_range(index)); - }); - set(index, std::move(tile)); + if constexpr (Exec == HostExecutor::MADWorld) { + Future tile = pimpl_->world().taskq.add( + [pimpl = pimpl_, index = ordinal_type(index), + op_shared_handle]() -> value_type { + return op_shared_handle(pimpl->trange().make_tile_range(index)); + }); + set(index, std::move(tile)); + } else { + static_assert(Exec == HostExecutor::Thread); + set(index, op_shared_handle(trange().make_tile_range(index))); + } } } } @@ -994,10 +995,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \throw TiledArray::Exception if skip_set is false and a local, non-zero /// tile is already initialized. Weak throw /// guarantee. - template + template void init_elements(Op&& op, bool skip_set = false) { auto op_shared_handle = make_op_shared_handle(std::forward(op)); - init_tiles( + init_tiles( [op = std::move(op_shared_handle)]( const TiledArray::Range& range) -> value_type { // Initialize the tile with the given range object @@ -1545,30 +1546,31 @@ class DistArray : public madness::archive::ParallelSerializableObject { } private: - template - std::enable_if_t, void> check_index( - const Index i) const { + template + std::enable_if_t, void> check_index( + const Ordinal ord) const { TA_ASSERT( - impl_ref().tiles_range().includes(i) && + impl_ref().tiles_range().includes_ordinal(ord) && "The ordinal index used to access an array tile is out of range."); } template - std::enable_if_t, void> check_index( + std::enable_if_t, void> check_index( const Index& i) const { TA_ASSERT( impl_ref().tiles_range().includes(i) && "The coordinate index used to access an array tile is out of range."); } - template + template >> void check_index(const std::initializer_list& i) const { check_index>(i); } - template - std::enable_if_t, void> check_local_index( - const Index i) const { + template + std::enable_if_t, void> check_local_index( + const Ordinal i) const { check_index(i); TA_ASSERT(pimpl_->is_local(i) // pimpl_ already checked && @@ -1576,15 +1578,16 @@ class DistArray : public madness::archive::ParallelSerializableObject { } template - std::enable_if_t, void> check_local_index( - const Index& i) const { + std::enable_if_t, void> + check_local_index(const Index& i) const { check_index(i); TA_ASSERT( pimpl_->is_local(i) // pimpl_ already checked && "The coordinate index used to access an array tile is not local."); } - template + template >> void check_local_index(const std::initializer_list& i) const { check_local_index>(i); } diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 0a4be1ba43..c248956066 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -179,7 +179,7 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, bi = bi.reshape(shape, batch); for (size_t k = 0; k < batch; ++k) { auto hk = ai.batch(k).dot(bi.batch(k)); - tile[k] += hk; + tile({k}) += hk; } } auto pc = C.permutation; @@ -225,7 +225,7 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, if (!term.array.is_local(idx)) continue; if (term.array.is_zero(idx)) continue; // TODO no need for immediate evaluation - auto tile = term.array.find(idx).get(); + auto tile = term.array.find_local(idx).get(); if (P) tile = tile.permute(P); auto shape = term.ei_tiled_range.tile(ei); tile = tile.reshape(shape, batch); @@ -247,7 +247,7 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, if (!C.ei.is_local(e)) continue; if (C.ei.is_zero(e)) continue; // TODO no need for immediate evaluation - auto tile = C.ei.find(e).get(); + auto tile = C.ei.find_local(e).get(); assert(tile.batch_size() == batch); const Permutation &P = C.permutation; auto c = apply(P, h + e); @@ -406,11 +406,11 @@ namespace TiledArray::expressions { /// @param[in] B second argument to the product /// @warning just as in the plain expression code, reductions are a special /// case; use Expr::reduce() -template +template auto einsum(expressions::TsrExpr A, expressions::TsrExpr B) { auto a = std::get<0>(idx(A)); auto b = std::get<0>(idx(B)); - return einsum(A, B, std::string(a^b)); + return einsum(A, B, std::string(a ^ b)); } /// einsum function with result indices explicitly specified @@ -419,37 +419,29 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B) { /// @param[in] r result indices /// @warning just as in the plain expression code, reductions are a special /// case; use Expr::reduce() -template -auto einsum( - expressions::TsrExpr A, - expressions::TsrExpr B, - const std::string &cs, - World &world = get_default_world()) -{ +template +auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, + const std::string &cs, World &world = get_default_world()) { static_assert(std::is_same::value); using E = expressions::TsrExpr; return Einsum::einsum(E(A), E(B), Einsum::idx(cs), world); } template -auto dot(expressions::TsrExpr A, - expressions::TsrExpr B, - expressions::TsrExpr C, - World &world = get_default_world()) -{ +auto dot(expressions::TsrExpr A, expressions::TsrExpr B, + expressions::TsrExpr C, World &world = get_default_world()) { static_assert(std::is_same::value); static_assert(std::is_same::value); using E = expressions::TsrExpr; return Einsum::dot(E(A), E(B), E(C), world); } -} // TiledArray::expressions +} // namespace TiledArray::expressions namespace TiledArray { using expressions::dot; using expressions::einsum; -using expressions::dot; template auto einsum(const std::string &expr, const DistArray &A, diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 51e43d9bee..87af0e6115 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -149,6 +149,8 @@ using Array [[deprecated("use TiledArray::DistArray or TiledArray::TArray")]] = DistArray; +enum class HostExecutor { Thread, MADWorld, Default = MADWorld }; + } // namespace TiledArray #ifndef TILEDARRAY_DISABLE_NAMESPACE_TA diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index 9c0e54a757..8108ecf227 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -76,7 +76,8 @@ class Range { /// stride[0], ..., stride[rank_ - 1] } /// \endcode container::svector datavec_; - distance_type offset_ = 0l; ///< Ordinal index offset correction + distance_type offset_ = + 0l; ///< Ordinal index offset correction to support nonzero lobound ordinal_type volume_ = 0ul; ///< Total number of elements unsigned int rank_ = 0u; ///< The rank (or number of dimensions) in the range @@ -825,7 +826,7 @@ class Range { /// \throw TiledArray::Exception When the rank of this range is not /// equal to the size of the index. template , + typename std::enable_if, bool>::type* = nullptr> bool includes(const Index& index) const { TA_ASSERT(*this); @@ -870,11 +871,29 @@ class Range { /// \param i The ordinal index to check for inclusion in the range /// \return \c true when \c i \c >= \c 0 and \c i \c < \c volume + /// \warning if this->order()==1 this /// \throw nothing template typename std::enable_if, bool>::type includes( Ordinal i) const { TA_ASSERT(*this); + // can't distinguish between includes(Index...) and includes(ordinal) + // thus assume includes_ordinal() if this->rank()==1 + TA_ASSERT(this->rank() != 1 && + "use Range::includes(index) or " + "Range::includes_ordinal(index_ordinal) if this->rank()==1"); + return include_ordinal_(i); + } + + /// Check the ordinal index to make sure it is within the range. + + /// \param i The ordinal index to check for inclusion in the range + /// \return \c true when \c i \c >= \c 0 and \c i \c < \c volume + /// \throw nothing + template + typename std::enable_if, bool>::type + includes_ordinal(Ordinal i) const { + TA_ASSERT(*this); return include_ordinal_(i); } @@ -1009,10 +1028,7 @@ class Range { /// \param index Ordinal index /// \return \c index (unchanged) /// \throw When \c index is not included in this range - ordinal_type ordinal(const ordinal_type index) const { - TA_ASSERT(includes(index)); - return index; - } + ordinal_type ordinal(const ordinal_type index) const { return index; } /// calculate the ordinal index of \p index @@ -1024,8 +1040,6 @@ class Range { template >* = nullptr> ordinal_type ordinal(const Index& index) const { - TA_ASSERT(includes(index)); - auto* MADNESS_RESTRICT const stride = stride_data(); ordinal_type result = 0ul; @@ -1071,16 +1085,16 @@ class Range { return ordinal(temp_index); } - /// calculate the coordinate index of the ordinal index, \c index. + /// calculate the coordinate index of the ordinal index, \c ord. /// Convert an ordinal index to a coordinate index. - /// \param index Ordinal index + /// \param ord Ordinal index /// \return The index of the ordinal index - /// \throw TiledArray::Exception When \c index is not included in this range - index_type idx(ordinal_type index) const { + /// \throw TiledArray::Exception When \p ord is not included in this range + index_type idx(ordinal_type ord) const { // Check that index is contained by range. // N.B. this will fail if any extent is zero - TA_ASSERT(includes(index)); + TA_ASSERT(includes_ordinal(ord)); // Construct result coordinate index object and allocate its memory. Range_::index result(rank_, 0); @@ -1096,8 +1110,8 @@ class Range { const auto size_i = size[i]; // Compute result index element i - const auto result_i = (index % size_i) + lower_i; - index /= size_i; + const auto result_i = (ord % size_i) + lower_i; + ord /= size_i; // Store result result_data[i] = result_i; diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h index da56cc24ef..7346f45d1c 100644 --- a/src/TiledArray/sparse_shape.h +++ b/src/TiledArray/sparse_shape.h @@ -485,9 +485,23 @@ class SparseShape { /// Check that a tile is zero - /// \tparam Index The type of the index - /// \return false - template + /// \tparam Ordinal an integer type + /// \param ord the ordinal index + /// \return true if tile at position \p ord is zero + template + std::enable_if_t, bool> is_zero( + const Ordinal& ord) const { + TA_ASSERT(!tile_norms_.empty()); + return tile_norms_.at_ordinal(ord) < my_threshold_; + } + + /// Check that a tile is zero + + /// \tparam Index a sized integral range type + /// \param i the index + /// \return true if tile at position \p i is zero + template >> bool is_zero(const Index& i) const { TA_ASSERT(!tile_norms_.empty()); return tile_norms_[i] < my_threshold_; diff --git a/src/TiledArray/tensor.h b/src/TiledArray/tensor.h index f24621077c..edb7ba2e47 100644 --- a/src/TiledArray/tensor.h +++ b/src/TiledArray/tensor.h @@ -62,8 +62,20 @@ template ::value && inline std::ostream& operator<<(std::ostream& os, const T& t) { os << t.range() << " { "; const auto n = t.range().volume(); - for (auto i = 0ul; i < n; ++i) os << t[i] << " "; - + std::size_t offset = 0ul; + const auto more_than_1_batch = t.batch_size() > 1; + for (auto b = 0ul; b != t.batch_size(); ++b) { + if (more_than_1_batch) { + os << "[batch " << b << "]{ "; + } + for (auto ord = 0ul; ord < n; ++ord) { + os << t.data()[offset + ord] << " "; + } + if (more_than_1_batch) { + os << "} "; + } + offset += n; + } os << "}"; return os; diff --git a/src/TiledArray/tensor/kernels.h b/src/TiledArray/tensor/kernels.h index 7276678145..c65d0e5c69 100644 --- a/src/TiledArray/tensor/kernels.h +++ b/src/TiledArray/tensor/kernels.h @@ -224,8 +224,8 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) { const auto volume = result.range().volume(); - for (decltype(result.range().volume()) i = 0ul; i < volume; ++i) { - inplace_tensor_op(op, result[i], tensors[i]...); + for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) { + inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...); } } @@ -395,9 +395,9 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) { inplace_tensor_op(op, result_data[i], tensors_data[i]...); }; - for (decltype(result.range().volume()) i = 0ul; i < volume; i += stride) - inplace_tensor_range(result.data() + result.range().ordinal(i), - (tensors.data() + tensors.range().ordinal(i))...); + for (decltype(result.range().volume()) ord = 0ul; ord < volume; ord += stride) + inplace_tensor_range(result.data() + result.range().ordinal(ord), + (tensors.data() + tensors.range().ordinal(ord))...); } // ------------------------------------------------------------------------- @@ -455,9 +455,9 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) { const auto volume = result.range().volume(); - for (decltype(result.range().volume()) i = 0ul; i < volume; ++i) { - new (result.data() + i) typename TR::value_type( - tensor_op(op, tensors[i]...)); + for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) { + new (result.data() + ord) typename TR::value_type( + tensor_op(op, tensors.at_ordinal(ord)...)); } } @@ -578,10 +578,11 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1, new (result_ptr) typename T1::value_type(op(value1, values...)); }; - for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) - math::vector_ptr_op(wrapper_op, stride, result.data() + i, - (tensor1.data() + tensor1.range().ordinal(i)), - (tensors.data() + tensors.range().ordinal(i))...); + for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume; + ord += stride) + math::vector_ptr_op(wrapper_op, stride, result.data() + ord, + (tensor1.data() + tensor1.range().ordinal(ord)), + (tensors.data() + tensors.range().ordinal(ord))...); } /// Initialize tensor with one or more non-contiguous tensor arguments @@ -625,10 +626,10 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1, op, tensor1_data[i], tensors_data[i]...)); }; - for (decltype(volume) i = 0ul; i < volume; i += stride) - inplace_tensor_range(result.data() + i, - (tensor1.data() + tensor1.range().ordinal(i)), - (tensors.data() + tensors.range().ordinal(i))...); + for (decltype(volume) ord = 0ul; ord < volume; ord += stride) + inplace_tensor_range(result.data() + ord, + (tensor1.data() + tensor1.range().ordinal(ord)), + (tensors.data() + tensors.range().ordinal(ord))...); } // ------------------------------------------------------------------------- @@ -735,9 +736,10 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op, Scalar identity, const auto volume = tensor1.range().volume(); auto result = identity; - for (decltype(tensor1.range().volume()) i = 0ul; i < volume; ++i) { + for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume; ++ord) { auto temp = - tensor_reduce(reduce_op, join_op, identity, tensor1[i], tensors[i]...); + tensor_reduce(reduce_op, join_op, identity, tensor1.at_ordinal(ord), + tensors.at_ordinal(ord)...); join_op(result, temp); } @@ -778,11 +780,12 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op, const auto volume = tensor1.range().volume(); Scalar result = identity; - for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) { + for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume; + ord += stride) { Scalar temp = identity; math::reduce_op(reduce_op, join_op, identity, stride, temp, - tensor1.data() + tensor1.range().ordinal(i), - (tensors.data() + tensors.range().ordinal(i))...); + tensor1.data() + tensor1.range().ordinal(ord), + (tensors.data() + tensors.range().ordinal(ord))...); join_op(result, temp); } @@ -834,10 +837,11 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op, }; Scalar result = identity; - for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) { - Scalar temp = - tensor_reduce_range(result, tensor1.data() + tensor1.range().ordinal(i), - (tensors.data() + tensors.range().ordinal(i))...); + for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume; + ord += stride) { + Scalar temp = tensor_reduce_range( + result, tensor1.data() + tensor1.range().ordinal(ord), + (tensors.data() + tensors.range().ordinal(ord))...); join_op(result, temp); } diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 5009391c16..d3c91361f5 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -610,12 +610,19 @@ class Tensor { /// \tparam Ordinal an integer type that represents an ordinal /// \param[in] ord an ordinal index /// \return Const reference to the element at position \c ord . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` template ::value>* = nullptr> const_reference operator[](const Ordinal ord) const { - TA_ASSERT(this->range_.includes(ord)); + TA_ASSERT(!this->empty()); + // can't distinguish between operator[](Index...) and operator[](ordinal) + // thus assume at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator[](index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); + TA_ASSERT(this->batch_size() == 1); + TA_ASSERT(this->range_.includes_ordinal(ord)); return this->data()[ord]; } @@ -624,12 +631,51 @@ class Tensor { /// \tparam Ordinal an integer type that represents an ordinal /// \param[in] ord an ordinal index /// \return Reference to the element at position \c ord . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` template ::value>* = nullptr> reference operator[](const Ordinal ord) { - TA_ASSERT(this->range_.includes(ord)); + TA_ASSERT(!this->empty()); + // can't distinguish between operator[](Index...) and operator[](ordinal) + // thus assume at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator[](index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); + TA_ASSERT(this->batch_size() == 1); + TA_ASSERT(this->range_.includes_ordinal(ord)); + return this->data()[ord]; + } + + /// Const element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Const reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` + template ::value>* = nullptr> + const_reference at_ordinal(const Ordinal ord) const { + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + TA_ASSERT(this->range_.includes_ordinal(ord)); + return this->data()[ord]; + } + + /// Element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` + template ::value>* = nullptr> + reference at_ordinal(const Ordinal ord) { + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + TA_ASSERT(this->range_.includes_ordinal(ord)); return this->data()[ord]; } @@ -638,13 +684,16 @@ class Tensor { /// \tparam Index An integral range type /// \param[in] i an index /// \return Const reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> const_reference operator[](const Index& i) const { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Element accessor @@ -652,13 +701,16 @@ class Tensor { /// \tparam Index An integral range type /// \param[in] i an index /// \return Reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> reference operator[](const Index& i) { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Const element accessor @@ -666,13 +718,16 @@ class Tensor { /// \tparam Integer An integral type /// \param[in] i an index /// \return Const reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> const_reference operator[](const std::initializer_list& i) const { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Element accessor @@ -680,13 +735,58 @@ class Tensor { /// \tparam Integer An integral type /// \param[in] i an index /// \return Reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> reference operator[](const std::initializer_list& i) { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; + } + + /// Const element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Const reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` + template >* = nullptr> + const_reference operator()(const Ordinal& ord) const { + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + // can't distinguish between operator[](Index...) and operator[](ordinal) + // thus assume at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator()(index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); + TA_ASSERT(this->range_.includes_ordinal(ord)); + return this->data()[ord]; + } + + /// Element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is + /// included in the range, and `batch_size()==1` + template >* = nullptr> + reference operator()(const Ordinal& ord) { + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + // can't distinguish between operator[](Index...) and operator[](ordinal) + // thus assume at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator()(index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); + TA_ASSERT(this->range_.includes_ordinal(ord)); + return this->data()[ord]; } /// Const element accessor @@ -694,13 +794,16 @@ class Tensor { /// \tparam Index An integral range type /// \param[in] i an index /// \return Const reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> const_reference operator()(const Index& i) const { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Element accessor @@ -708,13 +811,16 @@ class Tensor { /// \tparam Index An integral range type /// \param[in] i an index /// \return Reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> reference operator()(const Index& i) { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Const element accessor @@ -722,13 +828,16 @@ class Tensor { /// \tparam Integer An integral type /// \param[in] i an index /// \return Const reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> const_reference operator()(const std::initializer_list& i) const { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Element accessor @@ -736,41 +845,59 @@ class Tensor { /// \tparam Integer An integral type /// \param[in] i an index /// \return Reference to the element at position \c i . - /// \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template >* = nullptr> reference operator()(const std::initializer_list& i) { - TA_ASSERT(this->range_.includes(i)); - return this->data()[this->range_.ordinal(i)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal(i); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Const element accessor /// \tparam Index an integral list ( see TiledArray::detail::is_integral_list - /// ) \param[in] i an index \return Const reference to the element at position - /// \c i . \note This asserts (using TA_ASSERT) that this is not empty and ord - /// is included in the range + /// ) + /// \param[in] i an index \return Const reference to the element at position + /// \c i . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template < typename... Index, - std::enable_if_t::value>* = nullptr> + std::enable_if_t<(sizeof...(Index) > 1ul) && + detail::is_integral_list::value>* = nullptr> const_reference operator()(const Index&... i) const { - TA_ASSERT(this->range_.includes(i...)); - return this->data()[this->range_.ordinal(i...)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + const auto iord = this->range_.ordinal( + std::array, sizeof...(Index)>{{i...}}); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Element accessor /// \tparam Index an integral list ( see TiledArray::detail::is_integral_list - /// ) \param[in] i an index \return Reference to the element at position \c i - /// . \note This asserts (using TA_ASSERT) that this is not empty and ord is - /// included in the range + /// ) + /// \param[in] i an index \return Reference to the element at position \c i + /// . + /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is + /// included in the range, and `batch_size()==1` template < typename... Index, - std::enable_if_t::value>* = nullptr> + std::enable_if_t<(sizeof...(Index) > 1ul) && + detail::is_integral_list::value>* = nullptr> reference operator()(const Index&... i) { - TA_ASSERT(this->range_.includes(i...)); - return this->data()[this->range_.ordinal(i...)]; + TA_ASSERT(!this->empty()); + TA_ASSERT(this->batch_size() == 1); + using Int = std::common_type_t; + const auto iord = this->range_.ordinal( + std::array{{static_cast(i)...}}); + TA_ASSERT(this->range_.includes_ordinal(iord)); + return this->data()[iord]; } /// Iterator factory diff --git a/src/TiledArray/tensor_impl.h b/src/TiledArray/tensor_impl.h index 1872cb8584..6811fc6cb2 100644 --- a/src/TiledArray/tensor_impl.h +++ b/src/TiledArray/tensor_impl.h @@ -133,41 +133,135 @@ class TensorImpl : private NO_DEFAULTS { /// Query a tile owner - /// \tparam Index The index type + /// \tparam Index The sized integral range type /// \param i The tile index to query /// \return The process ID of the node that owns tile \c i /// \throw TiledArray::Exception When \c i is outside the tiled range tile /// range /// \throw TiledArray::Exception When the process map has not been set - template - ProcessID owner(const Index& i) const { - TA_ASSERT(trange_.tiles_range().includes(i)); - return pmap_->owner(trange_.tiles_range().ordinal(i)); + template >> + ProcessID owner(const Index& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->owner(ord); + } + + /// Query a tile owner + + /// \tparam Integer An integer type + /// \param i The tile index to query + /// \return The process ID of the node that owns tile \c i + /// \throw TiledArray::Exception When \c i is outside the tiled range tile + /// range + /// \throw TiledArray::Exception When the process map has not been set + template >> + ProcessID owner(const std::initializer_list& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->owner(ord); + } + + /// Query a tile owner + + /// \tparam Ordinal An integer type + /// \param i The tile index to query + /// \return The process ID of the node that owns tile \c i + /// \throw TiledArray::Exception When \c i is outside the tiled range tile + /// range + /// \throw TiledArray::Exception When the process map has not been set + template + std::enable_if_t, ProcessID> owner( + const Ordinal& ord) const { + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->owner(ord); + } + + /// Query for a locally owned tile + + /// \tparam Index The sized integral range type + /// \param i The tile index to query + /// \return \c true if the tile is owned by this node, otherwise \c false + /// \throw TiledArray::Exception When the process map has not been set + template >> + bool is_local(const Index& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->is_local(ord); + } + + /// Query for a locally owned tile + + /// \tparam Integer An integer type + /// \param i The tile index to query + /// \return \c true if the tile is owned by this node, otherwise \c false + /// \throw TiledArray::Exception When the process map has not been set + template >* = nullptr> + bool is_local(const std::initializer_list& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->is_local(ord); } /// Query for a locally owned tile - /// \tparam Index The index type + /// \tparam Ordinal An integer type /// \param i The tile index to query /// \return \c true if the tile is owned by this node, otherwise \c false /// \throw TiledArray::Exception When the process map has not been set - template - bool is_local(const Index& i) const { - TA_ASSERT(trange_.tiles_range().includes(i)); - return pmap_->is_local(trange_.tiles_range().ordinal(i)); + template + std::enable_if_t, bool> is_local( + const Ordinal& ord) const { + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return pmap_->is_local(ord); + } + + /// Query for a zero tile + + /// \tparam Index The sized integral range type + /// \param i The tile index to query + /// \return \c true if the tile is zero, otherwise \c false + /// \throw TiledArray::Exception When \c i is outside the tiled range tile + /// range + template && + !std::is_integral_v>> + bool is_zero(const Index& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return shape_->is_zero(ord); + } + + /// Query for a zero tile + + /// \tparam Integer An integer type + /// \param i The tile index to query + /// \return \c true if the tile is zero, otherwise \c false + /// \throw TiledArray::Exception When \c i is outside the tiled range tile + /// range + template >> + bool is_zero(const std::initializer_list& index) const { + const auto ord = trange_.tiles_range().ordinal(index); + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return shape_->is_zero(ord); } /// Query for a zero tile - /// \tparam Index The index type + /// \tparam Ordinal An integer type /// \param i The tile index to query /// \return \c true if the tile is zero, otherwise \c false /// \throw TiledArray::Exception When \c i is outside the tiled range tile /// range - template - bool is_zero(const Index& i) const { - TA_ASSERT(trange_.tiles_range().includes(i)); - return shape_->is_zero(trange_.tiles_range().ordinal(i)); + template + std::enable_if_t, bool> is_zero( + const Ordinal& ord) const { + TA_ASSERT(trange_.tiles_range().includes_ordinal(ord)); + return shape_->is_zero(ord); } /// Query the density of the tensor diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 86b949e45a..57366dbe60 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -213,7 +213,7 @@ class Tile { std::enable_if_t::value>* = nullptr> const_reference operator[](const Ordinal ord) const { TA_ASSERT(pimpl_); - TA_ASSERT(tensor().range().includes(ord)); + TA_ASSERT(tensor().range().includes_ordinal(ord)); return tensor().data()[ord]; } @@ -228,7 +228,7 @@ class Tile { std::enable_if_t::value>* = nullptr> reference operator[](const Ordinal ord) { TA_ASSERT(pimpl_); - TA_ASSERT(tensor().range().includes(ord)); + TA_ASSERT(tensor().range().includes_ordinal(ord)); return tensor().data()[ord]; } @@ -360,7 +360,8 @@ class Tile { /// is included in the range template < typename... Index, - std::enable_if_t::value>* = nullptr> + std::enable_if_t<(sizeof...(Index) > 1ul) && + detail::is_integral_list::value>* = nullptr> const_reference operator()(const Index&... i) const { TA_ASSERT(pimpl_); TA_ASSERT(tensor().range().includes(i...)); @@ -375,7 +376,8 @@ class Tile { /// included in the range template < typename... Index, - std::enable_if_t::value>* = nullptr> + std::enable_if_t<(sizeof...(Index) > 1ul) && + detail::is_integral_list::value>* = nullptr> reference operator()(const Index&... i) { TA_ASSERT(pimpl_); TA_ASSERT(tensor().range().includes(i...)); diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h index 7014ed382d..8c0714aa7d 100644 --- a/src/TiledArray/tiled_range.h +++ b/src/TiledArray/tiled_range.h @@ -180,12 +180,12 @@ class TiledRange { /// Construct a range for the tile indexed by the given ordinal index. - /// \param i The ordinal index of the tile range to be constructed + /// \param ord The ordinal index of the tile range to be constructed /// \throw std::runtime_error Throws if i is not included in the range /// \return The constructed range object - range_type make_tile_range(const index1_type& i) const { - TA_ASSERT(tiles_range().includes(i)); - return make_tile_range(tiles_range().idx(i)); + range_type make_tile_range(const ordinal_type& ord) const { + TA_ASSERT(tiles_range().includes_ordinal(ord)); + return make_tile_range(tiles_range().idx(ord)); } /// Construct a range for the tile indexed by the given index. diff --git a/src/TiledArray/util/random.cpp b/src/TiledArray/util/random.cpp new file mode 100644 index 0000000000..2762b10b76 --- /dev/null +++ b/src/TiledArray/util/random.cpp @@ -0,0 +1,60 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2013 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include + +#include +#include +#include + +namespace TiledArray { + +namespace detail { + +auto& random_engine_pool_accessor() { + using engine_t = boost::random::mt19937; + static auto tspool = make_tspool(engine_t(1)); + return tspool; +} +} // namespace detail + +boost::random::mt19937& random_engine() { + return detail::random_engine_pool_accessor().local(); +} + +int rand() { + using dist_t = boost::random::uniform_int_distribution<>; + static dist_t dist(0, RAND_MAX); + + return dist(random_engine()); +} + +double drand() { + using dist_t = boost::random::uniform_real_distribution; + static dist_t dist(0, 1); + + return dist(random_engine()); +} + +void srand(unsigned int seed) { + using engine_t = boost::random::mt19937; + detail::random_engine_pool_accessor() = detail::make_tspool(engine_t(seed)); +} + +} // namespace TiledArray diff --git a/src/TiledArray/util/random.h b/src/TiledArray/util/random.h index bb8bc35b93..b096654bc6 100644 --- a/src/TiledArray/util/random.h +++ b/src/TiledArray/util/random.h @@ -24,6 +24,38 @@ #include // for std::rand #include // for true_type, false_type, and enable_if +#include + +namespace TiledArray { + +/// \return reference to the thread-specific random engine used to implement +/// TiledArray::rand() and TiledArray::drand() +/// \note the reference to the thread local is not persistent; the value +/// will change after every call to TiledArray::srand() +boost::random::mt19937& random_engine(); + +/// reentrant high-quality version of std::rand() + +/// \return a random integer in [0,`RAND_MAX`] +/// \sa std::rand() +int rand(); + +/// reentrant high-quality version of drand48() + +/// \return a random integer in [0,1] +/// \sa drand48() +double drand(); + +/// Seeds the pseudo-random number generator used by TiledArray::rand() and +/// TiledArray::drand() with the value \p seed . + +/// Calling this changes the values returned by TiledArray::random_engine() +/// \param seed the value used to seed the random engine +/// \warning this is not reentrant +void srand(unsigned int seed); + +} // namespace TiledArray + namespace TiledArray::detail { //------------------------------------------------------------------------------ @@ -99,14 +131,14 @@ struct MakeRandom { static_assert(std::is_floating_point_v || std::is_integral_v); if constexpr (std::is_floating_point_v) - return (2 * static_cast(std::rand()) / RAND_MAX) - 1; + return (2 * static_cast(TiledArray::rand()) / RAND_MAX) - 1; else if constexpr (std::is_integral_v) { static_assert(RAND_MAX == 2147483647); static_assert(RAND_MAX % 2 == 1); constexpr std::int64_t RAND_MAX_DIVBY_9 = (static_cast(RAND_MAX) + 8) / 9; const ValueType v = static_cast( - static_cast(std::rand()) / RAND_MAX_DIVBY_9); + static_cast(TiledArray::rand()) / RAND_MAX_DIVBY_9); if constexpr (std::is_signed_v) { return v - 4; } else { diff --git a/src/TiledArray/util/thread_specific.h b/src/TiledArray/util/thread_specific.h new file mode 100644 index 0000000000..ff722cb222 --- /dev/null +++ b/src/TiledArray/util/thread_specific.h @@ -0,0 +1,188 @@ + +#ifndef TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED +#define TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace TiledArray { +namespace detail { + +/// Implements a highly-limited subset of tbb::enumerable_thread_specific +/// using Pthread thread-specific storage +template +class thread_specific { + public: + template >::value>> + explicit thread_specific(F &&init) + : init_(std::forward(init)), + mtx_(std::make_unique()), + key_ptr_(new pthread_key_t, &pthread_key_deleter) { + auto retcode = pthread_key_create(&key(), NULL); + // ensure that pthread_key_create sets all thread-specific values to null + TA_ASSERT(pthread_getspecific(key()) == NULL); + if (retcode == EAGAIN) + TA_EXCEPTION( + "thread_specific could not be constructed due to reaching the " + "maximum number of thread-specific keys"); + else if (retcode == ENOMEM) { + TA_EXCEPTION( + "thread_specific could not be constructed due to a memory allocation " + "error"); + } else if (retcode != 0) + TA_EXCEPTION( + "thread_specific could not be constructed due to an unknown error"); + } + + thread_specific(const thread_specific &) = delete; + thread_specific(thread_specific &&other) = default; + thread_specific &operator=(const thread_specific &) = delete; + thread_specific &operator=(thread_specific &&) = default; + ~thread_specific() = default; + + const pthread_key_t &key() const { return *key_ptr_; } + + /// @return reference to the thread-local @c Item instance. + Item &local() { return *(ts_item_ptr().get()); } + + private: + template + friend class TSPool; + + pthread_key_t &key() { return *key_ptr_; } + static void pthread_key_deleter(pthread_key_t *key_ptr) { + if (key_ptr) { + pthread_key_delete(*key_ptr); + delete key_ptr; + } + }; + + Item init_; + std::map> items_; + std::unique_ptr mtx_; + std::unique_ptr key_ptr_; + + const std::unique_ptr &ts_item_ptr() { + const std::unique_ptr *ptr = + reinterpret_cast *>( + pthread_getspecific(key())); + if (ptr == nullptr) { + std::lock_guard lock{*mtx_}; + const auto thread_id = std::this_thread::get_id(); + auto it = items_.find(thread_id); + // case 1: there is already an item for this thread id? (e.g. thread id + // was reused) initialize ts_item_ptr_ so that the next call from this + // thread is fast + if (it != items_.end()) { + // none of the non-TBB MADNESS task backends delete their + // threads but can't guarantee in general + ptr = &(it->second); + } + // case 2: need to make a new object + else { + auto result = items_.emplace(thread_id, std::make_unique(init_)); + ptr = &(result.first->second); + } + pthread_setspecific(key(), (const void *)ptr); + } + return *ptr; + } + + public: + using iterator = typename decltype(items_)::iterator; + using const_iterator = typename decltype(items_)::const_iterator; +}; + +/// A pool of thread-specific objects +template +class TSPool { + public: + /// Don't allow copies or default initialization. + TSPool() = delete; + TSPool(TSPool const &) = delete; + TSPool &operator=(TSPool const &) = delete; + + TSPool &operator=(TSPool &&) = default; + TSPool(TSPool &&a) = default; + + /// Initializes the pool with a single @c Item + explicit TSPool(Item e) : item_(std::move(e)), items_(item_) {} + /// @return reference to the thread-local @c Item instance. + Item &local() { return items_.local(); } + + /// Iterator over thread-local items + template + class Iterator + : public boost::iterator_facade, Item_, + boost::bidirectional_traversal_tag> { + public: + Iterator(thread_specific &items) : item_(items.items_.begin()) { + static_assert(!std::is_const_v); + } + Iterator(const thread_specific &items) : item_(items.items_.begin()) { + static_assert(std::is_const_v); + } + Iterator(thread_specific &items, bool /* end_tag */) + : item_(items.items_.end()) { + static_assert(!std::is_const_v); + } + Iterator(const thread_specific &items, bool /* end_tag */) + : item_(items.items_.end()) { + static_assert(std::is_const_v); + } + + private: + friend class boost::iterator_core_access; + + using nc_items_type = decltype(thread_specific::items_); + using items_type = + std::conditional_t, + std::add_const_t, nc_items_type>; + std::conditional_t, + typename items_type::const_iterator, + typename items_type::iterator> + item_; // points to the current item + + void increment() { ++item_; } + void decrement() { --item_; } + bool equal(Iterator const &other) const { + return this->item_ == other.item_; + } + Item_ &dereference() const { return *(item_->second); } + }; + + using iterator = Iterator; + using const_iterator = Iterator; + auto begin() { return iterator{items_}; } + auto end() { return iterator{items_, true}; } + auto begin() const { return const_iterator{items_}; } + auto end() const { return const_iterator{items_, true}; } + + private: + Item item_; //!< used only to initialize the thread-specific items + thread_specific items_; +}; + +template +TSPool make_tspool(Item e) { + return TSPool(std::move(e)); +} + +template +std::shared_ptr> make_shared_tspool(Item e) { + return std::make_shared>(std::move(e)); +} + +} // namespace detail +} // namespace TiledArray + +#endif // TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED diff --git a/tests/cp.cpp b/tests/cp.cpp index e339ab5a9b..763231ab9a 100644 --- a/tests/cp.cpp +++ b/tests/cp.cpp @@ -35,7 +35,8 @@ #include "TiledArray/cp/cp_als.h" #include "TiledArray/cp/cp_reconstruct.h" -const std::string __dirname = dirname(strdup(__FILE__)); +constexpr std::int64_t rank_tile_size = 10; +constexpr bool verbose = false; using namespace TiledArray; @@ -43,24 +44,10 @@ struct CPFixture : public TiledRangeFixture { CPFixture() : shape_tr(make_random_sparseshape(tr)), a_sparse(*GlobalFixture::world, tr, shape_tr) { - random_fill(a_sparse); + a_sparse.fill_random(); a_sparse.truncate(); } - template - static void random_fill(DistArray& array) { - typename DistArray::pmap_interface::const_iterator it = - array.pmap()->begin(); - typename DistArray::pmap_interface::const_iterator end = - array.pmap()->end(); - for (; it != end; ++it) { - if (!array.is_zero(*it)) - array.set(*it, array.world().taskq.add( - &CPFixture::template make_rand_tile, - array.trange().make_tile_range(*it))); - } - } - /// make a shape with approximate half dense and half sparse static SparseShape make_random_sparseshape(const TiledRange& tr) { std::size_t n = tr.tiles_range().volume(); @@ -78,39 +65,6 @@ struct CPFixture : public TiledRangeFixture { return SparseShape(norms, tr); } - // Fill a tile with random data - template - static Tile make_rand_tile(const typename Tile::range_type& r) { - Tile tile(r); - for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]); - return tile; - } - - template - static double init_rand_tile(Tile& tile, const typename Tile::range_type& r) { - tile = Tile(r); - for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]); - double result; - norm(tile, result); - return result; - } - - template - static double init_unit_tile(Tile& tile, const typename Tile::range_type& r) { - tile = Tile(r); - std::mt19937 generator(3); - std::uniform_real_distribution distribution(-1.0, 1.0); - for (std::size_t i = 0ul; i < tile.size(); ++i) tile[i] = 1; - double result; - norm(tile, result); - return result; - } - - template - static void set_random(T& t) { - t = GlobalFixture::world->rand() % 101; - } - static TensorF tensori_to_tensorf(const TensorI& tensori) { std::size_t n = tensori.size(); TensorF tensorf(tensori.range()); @@ -138,39 +92,38 @@ struct CPFixture : public TiledRangeFixture { }; template -TArrayT compute_cp(const TArrayT& T, size_t cp_rank) { +TArrayT compute_cp(const TArrayT& T, size_t cp_rank, bool verbose = false) { cp::CP_ALS CPD( T); - CPD.compute_rank(cp_rank, 80, false, 1e-3, true); + CPD.compute_rank(cp_rank, rank_tile_size, false, 1e-3, verbose); return CPD.reconstruct(); } -// TiledArray::TiledRange1 compute_trange1(std::size_t range_size, -// std::size_t target_block_size) { -// if (range_size > 0) { -// std::size_t nblocks = -// (range_size + target_block_size - 1) / target_block_size; -// auto dv = std::div((int) (range_size + nblocks - 1), (int) nblocks); -// auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1; -// std::vector hashmarks; -// hashmarks.reserve(nblocks + 1); -// auto block_counter = 0; -// for(auto i = 0; i < num_avg_plus_one; ++i, block_counter += -// avg_block_size + 1){ -// hashmarks.push_back(block_counter); -// } -// for (auto i = num_avg_plus_one; i < nblocks; ++i, block_counter+= -// avg_block_size) { -// hashmarks.push_back(block_counter); -// } -// hashmarks.push_back(range_size); -// return TA::TiledRange1(hashmarks.begin(), hashmarks.end()); -// } else -// return TA::TiledRange1{}; -// } + +enum class Fill { Random, Constant }; + +template +TArrayT make(const TiledRange& tr, OptionalArgs&&... opt_args) { + TArrayT result(*GlobalFixture::world, tr, opt_args...); + switch (fill) { + case Fill::Constant: + result.fill(1); + break; + case Fill::Random: + // make sure randomness is deterministic + result.template fill_random(); + break; + } + return result; +} BOOST_FIXTURE_TEST_SUITE(cp_suite, CPFixture) +const auto target_rel_error = std::sqrt(std::numeric_limits::epsilon()); + BOOST_AUTO_TEST_CASE(btas_cp_als) { + TiledArray::srand(1); + // Make a tiled range with block size of 1 TiledArray::TiledRange tr3, tr4, tr5; { @@ -183,96 +136,93 @@ BOOST_AUTO_TEST_CASE(btas_cp_als) { tr5 = TiledRange({tr1_mode0, tr1_mode0, tr1_mode1, tr1_mode2, tr1_mode3}); } - // Dense test - // order-3 test + // Dense + + // order-3 rank-1 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr3, - &this->init_unit_tile); + auto b_dense = make(tr3); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } - // order-4 test + // order-4 rank-1 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); + auto b_dense = make(tr4); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } - // order-5 test + // order-5 rank-1 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_dense = make(tr5); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } - // sparse test + // sparse + + // order-3 rank-N test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr3, - &this->init_rand_tile); + auto b_sparse = make(tr3); size_t cp_rank = 77; - factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + factors = + cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank, + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } - // order-4 test + // order-4 rank-1 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); + auto b_sparse = make(tr4); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } // order-5 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_sparse = make(tr5); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; @@ -283,6 +233,8 @@ BOOST_AUTO_TEST_CASE(btas_cp_als) { } BOOST_AUTO_TEST_CASE(btas_cp_rals) { + TiledArray::srand(1); + // Make a tiled range with block size of 1 TiledArray::TiledRange tr3, tr4, tr5; { @@ -299,93 +251,87 @@ BOOST_AUTO_TEST_CASE(btas_cp_rals) { // order-3 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr3, - &this->init_unit_tile); + auto b_dense = make(tr3); size_t cp_rank = 1; factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, + 1e-3, verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // order-4 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); + auto b_dense = make(tr4); size_t cp_rank = 1; factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, + 1e-3, verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // order-5 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_dense = make(tr5); size_t cp_rank = 1; factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, + 1e-3, verbose); auto b_cp = cp::reconstruct(factors); TArrayD diff; diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // sparse test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr3, - &this->init_rand_tile); + auto b_sparse = make(tr3); size_t cp_rank = 77; factors = cp::btas_cp_rals(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, + 1e-3, verbose); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } // order-4 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); + auto b_sparse = make(tr4); size_t cp_rank = 1; factors = cp::btas_cp_rals(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, + 1e-3, verbose); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } // order-5 test { std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_sparse = make(tr5); size_t cp_rank = 1; factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank, - compute_trange1(cp_rank, 80), 0, 1e-3); + compute_trange1(cp_rank, rank_tile_size), 0, 1e-3, + verbose); auto b_cp = cp::reconstruct(factors); TSpArrayD diff; @@ -396,6 +342,8 @@ BOOST_AUTO_TEST_CASE(btas_cp_rals) { } BOOST_AUTO_TEST_CASE(ta_cp_als) { + TiledArray::srand(1); + // Make a tiled range with block size of 1 TiledArray::TiledRange tr3, tr4, tr5; { @@ -411,84 +359,68 @@ BOOST_AUTO_TEST_CASE(ta_cp_als) { // Dense test // order-3 test { - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr3, - &this->init_unit_tile); + auto b_dense = make(tr3); size_t cp_rank = 1; - auto b_cp = compute_cp(b_dense, cp_rank); + auto b_cp = compute_cp(b_dense, cp_rank, verbose); TArrayD diff; diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // order-4 test { - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); + auto b_dense = make(tr4); size_t cp_rank = 1; - auto b_cp = compute_cp(b_dense, cp_rank); + auto b_cp = compute_cp(b_dense, cp_rank, verbose); TArrayD diff; diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // order-5 test { - std::vector factors; - // Make an sparse array with tiled range from above. - auto b_dense = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_dense = make(tr5); size_t cp_rank = 1; - auto b_cp = compute_cp(b_dense, cp_rank); + auto b_cp = compute_cp(b_dense, cp_rank, verbose); TArrayD diff; diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error); BOOST_CHECK(accurate); } // sparse test // order-3 test { - std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr3, - &this->init_rand_tile); - b_sparse.truncate(); + auto b_sparse = make(tr3); + // std::cout << "b_sparse = " << + // array_to_eigen_tensor>(b_sparse) << std::endl; size_t cp_rank = 77; - auto b_cp = compute_cp(b_sparse, cp_rank); + auto b_cp = compute_cp(b_sparse, cp_rank, verbose); TSpArrayD diff; diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } // order-4 test { - std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr4, - &this->init_unit_tile); - b_sparse.truncate(); + auto b_sparse = make(tr4); size_t cp_rank = 1; - auto b_cp = compute_cp(b_sparse, cp_rank); + auto b_cp = compute_cp(b_sparse, cp_rank, verbose); TSpArrayD diff; diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } // order-5 test { - std::vector factors; - // Make an sparse array with tiled range from above. - auto b_sparse = make_array(*GlobalFixture::world, tr5, - &this->init_unit_tile); + auto b_sparse = make(tr5); double cp_rank = 1; - auto b_cp = compute_cp(b_sparse, cp_rank); + auto b_cp = compute_cp(b_sparse, cp_rank, verbose); TSpArrayD diff; diff("a,b,c,d,e") = b_sparse("a,b,c,d,e") - b_cp("a,b,c,d,e"); - bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); BOOST_CHECK(accurate); } } diff --git a/tests/expressions_fixture.h b/tests/expressions_fixture.h index a558d32444..3e493b1200 100644 --- a/tests/expressions_fixture.h +++ b/tests/expressions_fixture.h @@ -139,7 +139,8 @@ struct ExpressionsFixture : public TiledRangeFixture { // Fill a tile with random data static Tile make_rand_tile(const typename Tile::range_type& r) { Tile tile(r); - for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]); + for (std::size_t i = 0ul; i < tile.size(); ++i) + set_random(tile.at_ordinal(i)); return tile; } @@ -198,7 +199,7 @@ struct ExpressionsFixture : public TiledRangeFixture { // make sure all mpi gets the same shape if (GlobalFixture::world->rank() == 0) { for (std::size_t i = 0; i < n; i++) { - norms[i] = GlobalFixture::world->drand() > 0.5 ? 0.0 : 1.0; + norms.at_ordinal(i) = GlobalFixture::world->drand() > 0.5 ? 0.0 : 1.0; } } diff --git a/tests/expressions_mixed.cpp b/tests/expressions_mixed.cpp index 04fef98683..6b92cec695 100644 --- a/tests/expressions_mixed.cpp +++ b/tests/expressions_mixed.cpp @@ -65,44 +65,7 @@ struct MixedExpressionsFixture : public TiledRangeFixture { template static void random_fill(DistArray& array) { - typename DistArray::pmap_interface::const_iterator it = - array.pmap()->begin(); - typename DistArray::pmap_interface::const_iterator end = - array.pmap()->end(); - for (; it != end; ++it) - array.set(*it, array.world().taskq.add( - &MixedExpressionsFixture::template make_rand_tile< - DistArray>, - array.trange().make_tile_range(*it))); - } - - template - static void set_random(T& t) { - // with 50% generate nonzero integer value in [0,101) - auto rand_int = GlobalFixture::world->rand(); - t = (rand_int < 0x8fffff) ? rand_int % 101 : 0; - } - - template - static void set_random(std::complex& t) { - // with 50% generate nonzero value - auto rand_int1 = GlobalFixture::world->rand(); - if (rand_int1 < 0x8ffffful) { - t = std::complex{T(rand_int1 % 101), - T(GlobalFixture::world->rand() % 101)}; - } else - t = std::complex{0, 0}; - } - - // Fill a tile with random data - template - static typename A::value_type make_rand_tile( - const typename A::value_type::range_type& r) { - typename A::value_type tile(r); - for (const auto& i : r) { - set_random(tile[i]); - } - return tile; + array.fill_random(); } template diff --git a/tests/linalg.cpp b/tests/linalg.cpp index 0753e8cf50..5c84d0b5e4 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -119,7 +119,10 @@ struct LinearAlgebraFixture : ReferenceFixture { template static void compare(const char* context, const A& non_dist, const A& result, double e) { - BOOST_TEST_CONTEXT(context); + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get(); BOOST_CHECK_SMALL(diff_with_non_dist, e); } diff --git a/tests/random.cpp b/tests/random.cpp index c5ffbf1a56..f1bef4aea2 100644 --- a/tests/random.cpp +++ b/tests/random.cpp @@ -11,7 +11,7 @@ using true_types = boost::mpl::list, std::complex>; } // namespace -BOOST_AUTO_TEST_SUITE(can_make_random, TA_UT_LABEL_SERIAL) +BOOST_AUTO_TEST_SUITE(random_suite, TA_UT_LABEL_SERIAL) BOOST_AUTO_TEST_CASE_TEMPLATE(can_make_random_false, ValueType, false_types) { using can_make_random_t = CanMakeRandom; @@ -44,4 +44,17 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(enable_if_true, ValueType, true_types) { BOOST_CHECK(result); } +BOOST_AUTO_TEST_CASE(rand) { + // set random seed + TiledArray::srand(20010515); + auto v0 = TiledArray::detail::MakeRandom::generate_value(); + auto v1 = TiledArray::detail::MakeRandom::generate_value(); + auto v2 = TiledArray::detail::MakeRandom::generate_value(); + // now set random seed + TiledArray::srand(20010515); + BOOST_CHECK_EQUAL(v0, TiledArray::detail::MakeRandom::generate_value()); + BOOST_CHECK_EQUAL(v1, TiledArray::detail::MakeRandom::generate_value()); + BOOST_CHECK_EQUAL(v2, TiledArray::detail::MakeRandom::generate_value()); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/range.cpp b/tests/range.cpp index 0e9987220b..1ad294363f 100644 --- a/tests/range.cpp +++ b/tests/range.cpp @@ -321,7 +321,7 @@ BOOST_AUTO_TEST_CASE(constructors) { stride_ref.begin(), stride_ref.end()); BOOST_CHECK_EQUAL(r2.volume(), 48); } -#else // TA_SIGNED_1INDEX_TYPE +#else // TA_SIGNED_1INDEX_TYPE BOOST_REQUIRE_THROW(Range r2({{-1, 1}, {-2, 2}, {0, 6}}), TiledArray::Exception); #endif // TA_SIGNED_1INDEX_TYPE @@ -603,12 +603,33 @@ BOOST_AUTO_TEST_CASE(include) { BOOST_CHECK(r1.includes(t19)); // check corners BOOST_CHECK(r1.includes(t20)); + // + // try different flavors of includes + // + // 1-d Range is a corner case + Range r_1d({3}); + // integers... + BOOST_CHECK(r1.includes(t20[0], t20[1], t20[2])); + // initializer_list + BOOST_CHECK(r1.includes({t20[0], t20[1], t20[2]})); + BOOST_CHECK(r_1d.includes({0})); + // array + BOOST_CHECK( + r1.includes(std::array{{t20[0], t20[1], t20[2]}})); + BOOST_CHECK(r_1d.includes(std::array{{0}})); + // ordinal Range::size_type o = 0; for (; o < r.volume(); ++o) { BOOST_CHECK(r.includes(o)); + BOOST_CHECK(r.includes_ordinal(o)); // equivalent: see below } - BOOST_CHECK(!r.includes(o)); + BOOST_CHECK_THROW( + r_1d.includes(0), + TiledArray::Exception); // for 1-d Range can't distinguish + // includes(integers...) and includes(ordinal) + BOOST_CHECK( + r_1d.includes_ordinal(0)); // so must explicitly use includes_ordinal // ensure that includes always fails if any extent is zero { diff --git a/tests/solvers.cpp b/tests/solvers.cpp index af6af21847..c8ea24dcf8 100644 --- a/tests/solvers.cpp +++ b/tests/solvers.cpp @@ -104,9 +104,9 @@ struct make_b> { typename T::value_type tile(range); // Fill tile with data - tile(0) = 1; - tile(1) = 4; - tile(2) = 0; + tile({0}) = 1; + tile({1}) = 4; + tile({2}) = 0; return tile; } @@ -130,9 +130,9 @@ struct make_pc> { typename T::value_type tile(range); // Fill tile with data - tile(0) = 1; - tile(1) = 1; - tile(2) = 1; + tile({0}) = 1; + tile({1}) = 1; + tile({2}) = 1; return tile; } @@ -157,7 +157,7 @@ struct validate> { const auto& tile_0 = x.find({0}).get(); double error_2norm = 0.0; for (int i = 0; i != 3; ++i) { - auto delta = tile_0(i) - tile_0_ref[i]; + auto delta = tile_0({i}) - tile_0_ref[i]; error_2norm += delta * delta; } error_2norm = std::sqrt(error_2norm); diff --git a/tests/sparse_shape_fixture.h b/tests/sparse_shape_fixture.h index 8358ab8aaa..772cd657ab 100644 --- a/tests/sparse_shape_fixture.h +++ b/tests/sparse_shape_fixture.h @@ -43,10 +43,13 @@ struct SparseShapeFixture : public TiledRangeFixture { tolerance(0.0001) { + prior_default_threshold = SparseShape::threshold(); SparseShape::threshold(default_threshold); } - ~SparseShapeFixture() {} + ~SparseShapeFixture() { + SparseShape::threshold(prior_default_threshold); + } static Tensor make_norm_tensor(const TiledRange& trange, const float fill_percent, @@ -104,6 +107,8 @@ struct SparseShapeFixture : public TiledRangeFixture { Permutation perm; TiledArray::detail::PermIndex perm_index; const float tolerance; + inline static float prior_default_threshold = + std::numeric_limits::max(); static constexpr float default_threshold = 0.001; }; // SparseShapeFixture diff --git a/tests/tensor_tensor_view.cpp b/tests/tensor_tensor_view.cpp index 1a6262f45e..f69e95c0f6 100644 --- a/tests/tensor_tensor_view.cpp +++ b/tests/tensor_tensor_view.cpp @@ -24,8 +24,8 @@ */ #include -#include #include "TiledArray/tensor/tensor_interface.h" +#include "TiledArray/util/random.h" #include "tiledarray.h" #include "unit_test_config.h" @@ -39,11 +39,7 @@ struct TensorViewFixture { static Tensor random_tensor(const Range& range) { Tensor result(range); - std::default_random_engine generator( - std::chrono::system_clock::now().time_since_epoch().count()); - std::uniform_int_distribution distribution(0, 100); - - for (auto& value : result) value = distribution(generator); + for (auto& value : result) value = TiledArray::rand() % 101; return result; } @@ -53,6 +49,8 @@ struct TensorViewFixture { Tensor t{random_tensor(Range(lower_bound, upper_bound))}; + constexpr static std::size_t ntiles_per_test = 10; + }; // TensorViewFixture const std::array TensorViewFixture::lower_bound{{0, 1, 2}}; @@ -62,6 +60,7 @@ BOOST_FIXTURE_TEST_SUITE(tensor_view_suite, TensorViewFixture, TA_UT_LABEL_SERIAL) BOOST_AUTO_TEST_CASE(non_const_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -96,12 +95,16 @@ BOOST_AUTO_TEST_CASE(non_const_view) { BOOST_CHECK_EQUAL(view(*it), t(*it)); BOOST_CHECK_EQUAL(view(i), t(*it)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(const_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -136,6 +139,9 @@ BOOST_AUTO_TEST_CASE(const_view) { BOOST_CHECK_EQUAL(view(*it), t(*it)); BOOST_CHECK_EQUAL(view(i), t(*it)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } @@ -167,6 +173,7 @@ BOOST_AUTO_TEST_CASE(transitive_read_write) { } BOOST_AUTO_TEST_CASE(assign_tensor_to_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -195,12 +202,16 @@ BOOST_AUTO_TEST_CASE(assign_tensor_to_view) { BOOST_CHECK_EQUAL(t(*it), tensor(*it)); BOOST_CHECK_EQUAL(t(*it), tensor(i)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(copy_view_to_tensor) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -224,12 +235,16 @@ BOOST_AUTO_TEST_CASE(copy_view_to_tensor) { BOOST_CHECK_EQUAL(tensor(i), view(*it)); BOOST_CHECK_EQUAL(tensor(i), view(i)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(assign_view_to_tensor) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -254,12 +269,16 @@ BOOST_AUTO_TEST_CASE(assign_view_to_tensor) { BOOST_CHECK_EQUAL(tensor(i), view(*it)); BOOST_CHECK_EQUAL(tensor(i), view(i)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(add_tensor_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -286,12 +305,16 @@ BOOST_AUTO_TEST_CASE(add_tensor_view) { ++it) { BOOST_CHECK_EQUAL(result(*it), view(*it) + right(*it)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(add_tensor_to_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -318,12 +341,16 @@ BOOST_AUTO_TEST_CASE(add_tensor_to_view) { BOOST_CHECK_EQUAL(view(*it), temp(*it) + tensor(*it)); BOOST_CHECK_EQUAL(view(i), temp(*it) + tensor(*it)); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(scale_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -347,12 +374,16 @@ BOOST_AUTO_TEST_CASE(scale_view) { ++it, ++i) { BOOST_CHECK_EQUAL(result(*it), view(*it) * 3); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } } BOOST_AUTO_TEST_CASE(scale_to_view) { + std::size_t tile_count = 0; for (auto lower_it = t.range().begin(); lower_it != t.range().end(); ++lower_it) { const auto lower = *lower_it; @@ -378,6 +409,9 @@ BOOST_AUTO_TEST_CASE(scale_to_view) { BOOST_CHECK_EQUAL(view(*it), temp(*it) * 3); BOOST_CHECK_EQUAL(view(i), temp(*it) * 3); } + + ++tile_count; + if (tile_count == ntiles_per_test) return; } } } diff --git a/tests/tot_expressions.cpp b/tests/tot_expressions.cpp index c834810065..66939bcf3e 100644 --- a/tests/tot_expressions.cpp +++ b/tests/tot_expressions.cpp @@ -116,30 +116,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) { using il_type = detail::vector_il; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; inner_type c_0{r2}; - c_0[0] = 1; - c_0[1] = 3; + c_0({0}) = 1; + c_0({1}) = 3; inner_type c_1{r3}; - c_1[0] = 3; - c_1[1] = 5; - c_1[2] = 7; + c_1({0}) = 3; + c_1({1}) = 5; + c_1({2}) = 7; il_type corr_il{c_0, c_1}; tensor_type lhs(m_world, lhs_il); tensor_type rhs(m_world, rhs_il); @@ -502,56 +502,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) { using range_type = Range; using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 1; - c_00[1] = 3; + c_00({0}) = 1; + c_00({1}) = 3; inner_type c_01(range_type{2}); - c_01[0] = 3; - c_01[1] = 5; + c_01({0}) = 3; + c_01({1}) = 5; inner_type c_02(range_type{2}); - c_02[0] = 5; - c_02[1] = 7; + c_02({0}) = 5; + c_02({1}) = 7; inner_type c_10(range_type{3}); - c_10[0] = 3; - c_10[1] = 5; - c_10[2] = 7; + c_10({0}) = 3; + c_10({1}) = 5; + c_10({2}) = 7; inner_type c_11(range_type{3}); - c_11[0] = 5; - c_11[1] = 7; - c_11[2] = 9; + c_11({0}) = 5; + c_11({1}) = 7; + c_11({2}) = 9; inner_type c_12(range_type{3}); - c_12[0] = 7; - c_12[1] = 9; - c_12[2] = 11; + c_12({0}) = 7; + c_12({1}) = 9; + c_12({2}) = 11; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -569,56 +569,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) { using range_type = Range; using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 1; - c_00[1] = 3; + c_00({0}) = 1; + c_00({1}) = 3; inner_type c_01(range_type{2}); - c_01[0] = 3; - c_01[1] = 5; + c_01({0}) = 3; + c_01({1}) = 5; inner_type c_02(range_type{2}); - c_02[0] = 5; - c_02[1] = 7; + c_02({0}) = 5; + c_02({1}) = 7; inner_type c_10(range_type{3}); - c_10[0] = 3; - c_10[1] = 5; - c_10[2] = 7; + c_10({0}) = 3; + c_10({1}) = 5; + c_10({2}) = 7; inner_type c_11(range_type{3}); - c_11[0] = 5; - c_11[1] = 7; - c_11[2] = 9; + c_11({0}) = 5; + c_11({1}) = 7; + c_11({2}) = 9; inner_type c_12(range_type{3}); - c_12[0] = 7; - c_12[1] = 9; - c_12[2] = 11; + c_12({0}) = 7; + c_12({1}) = 9; + c_12({2}) = 11; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -1158,30 +1158,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) { using il_type = detail::vector_il; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; inner_type c_0{r2}; - c_0[0] = -1; - c_0[1] = -1; + c_0({0}) = -1; + c_0({1}) = -1; inner_type c_1{r3}; - c_1[0] = -1; - c_1[1] = -1; - c_1[2] = -1; + c_1({0}) = -1; + c_1({1}) = -1; + c_1({2}) = -1; il_type corr_il{c_0, c_1}; tensor_type lhs(m_world, lhs_il); @@ -1545,56 +1545,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) { using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = -1; - c_00[1] = -1; + c_00({0}) = -1; + c_00({1}) = -1; inner_type c_01(range_type{2}); - c_01[0] = -1; - c_01[1] = -1; + c_01({0}) = -1; + c_01({1}) = -1; inner_type c_02(range_type{2}); - c_02[0] = -1; - c_02[1] = -1; + c_02({0}) = -1; + c_02({1}) = -1; inner_type c_10(range_type{3}); - c_10[0] = -1; - c_10[1] = -1; - c_10[2] = -1; + c_10({0}) = -1; + c_10({1}) = -1; + c_10({2}) = -1; inner_type c_11(range_type{3}); - c_11[0] = -1; - c_11[1] = -1; - c_11[2] = -1; + c_11({0}) = -1; + c_11({1}) = -1; + c_11({2}) = -1; inner_type c_12(range_type{3}); - c_12[0] = -1; - c_12[1] = -1; - c_12[2] = -1; + c_12({0}) = -1; + c_12({1}) = -1; + c_12({2}) = -1; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -1612,56 +1612,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) { using range_type = Range; using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = -1; - c_00[1] = -1; + c_00({0}) = -1; + c_00({1}) = -1; inner_type c_01(range_type{2}); - c_01[0] = -1; - c_01[1] = -1; + c_01({0}) = -1; + c_01({1}) = -1; inner_type c_02(range_type{2}); - c_02[0] = -1; - c_02[1] = -1; + c_02({0}) = -1; + c_02({1}) = -1; inner_type c_10(range_type{3}); - c_10[0] = -1; - c_10[1] = -1; - c_10[2] = -1; + c_10({0}) = -1; + c_10({1}) = -1; + c_10({2}) = -1; inner_type c_11(range_type{3}); - c_11[0] = -1; - c_11[1] = -1; - c_11[2] = -1; + c_11({0}) = -1; + c_11({1}) = -1; + c_11({2}) = -1; inner_type c_12(range_type{3}); - c_12[0] = -1; - c_12[1] = -1; - c_12[2] = -1; + c_12({0}) = -1; + c_12({1}) = -1; + c_12({2}) = -1; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -2201,30 +2201,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_lhs, TestParam, test_params) { using il_type = detail::vector_il; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; inner_type c_0{r2}; - c_0[0] = 1; - c_0[1] = 4; + c_0({0}) = 1; + c_0({1}) = 4; inner_type c_1{r3}; - c_1[0] = 4; - c_1[1] = 7; - c_1[2] = 10; + c_1({0}) = 4; + c_1({1}) = 7; + c_1({2}) = 10; il_type corr_il{c_0, c_1}; tensor_type lhs(m_world, lhs_il); tensor_type rhs(m_world, rhs_il); @@ -2240,30 +2240,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_rhs, TestParam, test_params) { using il_type = detail::vector_il; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; inner_type c_0{r2}; - c_0[0] = 2; - c_0[1] = 5; + c_0({0}) = 2; + c_0({1}) = 5; inner_type c_1{r3}; - c_1[0] = 5; - c_1[1] = 8; - c_1[2] = 11; + c_1({0}) = 5; + c_1({1}) = 8; + c_1({2}) = 11; il_type corr_il{c_0, c_1}; tensor_type lhs(m_world, lhs_il); tensor_type rhs(m_world, rhs_il); @@ -2419,56 +2419,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_lhs, TestParam, test_params) { using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 1; - c_00[1] = 4; + c_00({0}) = 1; + c_00({1}) = 4; inner_type c_01(range_type{2}); - c_01[0] = 4; - c_01[1] = 7; + c_01({0}) = 4; + c_01({1}) = 7; inner_type c_02(range_type{2}); - c_02[0] = 7; - c_02[1] = 10; + c_02({0}) = 7; + c_02({1}) = 10; inner_type c_10(range_type{3}); - c_10[0] = 4; - c_10[1] = 7; - c_10[2] = 10; + c_10({0}) = 4; + c_10({1}) = 7; + c_10({2}) = 10; inner_type c_11(range_type{3}); - c_11[0] = 7; - c_11[1] = 10; - c_11[2] = 13; + c_11({0}) = 7; + c_11({1}) = 10; + c_11({2}) = 13; inner_type c_12(range_type{3}); - c_12[0] = 10; - c_12[1] = 13; - c_12[2] = 16; + c_12({0}) = 10; + c_12({1}) = 13; + c_12({2}) = 16; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -2487,56 +2487,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_rhs, TestParam, test_params) { using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 2; - c_00[1] = 5; + c_00({0}) = 2; + c_00({1}) = 5; inner_type c_01(range_type{2}); - c_01[0] = 5; - c_01[1] = 8; + c_01({0}) = 5; + c_01({1}) = 8; inner_type c_02(range_type{2}); - c_02[0] = 8; - c_02[1] = 11; + c_02({0}) = 8; + c_02({1}) = 11; inner_type c_10(range_type{3}); - c_10[0] = 5; - c_10[1] = 8; - c_10[2] = 11; + c_10({0}) = 5; + c_10({1}) = 8; + c_10({2}) = 11; inner_type c_11(range_type{3}); - c_11[0] = 8; - c_11[1] = 11; - c_11[2] = 14; + c_11({0}) = 8; + c_11({1}) = 11; + c_11({2}) = 14; inner_type c_12(range_type{3}); - c_12[0] = 11; - c_12[1] = 14; - c_12[2] = 17; + c_12({0}) = 11; + c_12({1}) = 14; + c_12({2}) = 17; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -2905,30 +2905,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) { using il_type = detail::vector_il; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; inner_type c_0{r2}; - c_0[0] = 0; - c_0[1] = 2; + c_0({0}) = 0; + c_0({1}) = 2; inner_type c_1{r3}; - c_1[0] = 2; - c_1[1] = 6; - c_1[2] = 12; + c_1({0}) = 2; + c_1({1}) = 6; + c_1({2}) = 12; il_type corr_il{c_0, c_1}; tensor_type lhs(m_world, lhs_il); tensor_type rhs(m_world, rhs_il); @@ -3291,56 +3291,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) { using range_type = Range; using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 0; - c_00[1] = 2; + c_00({0}) = 0; + c_00({1}) = 2; inner_type c_01(range_type{2}); - c_01[0] = 2; - c_01[1] = 6; + c_01({0}) = 2; + c_01({1}) = 6; inner_type c_02(range_type{2}); - c_02[0] = 6; - c_02[1] = 12; + c_02({0}) = 6; + c_02({1}) = 12; inner_type c_10(range_type{3}); - c_10[0] = 2; - c_10[1] = 6; - c_10[2] = 12; + c_10({0}) = 2; + c_10({1}) = 6; + c_10({2}) = 12; inner_type c_11(range_type{3}); - c_11[0] = 6; - c_11[1] = 12; - c_11[2] = 20; + c_11({0}) = 6; + c_11({1}) = 12; + c_11({2}) = 20; inner_type c_12(range_type{3}); - c_12[0] = 12; - c_12[1] = 20; - c_12[2] = 30; + c_12({0}) = 12; + c_12({1}) = 20; + c_12({2}) = 30; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -3358,56 +3358,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) { using range_type = Range; using il_type = detail::matrix_il; inner_type lhs_00(range_type{2}); - lhs_00[0] = 0; - lhs_00[1] = 1; + lhs_00({0}) = 0; + lhs_00({1}) = 1; inner_type lhs_01(range_type{2}); - lhs_01[0] = 1; - lhs_01[1] = 2; + lhs_01({0}) = 1; + lhs_01({1}) = 2; inner_type lhs_02(range_type{2}); - lhs_02[0] = 2; - lhs_02[1] = 3; + lhs_02({0}) = 2; + lhs_02({1}) = 3; inner_type lhs_10(range_type{3}); - lhs_10[0] = 1; - lhs_10[1] = 2; - lhs_10[2] = 3; + lhs_10({0}) = 1; + lhs_10({1}) = 2; + lhs_10({2}) = 3; inner_type lhs_11(range_type{3}); - lhs_11[0] = 2; - lhs_11[1] = 3; - lhs_11[2] = 4; + lhs_11({0}) = 2; + lhs_11({1}) = 3; + lhs_11({2}) = 4; inner_type lhs_12(range_type{3}); - lhs_12[0] = 3; - lhs_12[1] = 4; - lhs_12[2] = 5; + lhs_12({0}) = 3; + lhs_12({1}) = 4; + lhs_12({2}) = 5; inner_type rhs_02(range_type{2}); - rhs_02[0] = 3; - rhs_02[1] = 4; + rhs_02({0}) = 3; + rhs_02({1}) = 4; inner_type rhs_12(range_type{3}); - rhs_12[0] = 4; - rhs_12[1] = 5; - rhs_12[2] = 6; + rhs_12({0}) = 4; + rhs_12({1}) = 5; + rhs_12({2}) = 6; inner_type c_00(range_type{2}); - c_00[0] = 0; - c_00[1] = 2; + c_00({0}) = 0; + c_00({1}) = 2; inner_type c_01(range_type{2}); - c_01[0] = 2; - c_01[1] = 6; + c_01({0}) = 2; + c_01({1}) = 6; inner_type c_02(range_type{2}); - c_02[0] = 6; - c_02[1] = 12; + c_02({0}) = 6; + c_02({1}) = 12; inner_type c_10(range_type{3}); - c_10[0] = 2; - c_10[1] = 6; - c_10[2] = 12; + c_10({0}) = 2; + c_10({1}) = 6; + c_10({2}) = 12; inner_type c_11(range_type{3}); - c_11[0] = 6; - c_11[1] = 12; - c_11[2] = 20; + c_11({0}) = 6; + c_11({1}) = 12; + c_11({2}) = 20; inner_type c_12(range_type{3}); - c_12[0] = 12; - c_12[1] = 20; - c_12[2] = 30; + c_12({0}) = 12; + c_12({1}) = 20; + c_12({2}) = 30; il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}}; il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}}; @@ -4215,21 +4215,21 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_inner_contraction, TestParam, test_params) { using policy_type = policy_type; range_type r2{2}, r3{3}; inner_type lhs_0{r2}; - lhs_0[0] = 0; - lhs_0[1] = 1; + lhs_0({0}) = 0; + lhs_0({1}) = 1; inner_type lhs_1{r3}; - lhs_1[0] = 1; - lhs_1[1] = 2; - lhs_1[2] = 3; + lhs_1({0}) = 1; + lhs_1({1}) = 2; + lhs_1({2}) = 3; il_type lhs_il{lhs_0, lhs_1}; inner_type rhs_0{r2}; - rhs_0[0] = 1; - rhs_0[1] = 2; + rhs_0({0}) = 1; + rhs_0({1}) = 2; inner_type rhs_1{r3}; - rhs_1[0] = 2; - rhs_1[1] = 3; - rhs_1[2] = 4; + rhs_1({0}) = 2; + rhs_1({1}) = 3; + rhs_1({2}) = 4; il_type rhs_il{rhs_0, rhs_1}; using result_type = DistArray>, policy_type>;