diff --git a/INSTALL.md b/INSTALL.md
index 1ad512a07f..ad33841e44 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -40,7 +40,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
   - Boost.Container: header-only
   - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing*
   - Boost.Range: header-only, *only used for unit testing*
-- [BTAS](http://github.com/ValeevGroup/BTAS), tag 2917aa21465a93ae6f399874f247b5fe31d6b693 . If usable BTAS installation is not found, TiledArray will download and compile
+- [BTAS](http://github.com/ValeevGroup/BTAS), tag 6fcb6451bc7ca46a00534a30c51dc5c230c39ac3 . If usable BTAS installation is not found, TiledArray will download and compile
   BTAS from source. *This is the recommended way to compile BTAS for all users*.
 - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 0b44ef319643cb9721fbe17d294987c146e6460e .
   Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray.
diff --git a/external/versions.cmake b/external/versions.cmake
index ea25d3b1c6..676ff2ee4f 100644
--- a/external/versions.cmake
+++ b/external/versions.cmake
@@ -24,8 +24,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 29a2bf3d3c2670c608b7bfdf2299d76fbc20e041)
 set(TA_TRACKED_MADNESS_VERSION 0.10.1)
 set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)
 
-set(TA_TRACKED_BTAS_TAG 2917aa21465a93ae6f399874f247b5fe31d6b693)
-set(TA_TRACKED_BTAS_PREVIOUS_TAG fba66ad9881ab29ea8df49ac6a6006cab3fb3ce5)
+set(TA_TRACKED_BTAS_TAG 6fcb6451bc7ca46a00534a30c51dc5c230c39ac3)
+set(TA_TRACKED_BTAS_PREVIOUS_TAG 474ddc095cbea12a1d28aca5435703dd9f69b166)
 
 set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da)
 set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1b3763c65f..e796d72626 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -195,10 +195,12 @@ TiledArray/util/initializer_list.h
 TiledArray/util/logger.h
 TiledArray/util/ptr_registry.cpp
 TiledArray/util/ptr_registry.h
+TiledArray/util/random.cpp
 TiledArray/util/random.h
 TiledArray/util/singleton.h
 TiledArray/util/threads.h
 TiledArray/util/threads.cpp
+TiledArray/util/thread_specific.h
 TiledArray/util/time.h
 TiledArray/util/vector.h
 )
diff --git a/src/TiledArray/block_range.h b/src/TiledArray/block_range.h
index 807dee1f1a..08096c96ea 100644
--- a/src/TiledArray/block_range.h
+++ b/src/TiledArray/block_range.h
@@ -30,9 +30,15 @@
 
 namespace TiledArray {
 
-/// Range that references a subblock of another range
+/// a Range that references a subblock of another Range
 class BlockRange : public Range {
  private:
+  /// datavec_ contains same data as if this were an ordinary Range, except:
+  /// - stride describes the host range
+  /// - offset_ corrects ordinals so that the first element in this (sub)Range
+  /// has ordinal of zero ...
+  /// - block_offset_ is the distance from the first element of this Range to
+  /// the first element of the host Range
   using Range::datavec_;
   using Range::offset_;
   using Range::rank_;
@@ -300,37 +306,59 @@ class BlockRange : public Range {
     init<std::initializer_list<std::initializer_list<Index>>>(range, bounds);
   }
 
-  /// calculate the ordinal index of \c i
+  /// Converts coordinate index \c i to its ordinal index in the host Range
 
-  /// Convert a coordinate index to an ordinal index.
   /// \tparam Index An integral range type
-  /// \param index The index to be converted to an ordinal index
-  /// \return The ordinal index of \c index
+  /// \param index The index to be converted to the ordinal index
+  /// \return The ordinal index of \c index in the host Range
   /// \throw When \c index is not included in this range.
-  template <typename Index, typename std::enable_if_t<
-                                detail::is_integral_range_v<Index>>* = nullptr>
+  template <typename Index,
+            typename std::enable_if_t<
+                detail::is_integral_sized_range_v<Index>>* = nullptr>
   ordinal_type ordinal(const Index& index) const {
     return Range::ordinal(index);
   }
 
-  template <typename... Index,
-            typename std::enable_if<(sizeof...(Index) > 1ul)>::type* = nullptr>
+  /// Converts coordinate index \c i to its ordinal index in the host Range
+
+  /// \tparam Index An integral range type
+  /// \param index The index to be converted to the ordinal index
+  /// \return The ordinal index of \c index in the host Range
+  /// \throw When \c index is not included in this range.
+  template <typename Integer,
+            typename std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
+  ordinal_type ordinal(const std::initializer_list<Integer>& index) const {
+    return Range::ordinal(index);
+  }
+
+  /// Converts coordinate index \c i to its ordinal index in the host Range
+
+  /// \tparam Index An integral range type
+  /// \param index The index to be converted to the ordinal index
+  /// \return The ordinal index of \c index in the host Range
+  /// \throw When \c index is not included in this range.
+  template <
+      typename... Index,
+      typename std::enable_if_t<(sizeof...(Index) > 1ul) &&
+                                (std::is_integral_v<Index> && ...)>* = nullptr>
   ordinal_type ordinal(const Index&... index) const {
     return Range::ordinal(index...);
   }
 
-  /// calculate the coordinate index of the ordinal index, \c index.
+  /// Converts the ordinal index within this block range to the ordinal index of
+  /// the host range
 
-  /// Convert an ordinal index to a coordinate index.
-  /// \param index Ordinal index
-  /// \return The index of the ordinal index
+  /// \warning this overloads base method that did not perform any conversion
+  ///          of \p ord
+  /// \param index the ordinal index for the host Range
+  /// \return The ordinal index in the
   /// \throw TiledArray::Exception When \c index is not included in this range
-  ordinal_type ordinal(ordinal_type index) const {
-    // Check that index is contained by range.
-    TA_ASSERT(includes(index));
+  ordinal_type ordinal(ordinal_type ord) const {
+    // Check that ord is contained by this range.
+    TA_ASSERT(Range::includes_ordinal(ord));
 
     // Construct result coordinate index object and allocate its memory.
-    ordinal_type result = 0ul;
+    ordinal_type host_ord = 0ul;
 
     // Get pointers to the data
     const auto* MADNESS_RESTRICT const size = extent_data();
@@ -342,11 +370,11 @@ class BlockRange : public Range {
       const auto stride_i = stride[i];
 
       // Compute result index element i
-      result += (index % size_i) * stride_i;
-      index /= size_i;
+      host_ord += (ord % size_i) * stride_i;
+      ord /= size_i;
     }
 
-    return result + block_offset_ - offset_;
+    return host_ord + block_offset_ - offset_;
   }
 
   /// Resize of block range is not supported
diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h
index 2ca19c67fd..816a8bfe24 100644
--- a/src/TiledArray/conversions/eigen.h
+++ b/src/TiledArray/conversions/eigen.h
@@ -963,10 +963,10 @@ DistArray_ eigen_tensor_to_array(
 /// replicated TiledArray::DistArray. Usage:
 /// \code
 /// TiledArray::TArrayD
-/// array(world, trange);
+/// array(world, trange_3d);
 /// // Set tiles of array ...
 ///
-/// auto t = array_to_eigen_tensor(array);
+/// auto t = array_to_eigen_tensor<Eigen::Tensor<double, 3>(array);
 /// \endcode
 /// \tparam Tile the tile type of \c src
 /// \tparam Policy the policy type of \c src
@@ -980,13 +980,11 @@ DistArray_ eigen_tensor_to_array(
 ///            create the Eigen::Tensor on every rank (this requires
 ///            that \c src.is_replicated()==true )
 /// \return Eigen::Tensor object containing the data of \c src , if my rank
-/// equals
-///         \c target_rank or \c target_rank==-1 ,
+///         equals \c target_rank or \c target_rank==-1 ,
 ///         default-initialized Eigen::Tensor otherwise.
 template <typename Tensor, typename Tile, typename Policy>
 Tensor array_to_eigen_tensor(const TiledArray::DistArray<Tile, Policy>& src,
                              int target_rank = -1) {
-
   TA_ASSERT(src.tiles_range().rank() == Tensor::NumDimensions);
 
   // Test preconditions
diff --git a/src/TiledArray/conversions/foreach.h b/src/TiledArray/conversions/foreach.h
index c001a1ee66..9d219ac191 100644
--- a/src/TiledArray/conversions/foreach.h
+++ b/src/TiledArray/conversions/foreach.h
@@ -288,12 +288,13 @@ inline std::
   int task_count = 0;
   auto op_shared_handle = make_op_shared_handle(std::forward<Op>(op));
   const auto task = [op_shared_handle, &counter, &tile_norms](
-                        const ordinal_type index,
+                        const ordinal_type ord,
                         const_if_t<not inplace, arg_value_type>& arg_tile,
                         const ArgTiles&... arg_tiles) -> result_value_type {
     op_helper<inplace, result_value_type> op_caller;
-    auto result_tile = op_caller(std::move(op_shared_handle), tile_norms[index],
-                                 arg_tile, arg_tiles...);
+    auto result_tile =
+        op_caller(std::move(op_shared_handle), tile_norms.at_ordinal(ord),
+                  arg_tile, arg_tiles...);
     ++counter;
     return result_tile;
   };
@@ -304,32 +305,32 @@ inline std::
   switch (shape_reduction) {
     case ShapeReductionMethod::Intersect:
       // Get local tile index iterator
-      for (auto index : *(arg.pmap())) {
-        if (is_zero_intersection({arg.is_zero(index), args.is_zero(index)...}))
+      for (auto ord : *(arg.pmap())) {
+        if (is_zero_intersection({arg.is_zero(ord), args.is_zero(ord)...}))
           continue;
-        auto result_tile = world.taskq.add(task, index, arg.find_local(index),
-                                           args.find(index)...);
+        auto result_tile =
+            world.taskq.add(task, ord, arg.find_local(ord), args.find(ord)...);
         ++task_count;
-        tiles.emplace_back(index, std::move(result_tile));
+        tiles.emplace_back(ord, std::move(result_tile));
         if (op_returns_void)  // if Op does not evaluate norms, use the (scaled)
                               // norms of the first arg
-          tile_norms[index] = arg_shape_data[index];
+          tile_norms.at_ordinal(ord) = arg_shape_data.at_ordinal(ord);
       }
       break;
     case ShapeReductionMethod::Union:
       // Get local tile index iterator
-      for (auto index : *(arg.pmap())) {
-        if (is_zero_union({arg.is_zero(index), args.is_zero(index)...}))
-          continue;
+      for (auto ord : *(arg.pmap())) {
+        if (is_zero_union({arg.is_zero(ord), args.is_zero(ord)...})) continue;
         auto result_tile =
-            world.taskq.add(task, index, detail::get_sparse_tile(index, arg),
-                            detail::get_sparse_tile(index, args)...);
+            world.taskq.add(task, ord, detail::get_sparse_tile(ord, arg),
+                            detail::get_sparse_tile(ord, args)...);
         ++task_count;
-        tiles.emplace_back(index, std::move(result_tile));
+        tiles.emplace_back(ord, std::move(result_tile));
         if (op_returns_void)  // if Op does not evaluate norms, find max
                               // (scaled) norms of all args
-          tile_norms[index] =
-              std::max({arg_shape_data[index], args.shape().data()[index]...});
+          tile_norms.at_ordinal(ord) =
+              std::max({arg_shape_data.at_ordinal(ord),
+                        args.shape().data().at_ordinal(ord)...});
       }
       break;
     default:
diff --git a/src/TiledArray/conversions/vector_of_arrays.h b/src/TiledArray/conversions/vector_of_arrays.h
index f54be33bd2..9de3bf8d09 100644
--- a/src/TiledArray/conversions/vector_of_arrays.h
+++ b/src/TiledArray/conversions/vector_of_arrays.h
@@ -23,21 +23,25 @@ namespace detail {
 /// @return TiledRange of fused Array object
 inline TiledArray::TiledRange prepend_dim_to_trange(
     std::size_t array_rank, const TiledArray::TiledRange& array_trange,
-    std::size_t nblocks, std::size_t avg_block_size, std::size_t num_avg_plus_one) {
+    std::size_t nblocks, std::size_t avg_block_size,
+    std::size_t num_avg_plus_one) {
   /// make the new TiledRange1 for new dimension
   TiledArray::TiledRange1 new_trange1;
   {
     std::vector<std::size_t> new_trange1_v;
     new_trange1_v.reserve(nblocks + 1);
     auto block_counter = 0;
-    for(auto i = 0; i < num_avg_plus_one; ++i, block_counter += avg_block_size + 1){
+    for (auto i = 0; i < num_avg_plus_one;
+         ++i, block_counter += avg_block_size + 1) {
       new_trange1_v.emplace_back(block_counter);
     }
-    for (auto i = num_avg_plus_one; i < nblocks; ++i, block_counter+= avg_block_size) {
+    for (auto i = num_avg_plus_one; i < nblocks;
+         ++i, block_counter += avg_block_size) {
       new_trange1_v.emplace_back(block_counter);
     }
     new_trange1_v.emplace_back(array_rank);
-    new_trange1 = TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end());
+    new_trange1 =
+        TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end());
   }
 
   /// make the new range for N+1 Array
@@ -70,14 +74,16 @@ inline TiledArray::TiledRange prepend_dim_to_trange(
 ///        (the size of @c arrays on each rank will depend on world.size)
 /// @param[in] fused_trange the TiledRange of the fused @c arrays
 /// @param[in] avg_block_size The average number of elements per block
-/// @param[in] num_avg_plus_one Number of tiles with one more than the average block size.
+/// @param[in] num_avg_plus_one Number of tiles with one more than the average
+/// block size.
 /// @return SparseShape of fused Array object
 template <typename Tile>
 TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
-        madness::World& global_world,
-        const std::vector<TiledArray::DistArray<Tile, TiledArray::SparsePolicy>>& arrays,
-        const std::size_t array_rank, const TiledArray::TiledRange& fused_trange,
-        const size_t avg_block_size, const size_t num_avg_plus_one) {
+    madness::World& global_world,
+    const std::vector<TiledArray::DistArray<Tile, TiledArray::SparsePolicy>>&
+        arrays,
+    const std::size_t array_rank, const TiledArray::TiledRange& fused_trange,
+    const size_t avg_block_size, const size_t num_avg_plus_one) {
   if (arrays.size() == 0) {
     TiledArray::Tensor<float> fused_tile_norms(fused_trange.tiles_range(), 0.f);
     return TiledArray::SparseShape<float>(global_world, fused_tile_norms,
@@ -95,12 +101,14 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
     const auto& tiles_range = arrays[0].trange().tiles_range();
     for (auto&& tile_idx : tiles_range) {
       const auto tile_ord = tiles_range.ordinal(tile_idx);
-      tile_volumes.emplace(tile_volumes_ptr + tile_ord, arrays[0].trange().make_tile_range(tile_idx).volume());
+      tile_volumes.emplace(
+          tile_volumes_ptr + tile_ord,
+          arrays[0].trange().make_tile_range(tile_idx).volume());
     }
   }
 
   TiledArray::Tensor<float> fused_tile_norms(
-          fused_trange.tiles_range(), 0.f);  // use nonzeroes for local tiles only
+      fused_trange.tiles_range(), 0.f);  // use nonzeroes for local tiles only
 
   // compute norms of fused tiles
   // N.B. tile norms are stored in scaled format, unscale in order to compute
@@ -108,7 +116,8 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
   std::size_t narrays = array_rank;
   size_t fused_tile_ord = 0;
   auto element_offset_in_owner = 0;
-  // need to do this loop twice once for tiles with avg + 1 and once for tiles with avg
+  // need to do this loop twice once for tiles with avg + 1 and once for tiles
+  // with avg
   auto block_size = avg_block_size + 1;
   size_t vidx = 0, fused_vidx = 0;
   for (; fused_vidx < num_avg_plus_one; vidx += block_size, ++fused_vidx) {
@@ -123,13 +132,13 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
         const auto tile_volume = tile_volumes[tile_ord];
         for (size_t v = 0, vv = vidx; v != block_size; ++v, ++vv) {
           const auto unscaled_tile_norm =
-                  (*(array_ptr)).shape().data()[tile_ord] * tile_volume;
+              (*(array_ptr)).shape().data().at_ordinal(tile_ord) * tile_volume;
           unscaled_fused_tile_norm2 += unscaled_tile_norm * unscaled_tile_norm;
           ++array_ptr;
         }
         const auto fused_tile_volume = tile_volume * block_size;
         const auto fused_tile_norm =
-                std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume;
+            std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume;
 
         *(fused_tile_norms.data() + fused_tile_ord) = fused_tile_norm;
       }
@@ -140,7 +149,7 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
   }
 
   block_size = avg_block_size;
-  for (;vidx < narrays; vidx += block_size, ++fused_vidx) {
+  for (; vidx < narrays; vidx += block_size, ++fused_vidx) {
     bool have_rank = (rank == fused_vidx % size);
     // how many arrays actually constribute to this fused tile ... last fused
     // tile may have fewer than block_size
@@ -152,13 +161,13 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
         const auto tile_volume = tile_volumes[tile_ord];
         for (size_t v = 0, vv = vidx; v != block_size; ++v, ++vv) {
           const auto unscaled_tile_norm =
-                  (*(array_ptr)).shape().data()[tile_ord] * tile_volume;
+              (*(array_ptr)).shape().data().at_ordinal(tile_ord) * tile_volume;
           unscaled_fused_tile_norm2 += unscaled_tile_norm * unscaled_tile_norm;
           ++array_ptr;
         }
         const auto fused_tile_volume = tile_volume * block_size;
         const auto fused_tile_norm =
-                std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume;
+            std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume;
 
         *(fused_tile_norms.data() + fused_tile_ord) = fused_tile_norm;
       }
@@ -167,8 +176,8 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
       fused_tile_ord += ntiles_per_array;
     }
   }
-  auto fused_shapes = TiledArray::SparseShape<float>(global_world, fused_tile_norms,
-                                             fused_trange, true);
+  auto fused_shapes = TiledArray::SparseShape<float>(
+      global_world, fused_tile_norms, fused_trange, true);
 
   return fused_shapes;
 }
@@ -187,12 +196,14 @@ TiledArray::SparseShape<float> fuse_tilewise_vector_of_shapes(
 ///        (the size of @c arrays on each rank will depend on world.size)
 /// @param[in] fused_trange the TiledRange of the fused @c arrays
 /// @param[in] avg_block_size The average number of elements per block
-/// @param[in] num_avg_plus_one Number of tiles with one more than the average block size.
+/// @param[in] num_avg_plus_one Number of tiles with one more than the average
+/// block size.
 /// @return DenseShape of fused Array object
 template <typename Tile>
 TiledArray::DenseShape fuse_tilewise_vector_of_shapes(
     madness::World&,
-    const std::vector<TiledArray::DistArray<Tile, TiledArray::DensePolicy>>& arrays,
+    const std::vector<TiledArray::DistArray<Tile, TiledArray::DensePolicy>>&
+        arrays,
     const std::size_t array_rank, const TiledArray::TiledRange& fused_trange,
     const size_t avg_block_size, const size_t num_avg_plus_one) {
   return TiledArray::DenseShape(1, fused_trange);
@@ -213,8 +224,9 @@ TiledArray::DenseShape fuse_tilewise_vector_of_shapes(
 /// @return the Shape of the @c i -th subarray
 inline TiledArray::SparseShape<float> tilewise_slice_of_fused_shape(
     const TiledArray::TiledRange& split_trange,
-    const TiledArray::SparsePolicy::shape_type& shape, const std::size_t tile_idx,
-    const std::size_t split_ntiles, const std::size_t tile_size) {
+    const TiledArray::SparsePolicy::shape_type& shape,
+    const std::size_t tile_idx, const std::size_t split_ntiles,
+    const std::size_t tile_size) {
   TA_ASSERT(split_ntiles == split_trange.tiles_range().volume());
   TiledArray::Tensor<float> split_tile_norms(split_trange.tiles_range());
 
@@ -292,8 +304,9 @@ class dist_subarray_vec
   /// @return number of Array in @c split_array
   unsigned long size() const { return rank_; }
 
-  void delete_arrays(int begin_index, int end_index){
-    for(auto array_ptr = split_array.begin() + begin_index; array_ptr() != split_array.begin() + end_index; ++array_ptr()) {
+  void delete_arrays(int begin_index, int end_index) {
+    for (auto array_ptr = split_array.begin() + begin_index;
+         array_ptr() != split_array.begin() + end_index; ++array_ptr()) {
       (*array_ptr) = Array();
     }
     Array::wait_for_lazy_cleanup();
@@ -328,108 +341,113 @@ class dist_subarray_vec
 /// @sa detail::fuse_tilewise_vector_of_shapes
 template <typename Tile, typename Policy>
 TiledArray::DistArray<Tile, Policy> fuse_tilewise_vector_of_arrays(
-        madness::World& global_world,
-        const std::vector<TiledArray::DistArray<Tile, Policy>>& array_vec,
-        const std::size_t fused_dim_extent,
-        const TiledArray::TiledRange& array_trange, std::size_t target_block_size = 1) {
-    auto nproc = global_world.size();
-
-    // make instances of array_vec globally accessible
-    using Array = TiledArray::DistArray<Tile, Policy>;
-    detail::dist_subarray_vec<Array> arrays(global_world, array_vec,
-                                            fused_dim_extent);
-
-    std::size_t nblocks =
-            (fused_dim_extent + target_block_size - 1) / target_block_size;
-    auto dv = std::div((int) (fused_dim_extent + nblocks - 1), (int) nblocks);
-    auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1;
-    auto dv1 = std::div(num_avg_plus_one, nproc);
-
-    // make fused tiledrange
-    auto fused_trange =
-            detail::prepend_dim_to_trange(fused_dim_extent, array_trange,
-                                          nblocks, avg_block_size, num_avg_plus_one);
-    std::size_t ntiles_per_array = array_trange.tiles_range().volume();
-
-    // make fused shape
-    auto fused_shape = detail::fuse_tilewise_vector_of_shapes(
-            global_world, arrays.array_accessor(), fused_dim_extent, fused_trange,
-            avg_block_size, num_avg_plus_one);
-
-    // make fused array
-    TiledArray::DistArray<Tile, Policy> fused_array(global_world, fused_trange,
-                                            fused_shape);
-
-    /// copy the data from a sequence of tiles
-    auto make_tile = [](const TiledArray::Range& range,
-                        const std::vector<madness::Future<Tile>>& tiles) {
-      TA_ASSERT(range.extent(0) == tiles.size());
-      Tile result(range);
-      const auto volume = range.volume();
-      size_t result_volume = 0;
-      auto* result_ptr = result.data();
-      for (auto&& fut_of_tile : tiles) {
-        TA_ASSERT(fut_of_tile.probe());
-        const auto& tile = fut_of_tile.get();
-        const auto* tile_data = tile.data();
-        const auto tile_volume = tile.size();
-        std::copy(tile_data, tile_data + tile_volume, result_ptr);
-        result_ptr += tile_volume;
-        result_volume += tile_volume;
-      }
-      TA_ASSERT(volume == result_volume);
-      return result;
-    };
-
-    /// write to blocks of fused_array
-    for (auto&& fused_tile_ord : *fused_array.pmap()) {
-      if (!fused_array.is_zero(fused_tile_ord)) {
-        // convert ordinal of the fused tile to the ordinals of its constituent
-        // tiles
-        const auto div0 = std::ldiv(fused_tile_ord, ntiles_per_array);
-        // index of the 0th mode of this tile
-        const auto tile_idx_mode0 = div0.quot;
-        // ordinal of the corresponding tile in the unfused array
-        const auto tile_ord_array = div0.rem;
-
-        const auto div1 = std::ldiv(tile_idx_mode0, nproc);
-        const auto tile_idx_on_owner = div1.quot;
-        auto block_size = (tile_idx_mode0 < num_avg_plus_one ?
-                avg_block_size + 1 : avg_block_size);
-
-        const auto owner_rank = div1.rem;
-
-        auto num_avg_plusone_on_owner = (owner_rank < dv1.rem ? dv1.quot + 1 : dv1.quot);
-        const auto vector_idx_offset_on_owner = (tile_idx_mode0 < num_avg_plus_one ?
-                                                 tile_idx_on_owner * block_size :
-                                                 (num_avg_plusone_on_owner) * (block_size + 1) + (tile_idx_on_owner - num_avg_plusone_on_owner) * block_size);
-
-        auto fused_tile_range =
-                fused_array.trange().make_tile_range(fused_tile_ord);
-        // make a vector of Futures to the input tiles
-        std::vector<madness::Future<Tile>> input_tiles;
-        input_tiles.reserve(fused_tile_range.extent(0));
-        for (size_t v = 0, vidx = tile_idx_mode0 * block_size;
-             v != block_size && vidx < fused_dim_extent; ++v, ++vidx) {
-          using Index = decltype(tile_ord_array);
-          input_tiles.emplace_back(
-                  arrays.task(owner_rank,
-                              &detail::dist_subarray_vec<
-                                      DistArray<Tile, Policy>>::template get_tile<Index>,
-                              vector_idx_offset_on_owner + v, tile_ord_array));
-        }
-        fused_array.set(
-                fused_tile_ord,
-                global_world.taskq.add(make_tile, std::move(fused_tile_range),
-                                       std::move(input_tiles)));
+    madness::World& global_world,
+    const std::vector<TiledArray::DistArray<Tile, Policy>>& array_vec,
+    const std::size_t fused_dim_extent,
+    const TiledArray::TiledRange& array_trange,
+    std::size_t target_block_size = 1) {
+  auto nproc = global_world.size();
+
+  // make instances of array_vec globally accessible
+  using Array = TiledArray::DistArray<Tile, Policy>;
+  detail::dist_subarray_vec<Array> arrays(global_world, array_vec,
+                                          fused_dim_extent);
+
+  std::size_t nblocks =
+      (fused_dim_extent + target_block_size - 1) / target_block_size;
+  auto dv = std::div((int)(fused_dim_extent + nblocks - 1), (int)nblocks);
+  auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1;
+  auto dv1 = std::div(num_avg_plus_one, nproc);
+
+  // make fused tiledrange
+  auto fused_trange =
+      detail::prepend_dim_to_trange(fused_dim_extent, array_trange, nblocks,
+                                    avg_block_size, num_avg_plus_one);
+  std::size_t ntiles_per_array = array_trange.tiles_range().volume();
+
+  // make fused shape
+  auto fused_shape = detail::fuse_tilewise_vector_of_shapes(
+      global_world, arrays.array_accessor(), fused_dim_extent, fused_trange,
+      avg_block_size, num_avg_plus_one);
+
+  // make fused array
+  TiledArray::DistArray<Tile, Policy> fused_array(global_world, fused_trange,
+                                                  fused_shape);
+
+  /// copy the data from a sequence of tiles
+  auto make_tile = [](const TiledArray::Range& range,
+                      const std::vector<madness::Future<Tile>>& tiles) {
+    TA_ASSERT(range.extent(0) == tiles.size());
+    Tile result(range);
+    const auto volume = range.volume();
+    size_t result_volume = 0;
+    auto* result_ptr = result.data();
+    for (auto&& fut_of_tile : tiles) {
+      TA_ASSERT(fut_of_tile.probe());
+      const auto& tile = fut_of_tile.get();
+      const auto* tile_data = tile.data();
+      const auto tile_volume = tile.size();
+      std::copy(tile_data, tile_data + tile_volume, result_ptr);
+      result_ptr += tile_volume;
+      result_volume += tile_volume;
+    }
+    TA_ASSERT(volume == result_volume);
+    return result;
+  };
+
+  /// write to blocks of fused_array
+  for (auto&& fused_tile_ord : *fused_array.pmap()) {
+    if (!fused_array.is_zero(fused_tile_ord)) {
+      // convert ordinal of the fused tile to the ordinals of its constituent
+      // tiles
+      const auto div0 = std::ldiv(fused_tile_ord, ntiles_per_array);
+      // index of the 0th mode of this tile
+      const auto tile_idx_mode0 = div0.quot;
+      // ordinal of the corresponding tile in the unfused array
+      const auto tile_ord_array = div0.rem;
+
+      const auto div1 = std::ldiv(tile_idx_mode0, nproc);
+      const auto tile_idx_on_owner = div1.quot;
+      auto block_size = (tile_idx_mode0 < num_avg_plus_one ? avg_block_size + 1
+                                                           : avg_block_size);
+
+      const auto owner_rank = div1.rem;
+
+      auto num_avg_plusone_on_owner =
+          (owner_rank < dv1.rem ? dv1.quot + 1 : dv1.quot);
+      const auto vector_idx_offset_on_owner =
+          (tile_idx_mode0 < num_avg_plus_one
+               ? tile_idx_on_owner * block_size
+               : (num_avg_plusone_on_owner) * (block_size + 1) +
+                     (tile_idx_on_owner - num_avg_plusone_on_owner) *
+                         block_size);
+
+      auto fused_tile_range =
+          fused_array.trange().make_tile_range(fused_tile_ord);
+      // make a vector of Futures to the input tiles
+      std::vector<madness::Future<Tile>> input_tiles;
+      input_tiles.reserve(fused_tile_range.extent(0));
+      for (size_t v = 0, vidx = tile_idx_mode0 * block_size;
+           v != block_size && vidx < fused_dim_extent; ++v, ++vidx) {
+        using Index = decltype(tile_ord_array);
+        input_tiles.emplace_back(
+            arrays.task(owner_rank,
+                        &detail::dist_subarray_vec<
+                            DistArray<Tile, Policy>>::template get_tile<Index>,
+                        vector_idx_offset_on_owner + v, tile_ord_array));
       }
+      fused_array.set(
+          fused_tile_ord,
+          global_world.taskq.add(make_tile, std::move(fused_tile_range),
+                                 std::move(input_tiles)));
     }
+  }
 
-    // keep arrays around until everyone is done
-    global_world.gop.fence();
+  // keep arrays around until everyone is done
+  global_world.gop.fence();
 
-    return fused_array;
-  }
+  return fused_array;
+}
 
 /// @brief extracts a subarray of a fused array created with
 /// fuse_vector_of_arrays and creates the array in @c local_world.
@@ -446,7 +464,8 @@ TiledArray::DistArray<Tile, Policy> fuse_tilewise_vector_of_arrays(
 /// @sa detail::tilewise_slice_of_fused_shape
 template <typename Tile, typename Policy>
 void split_tilewise_fused_array(
-    madness::World& local_world, const TiledArray::DistArray<Tile, Policy>& fused_array,
+    madness::World& local_world,
+    const TiledArray::DistArray<Tile, Policy>& fused_array,
     std::size_t tile_idx,
     std::vector<TiledArray::DistArray<Tile, Policy>>& split_arrays,
     const TiledArray::TiledRange& split_trange) {
@@ -465,8 +484,8 @@ void split_tilewise_fused_array(
         split_trange, shape, tile_idx, split_ntiles, tile_size);
     // create split Array object
     TiledArray::DistArray<Tile, Policy> split_array(local_world, split_trange,
-                                            split_shape);
-    //split_arrays.push_back(split_array);
+                                                    split_shape);
+    // split_arrays.push_back(split_array);
     split_arrays.emplace_back(std::move(split_array));
   }
 
diff --git a/src/TiledArray/cp/btas_cp.h b/src/TiledArray/cp/btas_cp.h
index e6f0109e85..69d0b097d1 100644
--- a/src/TiledArray/cp/btas_cp.h
+++ b/src/TiledArray/cp/btas_cp.h
@@ -31,7 +31,7 @@ template <typename Tile, typename Policy>
 auto btas_cp_als(madness::World& world, const DistArray<Tile, Policy> Reference,
                  long btas_cp_rank, TA::TiledRange1 rank_trange1,
                  std::size_t decomp_world_rank = 0, double als_threshold = 1e-3,
-                 bool verbose = true) {
+                 bool verbose = false) {
   using tile_type = typename DistArray<Tile, Policy>::value_type::value_type;
   using BTAS_Tensor =
       btas::Tensor<tile_type, btas::DEFAULT::range, btas::varray<double>>;
@@ -114,7 +114,7 @@ template <typename Tile, typename Policy>
 auto btas_cp_rals(madness::World& world, DistArray<Tile, Policy> Reference,
                   long btas_cp_rank, TA::TiledRange1 rank_trange1,
                   std::size_t decomp_world_rank = 0,
-                  double als_threshold = 1e-3, bool verbose = true) {
+                  double als_threshold = 1e-3, bool verbose = false) {
   using BTAS_Tensor =
       btas::Tensor<typename Tile::value_type, btas::DEFAULT::range,
                    btas::varray<typename Tile::value_type>>;
diff --git a/src/TiledArray/cp/cp.h b/src/TiledArray/cp/cp.h
index 7917e6b01f..d676f8fdd8 100644
--- a/src/TiledArray/cp/cp.h
+++ b/src/TiledArray/cp/cp.h
@@ -8,42 +8,10 @@
 #include <TiledArray/conversions/btas.h>
 #include <TiledArray/expressions/einsum.h>
 #include <tiledarray.h>
-#include <random>
 
 namespace TiledArray::cp {
+
 namespace detail {
-// A seed for the random number generator.
-static inline unsigned int& random_seed_accessor() {
-  static unsigned int value = 3;
-  return value;
-}
-
-// given a rank and block size, this computes a
-// trange for the rank dimension to be used to make the CP factors.
-static inline TiledRange1 compute_trange1(size_t rank, size_t rank_block_size) {
-  std::size_t nblocks = (rank + rank_block_size - 1) / rank_block_size;
-  auto dv = std::div((int)(rank + nblocks - 1), (int)nblocks);
-  auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1;
-
-  TiledArray::TiledRange1 new_trange1;
-  {
-    std::vector<std::size_t> new_trange1_v;
-    new_trange1_v.reserve(nblocks + 1);
-    auto block_counter = 0;
-    for (auto i = 0; i < num_avg_plus_one;
-         ++i, block_counter += avg_block_size + 1) {
-      new_trange1_v.emplace_back(block_counter);
-    }
-    for (auto i = num_avg_plus_one; i < nblocks;
-         ++i, block_counter += avg_block_size) {
-      new_trange1_v.emplace_back(block_counter);
-    }
-    new_trange1_v.emplace_back(rank);
-    new_trange1 =
-        TiledArray::TiledRange1(new_trange1_v.begin(), new_trange1_v.end());
-  }
-  return new_trange1;
-}
 
 static inline char intToAlphabet(int i) { return static_cast<char>('a' + i); }
 
@@ -94,9 +62,8 @@ class CP {
   /// moving to @c rank else builds an efficient
   /// random guess with rank @c rank
   /// \param[in] rank Rank of the CP deccomposition
-  /// \param[in] rank_block_size 0; What is the size of the blocks
-  /// in the rank mode's TiledRange, will compute TiledRange1 inline.
-  /// if 0 : rank_blocck_size = rank.
+  /// \param[in] rank_block_size The target tile size of
+  /// rank mode's range; if 0 will use \p rank as \p rank_block_size .
   /// \param[in] build_rank should CP approximation be built from rank 1
   /// or set.
   /// \param[in] epsilonALS 1e-3; the stopping condition for the ALS solver
@@ -112,13 +79,13 @@ class CP {
     if (build_rank) {
       size_t cur_rank = 1;
       do {
-        rank_trange = detail::compute_trange1(cur_rank, rank_block_size);
+        rank_trange = TiledArray::compute_trange1(cur_rank, rank_block_size);
         build_guess(cur_rank, rank_trange);
         ALS(cur_rank, 100, verbose);
         ++cur_rank;
       } while (cur_rank < rank);
     } else {
-      rank_trange = detail::compute_trange1(rank, rank_block_size);
+      rank_trange = TiledArray::compute_trange1(rank, rank_block_size);
       build_guess(rank, rank_trange);
       ALS(rank, 100, verbose);
     }
@@ -144,7 +111,7 @@ class CP {
     double epsilon = 1.0;
     fit_tol = epsilonALS;
     do {
-      auto rank_trange = detail::compute_trange1(cur_rank, rank_block_size);
+      auto rank_trange = compute_trange1(cur_rank, rank_block_size);
       build_guess(cur_rank, rank_trange);
       ALS(cur_rank, 100, verbose);
       ++cur_rank;
@@ -197,9 +164,10 @@ class CP {
       final_fit,          // The final fit of the ALS
                           // optimization at fixed rank.
       fit_tol,            // Tolerance for the ALS solver
-      converged_num,      // How many times the ALS solver
-                          // has changed less than the tolerance
       norm_reference;     // used in determining the CP fit.
+  std::size_t converged_num =
+      0;  // How many times the ALS solver
+          // has changed less than the tolerance in a row
 
   /// This function is determined by the specific CP solver.
   /// builds the rank @c rank CP approximation and stores
@@ -228,14 +196,12 @@ class CP {
     auto lambda = std::vector<typename Tile::value_type>(
         rank, (typename Tile::value_type)0);
     if (world.rank() == 0) {
-      std::mt19937 generator(detail::random_seed_accessor());
-      std::uniform_real_distribution<> distribution(-1.0, 1.0);
       auto factor_ptr = factor.data();
       size_t offset = 0;
       for (auto r = 0; r < rank; ++r, offset += mode_size) {
         auto lam_ptr = lambda.data() + r;
         for (auto m = offset; m < offset + mode_size; ++m) {
-          auto val = distribution(generator);
+          auto val = TiledArray::drand() * 2 - 1;  // random number in [-1,1]
           *(factor_ptr + m) = val;
           *lam_ptr += val * val;
         }
@@ -326,9 +292,9 @@ class CP {
           auto lo = tile.range().lobound_data();
           auto up = tile.range().upbound_data();
           for (auto R = lo[0]; R < up[0]; ++R) {
-            const auto norm_squared_RR = tile(R);
+            const auto norm_squared_RR = tile({R});
             using std::sqrt;
-            tile(R) = sqrt(norm_squared_RR);
+            tile({R}) = sqrt(norm_squared_RR);
           }
         },
         /* fence = */ true);
@@ -365,7 +331,7 @@ class CP {
   /// \returns bool : is the change in fit less than the ALS tolerance?
   virtual bool check_fit(bool verbose = false) {
     // Compute the inner product T * T_CP
-    double inner_prod = MTtKRP("r,n").dot(unNormalized_Factor("r,n"));
+    const auto ref_dot_cp = MTtKRP("r,n").dot(unNormalized_Factor("r,n"));
     // compute the square of the CP tensor (can use the grammian)
     auto factor_norm = [&]() {
       auto gram_ptr = partial_grammian.begin();
@@ -381,27 +347,39 @@ class CP {
       return result;
     };
     // compute the error in the loss function and find the fit
-    double normFactors = factor_norm(),
-           normResidual =
-               sqrt(abs(norm_reference * norm_reference +
-                        normFactors * normFactors - 2.0 * inner_prod)),
-           fit = 1.0 - (normResidual / norm_reference),
-           fit_change = abs(prev_fit - fit);
+    const auto norm_cp = factor_norm();  // ||T_CP||_2
+    const auto squared_norm_error = norm_reference * norm_reference +
+                                    norm_cp * norm_cp -
+                                    2.0 * ref_dot_cp;  // ||T - T_CP||_2^2
+    // N.B. squared_norm_error is very noisy
+    // TA_ASSERT(squared_norm_error >= - 1e-8);
+    const auto norm_error = sqrt(abs(squared_norm_error));
+    const auto fit = 1.0 - (norm_error / norm_reference);
+    const auto fit_change = fit - prev_fit;
     prev_fit = fit;
     // print fit data if required
     if (verbose) {
-      std::cout << fit << "\t" << fit_change << std::endl;
+      std::cout << MTtKRP.world().rank() << ": fit=" << fit
+                << " fit_change=" << fit_change << std::endl;
     }
 
     // if the change in fit is less than the tolerance try to return true.
-    if (fit_change < fit_tol) {
+    if (abs(fit_change) < fit_tol) {
       converged_num++;
       if (converged_num == 2) {
         converged_num = 0;
         final_fit = prev_fit;
         prev_fit = 1.0;
+        if (verbose)
+          std::cout << MTtKRP.world().rank() << ": converged" << std::endl;
         return true;
+      } else {
+        TA_ASSERT(converged_num == 1);
+        if (verbose)
+          std::cout << MTtKRP.world().rank() << ": pre-converged" << std::endl;
       }
+    } else {
+      converged_num = 0;
     }
     return false;
   }
diff --git a/src/TiledArray/cp/cp_als.h b/src/TiledArray/cp/cp_als.h
index 1bcbaf5b0f..197900b5b2 100644
--- a/src/TiledArray/cp/cp_als.h
+++ b/src/TiledArray/cp/cp_als.h
@@ -12,7 +12,7 @@
  * takes a reference order-N tensor and decomposes it into a
  * set of order-2 tensors all coupled by a hyperdimension called the rank.
  * These factors are optimized using an alternating least squares
- * algorithm. This class is derived form the base CP class
+ * algorithm.
  *
  * @tparam Tile typing for the DistArray tiles
  * @tparam Policy policy of the DistArray
@@ -152,7 +152,6 @@ class CP_ALS : public CP<Tile, Policy> {
 
       final = mixed_contractions;
       W("r,rp") *= this->partial_grammian[contracted_index]("r,rp");
-      world.gop.fence();
     }
 
     if (mode == ndim - 1) this->MTtKRP = An;
diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h
index 217c14ed46..ea6a066441 100644
--- a/src/TiledArray/dist_array.h
+++ b/src/TiledArray/dist_array.h
@@ -890,20 +890,16 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   ///
   /// \tparam T The type of random value to generate. Defaults to
   ///           element_type.
-  /// \tparam <anonymous> A template type parameter which will be deduced as
-  ///                     void only if MakeRandom knows how to generate random
-  ///                     values of type T. If MakeRandom does not know how to
-  ///                     generate random values of type T, SFINAE will disable
-  ///                     this function.
   /// \param[in] skip_set If false, will throw if any tiles are already set
   /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong
   ///                              throw guarantee.
   /// \throw TiledArray::Exception if skip_set is false and a local tile is
   ///                              already initialized. Weak throw guarantee.
-  template <typename T = element_type,
+  template <HostExecutor Exec = HostExecutor::Default,
+            typename T = element_type,
             typename = detail::enable_if_can_make_random_t<T>>
   void fill_random(bool skip_set = false) {
-    init_elements(
+    init_elements<Exec>(
         [](const auto&) { return detail::MakeRandom<T>::generate_value(); });
   }
 
@@ -943,7 +939,7 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   ///                              guarantee.
   /// \throw TiledArray::Exception if a tile is already set and skip_set is
   ///                              false. Weak throw guarantee.
-  template <typename Op>
+  template <HostExecutor Exec = HostExecutor::Default, typename Op>
   void init_tiles(Op&& op, bool skip_set = false) {
     // lifetime management of op depends on whether it is a lvalue ref (i.e. has
     // an external owner) or an rvalue ref
@@ -957,15 +953,20 @@ class DistArray : public madness::archive::ParallelSerializableObject {
       const auto& index = *it;
       if (!pimpl_->is_zero(index)) {
         if (skip_set) {
-          auto fut = find(index);
+          auto fut = find_local(index);
           if (fut.probe()) continue;
         }
-        Future<value_type> tile = pimpl_->world().taskq.add(
-            [pimpl = pimpl_, index = ordinal_type(index),
-             op_shared_handle]() -> value_type {
-              return op_shared_handle(pimpl->trange().make_tile_range(index));
-            });
-        set(index, std::move(tile));
+        if constexpr (Exec == HostExecutor::MADWorld) {
+          Future<value_type> tile = pimpl_->world().taskq.add(
+              [pimpl = pimpl_, index = ordinal_type(index),
+               op_shared_handle]() -> value_type {
+                return op_shared_handle(pimpl->trange().make_tile_range(index));
+              });
+          set(index, std::move(tile));
+        } else {
+          static_assert(Exec == HostExecutor::Thread);
+          set(index, op_shared_handle(trange().make_tile_range(index)));
+        }
       }
     }
   }
@@ -994,10 +995,10 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   /// \throw TiledArray::Exception if skip_set is false and a local, non-zero
   ///                              tile is already initialized. Weak throw
   ///                              guarantee.
-  template <typename Op>
+  template <HostExecutor Exec = HostExecutor::Default, typename Op>
   void init_elements(Op&& op, bool skip_set = false) {
     auto op_shared_handle = make_op_shared_handle(std::forward<Op>(op));
-    init_tiles(
+    init_tiles<Exec>(
         [op = std::move(op_shared_handle)](
             const TiledArray::Range& range) -> value_type {
           // Initialize the tile with the given range object
@@ -1545,30 +1546,31 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   }
 
  private:
-  template <typename Index>
-  std::enable_if_t<std::is_integral_v<Index>, void> check_index(
-      const Index i) const {
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, void> check_index(
+      const Ordinal ord) const {
     TA_ASSERT(
-        impl_ref().tiles_range().includes(i) &&
+        impl_ref().tiles_range().includes_ordinal(ord) &&
         "The ordinal index used to access an array tile is out of range.");
   }
 
   template <typename Index>
-  std::enable_if_t<detail::is_integral_range_v<Index>, void> check_index(
+  std::enable_if_t<detail::is_integral_sized_range_v<Index>, void> check_index(
       const Index& i) const {
     TA_ASSERT(
         impl_ref().tiles_range().includes(i) &&
         "The coordinate index used to access an array tile is out of range.");
   }
 
-  template <typename Index1>
+  template <typename Index1,
+            typename = std::enable_if_t<std::is_integral_v<Index1>>>
   void check_index(const std::initializer_list<Index1>& i) const {
     check_index<std::initializer_list<Index1>>(i);
   }
 
-  template <typename Index>
-  std::enable_if_t<std::is_integral_v<Index>, void> check_local_index(
-      const Index i) const {
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, void> check_local_index(
+      const Ordinal i) const {
     check_index(i);
     TA_ASSERT(pimpl_->is_local(i)  // pimpl_ already checked
               &&
@@ -1576,15 +1578,16 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   }
 
   template <typename Index>
-  std::enable_if_t<detail::is_integral_range_v<Index>, void> check_local_index(
-      const Index& i) const {
+  std::enable_if_t<detail::is_integral_sized_range_v<Index>, void>
+  check_local_index(const Index& i) const {
     check_index(i);
     TA_ASSERT(
         pimpl_->is_local(i)  // pimpl_ already checked
         && "The coordinate index used to access an array tile is not local.");
   }
 
-  template <typename Index1>
+  template <typename Index1,
+            typename = std::enable_if_t<std::is_integral_v<Index1>>>
   void check_local_index(const std::initializer_list<Index1>& i) const {
     check_local_index<std::initializer_list<Index1>>(i);
   }
diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h
index 0a4be1ba43..c248956066 100644
--- a/src/TiledArray/einsum/tiledarray.h
+++ b/src/TiledArray/einsum/tiledarray.h
@@ -179,7 +179,7 @@ auto einsum(expressions::TsrExpr<Array_> A, expressions::TsrExpr<Array_> B,
         bi = bi.reshape(shape, batch);
         for (size_t k = 0; k < batch; ++k) {
           auto hk = ai.batch(k).dot(bi.batch(k));
-          tile[k] += hk;
+          tile({k}) += hk;
         }
       }
       auto pc = C.permutation;
@@ -225,7 +225,7 @@ auto einsum(expressions::TsrExpr<Array_> A, expressions::TsrExpr<Array_> B,
         if (!term.array.is_local(idx)) continue;
         if (term.array.is_zero(idx)) continue;
         // TODO no need for immediate evaluation
-        auto tile = term.array.find(idx).get();
+        auto tile = term.array.find_local(idx).get();
         if (P) tile = tile.permute(P);
         auto shape = term.ei_tiled_range.tile(ei);
         tile = tile.reshape(shape, batch);
@@ -247,7 +247,7 @@ auto einsum(expressions::TsrExpr<Array_> A, expressions::TsrExpr<Array_> B,
       if (!C.ei.is_local(e)) continue;
       if (C.ei.is_zero(e)) continue;
       // TODO no need for immediate evaluation
-      auto tile = C.ei.find(e).get();
+      auto tile = C.ei.find_local(e).get();
       assert(tile.batch_size() == batch);
       const Permutation &P = C.permutation;
       auto c = apply(P, h + e);
@@ -406,11 +406,11 @@ namespace TiledArray::expressions {
 /// @param[in] B second argument to the product
 /// @warning just as in the plain expression code, reductions are a special
 /// case; use Expr::reduce()
-template<typename T, typename U>
+template <typename T, typename U>
 auto einsum(expressions::TsrExpr<T> A, expressions::TsrExpr<U> B) {
   auto a = std::get<0>(idx(A));
   auto b = std::get<0>(idx(B));
-  return einsum(A, B, std::string(a^b));
+  return einsum(A, B, std::string(a ^ b));
 }
 
 /// einsum function with result indices explicitly specified
@@ -419,37 +419,29 @@ auto einsum(expressions::TsrExpr<T> A, expressions::TsrExpr<U> B) {
 /// @param[in] r result indices
 /// @warning just as in the plain expression code, reductions are a special
 /// case; use Expr::reduce()
-template<typename T, typename U, typename ... Indices>
-auto einsum(
-  expressions::TsrExpr<T> A,
-  expressions::TsrExpr<U> B,
-  const std::string &cs,
-  World &world = get_default_world())
-{
+template <typename T, typename U, typename... Indices>
+auto einsum(expressions::TsrExpr<T> A, expressions::TsrExpr<U> B,
+            const std::string &cs, World &world = get_default_world()) {
   static_assert(std::is_same<const T, const U>::value);
   using E = expressions::TsrExpr<const T>;
   return Einsum::einsum(E(A), E(B), Einsum::idx<T>(cs), world);
 }
 
 template <typename T, typename U, typename V>
-auto dot(expressions::TsrExpr<T> A,
-         expressions::TsrExpr<U> B,
-         expressions::TsrExpr<V> C,
-         World &world = get_default_world())
-{
+auto dot(expressions::TsrExpr<T> A, expressions::TsrExpr<U> B,
+         expressions::TsrExpr<V> C, World &world = get_default_world()) {
   static_assert(std::is_same<const T, const U>::value);
   static_assert(std::is_same<const T, const V>::value);
   using E = expressions::TsrExpr<const T>;
   return Einsum::dot(E(A), E(B), E(C), world);
 }
 
-} // TiledArray::expressions
+}  // namespace TiledArray::expressions
 
 namespace TiledArray {
 
 using expressions::dot;
 using expressions::einsum;
-using expressions::dot;
 
 template <typename T, typename P>
 auto einsum(const std::string &expr, const DistArray<T, P> &A,
diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h
index 51e43d9bee..87af0e6115 100644
--- a/src/TiledArray/fwd.h
+++ b/src/TiledArray/fwd.h
@@ -149,6 +149,8 @@ using Array
     [[deprecated("use TiledArray::DistArray or TiledArray::TArray<T>")]] =
         DistArray<Tile, Policy>;
 
+enum class HostExecutor { Thread, MADWorld, Default = MADWorld };
+
 }  // namespace TiledArray
 
 #ifndef TILEDARRAY_DISABLE_NAMESPACE_TA
diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h
index 9c0e54a757..8108ecf227 100644
--- a/src/TiledArray/range.h
+++ b/src/TiledArray/range.h
@@ -76,7 +76,8 @@ class Range {
   ///   stride[0],  ..., stride[rank_ - 1] }
   /// \endcode
   container::svector<index1_type, 4 * TA_MAX_SOO_RANK_METADATA> datavec_;
-  distance_type offset_ = 0l;  ///< Ordinal index offset correction
+  distance_type offset_ =
+      0l;  ///< Ordinal index offset correction to support nonzero lobound
   ordinal_type volume_ = 0ul;  ///< Total number of elements
   unsigned int rank_ = 0u;  ///< The rank (or number of dimensions) in the range
 
@@ -825,7 +826,7 @@ class Range {
   /// \throw TiledArray::Exception When the rank of this range is not
   /// equal to the size of the index.
   template <typename Index,
-            typename std::enable_if<detail::is_integral_range_v<Index>,
+            typename std::enable_if<detail::is_integral_sized_range_v<Index>,
                                     bool>::type* = nullptr>
   bool includes(const Index& index) const {
     TA_ASSERT(*this);
@@ -870,11 +871,29 @@ class Range {
 
   /// \param i The ordinal index to check for inclusion in the range
   /// \return \c true when \c i \c >= \c 0 and \c i \c < \c volume
+  /// \warning if this->order()==1 this
   /// \throw nothing
   template <typename Ordinal>
   typename std::enable_if<std::is_integral_v<Ordinal>, bool>::type includes(
       Ordinal i) const {
     TA_ASSERT(*this);
+    // can't distinguish between includes(Index...) and includes(ordinal)
+    // thus assume includes_ordinal() if this->rank()==1
+    TA_ASSERT(this->rank() != 1 &&
+              "use Range::includes(index) or "
+              "Range::includes_ordinal(index_ordinal) if this->rank()==1");
+    return include_ordinal_(i);
+  }
+
+  /// Check the ordinal index to make sure it is within the range.
+
+  /// \param i The ordinal index to check for inclusion in the range
+  /// \return \c true when \c i \c >= \c 0 and \c i \c < \c volume
+  /// \throw nothing
+  template <typename Ordinal>
+  typename std::enable_if<std::is_integral_v<Ordinal>, bool>::type
+  includes_ordinal(Ordinal i) const {
+    TA_ASSERT(*this);
     return include_ordinal_(i);
   }
 
@@ -1009,10 +1028,7 @@ class Range {
   /// \param index Ordinal index
   /// \return \c index (unchanged)
   /// \throw When \c index is not included in this range
-  ordinal_type ordinal(const ordinal_type index) const {
-    TA_ASSERT(includes(index));
-    return index;
-  }
+  ordinal_type ordinal(const ordinal_type index) const { return index; }
 
   /// calculate the ordinal index of \p index
 
@@ -1024,8 +1040,6 @@ class Range {
   template <typename Index, typename std::enable_if_t<
                                 detail::is_integral_range_v<Index>>* = nullptr>
   ordinal_type ordinal(const Index& index) const {
-    TA_ASSERT(includes(index));
-
     auto* MADNESS_RESTRICT const stride = stride_data();
 
     ordinal_type result = 0ul;
@@ -1071,16 +1085,16 @@ class Range {
     return ordinal(temp_index);
   }
 
-  /// calculate the coordinate index of the ordinal index, \c index.
+  /// calculate the coordinate index of the ordinal index, \c ord.
 
   /// Convert an ordinal index to a coordinate index.
-  /// \param index Ordinal index
+  /// \param ord Ordinal index
   /// \return The index of the ordinal index
-  /// \throw TiledArray::Exception When \c index is not included in this range
-  index_type idx(ordinal_type index) const {
+  /// \throw TiledArray::Exception When \p ord is not included in this range
+  index_type idx(ordinal_type ord) const {
     // Check that index is contained by range.
     // N.B. this will fail if any extent is zero
-    TA_ASSERT(includes(index));
+    TA_ASSERT(includes_ordinal(ord));
 
     // Construct result coordinate index object and allocate its memory.
     Range_::index result(rank_, 0);
@@ -1096,8 +1110,8 @@ class Range {
       const auto size_i = size[i];
 
       // Compute result index element i
-      const auto result_i = (index % size_i) + lower_i;
-      index /= size_i;
+      const auto result_i = (ord % size_i) + lower_i;
+      ord /= size_i;
 
       // Store result
       result_data[i] = result_i;
diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h
index da56cc24ef..7346f45d1c 100644
--- a/src/TiledArray/sparse_shape.h
+++ b/src/TiledArray/sparse_shape.h
@@ -485,9 +485,23 @@ class SparseShape {
 
   /// Check that a tile is zero
 
-  /// \tparam Index The type of the index
-  /// \return false
-  template <typename Index>
+  /// \tparam Ordinal an integer type
+  /// \param ord the ordinal index
+  /// \return true if tile at position \p ord is zero
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, bool> is_zero(
+      const Ordinal& ord) const {
+    TA_ASSERT(!tile_norms_.empty());
+    return tile_norms_.at_ordinal(ord) < my_threshold_;
+  }
+
+  /// Check that a tile is zero
+
+  /// \tparam Index a sized integral range type
+  /// \param i the index
+  /// \return true if tile at position \p i is zero
+  template <typename Index, typename = std::enable_if_t<
+                                detail::is_integral_sized_range_v<Index>>>
   bool is_zero(const Index& i) const {
     TA_ASSERT(!tile_norms_.empty());
     return tile_norms_[i] < my_threshold_;
diff --git a/src/TiledArray/tensor.h b/src/TiledArray/tensor.h
index f24621077c..edb7ba2e47 100644
--- a/src/TiledArray/tensor.h
+++ b/src/TiledArray/tensor.h
@@ -62,8 +62,20 @@ template <typename T, typename std::enable_if<detail::is_tensor<T>::value &&
 inline std::ostream& operator<<(std::ostream& os, const T& t) {
   os << t.range() << " { ";
   const auto n = t.range().volume();
-  for (auto i = 0ul; i < n; ++i) os << t[i] << " ";
-
+  std::size_t offset = 0ul;
+  const auto more_than_1_batch = t.batch_size() > 1;
+  for (auto b = 0ul; b != t.batch_size(); ++b) {
+    if (more_than_1_batch) {
+      os << "[batch " << b << "]{ ";
+    }
+    for (auto ord = 0ul; ord < n; ++ord) {
+      os << t.data()[offset + ord] << " ";
+    }
+    if (more_than_1_batch) {
+      os << "} ";
+    }
+    offset += n;
+  }
   os << "}";
 
   return os;
diff --git a/src/TiledArray/tensor/kernels.h b/src/TiledArray/tensor/kernels.h
index 7276678145..c65d0e5c69 100644
--- a/src/TiledArray/tensor/kernels.h
+++ b/src/TiledArray/tensor/kernels.h
@@ -224,8 +224,8 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
 
   const auto volume = result.range().volume();
 
-  for (decltype(result.range().volume()) i = 0ul; i < volume; ++i) {
-    inplace_tensor_op(op, result[i], tensors[i]...);
+  for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
+    inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...);
   }
 }
 
@@ -395,9 +395,9 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
           inplace_tensor_op(op, result_data[i], tensors_data[i]...);
       };
 
-  for (decltype(result.range().volume()) i = 0ul; i < volume; i += stride)
-    inplace_tensor_range(result.data() + result.range().ordinal(i),
-                         (tensors.data() + tensors.range().ordinal(i))...);
+  for (decltype(result.range().volume()) ord = 0ul; ord < volume; ord += stride)
+    inplace_tensor_range(result.data() + result.range().ordinal(ord),
+                         (tensors.data() + tensors.range().ordinal(ord))...);
 }
 
 // -------------------------------------------------------------------------
@@ -455,9 +455,9 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
 
   const auto volume = result.range().volume();
 
-  for (decltype(result.range().volume()) i = 0ul; i < volume; ++i) {
-    new (result.data() + i) typename TR::value_type(
-        tensor_op<typename TR::value_type>(op, tensors[i]...));
+  for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
+    new (result.data() + ord) typename TR::value_type(
+        tensor_op<typename TR::value_type>(op, tensors.at_ordinal(ord)...));
   }
 }
 
@@ -578,10 +578,11 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
     new (result_ptr) typename T1::value_type(op(value1, values...));
   };
 
-  for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride)
-    math::vector_ptr_op(wrapper_op, stride, result.data() + i,
-                        (tensor1.data() + tensor1.range().ordinal(i)),
-                        (tensors.data() + tensors.range().ordinal(i))...);
+  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
+       ord += stride)
+    math::vector_ptr_op(wrapper_op, stride, result.data() + ord,
+                        (tensor1.data() + tensor1.range().ordinal(ord)),
+                        (tensors.data() + tensors.range().ordinal(ord))...);
 }
 
 /// Initialize tensor with one or more non-contiguous tensor arguments
@@ -625,10 +626,10 @@ inline void tensor_init(Op&& op, TR& result, const T1& tensor1,
                   op, tensor1_data[i], tensors_data[i]...));
       };
 
-  for (decltype(volume) i = 0ul; i < volume; i += stride)
-    inplace_tensor_range(result.data() + i,
-                         (tensor1.data() + tensor1.range().ordinal(i)),
-                         (tensors.data() + tensors.range().ordinal(i))...);
+  for (decltype(volume) ord = 0ul; ord < volume; ord += stride)
+    inplace_tensor_range(result.data() + ord,
+                         (tensor1.data() + tensor1.range().ordinal(ord)),
+                         (tensors.data() + tensors.range().ordinal(ord))...);
 }
 
 // -------------------------------------------------------------------------
@@ -735,9 +736,10 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op, Scalar identity,
   const auto volume = tensor1.range().volume();
 
   auto result = identity;
-  for (decltype(tensor1.range().volume()) i = 0ul; i < volume; ++i) {
+  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume; ++ord) {
     auto temp =
-        tensor_reduce(reduce_op, join_op, identity, tensor1[i], tensors[i]...);
+        tensor_reduce(reduce_op, join_op, identity, tensor1.at_ordinal(ord),
+                      tensors.at_ordinal(ord)...);
     join_op(result, temp);
   }
 
@@ -778,11 +780,12 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,
   const auto volume = tensor1.range().volume();
 
   Scalar result = identity;
-  for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) {
+  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
+       ord += stride) {
     Scalar temp = identity;
     math::reduce_op(reduce_op, join_op, identity, stride, temp,
-                    tensor1.data() + tensor1.range().ordinal(i),
-                    (tensors.data() + tensors.range().ordinal(i))...);
+                    tensor1.data() + tensor1.range().ordinal(ord),
+                    (tensors.data() + tensors.range().ordinal(ord))...);
     join_op(result, temp);
   }
 
@@ -834,10 +837,11 @@ Scalar tensor_reduce(ReduceOp&& reduce_op, JoinOp&& join_op,
       };
 
   Scalar result = identity;
-  for (decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) {
-    Scalar temp =
-        tensor_reduce_range(result, tensor1.data() + tensor1.range().ordinal(i),
-                            (tensors.data() + tensors.range().ordinal(i))...);
+  for (decltype(tensor1.range().volume()) ord = 0ul; ord < volume;
+       ord += stride) {
+    Scalar temp = tensor_reduce_range(
+        result, tensor1.data() + tensor1.range().ordinal(ord),
+        (tensors.data() + tensors.range().ordinal(ord))...);
     join_op(result, temp);
   }
 
diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h
index 5009391c16..d3c91361f5 100644
--- a/src/TiledArray/tensor/tensor.h
+++ b/src/TiledArray/tensor/tensor.h
@@ -610,12 +610,19 @@ class Tensor {
   /// \tparam Ordinal an integer type that represents an ordinal
   /// \param[in] ord an ordinal index
   /// \return Const reference to the element at position \c ord .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
   template <typename Ordinal,
             std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
   const_reference operator[](const Ordinal ord) const {
-    TA_ASSERT(this->range_.includes(ord));
+    TA_ASSERT(!this->empty());
+    // can't distinguish between operator[](Index...) and operator[](ordinal)
+    // thus assume at_ordinal() if this->rank()==1
+    TA_ASSERT(this->range_.rank() != 1 &&
+              "use Tensor::operator[](index) or "
+              "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
+    TA_ASSERT(this->batch_size() == 1);
+    TA_ASSERT(this->range_.includes_ordinal(ord));
     return this->data()[ord];
   }
 
@@ -624,12 +631,51 @@ class Tensor {
   /// \tparam Ordinal an integer type that represents an ordinal
   /// \param[in] ord an ordinal index
   /// \return Reference to the element at position \c ord .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
   template <typename Ordinal,
             std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
   reference operator[](const Ordinal ord) {
-    TA_ASSERT(this->range_.includes(ord));
+    TA_ASSERT(!this->empty());
+    // can't distinguish between operator[](Index...) and operator[](ordinal)
+    // thus assume at_ordinal() if this->rank()==1
+    TA_ASSERT(this->range_.rank() != 1 &&
+              "use Tensor::operator[](index) or "
+              "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
+    TA_ASSERT(this->batch_size() == 1);
+    TA_ASSERT(this->range_.includes_ordinal(ord));
+    return this->data()[ord];
+  }
+
+  /// Const element accessor
+
+  /// \tparam Ordinal an integer type that represents an ordinal
+  /// \param[in] ord an ordinal index
+  /// \return Const reference to the element at position \c ord .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
+  template <typename Ordinal,
+            std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
+  const_reference at_ordinal(const Ordinal ord) const {
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    TA_ASSERT(this->range_.includes_ordinal(ord));
+    return this->data()[ord];
+  }
+
+  /// Element accessor
+
+  /// \tparam Ordinal an integer type that represents an ordinal
+  /// \param[in] ord an ordinal index
+  /// \return Reference to the element at position \c ord .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
+  template <typename Ordinal,
+            std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
+  reference at_ordinal(const Ordinal ord) {
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    TA_ASSERT(this->range_.includes_ordinal(ord));
     return this->data()[ord];
   }
 
@@ -638,13 +684,16 @@ class Tensor {
   /// \tparam Index An integral range type
   /// \param[in] i an index
   /// \return Const reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Index,
             std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
   const_reference operator[](const Index& i) const {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Element accessor
@@ -652,13 +701,16 @@ class Tensor {
   /// \tparam Index An integral range type
   /// \param[in] i an index
   /// \return Reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Index,
             std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
   reference operator[](const Index& i) {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Const element accessor
@@ -666,13 +718,16 @@ class Tensor {
   /// \tparam Integer An integral type
   /// \param[in] i an index
   /// \return Const reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Integer,
             std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
   const_reference operator[](const std::initializer_list<Integer>& i) const {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Element accessor
@@ -680,13 +735,58 @@ class Tensor {
   /// \tparam Integer An integral type
   /// \param[in] i an index
   /// \return Reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Integer,
             std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
   reference operator[](const std::initializer_list<Integer>& i) {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
+  }
+
+  /// Const element accessor
+
+  /// \tparam Ordinal an integer type that represents an ordinal
+  /// \param[in] ord an ordinal index
+  /// \return Const reference to the element at position \c ord .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
+  template <typename Ordinal,
+            std::enable_if_t<std::is_integral_v<Ordinal>>* = nullptr>
+  const_reference operator()(const Ordinal& ord) const {
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    // can't distinguish between operator[](Index...) and operator[](ordinal)
+    // thus assume at_ordinal() if this->rank()==1
+    TA_ASSERT(this->range_.rank() != 1 &&
+              "use Tensor::operator()(index) or "
+              "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
+    TA_ASSERT(this->range_.includes_ordinal(ord));
+    return this->data()[ord];
+  }
+
+  /// Element accessor
+
+  /// \tparam Ordinal an integer type that represents an ordinal
+  /// \param[in] ord an ordinal index
+  /// \return Reference to the element at position \c ord .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p ord is
+  /// included in the range, and `batch_size()==1`
+  template <typename Ordinal,
+            std::enable_if_t<std::is_integral_v<Ordinal>>* = nullptr>
+  reference operator()(const Ordinal& ord) {
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    // can't distinguish between operator[](Index...) and operator[](ordinal)
+    // thus assume at_ordinal() if this->rank()==1
+    TA_ASSERT(this->range_.rank() != 1 &&
+              "use Tensor::operator()(index) or "
+              "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
+    TA_ASSERT(this->range_.includes_ordinal(ord));
+    return this->data()[ord];
   }
 
   /// Const element accessor
@@ -694,13 +794,16 @@ class Tensor {
   /// \tparam Index An integral range type
   /// \param[in] i an index
   /// \return Const reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Index,
             std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
   const_reference operator()(const Index& i) const {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Element accessor
@@ -708,13 +811,16 @@ class Tensor {
   /// \tparam Index An integral range type
   /// \param[in] i an index
   /// \return Reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Index,
             std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
   reference operator()(const Index& i) {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Const element accessor
@@ -722,13 +828,16 @@ class Tensor {
   /// \tparam Integer An integral type
   /// \param[in] i an index
   /// \return Const reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Integer,
             std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
   const_reference operator()(const std::initializer_list<Integer>& i) const {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Element accessor
@@ -736,41 +845,59 @@ class Tensor {
   /// \tparam Integer An integral type
   /// \param[in] i an index
   /// \return Reference to the element at position \c i .
-  /// \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <typename Integer,
             std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
   reference operator()(const std::initializer_list<Integer>& i) {
-    TA_ASSERT(this->range_.includes(i));
-    return this->data()[this->range_.ordinal(i)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(i);
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Const element accessor
 
   /// \tparam Index an integral list ( see TiledArray::detail::is_integral_list
-  /// ) \param[in] i an index \return Const reference to the element at position
-  /// \c i . \note This asserts (using TA_ASSERT) that this is not empty and ord
-  /// is included in the range
+  /// )
+  /// \param[in] i an index \return Const reference to the element at position
+  /// \c i .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <
       typename... Index,
-      std::enable_if_t<detail::is_integral_list<Index...>::value>* = nullptr>
+      std::enable_if_t<(sizeof...(Index) > 1ul) &&
+                       detail::is_integral_list<Index...>::value>* = nullptr>
   const_reference operator()(const Index&... i) const {
-    TA_ASSERT(this->range_.includes(i...));
-    return this->data()[this->range_.ordinal(i...)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    const auto iord = this->range_.ordinal(
+        std::array<std::common_type_t<Index...>, sizeof...(Index)>{{i...}});
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Element accessor
 
   /// \tparam Index an integral list ( see TiledArray::detail::is_integral_list
-  /// ) \param[in] i an index \return Reference to the element at position \c i
-  /// . \note This asserts (using TA_ASSERT) that this is not empty and ord is
-  /// included in the range
+  /// )
+  /// \param[in] i an index \return Reference to the element at position \c i
+  /// .
+  /// \note This asserts (using TA_ASSERT) that this is not empty, \p i is
+  /// included in the range, and `batch_size()==1`
   template <
       typename... Index,
-      std::enable_if_t<detail::is_integral_list<Index...>::value>* = nullptr>
+      std::enable_if_t<(sizeof...(Index) > 1ul) &&
+                       detail::is_integral_list<Index...>::value>* = nullptr>
   reference operator()(const Index&... i) {
-    TA_ASSERT(this->range_.includes(i...));
-    return this->data()[this->range_.ordinal(i...)];
+    TA_ASSERT(!this->empty());
+    TA_ASSERT(this->batch_size() == 1);
+    using Int = std::common_type_t<Index...>;
+    const auto iord = this->range_.ordinal(
+        std::array<Int, sizeof...(Index)>{{static_cast<Int>(i)...}});
+    TA_ASSERT(this->range_.includes_ordinal(iord));
+    return this->data()[iord];
   }
 
   /// Iterator factory
diff --git a/src/TiledArray/tensor_impl.h b/src/TiledArray/tensor_impl.h
index 1872cb8584..6811fc6cb2 100644
--- a/src/TiledArray/tensor_impl.h
+++ b/src/TiledArray/tensor_impl.h
@@ -133,41 +133,135 @@ class TensorImpl : private NO_DEFAULTS {
 
   /// Query a tile owner
 
-  /// \tparam Index The index type
+  /// \tparam Index The sized integral range type
   /// \param i The tile index to query
   /// \return The process ID of the node that owns tile \c i
   /// \throw TiledArray::Exception When \c i is outside the tiled range tile
   /// range
   /// \throw TiledArray::Exception When the process map has not been set
-  template <typename Index>
-  ProcessID owner(const Index& i) const {
-    TA_ASSERT(trange_.tiles_range().includes(i));
-    return pmap_->owner(trange_.tiles_range().ordinal(i));
+  template <typename Index, typename = std::enable_if_t<
+                                detail::is_integral_sized_range_v<Index>>>
+  ProcessID owner(const Index& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->owner(ord);
+  }
+
+  /// Query a tile owner
+
+  /// \tparam Integer An integer type
+  /// \param i The tile index to query
+  /// \return The process ID of the node that owns tile \c i
+  /// \throw TiledArray::Exception When \c i is outside the tiled range tile
+  /// range
+  /// \throw TiledArray::Exception When the process map has not been set
+  template <typename Integer,
+            typename = std::enable_if_t<std::is_integral_v<Integer>>>
+  ProcessID owner(const std::initializer_list<Integer>& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->owner(ord);
+  }
+
+  /// Query a tile owner
+
+  /// \tparam Ordinal An integer type
+  /// \param i The tile index to query
+  /// \return The process ID of the node that owns tile \c i
+  /// \throw TiledArray::Exception When \c i is outside the tiled range tile
+  /// range
+  /// \throw TiledArray::Exception When the process map has not been set
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, ProcessID> owner(
+      const Ordinal& ord) const {
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->owner(ord);
+  }
+
+  /// Query for a locally owned tile
+
+  /// \tparam Index The sized integral range type
+  /// \param i The tile index to query
+  /// \return \c true if the tile is owned by this node, otherwise \c false
+  /// \throw TiledArray::Exception When the process map has not been set
+  template <typename Index, typename = std::enable_if_t<
+                                detail::is_integral_sized_range_v<Index>>>
+  bool is_local(const Index& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->is_local(ord);
+  }
+
+  /// Query for a locally owned tile
+
+  /// \tparam Integer An integer type
+  /// \param i The tile index to query
+  /// \return \c true if the tile is owned by this node, otherwise \c false
+  /// \throw TiledArray::Exception When the process map has not been set
+  template <typename Integer,
+            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
+  bool is_local(const std::initializer_list<Integer>& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->is_local(ord);
   }
 
   /// Query for a locally owned tile
 
-  /// \tparam Index The index type
+  /// \tparam Ordinal An integer type
   /// \param i The tile index to query
   /// \return \c true if the tile is owned by this node, otherwise \c false
   /// \throw TiledArray::Exception When the process map has not been set
-  template <typename Index>
-  bool is_local(const Index& i) const {
-    TA_ASSERT(trange_.tiles_range().includes(i));
-    return pmap_->is_local(trange_.tiles_range().ordinal(i));
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, bool> is_local(
+      const Ordinal& ord) const {
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return pmap_->is_local(ord);
+  }
+
+  /// Query for a zero tile
+
+  /// \tparam Index The sized integral range type
+  /// \param i The tile index to query
+  /// \return \c true if the tile is zero, otherwise \c false
+  /// \throw TiledArray::Exception When \c i is outside the tiled range tile
+  /// range
+  template <typename Index, typename = std::enable_if_t<
+                                detail::is_integral_sized_range_v<Index> &&
+                                !std::is_integral_v<Index>>>
+  bool is_zero(const Index& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return shape_->is_zero(ord);
+  }
+
+  /// Query for a zero tile
+
+  /// \tparam Integer An integer type
+  /// \param i The tile index to query
+  /// \return \c true if the tile is zero, otherwise \c false
+  /// \throw TiledArray::Exception When \c i is outside the tiled range tile
+  /// range
+  template <typename Integer,
+            typename = std::enable_if_t<std::is_integral_v<Integer>>>
+  bool is_zero(const std::initializer_list<Integer>& index) const {
+    const auto ord = trange_.tiles_range().ordinal(index);
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return shape_->is_zero(ord);
   }
 
   /// Query for a zero tile
 
-  /// \tparam Index The index type
+  /// \tparam Ordinal An integer type
   /// \param i The tile index to query
   /// \return \c true if the tile is zero, otherwise \c false
   /// \throw TiledArray::Exception When \c i is outside the tiled range tile
   /// range
-  template <typename Index>
-  bool is_zero(const Index& i) const {
-    TA_ASSERT(trange_.tiles_range().includes(i));
-    return shape_->is_zero(trange_.tiles_range().ordinal(i));
+  template <typename Ordinal>
+  std::enable_if_t<std::is_integral_v<Ordinal>, bool> is_zero(
+      const Ordinal& ord) const {
+    TA_ASSERT(trange_.tiles_range().includes_ordinal(ord));
+    return shape_->is_zero(ord);
   }
 
   /// Query the density of the tensor
diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h
index 86b949e45a..57366dbe60 100644
--- a/src/TiledArray/tile.h
+++ b/src/TiledArray/tile.h
@@ -213,7 +213,7 @@ class Tile {
             std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
   const_reference operator[](const Ordinal ord) const {
     TA_ASSERT(pimpl_);
-    TA_ASSERT(tensor().range().includes(ord));
+    TA_ASSERT(tensor().range().includes_ordinal(ord));
     return tensor().data()[ord];
   }
 
@@ -228,7 +228,7 @@ class Tile {
             std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
   reference operator[](const Ordinal ord) {
     TA_ASSERT(pimpl_);
-    TA_ASSERT(tensor().range().includes(ord));
+    TA_ASSERT(tensor().range().includes_ordinal(ord));
     return tensor().data()[ord];
   }
 
@@ -360,7 +360,8 @@ class Tile {
   /// is included in the range
   template <
       typename... Index,
-      std::enable_if_t<detail::is_integral_list<Index...>::value>* = nullptr>
+      std::enable_if_t<(sizeof...(Index) > 1ul) &&
+                       detail::is_integral_list<Index...>::value>* = nullptr>
   const_reference operator()(const Index&... i) const {
     TA_ASSERT(pimpl_);
     TA_ASSERT(tensor().range().includes(i...));
@@ -375,7 +376,8 @@ class Tile {
   /// included in the range
   template <
       typename... Index,
-      std::enable_if_t<detail::is_integral_list<Index...>::value>* = nullptr>
+      std::enable_if_t<(sizeof...(Index) > 1ul) &&
+                       detail::is_integral_list<Index...>::value>* = nullptr>
   reference operator()(const Index&... i) {
     TA_ASSERT(pimpl_);
     TA_ASSERT(tensor().range().includes(i...));
diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h
index 7014ed382d..8c0714aa7d 100644
--- a/src/TiledArray/tiled_range.h
+++ b/src/TiledArray/tiled_range.h
@@ -180,12 +180,12 @@ class TiledRange {
 
   /// Construct a range for the tile indexed by the given ordinal index.
 
-  /// \param i The ordinal index of the tile range to be constructed
+  /// \param ord The ordinal index of the tile range to be constructed
   /// \throw std::runtime_error Throws if i is not included in the range
   /// \return The constructed range object
-  range_type make_tile_range(const index1_type& i) const {
-    TA_ASSERT(tiles_range().includes(i));
-    return make_tile_range(tiles_range().idx(i));
+  range_type make_tile_range(const ordinal_type& ord) const {
+    TA_ASSERT(tiles_range().includes_ordinal(ord));
+    return make_tile_range(tiles_range().idx(ord));
   }
 
   /// Construct a range for the tile indexed by the given index.
diff --git a/src/TiledArray/util/random.cpp b/src/TiledArray/util/random.cpp
new file mode 100644
index 0000000000..2762b10b76
--- /dev/null
+++ b/src/TiledArray/util/random.cpp
@@ -0,0 +1,60 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2013  Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <TiledArray/util/random.h>
+
+#include <TiledArray/util/thread_specific.h>
+#include <boost/random/uniform_int_distribution.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+
+namespace TiledArray {
+
+namespace detail {
+
+auto& random_engine_pool_accessor() {
+  using engine_t = boost::random::mt19937;
+  static auto tspool = make_tspool(engine_t(1));
+  return tspool;
+}
+}  // namespace detail
+
+boost::random::mt19937& random_engine() {
+  return detail::random_engine_pool_accessor().local();
+}
+
+int rand() {
+  using dist_t = boost::random::uniform_int_distribution<>;
+  static dist_t dist(0, RAND_MAX);
+
+  return dist(random_engine());
+}
+
+double drand() {
+  using dist_t = boost::random::uniform_real_distribution<double>;
+  static dist_t dist(0, 1);
+
+  return dist(random_engine());
+}
+
+void srand(unsigned int seed) {
+  using engine_t = boost::random::mt19937;
+  detail::random_engine_pool_accessor() = detail::make_tspool(engine_t(seed));
+}
+
+}  // namespace TiledArray
diff --git a/src/TiledArray/util/random.h b/src/TiledArray/util/random.h
index bb8bc35b93..b096654bc6 100644
--- a/src/TiledArray/util/random.h
+++ b/src/TiledArray/util/random.h
@@ -24,6 +24,38 @@
 #include <cstdlib>      // for std::rand
 #include <type_traits>  // for true_type, false_type, and enable_if
 
+#include <boost/random/mersenne_twister.hpp>
+
+namespace TiledArray {
+
+/// \return reference to the thread-specific random engine used to implement
+/// TiledArray::rand() and TiledArray::drand()
+/// \note the reference to the thread local is not persistent; the value
+///       will change after every call to TiledArray::srand()
+boost::random::mt19937& random_engine();
+
+/// reentrant high-quality version of std::rand()
+
+/// \return a random integer in [0,`RAND_MAX`]
+/// \sa std::rand()
+int rand();
+
+/// reentrant high-quality version of drand48()
+
+/// \return a random integer in [0,1]
+/// \sa drand48()
+double drand();
+
+/// Seeds the pseudo-random number generator used by TiledArray::rand() and
+/// TiledArray::drand() with the value \p seed .
+
+/// Calling this changes the values returned by TiledArray::random_engine()
+/// \param seed the value used to seed the random engine
+/// \warning this is not reentrant
+void srand(unsigned int seed);
+
+}  // namespace TiledArray
+
 namespace TiledArray::detail {
 
 //------------------------------------------------------------------------------
@@ -99,14 +131,14 @@ struct MakeRandom {
     static_assert(std::is_floating_point_v<ValueType> ||
                   std::is_integral_v<ValueType>);
     if constexpr (std::is_floating_point_v<ValueType>)
-      return (2 * static_cast<ValueType>(std::rand()) / RAND_MAX) - 1;
+      return (2 * static_cast<ValueType>(TiledArray::rand()) / RAND_MAX) - 1;
     else if constexpr (std::is_integral_v<ValueType>) {
       static_assert(RAND_MAX == 2147483647);
       static_assert(RAND_MAX % 2 == 1);
       constexpr std::int64_t RAND_MAX_DIVBY_9 =
           (static_cast<std::int64_t>(RAND_MAX) + 8) / 9;
       const ValueType v = static_cast<ValueType>(
-          static_cast<std::int64_t>(std::rand()) / RAND_MAX_DIVBY_9);
+          static_cast<std::int64_t>(TiledArray::rand()) / RAND_MAX_DIVBY_9);
       if constexpr (std::is_signed_v<ValueType>) {
         return v - 4;
       } else {
diff --git a/src/TiledArray/util/thread_specific.h b/src/TiledArray/util/thread_specific.h
new file mode 100644
index 0000000000..ff722cb222
--- /dev/null
+++ b/src/TiledArray/util/thread_specific.h
@@ -0,0 +1,188 @@
+
+#ifndef TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED
+#define TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED
+
+#include <TiledArray/error.h>
+
+#include <pthread.h>
+#include <cerrno>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+namespace TiledArray {
+namespace detail {
+
+/// Implements a highly-limited subset of tbb::enumerable_thread_specific
+/// using Pthread thread-specific storage
+template <typename Item>
+class thread_specific {
+ public:
+  template <typename F, typename = std::enable_if_t<!std::is_same<
+                            thread_specific, std::decay_t<F>>::value>>
+  explicit thread_specific(F &&init)
+      : init_(std::forward<F>(init)),
+        mtx_(std::make_unique<std::mutex>()),
+        key_ptr_(new pthread_key_t, &pthread_key_deleter) {
+    auto retcode = pthread_key_create(&key(), NULL);
+    // ensure that pthread_key_create sets all thread-specific values to null
+    TA_ASSERT(pthread_getspecific(key()) == NULL);
+    if (retcode == EAGAIN)
+      TA_EXCEPTION(
+          "thread_specific could not be constructed due to reaching the "
+          "maximum number of thread-specific keys");
+    else if (retcode == ENOMEM) {
+      TA_EXCEPTION(
+          "thread_specific could not be constructed due to a memory allocation "
+          "error");
+    } else if (retcode != 0)
+      TA_EXCEPTION(
+          "thread_specific could not be constructed due to an unknown error");
+  }
+
+  thread_specific(const thread_specific &) = delete;
+  thread_specific(thread_specific &&other) = default;
+  thread_specific &operator=(const thread_specific &) = delete;
+  thread_specific &operator=(thread_specific &&) = default;
+  ~thread_specific() = default;
+
+  const pthread_key_t &key() const { return *key_ptr_; }
+
+  /// @return reference to the thread-local @c Item instance.
+  Item &local() { return *(ts_item_ptr().get()); }
+
+ private:
+  template <typename>
+  friend class TSPool;
+
+  pthread_key_t &key() { return *key_ptr_; }
+  static void pthread_key_deleter(pthread_key_t *key_ptr) {
+    if (key_ptr) {
+      pthread_key_delete(*key_ptr);
+      delete key_ptr;
+    }
+  };
+
+  Item init_;
+  std::map<std::thread::id, std::unique_ptr<Item>> items_;
+  std::unique_ptr<std::mutex> mtx_;
+  std::unique_ptr<pthread_key_t, decltype(&pthread_key_deleter)> key_ptr_;
+
+  const std::unique_ptr<Item> &ts_item_ptr() {
+    const std::unique_ptr<Item> *ptr =
+        reinterpret_cast<const std::unique_ptr<Item> *>(
+            pthread_getspecific(key()));
+    if (ptr == nullptr) {
+      std::lock_guard<std::mutex> lock{*mtx_};
+      const auto thread_id = std::this_thread::get_id();
+      auto it = items_.find(thread_id);
+      // case 1: there is already an item for this thread id? (e.g. thread id
+      // was reused) initialize ts_item_ptr_ so that the next call from this
+      // thread is fast
+      if (it != items_.end()) {
+        // none of the non-TBB MADNESS task backends delete their
+        // threads but can't guarantee in general
+        ptr = &(it->second);
+      }
+      // case 2: need to make a new object
+      else {
+        auto result = items_.emplace(thread_id, std::make_unique<Item>(init_));
+        ptr = &(result.first->second);
+      }
+      pthread_setspecific(key(), (const void *)ptr);
+    }
+    return *ptr;
+  }
+
+ public:
+  using iterator = typename decltype(items_)::iterator;
+  using const_iterator = typename decltype(items_)::const_iterator;
+};
+
+/// A pool of thread-specific objects
+template <typename Item>
+class TSPool {
+ public:
+  /// Don't allow copies or default initialization.
+  TSPool() = delete;
+  TSPool(TSPool const &) = delete;
+  TSPool &operator=(TSPool const &) = delete;
+
+  TSPool &operator=(TSPool &&) = default;
+  TSPool(TSPool &&a) = default;
+
+  /// Initializes the pool with a single @c Item
+  explicit TSPool(Item e) : item_(std::move(e)), items_(item_) {}
+  /// @return reference to the thread-local @c Item instance.
+  Item &local() { return items_.local(); }
+
+  /// Iterator over thread-local items
+  template <typename Item_ = Item>
+  class Iterator
+      : public boost::iterator_facade<Iterator<Item_>, Item_,
+                                      boost::bidirectional_traversal_tag> {
+   public:
+    Iterator(thread_specific<Item> &items) : item_(items.items_.begin()) {
+      static_assert(!std::is_const_v<Item_>);
+    }
+    Iterator(const thread_specific<Item> &items) : item_(items.items_.begin()) {
+      static_assert(std::is_const_v<Item_>);
+    }
+    Iterator(thread_specific<Item> &items, bool /* end_tag */)
+        : item_(items.items_.end()) {
+      static_assert(!std::is_const_v<Item_>);
+    }
+    Iterator(const thread_specific<Item> &items, bool /* end_tag */)
+        : item_(items.items_.end()) {
+      static_assert(std::is_const_v<Item_>);
+    }
+
+   private:
+    friend class boost::iterator_core_access;
+
+    using nc_items_type = decltype(thread_specific<Item>::items_);
+    using items_type =
+        std::conditional_t<std::is_const_v<Item_>,
+                           std::add_const_t<nc_items_type>, nc_items_type>;
+    std::conditional_t<std::is_const_v<Item_>,
+                       typename items_type::const_iterator,
+                       typename items_type::iterator>
+        item_;  // points to the current item
+
+    void increment() { ++item_; }
+    void decrement() { --item_; }
+    bool equal(Iterator const &other) const {
+      return this->item_ == other.item_;
+    }
+    Item_ &dereference() const { return *(item_->second); }
+  };
+
+  using iterator = Iterator<Item>;
+  using const_iterator = Iterator<const Item>;
+  auto begin() { return iterator{items_}; }
+  auto end() { return iterator{items_, true}; }
+  auto begin() const { return const_iterator{items_}; }
+  auto end() const { return const_iterator{items_, true}; }
+
+ private:
+  Item item_;  //!< used only to initialize the thread-specific items
+  thread_specific<Item> items_;
+};
+
+template <typename Item>
+TSPool<Item> make_tspool(Item e) {
+  return TSPool<Item>(std::move(e));
+}
+
+template <typename Item>
+std::shared_ptr<TSPool<Item>> make_shared_tspool(Item e) {
+  return std::make_shared<TSPool<Item>>(std::move(e));
+}
+
+}  // namespace detail
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_UTIL_THREAD_SPECIFIC_H__INCLUDED
diff --git a/tests/cp.cpp b/tests/cp.cpp
index e339ab5a9b..763231ab9a 100644
--- a/tests/cp.cpp
+++ b/tests/cp.cpp
@@ -35,7 +35,8 @@
 #include "TiledArray/cp/cp_als.h"
 #include "TiledArray/cp/cp_reconstruct.h"
 
-const std::string __dirname = dirname(strdup(__FILE__));
+constexpr std::int64_t rank_tile_size = 10;
+constexpr bool verbose = false;
 
 using namespace TiledArray;
 
@@ -43,24 +44,10 @@ struct CPFixture : public TiledRangeFixture {
   CPFixture()
       : shape_tr(make_random_sparseshape(tr)),
         a_sparse(*GlobalFixture::world, tr, shape_tr) {
-    random_fill(a_sparse);
+    a_sparse.fill_random<HostExecutor::Thread>();
     a_sparse.truncate();
   }
 
-  template <typename Tile, typename Policy>
-  static void random_fill(DistArray<Tile, Policy>& array) {
-    typename DistArray<Tile, Policy>::pmap_interface::const_iterator it =
-        array.pmap()->begin();
-    typename DistArray<Tile, Policy>::pmap_interface::const_iterator end =
-        array.pmap()->end();
-    for (; it != end; ++it) {
-      if (!array.is_zero(*it))
-        array.set(*it, array.world().taskq.add(
-                           &CPFixture::template make_rand_tile<Tile>,
-                           array.trange().make_tile_range(*it)));
-    }
-  }
-
   /// make a shape with approximate half dense and half sparse
   static SparseShape<float> make_random_sparseshape(const TiledRange& tr) {
     std::size_t n = tr.tiles_range().volume();
@@ -78,39 +65,6 @@ struct CPFixture : public TiledRangeFixture {
     return SparseShape<float>(norms, tr);
   }
 
-  // Fill a tile with random data
-  template <typename Tile>
-  static Tile make_rand_tile(const typename Tile::range_type& r) {
-    Tile tile(r);
-    for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]);
-    return tile;
-  }
-
-  template <typename Tile>
-  static double init_rand_tile(Tile& tile, const typename Tile::range_type& r) {
-    tile = Tile(r);
-    for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]);
-    double result;
-    norm(tile, result);
-    return result;
-  }
-
-  template <typename Tile>
-  static double init_unit_tile(Tile& tile, const typename Tile::range_type& r) {
-    tile = Tile(r);
-    std::mt19937 generator(3);
-    std::uniform_real_distribution distribution(-1.0, 1.0);
-    for (std::size_t i = 0ul; i < tile.size(); ++i) tile[i] = 1;
-    double result;
-    norm(tile, result);
-    return result;
-  }
-
-  template <typename T>
-  static void set_random(T& t) {
-    t = GlobalFixture::world->rand() % 101;
-  }
-
   static TensorF tensori_to_tensorf(const TensorI& tensori) {
     std::size_t n = tensori.size();
     TensorF tensorf(tensori.range());
@@ -138,39 +92,38 @@ struct CPFixture : public TiledRangeFixture {
 };
 
 template <typename TArrayT>
-TArrayT compute_cp(const TArrayT& T, size_t cp_rank) {
+TArrayT compute_cp(const TArrayT& T, size_t cp_rank, bool verbose = false) {
   cp::CP_ALS<typename TArrayT::value_type, typename TArrayT::policy_type> CPD(
       T);
-  CPD.compute_rank(cp_rank, 80, false, 1e-3, true);
+  CPD.compute_rank(cp_rank, rank_tile_size, false, 1e-3, verbose);
   return CPD.reconstruct();
 }
-// TiledArray::TiledRange1 compute_trange1(std::size_t range_size,
-//                                         std::size_t target_block_size) {
-//   if (range_size > 0) {
-//     std::size_t nblocks =
-//         (range_size + target_block_size - 1) / target_block_size;
-//     auto dv = std::div((int) (range_size + nblocks - 1), (int) nblocks);
-//     auto avg_block_size = dv.quot - 1, num_avg_plus_one = dv.rem + 1;
-//     std::vector<std::size_t> hashmarks;
-//     hashmarks.reserve(nblocks + 1);
-//     auto block_counter = 0;
-//     for(auto i = 0; i < num_avg_plus_one; ++i, block_counter +=
-//     avg_block_size + 1){
-//       hashmarks.push_back(block_counter);
-//     }
-//     for (auto i = num_avg_plus_one; i < nblocks; ++i, block_counter+=
-//     avg_block_size) {
-//       hashmarks.push_back(block_counter);
-//     }
-//     hashmarks.push_back(range_size);
-//     return TA::TiledRange1(hashmarks.begin(), hashmarks.end());
-//   } else
-//     return TA::TiledRange1{};
-// }
+
+enum class Fill { Random, Constant };
+
+template <typename TArrayT, Fill fill = Fill::Constant,
+          typename... OptionalArgs>
+TArrayT make(const TiledRange& tr, OptionalArgs&&... opt_args) {
+  TArrayT result(*GlobalFixture::world, tr, opt_args...);
+  switch (fill) {
+    case Fill::Constant:
+      result.fill(1);
+      break;
+    case Fill::Random:
+      // make sure randomness is deterministic
+      result.template fill_random<HostExecutor::Thread>();
+      break;
+  }
+  return result;
+}
 
 BOOST_FIXTURE_TEST_SUITE(cp_suite, CPFixture)
 
+const auto target_rel_error = std::sqrt(std::numeric_limits<double>::epsilon());
+
 BOOST_AUTO_TEST_CASE(btas_cp_als) {
+  TiledArray::srand(1);
+
   // Make a tiled range with block size of 1
   TiledArray::TiledRange tr3, tr4, tr5;
   {
@@ -183,96 +136,93 @@ BOOST_AUTO_TEST_CASE(btas_cp_als) {
     tr5 = TiledRange({tr1_mode0, tr1_mode0, tr1_mode1, tr1_mode2, tr1_mode3});
   }
 
-  // Dense test
-  // order-3 test
+  // Dense
+
+  // order-3 rank-1 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr3,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr3);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
-  // order-4 test
+  // order-4 rank-1 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr4,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr4);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
-  // order-5 test
+  // order-5 rank-1 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr5,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr5);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_dense, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
 
-  // sparse test
+  // sparse
+
+  // order-3 rank-N test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr3,
-                                          &this->init_rand_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD, Fill::Random>(tr3);
     size_t cp_rank = 77;
-    factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+    factors =
+        cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank,
+                        compute_trange1(cp_rank, rank_tile_size), 0, 1e-3);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
     diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
-  // order-4 test
+  // order-4 rank-1 test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr4,
-                                          &this->init_unit_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD>(tr4);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
     diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-5 test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr5,
-                                          &this->init_unit_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD>(tr5);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
@@ -283,6 +233,8 @@ BOOST_AUTO_TEST_CASE(btas_cp_als) {
 }
 
 BOOST_AUTO_TEST_CASE(btas_cp_rals) {
+  TiledArray::srand(1);
+
   // Make a tiled range with block size of 1
   TiledArray::TiledRange tr3, tr4, tr5;
   {
@@ -299,93 +251,87 @@ BOOST_AUTO_TEST_CASE(btas_cp_rals) {
   // order-3 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr3,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr3);
     size_t cp_rank = 1;
     factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank,
-                               compute_trange1(cp_rank, 80), 0, 1e-3);
+                               compute_trange1(cp_rank, rank_tile_size), 0,
+                               1e-3, verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-4 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr4,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr4);
     size_t cp_rank = 1;
     factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank,
-                               compute_trange1(cp_rank, 80), 0, 1e-3);
+                               compute_trange1(cp_rank, rank_tile_size), 0,
+                               1e-3, verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-5 test
   {
     std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr5,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr5);
     size_t cp_rank = 1;
     factors = cp::btas_cp_rals(*GlobalFixture::world, b_dense, cp_rank,
-                               compute_trange1(cp_rank, 80), 0, 1e-3);
+                               compute_trange1(cp_rank, rank_tile_size), 0,
+                               1e-3, verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TArrayD diff;
     diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
 
   // sparse test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr3,
-                                          &this->init_rand_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD, Fill::Random>(tr3);
     size_t cp_rank = 77;
     factors = cp::btas_cp_rals(*GlobalFixture::world, b_sparse, cp_rank,
-                               compute_trange1(cp_rank, 80), 0, 1e-3);
+                               compute_trange1(cp_rank, rank_tile_size), 0,
+                               1e-3, verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
     diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-4 test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr4,
-                                          &this->init_unit_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD>(tr4);
     size_t cp_rank = 1;
     factors = cp::btas_cp_rals(*GlobalFixture::world, b_sparse, cp_rank,
-                               compute_trange1(cp_rank, 80), 0, 1e-3);
+                               compute_trange1(cp_rank, rank_tile_size), 0,
+                               1e-3, verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
     diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-5 test
   {
     std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr5,
-                                          &this->init_unit_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD>(tr5);
     size_t cp_rank = 1;
     factors = cp::btas_cp_als(*GlobalFixture::world, b_sparse, cp_rank,
-                              compute_trange1(cp_rank, 80), 0, 1e-3);
+                              compute_trange1(cp_rank, rank_tile_size), 0, 1e-3,
+                              verbose);
 
     auto b_cp = cp::reconstruct(factors);
     TSpArrayD diff;
@@ -396,6 +342,8 @@ BOOST_AUTO_TEST_CASE(btas_cp_rals) {
 }
 
 BOOST_AUTO_TEST_CASE(ta_cp_als) {
+  TiledArray::srand(1);
+
   // Make a tiled range with block size of 1
   TiledArray::TiledRange tr3, tr4, tr5;
   {
@@ -411,84 +359,68 @@ BOOST_AUTO_TEST_CASE(ta_cp_als) {
   // Dense test
   // order-3 test
   {
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr3,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr3);
     size_t cp_rank = 1;
-    auto b_cp = compute_cp(b_dense, cp_rank);
+    auto b_cp = compute_cp(b_dense, cp_rank, verbose);
     TArrayD diff;
     diff("a,b,c") = b_dense("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-4 test
   {
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr4,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr4);
     size_t cp_rank = 1;
-    auto b_cp = compute_cp(b_dense, cp_rank);
+    auto b_cp = compute_cp(b_dense, cp_rank, verbose);
 
     TArrayD diff;
     diff("a,b,c,d") = b_dense("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-5 test
   {
-    std::vector<TArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_dense = make_array<TArrayD>(*GlobalFixture::world, tr5,
-                                       &this->init_unit_tile<TensorD>);
+    auto b_dense = make<TArrayD>(tr5);
     size_t cp_rank = 1;
-    auto b_cp = compute_cp(b_dense, cp_rank);
+    auto b_cp = compute_cp(b_dense, cp_rank, verbose);
 
     TArrayD diff;
     diff("a,b,c,d,e") = b_dense("a,b,c,d,e") - b_cp("a,b,c,d,e");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < target_rel_error);
     BOOST_CHECK(accurate);
   }
 
   // sparse test
   // order-3 test
   {
-    std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr3,
-                                          &this->init_rand_tile<TensorD>);
-    b_sparse.truncate();
+    auto b_sparse = make<TSpArrayD, Fill::Random>(tr3);
+    // std::cout << "b_sparse = " <<
+    // array_to_eigen_tensor<Eigen::Tensor<double,3>>(b_sparse) << std::endl;
     size_t cp_rank = 77;
-    auto b_cp = compute_cp(b_sparse, cp_rank);
+    auto b_cp = compute_cp(b_sparse, cp_rank, verbose);
     TSpArrayD diff;
     diff("a,b,c") = b_sparse("a,b,c") - b_cp("a,b,c");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-4 test
   {
-    std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr4,
-                                          &this->init_unit_tile<TensorD>);
-    b_sparse.truncate();
+    auto b_sparse = make<TSpArrayD>(tr4);
     size_t cp_rank = 1;
-    auto b_cp = compute_cp(b_sparse, cp_rank);
+    auto b_cp = compute_cp(b_sparse, cp_rank, verbose);
     TSpArrayD diff;
     diff("a,b,c,d") = b_sparse("a,b,c,d") - b_cp("a,b,c,d");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
   // order-5 test
   {
-    std::vector<TSpArrayD> factors;
-    // Make an sparse array with tiled range from above.
-    auto b_sparse = make_array<TSpArrayD>(*GlobalFixture::world, tr5,
-                                          &this->init_unit_tile<TensorD>);
+    auto b_sparse = make<TSpArrayD>(tr5);
     double cp_rank = 1;
-    auto b_cp = compute_cp(b_sparse, cp_rank);
+    auto b_cp = compute_cp(b_sparse, cp_rank, verbose);
     TSpArrayD diff;
     diff("a,b,c,d,e") = b_sparse("a,b,c,d,e") - b_cp("a,b,c,d,e");
-    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-10);
+    bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error);
     BOOST_CHECK(accurate);
   }
 }
diff --git a/tests/expressions_fixture.h b/tests/expressions_fixture.h
index a558d32444..3e493b1200 100644
--- a/tests/expressions_fixture.h
+++ b/tests/expressions_fixture.h
@@ -139,7 +139,8 @@ struct ExpressionsFixture : public TiledRangeFixture {
   // Fill a tile with random data
   static Tile make_rand_tile(const typename Tile::range_type& r) {
     Tile tile(r);
-    for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]);
+    for (std::size_t i = 0ul; i < tile.size(); ++i)
+      set_random(tile.at_ordinal(i));
     return tile;
   }
 
@@ -198,7 +199,7 @@ struct ExpressionsFixture : public TiledRangeFixture {
     // make sure all mpi gets the same shape
     if (GlobalFixture::world->rank() == 0) {
       for (std::size_t i = 0; i < n; i++) {
-        norms[i] = GlobalFixture::world->drand() > 0.5 ? 0.0 : 1.0;
+        norms.at_ordinal(i) = GlobalFixture::world->drand() > 0.5 ? 0.0 : 1.0;
       }
     }
 
diff --git a/tests/expressions_mixed.cpp b/tests/expressions_mixed.cpp
index 04fef98683..6b92cec695 100644
--- a/tests/expressions_mixed.cpp
+++ b/tests/expressions_mixed.cpp
@@ -65,44 +65,7 @@ struct MixedExpressionsFixture : public TiledRangeFixture {
 
   template <typename Tile>
   static void random_fill(DistArray<Tile>& array) {
-    typename DistArray<Tile>::pmap_interface::const_iterator it =
-        array.pmap()->begin();
-    typename DistArray<Tile>::pmap_interface::const_iterator end =
-        array.pmap()->end();
-    for (; it != end; ++it)
-      array.set(*it, array.world().taskq.add(
-                         &MixedExpressionsFixture::template make_rand_tile<
-                             DistArray<Tile>>,
-                         array.trange().make_tile_range(*it)));
-  }
-
-  template <typename T>
-  static void set_random(T& t) {
-    // with 50% generate nonzero integer value in [0,101)
-    auto rand_int = GlobalFixture::world->rand();
-    t = (rand_int < 0x8fffff) ? rand_int % 101 : 0;
-  }
-
-  template <typename T>
-  static void set_random(std::complex<T>& t) {
-    // with 50% generate nonzero value
-    auto rand_int1 = GlobalFixture::world->rand();
-    if (rand_int1 < 0x8ffffful) {
-      t = std::complex<T>{T(rand_int1 % 101),
-                          T(GlobalFixture::world->rand() % 101)};
-    } else
-      t = std::complex<T>{0, 0};
-  }
-
-  // Fill a tile with random data
-  template <typename A>
-  static typename A::value_type make_rand_tile(
-      const typename A::value_type::range_type& r) {
-    typename A::value_type tile(r);
-    for (const auto& i : r) {
-      set_random(tile[i]);
-    }
-    return tile;
+    array.fill_random();
   }
 
   template <typename M, typename A>
diff --git a/tests/linalg.cpp b/tests/linalg.cpp
index 0753e8cf50..5c84d0b5e4 100644
--- a/tests/linalg.cpp
+++ b/tests/linalg.cpp
@@ -119,7 +119,10 @@ struct LinearAlgebraFixture : ReferenceFixture {
   template <class A>
   static void compare(const char* context, const A& non_dist, const A& result,
                       double e) {
-    BOOST_TEST_CONTEXT(context);
+    // clang-format off
+    BOOST_TEST_CONTEXT(context)
+    ;
+    // clang-format on
     auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get();
     BOOST_CHECK_SMALL(diff_with_non_dist, e);
   }
diff --git a/tests/random.cpp b/tests/random.cpp
index c5ffbf1a56..f1bef4aea2 100644
--- a/tests/random.cpp
+++ b/tests/random.cpp
@@ -11,7 +11,7 @@ using true_types = boost::mpl::list<int, float, double, std::complex<float>,
                                     std::complex<double>>;
 }  // namespace
 
-BOOST_AUTO_TEST_SUITE(can_make_random, TA_UT_LABEL_SERIAL)
+BOOST_AUTO_TEST_SUITE(random_suite, TA_UT_LABEL_SERIAL)
 
 BOOST_AUTO_TEST_CASE_TEMPLATE(can_make_random_false, ValueType, false_types) {
   using can_make_random_t = CanMakeRandom<ValueType>;
@@ -44,4 +44,17 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(enable_if_true, ValueType, true_types) {
   BOOST_CHECK(result);
 }
 
+BOOST_AUTO_TEST_CASE(rand) {
+  // set random seed
+  TiledArray::srand(20010515);
+  auto v0 = TiledArray::detail::MakeRandom<int>::generate_value();
+  auto v1 = TiledArray::detail::MakeRandom<int>::generate_value();
+  auto v2 = TiledArray::detail::MakeRandom<int>::generate_value();
+  // now set random seed
+  TiledArray::srand(20010515);
+  BOOST_CHECK_EQUAL(v0, TiledArray::detail::MakeRandom<int>::generate_value());
+  BOOST_CHECK_EQUAL(v1, TiledArray::detail::MakeRandom<int>::generate_value());
+  BOOST_CHECK_EQUAL(v2, TiledArray::detail::MakeRandom<int>::generate_value());
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/range.cpp b/tests/range.cpp
index 0e9987220b..1ad294363f 100644
--- a/tests/range.cpp
+++ b/tests/range.cpp
@@ -321,7 +321,7 @@ BOOST_AUTO_TEST_CASE(constructors) {
                                   stride_ref.begin(), stride_ref.end());
     BOOST_CHECK_EQUAL(r2.volume(), 48);
   }
-#else  // TA_SIGNED_1INDEX_TYPE
+#else   // TA_SIGNED_1INDEX_TYPE
   BOOST_REQUIRE_THROW(Range r2({{-1, 1}, {-2, 2}, {0, 6}}),
                       TiledArray::Exception);
 #endif  // TA_SIGNED_1INDEX_TYPE
@@ -603,12 +603,33 @@ BOOST_AUTO_TEST_CASE(include) {
   BOOST_CHECK(r1.includes(t19));  // check corners
   BOOST_CHECK(r1.includes(t20));
 
+  //
+  // try different flavors of includes
+  //
+  // 1-d Range is a corner case
+  Range r_1d({3});
+  // integers...
+  BOOST_CHECK(r1.includes(t20[0], t20[1], t20[2]));
+  // initializer_list<integer>
+  BOOST_CHECK(r1.includes({t20[0], t20[1], t20[2]}));
+  BOOST_CHECK(r_1d.includes({0}));
+  // array<integer>
+  BOOST_CHECK(
+      r1.includes(std::array<Range::index1_type, 3>{{t20[0], t20[1], t20[2]}}));
+  BOOST_CHECK(r_1d.includes(std::array<Range::index1_type, 1>{{0}}));
+  // ordinal
   Range::size_type o = 0;
   for (; o < r.volume(); ++o) {
     BOOST_CHECK(r.includes(o));
+    BOOST_CHECK(r.includes_ordinal(o));  // equivalent: see below
   }
-
   BOOST_CHECK(!r.includes(o));
+  BOOST_CHECK_THROW(
+      r_1d.includes(0),
+      TiledArray::Exception);  // for 1-d Range can't distinguish
+                               // includes(integers...) and includes(ordinal)
+  BOOST_CHECK(
+      r_1d.includes_ordinal(0));  // so must explicitly use includes_ordinal
 
   // ensure that includes always fails if any extent is zero
   {
diff --git a/tests/solvers.cpp b/tests/solvers.cpp
index af6af21847..c8ea24dcf8 100644
--- a/tests/solvers.cpp
+++ b/tests/solvers.cpp
@@ -104,9 +104,9 @@ struct make_b<DistArray<Tile, Policy>> {
     typename T::value_type tile(range);
 
     // Fill tile with data
-    tile(0) = 1;
-    tile(1) = 4;
-    tile(2) = 0;
+    tile({0}) = 1;
+    tile({1}) = 4;
+    tile({2}) = 0;
 
     return tile;
   }
@@ -130,9 +130,9 @@ struct make_pc<DistArray<Tile, Policy>> {
     typename T::value_type tile(range);
 
     // Fill tile with data
-    tile(0) = 1;
-    tile(1) = 1;
-    tile(2) = 1;
+    tile({0}) = 1;
+    tile({1}) = 1;
+    tile({2}) = 1;
 
     return tile;
   }
@@ -157,7 +157,7 @@ struct validate<DistArray<Tile, Policy>> {
       const auto& tile_0 = x.find({0}).get();
       double error_2norm = 0.0;
       for (int i = 0; i != 3; ++i) {
-        auto delta = tile_0(i) - tile_0_ref[i];
+        auto delta = tile_0({i}) - tile_0_ref[i];
         error_2norm += delta * delta;
       }
       error_2norm = std::sqrt(error_2norm);
diff --git a/tests/sparse_shape_fixture.h b/tests/sparse_shape_fixture.h
index 8358ab8aaa..772cd657ab 100644
--- a/tests/sparse_shape_fixture.h
+++ b/tests/sparse_shape_fixture.h
@@ -43,10 +43,13 @@ struct SparseShapeFixture : public TiledRangeFixture {
         tolerance(0.0001)
 
   {
+    prior_default_threshold = SparseShape<float>::threshold();
     SparseShape<float>::threshold(default_threshold);
   }
 
-  ~SparseShapeFixture() {}
+  ~SparseShapeFixture() {
+    SparseShape<float>::threshold(prior_default_threshold);
+  }
 
   static Tensor<float> make_norm_tensor(const TiledRange& trange,
                                         const float fill_percent,
@@ -104,6 +107,8 @@ struct SparseShapeFixture : public TiledRangeFixture {
   Permutation perm;
   TiledArray::detail::PermIndex perm_index;
   const float tolerance;
+  inline static float prior_default_threshold =
+      std::numeric_limits<float>::max();
   static constexpr float default_threshold = 0.001;
 };  // SparseShapeFixture
 
diff --git a/tests/tensor_tensor_view.cpp b/tests/tensor_tensor_view.cpp
index 1a6262f45e..f69e95c0f6 100644
--- a/tests/tensor_tensor_view.cpp
+++ b/tests/tensor_tensor_view.cpp
@@ -24,8 +24,8 @@
  */
 
 #include <chrono>
-#include <random>
 #include "TiledArray/tensor/tensor_interface.h"
+#include "TiledArray/util/random.h"
 #include "tiledarray.h"
 #include "unit_test_config.h"
 
@@ -39,11 +39,7 @@ struct TensorViewFixture {
   static Tensor<int> random_tensor(const Range& range) {
     Tensor<int> result(range);
 
-    std::default_random_engine generator(
-        std::chrono::system_clock::now().time_since_epoch().count());
-    std::uniform_int_distribution<int> distribution(0, 100);
-
-    for (auto& value : result) value = distribution(generator);
+    for (auto& value : result) value = TiledArray::rand() % 101;
 
     return result;
   }
@@ -53,6 +49,8 @@ struct TensorViewFixture {
 
   Tensor<int> t{random_tensor(Range(lower_bound, upper_bound))};
 
+  constexpr static std::size_t ntiles_per_test = 10;
+
 };  // TensorViewFixture
 
 const std::array<int, 3> TensorViewFixture::lower_bound{{0, 1, 2}};
@@ -62,6 +60,7 @@ BOOST_FIXTURE_TEST_SUITE(tensor_view_suite, TensorViewFixture,
                          TA_UT_LABEL_SERIAL)
 
 BOOST_AUTO_TEST_CASE(non_const_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -96,12 +95,16 @@ BOOST_AUTO_TEST_CASE(non_const_view) {
           BOOST_CHECK_EQUAL(view(*it), t(*it));
           BOOST_CHECK_EQUAL(view(i), t(*it));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(const_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -136,6 +139,9 @@ BOOST_AUTO_TEST_CASE(const_view) {
           BOOST_CHECK_EQUAL(view(*it), t(*it));
           BOOST_CHECK_EQUAL(view(i), t(*it));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
@@ -167,6 +173,7 @@ BOOST_AUTO_TEST_CASE(transitive_read_write) {
 }
 
 BOOST_AUTO_TEST_CASE(assign_tensor_to_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -195,12 +202,16 @@ BOOST_AUTO_TEST_CASE(assign_tensor_to_view) {
           BOOST_CHECK_EQUAL(t(*it), tensor(*it));
           BOOST_CHECK_EQUAL(t(*it), tensor(i));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(copy_view_to_tensor) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -224,12 +235,16 @@ BOOST_AUTO_TEST_CASE(copy_view_to_tensor) {
           BOOST_CHECK_EQUAL(tensor(i), view(*it));
           BOOST_CHECK_EQUAL(tensor(i), view(i));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(assign_view_to_tensor) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -254,12 +269,16 @@ BOOST_AUTO_TEST_CASE(assign_view_to_tensor) {
           BOOST_CHECK_EQUAL(tensor(i), view(*it));
           BOOST_CHECK_EQUAL(tensor(i), view(i));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(add_tensor_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -286,12 +305,16 @@ BOOST_AUTO_TEST_CASE(add_tensor_view) {
              ++it) {
           BOOST_CHECK_EQUAL(result(*it), view(*it) + right(*it));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(add_tensor_to_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -318,12 +341,16 @@ BOOST_AUTO_TEST_CASE(add_tensor_to_view) {
           BOOST_CHECK_EQUAL(view(*it), temp(*it) + tensor(*it));
           BOOST_CHECK_EQUAL(view(i), temp(*it) + tensor(*it));
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(scale_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -347,12 +374,16 @@ BOOST_AUTO_TEST_CASE(scale_view) {
              ++it, ++i) {
           BOOST_CHECK_EQUAL(result(*it), view(*it) * 3);
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
 }
 
 BOOST_AUTO_TEST_CASE(scale_to_view) {
+  std::size_t tile_count = 0;
   for (auto lower_it = t.range().begin(); lower_it != t.range().end();
        ++lower_it) {
     const auto lower = *lower_it;
@@ -378,6 +409,9 @@ BOOST_AUTO_TEST_CASE(scale_to_view) {
           BOOST_CHECK_EQUAL(view(*it), temp(*it) * 3);
           BOOST_CHECK_EQUAL(view(i), temp(*it) * 3);
         }
+
+        ++tile_count;
+        if (tile_count == ntiles_per_test) return;
       }
     }
   }
diff --git a/tests/tot_expressions.cpp b/tests/tot_expressions.cpp
index c834810065..66939bcf3e 100644
--- a/tests/tot_expressions.cpp
+++ b/tests/tot_expressions.cpp
@@ -116,30 +116,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) {
   using il_type = detail::vector_il<inner_type>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   inner_type c_0{r2};
-  c_0[0] = 1;
-  c_0[1] = 3;
+  c_0({0}) = 1;
+  c_0({1}) = 3;
   inner_type c_1{r3};
-  c_1[0] = 3;
-  c_1[1] = 5;
-  c_1[2] = 7;
+  c_1({0}) = 3;
+  c_1({1}) = 5;
+  c_1({2}) = 7;
   il_type corr_il{c_0, c_1};
   tensor_type<TestParam> lhs(m_world, lhs_il);
   tensor_type<TestParam> rhs(m_world, rhs_il);
@@ -502,56 +502,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) {
   using range_type = Range;
   using il_type = detail::matrix_il<inner_type>;
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 1;
-  c_00[1] = 3;
+  c_00({0}) = 1;
+  c_00({1}) = 3;
   inner_type c_01(range_type{2});
-  c_01[0] = 3;
-  c_01[1] = 5;
+  c_01({0}) = 3;
+  c_01({1}) = 5;
   inner_type c_02(range_type{2});
-  c_02[0] = 5;
-  c_02[1] = 7;
+  c_02({0}) = 5;
+  c_02({1}) = 7;
   inner_type c_10(range_type{3});
-  c_10[0] = 3;
-  c_10[1] = 5;
-  c_10[2] = 7;
+  c_10({0}) = 3;
+  c_10({1}) = 5;
+  c_10({2}) = 7;
   inner_type c_11(range_type{3});
-  c_11[0] = 5;
-  c_11[1] = 7;
-  c_11[2] = 9;
+  c_11({0}) = 5;
+  c_11({1}) = 7;
+  c_11({2}) = 9;
   inner_type c_12(range_type{3});
-  c_12[0] = 7;
-  c_12[1] = 9;
-  c_12[2] = 11;
+  c_12({0}) = 7;
+  c_12({1}) = 9;
+  c_12({2}) = 11;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -569,56 +569,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) {
   using range_type = Range;
   using il_type = detail::matrix_il<inner_type>;
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 1;
-  c_00[1] = 3;
+  c_00({0}) = 1;
+  c_00({1}) = 3;
   inner_type c_01(range_type{2});
-  c_01[0] = 3;
-  c_01[1] = 5;
+  c_01({0}) = 3;
+  c_01({1}) = 5;
   inner_type c_02(range_type{2});
-  c_02[0] = 5;
-  c_02[1] = 7;
+  c_02({0}) = 5;
+  c_02({1}) = 7;
   inner_type c_10(range_type{3});
-  c_10[0] = 3;
-  c_10[1] = 5;
-  c_10[2] = 7;
+  c_10({0}) = 3;
+  c_10({1}) = 5;
+  c_10({2}) = 7;
   inner_type c_11(range_type{3});
-  c_11[0] = 5;
-  c_11[1] = 7;
-  c_11[2] = 9;
+  c_11({0}) = 5;
+  c_11({1}) = 7;
+  c_11({2}) = 9;
   inner_type c_12(range_type{3});
-  c_12[0] = 7;
-  c_12[1] = 9;
-  c_12[2] = 11;
+  c_12({0}) = 7;
+  c_12({1}) = 9;
+  c_12({2}) = 11;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -1158,30 +1158,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) {
   using il_type = detail::vector_il<inner_type>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   inner_type c_0{r2};
-  c_0[0] = -1;
-  c_0[1] = -1;
+  c_0({0}) = -1;
+  c_0({1}) = -1;
   inner_type c_1{r3};
-  c_1[0] = -1;
-  c_1[1] = -1;
-  c_1[2] = -1;
+  c_1({0}) = -1;
+  c_1({1}) = -1;
+  c_1({2}) = -1;
   il_type corr_il{c_0, c_1};
 
   tensor_type<TestParam> lhs(m_world, lhs_il);
@@ -1545,56 +1545,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) {
   using il_type = detail::matrix_il<inner_type>;
 
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = -1;
-  c_00[1] = -1;
+  c_00({0}) = -1;
+  c_00({1}) = -1;
   inner_type c_01(range_type{2});
-  c_01[0] = -1;
-  c_01[1] = -1;
+  c_01({0}) = -1;
+  c_01({1}) = -1;
   inner_type c_02(range_type{2});
-  c_02[0] = -1;
-  c_02[1] = -1;
+  c_02({0}) = -1;
+  c_02({1}) = -1;
   inner_type c_10(range_type{3});
-  c_10[0] = -1;
-  c_10[1] = -1;
-  c_10[2] = -1;
+  c_10({0}) = -1;
+  c_10({1}) = -1;
+  c_10({2}) = -1;
   inner_type c_11(range_type{3});
-  c_11[0] = -1;
-  c_11[1] = -1;
-  c_11[2] = -1;
+  c_11({0}) = -1;
+  c_11({1}) = -1;
+  c_11({2}) = -1;
   inner_type c_12(range_type{3});
-  c_12[0] = -1;
-  c_12[1] = -1;
-  c_12[2] = -1;
+  c_12({0}) = -1;
+  c_12({1}) = -1;
+  c_12({2}) = -1;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -1612,56 +1612,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) {
   using range_type = Range;
   using il_type = detail::matrix_il<inner_type>;
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = -1;
-  c_00[1] = -1;
+  c_00({0}) = -1;
+  c_00({1}) = -1;
   inner_type c_01(range_type{2});
-  c_01[0] = -1;
-  c_01[1] = -1;
+  c_01({0}) = -1;
+  c_01({1}) = -1;
   inner_type c_02(range_type{2});
-  c_02[0] = -1;
-  c_02[1] = -1;
+  c_02({0}) = -1;
+  c_02({1}) = -1;
   inner_type c_10(range_type{3});
-  c_10[0] = -1;
-  c_10[1] = -1;
-  c_10[2] = -1;
+  c_10({0}) = -1;
+  c_10({1}) = -1;
+  c_10({2}) = -1;
   inner_type c_11(range_type{3});
-  c_11[0] = -1;
-  c_11[1] = -1;
-  c_11[2] = -1;
+  c_11({0}) = -1;
+  c_11({1}) = -1;
+  c_11({2}) = -1;
   inner_type c_12(range_type{3});
-  c_12[0] = -1;
-  c_12[1] = -1;
-  c_12[2] = -1;
+  c_12({0}) = -1;
+  c_12({1}) = -1;
+  c_12({2}) = -1;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -2201,30 +2201,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_lhs, TestParam, test_params) {
   using il_type = detail::vector_il<inner_type>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   inner_type c_0{r2};
-  c_0[0] = 1;
-  c_0[1] = 4;
+  c_0({0}) = 1;
+  c_0({1}) = 4;
   inner_type c_1{r3};
-  c_1[0] = 4;
-  c_1[1] = 7;
-  c_1[2] = 10;
+  c_1({0}) = 4;
+  c_1({1}) = 7;
+  c_1({2}) = 10;
   il_type corr_il{c_0, c_1};
   tensor_type<TestParam> lhs(m_world, lhs_il);
   tensor_type<TestParam> rhs(m_world, rhs_il);
@@ -2240,30 +2240,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_rhs, TestParam, test_params) {
   using il_type = detail::vector_il<inner_type>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   inner_type c_0{r2};
-  c_0[0] = 2;
-  c_0[1] = 5;
+  c_0({0}) = 2;
+  c_0({1}) = 5;
   inner_type c_1{r3};
-  c_1[0] = 5;
-  c_1[1] = 8;
-  c_1[2] = 11;
+  c_1({0}) = 5;
+  c_1({1}) = 8;
+  c_1({2}) = 11;
   il_type corr_il{c_0, c_1};
   tensor_type<TestParam> lhs(m_world, lhs_il);
   tensor_type<TestParam> rhs(m_world, rhs_il);
@@ -2419,56 +2419,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_lhs, TestParam, test_params) {
   using il_type = detail::matrix_il<inner_type>;
 
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 1;
-  c_00[1] = 4;
+  c_00({0}) = 1;
+  c_00({1}) = 4;
   inner_type c_01(range_type{2});
-  c_01[0] = 4;
-  c_01[1] = 7;
+  c_01({0}) = 4;
+  c_01({1}) = 7;
   inner_type c_02(range_type{2});
-  c_02[0] = 7;
-  c_02[1] = 10;
+  c_02({0}) = 7;
+  c_02({1}) = 10;
   inner_type c_10(range_type{3});
-  c_10[0] = 4;
-  c_10[1] = 7;
-  c_10[2] = 10;
+  c_10({0}) = 4;
+  c_10({1}) = 7;
+  c_10({2}) = 10;
   inner_type c_11(range_type{3});
-  c_11[0] = 7;
-  c_11[1] = 10;
-  c_11[2] = 13;
+  c_11({0}) = 7;
+  c_11({1}) = 10;
+  c_11({2}) = 13;
   inner_type c_12(range_type{3});
-  c_12[0] = 10;
-  c_12[1] = 13;
-  c_12[2] = 16;
+  c_12({0}) = 10;
+  c_12({1}) = 13;
+  c_12({2}) = 16;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -2487,56 +2487,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_rhs, TestParam, test_params) {
   using il_type = detail::matrix_il<inner_type>;
 
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 2;
-  c_00[1] = 5;
+  c_00({0}) = 2;
+  c_00({1}) = 5;
   inner_type c_01(range_type{2});
-  c_01[0] = 5;
-  c_01[1] = 8;
+  c_01({0}) = 5;
+  c_01({1}) = 8;
   inner_type c_02(range_type{2});
-  c_02[0] = 8;
-  c_02[1] = 11;
+  c_02({0}) = 8;
+  c_02({1}) = 11;
   inner_type c_10(range_type{3});
-  c_10[0] = 5;
-  c_10[1] = 8;
-  c_10[2] = 11;
+  c_10({0}) = 5;
+  c_10({1}) = 8;
+  c_10({2}) = 11;
   inner_type c_11(range_type{3});
-  c_11[0] = 8;
-  c_11[1] = 11;
-  c_11[2] = 14;
+  c_11({0}) = 8;
+  c_11({1}) = 11;
+  c_11({2}) = 14;
   inner_type c_12(range_type{3});
-  c_12[0] = 11;
-  c_12[1] = 14;
-  c_12[2] = 17;
+  c_12({0}) = 11;
+  c_12({1}) = 14;
+  c_12({2}) = 17;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -2905,30 +2905,30 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov, TestParam, test_params) {
   using il_type = detail::vector_il<inner_type>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   inner_type c_0{r2};
-  c_0[0] = 0;
-  c_0[1] = 2;
+  c_0({0}) = 0;
+  c_0({1}) = 2;
   inner_type c_1{r3};
-  c_1[0] = 2;
-  c_1[1] = 6;
-  c_1[2] = 12;
+  c_1({0}) = 2;
+  c_1({1}) = 6;
+  c_1({2}) = 12;
   il_type corr_il{c_0, c_1};
   tensor_type<TestParam> lhs(m_world, lhs_il);
   tensor_type<TestParam> rhs(m_world, rhs_il);
@@ -3291,56 +3291,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov, TestParam, test_params) {
   using range_type = Range;
   using il_type = detail::matrix_il<inner_type>;
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 0;
-  c_00[1] = 2;
+  c_00({0}) = 0;
+  c_00({1}) = 2;
   inner_type c_01(range_type{2});
-  c_01[0] = 2;
-  c_01[1] = 6;
+  c_01({0}) = 2;
+  c_01({1}) = 6;
   inner_type c_02(range_type{2});
-  c_02[0] = 6;
-  c_02[1] = 12;
+  c_02({0}) = 6;
+  c_02({1}) = 12;
   inner_type c_10(range_type{3});
-  c_10[0] = 2;
-  c_10[1] = 6;
-  c_10[2] = 12;
+  c_10({0}) = 2;
+  c_10({1}) = 6;
+  c_10({2}) = 12;
   inner_type c_11(range_type{3});
-  c_11[0] = 6;
-  c_11[1] = 12;
-  c_11[2] = 20;
+  c_11({0}) = 6;
+  c_11({1}) = 12;
+  c_11({2}) = 20;
   inner_type c_12(range_type{3});
-  c_12[0] = 12;
-  c_12[1] = 20;
-  c_12[2] = 30;
+  c_12({0}) = 12;
+  c_12({1}) = 20;
+  c_12({2}) = 30;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -3358,56 +3358,56 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(mov_result_transpose, TestParam, test_params) {
   using range_type = Range;
   using il_type = detail::matrix_il<inner_type>;
   inner_type lhs_00(range_type{2});
-  lhs_00[0] = 0;
-  lhs_00[1] = 1;
+  lhs_00({0}) = 0;
+  lhs_00({1}) = 1;
   inner_type lhs_01(range_type{2});
-  lhs_01[0] = 1;
-  lhs_01[1] = 2;
+  lhs_01({0}) = 1;
+  lhs_01({1}) = 2;
   inner_type lhs_02(range_type{2});
-  lhs_02[0] = 2;
-  lhs_02[1] = 3;
+  lhs_02({0}) = 2;
+  lhs_02({1}) = 3;
   inner_type lhs_10(range_type{3});
-  lhs_10[0] = 1;
-  lhs_10[1] = 2;
-  lhs_10[2] = 3;
+  lhs_10({0}) = 1;
+  lhs_10({1}) = 2;
+  lhs_10({2}) = 3;
   inner_type lhs_11(range_type{3});
-  lhs_11[0] = 2;
-  lhs_11[1] = 3;
-  lhs_11[2] = 4;
+  lhs_11({0}) = 2;
+  lhs_11({1}) = 3;
+  lhs_11({2}) = 4;
   inner_type lhs_12(range_type{3});
-  lhs_12[0] = 3;
-  lhs_12[1] = 4;
-  lhs_12[2] = 5;
+  lhs_12({0}) = 3;
+  lhs_12({1}) = 4;
+  lhs_12({2}) = 5;
 
   inner_type rhs_02(range_type{2});
-  rhs_02[0] = 3;
-  rhs_02[1] = 4;
+  rhs_02({0}) = 3;
+  rhs_02({1}) = 4;
   inner_type rhs_12(range_type{3});
-  rhs_12[0] = 4;
-  rhs_12[1] = 5;
-  rhs_12[2] = 6;
+  rhs_12({0}) = 4;
+  rhs_12({1}) = 5;
+  rhs_12({2}) = 6;
 
   inner_type c_00(range_type{2});
-  c_00[0] = 0;
-  c_00[1] = 2;
+  c_00({0}) = 0;
+  c_00({1}) = 2;
   inner_type c_01(range_type{2});
-  c_01[0] = 2;
-  c_01[1] = 6;
+  c_01({0}) = 2;
+  c_01({1}) = 6;
   inner_type c_02(range_type{2});
-  c_02[0] = 6;
-  c_02[1] = 12;
+  c_02({0}) = 6;
+  c_02({1}) = 12;
   inner_type c_10(range_type{3});
-  c_10[0] = 2;
-  c_10[1] = 6;
-  c_10[2] = 12;
+  c_10({0}) = 2;
+  c_10({1}) = 6;
+  c_10({2}) = 12;
   inner_type c_11(range_type{3});
-  c_11[0] = 6;
-  c_11[1] = 12;
-  c_11[2] = 20;
+  c_11({0}) = 6;
+  c_11({1}) = 12;
+  c_11({2}) = 20;
   inner_type c_12(range_type{3});
-  c_12[0] = 12;
-  c_12[1] = 20;
-  c_12[2] = 30;
+  c_12({0}) = 12;
+  c_12({1}) = 20;
+  c_12({2}) = 30;
 
   il_type lhs_il{{lhs_00, lhs_01, lhs_02}, {lhs_10, lhs_11, lhs_12}};
   il_type rhs_il{{lhs_01, lhs_02, rhs_02}, {lhs_11, lhs_12, rhs_12}};
@@ -4215,21 +4215,21 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(vov_inner_contraction, TestParam, test_params) {
   using policy_type = policy_type<TestParam>;
   range_type r2{2}, r3{3};
   inner_type lhs_0{r2};
-  lhs_0[0] = 0;
-  lhs_0[1] = 1;
+  lhs_0({0}) = 0;
+  lhs_0({1}) = 1;
   inner_type lhs_1{r3};
-  lhs_1[0] = 1;
-  lhs_1[1] = 2;
-  lhs_1[2] = 3;
+  lhs_1({0}) = 1;
+  lhs_1({1}) = 2;
+  lhs_1({2}) = 3;
   il_type lhs_il{lhs_0, lhs_1};
 
   inner_type rhs_0{r2};
-  rhs_0[0] = 1;
-  rhs_0[1] = 2;
+  rhs_0({0}) = 1;
+  rhs_0({1}) = 2;
   inner_type rhs_1{r3};
-  rhs_1[0] = 2;
-  rhs_1[1] = 3;
-  rhs_1[2] = 4;
+  rhs_1({0}) = 2;
+  rhs_1({1}) = 3;
+  rhs_1({2}) = 4;
   il_type rhs_il{rhs_0, rhs_1};
 
   using result_type = DistArray<Tensor<scalar_type<TestParam>>, policy_type>;