ValeevGroup · evaleev · Aug 2, 2023 · May 19, 2023 · May 21, 2023 · May 21, 2023
diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h
@@ -548,7 +548,7 @@ class DistArray : public madness::archive::ParallelSerializableObject {
   /// initialized using TiledArray::Cast<Tile,OtherTile>
   /// \param other The array to be copied
   template <typename OtherTile, typename = enable_if_not_my_type<OtherTile>>
-  explicit DistArray(const DistArray<OtherTile, Policy>& other) : pimpl_() {
+  DistArray(const DistArray<OtherTile, Policy>& other) : pimpl_() {
     *this = foreach<Tile>(other, [](Tile& result, const OtherTile& source) {
       result = TiledArray::Cast<Tile, OtherTile>{}(source);
     });

diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h
@@ -252,13 +252,13 @@ class Expr {
                               >::type* = nullptr>
   void set_tile(A& array, const I index, const Future<T>& tile,
                 const std::shared_ptr<Op>& op) const {
-    auto eval_tile_fn =
-        &Expr_::template eval_tile<typename A::value_type, const T&,
-                                   TiledArray::Cast<typename A::value_type, T>,
-                                   Op>;
-    array.set(index, array.world().taskq.add(
-                         eval_tile_fn, tile,
-                         TiledArray::Cast<typename A::value_type, T>(), op));
+    // the cast converts the input tile to the argument type expected by op
+    using cast_op_type = TiledArray::Cast<typename Op::argument_type, T>;
+    auto eval_tile_fn =
+        &Expr_::template eval_tile<typename A::value_type, const T&,
+                                   cast_op_type, Op>;
+    array.set(index, array.world().taskq.add(eval_tile_fn, tile,
+                                             cast_op_type(), op));
   }
 
 #ifdef TILEDARRAY_HAS_CUDA
@@ -278,13 +278,13 @@ class Expr {
                 ::TiledArray::detail::is_cuda_tile_v<T>>::type* = nullptr>
   void set_tile(A& array, const I index, const Future<T>& tile,
                 const std::shared_ptr<Op>& op) const {
-    auto eval_tile_fn =
-        &Expr_::template eval_tile<typename A::value_type, const T&,
-                                   TiledArray::Cast<typename A::value_type, T>,
-                                   Op>;
-    array.set(index, madness::add_cuda_task(
-                         array.world(), eval_tile_fn, tile,
-                         TiledArray::Cast<typename A::value_type, T>(), op));
+    // the cast converts the input tile to the argument type expected by op
+    using cast_op_type = TiledArray::Cast<typename Op::argument_type, T>;
+    auto eval_tile_fn =
+        &Expr_::template eval_tile<typename A::value_type, const T&,
+                                   cast_op_type, Op>;
+    array.set(index, madness::add_cuda_task(array.world(), eval_tile_fn, tile,
+                                            cast_op_type(), op));
   }
 #endif
 

diff --git a/src/TiledArray/expressions/expr_engine.h b/src/TiledArray/expressions/expr_engine.h
@@ -73,7 +73,7 @@ class ExprEngine : private NO_DEFAULTS {
   World* world_;  ///< The world where this expression will be evaluated
   BipartiteIndexList
       indices_;  ///< The index list of this expression; bipartite due to need
-                 ///< to support recursive tensors (i.e. Tensor-of-Tensor)
+                 ///< to support nested tensors (e.g. tensors of tensors)
   bool permute_tiles_;  ///< Result tile permutation flag (\c true == permute
                         ///< tile)
   /// The permutation that will be applied to the outer tensor of tensors

diff --git a/src/TiledArray/expressions/mult_engine.h b/src/TiledArray/expressions/mult_engine.h
@@ -189,12 +189,14 @@ struct EngineTrait<ScalMultEngine<Left, Right, Scalar, Result>> {
 /// Multiplication expression engine
 
 /// This implements any expression encoded with the multiplication operator.
-/// This includes Hadamard product, e.g. \code (c("i,j")=)a("i,j")*b("i,j")
-/// \endcode , and pure contractions, e.g. \code (c("i,j")=)a("i,k")*b("k,j")
-/// \endcode . \internal mixed Hadamard-contraction case, e.g. \code
-/// c("i,j,l")=a("i,l,k")*b("j,l,k") \endcode , is not supported since
-///   this requires that the result labels are assigned by user (currently they
-///   are computed by this engine)
+/// This includes Hadamard product, e.g.
+/// \code (c("i,j")=)a("i,j")*b("i,j") \endcode ,
+/// and pure contractions, e.g. \code (c("i,j")=)a("i,k")*b("k,j") \endcode .
+/// \internal mixed Hadamard-contraction case, e.g.
+/// \code c("i,j,l")=a("i,l,k")*b("j,l,k") \endcode ,
+/// is not supported since
+/// this requires that the result labels are assigned by user (currently they
+/// are computed by this engine)
 /// \tparam Left The left-hand engine type
 /// \tparam Right The right-hand engine type
 /// \tparam Result The result tile type

diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h
@@ -176,6 +176,19 @@ using Array
 
 enum class HostExecutor { Thread, MADWorld, Default = MADWorld };
 
+namespace conversions {
+
+/// User-defined conversion of an object of type \c From to type \c To
+
+/// This primary template is only declared; specializations must define
+/// \code
+///  To operator()(From&& from);
+/// \endcode
+template <typename To, typename From>
+struct to;
+
+}  // namespace conversions
+
 }  // namespace TiledArray
 
 #ifndef TILEDARRAY_DISABLE_NAMESPACE_TA

diff --git a/src/TiledArray/math/linalg/scalapack/block_cyclic.h b/src/TiledArray/math/linalg/scalapack/block_cyclic.h
@@ -133,7 +133,7 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
   template <typename Tile,
             typename = std::enable_if_t<
                 TiledArray::detail::is_contiguous_tensor_v<Tile>>>
-  Tile extract_submatrix(std::vector<size_t> lo, std::vector<size_t> up) {
+  Tile extract_submatrix(std::array<size_t, 2> lo, std::array<size_t, 2> up) {
     assert(bc_dist_.i_own(lo[0], lo[1]));
 
     auto [i_st, j_st] = bc_dist_.local_indx(lo[0], lo[1]);
@@ -265,8 +265,8 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
                 local_mat_.block(i_local, j_local, i_extent, j_extent);
 
           } else {
-            std::vector<size_t> lo{i, j};
-            std::vector<size_t> up{i_last, j_last};
+            std::array<size_t, 2> lo{i, j};
+            std::array<size_t, 2> up{i_last, j_last};
             // N.B. send instead of task guarantees progress
             madness::Future<Tensor<T>> remtile_fut = world_base_t::send(
                 owner(i, j),

diff --git a/src/TiledArray/tensor/complex.h b/src/TiledArray/tensor/complex.h
@@ -27,6 +27,7 @@
 #define TILEDARRAY_SRC_TILEDARRAY_TENSOR_COMPLEX_H__INCLUDED
 
 #include <TiledArray/config.h>
+#include <TiledArray/fwd.h>
 #include <TiledArray/type_traits.h>
 
 namespace TiledArray {
@@ -301,6 +302,28 @@ TILEDARRAY_FORCE_INLINE
 }
 
 }  // namespace detail
+
+namespace conversions {
+
+/// Converts a complex number to a real number
+
+/// Specialization of conversions::to for converting \c std::complex<T> to
+/// \c T ; returns the real part of the argument.
+/// \tparam T the type of the real and imaginary parts
+template <typename T>
+struct to<T, std::complex<T>> {
+  /// \param v the complex number to convert; TA_ASSERT checks that its
+  ///          imaginary part is zero
+  /// \return the real part of \p v
+  // N.B. const-qualified so that const instances of this functor can be used
+  T operator()(const std::complex<T>& v) const {
+    TA_ASSERT(v.imag() == 0);
+    return v.real();
+  }
+};
+
+}  // namespace conversions
+
 }  // namespace TiledArray
 
 #endif  // TILEDARRAY_SRC_TILEDARRAY_TENSOR_COMPLEX_H__INCLUDED
diff --git a/src/TiledArray/tensor/kernels.h b/src/TiledArray/tensor/kernels.h
@@ -61,13 +61,14 @@ struct transform;
 /// \param tensor1 The first argument tensor
 /// \param tensors The remaining argument tensors
 template <typename TR, typename Op, typename T1, typename... Ts,
-          typename std::enable_if<
-              is_tensor<TR, T1, Ts...>::value ||
-              is_tensor_of_tensor<TR, T1, Ts...>::value>::type* = nullptr>
+          typename = std::enable_if_t<
+              detail::is_nested_tensor_v<TR, T1, Ts...> ||
+              std::is_invocable_r_v<TR, Op, const T1&, const Ts&...>>>
 inline TR tensor_op(Op&& op, const T1& tensor1, const Ts&... tensors) {
   if constexpr (std::is_invocable_r_v<TR, Op, const T1&, const Ts&...>) {
     return std::forward<Op>(op)(tensor1, tensors...);
   } else {
+    static_assert(detail::is_nested_tensor_v<TR, T1, Ts...>);
     return TiledArray::detail::transform<TR>()(std::forward<Op>(op), tensor1,
                                                tensors...);
   }
@@ -93,8 +94,7 @@ inline TR tensor_op(Op&& op, const T1& tensor1, const Ts&... tensors) {
 /// \param[in] tensors The remaining argument tensors
 template <typename TR, typename Op, typename T1, typename... Ts,
           typename std::enable_if<
-              (is_tensor<T1, Ts...>::value ||
-               is_tensor_of_tensor<TR, T1, Ts...>::value) &&
+              is_nested_tensor_v<T1, Ts...> &&
               is_contiguous_tensor<T1, Ts...>::value>::type* = nullptr>
 inline TR tensor_op(Op&& op, const Permutation& perm, const T1& tensor1,
                     const Ts&... tensors) {
@@ -219,7 +219,7 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
 /// \param[in] tensors The argument tensors
 template <typename Op, typename TR, typename... Ts,
           typename std::enable_if<
-              is_tensor_of_tensor<TR, Ts...>::value &&
+              !is_tensor_v<TR, Ts...> &&
               is_contiguous_tensor<TR, Ts...>::value>::type* = nullptr>
 inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
   TA_ASSERT(!empty(result, tensors...));
@@ -228,7 +228,11 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
   const auto volume = result.range().volume();
 
   for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
-    inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...);
+    if constexpr (std::is_invocable_r_v<void, Op, typename TR::value_type&,
+                                        typename Ts::value_type...>)
+      op(result.at_ordinal(ord), tensors.at_ordinal(ord)...);
+    else
+      inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...);
   }
 }
 
@@ -457,7 +461,7 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
                       tensors.data()...);
 }
 
-/// Initialize tensor of tensors with contiguous tensor arguments
+/// Initialize nested tensor with contiguous tensor arguments
 
 /// This function initializes the \c i -th element of \c result with the result
 /// of \c op(tensors[i]...)
@@ -470,17 +474,22 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
 /// \param[in] tensors The argument tensors
 template <
     typename Op, typename TR, typename... Ts,
-    typename std::enable_if<is_tensor_of_tensor<TR, Ts...>::value &&
+    typename std::enable_if<(is_nested_tensor<TR, Ts...>::value &&
+                             !is_tensor<TR, Ts...>::value) &&
                             is_contiguous_tensor<TR>::value>::type* = nullptr>
 inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
   TA_ASSERT(!empty(result, tensors...));
   TA_ASSERT(is_range_set_congruent(result, tensors...));
 
   const auto volume = result.range().volume();
 
-  for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
-    new (result.data() + ord) typename TR::value_type(
-        tensor_op<typename TR::value_type>(op, tensors.at_ordinal(ord)...));
+  if constexpr (std::is_invocable_r_v<TR, Op, const Ts&...>) {
+    result = std::forward<Op>(op)(tensors...);
+  } else {
+    for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
+      new (result.data() + ord) typename TR::value_type(
+          tensor_op<typename TR::value_type>(op, tensors.at_ordinal(ord)...));
+    }
   }
 }
 

diff --git a/src/TiledArray/tensor/operators.h b/src/TiledArray/tensor/operators.h
@@ -41,11 +41,8 @@ namespace TiledArray {
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to <tt>left[i] + right[i]</tt>
 template <typename T1, typename T2,
-          typename = std::enable_if_t<
-              detail::is_tensor<detail::remove_cvr_t<T1>,
-                                detail::remove_cvr_t<T2>>::value ||
-              detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
-                                          detail::remove_cvr_t<T2>>::value>>
+          typename = std::enable_if_t<detail::tensors_have_equal_nested_rank_v<
+              detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>>
 inline decltype(auto) operator+(T1&& left, T2&& right) {
   return add(std::forward<T1>(left), std::forward<T2>(right));
 }
@@ -58,14 +55,9 @@ inline decltype(auto) operator+(T1&& left, T2&& right) {
 /// \param left The left-hand tensor argument
 /// \param right The right-hand tensor argument
 /// \return A tensor where element \c i is equal to <tt>left[i] - right[i]</tt>
-template <
-    typename T1, typename T2,
-    typename std::enable_if<
-        detail::is_tensor<detail::remove_cvr_t<T1>,
-                          detail::remove_cvr_t<T2>>::value ||
-        detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
-                                    detail::remove_cvr_t<T2>>::value>::type* =
-        nullptr>
+template <typename T1, typename T2,
+          typename = std::enable_if_t<detail::tensors_have_equal_nested_rank_v<
+              detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>>
 inline decltype(auto) operator-(T1&& left, T2&& right) {
   return subt(std::forward<T1>(left), std::forward<T2>(right));
 }
@@ -80,12 +72,8 @@ inline decltype(auto) operator-(T1&& left, T2&& right) {
 /// \return A tensor where element \c i is equal to <tt>left[i] * right[i]</tt>
 template <
     typename T1, typename T2,
-    typename std::enable_if<
-        detail::is_tensor<detail::remove_cvr_t<T1>,
-                          detail::remove_cvr_t<T2>>::value ||
-        detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
-                                    detail::remove_cvr_t<T2>>::value>::type* =
-        nullptr>
+    typename std::enable_if<detail::is_nested_tensor_v<
+        detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>::type* = nullptr>
 inline decltype(auto) operator*(T1&& left, T2&& right) {
   return mult(std::forward<T1>(left), std::forward<T2>(right));
 }
@@ -100,8 +88,7 @@ inline decltype(auto) operator*(T1&& left, T2&& right) {
 /// \return A tensor where element \c i is equal to <tt> left[i] * right </tt>
 template <typename T, typename N,
           typename std::enable_if<
-              (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
-               detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
+              detail::is_nested_tensor_v<detail::remove_cvr_t<T>> &&
               detail::is_numeric_v<N>>::type* = nullptr>
 inline decltype(auto) operator*(T&& left, N right) {
   return scale(std::forward<T>(left), right);
@@ -118,9 +105,7 @@ template <
     typename N, typename T,
     typename std::enable_if<
         detail::is_numeric_v<N> &&
-        (detail::is_tensor<detail::remove_cvr_t<T>>::value ||
-         detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value)>::type* =
-        nullptr>
+        detail::is_nested_tensor_v<detail::remove_cvr_t<T>>>::type* = nullptr>
 inline decltype(auto) operator*(N left, T&& right) {
   return scale(std::forward<T>(right), left);
 }

diff --git a/src/TiledArray/tensor/permute.h b/src/TiledArray/tensor/permute.h
@@ -97,10 +97,14 @@ inline void fuse_dimensions(SizeType* MADNESS_RESTRICT const fused_size,
 
 /// The expected signature of the input operations is:
 /// \code
-/// Result::value_type input_op(const Arg0::value_type, const
-/// Args::value_type...) \endcode The expected signature of the output
-/// operations is: \code void output_op(Result::value_type*, const
-/// Result::value_type) \endcode \tparam InputOp The input operation type
+/// Result::value_type input_op(const Arg0::value_type,
+///                             const Args::value_type...)
+/// \endcode
+/// The expected signature of the output
+/// operations is:
+/// \code void output_op(Result::value_type*, const Result::value_type)
+/// \endcode
+/// \tparam InputOp The input operation type
 /// \tparam OutputOp The output operation type
 /// \tparam Result The result tensor type
 /// \tparam Arg0 The first tensor argument type
@@ -146,13 +150,13 @@ inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
     };
 
     // Permute the data
-    for (typename Result::ordinal_type index = 0ul; index < volume;
-         index += block_size) {
-      const typename Result::ordinal_type perm_index = perm_index_op(index);
+    for (typename Result::ordinal_type ord = 0ul; ord < volume;
+         ord += block_size) {
+      const typename Result::ordinal_type perm_ord = perm_index_op(ord);
 
       // Copy the block
-      math::vector_ptr_op(op, block_size, result.data() + perm_index,
-                          arg0.data() + index, (args.data() + index)...);
+      math::vector_ptr_op(op, block_size, result.data() + perm_ord,
+                          &arg0.at_ordinal(ord), &args.at_ordinal(ord)...);
     }
 
   } else {
@@ -186,16 +190,16 @@ inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
     // Copy data from the input to the output matrix via a series of matrix
     // transposes.
     for (typename Result::ordinal_type i = 0ul; i < other_fused_size[0]; ++i) {
-      typename Result::ordinal_type index = i * other_fused_weight[0];
+      typename Result::ordinal_type ord = i * other_fused_weight[0];
       for (typename Result::ordinal_type j = 0ul; j < other_fused_size[2];
-           ++j, index += other_fused_weight[2]) {
+           ++j, ord += other_fused_weight[2]) {
         // Compute the ordinal index of the input and output matrices.
-        typename Result::ordinal_type perm_index = perm_index_op(index);
+        typename Result::ordinal_type perm_ord = perm_index_op(ord);
 
         math::transpose(input_op, output_op, other_fused_size[1],
                         other_fused_size[3], result_outer_stride,
-                        result.data() + perm_index, other_fused_weight[1],
-                        arg0.data() + index, (args.data() + index)...);
+                        &result.at_ordinal(perm_ord), other_fused_weight[1],
+                        &arg0.at_ordinal(ord), &args.at_ordinal(ord)...);
       }
     }
   }