Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/TiledArray/dist_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ class DistArray : public madness::archive::ParallelSerializableObject {
/// initialized using TiledArray::Cast<Tile,OtherTile>
/// \param other The array to be copied
template <typename OtherTile, typename = enable_if_not_my_type<OtherTile>>
explicit DistArray(const DistArray<OtherTile, Policy>& other) : pimpl_() {
DistArray(const DistArray<OtherTile, Policy>& other) : pimpl_() {
*this = foreach<Tile>(other, [](Tile& result, const OtherTile& source) {
result = TiledArray::Cast<Tile, OtherTile>{}(source);
});
Expand Down
28 changes: 14 additions & 14 deletions src/TiledArray/expressions/expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,13 @@ class Expr {
>::type* = nullptr>
void set_tile(A& array, const I index, const Future<T>& tile,
const std::shared_ptr<Op>& op) const {
auto eval_tile_fn =
&Expr_::template eval_tile<typename A::value_type, const T&,
TiledArray::Cast<typename A::value_type, T>,
Op>;
array.set(index, array.world().taskq.add(
eval_tile_fn, tile,
TiledArray::Cast<typename A::value_type, T>(), op));
auto eval_tile_fn = &Expr_::template eval_tile<
typename A::value_type, const T&,
TiledArray::Cast<typename Op::argument_type, T>, Op>;
array.set(index,
array.world().taskq.add(
eval_tile_fn, tile,
TiledArray::Cast<typename Op::argument_type, T>(), op));
}

#ifdef TILEDARRAY_HAS_CUDA
Expand All @@ -278,13 +278,13 @@ class Expr {
::TiledArray::detail::is_cuda_tile_v<T>>::type* = nullptr>
void set_tile(A& array, const I index, const Future<T>& tile,
const std::shared_ptr<Op>& op) const {
auto eval_tile_fn =
&Expr_::template eval_tile<typename A::value_type, const T&,
TiledArray::Cast<typename A::value_type, T>,
Op>;
array.set(index, madness::add_cuda_task(
array.world(), eval_tile_fn, tile,
TiledArray::Cast<typename A::value_type, T>(), op));
auto eval_tile_fn = &Expr_::template eval_tile<
typename A::value_type, const T&,
TiledArray::Cast<typename Op::argument_type, T>, Op>;
array.set(index,
madness::add_cuda_task(
array.world(), eval_tile_fn, tile,
TiledArray::Cast<typename Op::argument_type, T>(), op));
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/TiledArray/expressions/expr_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class ExprEngine : private NO_DEFAULTS {
World* world_; ///< The world where this expression will be evaluated
BipartiteIndexList
indices_; ///< The index list of this expression; bipartite due to need
///< to support recursive tensors (i.e. Tensor-of-Tensor)
///< to support nested tensors (e.g. tensors of tensors)
bool permute_tiles_; ///< Result tile permutation flag (\c true == permute
///< tile)
/// The permutation that will be applied to the outer tensor of tensors
Expand Down
14 changes: 8 additions & 6 deletions src/TiledArray/expressions/mult_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,14 @@ struct EngineTrait<ScalMultEngine<Left, Right, Scalar, Result>> {
/// Multiplication expression engine

/// This implements any expression encoded with the multiplication operator.
/// This includes Hadamard product, e.g. \code (c("i,j")=)a("i,j")*b("i,j")
/// \endcode , and pure contractions, e.g. \code (c("i,j")=)a("i,k")*b("k,j")
/// \endcode . \internal mixed Hadamard-contraction case, e.g. \code
/// c("i,j,l")=a("i,l,k")*b("j,l,k") \endcode , is not supported since
/// this requires that the result labels are assigned by user (currently they
/// are computed by this engine)
/// This includes Hadamard product, e.g.
/// \code (c("i,j")=)a("i,j")*b("i,j") \endcode ,
/// and pure contractions, e.g. \code (c("i,j")=)a("i,k")*b("k,j") \endcode .
/// \internal The mixed Hadamard-contraction case, e.g.
/// \code c("i,j,l")=a("i,l,k")*b("j,l,k") \endcode ,
/// is not supported, since it requires that the result labels be assigned
/// by the user (currently they are computed by this engine).
/// \tparam Left The left-hand engine type
/// \tparam Right The right-hand engine type
/// \tparam Result The result tile type
Expand Down
13 changes: 13 additions & 0 deletions src/TiledArray/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,19 @@ using Array

enum class HostExecutor { Thread, MADWorld, Default = MADWorld };

namespace conversions {

/// Customization point for user-defined conversions from \c From to \c To

/// Only the primary template is declared at this point (no definition
/// accompanies the declaration), so a conversion is available only for
/// those type pairs for which a specialization is provided.
/// Every specialization must define
/// \code
/// To operator()(From&& from);
/// \endcode
/// \tparam To The type produced by the conversion
/// \tparam From The type of the object that is converted
template <typename To, typename From>
struct to;

}  // namespace conversions

} // namespace TiledArray

#ifndef TILEDARRAY_DISABLE_NAMESPACE_TA
Expand Down
6 changes: 3 additions & 3 deletions src/TiledArray/math/linalg/scalapack/block_cyclic.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
template <typename Tile,
typename = std::enable_if_t<
TiledArray::detail::is_contiguous_tensor_v<Tile>>>
Tile extract_submatrix(std::vector<size_t> lo, std::vector<size_t> up) {
Tile extract_submatrix(std::array<size_t, 2> lo, std::array<size_t, 2> up) {
assert(bc_dist_.i_own(lo[0], lo[1]));

auto [i_st, j_st] = bc_dist_.local_indx(lo[0], lo[1]);
Expand Down Expand Up @@ -265,8 +265,8 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
local_mat_.block(i_local, j_local, i_extent, j_extent);

} else {
std::vector<size_t> lo{i, j};
std::vector<size_t> up{i_last, j_last};
std::array<size_t, 2> lo{i, j};
std::array<size_t, 2> up{i_last, j_last};
// N.B. send instead of task guarantees progress
madness::Future<Tensor<T>> remtile_fut = world_base_t::send(
owner(i, j),
Expand Down
14 changes: 14 additions & 0 deletions src/TiledArray/tensor/complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define TILEDARRAY_SRC_TILEDARRAY_TENSOR_COMPLEX_H__INCLUDED

#include <TiledArray/config.h>
#include <TiledArray/fwd.h>
#include <TiledArray/type_traits.h>

namespace TiledArray {
Expand Down Expand Up @@ -301,6 +302,19 @@ TILEDARRAY_FORCE_INLINE
}

} // namespace detail

namespace conversions {

/// Converts a complex number to its real scalar type

/// Specialization of TiledArray::conversions::to for the
/// <tt>std::complex<T></tt> to \c T conversion. The conversion keeps the
/// real part only, hence it is valid only for values whose imaginary part
/// is exactly zero.
/// \tparam T The real scalar type
template <typename T>
struct to<T, std::complex<T>> {
  /// \param v The complex value to convert; <tt>v.imag()</tt> must be zero
  ///          (checked with \c TA_ASSERT )
  /// \return The real part of \c v
  /// \note The operator is \c const since the functor is stateless; this
  ///       allows it to be invoked through a const object as well.
  T operator()(const std::complex<T>& v) const {
    // the exact comparison is intended: any nonzero imaginary part would be
    // silently discarded by the conversion
    TA_ASSERT(v.imag() == 0);
    return v.real();
  }
};

}  // namespace conversions

} // namespace TiledArray

#endif // TILEDARRAY_SRC_TILEDARRAY_TENSOR_COMPLEX_H__INCLUDED
33 changes: 21 additions & 12 deletions src/TiledArray/tensor/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,14 @@ struct transform;
/// \param tensor1 The first argument tensor
/// \param tensors The remaining argument tensors
template <typename TR, typename Op, typename T1, typename... Ts,
typename std::enable_if<
is_tensor<TR, T1, Ts...>::value ||
is_tensor_of_tensor<TR, T1, Ts...>::value>::type* = nullptr>
typename = std::enable_if_t<
detail::is_nested_tensor_v<TR, T1, Ts...> ||
std::is_invocable_r_v<TR, Op, const T1&, const Ts&...>>>
inline TR tensor_op(Op&& op, const T1& tensor1, const Ts&... tensors) {
if constexpr (std::is_invocable_r_v<TR, Op, const T1&, const Ts&...>) {
return std::forward<Op>(op)(tensor1, tensors...);
} else {
static_assert(detail::is_nested_tensor_v<TR, T1, Ts...>);
return TiledArray::detail::transform<TR>()(std::forward<Op>(op), tensor1,
tensors...);
}
Expand All @@ -93,8 +94,7 @@ inline TR tensor_op(Op&& op, const T1& tensor1, const Ts&... tensors) {
/// \param[in] tensors The remaining argument tensors
template <typename TR, typename Op, typename T1, typename... Ts,
typename std::enable_if<
(is_tensor<T1, Ts...>::value ||
is_tensor_of_tensor<TR, T1, Ts...>::value) &&
is_nested_tensor_v<T1, Ts...> &&
is_contiguous_tensor<T1, Ts...>::value>::type* = nullptr>
inline TR tensor_op(Op&& op, const Permutation& perm, const T1& tensor1,
const Ts&... tensors) {
Expand Down Expand Up @@ -219,7 +219,7 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
/// \param[in] tensors The argument tensors
template <typename Op, typename TR, typename... Ts,
typename std::enable_if<
is_tensor_of_tensor<TR, Ts...>::value &&
!is_tensor_v<TR, Ts...> &&
is_contiguous_tensor<TR, Ts...>::value>::type* = nullptr>
inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
TA_ASSERT(!empty(result, tensors...));
Expand All @@ -228,7 +228,11 @@ inline void inplace_tensor_op(Op&& op, TR& result, const Ts&... tensors) {
const auto volume = result.range().volume();

for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...);
if constexpr (std::is_invocable_r_v<void, Op, typename TR::value_type&,
typename Ts::value_type...>)
op(result.at_ordinal(ord), tensors.at_ordinal(ord)...);
else
inplace_tensor_op(op, result.at_ordinal(ord), tensors.at_ordinal(ord)...);
}
}

Expand Down Expand Up @@ -457,7 +461,7 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
tensors.data()...);
}

/// Initialize tensor of tensors with contiguous tensor arguments
/// Initialize nested tensor with contiguous tensor arguments

/// This function initializes the \c i -th element of \c result with the result
/// of \c op(tensors[i]...)
Expand All @@ -470,17 +474,22 @@ inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
/// \param[in] tensors The argument tensors
template <
typename Op, typename TR, typename... Ts,
typename std::enable_if<is_tensor_of_tensor<TR, Ts...>::value &&
typename std::enable_if<(is_nested_tensor<TR, Ts...>::value &&
!is_tensor<TR, Ts...>::value) &&
is_contiguous_tensor<TR>::value>::type* = nullptr>
inline void tensor_init(Op&& op, TR& result, const Ts&... tensors) {
TA_ASSERT(!empty(result, tensors...));
TA_ASSERT(is_range_set_congruent(result, tensors...));

const auto volume = result.range().volume();

for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
new (result.data() + ord) typename TR::value_type(
tensor_op<typename TR::value_type>(op, tensors.at_ordinal(ord)...));
if constexpr (std::is_invocable_r_v<TR, Op, const Ts&...>) {
result = std::forward<Op>(op)(tensors...);
} else {
for (decltype(result.range().volume()) ord = 0ul; ord < volume; ++ord) {
new (result.data() + ord) typename TR::value_type(
tensor_op<typename TR::value_type>(op, tensors.at_ordinal(ord)...));
}
}
}

Expand Down
33 changes: 9 additions & 24 deletions src/TiledArray/tensor/operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,8 @@ namespace TiledArray {
/// \param right The right-hand tensor argument
/// \return A tensor where element \c i is equal to <tt>left[i] + right[i]</tt>
template <typename T1, typename T2,
typename = std::enable_if_t<
detail::is_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value ||
detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value>>
typename = std::enable_if_t<detail::tensors_have_equal_nested_rank_v<
detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>>
inline decltype(auto) operator+(T1&& left, T2&& right) {
return add(std::forward<T1>(left), std::forward<T2>(right));
}
Expand All @@ -58,14 +55,9 @@ inline decltype(auto) operator+(T1&& left, T2&& right) {
/// \param left The left-hand tensor argument
/// \param right The right-hand tensor argument
/// \return A tensor where element \c i is equal to <tt>left[i] - right[i]</tt>
template <
typename T1, typename T2,
typename std::enable_if<
detail::is_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value ||
detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value>::type* =
nullptr>
template <typename T1, typename T2,
typename = std::enable_if_t<detail::tensors_have_equal_nested_rank_v<
detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>>
inline decltype(auto) operator-(T1&& left, T2&& right) {
return subt(std::forward<T1>(left), std::forward<T2>(right));
}
Expand All @@ -80,12 +72,8 @@ inline decltype(auto) operator-(T1&& left, T2&& right) {
/// \return A tensor where element \c i is equal to <tt>left[i] * right[i]</tt>
template <
typename T1, typename T2,
typename std::enable_if<
detail::is_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value ||
detail::is_tensor_of_tensor<detail::remove_cvr_t<T1>,
detail::remove_cvr_t<T2>>::value>::type* =
nullptr>
typename std::enable_if<detail::is_nested_tensor_v<
detail::remove_cvr_t<T1>, detail::remove_cvr_t<T2>>>::type* = nullptr>
inline decltype(auto) operator*(T1&& left, T2&& right) {
return mult(std::forward<T1>(left), std::forward<T2>(right));
}
Expand All @@ -100,8 +88,7 @@ inline decltype(auto) operator*(T1&& left, T2&& right) {
/// \return A tensor where element \c i is equal to <tt> left[i] * right </tt>
template <typename T, typename N,
typename std::enable_if<
(detail::is_tensor<detail::remove_cvr_t<T>>::value ||
detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value) &&
detail::is_nested_tensor_v<detail::remove_cvr_t<T>> &&
detail::is_numeric_v<N>>::type* = nullptr>
inline decltype(auto) operator*(T&& left, N right) {
return scale(std::forward<T>(left), right);
Expand All @@ -118,9 +105,7 @@ template <
typename N, typename T,
typename std::enable_if<
detail::is_numeric_v<N> &&
(detail::is_tensor<detail::remove_cvr_t<T>>::value ||
detail::is_tensor_of_tensor<detail::remove_cvr_t<T>>::value)>::type* =
nullptr>
detail::is_nested_tensor_v<detail::remove_cvr_t<T>>>::type* = nullptr>
inline decltype(auto) operator*(N left, T&& right) {
return scale(std::forward<T>(right), left);
}
Expand Down
32 changes: 18 additions & 14 deletions src/TiledArray/tensor/permute.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,14 @@ inline void fuse_dimensions(SizeType* MADNESS_RESTRICT const fused_size,

/// The expected signature of the input operations is:
/// \code
/// Result::value_type input_op(const Arg0::value_type, const
/// Args::value_type...) \endcode The expected signature of the output
/// operations is: \code void output_op(Result::value_type*, const
/// Result::value_type) \endcode \tparam InputOp The input operation type
/// Result::value_type input_op(const Arg0::value_type,
/// const Args::value_type...)
/// \endcode
/// The expected signature of the output
/// operations is:
/// \code void output_op(Result::value_type*, const Result::value_type)
/// \endcode
/// \tparam InputOp The input operation type
/// \tparam OutputOp The output operation type
/// \tparam Result The result tensor type
/// \tparam Arg0 The first tensor argument type
Expand Down Expand Up @@ -146,13 +150,13 @@ inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
};

// Permute the data
for (typename Result::ordinal_type index = 0ul; index < volume;
index += block_size) {
const typename Result::ordinal_type perm_index = perm_index_op(index);
for (typename Result::ordinal_type ord = 0ul; ord < volume;
ord += block_size) {
const typename Result::ordinal_type perm_ord = perm_index_op(ord);

// Copy the block
math::vector_ptr_op(op, block_size, result.data() + perm_index,
arg0.data() + index, (args.data() + index)...);
math::vector_ptr_op(op, block_size, result.data() + perm_ord,
&arg0.at_ordinal(ord), &args.at_ordinal(ord)...);
}

} else {
Expand Down Expand Up @@ -186,16 +190,16 @@ inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
// Copy data from the input to the output matrix via a series of matrix
// transposes.
for (typename Result::ordinal_type i = 0ul; i < other_fused_size[0]; ++i) {
typename Result::ordinal_type index = i * other_fused_weight[0];
typename Result::ordinal_type ord = i * other_fused_weight[0];
for (typename Result::ordinal_type j = 0ul; j < other_fused_size[2];
++j, index += other_fused_weight[2]) {
++j, ord += other_fused_weight[2]) {
// Compute the ordinal index of the input and output matrices.
typename Result::ordinal_type perm_index = perm_index_op(index);
typename Result::ordinal_type perm_ord = perm_index_op(ord);

math::transpose(input_op, output_op, other_fused_size[1],
other_fused_size[3], result_outer_stride,
result.data() + perm_index, other_fused_weight[1],
arg0.data() + index, (args.data() + index)...);
&result.at_ordinal(perm_ord), other_fused_weight[1],
&arg0.at_ordinal(ord), &args.at_ordinal(ord)...);
}
}
}
Expand Down
Loading