diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c0eaa7fa3..a8d770603f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,9 +59,9 @@ endif(TILEDARRAY_PRERELEASE_ID) # Set install paths ============================================================ -set(TILEDARRAY_INSTALL_BINDIR "bin" +set(TILEDARRAY_INSTALL_BINDIR "bin" CACHE PATH "TiledArray binary install directory") -set(TILEDARRAY_INSTALL_INCLUDEDIR "include" +set(TILEDARRAY_INSTALL_INCLUDEDIR "include" CACHE PATH "TiledArray INCLUDE install directory") set(TILEDARRAY_INSTALL_LIBDIR "lib" CACHE PATH "TiledArray LIB install directory") @@ -162,16 +162,15 @@ else () endif() redefaultable_option(CMAKE_POSITION_INDEPENDENT_CODE "Default value for POSITION_INDEPENDENT_CODE of targets" ${default_CMAKE_POSITION_INDEPENDENT_CODE}) +set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") if(BUILD_SHARED_LIBS) - set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH TRUE) else() - set(BLA_STATIC TRUE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH FALSE) endif() # miscellaneous cmake platform-neutral and platform-specific configuration ============================= -set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX +set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX set(CMAKE_SKIP_RPATH FALSE) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_SKIP_INSTALL_RPATH FALSE) @@ -394,19 +393,19 @@ export(EXPORT tiledarray configure_package_config_file(cmake/tiledarray-config.cmake.in "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" INSTALL_DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" - PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR + PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR TILEDARRAY_INSTALL_INCLUDEDIR TILEDARRAY_INSTALL_LIBDIR TILEDARRAY_INSTALL_DOCDIR TILEDARRAY_INSTALL_CMAKEDIR) # Install config, version, and target files install(EXPORT tiledarray FILE "tiledarray-targets.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) install(FILES "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" "${PROJECT_BINARY_DIR}/tiledarray-config-version.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) diff --git a/examples/scalapack/evp.cpp b/examples/scalapack/evp.cpp index d32a110073..22964ccae0 100644 --- a/examples/scalapack/evp.cpp +++ b/examples/scalapack/evp.cpp @@ -28,7 +28,7 @@ #include #include -#include +#include using Array = TA::TArray; // using Array = TA::TSpArray; @@ -93,23 +93,22 @@ int main(int argc, char** argv) { tensor_symm("i,j") = 0.5 * (tensor("i,j") + tensor("j,i")); tensor("i,j") = tensor_symm("i,j"); - - auto [ evals, evecs_ta ] = TA::heig( tensor ); - + auto [evals, evecs_ta] = TA::scalapack::heig(tensor); //// Check EVP with TA - Array tmp = TA::foreach (evecs_ta, [evals = evals](TA::Tensor& result, - const TA::Tensor& arg) { - result = TA::clone(arg); - - auto range = arg.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) { - result(m, n) = arg(m, n) * evals[n]; - } - }); + Array tmp = + TA::foreach (evecs_ta, [evals = evals](TA::Tensor& result, + const TA::Tensor& arg) { + result = TA::clone(arg); + + auto range = arg.range(); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) { + result(m, n) = arg(m, n) * evals[n]; + } + }); world.gop.fence(); tensor("i,j") = tensor("i,j") - tmp("i,k") * evecs_ta("j,k"); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1b9f182cf..ddab08d0a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,6 +60,20 @@ TiledArray/zero_tensor.h TiledArray/algebra/conjgrad.h TiledArray/algebra/diis.h TiledArray/algebra/utils.h +TiledArray/algebra/cholesky.h +TiledArray/algebra/heig.h +TiledArray/algebra/lu.h +TiledArray/algebra/svd.h +TiledArray/algebra/types.h +TiledArray/algebra/lapack/cholesky.h +TiledArray/algebra/lapack/heig.h +TiledArray/algebra/lapack/util.h +TiledArray/algebra/lapack/lu.h +TiledArray/algebra/lapack/svd.h +TiledArray/algebra/scalapack/cholesky.h +TiledArray/algebra/scalapack/heig.h +TiledArray/algebra/scalapack/lu.h +TiledArray/algebra/scalapack/svd.h TiledArray/conversions/btas.h TiledArray/conversions/clone.h TiledArray/conversions/dense_to_sparse.h @@ -112,8 +126,6 @@ TiledArray/math/partial_reduce.h TiledArray/math/transpose.h TiledArray/math/vector_op.h TiledArray/math/scalapack.h -TiledArray/math/scalapack/heig.h -TiledArray/math/scalapack/chol.h TiledArray/pmap/blocked_pmap.h TiledArray/pmap/cyclic_pmap.h TiledArray/pmap/hash_pmap.h @@ -197,6 +209,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp +TiledArray/algebra/lapack/lapack.cpp ) # the list of libraries on which TiledArray depends on, will be cached later @@ -292,5 +305,3 @@ install( FILES_MATCHING PATTERN "*.h" PATTERN "CMakeFiles" EXCLUDE ) - - diff --git a/src/TiledArray/algebra/cholesky.h b/src/TiledArray/algebra/cholesky.h new file mode 100644 index 0000000000..b34bf38292 --- /dev/null +++ b/src/TiledArray/algebra/cholesky.h @@ -0,0 +1,77 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#endif +#include + +namespace TiledArray { + +template +auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky(A, l_trange); +#endif + return lapack::cholesky(A, l_trange); +} + +template +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky_linv(A, l_trange); +#endif + return lapack::cholesky_linv(A, l_trange); +} + +template +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky_solve(A, B, x_trange); +#endif + return lapack::cholesky_solve(A, B, x_trange); +} + +template +auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky_lsolve(transpose, A, B, l_trange, + x_trange); +#endif + return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); +} + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h new file mode 100644 index 0000000000..68e39c9de9 --- /dev/null +++ b/src/TiledArray/algebra/heig.h @@ -0,0 +1,57 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * heig.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#endif +#include + +namespace TiledArray { + +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, evec_trange); + } +#endif + return lapack::heig(A, evec_trange); +} + +template +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, B, evec_trange); + } +#endif + return lapack::heig(A, B, evec_trange); +} + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/cholesky.h b/src/TiledArray/algebra/lapack/cholesky.h new file mode 100644 index 0000000000..44376bae9a --- /dev/null +++ b/src/TiledArray/algebra/lapack/cholesky.h @@ -0,0 +1,205 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED + +#include +#include +#include +#include + +namespace TiledArray { +namespace lapack { + +namespace detail { + +template +auto make_L_eig(const DistArray& A) { + using Array = DistArray; + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + if (world.rank() == 0) { + lapack::cholesky(A_eig); + } + world.gop.broadcast_serializable(A_eig, 0); + return A_eig; +} + +} // namespace detail + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam Array a DistArray type (i.e., @c is_array_v is true) + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting Cholesky factor. If left + * empty, will default to array.trange() + * + * @returns The lower triangular Cholesky factor L in TA format + * @note this is a collective operation with respect to the world of @p A + */ +template >> +auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { + auto L_eig = detail::make_L_eig(A); + detail::zero_out_upper_triangle(L_eig); + if (l_trange.rank() == 0) l_trange = A.trange(); + return eigen_to_array(A.world(), l_trange, L_eig); +} + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam ContiguousTensor a contiguous tensor type (i.e., @c + * is_contiguous_tensor_v is true) + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @returns The lower triangular Cholesky factor L as a ContiguousTensor + * @note this is a non-collective operation, only computes on the rank on which + * invoked + */ +template >> +auto cholesky(const ContiguousTensor& A) { + auto A_eig = detail::to_eigen(A); + lapack::cholesky(A_eig); + detail::zero_out_upper_triangle(A_eig); + return detail::from_eigen(A_eig, A.range()); +} + +/** + * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. + * Optionally return the Cholesky factor itself + * + * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv + * + * Example Usage: + * + * auto Linv = cholesky_Linv(A, ...) + * auto [L,Linv] = cholesky_Linv(A, ...) + * + * @tparam Array a DistArray type (i.e., @c is_array_v is true) + * @tparam RetL Whether or not to return the cholesky factor + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. + * If left empty, will default to array.trange() + * + * @returns The inverse lower triangular Cholesky factor in TA format + * @note this is a collective operation with respect to the world of @p A + */ +template >> +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { + World& world = A.world(); + auto L_eig = detail::make_L_eig(A); + if constexpr (RetL) detail::zero_out_upper_triangle(L_eig); + + // if need to return L use its copy to compute inverse + decltype(L_eig) L_inv_eig; + + if (world.rank() == 0) { + if (RetL) L_inv_eig = L_eig; + auto& L_inv_eig_ref = RetL ? L_inv_eig : L_eig; + cholesky_linv(L_inv_eig_ref); + detail::zero_out_upper_triangle(L_inv_eig_ref); + } + world.gop.broadcast_serializable(RetL ? L_inv_eig : L_eig, 0); + + if (l_trange.rank() == 0) l_trange = A.trange(); + if constexpr (RetL) + return std::make_tuple(eigen_to_array(world, l_trange, L_eig), + eigen_to_array(world, l_trange, L_inv_eig)); + else + return eigen_to_array(world, l_trange, L_eig); +} + +template >> +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange()) { + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + auto A_eig = detail::to_eigen(A); + auto X_eig = detail::to_eigen(B); + World& world = A.world(); + if (world.rank() == 0) { + cholesky_solve(A_eig, X_eig); + } + world.gop.broadcast_serializable(X_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, X_eig); +} + +template >> +auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange()) { + World& world = A.world(); + auto L_eig = detail::make_L_eig(A); + detail::zero_out_upper_triangle(L_eig); + + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + auto X_eig = detail::to_eigen(B); + if (world.rank() == 0) { + cholesky_lsolve(transpose, L_eig, X_eig); + } + world.gop.broadcast_serializable(X_eig, 0); + if (l_trange.rank() == 0) l_trange = A.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); + return std::make_tuple(eigen_to_array(world, l_trange, L_eig), + eigen_to_array(world, x_trange, X_eig)); +} + +} // namespace lapack +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h new file mode 100644 index 0000000000..5fe4270c8f --- /dev/null +++ b/src/TiledArray/algebra/lapack/heig.h @@ -0,0 +1,116 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * heig.h + * Created: 19 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED + +#include + +#include +#include +#include + +namespace TiledArray::lapack { + +/** + * @brief Solve the standard eigenvalue problem with LAPACK + * + * A(i,k) X(k,j) = X(i,j) E(j) + * + * Example Usage: + * + * auto [E, X] = heig(A, ...) + * + * @tparam Array Input array type + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, + * will default to array.trange() + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { + using numeric_type = typename lapack::array_traits::numeric_type; + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, evals); + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple( + evals, + eigen_to_array(world, evec_trange, A_eig) + ); +} + +/** + * @brief Solve the generalized eigenvalue problem with LAPACK + * + * A(i,k) X(k,j) = B(i,k) X(k,j) E(j) + * + * with + * + * X(k,i) B(k,l) X(l,j) = I(i,j) + * + * Example Usage: + * + * auto [E, X] = heig(A, B, ...) + * + * @tparam Array Input array type + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] B Positive-definite matrix + * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, + * will default to array.trange() + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { + using numeric_type = typename lapack::array_traits::numeric_type; + (void)lapack::array_traits{}; + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, B_eig, evals); + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple( + evals, + eigen_to_array(A.world(), evec_trange, A_eig) + ); +} + +} // namespace TiledArray::lapack + +#endif // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp new file mode 100644 index 0000000000..640ef695f1 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -0,0 +1,282 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * cholesky.h + * Created: 16 October, 2020 + * + */ + +#include +#include +#include +#include +#include + +#define TA_LAPACK_CALL(name, args...) \ + typedef T numeric_type; \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +#define TA_LAPACK_GESV(...) TA_LAPACK_CALL(gesv, __VA_ARGS__) +#define TA_LAPACK_GETRF(...) TA_LAPACK_CALL(getrf, __VA_ARGS__) +#define TA_LAPACK_GETRI(...) TA_LAPACK_CALL(getri, __VA_ARGS__) + +#ifdef MADNESS_LINALG_USE_LAPACKE + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__) + +#else + +#ifdef FORTRAN_LINKAGE_LCU +#define dtrtri dtrtri_ +#define dtrtrs dtrtrs_ +#define dposv dposv_ +#endif + +extern "C" { // these arent in madness/clapack_fortran.h +void dtrtri(const char* uplo, const char* diag, + const integer* n, + const real8* a, const integer* lda, + integer *info, + char_len, char_len); +void dtrtrs(const char* uplo, const char* trans, const char* diag, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len, char_len, char_len); +void dposv(const char* uplo, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len); +} + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__, sizeof(char), sizeof(char), sizeof(char)) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__, sizeof(char), sizeof(char)) + +#endif // MADNESS_LINALG_USE_LAPACKE + +namespace TiledArray::lapack { + +template +void cholesky(Matrix& A) { + char uplo = 'L'; + integer n = A.rows(); + auto* a = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_POTRF(&uplo, &n, a, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky failed"); +} + +template +void cholesky_linv(Matrix& A) { + char uplo = 'L'; + char diag = 'N'; + integer n = A.rows(); + auto* l = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_TRTRI(&uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_linv failed"); +} + +template +void cholesky_solve(Matrix& A, Matrix& X) { + char uplo = 'L'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_POSV(&uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_solve failed"); +} + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + char diag = 'N'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_TRTRS(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_lsolve failed"); +} + +template +void heig(Matrix& A, std::vector& W) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void heig(Matrix& A, Matrix& B, std::vector& W) { + integer itype = 1; + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void svd(Matrix& A, std::vector& S, Matrix* U, Matrix* VT) { + integer m = A.rows(); + integer n = A.cols(); + T* a = A.data(); + integer lda = A.rows(); + + S.resize(std::min(m, n)); + T* s = S.data(); + + char jobu = 'N'; + T* u = nullptr; + integer ldu = m; + if (U) { + jobu = 'A'; + U->resize(m, n); + u = U->data(); + ldu = U->rows(); + } + + char jobvt = 'N'; + T* vt = nullptr; + integer ldvt = n; + if (VT) { + jobvt = 'A'; + VT->resize(n, m); + vt = VT->data(); + ldvt = VT->rows(); + } + + integer lwork = -1; + integer info; + T lwork_dummy; + + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::svd failed"); +} + +template +void lu_solve(Matrix &A, Matrix &B) { + integer n = A.rows(); + integer nrhs = B.cols(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + std::vector ipiv(n); + integer info; + TA_LAPACK_GESV(&n, &nrhs, a, &lda, ipiv.data(), b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_solve failed"); +} + +template +void lu_inv(Matrix &A) { + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + integer lwork = -1; + std::vector work(1); + std::vector ipiv(n); + integer info; + TA_LAPACK_GETRF(&n, &n, a, &lda, ipiv.data(), &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + lwork = (integer)work[0]; + work.resize(lwork); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); +} + + +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ + template void cholesky(MATRIX&); \ + template void cholesky_linv(MATRIX&); \ + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void heig(MATRIX&, VECTOR&); \ + template void heig(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ + template void lu_solve(MATRIX&, MATRIX&); \ + template void lu_inv(MATRIX&); + +TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); +//TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); + +} // namespace TiledArray::lapack diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h new file mode 100644 index 0000000000..78c767bfc5 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.h @@ -0,0 +1,76 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED + +#include +#include +#include + +namespace TiledArray::lapack { + +template +struct array_traits { + using scalar_type = typename A::scalar_type; + using numeric_type = typename A::numeric_type; + static const bool complex = !std::is_same_v; + static_assert( + std::is_same_v, + "TA::lapack is only usable with a DistArray of scalar types" + ); +}; + +template +using Matrix = Eigen::Matrix; + +template +void cholesky(Matrix &A); + +template +void cholesky_linv(Matrix &A); + +template +void cholesky_solve(Matrix &A, Matrix &X); + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X); + +template +void heig(Matrix &A, std::vector &W); + +template +void heig(Matrix &A, Matrix &B, std::vector &W); + +template +void svd(Matrix &A, std::vector &S, Matrix *U, Matrix *VT); + +template +void lu_solve(Matrix &A, Matrix &B); + +template +void lu_inv(Matrix &A); + +} + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/lu.h b/src/TiledArray/algebra/lapack/lu.h new file mode 100644 index 0000000000..389af28942 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lu.h @@ -0,0 +1,70 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * lu.h + * Created: 19 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Solve a linear system via LU factorization + */ +template +auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) { + (void)lapack::array_traits{}; + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + if (world.rank() == 0) { + lapack::lu_solve(A_eig, B_eig); + } + world.gop.broadcast_serializable(B_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, B_eig); +} + +/** + * @brief Invert a matrix via LU + */ +template +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) { + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + if (world.rank() == 0) { + lapack::lu_inv(A_eig); + } + world.gop.broadcast_serializable(A_eig, 0); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); + return eigen_to_array(A.world(), ainv_trange, A_eig); +} + +} // namespace TiledArray::lapack + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/svd.h b/src/TiledArray/algebra/lapack/svd.h new file mode 100644 index 0000000000..8ea0afd3da --- /dev/null +++ b/src/TiledArray/algebra/lapack/svd.h @@ -0,0 +1,103 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * + * A(i,j) = S(k) U(i,k) conj(V(j,k)) + * + * Example Usage: + * + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { + + using T = typename Array::numeric_type; + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + + constexpr bool svd_all_vectors = std::is_same_v; + constexpr bool need_u = std::is_same_v or svd_all_vectors; + constexpr bool need_vt = std::is_same_v or svd_all_vectors; + + std::vector S; + std::unique_ptr< Matrix > U, VT; + + if constexpr (need_u) U = std::make_unique< Matrix >(); + if constexpr (need_vt) VT = std::make_unique< Matrix >(); + + if (world.rank() == 0) { + lapack::svd(A_eig, S, U.get(), VT.get()); + } + + world.gop.broadcast_serializable(S, 0); + if (U) world.gop.broadcast_serializable(*U, 0); + if (VT) world.gop.broadcast_serializable(*VT, 0); + + auto make_array = [&world](auto && ... args) { + return eigen_to_array(world, args...); + }; + + if constexpr (need_u && need_vt) { + return std::tuple(S, make_array(u_trange, *U), make_array(vt_trange, *VT)); + } + if constexpr (need_u && !need_vt) { + return std::tuple(S, make_array(u_trange, *U)); + } + if constexpr (!need_u && need_vt) { + return std::tuple(S, make_array(vt_trange, *VT)); + } + + if constexpr (!need_u && !need_vt) return S; + +} + +} // namespace scalapack::TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h new file mode 100644 index 0000000000..daf822ff8e --- /dev/null +++ b/src/TiledArray/algebra/lapack/util.h @@ -0,0 +1,109 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * util.h + * Created: 19 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED + +#include +#include + +namespace TiledArray { +namespace lapack { +namespace detail { + +template +auto to_eigen(const DistArray& A) { + auto A_repl = A; + A_repl.make_replicated(); + return array_to_eigen(A_repl); +} + +template >> +auto to_eigen(const ContiguousTensor& A) { + using numeric_type = TiledArray::detail::numeric_t; + static_assert( + std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of " + "scalar types"); + TA_ASSERT(A.range().rank() == 1 || A.range().rank() == 2); + using colmajor_matrix_type = Eigen::Matrix; + colmajor_matrix_type result(A.range().extent(0), + A.range().rank() == 2 ? A.range().extent(1) : 1); + constexpr const auto layout = + TiledArray::detail::ordinal_traits::type; + if (layout == TiledArray::OrdinalType::RowMajor) { + using rowmajor_matrix_type = Eigen::Matrix; + auto result_map = Eigen::Map( + A.data(), result.rows(), result.cols()); + result = result_map; + } else if constexpr (layout == TiledArray::OrdinalType::ColMajor) { + using rowmajor_matrix_type = Eigen::Matrix; + auto result_map = Eigen::Map( + A.data(), result.rows(), result.cols()); + result = result_map; + } else + abort(); + return result; +} + +template >> +auto from_eigen( + const Eigen::Matrix& A, + typename ContiguousTensor::range_type range = {}) { + using numeric_type = TiledArray::detail::numeric_t; + static_assert( + std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of " + "scalar types"); + using range_type = typename ContiguousTensor::range_type; + if (range.rank() == 0) + range = range_type(A.rows(), A.cols()); + else + TA_ASSERT(A.rows() * A.cols() == range.volume()); + ContiguousTensor result(range); + auto result_map = eigen_map(result, A.rows(), A.cols()); + result_map = A; + return result; +} + +template +void zero_out_upper_triangle(Eigen::MatrixBase& A) { + A.template triangularView().setZero(); +} + +} // namespace detail + +} // namespace lapack +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h new file mode 100644 index 0000000000..330f49c8f7 --- /dev/null +++ b/src/TiledArray/algebra/lu.h @@ -0,0 +1,43 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * lu.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LU_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#endif +#include + +namespace TiledArray { + +#if TILEDARRAY_HAS_SCALAPACK +using scalapack::lu_inv; +using scalapack::lu_solve; +#else +#endif + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h new file mode 100644 index 0000000000..fbba6826d0 --- /dev/null +++ b/src/TiledArray/algebra/scalapack/all.h @@ -0,0 +1,37 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * scalapack.h + * Created: 25 May, 2020 + * + */ + +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#include +#include +#include +#endif + +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/cholesky.h b/src/TiledArray/algebra/scalapack/cholesky.h new file mode 100644 index 0000000000..535db24d41 --- /dev/null +++ b/src/TiledArray/algebra/scalapack/cholesky.h @@ -0,0 +1,283 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * chol.h + * Created: 8 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK + +#include +#include +#include + +#include +#include +#include +#include + +namespace TiledArray { +namespace scalapack { + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting Cholesky factor. If left + * empty, will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 + * + * @returns The lower triangular Cholesky factor L in TA format + */ +template +auto cholesky(const Array& A, TiledRange l_trange = TiledRange(), + size_t NB = default_block_size()) { + auto& world = A.world(); + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = matrix.dims(); + if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky"); + + auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); + auto desc = matrix.dist().descinit_noerror(N, N, Mloc); + + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, desc); + if (info) TA_EXCEPTION("Cholesky Failed"); + + // Zero out the upper triangle + zero_triangle(blacspp::Triangle::Upper, matrix); + + if (l_trange.rank() == 0) l_trange = A.trange(); + + world.gop.fence(); + auto L = scalapack::block_cyclic_to_array(matrix, l_trange); + world.gop.fence(); + + return L; +} + +/** + * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. + * Optionally return the Cholesky factor itself + * + * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv + * + * Example Usage: + * + * auto Linv = cholesky_Linv(A, ...) + * auto [L,Linv] = cholesky_Linv(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * @tparam RetL Whether or not to return the cholesky factor + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. + * If left empty, will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 + * + * @returns The inverse lower triangular Cholesky factor in TA format + */ +template +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange(), + size_t NB = default_block_size()) { + using value_type = typename Array::element_type; + + auto& world = A.world(); + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = matrix.dims(); + if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky"); + + auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); + auto desc = matrix.dist().descinit_noerror(N, N, Mloc); + + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, desc); + if (info) TA_EXCEPTION("Cholesky Failed"); + + // Zero out the upper triangle + zero_triangle(blacspp::Triangle::Upper, matrix); + + // Copy L if needed + std::shared_ptr> L_sca = nullptr; + if constexpr (RetL) { + L_sca = std::make_shared>( + world, grid, N, N, NB, NB); + L_sca->local_mat() = matrix.local_mat(); + } + + // Compute inverse + info = + scalapackpp::ptrtri(blacspp::Triangle::Lower, blacspp::Diagonal::NonUnit, + N, matrix.local_mat().data(), 1, 1, desc); + if (info) TA_EXCEPTION("TRTRI Failed"); + + if (l_trange.rank() == 0) l_trange = A.trange(); + + world.gop.fence(); + auto Linv = scalapack::block_cyclic_to_array(matrix, l_trange); + world.gop.fence(); + + if constexpr (RetL) { + auto L = scalapack::block_cyclic_to_array(*L_sca, l_trange); + world.gop.fence(); + return std::tuple(L, Linv); + } else { + return Linv; + } +} + +template +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size()) { + auto& world = A.world(); + /* + if( world != B.world() ) { + TA_EXCEPTION("A and B must be distributed on same MADWorld context"); + } + */ + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve"); + + auto [B_N, NRHS] = B_sca.dims(); + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); + + scalapackpp::scalapack_desc desc_a, desc_b; + { + auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); + desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); + } + + { + auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); + desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); + } + + auto info = scalapackpp::pposv(blacspp::Triangle::Lower, N, NRHS, + A_sca.local_mat().data(), 1, 1, desc_a, + B_sca.local_mat().data(), 1, 1, desc_b); + if (info) TA_EXCEPTION("Cholesky Solve Failed"); + + if (x_trange.rank() == 0) x_trange = B.trange(); + + world.gop.fence(); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); + world.gop.fence(); + + return X; +} + +template +auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size()) { + auto& world = A.world(); + /* + if( world != B.world() ) { + TA_EXCEPTION("A and B must be distributed on same MADWorld context"); + } + */ + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve"); + + auto [B_N, NRHS] = B_sca.dims(); + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); + + scalapackpp::scalapack_desc desc_a, desc_b; + { + auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); + desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); + } + + { + auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); + desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); + } + + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + A_sca.local_mat().data(), 1, 1, desc_a); + if (info) TA_EXCEPTION("Cholesky Failed"); + + info = scalapackpp::ptrtrs( + blacspp::Triangle::Lower, to_scalapackpp_transposeflag(trans), + blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, + desc_a, B_sca.local_mat().data(), 1, 1, desc_b); + if (info) TA_EXCEPTION("TRTRS Failed"); + + // Zero out the upper triangle + zero_triangle(blacspp::Triangle::Upper, A_sca); + + if (l_trange.rank() == 0) l_trange = A.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); + + world.gop.fence(); + auto L = scalapack::block_cyclic_to_array(A_sca, l_trange); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); + world.gop.fence(); + + return std::tuple(L, X); +} + +} // namespace scalapack +} // namespace TiledArray + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/heig.h b/src/TiledArray/algebra/scalapack/heig.h similarity index 57% rename from src/TiledArray/math/scalapack/heig.h rename to src/TiledArray/algebra/scalapack/heig.h index e51361c651..488867b857 100644 --- a/src/TiledArray/math/scalapack/heig.h +++ b/src/TiledArray/algebra/scalapack/heig.h @@ -23,17 +23,18 @@ * Edited: 8 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK #include -#include #include +#include namespace TiledArray { +namespace scalapack { /** * @brief Solve the standard eigenvalue problem with ScaLAPACK @@ -44,62 +45,55 @@ namespace TiledArray { * * auto [E, X] = heig(A, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange() ) { - +auto heig(const Array& A, TiledRange evec_trange = TiledRange(), + size_t NB = default_block_size()) { using value_type = typename Array::element_type; - using real_type = scalapackpp::detail::real_t; + using real_type = scalapackpp::detail::real_t; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; + // auto world_comm = MPI_COMM_WORLD; blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for EVP"); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - std::vector evals( N ); - scalapack::BlockCyclicMatrix evecs( world, grid, N, N, NB, NB ); + std::vector evals(N); + scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc, evals.data(), - evecs.local_mat().data(), 1, 1, desc ); + scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, desc, evals.data(), + evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); - if( evec_trange.rank() == 0 ) evec_trange = A.trange(); + if (evec_trange.rank() == 0) evec_trange = A.trange(); world.gop.fence(); - auto evecs_ta = scalapack::block_cyclic_to_array( evecs, evec_trange ); + auto evecs_ta = scalapack::block_cyclic_to_array(evecs, evec_trange); world.gop.fence(); - - return std::tuple( evals, evecs_ta ); - + return std::tuple(evals, evecs_ta); } - - - - /** * @brief Solve the generalized eigenvalue problem with ScaLAPACK * @@ -113,69 +107,66 @@ auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange( * * auto [E, X] = heig(A, B, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] B Metric - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -auto heig( const ArrayA& A, const ArrayB& B, - size_t NB = 128, TiledRange evec_trange = TiledRange() ) { - +auto heig(const ArrayA& A, const ArrayB& B, + TiledRange evec_trange = TiledRange(), + size_t NB = default_block_size()) { using value_type = typename ArrayA::element_type; - static_assert( std::is_same_v ); - using real_type = scalapackpp::detail::real_t; + static_assert(std::is_same_v); + using real_type = scalapackpp::detail::real_t; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; + // auto world_comm = MPI_COMM_WORLD; blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for EVP"); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); auto [B_M, B_N] = B_sca.dims(); - if( B_M != M or B_N != N ) + if (B_M != M or B_N != N) TA_EXCEPTION("A and B must have the same dimensions"); auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); auto desc = A_sca.dist().descinit_noerror(N, N, Mloc); - std::vector evals( N ); - scalapack::BlockCyclicMatrix evecs( world, grid, N, N, NB, NB ); + std::vector evals(N); + scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig_gen( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, - A_sca.local_mat().data(), 1, 1, desc, - B_sca.local_mat().data(), 1, 1, desc, - evals.data(), - evecs.local_mat().data(), 1, 1, desc ); + scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, + A_sca.local_mat().data(), 1, 1, desc, B_sca.local_mat().data(), 1, 1, + desc, evals.data(), evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); - if( evec_trange.rank() == 0 ) evec_trange = A.trange(); + if (evec_trange.rank() == 0) evec_trange = A.trange(); world.gop.fence(); - auto evecs_ta = scalapack::block_cyclic_to_array( evecs, evec_trange ); + auto evecs_ta = + scalapack::block_cyclic_to_array(evecs, evec_trange); world.gop.fence(); - - return std::tuple( evals, evecs_ta ); - + return std::tuple(evals, evecs_ta); } -} // namespace TiledArray +} // namespace scalapack +} // namespace TiledArray -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/lu.h b/src/TiledArray/algebra/scalapack/lu.h similarity index 50% rename from src/TiledArray/math/scalapack/lu.h rename to src/TiledArray/algebra/scalapack/lu.h index 7581425f2c..8c65383846 100644 --- a/src/TiledArray/math/scalapack/lu.h +++ b/src/TiledArray/algebra/scalapack/lu.h @@ -22,46 +22,46 @@ * Created: 19 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK +#include #include -#include #include #include #include namespace TiledArray { +namespace scalapack { /** * @brief Solve a linear system via LU factorization */ template -auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 128, - TiledRange x_trange = TiledRange() ) { - +auto lu_solve(const ArrayA& A, const ArrayB& B, + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { using value_type = typename ArrayA::element_type; - static_assert(std::is_same_v); + static_assert(std::is_same_v); auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, MB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for LU Solve"); + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for LU Solve"); auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N); auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc); @@ -69,77 +69,68 @@ auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 12 auto [B_Mloc, B_Nloc] = B_sca.dist().get_local_dims(N, NRHS); auto desc_b = B_sca.dist().descinit_noerror(N, NRHS, B_Mloc); - std::vector IPIV( A_Mloc + MB ); + std::vector IPIV(A_Mloc + MB); - auto info = scalapackpp::pgesv( N, NRHS, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data(), - B_sca.local_mat().data(), 1, 1, desc_b ); + auto info = + scalapackpp::pgesv(N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, + IPIV.data(), B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("LU Solve Failed"); - if( x_trange.rank() == 0 ) x_trange = B.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); world.gop.fence(); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); world.gop.fence(); return X; - } /** * @brief Invert a matrix via LU */ template -auto lu_inv( const Array& A, size_t NB = 128, size_t MB = 128, - TiledRange ainv_trange = TiledRange() ) { - +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for LU Inverse"); + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for LU Inverse"); auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N); auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc); - - std::vector IPIV( A_Mloc + MB ); + std::vector IPIV(A_Mloc + MB); { - auto info = scalapackpp::pgetrf( N, N, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() ); - if (info) TA_EXCEPTION("LU Failed"); + auto info = scalapackpp::pgetrf(N, N, A_sca.local_mat().data(), 1, 1, + desc_a, IPIV.data()); + if (info) TA_EXCEPTION("LU Failed"); } { - auto info = scalapackpp::pgetri( N, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() ); - if (info) TA_EXCEPTION("LU Inverse Failed"); + auto info = scalapackpp::pgetri(N, A_sca.local_mat().data(), 1, 1, desc_a, + IPIV.data()); + if (info) TA_EXCEPTION("LU Inverse Failed"); } - if( ainv_trange.rank() == 0 ) ainv_trange = A.trange(); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); world.gop.fence(); - auto Ainv = scalapack::block_cyclic_to_array( A_sca, ainv_trange ); + auto Ainv = scalapack::block_cyclic_to_array(A_sca, ainv_trange); world.gop.fence(); return Ainv; - } +} // namespace scalapack +} // namespace TiledArray - - - -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED - - +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h new file mode 100644 index 0000000000..d73f9e05dd --- /dev/null +++ b/src/TiledArray/algebra/scalapack/svd.h @@ -0,0 +1,155 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK + +#include +#include +#include + +namespace TiledArray { +namespace scalapack { + +/** + * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * + * A(i,j) = S(k) U(i,k) conj(V(j,k)) + * + * Example Usage: + * + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). + * @param[in] MB ScaLAPACK row block size. Defaults to 128 + * @param[in] NB ScaLAPACK column block size. Defaults to 128 + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template > +auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, + size_t MB = default_block_size(), size_t NB = default_block_size()) { + using value_type = typename Array::element_type; + using real_type = scalapackpp::detail::real_t; + + auto& world = A.world(); + auto world_comm = world.mpi.comm().Get_mpi_comm(); + // auto world_comm = MPI_COMM_WORLD; + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = matrix.dims(); + auto SVD_SIZE = std::min(M, N); + + auto [AMloc, ANloc] = matrix.dist().get_local_dims(M, N); + auto [UMloc, UNloc] = matrix.dist().get_local_dims(M, SVD_SIZE); + auto [VTMloc, VTNloc] = matrix.dist().get_local_dims(SVD_SIZE, N); + + auto desc_a = matrix.dist().descinit_noerror(M, N, AMloc); + auto desc_u = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc); + auto desc_vt = matrix.dist().descinit_noerror(SVD_SIZE, N, VTMloc); + + std::vector S(SVD_SIZE); + + constexpr bool need_uv = std::is_same_v; + constexpr bool need_u = std::is_same_v or need_uv; + constexpr bool need_vt = std::is_same_v or need_uv; + + std::shared_ptr> U = nullptr, + VT = nullptr; + + scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors; + scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors; + + value_type* U_ptr = nullptr; + value_type* VT_ptr = nullptr; + + if constexpr (need_u) { + JOBU = scalapackpp::VectorFlag::Vectors; + U = std::make_shared>( + world, grid, M, SVD_SIZE, MB, NB); + + U_ptr = U->local_mat().data(); + } + + if constexpr (need_vt) { + JOBVT = scalapackpp::VectorFlag::Vectors; + VT = std::make_shared>( + world, grid, SVD_SIZE, N, MB, NB); + + VT_ptr = VT->local_mat().data(); + } + + auto info = scalapackpp::pgesvd(JOBU, JOBVT, M, N, matrix.local_mat().data(), + 1, 1, desc_a, S.data(), U_ptr, 1, 1, desc_u, + VT_ptr, 1, 1, desc_vt); + if (info) TA_EXCEPTION("SVD Failed"); + + world.gop.fence(); + + if constexpr (need_uv) { + auto U_ta = scalapack::block_cyclic_to_array(*U, u_trange); + auto VT_ta = scalapack::block_cyclic_to_array(*VT, vt_trange); + world.gop.fence(); + + return std::tuple(S, U_ta, VT_ta); + + } else if constexpr (need_u) { + auto U_ta = scalapack::block_cyclic_to_array(*U, u_trange); + world.gop.fence(); + + return std::tuple(S, U_ta); + + } else if constexpr (need_vt) { + auto VT_ta = scalapack::block_cyclic_to_array(*VT, vt_trange); + world.gop.fence(); + + return std::tuple(S, VT_ta); + + } else { + return S; + } +} + +} // namespace scalapack +} // namespace TiledArray + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h new file mode 100644 index 0000000000..439d2fa590 --- /dev/null +++ b/src/TiledArray/algebra/scalapack/util.h @@ -0,0 +1,104 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * util.h + * Created: 19 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK + +#include +#include + +namespace TiledArray { + +namespace scalapack { + +inline scalapackpp::TransposeFlag to_scalapackpp_transposeflag( + TransposeFlag t) { + switch (t) { + case TransposeFlag::NoTranspose: + return scalapackpp::TransposeFlag::NoTranspose; + case TransposeFlag::Transpose: + return scalapackpp::TransposeFlag::Transpose; + case TransposeFlag::ConjTranspose: + return scalapackpp::TransposeFlag::ConjTranspose; + default: + abort(); + } +} + +template +void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, + bool zero_diag = false) { + auto zero_el = [&](size_t I, size_t J) { + if (A.dist().i_own(I, J)) { + auto [i, j] = A.dist().local_indx(I, J); + A.local_mat()(i, j) = 0.; + } + }; + + auto [M, N] = A.dims(); + + // Zero the lower triangle + if (tri == blacspp::Triangle::Lower) { + if (zero_diag) + for (size_t j = 0; j < N; ++j) + for (size_t i = j; i < M; ++i) zero_el(i, j); + else + for (size_t j = 0; j < N; ++j) + for (size_t i = j + 1; i < M; ++i) zero_el(i, j); + + // Zero the upper triangle + } else { + if (zero_diag) + for (size_t j = 0; j < N; ++j) + for (size_t i = 0; i <= std::min(j, M); ++i) zero_el(i, j); + else + for (size_t j = 0; j < N; ++j) + for (size_t i = 0; i < std::min(j, M); ++i) zero_el(i, j); + } +} + +namespace detail { +inline std::size_t& default_block_size_accessor() { + static std::size_t block_size = 128; + return block_size; +} +} // namespace detail + +inline std::size_t default_block_size() { + return detail::default_block_size_accessor(); +} + +inline void set_default_block_size(std::size_t NB) { + TA_ASSERT(NB > 0); + detail::default_block_size_accessor() = NB; +} + +} // namespace scalapack +} // namespace TiledArray + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h new file mode 100644 index 0000000000..3c87fbc087 --- /dev/null +++ b/src/TiledArray/algebra/svd.h @@ -0,0 +1,48 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * svd.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SVD_H__INCLUDED + +#include +#ifdef TILEDARRAY_HAS_SCALAPACK +#include +#endif // TILEDARRAY_HAS_SCALAPACK +#include + +namespace TiledArray { + +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::svd(A, u_trange, vt_trange); + } +#endif + return lapack::svd(A, u_trange, vt_trange); +} + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/types.h b/src/TiledArray/algebra/types.h new file mode 100644 index 0000000000..1cc0a50e57 --- /dev/null +++ b/src/TiledArray/algebra/types.h @@ -0,0 +1,66 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED + +#include +#include + +namespace TiledArray { + +enum TransposeFlag { NoTranspose, Transpose, ConjTranspose }; + +struct SVDReturnType {}; +struct SVDValuesOnly : public SVDReturnType {}; +struct SVDLeftVectors : public SVDReturnType {}; +struct SVDRightVectors : public SVDReturnType {}; +struct SVDAllVectors : public SVDReturnType {}; + +namespace detail { + +template +struct is_svd_return_type : public std::false_type {}; + +template +struct is_svd_return_type< + SVDType, std::enable_if_t>> + : public std::true_type {}; + +template +inline constexpr bool is_svd_return_type_v = is_svd_return_type::value; + +template +struct enable_if_svd_return_type + : public std::enable_if, U> {}; + +template +using enable_if_svd_return_type_t = + typename enable_if_svd_return_type::type; + +} // namespace detail + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index 7380a1faf1..b43602c98e 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -61,53 +61,57 @@ typedef Eigen::Matrix EigenVectorXl; /// Construct a const Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \param m The number of rows in the result matrix /// \param n The number of columns in the result matrix /// \return An m x n Eigen matrix map for \c tensor /// \throw TiledArray::Exception When m * n is not equal to \c tensor size -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(const T& tensor, const std::size_t m, const std::size_t n) { TA_ASSERT((m * n) == tensor.size()); return Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign>(tensor.data(), m, n); } /// Construct an Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \param m The number of rows in the result matrix /// \param n The number of columns in the result matrix /// \return An m x n Eigen matrix map for \c tensor /// \throw TiledArray::Exception When m * n is not equal to \c tensor size -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(T& tensor, const std::size_t m, const std::size_t n) { TA_ASSERT((m * n) == tensor.size()); return Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign>(tensor.data(), m, n); } /// Construct a const Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object -/// \param n The number of elements in the result matrix -/// \return An n element Eigen vector map for \c tensor -/// \throw TiledArray::Exception When n is not equal to \c tensor size +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \param tensor The +/// tensor object \param n The number of elements in the result matrix \return +/// An n element Eigen vector map for \c tensor \throw TiledArray::Exception +/// When n is not equal to \c tensor size template >* = nullptr> inline Eigen::Map< @@ -127,7 +131,7 @@ eigen_map(const T& tensor, const std::size_t n) { /// \param tensor The tensor object /// \param n The number of elements in the result matrix /// \return An n element Eigen vector map for \c tensor -/// \throw TiledArray::Exception When m * n is not equal to \c tensor size +/// \throw TiledArray::Exception When n is not equal to \c tensor size template >* = nullptr> inline Eigen::Map, @@ -141,42 +145,49 @@ eigen_map(T& tensor, const std::size_t n) { /// Construct a const Eigen::Map object for a given Tensor object -/// The dimensions of the result tensor -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// The dimensions of the result tensor are extracted from the tensor itself +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \return An Eigen matrix map for \c tensor /// \throw TiledArray::Exception When \c tensor dimensions are not equal to 2 /// or 1. -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(const T& tensor) { TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); const auto* MADNESS_RESTRICT const tensor_extent = tensor.range().extent_data(); - return eigen_map(tensor, tensor_extent[0], - (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); + return eigen_map( + tensor, tensor_extent[0], + (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); } /// Construct an Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// The dimensions of the result tensor are extracted from the tensor itself +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \return An Eigen matrix map for \c tensor /// \throw When \c tensor dimensions are not equal to 2 or 1. -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(T& tensor) { TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); const auto* MADNESS_RESTRICT const tensor_extent = tensor.range().extent_data(); - return eigen_map(tensor, tensor_extent[0], - (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); + return eigen_map( + tensor, tensor_extent[0], + (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); } /// Copy a block of an Eigen matrix into a tensor @@ -351,15 +362,19 @@ void counted_tensor_to_eigen_submatrix(const T& tensor, } // namespace detail +// clang-format off /// Convert an Eigen matrix into an Array object /// This function will copy the content of \c matrix into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in matrix is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. +/// /// Usage: /// \code /// Eigen::MatrixXd m(100, 100); @@ -383,16 +398,18 @@ void counted_tensor_to_eigen_submatrix(const T& tensor, /// \param world The world where the array will live /// \param trange The tiled range of the new array /// \param matrix The Eigen matrix to be copied -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. +/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix -/// \throw TiledArray::Exception When world size is greater than 1 -/// \note If using 2 or more World ranks, set \c replicated=true and make sure -/// \c matrix is the same on each rank! +// clang-format on template A eigen_to_array(World& world, const typename A::trange_type& trange, const Eigen::MatrixBase& matrix, - bool replicated = false) { + bool replicated = false, + std::shared_ptr pmap = {}) { typedef typename A::index1_type size_type; // Check that trange matches the dimensions of other if ((matrix.cols() > 1) && (matrix.rows() > 1)) { @@ -417,36 +434,35 @@ A eigen_to_array(World& world, const typename A::trange_type& trange, "matrix size."); } - // Check that this is not a distributed computing environment - if (!replicated) - TA_USER_ASSERT(world.size() == 1, - "An array cannot be assigned with an Eigen::Matrix when the " - "number of World ranks is greater than 1."); - // Create a new tensor - A array = (replicated && (world.size() > 1) - ? A(world, trange, - std::static_pointer_cast( - std::make_shared( - world, trange.tiles_range().volume()))) - : A(world, trange)); + if (replicated && (world.size() > 1)) + pmap = std::static_pointer_cast( + std::make_shared( + world, trange.tiles_range().volume())); + A array = (pmap ? A(world, trange, pmap) : A(world, trange)); // Spawn tasks to copy Eigen to an Array madness::AtomicInt counter; counter = 0; std::int64_t n = 0; for (std::size_t i = 0; i < array.size(); ++i) { - world.taskq.add(&detail::counted_eigen_submatrix_to_tensor, - &matrix, &array, i, &counter); - ++n; + if (array.is_local(i)) { + world.taskq.add(&detail::counted_eigen_submatrix_to_tensor, + &matrix, &array, i, &counter); + ++n; + } } // Wait until the write tasks are complete array.world().await([&counter, n]() { return counter == n; }); + // truncate, n.b. this can replace the wait above + array.truncate(); + return array; } +// clang-format off /// Convert an Array object into an Eigen matrix object /// This function will copy the content of an \c Array object into matrix. The @@ -465,10 +481,14 @@ A eigen_to_array(World& world, const typename A::trange_type& trange, /// \tparam EigenStorageOrder The storage order of the resulting Eigen::Matrix /// object; the default is Eigen::ColMajor, i.e. the column-major storage /// \param array The array to be converted. It must be replicated if using 2 or -/// more World ranks. \return an Eigen matrix; it will contain same data on each -/// World rank. \throw TiledArray::Exception When world size is greater than 1 -/// and \c array is not replicated. \throw TiledArray::Exception When the number +/// more World ranks. +/// \return an Eigen matrix; it will contain same data on each +/// World rank. +/// \throw TiledArray::Exception When world size is greater than 1 +/// and \c array is not replicated. +/// \throw TiledArray::Exception When the number /// of dimensions of \c array is not equal to 1 or 2. +// clang-format on template Eigen::Matrix& array) { /// This function will copy the content of \c buffer into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in \c buffer is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. +/// /// Usage: /// \code /// double* buffer = new double[100 * 100]; @@ -557,8 +580,11 @@ array_to_eigen(const DistArray& array) { /// \param buffer The row-major matrix buffer to be copied /// \param m The number of rows in the matrix /// \param n The number of columns in the matrix -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. +/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix /// \throw TiledArray::Exception When \c m and \c n are not equal to the /// number of rows or columns in tiled range. @@ -566,7 +592,8 @@ template inline A row_major_buffer_to_array( World& world, const typename A::trange_type& trange, const typename A::value_type::value_type* buffer, const std::size_t m, - const std::size_t n, const bool replicated = false) { + const std::size_t n, const bool replicated = false, + std::shared_ptr pmap = {}) { TA_USER_ASSERT(trange.elements_range().extent(0) == m, "TiledArray::eigen_to_array(): The number of rows in trange " "is not equal to m."); @@ -579,8 +606,8 @@ inline A row_major_buffer_to_array( matrix_type; return eigen_to_array( world, trange, - Eigen::Map(buffer, m, n), - replicated); + Eigen::Map(buffer, m, n), replicated, + pmap); } /// Convert a column-major matrix buffer into an Array object @@ -588,10 +615,13 @@ inline A row_major_buffer_to_array( /// This function will copy the content of \c buffer into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in \c buffer is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. +/// /// Usage: /// \code /// double* buffer = new double[100 * 100]; @@ -619,8 +649,11 @@ inline A row_major_buffer_to_array( /// \param buffer The row-major matrix buffer to be copied /// \param m The number of rows in the matrix /// \param n The number of columns in the matrix -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. +/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix /// \throw TiledArray::Exception When \c m and \c n are not equal to the /// number of rows or columns in tiled range. @@ -628,7 +661,8 @@ template inline A column_major_buffer_to_array( World& world, const typename A::trange_type& trange, const typename A::value_type::value_type* buffer, const std::size_t m, - const std::size_t n, const bool replicated = false) { + const std::size_t n, const bool replicated = false, + std::shared_ptr pmap = {}) { TA_USER_ASSERT(trange.elements_range().extent(0) == m, "TiledArray::eigen_to_array(): The number of rows in trange " "is not equal to m."); @@ -641,8 +675,8 @@ inline A column_major_buffer_to_array( matrix_type; return eigen_to_array( world, trange, - Eigen::Map(buffer, m, n), - replicated); + Eigen::Map(buffer, m, n), replicated, + pmap); } } // namespace TiledArray diff --git a/src/TiledArray/external/btas.h b/src/TiledArray/external/btas.h index 51da57337c..ffa6e72f22 100644 --- a/src/TiledArray/external/btas.h +++ b/src/TiledArray/external/btas.h @@ -768,6 +768,13 @@ struct is_btas_tensor> : public std::true_type {}; template constexpr const bool is_btas_tensor_v = is_btas_tensor::value; +/// btas::RangeNd can be col or row-major +template +struct ordinal_traits> { + static constexpr const auto type = + _Order == CblasRowMajor ? OrdinalType::RowMajor : OrdinalType::ColMajor; +}; + } // namespace detail } // namespace TiledArray diff --git a/src/TiledArray/external/eigen.h b/src/TiledArray/external/eigen.h index b296c56f3a..90e8196c2d 100644 --- a/src/TiledArray/external/eigen.h +++ b/src/TiledArray/external/eigen.h @@ -86,4 +86,56 @@ TILEDARRAY_PRAGMA_GCC(system_header) TILEDARRAY_PRAGMA_GCC(diagnostic pop) +namespace madness { +namespace archive { + +template +class archive_array; +template +inline archive_array wrap(const T*, unsigned int); +template +struct ArchiveStoreImpl; +template +struct ArchiveLoadImpl; + +template +struct ArchiveStoreImpl< + Archive, + Eigen::Matrix> { + static inline void store( + const Archive& ar, + const Eigen::Matrix& t) { + ar& t.rows() & t.cols(); + if (t.size()) ar& madness::archive::wrap(t.data(), t.size()); + } +}; + +template +struct ArchiveLoadImpl< + Archive, + Eigen::Matrix> { + static inline void load( + const Archive& ar, + Eigen::Matrix& t) { + typename Eigen::Matrix::Index nrows(0), + ncols(0); + ar& nrows& ncols; + t.resize(nrows, ncols); + if (t.size()) ar& madness::archive::wrap(t.data(), t.size()); + } +}; + +} // namespace archive +} // namespace madness + #endif // TILEDARRAY_EXTERNAL_EIGEN_H__INCLUDED diff --git a/src/TiledArray/math/scalapack.h b/src/TiledArray/math/scalapack.h index d2e3a65afe..c7ed2c3a73 100644 --- a/src/TiledArray/math/scalapack.h +++ b/src/TiledArray/math/scalapack.h @@ -25,14 +25,9 @@ #ifndef TILEDARRAY_MATH_SCALAPACK_H__INCLUDED #define TILEDARRAY_MATH_SCALAPACK_H__INCLUDED -#if TILEDARRAY_HAS_SCALAPACK +#warning \ + "TiledArray/math/scalapack.h header is deprecated, please include TiledArray/algebra/scalapack/all.h" -#include -#include -#include -#include +#include #endif - -#endif - diff --git a/src/TiledArray/math/scalapack/chol.h b/src/TiledArray/math/scalapack/chol.h deleted file mode 100644 index eff1d4b20a..0000000000 --- a/src/TiledArray/math/scalapack/chol.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * David Williams-Young - * Computational Research Division, Lawrence Berkeley National Laboratory - * - * chol.h - * Created: 8 June, 2020 - * - */ -#ifndef TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED - -#include -#if TILEDARRAY_HAS_SCALAPACK - -#include -#include - -#include -#include -#include -#include - -namespace TiledArray { - -/** - * @brief Compute the Cholesky factorization of a HPD rank-2 tensor - * - * A(i,j) = L(i,k) * conj(L(j,k)) - * - * Example Usage: - * - * auto L = cholesky(A, ...) - * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix - * - * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 - * @param[in] l_trange TiledRange for resulting Cholesky factor. If left empty, - * will default to array.trange() - * - * @returns The lower triangular Cholesky factor L in TA format - */ -template -auto cholesky( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) { - - auto& world = A.world(); - auto world_comm = world.mpi.comm().Get_mpi_comm(); - blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution - - auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for Cholesky"); - - auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); - auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc ); - if (info) TA_EXCEPTION("Cholesky Failed"); - - // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix ); - - if( l_trange.rank() == 0 ) l_trange = A.trange(); - - world.gop.fence(); - auto L = scalapack::block_cyclic_to_array( matrix, l_trange ); - world.gop.fence(); - - - return L; - -} - - - - - - - -/** - * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. - * Optinally return the Cholesky factor itself - * - * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv - * - * Example Usage: - * - * auto Linv = cholesky_Linv(A, ...) - * auto [L,Linv] = cholesky_Linv(A, ...) - * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix - * @tparam RetL Whether or not to return the cholesky factor - * - * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 - * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. - * If left empty, will default to array.trange() - * - * @returns The inverse lower triangular Cholesky factor in TA format - */ -template -auto cholesky_linv( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) { - - using value_type = typename Array::element_type; - - auto& world = A.world(); - auto world_comm = world.mpi.comm().Get_mpi_comm(); - blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution - - auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for Cholesky"); - - auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); - auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc ); - if (info) TA_EXCEPTION("Cholesky Failed"); - - // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix ); - - // Copy L if needed - std::shared_ptr> L_sca = nullptr; - if constexpr (RetL) { - L_sca = std::make_shared>( - world, grid, N, N, NB, NB - ); - L_sca->local_mat() = matrix.local_mat(); - } - - // Compute inverse - info = scalapackpp::ptrtri( blacspp::Triangle::Lower, - blacspp::Diagonal::NonUnit, N, matrix.local_mat().data(), 1, 1, desc ); - if (info) TA_EXCEPTION("TRTRI Failed"); - - - if( l_trange.rank() == 0 ) l_trange = A.trange(); - - world.gop.fence(); - auto Linv = scalapack::block_cyclic_to_array( matrix, l_trange ); - world.gop.fence(); - - - if constexpr (RetL) { - auto L = scalapack::block_cyclic_to_array( *L_sca, l_trange); - world.gop.fence(); - return std::tuple( L, Linv ); - } else { - return Linv; - } - -} - - -template -auto cholesky_solve( const Array& A, const Array& B, size_t NB = 128, - TiledRange x_trange = TiledRange() ) { - - auto& world = A.world(); - /* - if( world != B.world() ) { - TA_EXCEPTION("A and B must be distributed on same MADWorld context"); - } - */ - auto world_comm = world.mpi.comm().Get_mpi_comm(); - blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution - - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for Cholesky Solve"); - - auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); - - - scalapackpp::scalapack_desc desc_a, desc_b; - { - auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); - desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); - } - - { - auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); - desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); - } - - auto info = scalapackpp::pposv( blacspp::Triangle::Lower, N, NRHS, - A_sca.local_mat().data(), 1, 1, desc_a, B_sca.local_mat().data(), - 1, 1, desc_b ); - if (info) TA_EXCEPTION("Cholesky Solve Failed"); - - if( x_trange.rank() == 0 ) x_trange = B.trange(); - - world.gop.fence(); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); - world.gop.fence(); - - return X; -} - - - - -template -auto cholesky_lsolve( scalapackpp::TransposeFlag trans, - const Array& A, const Array& B, size_t NB = 128, - TiledRange l_trange = TiledRange(), - TiledRange x_trange = TiledRange() ) { - - auto& world = A.world(); - /* - if( world != B.world() ) { - TA_EXCEPTION("A and B must be distributed on same MADWorld context"); - } - */ - auto world_comm = world.mpi.comm().Get_mpi_comm(); - blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution - - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for Cholesky Solve"); - - auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); - - - scalapackpp::scalapack_desc desc_a, desc_b; - { - auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); - desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); - } - - { - auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); - desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); - } - - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - A_sca.local_mat().data(), 1, 1, desc_a ); - if (info) TA_EXCEPTION("Cholesky Failed"); - - info = scalapackpp::ptrtrs( blacspp::Triangle::Lower, trans, - blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, - B_sca.local_mat().data(), 1, 1, desc_b ); - if (info) TA_EXCEPTION("TRTRS Failed"); - - // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, A_sca ); - - if( l_trange.rank() == 0 ) l_trange = A.trange(); - if( x_trange.rank() == 0 ) x_trange = B.trange(); - - world.gop.fence(); - auto L = scalapack::block_cyclic_to_array( A_sca, l_trange ); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); - world.gop.fence(); - - return std::tuple(L, X); -} - -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED - diff --git a/src/TiledArray/math/scalapack/svd.h b/src/TiledArray/math/scalapack/svd.h deleted file mode 100644 index da8274acd5..0000000000 --- a/src/TiledArray/math/scalapack/svd.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * David Williams-Young - * Computational Research Division, Lawrence Berkeley National Laboratory - * - * svd.h - * Created: 12 June, 2020 - * - */ -#ifndef TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED - -#include -#if TILEDARRAY_HAS_SCALAPACK - -#include -#include - -namespace TiledArray { - - -struct SVDReturnType{ }; -struct SVDValuesOnly : public SVDReturnType { }; -struct SVDLeftVectors : public SVDReturnType { }; -struct SVDRightVectors : public SVDReturnType { }; -struct SVDAllVectors : public SVDReturnType { }; - -namespace detail { - -template -struct is_svd_return_type : public std::false_type { }; - -template -struct is_svd_return_type< - SVDType, - std::enable_if_t> -> : public std::true_type { }; - -template -inline constexpr bool is_svd_return_type_v = is_svd_return_type::value; - -template -struct enable_if_svd_return_type : - public std::enable_if< is_svd_return_type_v, U > { }; - -template -using enable_if_svd_return_type_t = - typename enable_if_svd_return_type::type; - -} - -/** - * @brief Compute the singular value decomposition (SVD) via ScaLAPACK - * - * A(i,j) = S(k) U(i,k) conj(V(j,k)) - * - * Example Usage: - * - * auto S = svd (A, ...) - * auto [S, U] = svd (A, ...) - * auto [S, VT] = svd(A, ...) - * auto [S, U, VT] = svd (A, ...) - * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix - * - * @param[in] A Input array to be decomposed. Must be rank-2 - * @param[in] MB ScaLAPACK row blocking factor. Defaults to 128 - * @param[in] NB ScaLAPACK column blocking factor. Defaults to 128 - * @param[in] u_trange TiledRange for resulting left singlar vectors. - * @param[in] vt_trange TiledRange for resulting right singlar vectors (transposed). - * - * @returns A tuple containing the eigenvalues and eigenvectors of input array - * as std::vector and in TA format, respectively. - */ -template -> -auto svd( const Array& A, TiledRange u_trange, TiledRange vt_trange, - size_t MB = 128, size_t NB = 128 -) { - - using value_type = typename Array::element_type; - using real_type = scalapackpp::detail::real_t; - - auto& world = A.world(); - auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; - blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution - - auto [M, N] = matrix.dims(); - auto SVD_SIZE = std::min(M,N); - - auto [AMloc, ANloc] = matrix.dist().get_local_dims(M, N); - auto [UMloc, UNloc] = matrix.dist().get_local_dims(M, SVD_SIZE); - auto [VTMloc, VTNloc] = matrix.dist().get_local_dims(SVD_SIZE, N); - - - auto desc_a = matrix.dist().descinit_noerror(M, N, AMloc ); - auto desc_u = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc ); - auto desc_vt = matrix.dist().descinit_noerror(SVD_SIZE, N, VTMloc); - - std::vector S( SVD_SIZE ); - - constexpr bool need_uv = std::is_same_v< SVDType, SVDAllVectors >; - constexpr bool need_u = std::is_same_v< SVDType, SVDLeftVectors > or need_uv; - constexpr bool need_vt = std::is_same_v< SVDType, SVDRightVectors > or need_uv; - - std::shared_ptr> U = nullptr, VT = nullptr; - - scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors; - scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors; - - value_type* U_ptr = nullptr; - value_type* VT_ptr = nullptr; - - if constexpr (need_u) { - JOBU = scalapackpp::VectorFlag::Vectors; - U = std::make_shared>( - world, grid, M, SVD_SIZE, MB, NB - ); - - U_ptr = U->local_mat().data(); - } - - if constexpr (need_vt) { - JOBVT = scalapackpp::VectorFlag::Vectors; - VT = std::make_shared>( - world, grid, SVD_SIZE, N, MB, NB - ); - - VT_ptr = VT->local_mat().data(); - } - - - auto info = scalapackpp::pgesvd( JOBU, JOBVT, M, N, - matrix.local_mat().data(), 1, 1, desc_a, S.data(), - U_ptr, 1, 1, desc_u, VT_ptr, 1, 1, desc_vt ); - if (info) TA_EXCEPTION("SVD Failed"); - - world.gop.fence(); - - - if constexpr (need_uv) { - - auto U_ta = scalapack::block_cyclic_to_array( *U, u_trange ); - auto VT_ta = scalapack::block_cyclic_to_array( *VT, vt_trange ); - world.gop.fence(); - - return std::tuple( S, U_ta, VT_ta ); - - } else if constexpr (need_u) { - - auto U_ta = scalapack::block_cyclic_to_array( *U, u_trange ); - world.gop.fence(); - - return std::tuple( S, U_ta ); - - } else if constexpr (need_vt) { - - auto VT_ta = scalapack::block_cyclic_to_array( *VT, vt_trange ); - world.gop.fence(); - - return std::tuple( S, VT_ta ); - - } else { - - return S; - - } - - - -} - -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED - diff --git a/src/TiledArray/math/scalapack/util.h b/src/TiledArray/math/scalapack/util.h deleted file mode 100644 index fe750f61e2..0000000000 --- a/src/TiledArray/math/scalapack/util.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * David Williams-Young - * Computational Research Division, Lawrence Berkeley National Laboratory - * - * util.h - * Created: 19 June, 2020 - * - */ -#ifndef TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED - -#include -#if TILEDARRAY_HAS_SCALAPACK - -#include - -namespace TiledArray { - -namespace detail { - -template -void scalapack_zero_triangle( - blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, bool zero_diag = false -) { - - auto zero_el = [&]( size_t I, size_t J ) { - if( A.dist().i_own(I,J) ) { - auto [i,j] = A.dist().local_indx(I,J); - A.local_mat()(i,j) = 0.; - } - }; - - auto [M,N] = A.dims(); - - // Zero the lower triangle - if( tri == blacspp::Triangle::Lower ) { - - if( zero_diag ) - for( size_t j = 0; j < N; ++j ) - for( size_t i = j; i < M; ++i ) - zero_el( i,j ); - else - for( size_t j = 0; j < N; ++j ) - for( size_t i = j+1; i < M; ++i ) - zero_el( i,j ); - - // Zero the upper triangle - } else { - - if( zero_diag ) - for( size_t j = 0; j < N; ++j ) - for( size_t i = 0; i <= std::min(j,M); ++i ) - zero_el( i,j ); - else - for( size_t j = 0; j < N; ++j ) - for( size_t i = 0; i < std::min(j,M); ++i ) - zero_el( i,j ); - - } -} - -} -} - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED - diff --git a/src/TiledArray/tensor/type_traits.h b/src/TiledArray/tensor/type_traits.h index 65c775c6d8..6690eb1f61 100644 --- a/src/TiledArray/tensor/type_traits.h +++ b/src/TiledArray/tensor/type_traits.h @@ -310,6 +310,34 @@ static constexpr const auto is_bipartite_permutable_v = const T&, const TiledArray::BipartitePermutation&>; } // namespace detail + +/// Specifies how coordinates are mapped to ordinal values +/// - RowMajor: stride decreases as mode index increases +/// - ColMajor: stride increases with the mode index +/// - Other: unknown or dynamic order +enum class OrdinalType { RowMajor = -1, ColMajor = 1, Other = 0, Invalid }; + +namespace detail { + +/// ordinal trait specifies properties of the ordinal +template +struct ordinal_traits; + +/// TA::Range is hardwired to row-major +template <> +struct ordinal_traits { + static constexpr const auto type = OrdinalType::RowMajor; +}; + +/// ordinal traits of contiguous tensors are defined by their range type +template +struct ordinal_traits>> { + static constexpr const auto type = ordinal_traits< + std::decay_t().range())>>::type; +}; + +} // namespace detail + } // namespace TiledArray #endif // TILEDARRAY_TENSOR_TYPE_TRAITS_H__INCLUDED diff --git a/src/tiledarray.h b/src/tiledarray.h index 274958ec38..a20d1c86bc 100644 --- a/src/tiledarray.h +++ b/src/tiledarray.h @@ -60,8 +60,8 @@ // ScaLAPACK functions #ifdef TILEDARRAY_HAS_SCALAPACK +#include #include -#include #endif #endif // TILEDARRAY_H__INCLUDED diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8799ff95ee..75889908de 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,97 +32,98 @@ set(executable ta_test) # N.B.: The order of files here represents the order in which the tests are run. # N.B. 2: if you want to trim this down you may need to resolve linker errors due to missing fixture deps manually set(ta_test_src_files ta_test.cpp - range.cpp - btas.cpp - meta.cpp - type_traits.cpp - utility.cpp - permutation.cpp - symm_permutation_group.cpp - symm_irrep.cpp - symm_representation.cpp - block_range.cpp - perm_index.cpp - transform_iterator.cpp - bitset.cpp - math_outer.cpp - math_partial_reduce.cpp - math_transpose.cpp - math_blas.cpp - tensor.cpp - tensor_of_tensor.cpp - tensor_tensor_view.cpp - tensor_shift_wrapper.cpp - tiled_range1.cpp - tiled_range.cpp - blocked_pmap.cpp - hash_pmap.cpp - cyclic_pmap.cpp - replicated_pmap.cpp - dense_shape.cpp - sparse_shape.cpp - distributed_storage.cpp - tensor_impl.cpp - array_impl.cpp - index_list.cpp - bipartite_index_list.cpp - dist_array.cpp - conversions.cpp - eigen.cpp - dist_op_dist_cache.cpp - dist_op_group.cpp - dist_op_communicator.cpp - tile_op_noop.cpp - tile_op_scal.cpp - dist_eval_array_eval.cpp - dist_eval_unary_eval.cpp - tile_op_add.cpp - tile_op_scal_add.cpp - tile_op_subt.cpp - tile_op_scal_subt.cpp - dist_eval_binary_eval.cpp - tile_op_mult.cpp - tile_op_scal_mult.cpp - tile_op_contract_reduce.cpp - reduce_task.cpp - proc_grid.cpp - dist_eval_contraction_eval.cpp - expressions.cpp - expressions_sparse.cpp - expressions_complex.cpp - expressions_btas.cpp - expressions_mixed.cpp - foreach.cpp - solvers.cpp - initializer_list.cpp - diagonal_array.cpp - retile.cpp - tot_dist_array_part1.cpp - tot_dist_array_part2.cpp - random.cpp - trace.cpp - tot_expressions.cpp - annotation.cpp - diagonal_array.cpp - contraction_helpers.cpp - s_t_t_contract_.cpp - t_t_t_contract_.cpp - t_s_t_contract_.cpp - t_tot_tot_contract_.cpp - tot_tot_tot_contract_.cpp - einsum.cpp + # range.cpp + # btas.cpp + # meta.cpp + # type_traits.cpp + # utility.cpp + # permutation.cpp + # symm_permutation_group.cpp + # symm_irrep.cpp + # symm_representation.cpp + # block_range.cpp + # perm_index.cpp + # transform_iterator.cpp + # bitset.cpp + # math_outer.cpp + # math_partial_reduce.cpp + # math_transpose.cpp + # math_blas.cpp + # tensor.cpp + # tensor_of_tensor.cpp + # tensor_tensor_view.cpp + # tensor_shift_wrapper.cpp + # tiled_range1.cpp + # tiled_range.cpp + # blocked_pmap.cpp + # hash_pmap.cpp + # cyclic_pmap.cpp + # replicated_pmap.cpp + # dense_shape.cpp + # sparse_shape.cpp + # distributed_storage.cpp + # tensor_impl.cpp + # array_impl.cpp + # index_list.cpp + # bipartite_index_list.cpp + # dist_array.cpp + # conversions.cpp + # eigen.cpp + # dist_op_dist_cache.cpp + # dist_op_group.cpp + # dist_op_communicator.cpp + # tile_op_noop.cpp + # tile_op_scal.cpp + # dist_eval_array_eval.cpp + # dist_eval_unary_eval.cpp + # tile_op_add.cpp + # tile_op_scal_add.cpp + # tile_op_subt.cpp + # tile_op_scal_subt.cpp + # dist_eval_binary_eval.cpp + # tile_op_mult.cpp + # tile_op_scal_mult.cpp + # tile_op_contract_reduce.cpp + # reduce_task.cpp + # proc_grid.cpp + # dist_eval_contraction_eval.cpp + # expressions.cpp + # expressions_sparse.cpp + # expressions_complex.cpp + # expressions_btas.cpp + # expressions_mixed.cpp + # foreach.cpp + # solvers.cpp + # initializer_list.cpp + # diagonal_array.cpp + # retile.cpp + # tot_dist_array_part1.cpp + # tot_dist_array_part2.cpp + # random.cpp + # trace.cpp + # tot_expressions.cpp + # annotation.cpp + # diagonal_array.cpp + # contraction_helpers.cpp + # s_t_t_contract_.cpp + # t_t_t_contract_.cpp + # t_s_t_contract_.cpp + # t_tot_tot_contract_.cpp + # tot_tot_tot_contract_.cpp + # einsum.cpp + linear_algebra.cpp ) if(CUDA_FOUND) list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() -if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files scalapack.cpp) -endif(TARGET TiledArray_SCALAPACK) +# if (TARGET TiledArray_SCALAPACK) +# list(APPEND ta_test_src_files scalapack.cpp) +# endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) -if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND) +if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE) add_ta_executable(${executable} "${ta_test_src_files}" "MADworld;${TILEDARRAY_PRIVATE_LINK_LIBRARIES}") target_compile_definitions(${executable} PRIVATE TILEDARRAY_HEADER_ONLY=1) if (LAPACK_INCLUDE_DIRS) diff --git a/tests/btas.cpp b/tests/btas.cpp index 99cf14ec97..df9122695a 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -21,6 +21,7 @@ */ #include +#include #ifdef TILEDARRAY_HAS_BTAS @@ -32,6 +33,28 @@ using namespace TiledArray; +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::RangeNd<> is row-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::RangeNd is row-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::ColMajor, + "btas::RangeNd is col-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::Tenspr is row-major"); +static_assert( + detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::Tenspr is row-major"); +static_assert( + detail::ordinal_traits< + TiledArray::Tile>>::type == + OrdinalType::RowMajor, + "TA::Tile> is row-major"); + // test both bare (deep-copy) BTAS tensor as well as its shallow-copy wrap in // Tile<>, using both btas::RangeNd<> and TiledArray::Range as the range type typedef boost::mpl::list< diff --git a/tests/eigen.cpp b/tests/eigen.cpp index 96e128c66e..37eef88220 100644 --- a/tests/eigen.cpp +++ b/tests/eigen.cpp @@ -158,26 +158,9 @@ BOOST_AUTO_TEST_CASE(matrix_to_array) { // Fill the matrix with random data matrix = decltype(matrix)::Random(matrix.rows(), matrix.cols()); - if (GlobalFixture::world->size() == 1) { - // Copy matrix to array - BOOST_CHECK_NO_THROW((array = eigen_to_array(*GlobalFixture::world, - trange, matrix))); - } else { - // Check that eigen_to_array does not work in distributed environments -#if !defined(TA_USER_ASSERT_DISABLED) - BOOST_CHECK_THROW( - (eigen_to_array(*GlobalFixture::world, trange, matrix)), - TiledArray::Exception); -#endif - - // Note: The following tests constructs a replicated array, but the data may - // not be identical. That is OK here since we are check the local data, but - // in real applications the data should be identical. - - // Copy matrix to a replicated array - BOOST_CHECK_NO_THROW((array = eigen_to_array( - *GlobalFixture::world, trange, matrix, true))); - } + // Copy matrix to array + BOOST_CHECK_NO_THROW( + (array = eigen_to_array(*GlobalFixture::world, trange, matrix))); // Check that the data in array is equal to that in matrix for (Range::const_iterator it = array.range().begin(); @@ -195,27 +178,9 @@ BOOST_AUTO_TEST_CASE(vector_to_array) { // Fill the vector with random data vector = Eigen::VectorXi::Random(vector.size()); - if (GlobalFixture::world->size() == 1) { - // Convert the vector to an array - BOOST_CHECK_NO_THROW((array1 = eigen_to_array( - *GlobalFixture::world, trange1, vector))); - - } else { - // Check that eigen_to_array does not work in distributed environments -#if !defined(TA_USER_ASSERT_DISABLED) - BOOST_CHECK_THROW( - (eigen_to_array(*GlobalFixture::world, trange1, vector)), - TiledArray::Exception); -#endif - - // Note: The following tests constructs a replicated array, but the data may - // not be identical. That is OK here since we are check the local data, but - // in real applications the data should be identical. - - // Convert the vector to an array - BOOST_CHECK_NO_THROW((array1 = eigen_to_array( - *GlobalFixture::world, trange1, vector, true))); - } + // Convert the vector to an array + BOOST_CHECK_NO_THROW((array1 = eigen_to_array(*GlobalFixture::world, + trange1, vector))); // Check that the data in array matches the data in vector for (Range::const_iterator it = array1.range().begin(); diff --git a/tests/scalapack.cpp b/tests/linear_algebra.cpp similarity index 50% rename from tests/scalapack.cpp rename to tests/linear_algebra.cpp index 1554dc8529..07f9a58be2 100644 --- a/tests/scalapack.cpp +++ b/tests/linear_algebra.cpp @@ -1,87 +1,119 @@ #include #include #include "TiledArray/config.h" -#include "range_fixture.h" +//#include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/math/scalapack.h" - -struct ScaLAPACKFixture { - blacspp::Grid grid; - scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? +#include "TiledArray/algebra/lapack/cholesky.h" +#include "TiledArray/algebra/lapack/heig.h" +#include "TiledArray/algebra/lapack/lu.h" +#include "TiledArray/algebra/lapack/svd.h" + +#include "TiledArray/algebra/cholesky.h" +#include "TiledArray/algebra/heig.h" +#include "TiledArray/algebra/lu.h" +#include "TiledArray/algebra/svd.h" + +namespace TA = TiledArray; +namespace lapack = TA::lapack; + +#if TILEDARRAY_HAS_SCALAPACK + +#include "TiledArray/algebra/scalapack/all.h" +namespace scalapack = TA::scalapack; +#define TILEDARRAY_SCALAPACK_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray", lapack::F, TiledArray::F, E); +#else +#define TILEDARRAY_SCALAPACK_TEST(...) +#endif +struct ReferenceFixture { + size_t N; std::vector htoeplitz_vector; std::vector exact_evals; - inline - double matrix_element_generator( int64_t i, int64_t j ) { - #if 0 + inline double matrix_element_generator(int64_t i, int64_t j) { +#if 0 // Generates a Hankel matrix: absurd condition number return i+j; - #else +#else // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i-j)]; - #endif + return htoeplitz_vector[std::abs(i - j)]; +#endif } - inline - double make_ta_reference(TA::Tensor& t, + inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) { for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m,n); + t(m, n) = matrix_element_generator(m, n); } } return t.norm(); }; - inline void construct_scalapack( scalapack::BlockCyclicMatrix& A ) { - auto [M,N] = A.dims(); - for (size_t i = 0; i < M; ++i) - for (size_t j = 0; j < N; ++j) - if (A.dist().i_own(i, j)) { - auto [i_local, j_local] = A.dist().local_indx(i, j); - A.local_mat()(i_local, j_local) = matrix_element_generator(i,j); - } - - } - - ScaLAPACKFixture(int64_t N, int64_t NB) - : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB), - htoeplitz_vector( N ), exact_evals( N ) { - + ReferenceFixture(int64_t N = 1000) + : N(N), htoeplitz_vector(N), exact_evals(N) { // Generate an hermitian Circulant vector - std::fill( htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0 ); + std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; std::default_random_engine gen(0); std::uniform_real_distribution<> dist(0., 1.); - for( int64_t i = 1; i <= (N/2); ++i ) { + for (int64_t i = 1; i <= (N / 2); ++i) { double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N-i] = val; + htoeplitz_vector[i] = val; + htoeplitz_vector[N - i] = val; } // Compute exact eigenvalues const double ff = 2. * M_PI / N; - for( int64_t j = 0; j < N; ++j ) { - double val = htoeplitz_vector[0];; - for( int64_t k = 1; k < N; ++k ) - val += htoeplitz_vector[N-k] * std::cos( ff * j * k ); + for (int64_t j = 0; j < N; ++j) { + double val = htoeplitz_vector[0]; + ; + for (int64_t k = 1; k < N; ++k) + val += htoeplitz_vector[N - k] * std::cos(ff * j * k); exact_evals[j] = val; } - std::sort( exact_evals.begin(), exact_evals.end() ); - - // Fill reference matrix - construct_scalapack( ref_matrix ); + std::sort(exact_evals.begin(), exact_evals.end()); } +}; + +struct LinearAlgebraFixture : ReferenceFixture { +#if TILEDARRAY_HAS_SCALAPACK + + blacspp::Grid grid; + scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? - ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {} + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) + : ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + if (ref_matrix.dist().i_own(i, j)) { + auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); + ref_matrix.local_mat()(i_local, j_local) = + matrix_element_generator(i, j); + } + } + } + } + template + static void compare(const char* context, const A& lapack, const A& result, + double e) { + BOOST_TEST_CONTEXT(context); + auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); + BOOST_CHECK_SMALL(diff_with_lapack, e); + } +#endif }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -106,7 +138,9 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; -BOOST_FIXTURE_TEST_SUITE(scalapack_suite, ScaLAPACKFixture) +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) + +#if TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -118,27 +152,25 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { GlobalFixture::world->gop.fence(); @@ -147,29 +179,27 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { auto NB = ref_matrix.dist().nb(); - auto trange = gen_trange(N, {static_cast(NB/2)}); + auto trange = gen_trange(N, {static_cast(NB / 2)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -180,15 +210,14 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -201,47 +230,40 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - - - BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); - auto NB = ref_matrix.dist().nb(); + [[maybe_unused]] auto NB = ref_matrix.dist().nb(); auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -252,15 +274,14 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -273,14 +294,11 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -291,21 +309,21 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = scalapack::block_cyclic_to_array>( + ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; @@ -320,15 +338,14 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -341,14 +358,11 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -359,15 +373,14 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - const TA::TArray ref_ta = TA::make_array >( + const TA::TArray ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -380,481 +393,473 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; +#endif // TILEDARRAY_HAS_SCALAPACK -BOOST_AUTO_TEST_CASE( sca_heig_same_tiling ) { - +BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - const auto ref_ta = TA::make_array >( + const auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [evals, evecs] = heig( ref_ta ); - //auto evals = heig( ref_ta ); + auto [evals, evecs] = lapack::heig(ref_ta); + auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta); + // auto evals = heig( ref_ta ); - BOOST_CHECK( evecs.trange() == ref_ta.trange() ); + BOOST_CHECK(evecs.trange() == ref_ta.trange()); - // TODO: Check validity of eigenvectors, not crutial for the time being + // check eigenvectors against lapack only, for now ... + decltype(evecs) evecs_error; + evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j"); + // TODO need to fix phases of the eigenvectors to be able to compare ... + // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), + // N * N * std::numeric_limits::epsilon()); // Check eigenvalue correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) { + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); + BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol); + } GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_heig_diff_tiling ) { - +BOOST_AUTO_TEST_CASE(heig_diff_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( - *GlobalFixture::world, trange, + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig( ref_ta, 128, new_trange ); + auto [evals, evecs] = lapack::heig(ref_ta, new_trange); + auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange); - BOOST_CHECK( evecs.trange() == new_trange ); + BOOST_CHECK(evecs.trange() == new_trange); - // TODO: Check validity of eigenvectors, not crutial for the time being + // check eigenvectors against lapack only, for now ... + decltype(evecs) evecs_error; + evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j"); + // TODO need to fix phases of the eigenvectors to be able to compare ... + // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), + // N * N * std::numeric_limits::epsilon()); // Check eigenvalue correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) { + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); + BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol); + } GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_heig_generalized ) { - +BOOST_AUTO_TEST_CASE(heig_generalized) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto dense_iden = TA::make_array >( + auto dense_iden = TA::make_array>( *GlobalFixture::world, trange, [](TA::Tensor& t, TA::Range const& range) -> double { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) - t(m, n) = 1.; - - return t.norm(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) t(m, n) = 1.; + + return t.norm(); }); GlobalFixture::world->gop.fence(); - auto [evals, evecs] = heig( ref_ta, dense_iden ); - //auto evals = heig( ref_ta ); + auto [evals, evecs] = lapack::heig(ref_ta, dense_iden); + // auto evals = heig( ref_ta ); - BOOST_CHECK( evecs.trange() == ref_ta.trange() ); + BOOST_CHECK(evecs.trange() == ref_ta.trange()); - // TODO: Check validity of eigenvectors, not crutial for the time being + // TODO: Check validity of eigenvectors, not crucial for the time being // Check eigenvalue correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); GlobalFixture::world->gop.fence(); } - - -BOOST_AUTO_TEST_CASE( sca_chol ) { - +BOOST_AUTO_TEST_CASE(cholesky) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto L = cholesky( ref_ta ); - - BOOST_CHECK( L.trange() == ref_ta.trange() ); + auto L = lapack::cholesky(A); - ref_ta("i,j") -= L("i,k") * L("j,k").conj(); + BOOST_CHECK(L.trange() == A.trange()); - double diff_norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( diff_norm, N*N*std::numeric_limits::epsilon() ); + decltype(A) A_minus_LLt; + A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj(); - GlobalFixture::world->gop.fence(); -} + BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + // check against LAPACK also + auto L_ref = TiledArray::lapack::cholesky(A); + decltype(L) L_diff; + L_diff("i,j") = L("i,j") - L_ref("i,j"); + BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); -BOOST_AUTO_TEST_CASE( sca_chol_linv ) { + GlobalFixture::world->gop.fence(); +} +BOOST_AUTO_TEST_CASE(cholesky_linv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); + decltype(A) Acopy = A.clone(); - - auto Linv = cholesky_linv( ref_ta ); + auto Linv = lapack::cholesky_linv(A); - BOOST_CHECK( Linv.trange() == ref_ta.trange() ); + BOOST_CHECK(Linv.trange() == A.trange()); - TA::TArray tmp( *GlobalFixture::world, trange ); - tmp("i,j") = Linv("i,k") * ref_ta("k,j"); - ref_ta("i,j") = tmp ("i,k") * Linv ("j,k"); + TA::TArray tmp(*GlobalFixture::world, trange); + tmp("i,j") = Linv("i,k") * A("k,j"); + A("i,j") = tmp("i,k") * Linv("j,k"); - TA::foreach_inplace( ref_ta, []( TA::Tensor& tile ) { + TA::foreach_inplace(A, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); - double norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); - - GlobalFixture::world->gop.fence(); -} + double epsilon = N * N * std::numeric_limits::epsilon(); + double norm = A("i,j").norm().get(); + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); + GlobalFixture::world->gop.fence(); +} -BOOST_AUTO_TEST_CASE( sca_chol_linv_retl ) { - +BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto [L, Linv] = cholesky_linv( ref_ta ); + auto [L, Linv] = lapack::cholesky_linv(A); - BOOST_CHECK( Linv.trange() == ref_ta.trange() ); - BOOST_CHECK( L.trange() == ref_ta.trange() ); + BOOST_CHECK(Linv.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); - TA::TArray tmp( *GlobalFixture::world, trange ); + TA::TArray tmp(*GlobalFixture::world, trange); tmp("i,j") = Linv("i,k") * L("k,j"); - TA::foreach_inplace( tmp, []( TA::Tensor& tile ) { + TA::foreach_inplace(tmp, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); - GlobalFixture::world->gop.fence(); -} + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); + GlobalFixture::world->gop.fence(); +} -BOOST_AUTO_TEST_CASE( sca_chol_solve ) { - +BOOST_AUTO_TEST_CASE(cholesky_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto iden = cholesky_solve( ref_ta, ref_ta ); + auto iden = lapack::cholesky_solve(A, A); + BOOST_CHECK(iden.trange() == A.trange()); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + auto iden_lapack = lapack::cholesky_solve(A, A); + decltype(iden) iden_error; + iden_error("i,j") = iden("i,j") - iden_lapack("i,j"); + BOOST_CHECK_SMALL(iden_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - - - - -BOOST_AUTO_TEST_CASE( sca_chol_lsolve ) { - +BOOST_AUTO_TEST_CASE(cholesky_lsolve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - // Should produce X = L**H - auto [L, X] = cholesky_lsolve( scalapackpp::TransposeFlag::NoTranspose, - ref_ta, ref_ta ); - - BOOST_CHECK( X.trange() == ref_ta.trange() ); - BOOST_CHECK( L.trange() == ref_ta.trange() ); + auto [L, X] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); + BOOST_CHECK(X.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); + + // first, test against LAPACK + auto [L_lapack, X_lapack] = + lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); + decltype(L) L_error; + L_error("i,j") = L("i,j") - L_lapack("i,j"); + BOOST_CHECK_SMALL(L_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + decltype(X) X_error; + X_error("i,j") = X("i,j") - X_lapack("i,j"); + BOOST_CHECK_SMALL(X_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); X("i,j") -= L("j,i"); double norm = X("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_lu_solve ) { - +BOOST_AUTO_TEST_CASE(lu_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto iden = lu_solve( ref_ta, ref_ta ); + auto iden = lapack::lu_solve(ref_ta, ref_ta); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + BOOST_CHECK(iden.trange() == ref_ta.trange()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); } - -BOOST_AUTO_TEST_CASE( sca_lu_inv ) { - +BOOST_AUTO_TEST_CASE(lu_inv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - TA::TArray iden( *GlobalFixture::world, trange ); - - auto Ainv = lu_inv( ref_ta ); - iden("i,j") = Ainv("i,k") * ref_ta("k,j"); + TA::TArray iden(*GlobalFixture::world, trange); + auto Ainv = lapack::lu_inv(ref_ta); + iden("i,j") = Ainv("i,k") * ref_ta("k,j"); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + BOOST_CHECK(iden.trange() == ref_ta.trange()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); } - #if 1 -BOOST_AUTO_TEST_CASE( sca_svd_values_only ) { - +BOOST_AUTO_TEST_CASE(svd_values_only) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto S = svd( ref_ta, trange, trange ); + auto S = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_leftvectors ) { - +BOOST_AUTO_TEST_CASE(svd_leftvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,U] = svd( ref_ta, trange, trange ); + auto [S, U] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_rightvectors ) { - +BOOST_AUTO_TEST_CASE(svd_rightvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,VT] = svd( ref_ta, trange, trange ); + auto [S, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_allvectors ) { - +BOOST_AUTO_TEST_CASE(svd_allvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,U,VT] = svd( ref_ta, trange, trange ); + auto [S, U, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } #endif