From a05cc5ed302d306bd447a5d7c85aa40b277739f4 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 10:14:05 -0400 Subject: [PATCH 01/36] moved scalapack solvers to algebra, added generic solver interface (Scalapack-only for now) --- examples/scalapack/evp.cpp | 31 +- src/CMakeLists.txt | 11 +- src/TiledArray/algebra/chol.h | 44 ++ src/TiledArray/algebra/heig.h | 43 ++ src/TiledArray/algebra/lu.h | 44 ++ src/TiledArray/algebra/scalapack/all.h | 37 ++ .../{math => algebra}/scalapack/chol.h | 187 ++++---- .../{math => algebra}/scalapack/heig.h | 98 ++-- .../{math => algebra}/scalapack/lu.h | 88 ++-- .../{math => algebra}/scalapack/svd.h | 133 ++---- .../{math => algebra}/scalapack/util.h | 57 +-- src/TiledArray/algebra/svd.h | 43 ++ src/TiledArray/algebra/svd_types.h | 64 +++ src/TiledArray/math/scalapack.h | 11 +- src/tiledarray.h | 2 +- tests/CMakeLists.txt | 2 +- tests/{scalapack.cpp => all.cpp} | 445 ++++++++---------- 17 files changed, 729 insertions(+), 611 deletions(-) create mode 100644 src/TiledArray/algebra/chol.h create mode 100644 src/TiledArray/algebra/heig.h create mode 100644 src/TiledArray/algebra/lu.h create mode 100644 src/TiledArray/algebra/scalapack/all.h rename src/TiledArray/{math => algebra}/scalapack/chol.h (51%) rename src/TiledArray/{math => algebra}/scalapack/heig.h (61%) rename src/TiledArray/{math => algebra}/scalapack/lu.h (53%) rename src/TiledArray/{math => algebra}/scalapack/svd.h (50%) rename src/TiledArray/{math => algebra}/scalapack/util.h (55%) create mode 100644 src/TiledArray/algebra/svd.h create mode 100644 src/TiledArray/algebra/svd_types.h rename tests/{scalapack.cpp => all.cpp} (61%) diff --git a/examples/scalapack/evp.cpp b/examples/scalapack/evp.cpp index d32a110073..f4277b132c 100644 --- a/examples/scalapack/evp.cpp +++ b/examples/scalapack/evp.cpp @@ -28,7 +28,7 @@ #include #include -#include +#include using Array = TA::TArray; // using Array = TA::TSpArray; @@ -93,23 +93,22 @@ int main(int argc, char** 
argv) { tensor_symm("i,j") = 0.5 * (tensor("i,j") + tensor("j,i")); tensor("i,j") = tensor_symm("i,j"); - - auto [ evals, evecs_ta ] = TA::heig( tensor ); - + auto [evals, evecs_ta] = TA::heig(tensor); //// Check EVP with TA - Array tmp = TA::foreach (evecs_ta, [evals = evals](TA::Tensor& result, - const TA::Tensor& arg) { - result = TA::clone(arg); - - auto range = arg.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) { - result(m, n) = arg(m, n) * evals[n]; - } - }); + Array tmp = + TA::foreach (evecs_ta, [evals = evals](TA::Tensor& result, + const TA::Tensor& arg) { + result = TA::clone(arg); + + auto range = arg.range(); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) { + result(m, n) = arg(m, n) * evals[n]; + } + }); world.gop.fence(); tensor("i,j") = tensor("i,j") - tmp("i,k") * evecs_ta("j,k"); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 989251d4f9..1d3669ab57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,6 +60,15 @@ TiledArray/zero_tensor.h TiledArray/algebra/conjgrad.h TiledArray/algebra/diis.h TiledArray/algebra/utils.h +TiledArray/algebra/chol.h +TiledArray/algebra/heig.h +TiledArray/algebra/lu.h +TiledArray/algebra/svd.h +TiledArray/algebra/svd_types.h +TiledArray/algebra/scalapack/chol.h +TiledArray/algebra/scalapack/heig.h +TiledArray/algebra/scalapack/lu.h +TiledArray/algebra/scalapack/svd.h TiledArray/conversions/btas.h TiledArray/conversions/clone.h TiledArray/conversions/dense_to_sparse.h @@ -112,8 +121,6 @@ TiledArray/math/partial_reduce.h TiledArray/math/transpose.h TiledArray/math/vector_op.h TiledArray/math/scalapack.h -TiledArray/math/scalapack/heig.h -TiledArray/math/scalapack/chol.h TiledArray/pmap/blocked_pmap.h TiledArray/pmap/cyclic_pmap.h TiledArray/pmap/hash_pmap.h diff --git 
a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h new file mode 100644 index 0000000000..334551104e --- /dev/null +++ b/src/TiledArray/algebra/chol.h @@ -0,0 +1,44 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#else +// eigen +#endif + +namespace TiledArray { +#if TILEDARRAY_HAS_SCALAPACK +using scalapack::cholesky; +using scalapack::cholesky_linv; +using scalapack::cholesky_lsolve; +using scalapack::cholesky_solve; +#endif + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h new file mode 100644 index 0000000000..1a3816da5f --- /dev/null +++ b/src/TiledArray/algebra/heig.h @@ -0,0 +1,43 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * heig.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#else +// eigen +#endif + +namespace TiledArray { + +#if TILEDARRAY_HAS_SCALAPACK +using scalapack::heig; +#else +#endif + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h new file mode 100644 index 0000000000..fa1bfca49e --- /dev/null +++ b/src/TiledArray/algebra/lu.h @@ -0,0 +1,44 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Eduard Valeyev + * + * lu.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LU_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#else +#// include eigen +#endif + +namespace TiledArray { + +#if TILEDARRAY_HAS_SCALAPACK +using scalapack::lu_inv; +using scalapack::lu_solve; +#else +#endif + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h new file mode 100644 index 0000000000..2599d0280e --- /dev/null +++ b/src/TiledArray/algebra/scalapack/all.h @@ -0,0 +1,37 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * all.h + * Created: 25 May, 2020 + * + */ + +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#include +#include +#include +#endif + +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/chol.h b/src/TiledArray/algebra/scalapack/chol.h similarity index 51% rename from src/TiledArray/math/scalapack/chol.h rename to src/TiledArray/algebra/scalapack/chol.h index eff1d4b20a..a15e47c84b 100644 --- a/src/TiledArray/math/scalapack/chol.h +++ b/src/TiledArray/algebra/scalapack/chol.h @@ -28,15 +28,16 @@ #include #if TILEDARRAY_HAS_SCALAPACK +#include #include -#include #include -#include #include #include +#include namespace TiledArray { +namespace scalapack { /** * @brief Compute the Cholesky factorization of a HPD rank-2 tensor @@ -47,60 +48,51 @@ namespace TiledArray { * * auto L = cholesky(A, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 - * @param[in] l_trange TiledRange for resulting Cholesky factor. If left empty, - * will default to array.trange() + * @param[in] l_trange TiledRange for resulting Cholesky factor. 
If left + * empty, will default to array.trange() * * @returns The lower triangular Cholesky factor L in TA format */ template -auto cholesky( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) { - +auto cholesky(const Array& A, size_t NB = 128, + TiledRange l_trange = TiledRange()) { auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for Cholesky"); + if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky"); auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc ); + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix ); + detail::scalapack_zero_triangle(blacspp::Triangle::Upper, matrix); - if( l_trange.rank() == 0 ) l_trange = A.trange(); + if (l_trange.rank() == 0) l_trange = A.trange(); world.gop.fence(); - auto L = scalapack::block_cyclic_to_array( matrix, l_trange ); + auto L = scalapack::block_cyclic_to_array(matrix, l_trange); world.gop.fence(); - return L; - } - - - - - - /** * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. 
- * Optinally return the Cholesky factor itself + * Optionally return the Cholesky factor itself * * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv * @@ -109,7 +101,7 @@ auto cholesky( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange * auto Linv = cholesky_Linv(A, ...) * auto [L,Linv] = cholesky_Linv(A, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * @tparam RetL Whether or not to return the cholesky factor * * @param[in] A Input array to be diagonalized. Must be rank-2 @@ -120,69 +112,63 @@ auto cholesky( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange * @returns The inverse lower triangular Cholesky factor in TA format */ template -auto cholesky_linv( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) { - +auto cholesky_linv(const Array& A, size_t NB = 128, + TiledRange l_trange = TiledRange()) { using value_type = typename Array::element_type; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for Cholesky"); + if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky"); auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc ); + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, desc); if (info) 
TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix ); + detail::scalapack_zero_triangle(blacspp::Triangle::Upper, matrix); // Copy L if needed std::shared_ptr> L_sca = nullptr; if constexpr (RetL) { L_sca = std::make_shared>( - world, grid, N, N, NB, NB - ); + world, grid, N, N, NB, NB); L_sca->local_mat() = matrix.local_mat(); } // Compute inverse - info = scalapackpp::ptrtri( blacspp::Triangle::Lower, - blacspp::Diagonal::NonUnit, N, matrix.local_mat().data(), 1, 1, desc ); + info = + scalapackpp::ptrtri(blacspp::Triangle::Lower, blacspp::Diagonal::NonUnit, + N, matrix.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("TRTRI Failed"); - - if( l_trange.rank() == 0 ) l_trange = A.trange(); + if (l_trange.rank() == 0) l_trange = A.trange(); world.gop.fence(); - auto Linv = scalapack::block_cyclic_to_array( matrix, l_trange ); + auto Linv = scalapack::block_cyclic_to_array(matrix, l_trange); world.gop.fence(); - if constexpr (RetL) { - auto L = scalapack::block_cyclic_to_array( *L_sca, l_trange); + auto L = scalapack::block_cyclic_to_array(*L_sca, l_trange); world.gop.fence(); - return std::tuple( L, Linv ); + return std::tuple(L, Linv); } else { return Linv; } - } - template -auto cholesky_solve( const Array& A, const Array& B, size_t NB = 128, - TiledRange x_trange = TiledRange() ) { - +auto cholesky_solve(const Array& A, const Array& B, size_t NB = 128, + TiledRange x_trange = TiledRange()) { auto& world = A.world(); /* if( world != B.world() ) { @@ -192,54 +178,47 @@ auto cholesky_solve( const Array& A, const Array& B, size_t NB = 128, auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution 
+ world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for Cholesky Solve"); + if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve"); auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); - + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); scalapackpp::scalapack_desc desc_a, desc_b; { - auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); - desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); + auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); + desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); } { - auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); - desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); + auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); + desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); } - auto info = scalapackpp::pposv( blacspp::Triangle::Lower, N, NRHS, - A_sca.local_mat().data(), 1, 1, desc_a, B_sca.local_mat().data(), - 1, 1, desc_b ); + auto info = scalapackpp::pposv(blacspp::Triangle::Lower, N, NRHS, + A_sca.local_mat().data(), 1, 1, desc_a, + B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("Cholesky Solve Failed"); - if( x_trange.rank() == 0 ) x_trange = B.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); world.gop.fence(); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); world.gop.fence(); return X; } - - - template -auto cholesky_lsolve( scalapackpp::TransposeFlag trans, - const Array& A, const Array& B, size_t NB = 128, - TiledRange l_trange = TiledRange(), - TiledRange x_trange = TiledRange() ) { - +auto cholesky_lsolve(scalapackpp::TransposeFlag trans, const Array& A, + const Array& B, size_t NB = 
128, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange()) { auto& world = A.world(); /* if( world != B.world() ) { @@ -249,56 +228,54 @@ auto cholesky_lsolve( scalapackpp::TransposeFlag trans, auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for Cholesky Solve"); + if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve"); auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); - + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); scalapackpp::scalapack_desc desc_a, desc_b; { - auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); - desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); + auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); + desc_a = A_sca.dist().descinit_noerror(N, N, Mloc); } { - auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); - desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); + auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS); + desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); } - auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N, - A_sca.local_mat().data(), 1, 1, desc_a ); + auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + A_sca.local_mat().data(), 1, 1, desc_a); if (info) TA_EXCEPTION("Cholesky Failed"); - info = scalapackpp::ptrtrs( blacspp::Triangle::Lower, trans, - blacspp::Diagonal::NonUnit, N, NRHS, 
A_sca.local_mat().data(), 1, 1, desc_a, - B_sca.local_mat().data(), 1, 1, desc_b ); + info = scalapackpp::ptrtrs(blacspp::Triangle::Lower, trans, + blacspp::Diagonal::NonUnit, N, NRHS, + A_sca.local_mat().data(), 1, 1, desc_a, + B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("TRTRS Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle( blacspp::Triangle::Upper, A_sca ); + detail::scalapack_zero_triangle(blacspp::Triangle::Upper, A_sca); - if( l_trange.rank() == 0 ) l_trange = A.trange(); - if( x_trange.rank() == 0 ) x_trange = B.trange(); + if (l_trange.rank() == 0) l_trange = A.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); world.gop.fence(); - auto L = scalapack::block_cyclic_to_array( A_sca, l_trange ); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); + auto L = scalapack::block_cyclic_to_array(A_sca, l_trange); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); world.gop.fence(); return std::tuple(L, X); } -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +} // namespace scalapack +} // namespace TiledArray +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/heig.h b/src/TiledArray/algebra/scalapack/heig.h similarity index 61% rename from src/TiledArray/math/scalapack/heig.h rename to src/TiledArray/algebra/scalapack/heig.h index e51361c651..0762fc3432 100644 --- a/src/TiledArray/math/scalapack/heig.h +++ b/src/TiledArray/algebra/scalapack/heig.h @@ -30,10 +30,11 @@ #if TILEDARRAY_HAS_SCALAPACK #include -#include #include +#include namespace TiledArray { +namespace scalapack { /** * @brief Solve the standard eigenvalue problem with ScaLAPACK @@ -44,7 +45,7 @@ namespace TiledArray { * * auto [E, X] = heig(A, ...) 
* - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 @@ -55,51 +56,44 @@ namespace TiledArray { * as std::vector and in TA format, respectively. */ template -auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange() ) { - +auto heig(const Array& A, size_t NB = 128, + TiledRange evec_trange = TiledRange()) { using value_type = typename Array::element_type; - using real_type = scalapackpp::detail::real_t; + using real_type = scalapackpp::detail::real_t; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; + // auto world_comm = MPI_COMM_WORLD; blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = matrix.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for EVP"); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - std::vector evals( N ); - scalapack::BlockCyclicMatrix evecs( world, grid, N, N, NB, NB ); + std::vector evals(N); + scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, - matrix.local_mat().data(), 1, 1, desc, evals.data(), - evecs.local_mat().data(), 1, 1, desc ); + scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, + matrix.local_mat().data(), 1, 1, 
desc, evals.data(), + evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); - if( evec_trange.rank() == 0 ) evec_trange = A.trange(); + if (evec_trange.rank() == 0) evec_trange = A.trange(); world.gop.fence(); - auto evecs_ta = scalapack::block_cyclic_to_array( evecs, evec_trange ); + auto evecs_ta = scalapack::block_cyclic_to_array(evecs, evec_trange); world.gop.fence(); - - return std::tuple( evals, evecs_ta ); - + return std::tuple(evals, evecs_ta); } - - - - /** * @brief Solve the generalized eigenvalue problem with ScaLAPACK * @@ -113,7 +107,7 @@ auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange( * * auto [E, X] = heig(A, B, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] B Metric @@ -125,57 +119,53 @@ auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange( * as std::vector and in TA format, respectively. 
*/ template -auto heig( const ArrayA& A, const ArrayB& B, - size_t NB = 128, TiledRange evec_trange = TiledRange() ) { - +auto heig(const ArrayA& A, const ArrayB& B, size_t NB = 128, + TiledRange evec_trange = TiledRange()) { using value_type = typename ArrayA::element_type; - static_assert( std::is_same_v ); - using real_type = scalapackpp::detail::real_t; + static_assert(std::is_same_v); + using real_type = scalapackpp::detail::real_t; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; + // auto world_comm = MPI_COMM_WORLD; blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("Matrix must be square for EVP"); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); auto [B_M, B_N] = B_sca.dims(); - if( B_M != M or B_N != N ) + if (B_M != M or B_N != N) TA_EXCEPTION("A and B must have the same dimensions"); auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N); auto desc = A_sca.dist().descinit_noerror(N, N, Mloc); - std::vector evals( N ); - scalapack::BlockCyclicMatrix evecs( world, grid, N, N, NB, NB ); + std::vector evals(N); + scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig_gen( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, - A_sca.local_mat().data(), 1, 1, desc, - B_sca.local_mat().data(), 1, 1, desc, - evals.data(), - evecs.local_mat().data(), 1, 1, desc ); + scalapackpp::VectorFlag::Vectors, 
blacspp::Triangle::Lower, N, + A_sca.local_mat().data(), 1, 1, desc, B_sca.local_mat().data(), 1, 1, + desc, evals.data(), evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); - if( evec_trange.rank() == 0 ) evec_trange = A.trange(); + if (evec_trange.rank() == 0) evec_trange = A.trange(); world.gop.fence(); - auto evecs_ta = scalapack::block_cyclic_to_array( evecs, evec_trange ); + auto evecs_ta = + scalapack::block_cyclic_to_array(evecs, evec_trange); world.gop.fence(); - - return std::tuple( evals, evecs_ta ); - + return std::tuple(evals, evecs_ta); } -} // namespace TiledArray +} // namespace scalapack +} // namespace TiledArray -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/lu.h b/src/TiledArray/algebra/scalapack/lu.h similarity index 53% rename from src/TiledArray/math/scalapack/lu.h rename to src/TiledArray/algebra/scalapack/lu.h index 7581425f2c..460f03c489 100644 --- a/src/TiledArray/math/scalapack/lu.h +++ b/src/TiledArray/algebra/scalapack/lu.h @@ -28,40 +28,38 @@ #include #if TILEDARRAY_HAS_SCALAPACK +#include #include -#include #include #include #include namespace TiledArray { +namespace scalapack { /** * @brief Solve a linear system via LU factorization */ template -auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 128, - TiledRange x_trange = TiledRange() ) { - +auto lu_solve(const ArrayA& A, const ArrayB& B, size_t NB = 128, + size_t MB = 128, TiledRange x_trange = TiledRange()) { using value_type = typename ArrayA::element_type; - static_assert(std::is_same_v); + static_assert(std::is_same_v); auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, 
MB, NB ); - auto B_sca = scalapack::array_to_block_cyclic( B, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB); + auto B_sca = scalapack::array_to_block_cyclic(B, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for LU Solve"); + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for LU Solve"); auto [B_N, NRHS] = B_sca.dims(); - if( B_N != N ) - TA_EXCEPTION("A and B dims must agree"); + if (B_N != N) TA_EXCEPTION("A and B dims must agree"); auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N); auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc); @@ -69,77 +67,67 @@ auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 12 auto [B_Mloc, B_Nloc] = B_sca.dist().get_local_dims(N, NRHS); auto desc_b = B_sca.dist().descinit_noerror(N, NRHS, B_Mloc); - std::vector IPIV( A_Mloc + MB ); + std::vector IPIV(A_Mloc + MB); - auto info = scalapackpp::pgesv( N, NRHS, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data(), - B_sca.local_mat().data(), 1, 1, desc_b ); + auto info = + scalapackpp::pgesv(N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, + IPIV.data(), B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("LU Solve Failed"); - if( x_trange.rank() == 0 ) x_trange = B.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); world.gop.fence(); - auto X = scalapack::block_cyclic_to_array( B_sca, x_trange ); + auto X = scalapack::block_cyclic_to_array(B_sca, x_trange); world.gop.fence(); return X; - } /** * @brief Invert a matrix via LU */ template -auto lu_inv( const Array& A, size_t NB = 128, size_t MB = 128, - TiledRange ainv_trange = TiledRange() ) { - +auto lu_inv(const Array& A, size_t NB = 128, size_t MB = 128, + TiledRange ainv_trange = TiledRange()) { auto& world = 
A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto A_sca = scalapack::array_to_block_cyclic( A, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution - auto [M, N] = A_sca.dims(); - if( M != N ) - TA_EXCEPTION("A must be square for LU Inverse"); + auto [M, N] = A_sca.dims(); + if (M != N) TA_EXCEPTION("A must be square for LU Inverse"); auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N); auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc); - - std::vector IPIV( A_Mloc + MB ); + std::vector IPIV(A_Mloc + MB); { - auto info = scalapackpp::pgetrf( N, N, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() ); - if (info) TA_EXCEPTION("LU Failed"); + auto info = scalapackpp::pgetrf(N, N, A_sca.local_mat().data(), 1, 1, + desc_a, IPIV.data()); + if (info) TA_EXCEPTION("LU Failed"); } { - auto info = scalapackpp::pgetri( N, - A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() ); - if (info) TA_EXCEPTION("LU Inverse Failed"); + auto info = scalapackpp::pgetri(N, A_sca.local_mat().data(), 1, 1, desc_a, + IPIV.data()); + if (info) TA_EXCEPTION("LU Inverse Failed"); } - if( ainv_trange.rank() == 0 ) ainv_trange = A.trange(); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); world.gop.fence(); - auto Ainv = scalapack::block_cyclic_to_array( A_sca, ainv_trange ); + auto Ainv = scalapack::block_cyclic_to_array(A_sca, ainv_trange); world.gop.fence(); return Ainv; - } +} // namespace scalapack +} // namespace TiledArray - - - -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED - - +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED diff --git 
a/src/TiledArray/math/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h similarity index 50% rename from src/TiledArray/math/scalapack/svd.h rename to src/TiledArray/algebra/scalapack/svd.h index da8274acd5..29044fff2d 100644 --- a/src/TiledArray/math/scalapack/svd.h +++ b/src/TiledArray/algebra/scalapack/svd.h @@ -28,41 +28,12 @@ #include #if TILEDARRAY_HAS_SCALAPACK +#include #include #include namespace TiledArray { - - -struct SVDReturnType{ }; -struct SVDValuesOnly : public SVDReturnType { }; -struct SVDLeftVectors : public SVDReturnType { }; -struct SVDRightVectors : public SVDReturnType { }; -struct SVDAllVectors : public SVDReturnType { }; - -namespace detail { - -template -struct is_svd_return_type : public std::false_type { }; - -template -struct is_svd_return_type< - SVDType, - std::enable_if_t> -> : public std::true_type { }; - -template -inline constexpr bool is_svd_return_type_v = is_svd_return_type::value; - -template -struct enable_if_svd_return_type : - public std::enable_if< is_svd_return_type_v, U > { }; - -template -using enable_if_svd_return_type_t = - typename enable_if_svd_return_type::type; - -} +namespace scalapack { /** * @brief Compute the singular value decomposition (SVD) via ScaLAPACK @@ -76,123 +47,109 @@ using enable_if_svd_return_type_t = * auto [S, VT] = svd(A, ...) * auto [S, U, VT] = svd (A, ...) * - * @tparam Array Input array type, must be convertable to BlockCyclicMatrix + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). * @param[in] MB ScaLAPACK row blocking factor. Defaults to 128 * @param[in] NB ScaLAPACK column blocking factor. Defaults to 128 - * @param[in] u_trange TiledRange for resulting left singlar vectors. 
- * @param[in] vt_trange TiledRange for resulting right singlar vectors (transposed). * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -> -auto svd( const Array& A, TiledRange u_trange, TiledRange vt_trange, - size_t MB = 128, size_t NB = 128 -) { - + typename = detail::enable_if_svd_return_type> +auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, + size_t MB = 128, size_t NB = 128) { using value_type = typename Array::element_type; - using real_type = scalapackpp::detail::real_t; + using real_type = scalapackpp::detail::real_t; auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); - //auto world_comm = MPI_COMM_WORLD; + // auto world_comm = MPI_COMM_WORLD; blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); - world.gop.fence(); // stage ScaLAPACK execution - auto matrix = scalapack::array_to_block_cyclic( A, grid, MB, NB ); - world.gop.fence(); // stage ScaLAPACK execution + world.gop.fence(); // stage ScaLAPACK execution + auto matrix = scalapack::array_to_block_cyclic(A, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution auto [M, N] = matrix.dims(); - auto SVD_SIZE = std::min(M,N); + auto SVD_SIZE = std::min(M, N); - auto [AMloc, ANloc] = matrix.dist().get_local_dims(M, N); - auto [UMloc, UNloc] = matrix.dist().get_local_dims(M, SVD_SIZE); + auto [AMloc, ANloc] = matrix.dist().get_local_dims(M, N); + auto [UMloc, UNloc] = matrix.dist().get_local_dims(M, SVD_SIZE); auto [VTMloc, VTNloc] = matrix.dist().get_local_dims(SVD_SIZE, N); - - auto desc_a = matrix.dist().descinit_noerror(M, N, AMloc ); - auto desc_u = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc ); + auto desc_a = matrix.dist().descinit_noerror(M, N, AMloc); + auto desc_u = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc); auto desc_vt = matrix.dist().descinit_noerror(SVD_SIZE, N, VTMloc); - std::vector S( SVD_SIZE ); + std::vector S(SVD_SIZE); - 
constexpr bool need_uv = std::is_same_v< SVDType, SVDAllVectors >; - constexpr bool need_u = std::is_same_v< SVDType, SVDLeftVectors > or need_uv; - constexpr bool need_vt = std::is_same_v< SVDType, SVDRightVectors > or need_uv; + constexpr bool need_uv = std::is_same_v; + constexpr bool need_u = std::is_same_v or need_uv; + constexpr bool need_vt = std::is_same_v or need_uv; - std::shared_ptr> U = nullptr, VT = nullptr; + std::shared_ptr> U = nullptr, + VT = nullptr; - scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors; + scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors; scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors; - value_type* U_ptr = nullptr; + value_type* U_ptr = nullptr; value_type* VT_ptr = nullptr; if constexpr (need_u) { JOBU = scalapackpp::VectorFlag::Vectors; - U = std::make_shared>( - world, grid, M, SVD_SIZE, MB, NB - ); + U = std::make_shared>( + world, grid, M, SVD_SIZE, MB, NB); U_ptr = U->local_mat().data(); } if constexpr (need_vt) { JOBVT = scalapackpp::VectorFlag::Vectors; - VT = std::make_shared>( - world, grid, SVD_SIZE, N, MB, NB - ); + VT = std::make_shared>( + world, grid, SVD_SIZE, N, MB, NB); VT_ptr = VT->local_mat().data(); } - - auto info = scalapackpp::pgesvd( JOBU, JOBVT, M, N, - matrix.local_mat().data(), 1, 1, desc_a, S.data(), - U_ptr, 1, 1, desc_u, VT_ptr, 1, 1, desc_vt ); + auto info = scalapackpp::pgesvd(JOBU, JOBVT, M, N, matrix.local_mat().data(), + 1, 1, desc_a, S.data(), U_ptr, 1, 1, desc_u, + VT_ptr, 1, 1, desc_vt); if (info) TA_EXCEPTION("SVD Failed"); world.gop.fence(); - if constexpr (need_uv) { - - auto U_ta = scalapack::block_cyclic_to_array( *U, u_trange ); - auto VT_ta = scalapack::block_cyclic_to_array( *VT, vt_trange ); + auto U_ta = scalapack::block_cyclic_to_array(*U, u_trange); + auto VT_ta = scalapack::block_cyclic_to_array(*VT, vt_trange); world.gop.fence(); - return std::tuple( S, U_ta, VT_ta ); + return std::tuple(S, U_ta, VT_ta); } else if constexpr 
(need_u) { - - auto U_ta = scalapack::block_cyclic_to_array( *U, u_trange ); + auto U_ta = scalapack::block_cyclic_to_array(*U, u_trange); world.gop.fence(); - return std::tuple( S, U_ta ); + return std::tuple(S, U_ta); } else if constexpr (need_vt) { - - auto VT_ta = scalapack::block_cyclic_to_array( *VT, vt_trange ); + auto VT_ta = scalapack::block_cyclic_to_array(*VT, vt_trange); world.gop.fence(); - return std::tuple( S, VT_ta ); + return std::tuple(S, VT_ta); } else { - return S; - } - - - } -} // namespace TiledArray - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +} // namespace scalapack +} // namespace TiledArray +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED diff --git a/src/TiledArray/math/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h similarity index 55% rename from src/TiledArray/math/scalapack/util.h rename to src/TiledArray/algebra/scalapack/util.h index fe750f61e2..db125117f3 100644 --- a/src/TiledArray/math/scalapack/util.h +++ b/src/TiledArray/algebra/scalapack/util.h @@ -35,49 +35,40 @@ namespace TiledArray { namespace detail { template -void scalapack_zero_triangle( - blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, bool zero_diag = false -) { - - auto zero_el = [&]( size_t I, size_t J ) { - if( A.dist().i_own(I,J) ) { - auto [i,j] = A.dist().local_indx(I,J); - A.local_mat()(i,j) = 0.; +void scalapack_zero_triangle(blacspp::Triangle tri, + scalapack::BlockCyclicMatrix& A, + bool zero_diag = false) { + auto zero_el = [&](size_t I, size_t J) { + if (A.dist().i_own(I, J)) { + auto [i, j] = A.dist().local_indx(I, J); + A.local_mat()(i, j) = 0.; } }; - auto [M,N] = A.dims(); + auto [M, N] = A.dims(); // Zero the lower triangle - if( tri == blacspp::Triangle::Lower ) { - - if( zero_diag ) - for( size_t j = 0; j < N; ++j ) - for( size_t i = j; i < M; ++i ) - zero_el( i,j ); + if (tri == blacspp::Triangle::Lower) { + if (zero_diag) + for (size_t j = 0; j < 
N; ++j) + for (size_t i = j; i < M; ++i) zero_el(i, j); else - for( size_t j = 0; j < N; ++j ) - for( size_t i = j+1; i < M; ++i ) - zero_el( i,j ); + for (size_t j = 0; j < N; ++j) + for (size_t i = j + 1; i < M; ++i) zero_el(i, j); - // Zero the upper triangle + // Zero the upper triangle } else { - - if( zero_diag ) - for( size_t j = 0; j < N; ++j ) - for( size_t i = 0; i <= std::min(j,M); ++i ) - zero_el( i,j ); + if (zero_diag) + for (size_t j = 0; j < N; ++j) + for (size_t i = 0; i <= std::min(j, M); ++i) zero_el(i, j); else - for( size_t j = 0; j < N; ++j ) - for( size_t i = 0; i < std::min(j,M); ++i ) - zero_el( i,j ); - + for (size_t j = 0; j < N; ++j) + for (size_t i = 0; i < std::min(j, M); ++i) zero_el(i, j); } } -} -} - -#endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +} // namespace detail +} // namespace TiledArray +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h new file mode 100644 index 0000000000..b31c7932da --- /dev/null +++ b/src/TiledArray/algebra/svd.h @@ -0,0 +1,43 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Eduard Valeyev + * + * svd.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SVD_H__INCLUDED + +#include +#ifdef TILEDARRAY_HAS_SCALAPACK +#include +#else +// include eigen +#endif // TILEDARRAY_HAS_SCALAPACK + +namespace TiledArray { + +#ifdef TILEDARRAY_HAS_SCALAPACK +using scalapack::svd; +#else +#endif + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/svd_types.h b/src/TiledArray/algebra/svd_types.h new file mode 100644 index 0000000000..d9ff0c9227 --- /dev/null +++ b/src/TiledArray/algebra/svd_types.h @@ -0,0 +1,64 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED + +#include +#include + +namespace TiledArray { + +struct SVDReturnType {}; +struct SVDValuesOnly : public SVDReturnType {}; +struct SVDLeftVectors : public SVDReturnType {}; +struct SVDRightVectors : public SVDReturnType {}; +struct SVDAllVectors : public SVDReturnType {}; + +namespace detail { + +template +struct is_svd_return_type : public std::false_type {}; + +template +struct is_svd_return_type< + SVDType, std::enable_if_t>> + : public std::true_type {}; + +template +inline constexpr bool is_svd_return_type_v = is_svd_return_type::value; + +template +struct enable_if_svd_return_type + : public std::enable_if, U> {}; + +template +using enable_if_svd_return_type_t = + typename enable_if_svd_return_type::type; + +} // namespace detail + +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED diff --git a/src/TiledArray/math/scalapack.h b/src/TiledArray/math/scalapack.h index d2e3a65afe..c7ed2c3a73 100644 --- a/src/TiledArray/math/scalapack.h +++ b/src/TiledArray/math/scalapack.h @@ -25,14 +25,9 @@ #ifndef TILEDARRAY_MATH_SCALAPACK_H__INCLUDED #define TILEDARRAY_MATH_SCALAPACK_H__INCLUDED -#if TILEDARRAY_HAS_SCALAPACK +#warning \ + "TiledArray/math/scalapack.h header is deprecated, please include TiledArray/algebra/scalapack/all.h" -#include -#include -#include -#include +#include #endif - -#endif - diff --git a/src/tiledarray.h b/src/tiledarray.h index 274958ec38..9897657c19 100644 --- a/src/tiledarray.h +++ b/src/tiledarray.h @@ -60,8 +60,8 @@ // ScaLAPACK functions #ifdef TILEDARRAY_HAS_SCALAPACK +#include #include -#include #endif #endif // TILEDARRAY_H__INCLUDED diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8d5f74b44d..1f57cf576d 100644 --- a/tests/CMakeLists.txt +++ 
b/tests/CMakeLists.txt @@ -117,7 +117,7 @@ if(CUDA_FOUND) endif() if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files scalapack.cpp) + list(APPEND ta_test_src_files all.cpp) endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) diff --git a/tests/scalapack.cpp b/tests/all.cpp similarity index 61% rename from tests/scalapack.cpp rename to tests/all.cpp index 1554dc8529..d991cc6efc 100644 --- a/tests/scalapack.cpp +++ b/tests/all.cpp @@ -4,81 +4,81 @@ #include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/math/scalapack.h" +#include "TiledArray/algebra/scalapack.h" + +using namespace TiledArray::scalapack; struct ScaLAPACKFixture { blacspp::Grid grid; - scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? + BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? std::vector htoeplitz_vector; std::vector exact_evals; - inline - double matrix_element_generator( int64_t i, int64_t j ) { - #if 0 + inline double matrix_element_generator(int64_t i, int64_t j) { +#if 0 // Generates a Hankel matrix: absurd condition number return i+j; - #else +#else // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i-j)]; - #endif + return htoeplitz_vector[std::abs(i - j)]; +#endif } - inline - double make_ta_reference(TA::Tensor& t, + inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) { for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m,n); + t(m, n) = matrix_element_generator(m, n); } } return t.norm(); }; - inline void construct_scalapack( scalapack::BlockCyclicMatrix& A ) { - auto [M,N] = A.dims(); + inline void construct_scalapack(BlockCyclicMatrix& A) { + auto [M, N] = A.dims(); for (size_t i = 0; i < M; ++i) - for (size_t j = 0; j < N; ++j) - if 
(A.dist().i_own(i, j)) { - auto [i_local, j_local] = A.dist().local_indx(i, j); - A.local_mat()(i_local, j_local) = matrix_element_generator(i,j); - } - + for (size_t j = 0; j < N; ++j) + if (A.dist().i_own(i, j)) { + auto [i_local, j_local] = A.dist().local_indx(i, j); + A.local_mat()(i_local, j_local) = matrix_element_generator(i, j); + } } ScaLAPACKFixture(int64_t N, int64_t NB) : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB), - htoeplitz_vector( N ), exact_evals( N ) { - + htoeplitz_vector(N), + exact_evals(N) { // Generate an hermitian Circulant vector - std::fill( htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0 ); + std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; std::default_random_engine gen(0); std::uniform_real_distribution<> dist(0., 1.); - for( int64_t i = 1; i <= (N/2); ++i ) { + for (int64_t i = 1; i <= (N / 2); ++i) { double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N-i] = val; + htoeplitz_vector[i] = val; + htoeplitz_vector[N - i] = val; } // Compute exact eigenvalues const double ff = 2. 
* M_PI / N; - for( int64_t j = 0; j < N; ++j ) { - double val = htoeplitz_vector[0];; - for( int64_t k = 1; k < N; ++k ) - val += htoeplitz_vector[N-k] * std::cos( ff * j * k ); + for (int64_t j = 0; j < N; ++j) { + double val = htoeplitz_vector[0]; + ; + for (int64_t k = 1; k < N; ++k) + val += htoeplitz_vector[N - k] * std::cos(ff * j * k); exact_evals[j] = val; } - std::sort( exact_evals.begin(), exact_evals.end() ); + std::sort(exact_evals.begin(), exact_evals.end()); // Fill reference matrix - construct_scalapack( ref_matrix ); + construct_scalapack(ref_matrix); } ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {} @@ -118,27 +118,24 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { GlobalFixture::world->gop.fence(); @@ -147,29 +144,26 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { auto NB = ref_matrix.dist().nb(); - auto trange = gen_trange(N, {static_cast(NB/2)}); + auto trange = gen_trange(N, {static_cast(NB / 2)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { 
return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -180,15 +174,14 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -201,16 +194,11 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - - - BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -221,27 +209,24 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - 
GlobalFixture::world->gop.fence(); - auto test_ta = TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -252,15 +237,14 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -273,14 +257,11 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -291,21 +272,21 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_ta = 
TA::scalapack::block_cyclic_to_array>( ref_matrix, trange ); + auto test_ta = + block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); + (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; @@ -320,15 +301,14 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -341,14 +321,11 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - - - BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); @@ -359,15 +336,14 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { auto trange = gen_trange(N, {static_cast(NB)}); - const TA::TArray ref_ta = TA::make_array >( + const TA::TArray ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - GlobalFixture::world->gop.fence(); - auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB ); + auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); 
double local_norm_diff = @@ -380,481 +356,444 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { norm_diff = std::sqrt(norm_diff); - BOOST_CHECK_SMALL( norm_diff, std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); }; - -BOOST_AUTO_TEST_CASE( sca_heig_same_tiling ) { - +BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - const auto ref_ta = TA::make_array >( + const auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [evals, evecs] = heig( ref_ta ); - //auto evals = heig( ref_ta ); + auto [evals, evecs] = heig(ref_ta); + // auto evals = heig( ref_ta ); - BOOST_CHECK( evecs.trange() == ref_ta.trange() ); + BOOST_CHECK(evecs.trange() == ref_ta.trange()); // TODO: Check validity of eigenvectors, not crutial for the time being // Check eigenvalue correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_heig_diff_tiling ) { - +BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( - *GlobalFixture::world, trange, + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig( ref_ta, 128, 
new_trange ); + auto [evals, evecs] = heig(ref_ta, 128, new_trange); - BOOST_CHECK( evecs.trange() == new_trange ); + BOOST_CHECK(evecs.trange() == new_trange); // TODO: Check validity of eigenvectors, not crutial for the time being // Check eigenvalue correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_heig_generalized ) { - +BOOST_AUTO_TEST_CASE(sca_heig_generalized) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto dense_iden = TA::make_array >( + auto dense_iden = TA::make_array>( *GlobalFixture::world, trange, [](TA::Tensor& t, TA::Range const& range) -> double { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) - t(m, n) = 1.; - - return t.norm(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) t(m, n) = 1.; + + return t.norm(); }); GlobalFixture::world->gop.fence(); - auto [evals, evecs] = heig( ref_ta, dense_iden ); - //auto evals = heig( ref_ta ); + auto [evals, evecs] = heig(ref_ta, dense_iden); + // auto evals = heig( ref_ta ); - BOOST_CHECK( evecs.trange() == ref_ta.trange() ); + BOOST_CHECK(evecs.trange() == ref_ta.trange()); // TODO: Check validity of eigenvectors, not crutial for the time being // Check eigenvalue correctness - double tol = 
N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); GlobalFixture::world->gop.fence(); } - - -BOOST_AUTO_TEST_CASE( sca_chol ) { - +BOOST_AUTO_TEST_CASE(sca_chol) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto L = cholesky( ref_ta ); + auto L = cholesky(ref_ta); - BOOST_CHECK( L.trange() == ref_ta.trange() ); + BOOST_CHECK(L.trange() == ref_ta.trange()); ref_ta("i,j") -= L("i,k") * L("j,k").conj(); double diff_norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( diff_norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(diff_norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - - -BOOST_AUTO_TEST_CASE( sca_chol_linv ) { - +BOOST_AUTO_TEST_CASE(sca_chol_linv) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto Linv = cholesky_linv( ref_ta ); + auto Linv = cholesky_linv(ref_ta); - BOOST_CHECK( Linv.trange() == ref_ta.trange() ); + BOOST_CHECK(Linv.trange() == ref_ta.trange()); - TA::TArray tmp( *GlobalFixture::world, trange ); - tmp("i,j") = Linv("i,k") * ref_ta("k,j"); - ref_ta("i,j") = tmp ("i,k") * Linv ("j,k"); + TA::TArray tmp(*GlobalFixture::world, trange); + tmp("i,j") = Linv("i,k") * 
ref_ta("k,j"); + ref_ta("i,j") = tmp("i,k") * Linv("j,k"); - TA::foreach_inplace( ref_ta, []( TA::Tensor& tile ) { + TA::foreach_inplace(ref_ta, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - - - -BOOST_AUTO_TEST_CASE( sca_chol_linv_retl ) { - +BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto [L, Linv] = cholesky_linv( ref_ta ); + auto [L, Linv] = cholesky_linv(ref_ta); - BOOST_CHECK( Linv.trange() == ref_ta.trange() ); - BOOST_CHECK( L.trange() == ref_ta.trange() ); + BOOST_CHECK(Linv.trange() == ref_ta.trange()); + BOOST_CHECK(L.trange() == ref_ta.trange()); - TA::TArray tmp( *GlobalFixture::world, trange ); + TA::TArray tmp(*GlobalFixture::world, trange); tmp("i,j") = Linv("i,k") * L("k,j"); - TA::foreach_inplace( tmp, []( TA::Tensor& tile ) { + TA::foreach_inplace(tmp, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - 
BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - - -BOOST_AUTO_TEST_CASE( sca_chol_solve ) { - +BOOST_AUTO_TEST_CASE(sca_chol_solve) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto iden = cholesky_solve( ref_ta, ref_ta ); + auto iden = cholesky_solve(ref_ta, ref_ta); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + BOOST_CHECK(iden.trange() == ref_ta.trange()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - - - - -BOOST_AUTO_TEST_CASE( sca_chol_lsolve ) { - +BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - // Should produce X = L**H - auto [L, X] = cholesky_lsolve( scalapackpp::TransposeFlag::NoTranspose, - ref_ta, ref_ta ); + auto [L, X] = + 
cholesky_lsolve(scalapackpp::TransposeFlag::NoTranspose, ref_ta, ref_ta); - BOOST_CHECK( X.trange() == ref_ta.trange() ); - BOOST_CHECK( L.trange() == ref_ta.trange() ); + BOOST_CHECK(X.trange() == ref_ta.trange()); + BOOST_CHECK(L.trange() == ref_ta.trange()); X("i,j") -= L("j,i"); double norm = X("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_lu_solve ) { - +BOOST_AUTO_TEST_CASE(sca_lu_solve) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - - auto iden = lu_solve( ref_ta, ref_ta ); + auto iden = lu_solve(ref_ta, ref_ta); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + BOOST_CHECK(iden.trange() == ref_ta.trange()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - -BOOST_AUTO_TEST_CASE( sca_lu_inv ) { - +BOOST_AUTO_TEST_CASE(sca_lu_inv) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = 
TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - TA::TArray iden( *GlobalFixture::world, trange ); - - auto Ainv = lu_inv( ref_ta ); - iden("i,j") = Ainv("i,k") * ref_ta("k,j"); + TA::TArray iden(*GlobalFixture::world, trange); + auto Ainv = lu_inv(ref_ta); + iden("i,j") = Ainv("i,k") * ref_ta("k,j"); - BOOST_CHECK( iden.trange() == ref_ta.trange() ); + BOOST_CHECK(iden.trange() == ref_ta.trange()); - TA::foreach_inplace( iden, []( TA::Tensor& tile ) { + TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if( m == n ) { - tile(m,n) -= 1.; - } + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { + tile(m, n) -= 1.; + } }); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits::epsilon() ); + BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } - #if 1 -BOOST_AUTO_TEST_CASE( sca_svd_values_only ) { - +BOOST_AUTO_TEST_CASE(sca_svd_values_only) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto S = svd( ref_ta, trange, trange ); + auto S = svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < 
N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_leftvectors ) { - +BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,U] = svd( ref_ta, trange, trange ); + auto [S, U] = svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_rightvectors ) { - +BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,VT] = svd( ref_ta, trange, trange ); + auto [S, VT] = svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - 
std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE( sca_svd_allvectors ) { - +BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { GlobalFixture::world->gop.fence(); auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array >( + auto ref_ta = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [S,U,VT] = svd( ref_ta, trange, trange ); + auto [S, U, VT] = svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; - std::sort( exact_singular_values.begin(), exact_singular_values.end(), - std::greater() ); + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); // Check singular value correctness - double tol = N*N*std::numeric_limits::epsilon(); - for( int64_t i = 0; i < N; ++i ) - BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol ); + double tol = N * N * std::numeric_limits::epsilon(); + for (int64_t i = 0; i < N; ++i) + BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); GlobalFixture::world->gop.fence(); } #endif From 1494977220fe7c44f02880c4411e22a0c997cf43 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 10:19:16 -0400 Subject: [PATCH 02/36] amended a05cc5ed302d306bd447a5d7c85aa40b277739f4 --- src/tiledarray.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/tiledarray.h b/src/tiledarray.h index 9897657c19..a20d1c86bc 100644 --- a/src/tiledarray.h +++ b/src/tiledarray.h @@ -60,7 +60,7 @@ // ScaLAPACK functions #ifdef TILEDARRAY_HAS_SCALAPACK -#include +#include #include #endif From aab124ff60aac44bf54c4d0ba68114f2454d2e6e Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 10:38:37 -0400 Subject: [PATCH 03/36] added stub lapack cholesky API --- src/CMakeLists.txt | 3 +- src/TiledArray/algebra/chol.h | 6 +- src/TiledArray/algebra/lapack/chol.h | 116 ++++++++++++++++++ src/TiledArray/algebra/scalapack/chol.h | 12 +- src/TiledArray/algebra/scalapack/heig.h | 6 +- src/TiledArray/algebra/scalapack/lu.h | 6 +- src/TiledArray/algebra/scalapack/svd.h | 8 +- src/TiledArray/algebra/scalapack/util.h | 6 +- .../algebra/{svd_types.h => types.h} | 2 + tests/all.cpp | 3 +- 10 files changed, 145 insertions(+), 23 deletions(-) create mode 100644 src/TiledArray/algebra/lapack/chol.h rename src/TiledArray/algebra/{svd_types.h => types.h} (97%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1d3669ab57..66d780df3e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,7 +64,8 @@ TiledArray/algebra/chol.h TiledArray/algebra/heig.h TiledArray/algebra/lu.h TiledArray/algebra/svd.h -TiledArray/algebra/svd_types.h +TiledArray/algebra/types.h +TiledArray/algebra/lapack/chol.h TiledArray/algebra/scalapack/chol.h TiledArray/algebra/scalapack/heig.h TiledArray/algebra/scalapack/lu.h diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 334551104e..4b15f42510 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -28,7 +28,7 @@ #if TILEDARRAY_HAS_SCALAPACK #include #else -// eigen +#include #endif namespace TiledArray { @@ -37,6 +37,10 @@ using scalapack::cholesky; using scalapack::cholesky_linv; using scalapack::cholesky_lsolve; using scalapack::cholesky_solve; +else using lapack::cholesky; +using lapack::cholesky_linv; +using 
lapack::cholesky_lsolve; +using lapack::cholesky_solve; #endif } // namespace TiledArray diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h new file mode 100644 index 0000000000..f5ece1f12c --- /dev/null +++ b/src/TiledArray/algebra/lapack/chol.h @@ -0,0 +1,116 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED + +#include + +namespace TiledArray { +namespace lapack { + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 + * @param[in] l_trange TiledRange for resulting Cholesky factor. 
If left + * empty, will default to array.trange() + * + * @returns The lower triangular Cholesky factor L in TA format + */ +template +auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { + auto& world = A.world(); + + // // Call lapack verson of LLT dpotrf_, we have to reverse stuff since + // lapack + // // will think all of our matrices are Col Major + // // RowMatrixXd A_copy = A; + // + // char uplo = 'U'; // Do lower, but need to use U because Row -> Col + // integer n = A.rows(); + // real8* a = A.data(); + // integer lda = n; + // integer info; + // + //#ifdef MADNESS_LINALG_USE_LAPACKE + // dpotrf_(&uplo, &n, a, &lda, &info); + //#else + // dpotrf_(&uplo, &n, a, &lda, &info, sizeof(char)); + //#endif + // + // return L; + abort(); +} + +/** + * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. + * Optionally return the Cholesky factor itself + * + * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv + * + * Example Usage: + * + * auto Linv = cholesky_Linv(A, ...) + * auto [L,Linv] = cholesky_Linv(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * @tparam RetL Whether or not to return the cholesky factor + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. 
+ * If left empty, will default to array.trange() + * + * @returns The inverse lower triangular Cholesky factor in TA format + */ +template +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { + abort(); +} + +template +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange()) { + abort(); +} + +template +auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange()) { + abort(); +} + +} // namespace lapack +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/chol.h index a15e47c84b..adfd2f03a7 100644 --- a/src/TiledArray/algebra/scalapack/chol.h +++ b/src/TiledArray/algebra/scalapack/chol.h @@ -22,13 +22,14 @@ * Created: 8 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK #include +#include #include #include @@ -215,9 +216,8 @@ auto cholesky_solve(const Array& A, const Array& B, size_t NB = 128, } template -auto cholesky_lsolve(scalapackpp::TransposeFlag trans, const Array& A, - const Array& B, size_t NB = 128, - TiledRange l_trange = TiledRange(), +auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B, + size_t NB = 128, TiledRange l_trange = TiledRange(), TiledRange x_trange = TiledRange()) { auto& world = A.world(); /* @@ -278,4 +278,4 @@ auto cholesky_lsolve(scalapackpp::TransposeFlag trans, const Array& A, } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/heig.h b/src/TiledArray/algebra/scalapack/heig.h index 
0762fc3432..4e63181c5e 100644 --- a/src/TiledArray/algebra/scalapack/heig.h +++ b/src/TiledArray/algebra/scalapack/heig.h @@ -23,8 +23,8 @@ * Edited: 8 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK @@ -168,4 +168,4 @@ auto heig(const ArrayA& A, const ArrayB& B, size_t NB = 128, } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/lu.h b/src/TiledArray/algebra/scalapack/lu.h index 460f03c489..f3603cfa50 100644 --- a/src/TiledArray/algebra/scalapack/lu.h +++ b/src/TiledArray/algebra/scalapack/lu.h @@ -22,8 +22,8 @@ * Created: 19 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK @@ -130,4 +130,4 @@ auto lu_inv(const Array& A, size_t NB = 128, size_t MB = 128, } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h index 29044fff2d..71f539cea3 100644 --- a/src/TiledArray/algebra/scalapack/svd.h +++ b/src/TiledArray/algebra/scalapack/svd.h @@ -22,13 +22,13 @@ * Created: 12 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #include #include @@ -152,4 +152,4 @@ 
auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h index db125117f3..33c1c15403 100644 --- a/src/TiledArray/algebra/scalapack/util.h +++ b/src/TiledArray/algebra/scalapack/util.h @@ -22,8 +22,8 @@ * Created: 19 June, 2020 * */ -#ifndef TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED -#define TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED +#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED #include #if TILEDARRAY_HAS_SCALAPACK @@ -71,4 +71,4 @@ void scalapack_zero_triangle(blacspp::Triangle tri, } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK -#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED diff --git a/src/TiledArray/algebra/svd_types.h b/src/TiledArray/algebra/types.h similarity index 97% rename from src/TiledArray/algebra/svd_types.h rename to src/TiledArray/algebra/types.h index d9ff0c9227..1cc0a50e57 100644 --- a/src/TiledArray/algebra/svd_types.h +++ b/src/TiledArray/algebra/types.h @@ -30,6 +30,8 @@ namespace TiledArray { +enum TransposeFlag { NoTranspose, Transpose, ConjTranspose }; + struct SVDReturnType {}; struct SVDValuesOnly : public SVDReturnType {}; struct SVDLeftVectors : public SVDReturnType {}; diff --git a/tests/all.cpp b/tests/all.cpp index d991cc6efc..75d673c945 100644 --- a/tests/all.cpp +++ b/tests/all.cpp @@ -607,8 +607,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { }); // Should produce X = L**H - auto [L, X] = - cholesky_lsolve(scalapackpp::TransposeFlag::NoTranspose, ref_ta, ref_ta); + auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, ref_ta, ref_ta); BOOST_CHECK(X.trange() == ref_ta.trange()); BOOST_CHECK(L.trange() == ref_ta.trange()); From 
068a75d435767c7e2f1255f46ec2a261b7dc00ed Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 12:00:38 -0400 Subject: [PATCH 04/36] fixed typo in aab124ff60aac44bf54c4d0ba68114f2454d2e6e --- src/TiledArray/algebra/chol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 4b15f42510..061d9aa71e 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -37,7 +37,8 @@ using scalapack::cholesky; using scalapack::cholesky_linv; using scalapack::cholesky_lsolve; using scalapack::cholesky_solve; -else using lapack::cholesky; +#else +using lapack::cholesky; using lapack::cholesky_linv; using lapack::cholesky_lsolve; using lapack::cholesky_solve; From b3ea08c8c3c1283afbef371b815a5fc5c86f5e25 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 13:27:33 -0400 Subject: [PATCH 05/36] fixed scalapack include path --- tests/all.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/all.cpp b/tests/all.cpp index 75d673c945..283072ae3d 100644 --- a/tests/all.cpp +++ b/tests/all.cpp @@ -4,7 +4,7 @@ #include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/algebra/scalapack.h" +#include "TiledArray/algebra/scalapack/all.h" using namespace TiledArray::scalapack; From 3453f769c23356553560f3adc33e01586ef12c4d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 13:28:08 -0400 Subject: [PATCH 06/36] hushed warning about copies in range-based for --- tests/btas.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/btas.cpp b/tests/btas.cpp index 0f54fa2698..70d7c5340f 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -293,7 +293,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(dense_array_conversion, bTensor, tensor_types) { *GlobalFixture::world, trange, src, replicated)); // check the array contents - for (const auto& t : dst) { + for (auto&& t : dst) { const auto& tile = t.get(); 
const auto& tile_range = tile.range(); auto src_blk_range = TiledArray::BlockRange( From a36249dead52e85ef74cd64e4bc3653d8f480a37 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 13:29:08 -0400 Subject: [PATCH 07/36] dox fixup --- src/TiledArray/algebra/lapack/chol.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index f5ece1f12c..396171e120 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -41,7 +41,6 @@ namespace lapack { * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] l_trange TiledRange for resulting Cholesky factor. If left * empty, will default to array.trange() * From c7198597144f850d28f612c8cbf800f0a5a74c06 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 13:30:54 -0400 Subject: [PATCH 08/36] default scalapack block size can be controlled at runtime --- src/TiledArray/algebra/scalapack/chol.h | 28 +++++++++++++------------ src/TiledArray/algebra/scalapack/heig.h | 13 ++++++------ src/TiledArray/algebra/scalapack/lu.h | 11 ++++++---- src/TiledArray/algebra/scalapack/svd.h | 6 +++--- src/TiledArray/algebra/scalapack/util.h | 24 +++++++++++++++++---- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/chol.h index adfd2f03a7..f24ed0aa15 100644 --- a/src/TiledArray/algebra/scalapack/chol.h +++ b/src/TiledArray/algebra/scalapack/chol.h @@ -52,15 +52,15 @@ namespace scalapack { * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] l_trange TiledRange for resulting Cholesky factor. 
If left * empty, will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns The lower triangular Cholesky factor L in TA format */ template -auto cholesky(const Array& A, size_t NB = 128, - TiledRange l_trange = TiledRange()) { +auto cholesky(const Array& A, TiledRange l_trange = TiledRange(), + size_t NB = default_block_size()) { auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); @@ -80,7 +80,7 @@ auto cholesky(const Array& A, size_t NB = 128, if (info) TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle(blacspp::Triangle::Upper, matrix); + zero_triangle(blacspp::Triangle::Upper, matrix); if (l_trange.rank() == 0) l_trange = A.trange(); @@ -106,15 +106,15 @@ auto cholesky(const Array& A, size_t NB = 128, * @tparam RetL Whether or not to return the cholesky factor * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. * If left empty, will default to array.trange() + * @param[in] NB ScaLAPACK block size. 
Defaults to 128 * * @returns The inverse lower triangular Cholesky factor in TA format */ template -auto cholesky_linv(const Array& A, size_t NB = 128, - TiledRange l_trange = TiledRange()) { +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange(), + size_t NB = default_block_size()) { using value_type = typename Array::element_type; auto& world = A.world(); @@ -136,7 +136,7 @@ auto cholesky_linv(const Array& A, size_t NB = 128, if (info) TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle(blacspp::Triangle::Upper, matrix); + zero_triangle(blacspp::Triangle::Upper, matrix); // Copy L if needed std::shared_ptr> L_sca = nullptr; @@ -168,8 +168,9 @@ auto cholesky_linv(const Array& A, size_t NB = 128, } template -auto cholesky_solve(const Array& A, const Array& B, size_t NB = 128, - TiledRange x_trange = TiledRange()) { +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size()) { auto& world = A.world(); /* if( world != B.world() ) { @@ -217,8 +218,9 @@ auto cholesky_solve(const Array& A, const Array& B, size_t NB = 128, template auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B, - size_t NB = 128, TiledRange l_trange = TiledRange(), - TiledRange x_trange = TiledRange()) { + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size()) { auto& world = A.world(); /* if( world != B.world() ) { @@ -261,7 +263,7 @@ auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B, if (info) TA_EXCEPTION("TRTRS Failed"); // Zero out the upper triangle - detail::scalapack_zero_triangle(blacspp::Triangle::Upper, A_sca); + zero_triangle(blacspp::Triangle::Upper, A_sca); if (l_trange.rank() == 0) l_trange = A.trange(); if (x_trange.rank() == 0) x_trange = B.trange(); diff --git a/src/TiledArray/algebra/scalapack/heig.h b/src/TiledArray/algebra/scalapack/heig.h index 
4e63181c5e..488867b857 100644 --- a/src/TiledArray/algebra/scalapack/heig.h +++ b/src/TiledArray/algebra/scalapack/heig.h @@ -48,16 +48,16 @@ namespace scalapack { * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -auto heig(const Array& A, size_t NB = 128, - TiledRange evec_trange = TiledRange()) { +auto heig(const Array& A, TiledRange evec_trange = TiledRange(), + size_t NB = default_block_size()) { using value_type = typename Array::element_type; using real_type = scalapackpp::detail::real_t; @@ -111,16 +111,17 @@ auto heig(const Array& A, size_t NB = 128, * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] B Metric - * @param[in] NB ScaLAPACK blocking factor. Defaults to 128 * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. 
*/ template -auto heig(const ArrayA& A, const ArrayB& B, size_t NB = 128, - TiledRange evec_trange = TiledRange()) { +auto heig(const ArrayA& A, const ArrayB& B, + TiledRange evec_trange = TiledRange(), + size_t NB = default_block_size()) { using value_type = typename ArrayA::element_type; static_assert(std::is_same_v); using real_type = scalapackpp::detail::real_t; diff --git a/src/TiledArray/algebra/scalapack/lu.h b/src/TiledArray/algebra/scalapack/lu.h index f3603cfa50..8c65383846 100644 --- a/src/TiledArray/algebra/scalapack/lu.h +++ b/src/TiledArray/algebra/scalapack/lu.h @@ -42,8 +42,10 @@ namespace scalapack { * @brief Solve a linear system via LU factorization */ template -auto lu_solve(const ArrayA& A, const ArrayB& B, size_t NB = 128, - size_t MB = 128, TiledRange x_trange = TiledRange()) { +auto lu_solve(const ArrayA& A, const ArrayB& B, + TiledRange x_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { using value_type = typename ArrayA::element_type; static_assert(std::is_same_v); @@ -87,8 +89,9 @@ auto lu_solve(const ArrayA& A, const ArrayB& B, size_t NB = 128, * @brief Invert a matrix via LU */ template -auto lu_inv(const Array& A, size_t NB = 128, size_t MB = 128, - TiledRange ainv_trange = TiledRange()) { +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { auto& world = A.world(); auto world_comm = world.mpi.comm().Get_mpi_comm(); blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); diff --git a/src/TiledArray/algebra/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h index 71f539cea3..c6d61242ea 100644 --- a/src/TiledArray/algebra/scalapack/svd.h +++ b/src/TiledArray/algebra/scalapack/svd.h @@ -53,8 +53,8 @@ namespace scalapack { * @param[in] u_trange TiledRange for resulting left singular vectors. * @param[in] vt_trange TiledRange for resulting right singular vectors * (transposed). 
- * @param[in] MB ScaLAPACK row blocking factor. Defaults to 128 - * @param[in] NB ScaLAPACK column blocking factor. Defaults to 128 + * @param[in] MB ScaLAPACK row block size. Defaults to 128 + * @param[in] NB ScaLAPACK column block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. @@ -62,7 +62,7 @@ namespace scalapack { template > auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, - size_t MB = 128, size_t NB = 128) { + size_t MB = default_block_size(), size_t NB = default_block_size()) { using value_type = typename Array::element_type; using real_type = scalapackpp::detail::real_t; diff --git a/src/TiledArray/algebra/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h index 33c1c15403..2e87d42604 100644 --- a/src/TiledArray/algebra/scalapack/util.h +++ b/src/TiledArray/algebra/scalapack/util.h @@ -32,12 +32,11 @@ namespace TiledArray { -namespace detail { +namespace scalapack { template -void scalapack_zero_triangle(blacspp::Triangle tri, - scalapack::BlockCyclicMatrix& A, - bool zero_diag = false) { +void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, + bool zero_diag = false) { auto zero_el = [&](size_t I, size_t J) { if (A.dist().i_own(I, J)) { auto [i, j] = A.dist().local_indx(I, J); @@ -67,7 +66,24 @@ void scalapack_zero_triangle(blacspp::Triangle tri, } } +} // namespace scalapack + +namespace detail { +inline std::size_t& default_block_size_accessor() { + static std::size_t block_size = 128; + return block_size; +} } // namespace detail + +inline std::size_t default_block_size() { + return detail::default_block_size_accessor(); +} + +inline void set_default_block_size(std::size_t NB) { + TA_ASSERT(NB > 0); + detail::default_block_size_accessor() = NB; +} + } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK From c2c7a86b4ab60c6631e2ad0c699fb81d975445f3 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev 
Date: Fri, 16 Oct 2020 13:32:19 -0400 Subject: [PATCH 09/36] replaced generic solver interface for cholesky from namespace import to explicit runtime dispatch to scalapack vs lapack with primitive hardwired logic --- src/TiledArray/algebra/chol.h | 54 ++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 061d9aa71e..96df69e207 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -27,22 +27,54 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -#include #endif +#include namespace TiledArray { + +template +auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky(A, l_trange); + else +#endif + return lapack::cholesky(A, l_trange); +} + +template +auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky_linv(A, l_trange); + else +#endif + return lapack::cholesky_linv(A, l_trange); +} + +template +auto cholesky_solve(const Array& A, const Array& B, + TiledRange x_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) + return scalapack::cholesky_solve(A, B, x_trange); + else +#endif + return lapack::cholesky_solve(A, B, x_trange); +} + +template +auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, + TiledRange l_trange = TiledRange(), + TiledRange x_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK -using scalapack::cholesky; -using scalapack::cholesky_linv; -using scalapack::cholesky_lsolve; -using scalapack::cholesky_solve; -#else -using lapack::cholesky; -using lapack::cholesky_linv; -using lapack::cholesky_lsolve; -using lapack::cholesky_solve; + if (A.world().size() > 
1 && A.range().volume() > 10000000) + return scalapack::cholesky_lsolve(transpose, A, B, l_trange, + x_trange); + else #endif + return lapack::cholesky_solve(transpose, A, B, l_trange, x_trange); +} } // namespace TiledArray From bda146abff10ccaf19a1e64d5b17712b1a4da3d0 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 16:34:31 -0400 Subject: [PATCH 10/36] fixup --- src/TiledArray/algebra/lapack/chol.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 396171e120..2d2d87be3a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -69,6 +69,7 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { // // return L; abort(); + return Array{}; } /** @@ -94,12 +95,17 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { template auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { abort(); + if constexpr (RetL) + return std::make_tuple(Array{}, Array{}); + else + return Array{}; } template auto cholesky_solve(const Array& A, const Array& B, TiledRange x_trange = TiledRange()) { abort(); + return Array{}; } template @@ -107,6 +113,7 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, TiledRange l_trange = TiledRange(), TiledRange x_trange = TiledRange()) { abort(); + return Array{}; } } // namespace lapack From 7a5c9f05b2a6027ffd259107e051c2366050f9c8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 16 Oct 2020 17:06:54 -0400 Subject: [PATCH 11/36] cast TransposeFlag to scalapackpp's variant --- src/TiledArray/algebra/scalapack/chol.h | 8 ++++---- src/TiledArray/algebra/scalapack/svd.h | 2 +- src/TiledArray/algebra/scalapack/util.h | 18 ++++++++++++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/chol.h index f24ed0aa15..535db24d41 100644 --- 
a/src/TiledArray/algebra/scalapack/chol.h +++ b/src/TiledArray/algebra/scalapack/chol.h @@ -256,10 +256,10 @@ auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B, A_sca.local_mat().data(), 1, 1, desc_a); if (info) TA_EXCEPTION("Cholesky Failed"); - info = scalapackpp::ptrtrs(blacspp::Triangle::Lower, trans, - blacspp::Diagonal::NonUnit, N, NRHS, - A_sca.local_mat().data(), 1, 1, desc_a, - B_sca.local_mat().data(), 1, 1, desc_b); + info = scalapackpp::ptrtrs( + blacspp::Triangle::Lower, to_scalapackpp_transposeflag(trans), + blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, + desc_a, B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("TRTRS Failed"); // Zero out the upper triangle diff --git a/src/TiledArray/algebra/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h index c6d61242ea..d73f9e05dd 100644 --- a/src/TiledArray/algebra/scalapack/svd.h +++ b/src/TiledArray/algebra/scalapack/svd.h @@ -60,7 +60,7 @@ namespace scalapack { * as std::vector and in TA format, respectively. 
*/ template > + typename = TiledArray::detail::enable_if_svd_return_type> auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, size_t MB = default_block_size(), size_t NB = default_block_size()) { using value_type = typename Array::element_type; diff --git a/src/TiledArray/algebra/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h index 2e87d42604..439d2fa590 100644 --- a/src/TiledArray/algebra/scalapack/util.h +++ b/src/TiledArray/algebra/scalapack/util.h @@ -28,12 +28,27 @@ #include #if TILEDARRAY_HAS_SCALAPACK +#include #include namespace TiledArray { namespace scalapack { +inline scalapackpp::TransposeFlag to_scalapackpp_transposeflag( + TransposeFlag t) { + switch (t) { + case TransposeFlag::NoTranspose: + return scalapackpp::TransposeFlag::NoTranspose; + case TransposeFlag::Transpose: + return scalapackpp::TransposeFlag::Transpose; + case TransposeFlag::ConjTranspose: + return scalapackpp::TransposeFlag::ConjTranspose; + default: + abort(); + } +} + template void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, bool zero_diag = false) { @@ -66,8 +81,6 @@ void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, } } -} // namespace scalapack - namespace detail { inline std::size_t& default_block_size_accessor() { static std::size_t block_size = 128; @@ -84,6 +97,7 @@ inline void set_default_block_size(std::size_t NB) { detail::default_block_size_accessor() = NB; } +} // namespace scalapack } // namespace TiledArray #endif // TILEDARRAY_HAS_SCALAPACK From e4a0b65b33b591a92dc2ebdf8ac586bdf73ac7c8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 17 Oct 2020 09:31:45 -0400 Subject: [PATCH 12/36] reverted rename of scalapack.cpp to all.cpp in a05cc5ed302d306bd447a5d7c85aa40b277739f4 --- tests/CMakeLists.txt | 2 +- tests/{all.cpp => scalapack.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{all.cpp => scalapack.cpp} (100%) diff --git a/tests/CMakeLists.txt 
b/tests/CMakeLists.txt index 1f57cf576d..8d5f74b44d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -117,7 +117,7 @@ if(CUDA_FOUND) endif() if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files all.cpp) + list(APPEND ta_test_src_files scalapack.cpp) endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) diff --git a/tests/all.cpp b/tests/scalapack.cpp similarity index 100% rename from tests/all.cpp rename to tests/scalapack.cpp From 4754712aa46c06ba07c4fcda67b0769f41e71d6b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 17 Oct 2020 10:14:26 -0400 Subject: [PATCH 13/36] fixup --- tests/scalapack.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scalapack.cpp b/tests/scalapack.cpp index 283072ae3d..3d031d4616 100644 --- a/tests/scalapack.cpp +++ b/tests/scalapack.cpp @@ -205,7 +205,7 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { auto [M, N] = ref_matrix.dims(); BOOST_REQUIRE_EQUAL(M, N); - auto NB = ref_matrix.dist().nb(); + [[maybe_unused]] auto NB = ref_matrix.dist().nb(); auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); @@ -402,7 +402,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig(ref_ta, 128, new_trange); + auto [evals, evecs] = heig(ref_ta, new_trange, 128); BOOST_CHECK(evecs.trange() == new_trange); From 9a07b4de5e8c667822759ed1e64df91e9ee07de2 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 18 Oct 2020 13:20:26 -0400 Subject: [PATCH 14/36] more fixes for apple clang (12) warnings --- tests/btas.cpp | 2 +- tests/tot_dist_array_part2.cpp | 253 ++++++++++++++++----------------- 2 files changed, 120 insertions(+), 135 deletions(-) diff --git a/tests/btas.cpp b/tests/btas.cpp index 70d7c5340f..99cf14ec97 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -356,7 +356,7 @@ 
BOOST_AUTO_TEST_CASE_TEMPLATE(sparse_array_conversion, bTensor, tensor_types) { *GlobalFixture::world, trange, src, replicated)); // check the array contents - for (const auto& t : dst) { + for (auto&& t : dst) { const auto& tile = t.get(); const auto& tile_range = tile.range(); auto src_blk_range = TiledArray::BlockRange( diff --git a/tests/tot_dist_array_part2.cpp b/tests/tot_dist_array_part2.cpp index c99bab6203..90a9a9b88b 100644 --- a/tests/tot_dist_array_part2.cpp +++ b/tests/tot_dist_array_part2.cpp @@ -27,22 +27,22 @@ BOOST_FIXTURE_TEST_SUITE(tot_array_suite2, ToTArrayFixture) * works and fill_local forwards its arguments correctly, fill_local should * work too. */ -BOOST_AUTO_TEST_CASE_TEMPLATE(fill_local, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(fill_local, TestParam, test_params) { using tensor_type = tensor_type; using inner_type = inner_type; using except_t = TiledArray::Exception; // Throws if PIMPL is empty { tensor_type t; - if(m_world.nproc() == 1) { + if (m_world.nproc() == 1) { BOOST_CHECK_THROW(t.fill_local(inner_type{}), except_t); } } - for(auto tr_t : run_all()){ + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto inner_rank = std::get<1>(tr_t); - auto& already_set = std::get<2>(tr_t); + [[maybe_unused]] auto& already_set = std::get<2>(tr_t); // Test that it skips filled tiles /*{ @@ -83,15 +83,15 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(fill, TestParam, test_params) { // Throws if PIMPL is empty { tensor_type t; - if(m_world.nproc() == 1) { + if (m_world.nproc() == 1) { BOOST_CHECK_THROW(t.fill(inner_type{}), except_t); } } - for(auto tr_t : run_all()){ + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto inner_rank = std::get<1>(tr_t); - auto& already_set = std::get<2>(tr_t); + [[maybe_unused]] auto& already_set = std::get<2>(tr_t); // Test that it skips filled tiles /*{ @@ -136,19 +136,18 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(fill, TestParam, test_params) { */ BOOST_AUTO_TEST_CASE_TEMPLATE(init_tiles, 
TestParam, test_params) { using tensor_type = tensor_type; - using inner_type = inner_type; using except_t = TiledArray::Exception; // Throws if PIMPL is empty { tensor_type t; - if(m_world.nproc() == 1) { - auto l = [](const Range&){return tile_type{}; }; + if (m_world.nproc() == 1) { + auto l = [](const Range&) { return tile_type{}; }; BOOST_CHECK_THROW(t.init_tiles(l), except_t); } } - for(auto tr_t : run_all()){ + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto inner_rank = std::get<1>(tr_t); auto& corr = std::get<2>(tr_t); @@ -194,24 +193,25 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(init_elements, TestParam, test_params) { using except_t = TiledArray::Exception; using index_type = typename tensor_type::index; - // Throws if PIMPL is empty { tensor_type t; - auto l = [](const index_type&){ return inner_type{}; }; - if(m_world.nproc() == 1) { + auto l = [](const index_type&) { return inner_type{}; }; + if (m_world.nproc() == 1) { BOOST_CHECK_THROW(t.init_elements(l), except_t); } } - for(auto tr_t : run_all()){ + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto inner_rank = std::get<1>(tr_t); auto& corr = std::get<2>(tr_t); - auto l = [this, inner_rank](const index_type& idx)->inner_type{ - if(inner_rank == 1) return inner_vector_tile(idx); - else return inner_matrix_tile(idx); + auto l = [this, inner_rank](const index_type& idx) -> inner_type { + if (inner_rank == 1) + return inner_vector_tile(idx); + else + return inner_matrix_tile(idx); }; // Test that it skips filled tiles @@ -242,11 +242,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(init_elements, TestParam, test_params) { BOOST_AUTO_TEST_CASE_TEMPLATE(trange, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.trange(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); BOOST_TEST(corr.trange() == tr); @@ -256,11 +256,11 @@ 
BOOST_AUTO_TEST_CASE_TEMPLATE(trange, TestParam, test_params) { BOOST_AUTO_TEST_CASE_TEMPLATE(range, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.range(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); bool are_same = corr.range() == tr.tiles_range(); @@ -271,11 +271,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(range, TestParam, test_params) { BOOST_AUTO_TEST_CASE_TEMPLATE(elements_range, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.elements_range(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); bool are_same = corr.elements_range() == tr.elements_range(); @@ -286,48 +286,48 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(elements_range, TestParam, test_params) { BOOST_AUTO_TEST_CASE_TEMPLATE(size, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.size(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); BOOST_TEST(corr.size() == tr.tiles_range().volume()); } } -BOOST_AUTO_TEST_CASE_TEMPLATE(world, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(world, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.world(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); BOOST_TEST(&corr.world() == &m_world); } } /// TODO: Check pmap value -BOOST_AUTO_TEST_CASE_TEMPLATE(pmap, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(pmap, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) 
BOOST_CHECK_THROW(t.pmap(), TiledArray::Exception); } } -BOOST_AUTO_TEST_CASE_TEMPLATE(shape, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(shape, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.shape(), TiledArray::Exception); } using shape_type = typename tensor_type::shape_type; - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); bool are_same = corr.shape() == shape_type(1, tr); BOOST_TEST(are_same); @@ -338,15 +338,15 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(shape, TestParam, test_params){ // Call Operators //------------------------------------------------------------------------------ -BOOST_AUTO_TEST_CASE_TEMPLATE(call_operator, TestParam, test_params){ - for(auto tr_t : run_all()) { +BOOST_AUTO_TEST_CASE_TEMPLATE(call_operator, TestParam, test_params) { + for (auto tr_t : run_all()) { auto inner_rank = std::get<1>(tr_t); - auto& t = std::get<2>(tr_t); + auto& t = std::get<2>(tr_t); auto outer_rank = t.range().rank(); std::string outer_idx = (outer_rank == 1 ? "i" : "i,j"); std::string inner_idx = (inner_rank == 1 ? "k" : "k,l"); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { using except_t = TiledArray::Exception; // Throws if no semicolon BOOST_CHECK_THROW(t(outer_idx), except_t); @@ -361,15 +361,15 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(call_operator, TestParam, test_params){ } } -BOOST_AUTO_TEST_CASE_TEMPLATE(const_call_operator, TestParam, test_params){ - for(auto tr_t : run_all()) { +BOOST_AUTO_TEST_CASE_TEMPLATE(const_call_operator, TestParam, test_params) { + for (auto tr_t : run_all()) { auto inner_rank = std::get<1>(tr_t); - const auto& t = std::get<2>(tr_t); + const auto& t = std::get<2>(tr_t); auto outer_rank = t.range().rank(); std::string outer_idx = (outer_rank == 1 ? "i" : "i,j"); std::string inner_idx = (inner_rank == 1 ? 
"k" : "k,l"); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { using except_t = TiledArray::Exception; // Throws if no semicolon BOOST_CHECK_THROW(t(outer_idx), except_t); @@ -393,36 +393,34 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(const_call_operator, TestParam, test_params){ * of code to test is ensuring an exception is raised when the PIMPL is not * initialized. */ -BOOST_AUTO_TEST_CASE_TEMPLATE(is_dense, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_dense, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.is_dense(), TiledArray::Exception); } using shape_type = typename tensor_type::shape_type; - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); BOOST_TEST(corr.is_dense() == shape_type(1, tr).is_dense()); } } -BOOST_AUTO_TEST_CASE_TEMPLATE(owner, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(owner, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.owner(0), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); // Test throws if index is out of bounds @@ -433,7 +431,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(owner, TestParam, test_params){ BOOST_CHECK_THROW(corr.owner(bad_idx), TiledArray::Exception); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); BOOST_TEST(corr.owner(idx) == corr.pmap()->owner(ordinal)); BOOST_TEST(corr.owner(ordinal) == corr.pmap()->owner(ordinal)); @@ -441,62 +439,57 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(owner, TestParam, 
test_params){ } } -BOOST_AUTO_TEST_CASE_TEMPLATE(owner_init_list, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(owner_init_list, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.owner({0}), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); - auto rank = tr.rank(); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); + auto rank = tr.rank(); auto& corr = std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); using except_t = TiledArray::Exception; // Test throws if index is out of bounds - if(rank == 1) + if (rank == 1) BOOST_CHECK_THROW(corr.owner({upbound[0]}), except_t); - else if(rank == 2) + else if (rank == 2) BOOST_CHECK_THROW(corr.owner({upbound[0], upbound[1]}), except_t); // Throws if index has wrong rank - std::initializer_list il2{0,0,0,0,0,0}; + std::initializer_list il2{0, 0, 0, 0, 0, 0}; BOOST_CHECK_THROW(corr.owner(il2), except_t); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); - const auto owner = corr.pmap()->owner(ordinal); - if(rank == 1){ + const auto owner = corr.pmap()->owner(ordinal); + if (rank == 1) { BOOST_TEST(corr.owner({idx[0]}) == owner); - } - else if(rank == 2){ + } else if (rank == 2) { BOOST_TEST(corr.owner({idx[0], idx[1]}) == owner); } } } } -BOOST_AUTO_TEST_CASE_TEMPLATE(is_local, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_local, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.is_local(0), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); auto& corr = 
std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); // Test throws if index is out of bounds @@ -507,7 +500,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_local, TestParam, test_params){ BOOST_CHECK_THROW(corr.is_local(bad_idx), TiledArray::Exception); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); BOOST_TEST(corr.is_local(idx) == corr.pmap()->is_local(ordinal)); BOOST_TEST(corr.is_local(ordinal) == corr.pmap()->is_local(ordinal)); @@ -515,62 +508,57 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_local, TestParam, test_params){ } } -BOOST_AUTO_TEST_CASE_TEMPLATE(is_local_init_list, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_local_init_list, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.is_local({0}), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); - auto rank = tr.rank(); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); + auto rank = tr.rank(); auto& corr = std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); using except_t = TiledArray::Exception; // Test throws if index is out of bounds - if(rank == 1) + if (rank == 1) BOOST_CHECK_THROW(corr.is_local({upbound[0]}), except_t); - else if(rank == 2) + else if (rank == 2) BOOST_CHECK_THROW(corr.is_local({upbound[0], upbound[1]}), except_t); // Throws if index has wrong rank - std::initializer_list il2{0,0,0,0,0,0}; + std::initializer_list il2{0, 0, 0, 0, 0, 0}; BOOST_CHECK_THROW(corr.is_local(il2), except_t); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); - const auto is_local = corr.pmap()->is_local(ordinal); - if(rank == 1){ + const auto is_local = 
corr.pmap()->is_local(ordinal); + if (rank == 1) { BOOST_TEST(corr.is_local({idx[0]}) == is_local); - } - else if(rank == 2){ + } else if (rank == 2) { BOOST_TEST(corr.is_local({idx[0], idx[1]}) == is_local); } } } } -BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.is_zero(0), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); // Test throws if index is out of bounds @@ -581,7 +569,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero, TestParam, test_params){ BOOST_CHECK_THROW(corr.is_zero(bad_idx), TiledArray::Exception); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); BOOST_TEST(corr.is_zero(idx) == corr.shape().is_zero(ordinal)); BOOST_TEST(corr.owner(ordinal) == corr.pmap()->owner(ordinal)); @@ -589,50 +577,47 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero, TestParam, test_params){ } } -BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero_init_list, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero_init_list, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.is_zero({0}), TiledArray::Exception); } - using shape_type = typename tensor_type::shape_type; - - for(auto tr_t : run_all()) { - auto& tr = std::get<0>(tr_t); - auto rank = tr.rank(); + for (auto tr_t : run_all()) { + auto& tr = std::get<0>(tr_t); + auto rank = tr.rank(); auto& corr = std::get<2>(tr_t); - if(m_world.nproc() == 1){ + if (m_world.nproc() == 1) { const auto& upbound = tr.tiles_range().upbound(); using except_t 
= TiledArray::Exception; // Test throws if index is out of bounds - if(rank == 1) + if (rank == 1) BOOST_CHECK_THROW(corr.is_zero({upbound[0]}), except_t); - else if(rank == 2) + else if (rank == 2) BOOST_CHECK_THROW(corr.is_zero({upbound[0], upbound[1]}), except_t); // Throws if index has wrong rank - std::initializer_list il2{0,0,0,0,0,0}; + std::initializer_list il2{0, 0, 0, 0, 0, 0}; BOOST_CHECK_THROW(corr.is_zero(il2), except_t); } - for(auto idx : corr.range()) { + for (auto idx : corr.range()) { const auto ordinal = corr.range().ordinal(idx); - const auto is_zero = corr.shape().is_zero(ordinal); - if(rank == 1){ + const auto is_zero = corr.shape().is_zero(ordinal); + if (rank == 1) { BOOST_TEST(corr.is_zero({idx[0]}) == is_zero); - } - else if(rank == 2){ + } else if (rank == 2) { BOOST_TEST(corr.is_zero({idx[0], idx[1]}) == is_zero); } } } } -BOOST_AUTO_TEST_CASE_TEMPLATE(swap, TestParam, test_params){ - for(auto tr_t : run_all()) { +BOOST_AUTO_TEST_CASE_TEMPLATE(swap, TestParam, test_params) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); auto copy_corr = corr.clone(); tensor_type t, t2; @@ -642,43 +627,43 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(swap, TestParam, test_params){ } } -//TODO: Actually check that it makes the array replicated. -BOOST_AUTO_TEST_CASE_TEMPLATE(make_replicated, TestParam, test_params){ +// TODO: Actually check that it makes the array replicated. 
+BOOST_AUTO_TEST_CASE_TEMPLATE(make_replicated, TestParam, test_params) { { tensor_type t; - if(m_world.nproc() == 1) + if (m_world.nproc() == 1) BOOST_CHECK_THROW(t.make_replicated(), TiledArray::Exception); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); BOOST_CHECK_NO_THROW(corr.make_replicated()); } } // TODO: Actually check that it truncates -BOOST_AUTO_TEST_CASE_TEMPLATE(truncate, TestParam, test_params){ - for(auto tr_t : run_all()) { +BOOST_AUTO_TEST_CASE_TEMPLATE(truncate, TestParam, test_params) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); BOOST_CHECK_NO_THROW(corr.truncate()); } } -BOOST_AUTO_TEST_CASE_TEMPLATE(is_initialized, TestParam, test_params){ +BOOST_AUTO_TEST_CASE_TEMPLATE(is_initialized, TestParam, test_params) { // Not initialized { tensor_type t1; BOOST_TEST(t1.is_initialized() == false); } - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); BOOST_TEST(corr.is_initialized()); } } BOOST_AUTO_TEST_CASE_TEMPLATE(serialization, TestParam, test_params) { - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); char file_name[] = "tmp.XXXXXX"; mktemp(file_name); @@ -697,7 +682,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(serialization, TestParam, test_params) { } BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_serialization, TestParam, test_params) { - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { auto& corr = std::get<2>(tr_t); const int nio = 1; // use 1 rank for I/O char file_name[] = "tmp.XXXXXX"; @@ -721,12 +706,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_serialization, TestParam, test_params) { * them to the string representation of the tile. 
*/ BOOST_AUTO_TEST_CASE_TEMPLATE(printing, TestParam, test_params) { - for(auto tr_t : run_all()) { + for (auto tr_t : run_all()) { const auto& t = std::get<2>(tr_t); std::stringstream corr; - if(m_world.rank() == 0) { + if (m_world.rank() == 0) { for (auto i = 0; i < t.size(); ++i) { - if(t.is_zero(i)) continue; + if (t.is_zero(i)) continue; corr << i << ": " << t.find(i).get() << std::endl; } } From b93b3f6e6a5476233b8f1c07a27aa466672c30dc Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 18 Oct 2020 13:22:46 -0400 Subject: [PATCH 15/36] fixed evp example (includes and use TA::scalapack::heig, not TA::heig) --- examples/scalapack/evp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/scalapack/evp.cpp b/examples/scalapack/evp.cpp index f4277b132c..22964ccae0 100644 --- a/examples/scalapack/evp.cpp +++ b/examples/scalapack/evp.cpp @@ -28,7 +28,7 @@ #include #include -#include +#include using Array = TA::TArray; // using Array = TA::TSpArray; @@ -93,7 +93,7 @@ int main(int argc, char** argv) { tensor_symm("i,j") = 0.5 * (tensor("i,j") + tensor("j,i")); tensor("i,j") = tensor_symm("i,j"); - auto [evals, evecs_ta] = TA::heig(tensor); + auto [evals, evecs_ta] = TA::scalapack::heig(tensor); //// Check EVP with TA Array tmp = From 3e800a4a67fd6d1bd72322667c69777156b0860f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 19 Oct 2020 12:42:09 -0400 Subject: [PATCH 16/36] TA::lapack::cholesky* work, tested only as part of scalapack suite --- src/TiledArray/algebra/lapack/chol.h | 180 ++++++++++++++++++++++----- tests/scalapack.cpp | 94 ++++++++++---- 2 files changed, 219 insertions(+), 55 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 2d2d87be3a..943685292c 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -25,10 +25,68 @@ #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #include +#include namespace TiledArray { 
namespace lapack { +namespace detail { + +#define MADNESS_DISPATCH_LAPACK_FN(name, args...) \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +template +auto to_eigen(const DistArray& A) { + auto A_repl = A; + A_repl.make_replicated(); + return array_to_eigen(A_repl); +} + +template +auto make_L_eig(const DistArray& A) { + using Array = DistArray; + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + Eigen::Matrix A_eig; + World& world = A.world(); + if (world.rank() == 0) { + A_eig = detail::to_eigen(A); + char uplo = 'L'; + integer n = A_eig.rows(); + numeric_type* a = A_eig.data(); + integer lda = n; + integer info = 0; +#if defined(MADNESS_LINALG_USE_LAPACKE) + MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info); +#else + MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); +#endif + + if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); + } + world.gop.broadcast(A_eig, 0); + return A_eig; +} + +template +void zero_out_upper_triangle(Eigen::MatrixBase& A) { + A.template triangularView().setZero(); +} + +} // namespace detail + /** * @brief Compute the Cholesky factorization of a HPD rank-2 tensor * @@ -46,30 +104,13 @@ namespace lapack { * * @returns The lower triangular Cholesky factor L in TA format */ -template +template >> auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { - auto& world = A.world(); - - // // Call lapack verson of LLT dpotrf_, we have to reverse stuff since - // lapack - // // will think all of our matrices are Col Major - // // RowMatrixXd A_copy = A; - // - // char uplo = 'U'; // Do lower, but need to use U because Row -> Col - // integer n = A.rows(); - // 
real8* a = A.data(); - // integer lda = n; - // integer info; - // - //#ifdef MADNESS_LINALG_USE_LAPACKE - // dpotrf_(&uplo, &n, a, &lda, &info); - //#else - // dpotrf_(&uplo, &n, a, &lda, &info, sizeof(char)); - //#endif - // - // return L; - abort(); - return Array{}; + auto L_eig = detail::make_L_eig(A); + detail::zero_out_upper_triangle(L_eig); + if (l_trange.rank() == 0) l_trange = A.trange(); + return eigen_to_array(A.world(), l_trange, L_eig); } /** @@ -94,26 +135,105 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { */ template auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { - abort(); + World& world = A.world(); + auto L_eig = detail::make_L_eig(A); + if constexpr (RetL) detail::zero_out_upper_triangle(L_eig); + + // if need to return L use its copy to compute inverse + decltype(L_eig) L_inv_eig; + if (RetL && world.rank() == 0) L_inv_eig = L_eig; + + if (world.rank() == 0) { + auto& L_inv_eig_ref = RetL ? L_inv_eig : L_eig; + + char uplo = 'L'; + char diag = 'N'; + integer n = L_eig.rows(); + using numeric_type = typename Array::numeric_type; + numeric_type* l = L_inv_eig_ref.data(); + integer lda = n; + integer info = 0; + MADNESS_DISPATCH_LAPACK_FN(trtri, &uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); + + detail::zero_out_upper_triangle(L_inv_eig_ref); + } + world.gop.broadcast(RetL ? 
L_inv_eig : L_eig, 0); + + if (l_trange.rank() == 0) l_trange = A.trange(); if constexpr (RetL) - return std::make_tuple(Array{}, Array{}); + return std::make_tuple(eigen_to_array(world, l_trange, L_eig), + eigen_to_array(world, l_trange, L_inv_eig)); else - return Array{}; + return eigen_to_array(world, l_trange, L_eig); } template auto cholesky_solve(const Array& A, const Array& B, TiledRange x_trange = TiledRange()) { - abort(); - return Array{}; + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + Eigen::Matrix X_eig; + World& world = A.world(); + if (world.rank() == 0) { + auto A_eig = detail::to_eigen(A); + X_eig = detail::to_eigen(B); + char uplo = 'L'; + integer n = A_eig.rows(); + integer nrhs = X_eig.cols(); + numeric_type* a = A_eig.data(); + numeric_type* b = X_eig.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + MADNESS_DISPATCH_LAPACK_FN(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); + } + world.gop.broadcast(X_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, X_eig); } template auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, TiledRange l_trange = TiledRange(), TiledRange x_trange = TiledRange()) { - abort(); - return Array{}; + World& world = A.world(); + auto L_eig = detail::make_L_eig(A); + detail::zero_out_upper_triangle(L_eig); + + using numeric_type = typename Array::numeric_type; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + Eigen::Matrix X_eig; + if (world.rank() == 0) { + X_eig = detail::to_eigen(B); + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 
'N' : 'C'); + char diag = 'N'; + integer n = L_eig.rows(); + integer nrhs = X_eig.cols(); + numeric_type* a = L_eig.data(); + numeric_type* b = X_eig.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + MADNESS_DISPATCH_LAPACK_FN(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, + b, &ldb, &info); + if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); + } + world.gop.broadcast(X_eig, 0); + if (l_trange.rank() == 0) l_trange = A.trange(); + if (x_trange.rank() == 0) x_trange = B.trange(); + return std::make_tuple(eigen_to_array(world, l_trange, L_eig), + eigen_to_array(world, x_trange, X_eig)); } } // namespace lapack diff --git a/tests/scalapack.cpp b/tests/scalapack.cpp index 3d031d4616..79662bee20 100644 --- a/tests/scalapack.cpp +++ b/tests/scalapack.cpp @@ -4,6 +4,7 @@ #include "range_fixture.h" #include "unit_test_config.h" +#include "TiledArray/algebra/lapack/chol.h" #include "TiledArray/algebra/scalapack/all.h" using namespace TiledArray::scalapack; @@ -465,20 +466,29 @@ BOOST_AUTO_TEST_CASE(sca_chol) { auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array>( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto L = cholesky(ref_ta); + auto L = cholesky(A); + + BOOST_CHECK(L.trange() == A.trange()); + + decltype(A) A_minus_LLt; + A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj(); - BOOST_CHECK(L.trange() == ref_ta.trange()); + BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); - ref_ta("i,j") -= L("i,k") * L("j,k").conj(); + // check against LAPACK also + auto L_ref = TiledArray::lapack::cholesky(A); + decltype(L) L_diff; + L_diff("i,j") = L("i,j") - L_ref("i,j"); - double diff_norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(diff_norm, N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), + N * N * 
std::numeric_limits::epsilon()); GlobalFixture::world->gop.fence(); } @@ -490,21 +500,22 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array>( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto Linv = cholesky_linv(ref_ta); + auto Linv = cholesky_linv(A); + auto Linv_lapack = lapack::cholesky_linv(A); - BOOST_CHECK(Linv.trange() == ref_ta.trange()); + BOOST_CHECK(Linv.trange() == A.trange()); TA::TArray tmp(*GlobalFixture::world, trange); - tmp("i,j") = Linv("i,k") * ref_ta("k,j"); - ref_ta("i,j") = tmp("i,k") * Linv("j,k"); + tmp("i,j") = Linv("i,k") * A("k,j"); + A("i,j") = tmp("i,k") * Linv("j,k"); - TA::foreach_inplace(ref_ta, [](TA::Tensor& tile) { + TA::foreach_inplace(A, [](TA::Tensor& tile) { auto range = tile.range(); auto lo = range.lobound_data(); auto up = range.upbound_data(); @@ -515,9 +526,15 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { } }); - double norm = ref_ta("i,j").norm(*GlobalFixture::world).get(); + double norm = A("i,j").norm().get(); BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + // test against LAPACK + decltype(Linv) Linv_error; + Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); + BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + GlobalFixture::world->gop.fence(); } @@ -528,16 +545,17 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array>( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto [L, Linv] = cholesky_linv(ref_ta); + auto [L, Linv] = cholesky_linv(A); + auto [L_lapack, Linv_lapack] = lapack::cholesky_linv(A); - BOOST_CHECK(Linv.trange() == ref_ta.trange()); - BOOST_CHECK(L.trange() == ref_ta.trange()); + 
BOOST_CHECK(Linv.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); TA::TArray tmp(*GlobalFixture::world, trange); tmp("i,j") = Linv("i,k") * L("k,j"); @@ -556,6 +574,16 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { double norm = tmp("i,j").norm(*GlobalFixture::world).get(); BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + // test against LAPACK + decltype(L) L_error; + L_error("i,j") = L("i,j") - L_lapack("i,j"); + BOOST_CHECK_SMALL(L_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + decltype(Linv) Linv_error; + Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); + BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + GlobalFixture::world->gop.fence(); } @@ -566,15 +594,20 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array>( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); - auto iden = cholesky_solve(ref_ta, ref_ta); + auto iden = cholesky_solve(A, A); + BOOST_CHECK(iden.trange() == A.trange()); - BOOST_CHECK(iden.trange() == ref_ta.trange()); + auto iden_lapack = lapack::cholesky_solve(A, A); + decltype(iden) iden_error; + iden_error("i,j") = iden("i,j") - iden_lapack("i,j"); + BOOST_CHECK_SMALL(iden_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); TA::foreach_inplace(iden, [](TA::Tensor& tile) { auto range = tile.range(); @@ -600,17 +633,28 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { auto trange = gen_trange(N, {128ul}); - auto ref_ta = TA::make_array>( + auto A = TA::make_array>( *GlobalFixture::world, trange, [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); // Should produce X = L**H - auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, ref_ta, ref_ta); - - BOOST_CHECK(X.trange() == ref_ta.trange()); - 
BOOST_CHECK(L.trange() == ref_ta.trange()); + auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + BOOST_CHECK(X.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); + + // first, test against LAPACK + auto [L_lapack, X_lapack] = + lapack::cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + decltype(L) L_error; + L_error("i,j") = L("i,j") - L_lapack("i,j"); + BOOST_CHECK_SMALL(L_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); + decltype(X) X_error; + X_error("i,j") = X("i,j") - X_lapack("i,j"); + BOOST_CHECK_SMALL(X_error("i,j").norm().get(), + N * N * std::numeric_limits::epsilon()); X("i,j") -= L("j,i"); From 1e31689c8ec3811607da016d6b27d5a5790cdcf6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 19 Oct 2020 13:05:53 -0400 Subject: [PATCH 17/36] nrank>1 fixes for lapack::cholesky* --- src/TiledArray/algebra/lapack/chol.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 943685292c..196466a2c1 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -59,10 +59,9 @@ auto make_L_eig(const DistArray& A) { "TA::lapack::{cholesky*} are only usable with a DistArray of " "scalar types"); - Eigen::Matrix A_eig; World& world = A.world(); + auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - A_eig = detail::to_eigen(A); char uplo = 'L'; integer n = A_eig.rows(); numeric_type* a = A_eig.data(); @@ -176,11 +175,10 @@ auto cholesky_solve(const Array& A, const Array& B, "TA::lapack::{cholesky*} are only usable with a DistArray of " "scalar types"); - Eigen::Matrix X_eig; + auto A_eig = detail::to_eigen(A); + auto X_eig = detail::to_eigen(B); World& world = A.world(); if (world.rank() == 0) { - auto A_eig = detail::to_eigen(A); - X_eig = detail::to_eigen(B); char uplo = 'L'; integer n = A_eig.rows(); integer nrhs = X_eig.cols(); @@ -210,9 +208,8 @@ auto 
cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, "TA::lapack::{cholesky*} are only usable with a DistArray of " "scalar types"); - Eigen::Matrix X_eig; + auto X_eig = detail::to_eigen(B); if (world.rank() == 0) { - X_eig = detail::to_eigen(B); char uplo = 'L'; char trans = transpose == TransposeFlag::Transpose ? 'T' From 00ca849e19307517b896d55e083e76d140ce21e4 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 19 Oct 2020 16:10:11 -0400 Subject: [PATCH 18/36] eigen_to_array assumes that the user knows what she's doing + accepts additional pmap parameter to override the default maps. --- src/TiledArray/conversions/eigen.h | 91 +++++++++++++++++------------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index 7380a1faf1..bc805c92ab 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -356,10 +356,13 @@ void counted_tensor_to_eigen_submatrix(const T& tensor, /// This function will copy the content of \c matrix into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in matrix is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. 
+/// /// Usage: /// \code /// Eigen::MatrixXd m(100, 100); @@ -383,16 +386,17 @@ void counted_tensor_to_eigen_submatrix(const T& tensor, /// \param world The world where the array will live /// \param trange The tiled range of the new array /// \param matrix The Eigen matrix to be copied -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. +/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix -/// \throw TiledArray::Exception When world size is greater than 1 -/// \note If using 2 or more World ranks, set \c replicated=true and make sure -/// \c matrix is the same on each rank! template A eigen_to_array(World& world, const typename A::trange_type& trange, const Eigen::MatrixBase& matrix, - bool replicated = false) { + bool replicated = false, + std::shared_ptr pmap = {}) { typedef typename A::index1_type size_type; // Check that trange matches the dimensions of other if ((matrix.cols() > 1) && (matrix.rows() > 1)) { @@ -417,19 +421,12 @@ A eigen_to_array(World& world, const typename A::trange_type& trange, "matrix size."); } - // Check that this is not a distributed computing environment - if (!replicated) - TA_USER_ASSERT(world.size() == 1, - "An array cannot be assigned with an Eigen::Matrix when the " - "number of World ranks is greater than 1."); - // Create a new tensor - A array = (replicated && (world.size() > 1) - ? 
A(world, trange, - std::static_pointer_cast( - std::make_shared( - world, trange.tiles_range().volume()))) - : A(world, trange)); + if (replicated && (world.size() > 1)) + pmap = std::static_pointer_cast( + std::make_shared( + world, trange.tiles_range().volume())); + A array = (pmap ? A(world, trange, pmap) : A(world, trange)); // Spawn tasks to copy Eigen to an Array madness::AtomicInt counter; @@ -526,10 +523,13 @@ array_to_eigen(const DistArray& array) { /// This function will copy the content of \c buffer into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in \c buffer is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. +/// /// Usage: /// \code /// double* buffer = new double[100 * 100]; @@ -557,8 +557,11 @@ array_to_eigen(const DistArray& array) { /// \param buffer The row-major matrix buffer to be copied /// \param m The number of rows in the matrix /// \param n The number of columns in the matrix -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. 
+/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix /// \throw TiledArray::Exception When \c m and \c n are not equal to the /// number of rows or columns in tiled range. @@ -566,7 +569,8 @@ template inline A row_major_buffer_to_array( World& world, const typename A::trange_type& trange, const typename A::value_type::value_type* buffer, const std::size_t m, - const std::size_t n, const bool replicated = false) { + const std::size_t n, const bool replicated = false, + std::shared_ptr pmap = {}) { TA_USER_ASSERT(trange.elements_range().extent(0) == m, "TiledArray::eigen_to_array(): The number of rows in trange " "is not equal to m."); @@ -579,8 +583,8 @@ inline A row_major_buffer_to_array( matrix_type; return eigen_to_array( world, trange, - Eigen::Map(buffer, m, n), - replicated); + Eigen::Map(buffer, m, n), replicated, + pmap); } /// Convert a column-major matrix buffer into an Array object @@ -588,10 +592,13 @@ inline A row_major_buffer_to_array( /// This function will copy the content of \c buffer into an \c Array object /// that is tiled according to the \c trange object. The copy operation is /// done in parallel, and this function will block until all elements of -/// \c matrix have been copied into the result array tiles. The size of -/// \c world.size() must be equal to 1 or \c replicate must be equal to -/// \c true . If \c replicate is \c true, it is your responsibility to ensure -/// that the data in \c buffer is identical on all nodes. +/// \c matrix have been copied into the result array tiles. +/// Each tile is created +/// using the local contents of \c matrix, hence +/// it is your responsibility to ensure that the data in \c matrix +/// is distributed correctly among the ranks. 
If in doubt, you should replicate +/// \c matrix among the ranks prior to calling this. +/// /// Usage: /// \code /// double* buffer = new double[100 * 100]; @@ -619,8 +626,11 @@ inline A row_major_buffer_to_array( /// \param buffer The row-major matrix buffer to be copied /// \param m The number of rows in the matrix /// \param n The number of columns in the matrix -/// \param replicated \c true indicates that the result array should be a -/// replicated array [default = false]. +/// \param replicated if true, the result will be replicated +/// [default = true]. +/// \param pmap the process map object [default=null]; initialized to the +/// default if \p replicated is false, or a replicated pmap if \p replicated +/// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix /// \throw TiledArray::Exception When \c m and \c n are not equal to the /// number of rows or columns in tiled range. @@ -628,7 +638,8 @@ template inline A column_major_buffer_to_array( World& world, const typename A::trange_type& trange, const typename A::value_type::value_type* buffer, const std::size_t m, - const std::size_t n, const bool replicated = false) { + const std::size_t n, const bool replicated = false, + std::shared_ptr pmap = {}) { TA_USER_ASSERT(trange.elements_range().extent(0) == m, "TiledArray::eigen_to_array(): The number of rows in trange " "is not equal to m."); @@ -641,8 +652,8 @@ inline A column_major_buffer_to_array( matrix_type; return eigen_to_array( world, trange, - Eigen::Map(buffer, m, n), - replicated); + Eigen::Map(buffer, m, n), replicated, + pmap); } } // namespace TiledArray From d15946e41fa0dcbcc071d7dc0f229950b760282b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 19 Oct 2020 17:01:49 -0400 Subject: [PATCH 19/36] TA did not know how to serialize Eigen matrices ... really?! 
--- src/TiledArray/algebra/lapack/chol.h | 8 ++--- src/TiledArray/conversions/eigen.h | 24 +++++++++---- src/TiledArray/external/eigen.h | 52 ++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 196466a2c1..1923cd167a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -75,7 +75,7 @@ auto make_L_eig(const DistArray& A) { if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); } - world.gop.broadcast(A_eig, 0); + world.gop.broadcast_serializable(A_eig, 0); return A_eig; } @@ -157,7 +157,7 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { detail::zero_out_upper_triangle(L_inv_eig_ref); } - world.gop.broadcast(RetL ? L_inv_eig : L_eig, 0); + world.gop.broadcast_serializable(RetL ? L_inv_eig : L_eig, 0); if (l_trange.rank() == 0) l_trange = A.trange(); if constexpr (RetL) @@ -190,7 +190,7 @@ auto cholesky_solve(const Array& A, const Array& B, MADNESS_DISPATCH_LAPACK_FN(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); } - world.gop.broadcast(X_eig, 0); + world.gop.broadcast_serializable(X_eig, 0); if (x_trange.rank() == 0) x_trange = B.trange(); return eigen_to_array(world, x_trange, X_eig); } @@ -226,7 +226,7 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, b, &ldb, &info); if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); } - world.gop.broadcast(X_eig, 0); + world.gop.broadcast_serializable(X_eig, 0); if (l_trange.rank() == 0) l_trange = A.trange(); if (x_trange.rank() == 0) x_trange = B.trange(); return std::make_tuple(eigen_to_array(world, l_trange, L_eig), diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index bc805c92ab..bb931d8d1d 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -351,6 +351,7 @@ void 
counted_tensor_to_eigen_submatrix(const T& tensor, } // namespace detail +// clang-format off /// Convert an Eigen matrix into an Array object /// This function will copy the content of \c matrix into an \c Array object @@ -392,6 +393,7 @@ void counted_tensor_to_eigen_submatrix(const T& tensor, /// default if \p replicated is false, or a replicated pmap if \p replicated /// is true; ignored if \p replicated is true and \c world.size()>1 /// \return An \c Array object that is a copy of \c matrix +// clang-format on template A eigen_to_array(World& world, const typename A::trange_type& trange, const Eigen::MatrixBase& matrix, @@ -433,17 +435,23 @@ A eigen_to_array(World& world, const typename A::trange_type& trange, counter = 0; std::int64_t n = 0; for (std::size_t i = 0; i < array.size(); ++i) { - world.taskq.add(&detail::counted_eigen_submatrix_to_tensor, - &matrix, &array, i, &counter); - ++n; + if (array.is_local(i)) { + world.taskq.add(&detail::counted_eigen_submatrix_to_tensor, + &matrix, &array, i, &counter); + ++n; + } } // Wait until the write tasks are complete array.world().await([&counter, n]() { return counter == n; }); + // truncate, n.b. this can replace the wait above + array.truncate(); + return array; } +// clang-format off /// Convert an Array object into an Eigen matrix object /// This function will copy the content of an \c Array object into matrix. The @@ -462,10 +470,14 @@ A eigen_to_array(World& world, const typename A::trange_type& trange, /// \tparam EigenStorageOrder The storage order of the resulting Eigen::Matrix /// object; the default is Eigen::ColMajor, i.e. the column-major storage /// \param array The array to be converted. It must be replicated if using 2 or -/// more World ranks. \return an Eigen matrix; it will contain same data on each -/// World rank. \throw TiledArray::Exception When world size is greater than 1 -/// and \c array is not replicated. \throw TiledArray::Exception When the number +/// more World ranks. 
+/// \return an Eigen matrix; it will contain same data on each +/// World rank. +/// \throw TiledArray::Exception When world size is greater than 1 +/// and \c array is not replicated. +/// \throw TiledArray::Exception When the number /// of dimensions of \c array is not equal to 1 or 2. +// clang-format on template Eigen::Matrix +class archive_array; +template +inline archive_array wrap(const T*, unsigned int); +template +struct ArchiveStoreImpl; +template +struct ArchiveLoadImpl; + +template +struct ArchiveStoreImpl< + Archive, + Eigen::Matrix> { + static inline void store( + const Archive& ar, + const Eigen::Matrix& t) { + ar& t.rows() & t.cols(); + if (t.size()) ar& madness::archive::wrap(t.data(), t.size()); + } +}; + +template +struct ArchiveLoadImpl< + Archive, + Eigen::Matrix> { + static inline void load( + const Archive& ar, + Eigen::Matrix& t) { + typename Eigen::Matrix::Index nrows(0), + ncols(0); + ar& nrows& ncols; + t.resize(nrows, ncols); + if (t.size()) ar& madness::archive::wrap(t.data(), t.size()); + } +}; + +} // namespace archive +} // namespace madness + #endif // TILEDARRAY_EXTERNAL_EIGEN_H__INCLUDED From 03f686cf7d0529836b9528a99f87f957f3c048e2 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 19 Oct 2020 23:42:51 -0400 Subject: [PATCH 20/36] amended eigen unit tests for 00ca849e19307517b896d55e083e76d140ce21e4 --- tests/eigen.cpp | 47 ++++++----------------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/tests/eigen.cpp b/tests/eigen.cpp index 96e128c66e..37eef88220 100644 --- a/tests/eigen.cpp +++ b/tests/eigen.cpp @@ -158,26 +158,9 @@ BOOST_AUTO_TEST_CASE(matrix_to_array) { // Fill the matrix with random data matrix = decltype(matrix)::Random(matrix.rows(), matrix.cols()); - if (GlobalFixture::world->size() == 1) { - // Copy matrix to array - BOOST_CHECK_NO_THROW((array = eigen_to_array(*GlobalFixture::world, - trange, matrix))); - } else { - // Check that eigen_to_array does not work in 
distributed environments -#if !defined(TA_USER_ASSERT_DISABLED) - BOOST_CHECK_THROW( - (eigen_to_array(*GlobalFixture::world, trange, matrix)), - TiledArray::Exception); -#endif - - // Note: The following tests constructs a replicated array, but the data may - // not be identical. That is OK here since we are check the local data, but - // in real applications the data should be identical. - - // Copy matrix to a replicated array - BOOST_CHECK_NO_THROW((array = eigen_to_array( - *GlobalFixture::world, trange, matrix, true))); - } + // Copy matrix to array + BOOST_CHECK_NO_THROW( + (array = eigen_to_array(*GlobalFixture::world, trange, matrix))); // Check that the data in array is equal to that in matrix for (Range::const_iterator it = array.range().begin(); @@ -195,27 +178,9 @@ BOOST_AUTO_TEST_CASE(vector_to_array) { // Fill the vector with random data vector = Eigen::VectorXi::Random(vector.size()); - if (GlobalFixture::world->size() == 1) { - // Convert the vector to an array - BOOST_CHECK_NO_THROW((array1 = eigen_to_array( - *GlobalFixture::world, trange1, vector))); - - } else { - // Check that eigen_to_array does not work in distributed environments -#if !defined(TA_USER_ASSERT_DISABLED) - BOOST_CHECK_THROW( - (eigen_to_array(*GlobalFixture::world, trange1, vector)), - TiledArray::Exception); -#endif - - // Note: The following tests constructs a replicated array, but the data may - // not be identical. That is OK here since we are check the local data, but - // in real applications the data should be identical. 
- - // Convert the vector to an array - BOOST_CHECK_NO_THROW((array1 = eigen_to_array( - *GlobalFixture::world, trange1, vector, true))); - } + // Convert the vector to an array + BOOST_CHECK_NO_THROW((array1 = eigen_to_array(*GlobalFixture::world, + trange1, vector))); // Check that the data in array matches the data in vector for (Range::const_iterator it = array1.range().begin(); From 6223ab990e56aee441ac8cb81a2c2bdceb26a1c5 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:29:57 -0400 Subject: [PATCH 21/36] typo --- tests/scalapack.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scalapack.cpp b/tests/scalapack.cpp index 79662bee20..b2e1b64c68 100644 --- a/tests/scalapack.cpp +++ b/tests/scalapack.cpp @@ -449,7 +449,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { BOOST_CHECK(evecs.trange() == ref_ta.trange()); - // TODO: Check validity of eigenvectors, not crutial for the time being + // TODO: Check validity of eigenvectors, not crucial for the time being // Check eigenvalue correctness double tol = N * N * std::numeric_limits::epsilon(); From 5297c1858c69e9a46f4991f85abe70d225205994 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:33:57 -0400 Subject: [PATCH 22/36] added TA::lapack::heig + reorganized TA::lapack::cholesky --- src/CMakeLists.txt | 2 + src/TiledArray/algebra/lapack/chol.h | 67 +++++------ src/TiledArray/algebra/lapack/heig.h | 166 +++++++++++++++++++++++++++ src/TiledArray/algebra/lapack/util.h | 122 ++++++++++++++++++++ tests/scalapack.cpp | 25 +++- 5 files changed, 339 insertions(+), 43 deletions(-) create mode 100644 src/TiledArray/algebra/lapack/heig.h create mode 100644 src/TiledArray/algebra/lapack/util.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 66d780df3e..8f1c266365 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -66,6 +66,8 @@ TiledArray/algebra/lu.h TiledArray/algebra/svd.h TiledArray/algebra/types.h TiledArray/algebra/lapack/chol.h 
+TiledArray/algebra/lapack/heig.h +TiledArray/algebra/lapack/util.h TiledArray/algebra/scalapack/chol.h TiledArray/algebra/scalapack/heig.h TiledArray/algebra/scalapack/lu.h diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 1923cd167a..ae21470fbc 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -24,6 +24,7 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED +#include #include #include @@ -32,23 +33,24 @@ namespace lapack { namespace detail { -#define MADNESS_DISPATCH_LAPACK_FN(name, args...) \ - if constexpr (std::is_same_v) \ - d##name##_(args); \ - else if constexpr (std::is_same_v) \ - s##name##_(args); \ - else if constexpr (std::is_same_v>) \ - z##name##_(args); \ - else if constexpr (std::is_same_v>) \ - c##name##_(args); \ - else \ - std::abort(); +template +void chol_eig( + Eigen::Matrix& A) { + using numeric_type = Scalar; + char uplo = 'L'; + integer n = A.rows(); + numeric_type* a = A.data(); + integer lda = n; + integer info = 0; +#if defined(MADNESS_LINALG_USE_LAPACKE) + MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info); +#else + MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); +#endif -template -auto to_eigen(const DistArray& A) { - auto A_repl = A; - A_repl.make_replicated(); - return array_to_eigen(A_repl); + if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); } template @@ -62,28 +64,12 @@ auto make_L_eig(const DistArray& A) { World& world = A.world(); auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - char uplo = 'L'; - integer n = A_eig.rows(); - numeric_type* a = A_eig.data(); - integer lda = n; - integer info = 0; -#if defined(MADNESS_LINALG_USE_LAPACKE) - MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info); -#else - MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); -#endif - - if (info != 0) TA_EXCEPTION("LAPACK::potrf 
failed"); + chol_eig(A_eig); } world.gop.broadcast_serializable(A_eig, 0); return A_eig; } -template -void zero_out_upper_triangle(Eigen::MatrixBase& A) { - A.template triangularView().setZero(); -} - } // namespace detail /** @@ -95,7 +81,7 @@ void zero_out_upper_triangle(Eigen::MatrixBase& A) { * * auto L = cholesky(A, ...) * - * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * @tparam Array a DistArray type (i.e., @c is_array_v is true) * * @param[in] A Input array to be diagonalized. Must be rank-2 * @param[in] l_trange TiledRange for resulting Cholesky factor. If left @@ -123,7 +109,7 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { * auto Linv = cholesky_Linv(A, ...) * auto [L,Linv] = cholesky_Linv(A, ...) * - * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * @tparam Array a DistArray type (i.e., @c is_array_v is true) * @tparam RetL Whether or not to return the cholesky factor * * @param[in] A Input array to be diagonalized. 
Must be rank-2 @@ -132,7 +118,8 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { * * @returns The inverse lower triangular Cholesky factor in TA format */ -template +template >> auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { World& world = A.world(); auto L_eig = detail::make_L_eig(A); @@ -167,7 +154,8 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { return eigen_to_array(world, l_trange, L_eig); } -template +template >> auto cholesky_solve(const Array& A, const Array& B, TiledRange x_trange = TiledRange()) { using numeric_type = typename Array::numeric_type; @@ -195,7 +183,8 @@ auto cholesky_solve(const Array& A, const Array& B, return eigen_to_array(world, x_trange, X_eig); } -template +template >> auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, TiledRange l_trange = TiledRange(), TiledRange x_trange = TiledRange()) { @@ -236,4 +225,4 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, } // namespace lapack } // namespace TiledArray -#endif // TILEDARRAY_ALGEBRA_LAPACK_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h new file mode 100644 index 0000000000..817ef3e846 --- /dev/null +++ b/src/TiledArray/algebra/lapack/heig.h @@ -0,0 +1,166 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * heig.h + * Created: 19 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED + +#include +#include +#include + +namespace TiledArray { +namespace lapack { + +/** + * @brief Solve the standard eigenvalue problem with LAPACK + * + * A(i,k) X(k,j) = X(i,j) E(j) + * + * Example Usage: + * + * auto [E, X] = heig(A, ...) + * + * @tparam Array Input array type + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, + * will default to array.trange() + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { + using scalar_type = typename Array::scalar_type; + using numeric_type = typename Array::numeric_type; + constexpr const bool is_real = std::is_same_v; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + std::vector evals; + if (world.rank() == 0) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A_eig.rows(); + numeric_type* a = A_eig.data(); + integer lda = n; + integer info = 0; + evals.resize(n); + integer lwork = -1; + std::vector work(1); + // run once to query, then to compute + while (lwork != static_cast(work.size())) { + if (lwork > 0) { + work.resize(lwork); + } + if constexpr (is_real) { +#if defined(MADNESS_LINALG_USE_LAPACKE) + MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, + evals.data(), work.data(), &lwork, &info); +#else + MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, + evals.data(), work.data(), &lwork, &info, 
sizeof(char), sizeof(char)); +#endif + } else { + std::vector rwork; + if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); +#if defined(MADNESS_LINALG_USE_LAPACKE) + MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, + evals.data(), work.data(), &lwork, + &rwork.data(), &info); +#else + MADNESS_DISPATCH_LAPACK_FN( + heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, + &rwork.data(), &info, sizeof(char), sizeof(char)); +#endif + } + if (lwork == -1) { + if constexpr (is_real) { + lwork = static_cast(work[0]); + } else { + lwork = static_cast(work[0].real()); + } + TA_ASSERT(lwork > 1); + } + }; + + if (info != 0) { + if (is_real) + TA_EXCEPTION("LAPACK::syev failed"); + else + TA_EXCEPTION("LAPACK::heev failed"); + } + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple(evals, + eigen_to_array(A.world(), evec_trange, A_eig)); +} + +/** + * @brief Solve the generalized eigenvalue problem with LAPACK + * + * A(i,k) X(k,j) = B(i,k) X(k,j) E(j) + * + * with + * + * X(k,i) B(k,l) X(l,j) = I(i,j) + * + * Example Usage: + * + * auto [E, X] = heig(A, B, ...) + * + * @tparam Array Input array type + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] B Positive-definite matrix + * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, + * will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. 
+ */ +template +auto heig(const ArrayA& A, const ArrayB& B, + TiledRange evec_trange = TiledRange()) { + using scalar_type = typename ArrayA::scalar_type; + using numeric_type = typename ArrayA::numeric_type; + constexpr const bool is_real = std::is_same_v; + static_assert(std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a DistArray of " + "scalar types"); + + abort(); + return std::tuple(std::vector{}, EVecType{}); +} + +} // namespace lapack +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h new file mode 100644 index 0000000000..280308bf4a --- /dev/null +++ b/src/TiledArray/algebra/lapack/util.h @@ -0,0 +1,122 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * util.h + * Created: 19 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED + +#include +#include + +namespace TiledArray { +namespace lapack { + +#define MADNESS_DISPATCH_LAPACK_FN(name, args...) 
\ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +namespace detail { + +template +auto to_eigen(const DistArray& A) { + auto A_repl = A; + A_repl.make_replicated(); + return array_to_eigen(A_repl); +} + +template >> +auto to_eigen(const ContiguousTensor& A) { + using numeric_type = TiledArray::detail::numeric_t; + static_assert( + std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of " + "scalar types"); + TA_ASSERT(A.range().rank() == 1 || A.range().rank() == 2); + using colmajor_matrix_type = Eigen::Matrix; + colmajor_matrix_type result(A.range().extent(0), + A.range().rank() == 2 ? A.range().extent(1) : 1); + constexpr const auto layout = + TiledArray::detail::ordinal_trait::layout; + if (layout == TiledArray::OrdinalType::RowMajor) { + using rowmajor_matrix_type = Eigen::Matrix; + auto result_map = Eigen::Map( + A.data(), result.rows(), result.cols()); + result = result_map; + } else if constexpr (layout == TiledArray::OrdinalType::ColMajor) { + using rowmajor_matrix_type = Eigen::Matrix; + auto result_map = Eigen::Map( + A.data(), result.rows(), result.cols()); + result = result_map; + } else + abort(); + return result; +} + +template >> +auto from_eigen( + const Eigen::Matrix& A, + typename ContiguousTensor::range_type range = {}) { + using numeric_type = TiledArray::detail::numeric_t; + static_assert( + std::is_same_v, + "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of " + "scalar types"); + using range_type = typename ContiguousTensor::range_type; + if (range.rank() == 0) + range = range_type(A.rows(), A.cols()); + else + TA_ASSERT(A.rows() * A.cols() == range.volume()); + ContiguousTensor result(range); + auto result_map = eigen_map(result, A.rows(), A.cols()); + result_map = A; + return 
result; +} + +template +void zero_out_upper_triangle(Eigen::MatrixBase& A) { + A.template triangularView().setZero(); +} + +} // namespace detail + +} // namespace lapack +} // namespace TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED diff --git a/tests/scalapack.cpp b/tests/scalapack.cpp index b2e1b64c68..f728938a25 100644 --- a/tests/scalapack.cpp +++ b/tests/scalapack.cpp @@ -5,6 +5,7 @@ #include "unit_test_config.h" #include "TiledArray/algebra/lapack/chol.h" +#include "TiledArray/algebra/lapack/heig.h" #include "TiledArray/algebra/scalapack/all.h" using namespace TiledArray::scalapack; @@ -376,16 +377,24 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { }); auto [evals, evecs] = heig(ref_ta); + auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta); // auto evals = heig( ref_ta ); BOOST_CHECK(evecs.trange() == ref_ta.trange()); - // TODO: Check validity of eigenvectors, not crutial for the time being + // check eigenvectors against lapack only, for now ... + decltype(evecs) evecs_error; + evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j"); + // TODO need to fix phases of the eigenvectors to be able to compare ... + // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), + // N * N * std::numeric_limits::epsilon()); // Check eigenvalue correctness double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) + for (int64_t i = 0; i < N; ++i) { BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); + BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol); + } GlobalFixture::world->gop.fence(); } @@ -404,15 +413,23 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { auto new_trange = gen_trange(N, {64ul}); auto [evals, evecs] = heig(ref_ta, new_trange, 128); + auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange); BOOST_CHECK(evecs.trange() == new_trange); - // TODO: Check validity of eigenvectors, not crutial for the time being + // check eigenvectors against lapack only, for now ... 
+ decltype(evecs) evecs_error; + evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j"); + // TODO need to fix phases of the eigenvectors to be able to compare ... + // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), + // N * N * std::numeric_limits::epsilon()); // Check eigenvalue correctness double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) + for (int64_t i = 0; i < N; ++i) { BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); + BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol); + } GlobalFixture::world->gop.fence(); } From 13da0a5dd9168499030f7b67e77e7fb7981e6f13 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:34:57 -0400 Subject: [PATCH 23/36] TA::eigen_map generalized to accept target matrix layout (default is still row-major --- src/TiledArray/conversions/eigen.h | 69 +++++++++++++++++------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index bb931d8d1d..b43602c98e 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -61,53 +61,57 @@ typedef Eigen::Matrix EigenVectorXl; /// Construct a const Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// \tparam T A contiguous tensor type, e.g. 
TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \param m The number of rows in the result matrix /// \param n The number of columns in the result matrix /// \return An m x n Eigen matrix map for \c tensor /// \throw TiledArray::Exception When m * n is not equal to \c tensor size -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(const T& tensor, const std::size_t m, const std::size_t n) { TA_ASSERT((m * n) == tensor.size()); return Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign>(tensor.data(), m, n); } /// Construct an Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \param m The number of rows in the result matrix /// \param n The number of columns in the result matrix /// \return An m x n Eigen matrix map for \c tensor /// \throw TiledArray::Exception When m * n is not equal to \c tensor size -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(T& tensor, const std::size_t m, const std::size_t n) { TA_ASSERT((m * n) == tensor.size()); return Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign>(tensor.data(), m, n); } /// Construct a const Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. 
TiledArray::Tensor -/// \param tensor The tensor object -/// \param n The number of elements in the result matrix -/// \return An n element Eigen vector map for \c tensor -/// \throw TiledArray::Exception When n is not equal to \c tensor size +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \param tensor The +/// tensor object \param n The number of elements in the result matrix \return +/// An n element Eigen vector map for \c tensor \throw TiledArray::Exception +/// When n is not equal to \c tensor size template >* = nullptr> inline Eigen::Map< @@ -127,7 +131,7 @@ eigen_map(const T& tensor, const std::size_t n) { /// \param tensor The tensor object /// \param n The number of elements in the result matrix /// \return An n element Eigen vector map for \c tensor -/// \throw TiledArray::Exception When m * n is not equal to \c tensor size +/// \throw TiledArray::Exception When n is not equal to \c tensor size template >* = nullptr> inline Eigen::Map, @@ -141,42 +145,49 @@ eigen_map(T& tensor, const std::size_t n) { /// Construct a const Eigen::Map object for a given Tensor object -/// The dimensions of the result tensor -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// The dimensions of the result tensor are extracted from the tensor itself +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \return An Eigen matrix map for \c tensor /// \throw TiledArray::Exception When \c tensor dimensions are not equal to 2 /// or 1. 
-template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(const T& tensor) { TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); const auto* MADNESS_RESTRICT const tensor_extent = tensor.range().extent_data(); - return eigen_map(tensor, tensor_extent[0], - (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); + return eigen_map( + tensor, tensor_extent[0], + (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); } /// Construct an Eigen::Map object for a given Tensor object -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \param tensor The tensor object +/// The dimensions of the result tensor are extracted from the tensor itself +/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c +/// TiledArray::detail::is_contiguous_tensor_v must be true \tparam Storage +/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor +/// \param tensor The tensor object, laid out according to Storage /// \return An Eigen matrix map for \c tensor /// \throw When \c tensor dimensions are not equal to 2 or 1. -template >* = nullptr> inline Eigen::Map, + Eigen::Dynamic, Storage>, Eigen::AutoAlign> eigen_map(T& tensor) { TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); const auto* MADNESS_RESTRICT const tensor_extent = tensor.range().extent_data(); - return eigen_map(tensor, tensor_extent[0], - (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul)); + return eigen_map( + tensor, tensor_extent[0], + (tensor.range().rank() == 2u ? 
tensor_extent[1] : 1ul)); } /// Copy a block of an Eigen matrix into a tensor From 16499ce8197bfe49d2586fd90fc8886c94b52901 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:35:32 -0400 Subject: [PATCH 24/36] Introduced OrdinalType --- src/TiledArray/algebra/lapack/util.h | 2 +- src/TiledArray/external/btas.h | 7 +++++++ src/TiledArray/tensor/type_traits.h | 28 ++++++++++++++++++++++++++++ tests/btas.cpp | 23 +++++++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h index 280308bf4a..82f3048c93 100644 --- a/src/TiledArray/algebra/lapack/util.h +++ b/src/TiledArray/algebra/lapack/util.h @@ -66,7 +66,7 @@ auto to_eigen(const ContiguousTensor& A) { colmajor_matrix_type result(A.range().extent(0), A.range().rank() == 2 ? A.range().extent(1) : 1); constexpr const auto layout = - TiledArray::detail::ordinal_trait::layout; + TiledArray::detail::ordinal_traits::layout; if (layout == TiledArray::OrdinalType::RowMajor) { using rowmajor_matrix_type = Eigen::Matrix; diff --git a/src/TiledArray/external/btas.h b/src/TiledArray/external/btas.h index 8c13e6467f..4c6ca25729 100644 --- a/src/TiledArray/external/btas.h +++ b/src/TiledArray/external/btas.h @@ -702,6 +702,13 @@ template struct is_contiguous_tensor_helper> : public std::true_type {}; +/// btas::RangeNd can be col or row-major +template +struct ordinal_traits> { + static constexpr const auto type = + _Order == CblasRowMajor ? 
OrdinalType::RowMajor : OrdinalType::ColMajor; +}; + } // namespace detail } // namespace TiledArray diff --git a/src/TiledArray/tensor/type_traits.h b/src/TiledArray/tensor/type_traits.h index 162dbf3770..83ec789c44 100644 --- a/src/TiledArray/tensor/type_traits.h +++ b/src/TiledArray/tensor/type_traits.h @@ -250,6 +250,34 @@ struct is_cuda_tile> #endif } // namespace detail + +/// Specifies how coordinates are mapped to ordinal values +/// - RowMajor: stride decreases as mode index increases +/// - ColMajor: stride increases with the mode index +/// - Other: unknown or dynamic order +enum class OrdinalType { RowMajor = -1, ColMajor = 1, Other = 0, Invalid }; + +namespace detail { + +/// ordinal trait specifies properties of the ordinal +template +struct ordinal_traits; + +/// TA::Range is hardwired to row-major +template <> +struct ordinal_traits { + static constexpr const auto type = OrdinalType::RowMajor; +}; + +/// ordinal traits of contiguous tensors are defined by their range type +template +struct ordinal_traits>> { + static constexpr const auto type = ordinal_traits< + std::decay_t().range())>>::type; +}; + +} // namespace detail + } // namespace TiledArray #endif // TILEDARRAY_TENSOR_TYPE_TRAITS_H__INCLUDED diff --git a/tests/btas.cpp b/tests/btas.cpp index 99cf14ec97..df9122695a 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -21,6 +21,7 @@ */ #include +#include #ifdef TILEDARRAY_HAS_BTAS @@ -32,6 +33,28 @@ using namespace TiledArray; +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::RangeNd<> is row-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::RangeNd is row-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::ColMajor, + "btas::RangeNd is col-major"); +static_assert(detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::Tenspr is row-major"); +static_assert( + detail::ordinal_traits>::type == + OrdinalType::RowMajor, + "btas::Tenspr is 
row-major"); +static_assert( + detail::ordinal_traits< + TiledArray::Tile>>::type == + OrdinalType::RowMajor, + "TA::Tile> is row-major"); + // test both bare (deep-copy) BTAS tensor as well as its shallow-copy wrap in // Tile<>, using both btas::RangeNd<> and TiledArray::Range as the range type typedef boost::mpl::list< From 114cf4845b7caa32ca301f09f8ce1f3171f0d8f1 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:45:59 -0400 Subject: [PATCH 25/36] TA::lapack::cholesky also works for TA::Tensor and ony other contiguous tensor --- src/TiledArray/algebra/lapack/chol.h | 10 ++++ tests/CMakeLists.txt | 1 + tests/lapack.cpp | 83 ++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/lapack.cpp diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index ae21470fbc..c10a628cf5 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -98,6 +98,16 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { return eigen_to_array(A.world(), l_trange, L_eig); } +template >> +auto cholesky(const ContiguousTensor& A) { + auto A_eig = detail::to_eigen(A); + detail::chol_eig(A_eig); + detail::zero_out_upper_triangle(A_eig); + return detail::from_eigen(A_eig, A.range()); +} + /** * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. 
* Optionally return the Cholesky factor itself diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8d5f74b44d..f21ca1ffca 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -110,6 +110,7 @@ set(ta_test_src_files ta_test.cpp t_tot_tot_contract_.cpp tot_tot_tot_contract_.cpp einsum.cpp + lapack.cpp ) if(CUDA_FOUND) diff --git a/tests/lapack.cpp b/tests/lapack.cpp new file mode 100644 index 0000000000..287acab547 --- /dev/null +++ b/tests/lapack.cpp @@ -0,0 +1,83 @@ +#include +#include +#include "TiledArray/config.h" +#include "range_fixture.h" +#include "unit_test_config.h" + +#include "TiledArray/algebra/lapack/chol.h" +#include "TiledArray/algebra/lapack/heig.h" + +using namespace TiledArray::lapack; + +struct LAPACKFixture { + int64_t N; + std::vector htoeplitz_vector; + std::vector exact_evals; + + inline double matrix_element_generator(int64_t i, int64_t j) { + // Generates a Circulant matrix: good condition number + return htoeplitz_vector[std::abs(i - j)]; + } + + inline double make_ta_reference(TA::Tensor& t, + TA::Range const& range) { + t = TA::Tensor(range, 0.0); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) { + for (auto n = lo[1]; n < up[1]; ++n) { + t(m, n) = matrix_element_generator(m, n); + } + } + + return t.norm(); + }; + + LAPACKFixture(int64_t N) : N(N), htoeplitz_vector(N), exact_evals(N) { + // Generate an hermitian Circulant vector + std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); + htoeplitz_vector[0] = 100; + std::default_random_engine gen(0); + std::uniform_real_distribution<> dist(0., 1.); + for (int64_t i = 1; i <= (N / 2); ++i) { + double val = dist(gen); + htoeplitz_vector[i] = val; + htoeplitz_vector[N - i] = val; + } + + // Compute exact eigenvalues + const double ff = 2. 
* M_PI / N; + for (int64_t j = 0; j < N; ++j) { + double val = htoeplitz_vector[0]; + for (int64_t k = 1; k < N; ++k) + val += htoeplitz_vector[N - k] * std::cos(ff * j * k); + exact_evals[j] = val; + } + + std::sort(exact_evals.begin(), exact_evals.end()); + } + + LAPACKFixture() : LAPACKFixture(1000) {} +}; + +BOOST_FIXTURE_TEST_SUITE(lapack_suite, LAPACKFixture) + +BOOST_AUTO_TEST_CASE(chol) { + auto range = TA::Range{N, N}; + + TA::Tensor A; + this->make_ta_reference(A, range); + + auto L = cholesky(A); + + decltype(A) A_minus_LLt; + A_minus_LLt = A.clone(); + A_minus_LLt.gemm(L, L, -1, + math::GemmHelper{madness::cblas::NoTrans, + madness::cblas::ConjTrans, 2, 2, 2}); + + BOOST_CHECK_SMALL(A_minus_LLt.norm(), + N * N * std::numeric_limits::epsilon()); +} + +BOOST_AUTO_TEST_SUITE_END() From 833a2da26c4177088f74a20d6791500dc5ccb89a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:51:20 -0400 Subject: [PATCH 26/36] amended 16499ce8197bfe49d2586fd90fc8886c94b52901 --- src/TiledArray/algebra/lapack/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h index 82f3048c93..8907f8e9d9 100644 --- a/src/TiledArray/algebra/lapack/util.h +++ b/src/TiledArray/algebra/lapack/util.h @@ -66,7 +66,7 @@ auto to_eigen(const ContiguousTensor& A) { colmajor_matrix_type result(A.range().extent(0), A.range().rank() == 2 ? 
A.range().extent(1) : 1); constexpr const auto layout = - TiledArray::detail::ordinal_traits::layout; + TiledArray::detail::ordinal_traits::type; if (layout == TiledArray::OrdinalType::RowMajor) { using rowmajor_matrix_type = Eigen::Matrix; From b8b459a4f40d298b77d95ca555201eda2053761c Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 20 Oct 2020 13:54:19 -0400 Subject: [PATCH 27/36] dox++ --- src/TiledArray/algebra/lapack/chol.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index c10a628cf5..766970e996 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -88,6 +88,7 @@ auto make_L_eig(const DistArray& A) { * empty, will default to array.trange() * * @returns The lower triangular Cholesky factor L in TA format + * @note this is a collective operation with respect to the world of @p A */ template >> @@ -98,6 +99,23 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { return eigen_to_array(A.world(), l_trange, L_eig); } +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam ContiguousTensor a contiguous tensor type (i.e., @c + * is_contiguous_tensor_v is true) + * + * @param[in] A Input array to be diagonalized. 
Must be rank-2 + * @returns The lower triangular Cholesky factor L as a ContiguousTensor + * @note this is a non-collective operation, only computes on the rank on which + * invoked + */ template >> @@ -127,6 +145,7 @@ auto cholesky(const ContiguousTensor& A) { * If left empty, will default to array.trange() * * @returns The inverse lower triangular Cholesky factor in TA format + * @note this is a collective operation with respect to the world of @p A */ template >> From 9a1e84fde51e5383e739d3b0fc54d1bff4f7ac89 Mon Sep 17 00:00:00 2001 From: asadchev Date: Mon, 16 Nov 2020 15:50:52 -0500 Subject: [PATCH 28/36] Refactor lapack bindings --- src/CMakeLists.txt | 3 +- src/TiledArray/algebra/chol.h | 2 +- src/TiledArray/algebra/lapack/chol.h | 66 +------- src/TiledArray/algebra/lapack/heig.h | 107 ++++++------ src/TiledArray/algebra/lapack/lapack.cc | 206 ++++++++++++++++++++++++ src/TiledArray/algebra/lapack/lapack.h | 59 +++++++ src/TiledArray/algebra/lapack/util.h | 13 -- tests/CMakeLists.txt | 160 +++++++++--------- 8 files changed, 408 insertions(+), 208 deletions(-) create mode 100644 src/TiledArray/algebra/lapack/lapack.cc create mode 100644 src/TiledArray/algebra/lapack/lapack.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c3ca6675e2..8a6ca49265 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -207,6 +207,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp +TiledArray/algebra/lapack/lapack.cc ) # the list of libraries on which TiledArray depends on, will be cached later @@ -302,5 +303,3 @@ install( FILES_MATCHING PATTERN "*.h" PATTERN "CMakeFiles" EXCLUDE ) - - diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 96df69e207..222f80806d 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -73,7 +73,7 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, x_trange); else #endif - return 
lapack::cholesky_solve(transpose, A, B, l_trange, x_trange); + return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); } } // namespace TiledArray diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 766970e996..0d3a6fbeed 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -24,8 +24,9 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED -#include #include +#include +#include #include namespace TiledArray { @@ -33,26 +34,6 @@ namespace lapack { namespace detail { -template -void chol_eig( - Eigen::Matrix& A) { - using numeric_type = Scalar; - char uplo = 'L'; - integer n = A.rows(); - numeric_type* a = A.data(); - integer lda = n; - integer info = 0; -#if defined(MADNESS_LINALG_USE_LAPACKE) - MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info); -#else - MADNESS_DISPATCH_LAPACK_FN(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); -#endif - - if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); -} - template auto make_L_eig(const DistArray& A) { using Array = DistArray; @@ -121,7 +102,7 @@ template >> auto cholesky(const ContiguousTensor& A) { auto A_eig = detail::to_eigen(A); - detail::chol_eig(A_eig); + lapack::cholesky(A_eig); detail::zero_out_upper_triangle(A_eig); return detail::from_eigen(A_eig, A.range()); } @@ -156,21 +137,11 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { // if need to return L use its copy to compute inverse decltype(L_eig) L_inv_eig; - if (RetL && world.rank() == 0) L_inv_eig = L_eig; if (world.rank() == 0) { + if (RetL) L_inv_eig = L_eig; auto& L_inv_eig_ref = RetL ? 
L_inv_eig : L_eig; - - char uplo = 'L'; - char diag = 'N'; - integer n = L_eig.rows(); - using numeric_type = typename Array::numeric_type; - numeric_type* l = L_inv_eig_ref.data(); - integer lda = n; - integer info = 0; - MADNESS_DISPATCH_LAPACK_FN(trtri, &uplo, &diag, &n, l, &lda, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); - + cholesky_linv(L_inv_eig_ref); detail::zero_out_upper_triangle(L_inv_eig_ref); } world.gop.broadcast_serializable(RetL ? L_inv_eig : L_eig, 0); @@ -196,16 +167,7 @@ auto cholesky_solve(const Array& A, const Array& B, auto X_eig = detail::to_eigen(B); World& world = A.world(); if (world.rank() == 0) { - char uplo = 'L'; - integer n = A_eig.rows(); - integer nrhs = X_eig.cols(); - numeric_type* a = A_eig.data(); - numeric_type* b = X_eig.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - MADNESS_DISPATCH_LAPACK_FN(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); + cholesky_solve(A_eig, X_eig); } world.gop.broadcast_serializable(X_eig, 0); if (x_trange.rank() == 0) x_trange = B.trange(); @@ -228,21 +190,7 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, auto X_eig = detail::to_eigen(B); if (world.rank() == 0) { - char uplo = 'L'; - char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 
'N' : 'C'); - char diag = 'N'; - integer n = L_eig.rows(); - integer nrhs = X_eig.cols(); - numeric_type* a = L_eig.data(); - numeric_type* b = X_eig.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - MADNESS_DISPATCH_LAPACK_FN(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, - b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); + cholesky_lsolve(transpose, L_eig, X_eig); } world.gop.broadcast_serializable(X_eig, 0); if (l_trange.rank() == 0) l_trange = A.trange(); diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h index 817ef3e846..4d779a176e 100644 --- a/src/TiledArray/algebra/lapack/heig.h +++ b/src/TiledArray/algebra/lapack/heig.h @@ -61,60 +61,61 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { World& world = A.world(); auto A_eig = detail::to_eigen(A); std::vector evals; - if (world.rank() == 0) { - char jobz = 'V'; - char uplo = 'L'; - integer n = A_eig.rows(); - numeric_type* a = A_eig.data(); - integer lda = n; - integer info = 0; - evals.resize(n); - integer lwork = -1; - std::vector work(1); - // run once to query, then to compute - while (lwork != static_cast(work.size())) { - if (lwork > 0) { - work.resize(lwork); - } - if constexpr (is_real) { -#if defined(MADNESS_LINALG_USE_LAPACKE) - MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, - evals.data(), work.data(), &lwork, &info); -#else - MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, - evals.data(), work.data(), &lwork, &info, - sizeof(char), sizeof(char)); -#endif - } else { - std::vector rwork; - if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); -#if defined(MADNESS_LINALG_USE_LAPACKE) - MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, - evals.data(), work.data(), &lwork, - &rwork.data(), &info); -#else - MADNESS_DISPATCH_LAPACK_FN( - heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, - &rwork.data(), &info, sizeof(char), sizeof(char)); 
-#endif - } - if (lwork == -1) { - if constexpr (is_real) { - lwork = static_cast(work[0]); - } else { - lwork = static_cast(work[0].real()); - } - TA_ASSERT(lwork > 1); - } - }; +// if (world.rank() == 0) { +// char jobz = 'V'; +// char uplo = 'L'; +// integer n = A_eig.rows(); +// numeric_type* a = A_eig.data(); +// integer lda = n; +// integer info = 0; +// evals.resize(n); +// integer lwork = -1; +// std::vector work(1); +// // run once to query, then to compute +// while (lwork != static_cast(work.size())) { +// if (lwork > 0) { +// work.resize(lwork); +// } +// if constexpr (is_real) { +// #if defined(MADNESS_LINALG_USE_LAPACKE) +// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, +// evals.data(), work.data(), &lwork, &info); +// #else +// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, +// evals.data(), work.data(), &lwork, &info, +// sizeof(char), sizeof(char)); +// #endif +// } else { +// std::vector rwork; +// if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); +// #if defined(MADNESS_LINALG_USE_LAPACKE) +// MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, +// evals.data(), work.data(), &lwork, +// &rwork.data(), &info); +// #else +// MADNESS_DISPATCH_LAPACK_FN( +// heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, +// &rwork.data(), &info, sizeof(char), sizeof(char)); +// #endif +// } +// if (lwork == -1) { +// if constexpr (is_real) { +// lwork = static_cast(work[0]); +// } else { +// lwork = static_cast(work[0].real()); +// } +// TA_ASSERT(lwork > 1); +// } +// }; + +// if (info != 0) { +// if (is_real) +// TA_EXCEPTION("LAPACK::syev failed"); +// else +// TA_EXCEPTION("LAPACK::heev failed"); +// } +// } - if (info != 0) { - if (is_real) - TA_EXCEPTION("LAPACK::syev failed"); - else - TA_EXCEPTION("LAPACK::heev failed"); - } - } world.gop.broadcast_serializable(A_eig, 0); world.gop.broadcast_serializable(evals, 0); if (evec_trange.rank() == 0) evec_trange = A.trange(); diff --git 
a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc new file mode 100644 index 0000000000..a2d6e91626 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.cc @@ -0,0 +1,206 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ + +#include +#include +#include +#include +#include + +#define TA_LAPACK_CALL(name, args...) 
\ + typedef T numeric_type; \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +namespace TiledArray::lapack { + +template +void cholesky(Matrix &A) { + char uplo = 'L'; + integer n = A.rows(); + auto* a = A.data(); + integer lda = n; + integer info = 0; +#if defined(MADNESS_LINALG_USE_LAPACKE) + TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info); +#else + TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); +#endif + if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); +} + +template +void cholesky_linv(Matrix &A) { + char uplo = 'L'; + char diag = 'N'; + integer n = A.rows(); + auto* l = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_CALL(trtri, &uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); +} + +template +void cholesky_solve(Matrix &A, Matrix &X) { + char uplo = 'L'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + //TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); +} + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 
'N' : 'C'); + char diag = 'N'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + //TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); +} + +template +void hereig(Matrix &A, Vector &W) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); + lwork = integer(lwork_dummy); + Vector work(lwork); + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); + if (info != 0) TA_EXCEPTION("lapack::hereig failed"); +} + +template +void hereig_gen(Matrix &A, Matrix &B, Vector &W) { + integer itype = 1; + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); + lwork = integer(lwork_dummy); + Vector work(lwork); + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); + if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); +} + +template +void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { + integer m = A.rows(); + integer n = A.cols(); + T* a = A.data(); + integer lda = A.rows(); + + S.resize(std::max(m,n)); + T* s = S.data(); + + char jobu = 'N'; + T* u = nullptr; + integer ldu = 0; + if (U) { + jobu = 'A'; + U->resize(m,n); + u = U->data(); + ldu = U->rows(); + } + + char jobvt = 'N'; + T* vt = nullptr; + integer ldvt = 0; + if (VT) 
{ + jobvt = 'A'; + VT->resize(n,m); + vt = VT->data(); + ldvt = VT->rows(); + } + + integer lwork = -1; + integer info; + T lwork_dummy; + + TA_LAPACK_CALL( + gesvd, + &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, + sizeof(char), sizeof(char) + ); + lwork = integer(lwork_dummy); + Vector work(lwork); + TA_LAPACK_CALL( + gesvd, + &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, + sizeof(char), sizeof(char) + ); + if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); +} + + +#define TA_LAPACK_EXPLICIT(MATRIX,VECTOR) \ + template void cholesky(MATRIX&); \ + template void cholesky_linv(MATRIX&); \ + template void cholesky_solve(MATRIX&,MATRIX&); \ + template void cholesky_lsolve(TransposeFlag,MATRIX&,MATRIX&); \ + template void hereig(MATRIX&,VECTOR&); \ + template void hereig_gen(MATRIX&,MATRIX&,VECTOR&); \ + template void svd(MATRIX&,VECTOR&,MATRIX*,MATRIX*); + + +TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); + +} diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h new file mode 100644 index 0000000000..1ccf68719f --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.h @@ -0,0 +1,59 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED + +#include +#include +#include + +namespace TiledArray::lapack { + +template +using Vector = Eigen::Matrix; + +template +using Matrix = Eigen::Matrix; + +template +void cholesky(Matrix &A); + +template +void cholesky_linv(Matrix &A); + +template +void cholesky_solve(Matrix &A, Matrix &X); + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X); + +template +void hereig(Matrix &A, Vector &W); + +template +void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT); + +} + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h index 8907f8e9d9..daf822ff8e 100644 --- a/src/TiledArray/algebra/lapack/util.h +++ b/src/TiledArray/algebra/lapack/util.h @@ -29,19 +29,6 @@ namespace TiledArray { namespace lapack { - -#define MADNESS_DISPATCH_LAPACK_FN(name, args...) \ - if constexpr (std::is_same_v) \ - d##name##_(args); \ - else if constexpr (std::is_same_v) \ - s##name##_(args); \ - else if constexpr (std::is_same_v>) \ - z##name##_(args); \ - else if constexpr (std::is_same_v>) \ - c##name##_(args); \ - else \ - std::abort(); - namespace detail { template diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 88c1995f9e..9004369a5c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,85 +32,85 @@ set(executable ta_test) # N.B.: The order of files here represents the order in which the tests are run. # N.B. 
2: if you want to trim this down you may need to resolve linker errors due to missing fixture deps manually set(ta_test_src_files ta_test.cpp - range.cpp - btas.cpp - meta.cpp - type_traits.cpp - utility.cpp - permutation.cpp - symm_permutation_group.cpp - symm_irrep.cpp - symm_representation.cpp - block_range.cpp - perm_index.cpp - transform_iterator.cpp - bitset.cpp - math_outer.cpp - math_partial_reduce.cpp - math_transpose.cpp - math_blas.cpp - tensor.cpp - tensor_of_tensor.cpp - tensor_tensor_view.cpp - tensor_shift_wrapper.cpp - tiled_range1.cpp - tiled_range.cpp - blocked_pmap.cpp - hash_pmap.cpp - cyclic_pmap.cpp - replicated_pmap.cpp - dense_shape.cpp - sparse_shape.cpp - distributed_storage.cpp - tensor_impl.cpp - array_impl.cpp - index_list.cpp - bipartite_index_list.cpp - dist_array.cpp - conversions.cpp - eigen.cpp - dist_op_dist_cache.cpp - dist_op_group.cpp - dist_op_communicator.cpp - tile_op_noop.cpp - tile_op_scal.cpp - dist_eval_array_eval.cpp - dist_eval_unary_eval.cpp - tile_op_add.cpp - tile_op_scal_add.cpp - tile_op_subt.cpp - tile_op_scal_subt.cpp - dist_eval_binary_eval.cpp - tile_op_mult.cpp - tile_op_scal_mult.cpp - tile_op_contract_reduce.cpp - reduce_task.cpp - proc_grid.cpp - dist_eval_contraction_eval.cpp - expressions.cpp - expressions_sparse.cpp - expressions_complex.cpp - expressions_btas.cpp - expressions_mixed.cpp - foreach.cpp - solvers.cpp - initializer_list.cpp - diagonal_array.cpp - retile.cpp - tot_dist_array_part1.cpp - tot_dist_array_part2.cpp - random.cpp - trace.cpp - tot_expressions.cpp - annotation.cpp - diagonal_array.cpp - contraction_helpers.cpp - s_t_t_contract_.cpp - t_t_t_contract_.cpp - t_s_t_contract_.cpp - t_tot_tot_contract_.cpp - tot_tot_tot_contract_.cpp - einsum.cpp + # range.cpp + # btas.cpp + # meta.cpp + # type_traits.cpp + # utility.cpp + # permutation.cpp + # symm_permutation_group.cpp + # symm_irrep.cpp + # symm_representation.cpp + # block_range.cpp + # perm_index.cpp + # transform_iterator.cpp + # 
bitset.cpp + # math_outer.cpp + # math_partial_reduce.cpp + # math_transpose.cpp + # math_blas.cpp + # tensor.cpp + # tensor_of_tensor.cpp + # tensor_tensor_view.cpp + # tensor_shift_wrapper.cpp + # tiled_range1.cpp + # tiled_range.cpp + # blocked_pmap.cpp + # hash_pmap.cpp + # cyclic_pmap.cpp + # replicated_pmap.cpp + # dense_shape.cpp + # sparse_shape.cpp + # distributed_storage.cpp + # tensor_impl.cpp + # array_impl.cpp + # index_list.cpp + # bipartite_index_list.cpp + # dist_array.cpp + # conversions.cpp + # eigen.cpp + # dist_op_dist_cache.cpp + # dist_op_group.cpp + # dist_op_communicator.cpp + # tile_op_noop.cpp + # tile_op_scal.cpp + # dist_eval_array_eval.cpp + # dist_eval_unary_eval.cpp + # tile_op_add.cpp + # tile_op_scal_add.cpp + # tile_op_subt.cpp + # tile_op_scal_subt.cpp + # dist_eval_binary_eval.cpp + # tile_op_mult.cpp + # tile_op_scal_mult.cpp + # tile_op_contract_reduce.cpp + # reduce_task.cpp + # proc_grid.cpp + # dist_eval_contraction_eval.cpp + # expressions.cpp + # expressions_sparse.cpp + # expressions_complex.cpp + # expressions_btas.cpp + # expressions_mixed.cpp + # foreach.cpp + # solvers.cpp + # initializer_list.cpp + # diagonal_array.cpp + # retile.cpp + # tot_dist_array_part1.cpp + # tot_dist_array_part2.cpp + # random.cpp + # trace.cpp + # tot_expressions.cpp + # annotation.cpp + # diagonal_array.cpp + # contraction_helpers.cpp + # s_t_t_contract_.cpp + # t_t_t_contract_.cpp + # t_s_t_contract_.cpp + # t_tot_tot_contract_.cpp + # tot_tot_tot_contract_.cpp + # einsum.cpp lapack.cpp ) @@ -123,7 +123,7 @@ if (TARGET TiledArray_SCALAPACK) endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) -if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND) +if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE) add_ta_executable(${executable} "${ta_test_src_files}" "MADworld;${TILEDARRAY_PRIVATE_LINK_LIBRARIES}") target_compile_definitions(${executable} PRIVATE 
TILEDARRAY_HEADER_ONLY=1) if (LAPACK_INCLUDE_DIRS) From 40ddb05eb07d2aea584f93f2ac80f5ee7bfa13f3 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Nov 2020 11:29:10 -0500 Subject: [PATCH 29/36] fixes for differences in F77 BLAS/LAPACK API in MADNESS and rest of world. --- src/TiledArray/algebra/lapack/chol.h | 4 +- src/TiledArray/algebra/lapack/lapack.cc | 117 +++++++++++++++--------- 2 files changed, 74 insertions(+), 47 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 0d3a6fbeed..44376bae9a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -24,9 +24,9 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED -#include #include #include +#include #include namespace TiledArray { @@ -45,7 +45,7 @@ auto make_L_eig(const DistArray& A) { World& world = A.world(); auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - chol_eig(A_eig); + lapack::cholesky(A_eig); } world.gop.broadcast_serializable(A_eig, 0); return A_eig; diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc index a2d6e91626..977d6292b4 100644 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ b/src/TiledArray/algebra/lapack/lapack.cc @@ -22,9 +22,9 @@ * */ -#include #include #include +#include #include #include @@ -43,8 +43,8 @@ namespace TiledArray::lapack { -template -void cholesky(Matrix &A) { +template +void cholesky(Matrix& A) { char uplo = 'L'; integer n = A.rows(); auto* a = A.data(); @@ -58,8 +58,8 @@ void cholesky(Matrix &A) { if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); } -template -void cholesky_linv(Matrix &A) { +template +void cholesky_linv(Matrix& A) { char uplo = 'L'; char diag = 'N'; integer n = A.rows(); @@ -70,8 +70,8 @@ void cholesky_linv(Matrix &A) { if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); } -template -void cholesky_solve(Matrix &A, Matrix &X) { +template 
+void cholesky_solve(Matrix& A, Matrix& X) { char uplo = 'L'; integer n = A.rows(); integer nrhs = X.cols(); @@ -80,16 +80,16 @@ void cholesky_solve(Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); } -template -void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { char uplo = 'L'; char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); char diag = 'N'; integer n = A.rows(); integer nrhs = X.cols(); @@ -98,12 +98,13 @@ void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, + // &info); if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); } -template -void hereig(Matrix &A, Vector &W) { +template +void hereig(Matrix& A, Vector& W) { char jobz = 'V'; char uplo = 'L'; integer n = A.rows(); @@ -113,15 +114,27 @@ void hereig(Matrix &A, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); 
+#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, + sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, + &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig failed"); } -template -void hereig_gen(Matrix &A, Matrix &B, Vector &W) { +template +void hereig_gen(Matrix& A, Matrix& B, Vector& W) { integer itype = 1; char jobz = 'V'; char uplo = 'L'; @@ -134,21 +147,33 @@ void hereig_gen(Matrix &A, Matrix &B, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } -template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { +template +void svd(Matrix& A, Vector& S, Matrix* U, Matrix* VT) { integer m = A.rows(); integer n = A.cols(); T* a = A.data(); integer lda = A.rows(); - S.resize(std::max(m,n)); + S.resize(std::max(m, n)); T* s = S.data(); char jobu = 'N'; @@ -156,7 +181,7 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer ldu = 0; if (U) { jobu = 'A'; - U->resize(m,n); + U->resize(m, n); u = 
U->data(); ldu = U->rows(); } @@ -166,7 +191,7 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer ldvt = 0; if (VT) { jobvt = 'A'; - VT->resize(n,m); + VT->resize(n, m); vt = VT->data(); ldvt = VT->rows(); } @@ -175,32 +200,34 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer info; T lwork_dummy; - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } - -#define TA_LAPACK_EXPLICIT(MATRIX,VECTOR) \ +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ template void cholesky(MATRIX&); \ template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&,MATRIX&); \ - template void cholesky_lsolve(TransposeFlag,MATRIX&,MATRIX&); \ - template void hereig(MATRIX&,VECTOR&); \ - template void hereig_gen(MATRIX&,MATRIX&,VECTOR&); \ - template void svd(MATRIX&,VECTOR&,MATRIX*,MATRIX*); - + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void hereig(MATRIX&, VECTOR&); \ + template void hereig_gen(MATRIX&, MATRIX&, VECTOR&); \ + 
template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); -} +} // namespace TiledArray::lapack From 3d8bde60a1fdc7e05c3f09b53056de344937fcea Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Nov 2020 11:29:10 -0500 Subject: [PATCH 30/36] fixes for differences in F77 BLAS/LAPACK API in MADNESS and rest of world. --- src/TiledArray/algebra/lapack/chol.h | 4 +- src/TiledArray/algebra/lapack/lapack.cc | 117 +++++++++++++++--------- 2 files changed, 74 insertions(+), 47 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 0d3a6fbeed..44376bae9a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -24,9 +24,9 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED -#include #include #include +#include #include namespace TiledArray { @@ -45,7 +45,7 @@ auto make_L_eig(const DistArray& A) { World& world = A.world(); auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - chol_eig(A_eig); + lapack::cholesky(A_eig); } world.gop.broadcast_serializable(A_eig, 0); return A_eig; diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc index a2d6e91626..977d6292b4 100644 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ b/src/TiledArray/algebra/lapack/lapack.cc @@ -22,9 +22,9 @@ * */ -#include #include #include +#include #include #include @@ -43,8 +43,8 @@ namespace TiledArray::lapack { -template -void cholesky(Matrix &A) { +template +void cholesky(Matrix& A) { char uplo = 'L'; integer n = A.rows(); auto* a = A.data(); @@ -58,8 +58,8 @@ void cholesky(Matrix &A) { if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); } -template -void cholesky_linv(Matrix &A) { +template +void cholesky_linv(Matrix& A) { char uplo = 'L'; char diag = 'N'; integer n = A.rows(); @@ -70,8 +70,8 @@ void cholesky_linv(Matrix &A) { if (info != 0) 
TA_EXCEPTION("LAPACK::trtri failed"); } -template -void cholesky_solve(Matrix &A, Matrix &X) { +template +void cholesky_solve(Matrix& A, Matrix& X) { char uplo = 'L'; integer n = A.rows(); integer nrhs = X.cols(); @@ -80,16 +80,16 @@ void cholesky_solve(Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); } -template -void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { char uplo = 'L'; char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); char diag = 'N'; integer n = A.rows(); integer nrhs = X.cols(); @@ -98,12 +98,13 @@ void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, + // &info); if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); } -template -void hereig(Matrix &A, Vector &W) { +template +void hereig(Matrix& A, Vector& W) { char jobz = 'V'; char uplo = 'L'; integer n = A.rows(); @@ -113,15 +114,27 @@ void hereig(Matrix &A, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - 
TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, + sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, + &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig failed"); } -template -void hereig_gen(Matrix &A, Matrix &B, Vector &W) { +template +void hereig_gen(Matrix& A, Matrix& B, Vector& W) { integer itype = 1; char jobz = 'V'; char uplo = 'L'; @@ -134,21 +147,33 @@ void hereig_gen(Matrix &A, Matrix &B, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } -template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { +template +void svd(Matrix& A, Vector& S, Matrix* U, Matrix* VT) { integer m = A.rows(); integer n = A.cols(); T* a = A.data(); integer lda = A.rows(); - S.resize(std::max(m,n)); + S.resize(std::max(m, n)); T* s = S.data(); char jobu = 'N'; @@ -156,7 +181,7 @@ void svd(Matrix &A, Vector 
&S, Matrix *U, Matrix *VT) { integer ldu = 0; if (U) { jobu = 'A'; - U->resize(m,n); + U->resize(m, n); u = U->data(); ldu = U->rows(); } @@ -166,7 +191,7 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer ldvt = 0; if (VT) { jobvt = 'A'; - VT->resize(n,m); + VT->resize(n, m); vt = VT->data(); ldvt = VT->rows(); } @@ -175,32 +200,34 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer info; T lwork_dummy; - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } - -#define TA_LAPACK_EXPLICIT(MATRIX,VECTOR) \ +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ template void cholesky(MATRIX&); \ template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&,MATRIX&); \ - template void cholesky_lsolve(TransposeFlag,MATRIX&,MATRIX&); \ - template void hereig(MATRIX&,VECTOR&); \ - template void hereig_gen(MATRIX&,MATRIX&,VECTOR&); \ - template void svd(MATRIX&,VECTOR&,MATRIX*,MATRIX*); - + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, 
MATRIX&); \ + template void hereig(MATRIX&, VECTOR&); \ + template void hereig_gen(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); -} +} // namespace TiledArray::lapack From 76c0d0520fc139f0727977d073c527cc2276378a Mon Sep 17 00:00:00 2001 From: asadchev Date: Tue, 17 Nov 2020 14:15:17 -0500 Subject: [PATCH 31/36] LAPACK algebra backend --- src/CMakeLists.txt | 2 +- src/TiledArray/algebra/chol.h | 12 +- src/TiledArray/algebra/heig.h | 20 +- src/TiledArray/algebra/lapack/heig.h | 119 +++------- src/TiledArray/algebra/lapack/lapack.cc | 233 ------------------- src/TiledArray/algebra/lapack/lapack.cpp | 282 +++++++++++++++++++++++ src/TiledArray/algebra/lapack/lapack.h | 25 +- src/TiledArray/algebra/lapack/lu.h | 70 ++++++ src/TiledArray/algebra/lapack/svd.h | 103 +++++++++ src/TiledArray/algebra/lu.h | 3 +- src/TiledArray/algebra/svd.h | 15 +- tests/lapack.cpp | 24 +- 12 files changed, 566 insertions(+), 342 deletions(-) delete mode 100644 src/TiledArray/algebra/lapack/lapack.cc create mode 100644 src/TiledArray/algebra/lapack/lapack.cpp create mode 100644 src/TiledArray/algebra/lapack/lu.h create mode 100644 src/TiledArray/algebra/lapack/svd.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a6ca49265..2d89f1c4be 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -207,7 +207,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp -TiledArray/algebra/lapack/lapack.cc +TiledArray/algebra/lapack/lapack.cpp ) # the list of libraries on which TiledArray depends on, will be cached later diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 222f80806d..06eb98fb19 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -37,9 +37,8 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && 
A.range().volume() > 10000000) return scalapack::cholesky(A, l_trange); - else #endif - return lapack::cholesky(A, l_trange); + return lapack::cholesky(A, l_trange); } template @@ -47,9 +46,8 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_linv(A, l_trange); - else #endif - return lapack::cholesky_linv(A, l_trange); + return lapack::cholesky_linv(A, l_trange); } template @@ -58,9 +56,8 @@ auto cholesky_solve(const Array& A, const Array& B, #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_solve(A, B, x_trange); - else #endif - return lapack::cholesky_solve(A, B, x_trange); + return lapack::cholesky_solve(A, B, x_trange); } template @@ -71,9 +68,8 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); - else #endif - return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); + return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); } } // namespace TiledArray diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h index 1a3816da5f..68e39c9de9 100644 --- a/src/TiledArray/algebra/heig.h +++ b/src/TiledArray/algebra/heig.h @@ -27,16 +27,30 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -// eigen #endif +#include namespace TiledArray { +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK -using scalapack::heig; + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, evec_trange); + } #endif + return lapack::heig(A, evec_trange); +} + +template +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if 
(A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, B, evec_trange); + } #endif + return lapack::heig(A, B, evec_trange); +} } // namespace TiledArray diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h index 4d779a176e..5fe4270c8f 100644 --- a/src/TiledArray/algebra/lapack/heig.h +++ b/src/TiledArray/algebra/lapack/heig.h @@ -24,15 +24,16 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED -#include #include + +#include +#include #include -namespace TiledArray { -namespace lapack { +namespace TiledArray::lapack { /** - * @brief Solve the standard eigenvalue problem with ScaLAPACK + * @brief Solve the standard eigenvalue problem with LAPACK * * A(i,k) X(k,j) = X(i,j) E(j) * @@ -51,80 +52,24 @@ namespace lapack { */ template auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { - using scalar_type = typename Array::scalar_type; - using numeric_type = typename Array::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - + using numeric_type = typename lapack::array_traits::numeric_type; World& world = A.world(); auto A_eig = detail::to_eigen(A); - std::vector evals; -// if (world.rank() == 0) { -// char jobz = 'V'; -// char uplo = 'L'; -// integer n = A_eig.rows(); -// numeric_type* a = A_eig.data(); -// integer lda = n; -// integer info = 0; -// evals.resize(n); -// integer lwork = -1; -// std::vector work(1); -// // run once to query, then to compute -// while (lwork != static_cast(work.size())) { -// if (lwork > 0) { -// work.resize(lwork); -// } -// if constexpr (is_real) { -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// 
evals.data(), work.data(), &lwork, &info, -// sizeof(char), sizeof(char)); -// #endif -// } else { -// std::vector rwork; -// if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, -// &rwork.data(), &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN( -// heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, -// &rwork.data(), &info, sizeof(char), sizeof(char)); -// #endif -// } -// if (lwork == -1) { -// if constexpr (is_real) { -// lwork = static_cast(work[0]); -// } else { -// lwork = static_cast(work[0].real()); -// } -// TA_ASSERT(lwork > 1); -// } -// }; - -// if (info != 0) { -// if (is_real) -// TA_EXCEPTION("LAPACK::syev failed"); -// else -// TA_EXCEPTION("LAPACK::heev failed"); -// } -// } - + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, evals); + } world.gop.broadcast_serializable(A_eig, 0); world.gop.broadcast_serializable(evals, 0); if (evec_trange.rank() == 0) evec_trange = A.trange(); - return std::tuple(evals, - eigen_to_array(A.world(), evec_trange, A_eig)); + return std::tuple( + evals, + eigen_to_array(world, evec_trange, A_eig) + ); } /** - * @brief Solve the generalized eigenvalue problem with ScaLAPACK + * @brief Solve the generalized eigenvalue problem with LAPACK * * A(i,k) X(k,j) = B(i,k) X(k,j) E(j) * @@ -142,26 +87,30 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { * @param[in] B Positive-definite matrix * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() - * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. 
*/ template -auto heig(const ArrayA& A, const ArrayB& B, - TiledRange evec_trange = TiledRange()) { - using scalar_type = typename ArrayA::scalar_type; - using numeric_type = typename ArrayA::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - - abort(); - return std::tuple(std::vector{}, EVecType{}); +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { + using numeric_type = typename lapack::array_traits::numeric_type; + (void)lapack::array_traits{}; + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, B_eig, evals); + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple( + evals, + eigen_to_array(A.world(), evec_trange, A_eig) + ); } -} // namespace lapack -} // namespace TiledArray +} // namespace TiledArray::lapack -#endif // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc deleted file mode 100644 index 977d6292b4..0000000000 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ /dev/null @@ -1,233 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Eduard Valeyev - * - * chol.h - * Created: 16 October, 2020 - * - */ - -#include -#include -#include -#include -#include - -#define TA_LAPACK_CALL(name, args...) \ - typedef T numeric_type; \ - if constexpr (std::is_same_v) \ - d##name##_(args); \ - else if constexpr (std::is_same_v) \ - s##name##_(args); \ - else if constexpr (std::is_same_v>) \ - z##name##_(args); \ - else if constexpr (std::is_same_v>) \ - c##name##_(args); \ - else \ - std::abort(); - -namespace TiledArray::lapack { - -template -void cholesky(Matrix& A) { - char uplo = 'L'; - integer n = A.rows(); - auto* a = A.data(); - integer lda = n; - integer info = 0; -#if defined(MADNESS_LINALG_USE_LAPACKE) - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info); -#else - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); -#endif - if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); -} - -template -void cholesky_linv(Matrix& A) { - char uplo = 'L'; - char diag = 'N'; - integer n = A.rows(); - auto* l = A.data(); - integer lda = n; - integer info = 0; - TA_LAPACK_CALL(trtri, &uplo, &diag, &n, l, &lda, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); -} - -template -void cholesky_solve(Matrix& A, Matrix& X) { - char uplo = 'L'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - // TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); -} - -template -void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { - char uplo = 'L'; - char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 
'N' : 'C'); - char diag = 'N'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - // TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, - // &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); -} - -template -void hereig(Matrix& A, Vector& W) { - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, - &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, - &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, - sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, - &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig failed"); -} - -template -void hereig_gen(Matrix& A, Matrix& B, Vector& W) { - integer itype = 1; - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* b = B.data(); - integer ldb = B.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - &lwork_dummy, &lwork, &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - work.data(), &lwork, &info, sizeof(char), sizeof(char)); -#else - 
TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - work.data(), &lwork, &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - -template -void svd(Matrix& A, Vector& S, Matrix* U, Matrix* VT) { - integer m = A.rows(); - integer n = A.cols(); - T* a = A.data(); - integer lda = A.rows(); - - S.resize(std::max(m, n)); - T* s = S.data(); - - char jobu = 'N'; - T* u = nullptr; - integer ldu = 0; - if (U) { - jobu = 'A'; - U->resize(m, n); - u = U->data(); - ldu = U->rows(); - } - - char jobvt = 'N'; - T* vt = nullptr; - integer ldvt = 0; - if (VT) { - jobvt = 'A'; - VT->resize(n, m); - vt = VT->data(); - ldvt = VT->rows(); - } - - integer lwork = -1; - integer info; - T lwork_dummy; - -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - -#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ - template void cholesky(MATRIX&); \ - template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&, MATRIX&); \ - template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ - template void hereig(MATRIX&, VECTOR&); \ - template void hereig_gen(MATRIX&, MATRIX&, VECTOR&); \ - template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); - -TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); - -} // namespace TiledArray::lapack diff --git 
a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp new file mode 100644 index 0000000000..85867e193d --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -0,0 +1,282 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ + +#include +#include +#include +#include +#include + +#define TA_LAPACK_CALL(name, args...) \ + typedef T numeric_type; \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +#define TA_LAPACK_GESV(...) TA_LAPACK_CALL(gesv, __VA_ARGS__) +#define TA_LAPACK_GETRF(...) TA_LAPACK_CALL(getrf, __VA_ARGS__) +#define TA_LAPACK_GETRI(...) TA_LAPACK_CALL(getri, __VA_ARGS__) + +#ifdef MADNESS_LINALG_USE_LAPACKE + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__) +#define TA_LAPACK_SYEV(...) 
TA_LAPACK_CALL(syev, __VA_ARGS__) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__) + +#else + +#ifdef FORTRAN_LINKAGE_LCU +#define dtrtri dtrtri_ +#define dtrtrs dtrtrs_ +#define dposv dposv_ +#endif + +extern "C" { // these arent in madness/clapack_fortran.h +void dtrtri(const char* uplo, const char* diag, + const integer* n, + const real8* a, const integer* lda, + integer *info, + char_len, char_len); +void dtrtrs(const char* uplo, const char* trans, const char* diag, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len, char_len, char_len); +void dposv(const char* uplo, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len); +} + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__, sizeof(char), sizeof(char), sizeof(char)) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_SYGV(...) 
TA_LAPACK_CALL(sygv, __VA_ARGS__, sizeof(char), sizeof(char)) + +#endif // MADNESS_LINALG_USE_LAPACKE + +namespace TiledArray::lapack { + +template +void cholesky(Matrix& A) { + char uplo = 'L'; + integer n = A.rows(); + auto* a = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_POTRF(&uplo, &n, a, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky failed"); +} + +template +void cholesky_linv(Matrix& A) { + char uplo = 'L'; + char diag = 'N'; + integer n = A.rows(); + auto* l = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_TRTRI(&uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_linv failed"); +} + +template +void cholesky_solve(Matrix& A, Matrix& X) { + char uplo = 'L'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_POSV(&uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_solve failed"); +} + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 
'N' : 'C'); + char diag = 'N'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_TRTRS(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_lsolve failed"); +} + +template +void heig(Matrix& A, std::vector& W) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void heig(Matrix& A, Matrix& B, std::vector& W) { + integer itype = 1; + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void svd(Matrix& A, std::vector& S, Matrix* U, Matrix* VT) { + integer m = A.rows(); + integer n = A.cols(); + T* a = A.data(); + integer lda = A.rows(); + + S.resize(std::min(m, n)); + T* s = S.data(); + + char jobu = 'N'; + T* u = nullptr; + integer ldu = m; + if (U) { + jobu = 'A'; + U->resize(m, n); + u = U->data(); + ldu = U->rows(); + } + + char jobvt = 'N'; + T* vt = nullptr; + integer ldvt = n; + if (VT) { + jobvt = 'A'; + VT->resize(n, m); + vt = VT->data(); + ldvt = VT->rows(); + } + + integer 
lwork = -1; + integer info; + T lwork_dummy; + + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::svd failed"); +} + +template +void lu_solve(Matrix &A, Matrix &B) { + integer n = A.rows(); + integer nrhs = B.cols(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + std::vector ipiv(n); + integer info; + TA_LAPACK_GESV(&n, &nrhs, a, &lda, ipiv.data(), b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_solve failed"); +} + +template +void lu_inv(Matrix &A) { + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + integer lwork = -1; + std::vector work(1); + std::vector ipiv(n); + integer info; + TA_LAPACK_GETRF(&n, &n, a, &lda, ipiv.data(), &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + lwork = (integer)work[0]; + work.resize(lwork); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); +} + + +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ + template void cholesky(MATRIX&); \ + template void cholesky_linv(MATRIX&); \ + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void heig(MATRIX&, VECTOR&); \ + template void heig(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ + template void lu_solve(MATRIX&, MATRIX&); \ + template void lu_inv(MATRIX&); + +TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); +//TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); + +} // namespace TiledArray::lapack diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h 
index 1ccf68719f..78c767bfc5 100644 --- a/src/TiledArray/algebra/lapack/lapack.h +++ b/src/TiledArray/algebra/lapack/lapack.h @@ -30,8 +30,16 @@ namespace TiledArray::lapack { -template -using Vector = Eigen::Matrix; +template +struct array_traits { + using scalar_type = typename A::scalar_type; + using numeric_type = typename A::numeric_type; + static const bool complex = !std::is_same_v; + static_assert( + std::is_same_v, + "TA::lapack is only usable with a DistArray of scalar types" + ); +}; template using Matrix = Eigen::Matrix; @@ -49,10 +57,19 @@ template void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X); template -void hereig(Matrix &A, Vector &W); +void heig(Matrix &A, std::vector &W); + +template +void heig(Matrix &A, Matrix &B, std::vector &W); + +template +void svd(Matrix &A, std::vector &S, Matrix *U, Matrix *VT); + +template +void lu_solve(Matrix &A, Matrix &B); template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT); +void lu_inv(Matrix &A); } diff --git a/src/TiledArray/algebra/lapack/lu.h b/src/TiledArray/algebra/lapack/lu.h new file mode 100644 index 0000000000..389af28942 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lu.h @@ -0,0 +1,70 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * lu.h + * Created: 19 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Solve a linear system via LU factorization + */ +template +auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) { + (void)lapack::array_traits{}; + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + if (world.rank() == 0) { + lapack::lu_solve(A_eig, B_eig); + } + world.gop.broadcast_serializable(B_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, B_eig); +} + +/** + * @brief Invert a matrix via LU + */ +template +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) { + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + if (world.rank() == 0) { + lapack::lu_inv(A_eig); + } + world.gop.broadcast_serializable(A_eig, 0); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); + return eigen_to_array(A.world(), ainv_trange, A_eig); +} + +} // namespace TiledArray::lapack + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/svd.h b/src/TiledArray/algebra/lapack/svd.h new file mode 100644 index 0000000000..8ea0afd3da --- /dev/null +++ b/src/TiledArray/algebra/lapack/svd.h @@ -0,0 +1,103 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * + * A(i,j) = S(k) U(i,k) conj(V(j,k)) + * + * Example Usage: + * + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. 
+ */ +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { + + using T = typename Array::numeric_type; + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + + constexpr bool svd_all_vectors = std::is_same_v; + constexpr bool need_u = std::is_same_v or svd_all_vectors; + constexpr bool need_vt = std::is_same_v or svd_all_vectors; + + std::vector S; + std::unique_ptr< Matrix > U, VT; + + if constexpr (need_u) U = std::make_unique< Matrix >(); + if constexpr (need_vt) VT = std::make_unique< Matrix >(); + + if (world.rank() == 0) { + lapack::svd(A_eig, S, U.get(), VT.get()); + } + + world.gop.broadcast_serializable(S, 0); + if (U) world.gop.broadcast_serializable(*U, 0); + if (VT) world.gop.broadcast_serializable(*VT, 0); + + auto make_array = [&world](auto && ... args) { + return eigen_to_array(world, args...); + }; + + if constexpr (need_u && need_vt) { + return std::tuple(S, make_array(u_trange, *U), make_array(vt_trange, *VT)); + } + if constexpr (need_u && !need_vt) { + return std::tuple(S, make_array(u_trange, *U)); + } + if constexpr (!need_u && need_vt) { + return std::tuple(S, make_array(vt_trange, *VT)); + } + + if constexpr (!need_u && !need_vt) return S; + +} + +} // namespace scalapack::TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h index fa1bfca49e..330f49c8f7 100644 --- a/src/TiledArray/algebra/lu.h +++ b/src/TiledArray/algebra/lu.h @@ -27,9 +27,8 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -#// include eigen #endif +#include namespace TiledArray { diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h index b31c7932da..3c87fbc087 100644 --- a/src/TiledArray/algebra/svd.h +++ b/src/TiledArray/algebra/svd.h @@ -27,16 +27,21 @@ #include #ifdef TILEDARRAY_HAS_SCALAPACK #include -#else -// include eigen #endif // TILEDARRAY_HAS_SCALAPACK +#include namespace TiledArray 
{ -#ifdef TILEDARRAY_HAS_SCALAPACK -using scalapack::svd; -#else +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::svd(A, u_trange, vt_trange); + } #endif + return lapack::svd(A, u_trange, vt_trange); +} } // namespace TiledArray diff --git a/tests/lapack.cpp b/tests/lapack.cpp index 287acab547..5666be717d 100644 --- a/tests/lapack.cpp +++ b/tests/lapack.cpp @@ -6,6 +6,8 @@ #include "TiledArray/algebra/lapack/chol.h" #include "TiledArray/algebra/lapack/heig.h" +#include "TiledArray/algebra/lapack/svd.h" +#include "TiledArray/algebra/lapack/lu.h" using namespace TiledArray::lapack; @@ -68,7 +70,7 @@ BOOST_AUTO_TEST_CASE(chol) { TA::Tensor A; this->make_ta_reference(A, range); - auto L = cholesky(A); + auto L = lapack::cholesky(A); decltype(A) A_minus_LLt; A_minus_LLt = A.clone(); @@ -80,4 +82,24 @@ BOOST_AUTO_TEST_CASE(chol) { N * N * std::numeric_limits::epsilon()); } +BOOST_AUTO_TEST_CASE(svd) { + TA::TArray A; + { auto S = lapack::svd(A); } + { auto [S,U] = lapack::svd(A); } + { auto [S,VT] = lapack::svd(A); } + { auto [S,U,VT] = lapack::svd(A); } +} + +BOOST_AUTO_TEST_CASE(heig) { + TA::TArray A; + { auto W = lapack::heig(A); } + { auto W = lapack::heig(A,A); } +} + +BOOST_AUTO_TEST_CASE(lu) { + TA::TArray A; + { auto W = lapack::lu_solve(A,A); } + { auto W = lapack::lu_inv(A); } +} + BOOST_AUTO_TEST_SUITE_END() From acefee4bae05b6b025e767d80ada804bae5525ee Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 20:31:05 -0500 Subject: [PATCH 32/36] Rename chol.h to cholesky.h --- src/CMakeLists.txt | 8 +++++--- src/TiledArray/algebra/{chol.h => cholesky.h} | 4 ++-- src/TiledArray/algebra/lapack/{chol.h => cholesky.h} | 0 src/TiledArray/algebra/lapack/lapack.cpp | 2 +- src/TiledArray/algebra/scalapack/all.h | 2 +- src/TiledArray/algebra/scalapack/{chol.h => cholesky.h} | 0 6 files 
changed, 9 insertions(+), 7 deletions(-) rename src/TiledArray/algebra/{chol.h => cholesky.h} (96%) rename src/TiledArray/algebra/lapack/{chol.h => cholesky.h} (100%) rename src/TiledArray/algebra/scalapack/{chol.h => cholesky.h} (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d89f1c4be..ddab08d0a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,15 +60,17 @@ TiledArray/zero_tensor.h TiledArray/algebra/conjgrad.h TiledArray/algebra/diis.h TiledArray/algebra/utils.h -TiledArray/algebra/chol.h +TiledArray/algebra/cholesky.h TiledArray/algebra/heig.h TiledArray/algebra/lu.h TiledArray/algebra/svd.h TiledArray/algebra/types.h -TiledArray/algebra/lapack/chol.h +TiledArray/algebra/lapack/cholesky.h TiledArray/algebra/lapack/heig.h TiledArray/algebra/lapack/util.h -TiledArray/algebra/scalapack/chol.h +TiledArray/algebra/lapack/lu.h +TiledArray/algebra/lapack/svd.h +TiledArray/algebra/scalapack/cholesky.h TiledArray/algebra/scalapack/heig.h TiledArray/algebra/scalapack/lu.h TiledArray/algebra/scalapack/svd.h diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/cholesky.h similarity index 96% rename from src/TiledArray/algebra/chol.h rename to src/TiledArray/algebra/cholesky.h index 06eb98fb19..b34bf38292 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/cholesky.h @@ -26,9 +26,9 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #endif -#include +#include namespace TiledArray { diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/cholesky.h similarity index 100% rename from src/TiledArray/algebra/lapack/chol.h rename to src/TiledArray/algebra/lapack/cholesky.h diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp index 85867e193d..640ef695f1 100644 --- a/src/TiledArray/algebra/lapack/lapack.cpp +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -17,7 +17,7 @@ * * Eduard Valeyev * - * chol.h + * cholesky.h * Created: 16 
October, 2020 * */ diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h index 2599d0280e..fbba6826d0 100644 --- a/src/TiledArray/algebra/scalapack/all.h +++ b/src/TiledArray/algebra/scalapack/all.h @@ -28,7 +28,7 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #include #include #include diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/cholesky.h similarity index 100% rename from src/TiledArray/algebra/scalapack/chol.h rename to src/TiledArray/algebra/scalapack/cholesky.h From abe7576a5b4a379c527c25a60be94d8b59084950 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 16:12:07 -0500 Subject: [PATCH 33/36] Refactor linalg unit tests --- tests/CMakeLists.txt | 8 +- tests/lapack.cpp | 105 --------- tests/{scalapack.cpp => linear_algebra.cpp} | 234 ++++++++++---------- 3 files changed, 124 insertions(+), 223 deletions(-) delete mode 100644 tests/lapack.cpp rename tests/{scalapack.cpp => linear_algebra.cpp} (81%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9004369a5c..75889908de 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -111,16 +111,16 @@ set(ta_test_src_files ta_test.cpp # t_tot_tot_contract_.cpp # tot_tot_tot_contract_.cpp # einsum.cpp - lapack.cpp + linear_algebra.cpp ) if(CUDA_FOUND) list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() -if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files scalapack.cpp) -endif(TARGET TiledArray_SCALAPACK) +# if (TARGET TiledArray_SCALAPACK) +# list(APPEND ta_test_src_files scalapack.cpp) +# endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE) diff --git a/tests/lapack.cpp b/tests/lapack.cpp deleted file mode 100644 index 5666be717d..0000000000 --- a/tests/lapack.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include -#include 
-#include "TiledArray/config.h" -#include "range_fixture.h" -#include "unit_test_config.h" - -#include "TiledArray/algebra/lapack/chol.h" -#include "TiledArray/algebra/lapack/heig.h" -#include "TiledArray/algebra/lapack/svd.h" -#include "TiledArray/algebra/lapack/lu.h" - -using namespace TiledArray::lapack; - -struct LAPACKFixture { - int64_t N; - std::vector htoeplitz_vector; - std::vector exact_evals; - - inline double matrix_element_generator(int64_t i, int64_t j) { - // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i - j)]; - } - - inline double make_ta_reference(TA::Tensor& t, - TA::Range const& range) { - t = TA::Tensor(range, 0.0); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) { - for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m, n); - } - } - - return t.norm(); - }; - - LAPACKFixture(int64_t N) : N(N), htoeplitz_vector(N), exact_evals(N) { - // Generate an hermitian Circulant vector - std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); - htoeplitz_vector[0] = 100; - std::default_random_engine gen(0); - std::uniform_real_distribution<> dist(0., 1.); - for (int64_t i = 1; i <= (N / 2); ++i) { - double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N - i] = val; - } - - // Compute exact eigenvalues - const double ff = 2. 
* M_PI / N; - for (int64_t j = 0; j < N; ++j) { - double val = htoeplitz_vector[0]; - for (int64_t k = 1; k < N; ++k) - val += htoeplitz_vector[N - k] * std::cos(ff * j * k); - exact_evals[j] = val; - } - - std::sort(exact_evals.begin(), exact_evals.end()); - } - - LAPACKFixture() : LAPACKFixture(1000) {} -}; - -BOOST_FIXTURE_TEST_SUITE(lapack_suite, LAPACKFixture) - -BOOST_AUTO_TEST_CASE(chol) { - auto range = TA::Range{N, N}; - - TA::Tensor A; - this->make_ta_reference(A, range); - - auto L = lapack::cholesky(A); - - decltype(A) A_minus_LLt; - A_minus_LLt = A.clone(); - A_minus_LLt.gemm(L, L, -1, - math::GemmHelper{madness::cblas::NoTrans, - madness::cblas::ConjTrans, 2, 2, 2}); - - BOOST_CHECK_SMALL(A_minus_LLt.norm(), - N * N * std::numeric_limits::epsilon()); -} - -BOOST_AUTO_TEST_CASE(svd) { - TA::TArray A; - { auto S = lapack::svd(A); } - { auto [S,U] = lapack::svd(A); } - { auto [S,VT] = lapack::svd(A); } - { auto [S,U,VT] = lapack::svd(A); } -} - -BOOST_AUTO_TEST_CASE(heig) { - TA::TArray A; - { auto W = lapack::heig(A); } - { auto W = lapack::heig(A,A); } -} - -BOOST_AUTO_TEST_CASE(lu) { - TA::TArray A; - { auto W = lapack::lu_solve(A,A); } - { auto W = lapack::lu_inv(A); } -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/scalapack.cpp b/tests/linear_algebra.cpp similarity index 81% rename from tests/scalapack.cpp rename to tests/linear_algebra.cpp index f728938a25..94d8643759 100644 --- a/tests/scalapack.cpp +++ b/tests/linear_algebra.cpp @@ -1,19 +1,36 @@ #include #include #include "TiledArray/config.h" -#include "range_fixture.h" +//#include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/algebra/lapack/chol.h" +#include "TiledArray/algebra/lapack/cholesky.h" #include "TiledArray/algebra/lapack/heig.h" -#include "TiledArray/algebra/scalapack/all.h" +#include "TiledArray/algebra/lapack/lu.h" +#include "TiledArray/algebra/lapack/svd.h" -using namespace TiledArray::scalapack; +#include "TiledArray/algebra/cholesky.h" +#include 
"TiledArray/algebra/heig.h" +#include "TiledArray/algebra/lu.h" +#include "TiledArray/algebra/svd.h" -struct ScaLAPACKFixture { - blacspp::Grid grid; - BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? +namespace TA = TiledArray; +namespace lapack = TA::lapack; + +#if TILEDARRAY_HAS_SCALAPACK +#include "TiledArray/algebra/scalapack/all.h" +namespace scalapack = TA::scalapack; +#define TILEDARRAY_SCALAPACK_TEST(F,E) \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + compare("TiledArray", lapack::F, TiledArray::F, E); +#else +#define TILEDARRAY_SCALAPACK_TEST(...) +#endif + +struct ReferenceFixture { + + size_t N; std::vector htoeplitz_vector; std::vector exact_evals; @@ -27,8 +44,7 @@ struct ScaLAPACKFixture { #endif } - inline double make_ta_reference(TA::Tensor& t, - TA::Range const& range) { + inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); @@ -41,21 +57,9 @@ struct ScaLAPACKFixture { return t.norm(); }; - inline void construct_scalapack(BlockCyclicMatrix& A) { - auto [M, N] = A.dims(); - for (size_t i = 0; i < M; ++i) - for (size_t j = 0; j < N; ++j) - if (A.dist().i_own(i, j)) { - auto [i_local, j_local] = A.dist().local_indx(i, j); - A.local_mat()(i_local, j_local) = matrix_element_generator(i, j); - } - } - - ScaLAPACKFixture(int64_t N, int64_t NB) - : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? 
- ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB), - htoeplitz_vector(N), - exact_evals(N) { + ReferenceFixture(int64_t N = 1000) + : N(N), htoeplitz_vector(N), exact_evals(N) + { // Generate an hermitian Circulant vector std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; @@ -79,11 +83,39 @@ struct ScaLAPACKFixture { std::sort(exact_evals.begin(), exact_evals.end()); - // Fill reference matrix - construct_scalapack(ref_matrix); } - ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {} +}; + +struct LinearAlgebraFixture : ReferenceFixture { + +#if TILEDARRAY_HAS_SCALAPACK + + blacspp::Grid grid; + scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? + + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : + ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) + { + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + if (ref_matrix.dist().i_own(i, j)) { + auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); + ref_matrix.local_mat()(i_local, j_local) = matrix_element_generator(i, j); + } + } + } + } + template + static void compare(const char *context, const A& lapack, const A& result, double e) { + BOOST_TEST_CONTEXT(context); + auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); + BOOST_CHECK_SMALL(diff_with_lapack, e); + } +#endif + }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -108,7 +140,10 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; -BOOST_FIXTURE_TEST_SUITE(scalapack_suite, ScaLAPACKFixture) + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) + +#if TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -127,7 +162,7 @@ 
BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -155,7 +190,7 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -183,7 +218,7 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -218,7 +253,7 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -246,7 +281,7 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -282,7 +317,7 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { GlobalFixture::world->gop.fence(); auto test_ta = - block_cyclic_to_array>(ref_matrix, trange); + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -310,7 +345,7 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, 
NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -345,7 +380,7 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -363,10 +398,10 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); }; -BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { +#endif // TILEDARRAY_HAS_SCALAPACK + +BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -376,7 +411,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { return this->make_ta_reference(t, range); }); - auto [evals, evecs] = heig(ref_ta); + auto [evals, evecs] = lapack::heig(ref_ta); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta); // auto evals = heig( ref_ta ); @@ -399,10 +434,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { +BOOST_AUTO_TEST_CASE(heig_diff_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); auto ref_ta = TA::make_array>( @@ -412,7 +445,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig(ref_ta, new_trange, 128); + auto [evals, evecs] = lapack::heig(ref_ta, new_trange); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange); BOOST_CHECK(evecs.trange() == new_trange); @@ -434,10 +467,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_generalized) { 
+BOOST_AUTO_TEST_CASE(heig_generalized) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -461,7 +492,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { }); GlobalFixture::world->gop.fence(); - auto [evals, evecs] = heig(ref_ta, dense_iden); + auto [evals, evecs] = lapack::heig(ref_ta, dense_iden); // auto evals = heig( ref_ta ); BOOST_CHECK(evecs.trange() == ref_ta.trange()); @@ -476,10 +507,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol) { +BOOST_AUTO_TEST_CASE(cholesky) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -489,7 +518,7 @@ BOOST_AUTO_TEST_CASE(sca_chol) { return this->make_ta_reference(t, range); }); - auto L = cholesky(A); + auto L = lapack::cholesky(A); BOOST_CHECK(L.trange() == A.trange()); @@ -510,10 +539,8 @@ BOOST_AUTO_TEST_CASE(sca_chol) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv) { +BOOST_AUTO_TEST_CASE(cholesky_linv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -523,8 +550,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { return this->make_ta_reference(t, range); }); - auto Linv = cholesky_linv(A); - auto Linv_lapack = lapack::cholesky_linv(A); + auto Linv = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); @@ -543,22 +569,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = A("i,j").norm().get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + 
BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { +BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -568,8 +590,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { return this->make_ta_reference(t, range); }); - auto [L, Linv] = cholesky_linv(A); - auto [L_lapack, Linv_lapack] = lapack::cholesky_linv(A); + auto [L, Linv] = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); @@ -588,26 +609,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(L) L_error; - L_error("i,j") = L("i,j") - L_lapack("i,j"); - BOOST_CHECK_SMALL(L_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_solve) { +BOOST_AUTO_TEST_CASE(cholesky_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -617,7 +630,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { return this->make_ta_reference(t, range); }); - auto iden = cholesky_solve(A, A); + auto iden = lapack::cholesky_solve(A, A); BOOST_CHECK(iden.trange() == A.trange()); auto iden_lapack = lapack::cholesky_solve(A, A); @@ -643,10 +656,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { 
GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { +BOOST_AUTO_TEST_CASE(cholesky_lsolve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -657,13 +668,12 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { }); // Should produce X = L**H - auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + auto [L, X] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); BOOST_CHECK(X.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); // first, test against LAPACK - auto [L_lapack, X_lapack] = - lapack::cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + auto [L_lapack, X_lapack] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); decltype(L) L_error; L_error("i,j") = L("i,j") - L_lapack("i,j"); BOOST_CHECK_SMALL(L_error("i,j").norm().get(), @@ -681,10 +691,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_lu_solve) { +BOOST_AUTO_TEST_CASE(lu_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -694,7 +702,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { return this->make_ta_reference(t, range); }); - auto iden = lu_solve(ref_ta, ref_ta); + auto iden = lapack::lu_solve(ref_ta, ref_ta); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -709,16 +717,18 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); + } -BOOST_AUTO_TEST_CASE(sca_lu_inv) { +BOOST_AUTO_TEST_CASE(lu_inv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - 
BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -730,7 +740,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { TA::TArray iden(*GlobalFixture::world, trange); - auto Ainv = lu_inv(ref_ta); + auto Ainv = lapack::lu_inv(ref_ta); iden("i,j") = Ainv("i,k") * ref_ta("k,j"); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -746,17 +756,19 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); + } #if 1 -BOOST_AUTO_TEST_CASE(sca_svd_values_only) { +BOOST_AUTO_TEST_CASE(svd_values_only) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -766,7 +778,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { return this->make_ta_reference(t, range); }); - auto S = svd(ref_ta, trange, trange); + auto S = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -779,10 +791,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { +BOOST_AUTO_TEST_CASE(svd_leftvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -792,7 +802,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { return this->make_ta_reference(t, range); }); - auto [S, U] = svd(ref_ta, trange, trange); + auto [S, U] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -805,10 +815,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { 
GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { +BOOST_AUTO_TEST_CASE(svd_rightvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -818,7 +826,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { return this->make_ta_reference(t, range); }); - auto [S, VT] = svd(ref_ta, trange, trange); + auto [S, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -831,10 +839,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { +BOOST_AUTO_TEST_CASE(svd_allvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -844,7 +850,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { return this->make_ta_reference(t, range); }); - auto [S, U, VT] = svd(ref_ta, trange, trange); + auto [S, U, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), From cfa3a93226a26b740608d3c1fc1f3d90f6a9f335 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 21:39:18 -0500 Subject: [PATCH 34/36] Set BLA_STATIC=OFF by default --- CMakeLists.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c0eaa7fa3..a8d770603f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,9 +59,9 @@ endif(TILEDARRAY_PRERELEASE_ID) # Set install paths ============================================================ -set(TILEDARRAY_INSTALL_BINDIR "bin" +set(TILEDARRAY_INSTALL_BINDIR "bin" CACHE PATH "TiledArray binary install directory") -set(TILEDARRAY_INSTALL_INCLUDEDIR "include" +set(TILEDARRAY_INSTALL_INCLUDEDIR "include" CACHE PATH 
"TiledArray INCLUDE install directory") set(TILEDARRAY_INSTALL_LIBDIR "lib" CACHE PATH "TiledArray LIB install directory") @@ -162,16 +162,15 @@ else () endif() redefaultable_option(CMAKE_POSITION_INDEPENDENT_CODE "Default value for POSITION_INDEPENDENT_CODE of targets" ${default_CMAKE_POSITION_INDEPENDENT_CODE}) +set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") if(BUILD_SHARED_LIBS) - set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH TRUE) else() - set(BLA_STATIC TRUE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH FALSE) endif() # miscellaneous cmake platform-neutral and platform-specific configuration ============================= -set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX +set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX set(CMAKE_SKIP_RPATH FALSE) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_SKIP_INSTALL_RPATH FALSE) @@ -394,19 +393,19 @@ export(EXPORT tiledarray configure_package_config_file(cmake/tiledarray-config.cmake.in "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" INSTALL_DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" - PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR + PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR TILEDARRAY_INSTALL_INCLUDEDIR TILEDARRAY_INSTALL_LIBDIR TILEDARRAY_INSTALL_DOCDIR TILEDARRAY_INSTALL_CMAKEDIR) # Install config, version, and target files install(EXPORT tiledarray FILE "tiledarray-targets.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) install(FILES "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" "${PROJECT_BINARY_DIR}/tiledarray-config-version.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) From 
217be9aeb53e686ec9b675e3297a3dbce2e6d7b0 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 22:02:34 -0500 Subject: [PATCH 35/36] fixup --- tests/linear_algebra.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/linear_algebra.cpp b/tests/linear_algebra.cpp index 94d8643759..390c068aeb 100644 --- a/tests/linear_algebra.cpp +++ b/tests/linear_algebra.cpp @@ -22,7 +22,9 @@ namespace lapack = TA::lapack; #include "TiledArray/algebra/scalapack/all.h" namespace scalapack = TA::scalapack; #define TILEDARRAY_SCALAPACK_TEST(F,E) \ + GlobalFixture::world->gop.fence(); \ compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + GlobalFixture::world->gop.fence(); \ compare("TiledArray", lapack::F, TiledArray::F, E); #else #define TILEDARRAY_SCALAPACK_TEST(...) @@ -577,6 +579,7 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); + } BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { From 8355dc5f8cebee28d4f52d4a28c3c670e65cf4ed Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 20 Nov 2020 10:27:19 -0500 Subject: [PATCH 36/36] fixed cholesky_linv unit test --- tests/linear_algebra.cpp | 63 +++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/tests/linear_algebra.cpp b/tests/linear_algebra.cpp index 390c068aeb..07f9a58be2 100644 --- a/tests/linear_algebra.cpp +++ b/tests/linear_algebra.cpp @@ -21,17 +21,16 @@ namespace lapack = TA::lapack; #include "TiledArray/algebra/scalapack/all.h" namespace scalapack = TA::scalapack; -#define TILEDARRAY_SCALAPACK_TEST(F,E) \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray", lapack::F, TiledArray::F, E); +#define TILEDARRAY_SCALAPACK_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + 
GlobalFixture::world->gop.fence(); \ + compare("TiledArray", lapack::F, TiledArray::F, E); #else #define TILEDARRAY_SCALAPACK_TEST(...) #endif struct ReferenceFixture { - size_t N; std::vector htoeplitz_vector; std::vector exact_evals; @@ -46,7 +45,8 @@ struct ReferenceFixture { #endif } - inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { + inline double make_ta_reference(TA::Tensor& t, + TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); @@ -60,8 +60,7 @@ struct ReferenceFixture { }; ReferenceFixture(int64_t N = 1000) - : N(N), htoeplitz_vector(N), exact_evals(N) - { + : N(N), htoeplitz_vector(N), exact_evals(N) { // Generate an hermitian Circulant vector std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; @@ -84,40 +83,37 @@ struct ReferenceFixture { } std::sort(exact_evals.begin(), exact_evals.end()); - } - }; struct LinearAlgebraFixture : ReferenceFixture { - #if TILEDARRAY_HAS_SCALAPACK blacspp::Grid grid; scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? - LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : - ReferenceFixture(N), - grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) - { + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) + : ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? 
+ ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { for (size_t i = 0; i < N; ++i) { for (size_t j = 0; j < N; ++j) { if (ref_matrix.dist().i_own(i, j)) { auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); - ref_matrix.local_mat()(i_local, j_local) = matrix_element_generator(i, j); + ref_matrix.local_mat()(i_local, j_local) = + matrix_element_generator(i, j); } } } } - template - static void compare(const char *context, const A& lapack, const A& result, double e) { + template + static void compare(const char* context, const A& lapack, const A& result, + double e) { BOOST_TEST_CONTEXT(context); auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); BOOST_CHECK_SMALL(diff_with_lapack, e); } #endif - }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -142,7 +138,6 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; - BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) #if TILEDARRAY_HAS_SCALAPACK @@ -164,7 +159,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -192,7 +188,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -255,7 +252,8 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto 
norm_diff = @@ -318,8 +316,8 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = - scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>( + ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -400,7 +398,7 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); }; -#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); @@ -551,6 +549,7 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); + decltype(A) Acopy = A.clone(); auto Linv = lapack::cholesky_linv(A); @@ -576,10 +575,9 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { BOOST_CHECK_SMALL(norm, epsilon); - TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); GlobalFixture::world->gop.fence(); - } BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { @@ -676,7 +674,8 @@ BOOST_AUTO_TEST_CASE(cholesky_lsolve) { BOOST_CHECK(L.trange() == A.trange()); // first, test against LAPACK - auto [L_lapack, X_lapack] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); + auto [L_lapack, X_lapack] = + lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); decltype(L) L_error; L_error("i,j") = L("i,j") - L_lapack("i,j"); BOOST_CHECK_SMALL(L_error("i,j").norm().get(), @@ -727,7 +726,6 @@ BOOST_AUTO_TEST_CASE(lu_solve) { TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); - } BOOST_AUTO_TEST_CASE(lu_inv) { @@ -766,7 +764,6 @@ BOOST_AUTO_TEST_CASE(lu_inv) { TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); - } #if 1