diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c0eaa7fa3..a8d770603f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,9 +59,9 @@ endif(TILEDARRAY_PRERELEASE_ID)
 
 # Set install paths ============================================================
 
-set(TILEDARRAY_INSTALL_BINDIR "bin" 
+set(TILEDARRAY_INSTALL_BINDIR "bin"
     CACHE PATH "TiledArray binary install directory")
-set(TILEDARRAY_INSTALL_INCLUDEDIR "include" 
+set(TILEDARRAY_INSTALL_INCLUDEDIR "include"
     CACHE PATH "TiledArray INCLUDE install directory")
 set(TILEDARRAY_INSTALL_LIBDIR "lib"
     CACHE PATH "TiledArray LIB install directory")
@@ -162,16 +162,15 @@ else ()
 endif()
 redefaultable_option(CMAKE_POSITION_INDEPENDENT_CODE "Default value for POSITION_INDEPENDENT_CODE of targets" ${default_CMAKE_POSITION_INDEPENDENT_CODE})
 
+set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries")
 if(BUILD_SHARED_LIBS)
-  set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries")
   set(CMAKE_MACOSX_RPATH TRUE)
 else()
-  set(BLA_STATIC TRUE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries")
   set(CMAKE_MACOSX_RPATH FALSE)
 endif()
 
 # miscellaneous cmake platform-neutral and platform-specific configuration =============================
-set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE)  # do not search in CMAKE_INSTALL_PREFIX 
+set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE)  # do not search in CMAKE_INSTALL_PREFIX
 set(CMAKE_SKIP_RPATH FALSE)
 set(CMAKE_SKIP_BUILD_RPATH  FALSE)
 set(CMAKE_SKIP_INSTALL_RPATH FALSE)
@@ -394,19 +393,19 @@ export(EXPORT tiledarray
 configure_package_config_file(cmake/tiledarray-config.cmake.in
     "${PROJECT_BINARY_DIR}/tiledarray-config.cmake"
   INSTALL_DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}"
-  PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR 
+  PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR
             TILEDARRAY_INSTALL_INCLUDEDIR TILEDARRAY_INSTALL_LIBDIR
             TILEDARRAY_INSTALL_DOCDIR TILEDARRAY_INSTALL_CMAKEDIR)
 
 # Install config, version, and target files
 install(EXPORT tiledarray
     FILE "tiledarray-targets.cmake"
-    DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" 
+    DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}"
     COMPONENT tiledarray)
 install(FILES
     "${PROJECT_BINARY_DIR}/tiledarray-config.cmake"
     "${PROJECT_BINARY_DIR}/tiledarray-config-version.cmake"
-    DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" 
+    DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}"
     COMPONENT tiledarray)
 
 
diff --git a/examples/scalapack/evp.cpp b/examples/scalapack/evp.cpp
index d32a110073..22964ccae0 100644
--- a/examples/scalapack/evp.cpp
+++ b/examples/scalapack/evp.cpp
@@ -28,7 +28,7 @@
 #include <scalapackpp/eigenvalue_problem/sevp.hpp>
 #include <scalapackpp/pblas/gemm.hpp>
 
-#include <TiledArray/math/scalapack.h>
+#include <TiledArray/algebra/scalapack/heig.h>
 
 using Array = TA::TArray<double>;
 // using Array = TA::TSpArray<double>;
@@ -93,23 +93,22 @@ int main(int argc, char** argv) {
     tensor_symm("i,j") = 0.5 * (tensor("i,j") + tensor("j,i"));
     tensor("i,j") = tensor_symm("i,j");
 
-
-    auto [ evals, evecs_ta ] = TA::heig( tensor );
-
+    auto [evals, evecs_ta] = TA::scalapack::heig(tensor);
 
     //// Check EVP with TA
-    Array tmp = TA::foreach (evecs_ta, [evals = evals](TA::Tensor<double>& result,
-                                           const TA::Tensor<double>& arg) {
-      result = TA::clone(arg);
-
-      auto range = arg.range();
-      auto lo = range.lobound_data();
-      auto up = range.upbound_data();
-      for (auto m = lo[0]; m < up[0]; ++m)
-        for (auto n = lo[1]; n < up[1]; ++n) {
-          result(m, n) = arg(m, n) * evals[n];
-        }
-    });
+    Array tmp =
+        TA::foreach (evecs_ta, [evals = evals](TA::Tensor<double>& result,
+                                               const TA::Tensor<double>& arg) {
+          result = TA::clone(arg);
+
+          auto range = arg.range();
+          auto lo = range.lobound_data();
+          auto up = range.upbound_data();
+          for (auto m = lo[0]; m < up[0]; ++m)
+            for (auto n = lo[1]; n < up[1]; ++n) {
+              result(m, n) = arg(m, n) * evals[n];
+            }
+        });
 
     world.gop.fence();
     tensor("i,j") = tensor("i,j") - tmp("i,k") * evecs_ta("j,k");
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c1b9f182cf..ddab08d0a1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -60,6 +60,20 @@ TiledArray/zero_tensor.h
 TiledArray/algebra/conjgrad.h
 TiledArray/algebra/diis.h
 TiledArray/algebra/utils.h
+TiledArray/algebra/cholesky.h
+TiledArray/algebra/heig.h
+TiledArray/algebra/lu.h
+TiledArray/algebra/svd.h
+TiledArray/algebra/types.h
+TiledArray/algebra/lapack/cholesky.h
+TiledArray/algebra/lapack/heig.h
+TiledArray/algebra/lapack/util.h
+TiledArray/algebra/lapack/lu.h
+TiledArray/algebra/lapack/svd.h
+TiledArray/algebra/scalapack/cholesky.h
+TiledArray/algebra/scalapack/heig.h
+TiledArray/algebra/scalapack/lu.h
+TiledArray/algebra/scalapack/svd.h
 TiledArray/conversions/btas.h
 TiledArray/conversions/clone.h
 TiledArray/conversions/dense_to_sparse.h
@@ -112,8 +126,6 @@ TiledArray/math/partial_reduce.h
 TiledArray/math/transpose.h
 TiledArray/math/vector_op.h
 TiledArray/math/scalapack.h
-TiledArray/math/scalapack/heig.h
-TiledArray/math/scalapack/chol.h
 TiledArray/pmap/blocked_pmap.h
 TiledArray/pmap/cyclic_pmap.h
 TiledArray/pmap/hash_pmap.h
@@ -197,6 +209,7 @@ TiledArray/array_impl.cpp
 TiledArray/dist_array.cpp
 TiledArray/util/backtrace.cpp
 TiledArray/util/bug.cpp
+TiledArray/algebra/lapack/lapack.cpp
 )
 
 # the list of libraries on which TiledArray depends on, will be cached later
@@ -292,5 +305,3 @@ install(
   FILES_MATCHING PATTERN "*.h"
   PATTERN "CMakeFiles" EXCLUDE
 )
-
-    
diff --git a/src/TiledArray/algebra/cholesky.h b/src/TiledArray/algebra/cholesky.h
new file mode 100644
index 0000000000..b34bf38292
--- /dev/null
+++ b/src/TiledArray/algebra/cholesky.h
@@ -0,0 +1,77 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  chol.h
+ *  Created:    16 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/cholesky.h>
+#endif
+#include <TiledArray/algebra/lapack/cholesky.h>
+
+namespace TiledArray {
+
+template <typename Array>
+auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000)
+    return scalapack::cholesky<Array>(A, l_trange);
+#endif
+  return lapack::cholesky<Array>(A, l_trange);
+}
+
+template <typename Array, bool RetL = false>
+auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000)
+    return scalapack::cholesky_linv<Array, RetL>(A, l_trange);
+#endif
+  return lapack::cholesky_linv<Array, RetL>(A, l_trange);
+}
+
+template <typename Array>
+auto cholesky_solve(const Array& A, const Array& B,
+                    TiledRange x_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000)
+    return scalapack::cholesky_solve<Array>(A, B, x_trange);
+#endif
+  return lapack::cholesky_solve<Array>(A, B, x_trange);
+}
+
+template <typename Array>
+auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B,
+                     TiledRange l_trange = TiledRange(),
+                     TiledRange x_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000)
+    return scalapack::cholesky_lsolve<Array>(transpose, A, B, l_trange,
+                                             x_trange);
+#endif
+  return lapack::cholesky_lsolve<Array>(transpose, A, B, l_trange, x_trange);
+}
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_CHOL_H__INCLUDED
diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h
new file mode 100644
index 0000000000..68e39c9de9
--- /dev/null
+++ b/src/TiledArray/algebra/heig.h
@@ -0,0 +1,57 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  heig.h
+ *  Created:  16 October,  2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/heig.h>
+#endif
+#include <TiledArray/algebra/lapack/heig.h>
+
+namespace TiledArray {
+
+template <typename Array>
+auto heig(const Array& A, TiledRange evec_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000) {
+    return scalapack::heig(A, evec_trange);
+  }
+#endif
+  return lapack::heig(A, evec_trange);
+}
+
+template <typename ArrayA, typename ArrayB, typename EVecType = ArrayA>
+auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000) {
+    return scalapack::heig(A, B, evec_trange);
+  }
+#endif
+  return lapack::heig(A, B, evec_trange);
+}
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_HEIG_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/cholesky.h b/src/TiledArray/algebra/lapack/cholesky.h
new file mode 100644
index 0000000000..44376bae9a
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/cholesky.h
@@ -0,0 +1,205 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  chol.h
+ *  Created:    16 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED
+
+#include <TiledArray/algebra/lapack/lapack.h>
+#include <TiledArray/algebra/lapack/util.h>
+#include <TiledArray/config.h>
+#include <TiledArray/conversions/eigen.h>
+
+namespace TiledArray {
+namespace lapack {
+
+namespace detail {
+
+template <typename Tile, typename Policy>
+auto make_L_eig(const DistArray<Tile, Policy>& A) {
+  using Array = DistArray<Tile, Policy>;
+  using numeric_type = typename Array::numeric_type;
+  static_assert(std::is_same_v<numeric_type, typename Array::element_type>,
+                "TA::lapack::{cholesky*} are only usable with a DistArray of "
+                "scalar types");
+
+  World& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+  if (world.rank() == 0) {
+    lapack::cholesky(A_eig);
+  }
+  world.gop.broadcast_serializable(A_eig, 0);
+  return A_eig;
+}
+
+}  // namespace detail
+
+/**
+ *  @brief Compute the Cholesky factorization of a HPD rank-2 tensor
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto L = cholesky(A, ...)
+ *
+ *  @tparam Array a DistArray type (i.e., @c is_array_v<Array> is true)
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] l_trange    TiledRange for resulting Cholesky factor. If left
+ * empty, will default to array.trange()
+ *
+ *  @returns The lower triangular Cholesky factor L in TA format
+ *  @note this is a collective operation with respect to the world of @p A
+ */
+template <typename Array,
+          typename = std::enable_if_t<TiledArray::detail::is_array_v<Array>>>
+auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) {
+  auto L_eig = detail::make_L_eig(A);
+  detail::zero_out_upper_triangle(L_eig);
+  if (l_trange.rank() == 0) l_trange = A.trange();
+  return eigen_to_array<Array>(A.world(), l_trange, L_eig);
+}
+
+/**
+ *  @brief Compute the Cholesky factorization of a HPD rank-2 tensor
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto L = cholesky(A, ...)
+ *
+ *  @tparam ContiguousTensor a contiguous tensor type (i.e., @c
+ * is_contiguous_tensor_v<ContiguousTensor> is true)
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @returns The lower triangular Cholesky factor L as a ContiguousTensor
+ *  @note this is a non-collective operation, only computes on the rank on which
+ * invoked
+ */
+template <typename ContiguousTensor,
+          typename = std::enable_if_t<
+              TiledArray::detail::is_contiguous_tensor_v<ContiguousTensor>>>
+auto cholesky(const ContiguousTensor& A) {
+  auto A_eig = detail::to_eigen(A);
+  lapack::cholesky(A_eig);
+  detail::zero_out_upper_triangle(A_eig);
+  return detail::from_eigen<ContiguousTensor>(A_eig, A.range());
+}
+
+/**
+ *  @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor.
+ *  Optionally return the Cholesky factor itself
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv
+ *
+ *  Example Usage:
+ *
+ *  auto Linv     = cholesky_Linv(A, ...)
+ *  auto [L,Linv] = cholesky_Linv<decltype(A),true>(A, ...)
+ *
+ *  @tparam Array a DistArray type (i.e., @c is_array_v<Array> is true)
+ *  @tparam RetL  Whether or not to return the cholesky factor
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] l_trange    TiledRange for resulting inverse Cholesky factor.
+ *                         If left empty, will default to array.trange()
+ *
+ *  @returns The inverse lower triangular Cholesky factor in TA format
+ *  @note this is a collective operation with respect to the world of @p A
+ */
+template <typename Array, bool RetL = false,
+          typename = std::enable_if_t<TiledArray::detail::is_array_v<Array>>>
+auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) {
+  World& world = A.world();
+  auto L_eig = detail::make_L_eig(A);
+  if constexpr (RetL) detail::zero_out_upper_triangle(L_eig);
+
+  // if need to return L use its copy to compute inverse
+  decltype(L_eig) L_inv_eig;
+
+  if (world.rank() == 0) {
+    if (RetL) L_inv_eig = L_eig;
+    auto& L_inv_eig_ref = RetL ? L_inv_eig : L_eig;
+    cholesky_linv(L_inv_eig_ref);
+    detail::zero_out_upper_triangle(L_inv_eig_ref);
+  }
+  world.gop.broadcast_serializable(RetL ? L_inv_eig : L_eig, 0);
+
+  if (l_trange.rank() == 0) l_trange = A.trange();
+  if constexpr (RetL)
+    return std::make_tuple(eigen_to_array<Array>(world, l_trange, L_eig),
+                           eigen_to_array<Array>(world, l_trange, L_inv_eig));
+  else
+    return eigen_to_array<Array>(world, l_trange, L_eig);
+}
+
+template <typename Array,
+          typename = std::enable_if_t<TiledArray::detail::is_array_v<Array>>>
+auto cholesky_solve(const Array& A, const Array& B,
+                    TiledRange x_trange = TiledRange()) {
+  using numeric_type = typename Array::numeric_type;
+  static_assert(std::is_same_v<numeric_type, typename Array::element_type>,
+                "TA::lapack::{cholesky*} are only usable with a DistArray of "
+                "scalar types");
+
+  auto A_eig = detail::to_eigen(A);
+  auto X_eig = detail::to_eigen(B);
+  World& world = A.world();
+  if (world.rank() == 0) {
+    cholesky_solve(A_eig, X_eig);
+  }
+  world.gop.broadcast_serializable(X_eig, 0);
+  if (x_trange.rank() == 0) x_trange = B.trange();
+  return eigen_to_array<Array>(world, x_trange, X_eig);
+}
+
+template <typename Array,
+          typename = std::enable_if_t<TiledArray::detail::is_array_v<Array>>>
+auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B,
+                     TiledRange l_trange = TiledRange(),
+                     TiledRange x_trange = TiledRange()) {
+  World& world = A.world();
+  auto L_eig = detail::make_L_eig(A);
+  detail::zero_out_upper_triangle(L_eig);
+
+  using numeric_type = typename Array::numeric_type;
+  static_assert(std::is_same_v<numeric_type, typename Array::element_type>,
+                "TA::lapack::{cholesky*} are only usable with a DistArray of "
+                "scalar types");
+
+  auto X_eig = detail::to_eigen(B);
+  if (world.rank() == 0) {
+    cholesky_lsolve(transpose, L_eig, X_eig);
+  }
+  world.gop.broadcast_serializable(X_eig, 0);
+  if (l_trange.rank() == 0) l_trange = A.trange();
+  if (x_trange.rank() == 0) x_trange = B.trange();
+  return std::make_tuple(eigen_to_array<Array>(world, l_trange, L_eig),
+                         eigen_to_array<Array>(world, x_trange, X_eig));
+}
+
+}  // namespace lapack
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h
new file mode 100644
index 0000000000..5fe4270c8f
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/heig.h
@@ -0,0 +1,116 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  heig.h
+ *  Created:  19 October,  2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED
+
+#include <TiledArray/config.h>
+
+#include <TiledArray/algebra/lapack/lapack.h>
+#include <TiledArray/algebra/lapack/util.h>
+#include <TiledArray/conversions/eigen.h>
+
+namespace TiledArray::lapack {
+
+/**
+ *  @brief Solve the standard eigenvalue problem with LAPACK
+ *
+ *  A(i,k) X(k,j) = X(i,j) E(j)
+ *
+ *  Example Usage:
+ *
+ *  auto [E, X] = heig(A, ...)
+ *
+ *  @tparam Array Input array type
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty,
+ *                         will default to array.trange()
+ *
+ *  @returns A tuple containing the eigenvalues and eigenvectors of input array
+ *  as std::vector and in TA format, respectively.
+ */
+template <typename Array>
+auto heig(const Array& A, TiledRange evec_trange = TiledRange()) {
+  using numeric_type = typename lapack::array_traits<Array>::numeric_type;
+  World& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+  std::vector<numeric_type> evals;
+  if (world.rank() == 0) {
+    lapack::heig(A_eig, evals);
+  }
+  world.gop.broadcast_serializable(A_eig, 0);
+  world.gop.broadcast_serializable(evals, 0);
+  if (evec_trange.rank() == 0) evec_trange = A.trange();
+  return std::tuple(
+    evals,
+    eigen_to_array<Array>(world, evec_trange, A_eig)
+  );
+}
+
+/**
+ *  @brief Solve the generalized eigenvalue problem with LAPACK
+ *
+ *  A(i,k) X(k,j) = B(i,k) X(k,j) E(j)
+ *
+ *  with
+ *
+ *  X(k,i) B(k,l) X(l,j) = I(i,j)
+ *
+ *  Example Usage:
+ *
+ *  auto [E, X] = heig(A, B, ...)
+ *
+ *  @tparam Array Input array type
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] B           Positive-definite matrix
+ *  @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty,
+ *                         will default to array.trange()
+ *
+ *  @returns A tuple containing the eigenvalues and eigenvectors of input array
+ *  as std::vector and in TA format, respectively.
+ */
+template <typename ArrayA, typename ArrayB, typename EVecType = ArrayA>
+auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) {
+  using numeric_type = typename lapack::array_traits<ArrayA>::numeric_type;
+  (void)lapack::array_traits<ArrayB>{};
+  World& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+  auto B_eig = detail::to_eigen(B);
+  std::vector<numeric_type> evals;
+  if (world.rank() == 0) {
+    lapack::heig(A_eig, B_eig, evals);
+  }
+  world.gop.broadcast_serializable(A_eig, 0);
+  world.gop.broadcast_serializable(evals, 0);
+  if (evec_trange.rank() == 0) evec_trange = A.trange();
+  return std::tuple(
+    evals,
+    eigen_to_array<ArrayA>(A.world(), evec_trange, A_eig)
+  );
+}
+
+}  // namespace TiledArray::lapack
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp
new file mode 100644
index 0000000000..640ef695f1
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/lapack.cpp
@@ -0,0 +1,282 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  cholesky.h
+ *  Created:    16 October, 2020
+ *
+ */
+
+#include <TiledArray/algebra/lapack/lapack.h>
+#include <TiledArray/algebra/lapack/util.h>
+#include <TiledArray/config.h>
+#include <madness/tensor/clapack.h>
+#include <Eigen/Core>
+
+#define TA_LAPACK_CALL(name, args...)                                    \
+  typedef T numeric_type;                                                \
+  if constexpr (std::is_same_v<numeric_type, double>)                    \
+    d##name##_(args);                                                    \
+  else if constexpr (std::is_same_v<numeric_type, float>)                \
+    s##name##_(args);                                                    \
+  else if constexpr (std::is_same_v<numeric_type, std::complex<double>>) \
+    z##name##_(args);                                                    \
+  else if constexpr (std::is_same_v<numeric_type, std::complex<float>>)  \
+    c##name##_(args);                                                    \
+  else                                                                   \
+    std::abort();
+
+#define TA_LAPACK_GESV(...) TA_LAPACK_CALL(gesv, __VA_ARGS__)
+#define TA_LAPACK_GETRF(...) TA_LAPACK_CALL(getrf, __VA_ARGS__)
+#define TA_LAPACK_GETRI(...) TA_LAPACK_CALL(getri, __VA_ARGS__)
+
+#ifdef MADNESS_LINALG_USE_LAPACKE
+
+#define TA_LAPACK_POTRF(...)  TA_LAPACK_CALL(potrf, __VA_ARGS__)
+#define TA_LAPACK_POSV(...)  TA_LAPACK_CALL(posv, __VA_ARGS__)
+#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__)
+#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__)
+#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__)
+#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__)
+#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__)
+
+#else
+
+#ifdef FORTRAN_LINKAGE_LCU
+#define dtrtri dtrtri_
+#define dtrtrs dtrtrs_
+#define dposv dposv_
+#endif
+
+extern "C" { // these arent in madness/clapack_fortran.h
+void dtrtri(const char* uplo, const char* diag,
+            const integer* n,
+            const real8* a, const integer* lda,
+            integer *info,
+            char_len, char_len);
+void dtrtrs(const char* uplo, const char* trans, const char* diag,
+            const integer* n, const integer *nrhs,
+            const real8* a, const integer* lda,
+            const real8* b, const integer* ldb,
+            integer *info,
+            char_len, char_len, char_len);
+void dposv(const char* uplo,
+           const integer* n, const integer *nrhs,
+           const real8* a, const integer* lda,
+           const real8* b, const integer* ldb,
+           integer *info,
+           char_len);
+}
+
+#define TA_LAPACK_POTRF(...)  TA_LAPACK_CALL(potrf, __VA_ARGS__, sizeof(char))
+#define TA_LAPACK_POSV(...)  TA_LAPACK_CALL(posv, __VA_ARGS__, sizeof(char))
+#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__, sizeof(char), sizeof(char))
+#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__, sizeof(char), sizeof(char))
+#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__, sizeof(char), sizeof(char), sizeof(char))
+#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__, sizeof(char), sizeof(char))
+#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__, sizeof(char), sizeof(char))
+
+#endif // MADNESS_LINALG_USE_LAPACKE
+
+namespace TiledArray::lapack {
+
+template <typename T>
+void cholesky(Matrix<T>& A) {
+  char uplo = 'L';
+  integer n = A.rows();
+  auto* a = A.data();
+  integer lda = n;
+  integer info = 0;
+  TA_LAPACK_POTRF(&uplo, &n, a, &lda, &info);
+  if (info != 0) TA_EXCEPTION("lapack::cholesky failed");
+}
+
+template <typename T>
+void cholesky_linv(Matrix<T>& A) {
+  char uplo = 'L';
+  char diag = 'N';
+  integer n = A.rows();
+  auto* l = A.data();
+  integer lda = n;
+  integer info = 0;
+  TA_LAPACK_TRTRI(&uplo, &diag, &n, l, &lda, &info);
+  if (info != 0) TA_EXCEPTION("lapack::cholesky_linv failed");
+}
+
+template <typename T>
+void cholesky_solve(Matrix<T>& A, Matrix<T>& X) {
+  char uplo = 'L';
+  integer n = A.rows();
+  integer nrhs = X.cols();
+  auto* a = A.data();
+  auto* b = X.data();
+  integer lda = n;
+  integer ldb = n;
+  integer info = 0;
+  TA_LAPACK_POSV(&uplo, &n, &nrhs, a, &lda, b, &ldb, &info);
+  if (info != 0) TA_EXCEPTION("lapack::cholesky_solve failed");
+}
+
+template <typename T>
+void cholesky_lsolve(TransposeFlag transpose, Matrix<T>& A, Matrix<T>& X) {
+  char uplo = 'L';
+  char trans = transpose == TransposeFlag::Transpose
+                   ? 'T'
+                   : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C');
+  char diag = 'N';
+  integer n = A.rows();
+  integer nrhs = X.cols();
+  auto* a = A.data();
+  auto* b = X.data();
+  integer lda = n;
+  integer ldb = n;
+  integer info = 0;
+  TA_LAPACK_TRTRS(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info);
+  if (info != 0) TA_EXCEPTION("lapack::cholesky_lsolve failed");
+}
+
+template <typename T>
+void heig(Matrix<T>& A, std::vector<T>& W) {
+  char jobz = 'V';
+  char uplo = 'L';
+  integer n = A.rows();
+  T* a = A.data();
+  integer lda = A.rows();
+  W.resize(n);
+  T* w = W.data();
+  integer lwork = -1;
+  integer info;
+  T lwork_dummy;
+  TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info);
+  lwork = integer(lwork_dummy);
+  std::vector<T> work(lwork);
+  TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info);
+  if (info != 0) TA_EXCEPTION("lapack::heig failed");
+}
+
+template <typename T>
+void heig(Matrix<T>& A, Matrix<T>& B, std::vector<T>& W) {
+  integer itype = 1;
+  char jobz = 'V';
+  char uplo = 'L';
+  integer n = A.rows();
+  T* a = A.data();
+  integer lda = A.rows();
+  T* b = B.data();
+  integer ldb = B.rows();
+  W.resize(n);
+  T* w = W.data();
+  integer lwork = -1;
+  integer info;
+  T lwork_dummy;
+  TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info);
+  lwork = integer(lwork_dummy);
+  std::vector<T> work(lwork);
+  TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info);
+  if (info != 0) TA_EXCEPTION("lapack::heig failed");
+}
+
+template <typename T>
+void svd(Matrix<T>& A, std::vector<T>& S, Matrix<T>* U, Matrix<T>* VT) {
+  integer m = A.rows();
+  integer n = A.cols();
+  T* a = A.data();
+  integer lda = A.rows();
+
+  S.resize(std::min(m, n));
+  T* s = S.data();
+
+  char jobu = 'N';
+  T* u = nullptr;
+  integer ldu = m;
+  if (U) {
+    jobu = 'A';
+    U->resize(m, n);
+    u = U->data();
+    ldu = U->rows();
+  }
+
+  char jobvt = 'N';
+  T* vt = nullptr;
+  integer ldvt = n;
+  if (VT) {
+    jobvt = 'A';
+    VT->resize(n, m);
+    vt = VT->data();
+    ldvt = VT->rows();
+  }
+
+  integer lwork = -1;
+  integer info;
+  T lwork_dummy;
+
+  TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info);
+  lwork = integer(lwork_dummy);
+  std::vector<T> work(lwork);
+  TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info);
+  if (info != 0) TA_EXCEPTION("lapack::svd failed");
+}
+
+template<typename T>
+void lu_solve(Matrix<T> &A, Matrix<T> &B) {
+  integer n = A.rows();
+  integer nrhs = B.cols();
+  T* a = A.data();
+  integer lda = A.rows();
+  T* b = B.data();
+  integer ldb = B.rows();
+  std::vector<integer> ipiv(n);
+  integer info;
+  TA_LAPACK_GESV(&n, &nrhs, a, &lda, ipiv.data(), b, &ldb, &info);
+  if (info != 0) TA_EXCEPTION("lapack::lu_solve failed");
+}
+
+template<typename T>
+void lu_inv(Matrix<T> &A) {
+  integer n = A.rows();
+  T* a = A.data();
+  integer lda = A.rows();
+  integer lwork = -1;
+  std::vector<T> work(1);
+  std::vector<integer> ipiv(n);
+  integer info;
+  TA_LAPACK_GETRF(&n, &n, a, &lda, ipiv.data(), &info);
+  if (info != 0) TA_EXCEPTION("lapack::lu_inv failed");
+  TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info);
+  lwork = (integer)work[0];
+  work.resize(lwork);
+  TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info);
+  if (info != 0) TA_EXCEPTION("lapack::lu_inv failed");
+}
+
+
+#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR)                        \
+  template void cholesky(MATRIX&);                                \
+  template void cholesky_linv(MATRIX&);                           \
+  template void cholesky_solve(MATRIX&, MATRIX&);                 \
+  template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \
+  template void heig(MATRIX&, VECTOR&);                           \
+  template void heig(MATRIX&, MATRIX&, VECTOR&);                  \
+  template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*);          \
+  template void lu_solve(MATRIX&, MATRIX&);                       \
+  template void lu_inv(MATRIX&);
+
+TA_LAPACK_EXPLICIT(lapack::Matrix<double>, std::vector<double>);
+//TA_LAPACK_EXPLICIT(lapack::Matrix<float>, std::vector<float>);
+
+}  // namespace TiledArray::lapack
diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h
new file mode 100644
index 0000000000..78c767bfc5
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/lapack.h
@@ -0,0 +1,76 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  chol.h
+ *  Created:    16 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED
+
+#include <TiledArray/config.h>
+#include <TiledArray/algebra/types.h>
+#include <Eigen/Core>
+
+namespace TiledArray::lapack {
+
+template<typename A>
+struct array_traits {
+  using scalar_type = typename A::scalar_type;
+  using numeric_type = typename A::numeric_type;
+  static const bool complex = !std::is_same_v<scalar_type, numeric_type>;
+  static_assert(
+    std::is_same_v<numeric_type, typename A::element_type>,
+    "TA::lapack is only usable with a DistArray of scalar types"
+  );
+};
+
+template<typename T>
+using Matrix = Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic,Eigen::ColMajor>;
+
+template<typename T>
+void cholesky(Matrix<T> &A);
+
+template<typename T>
+void cholesky_linv(Matrix<T> &A);
+
+template<typename T>
+void cholesky_solve(Matrix<T> &A, Matrix<T> &X);
+
+template<typename T>
+void cholesky_lsolve(TransposeFlag transpose, Matrix<T> &A, Matrix<T> &X);
+
+template<typename T>
+void heig(Matrix<T> &A, std::vector<T> &W);
+
+template<typename T>
+void heig(Matrix<T> &A, Matrix<T> &B, std::vector<T> &W);
+
+template<typename T>
+void svd(Matrix<T> &A, std::vector<T> &S, Matrix<T> *U, Matrix<T> *VT);
+
+template<typename T>
+void lu_solve(Matrix<T> &A, Matrix<T> &B);
+
+template<typename T>
+void lu_inv(Matrix<T> &A);
+
+}
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_LAPACK_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/lu.h b/src/TiledArray/algebra/lapack/lu.h
new file mode 100644
index 0000000000..389af28942
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/lu.h
@@ -0,0 +1,70 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  lu.h
+ *  Created:    19 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED
+
+#include <TiledArray/config.h>
+
+#include <TiledArray/algebra/lapack/lapack.h>
+
+namespace TiledArray::lapack {
+
+/**
+ *  @brief Solve a linear system via LU factorization
+ */
+template <typename ArrayA, typename ArrayB>
+auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) {
+  (void)lapack::array_traits<ArrayA>{};
+  (void)lapack::array_traits<ArrayB>{};
+  auto& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+  auto B_eig = detail::to_eigen(B);
+  if (world.rank() == 0) {
+    lapack::lu_solve(A_eig, B_eig);
+  }
+  world.gop.broadcast_serializable(B_eig, 0);
+  if (x_trange.rank() == 0) x_trange = B.trange();
+  return eigen_to_array<ArrayB>(world, x_trange, B_eig);
+}
+
+/**
+ *  @brief Invert a matrix via LU
+ */
+template <typename Array>
+auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) {
+  (void)lapack::array_traits<Array>{};
+  auto& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+  if (world.rank() == 0) {
+    lapack::lu_inv(A_eig);
+  }
+  world.gop.broadcast_serializable(A_eig, 0);
+  if (ainv_trange.rank() == 0) ainv_trange = A.trange();
+  return eigen_to_array<Array>(A.world(), ainv_trange, A_eig);
+}
+
+}  // namespace TiledArray::lapack
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/svd.h b/src/TiledArray/algebra/lapack/svd.h
new file mode 100644
index 0000000000..8ea0afd3da
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/svd.h
@@ -0,0 +1,103 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  svd.h
+ *  Created:    12 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED
+
+#include <TiledArray/config.h>
+
+#include <TiledArray/algebra/lapack/lapack.h>
+
+namespace TiledArray::lapack {
+
+/**
+ *  @brief Compute the singular value decomposition (SVD) via ScaLAPACK
+ *
+ *  A(i,j) = S(k) U(i,k) conj(V(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto S          = svd<SVDValuesOnly>  (A, ...)
+ *  auto [S, U]     = svd<SVDLeftstd::vectors> (A, ...)
+ *  auto [S, VT]    = svd<SVDRightstd::vectors>(A, ...)
+ *  auto [S, U, VT] = svd<SVDAllstd::vectors>  (A, ...)
+ *
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
+ *
+ *  @param[in] A           Input array to be decomposed. Must be rank-2
+ *  @param[in] u_trange    TiledRange for resulting left singular vectors.
+ *  @param[in] vt_trange   TiledRange for resulting right singular vectors
+ * (transposed).
+ *
+ *  @returns A tuple containing the eigenvalues and eigenvectors of input array
+ *  as std::vector and in TA format, respectively.
+ */
+template <typename SVDType, typename Array,
+          typename = TiledArray::detail::enable_if_svd_return_type<SVDType>>
+auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) {
+
+  using T = typename Array::numeric_type;
+
+  World& world = A.world();
+  auto A_eig = detail::to_eigen(A);
+
+  constexpr bool svd_all_vectors = std::is_same_v<SVDType, SVDAllVectors>;
+  constexpr bool need_u = std::is_same_v<SVDType, SVDLeftVectors> or svd_all_vectors;
+  constexpr bool need_vt = std::is_same_v<SVDType, SVDRightVectors> or svd_all_vectors;
+
+  std::vector<T> S;
+  std::unique_ptr< Matrix<T> > U, VT;
+
+  if constexpr (need_u) U = std::make_unique< Matrix<T> >();
+  if constexpr (need_vt) VT = std::make_unique< Matrix<T> >();
+
+  if (world.rank() == 0) {
+    lapack::svd(A_eig, S, U.get(), VT.get());
+  }
+
+  world.gop.broadcast_serializable(S, 0);
+  if (U) world.gop.broadcast_serializable(*U, 0);
+  if (VT) world.gop.broadcast_serializable(*VT, 0);
+
+  auto make_array = [&world](auto && ... args) {
+    return eigen_to_array<Array>(world, args...);
+  };
+
+  if constexpr (need_u && need_vt) {
+    return std::tuple(S, make_array(u_trange, *U), make_array(vt_trange, *VT));
+  }
+  if constexpr (need_u && !need_vt) {
+    return std::tuple(S, make_array(u_trange, *U));
+  }
+  if constexpr (!need_u && need_vt) {
+    return std::tuple(S, make_array(vt_trange, *VT));
+  }
+
+  if constexpr (!need_u && !need_vt) return S;
+
+}
+
+}  // namespace scalapack::TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED
diff --git a/src/TiledArray/algebra/lapack/util.h b/src/TiledArray/algebra/lapack/util.h
new file mode 100644
index 0000000000..daf822ff8e
--- /dev/null
+++ b/src/TiledArray/algebra/lapack/util.h
@@ -0,0 +1,109 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  util.h
+ *  Created:    19 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#include <TiledArray/conversions/eigen.h>
+
+namespace TiledArray {
+namespace lapack {
+namespace detail {
+
+template <typename Tile, typename Policy>
+auto to_eigen(const DistArray<Tile, Policy>& A) {
+  auto A_repl = A;
+  A_repl.make_replicated();
+  return array_to_eigen<Tile, Policy, Eigen::ColMajor>(A_repl);
+}
+
+template <typename ContiguousTensor,
+          typename = std::enable_if_t<
+              TiledArray::detail::is_contiguous_tensor_v<ContiguousTensor>>>
+auto to_eigen(const ContiguousTensor& A) {
+  using numeric_type = TiledArray::detail::numeric_t<ContiguousTensor>;
+  static_assert(
+      std::is_same_v<numeric_type, typename ContiguousTensor::value_type>,
+      "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of "
+      "scalar types");
+  TA_ASSERT(A.range().rank() == 1 || A.range().rank() == 2);
+  using colmajor_matrix_type = Eigen::Matrix<numeric_type, Eigen::Dynamic,
+                                             Eigen::Dynamic, Eigen::ColMajor>;
+  colmajor_matrix_type result(A.range().extent(0),
+                              A.range().rank() == 2 ? A.range().extent(1) : 1);
+  constexpr const auto layout =
+      TiledArray::detail::ordinal_traits<ContiguousTensor>::type;
+  if (layout == TiledArray::OrdinalType::RowMajor) {
+    using rowmajor_matrix_type = Eigen::Matrix<numeric_type, Eigen::Dynamic,
+                                               Eigen::Dynamic, Eigen::RowMajor>;
+    auto result_map = Eigen::Map<const rowmajor_matrix_type>(
+        A.data(), result.rows(), result.cols());
+    result = result_map;
+  } else if constexpr (layout == TiledArray::OrdinalType::ColMajor) {
+    using rowmajor_matrix_type = Eigen::Matrix<numeric_type, Eigen::Dynamic,
+                                               Eigen::Dynamic, Eigen::ColMajor>;
+    auto result_map = Eigen::Map<const rowmajor_matrix_type>(
+        A.data(), result.rows(), result.cols());
+    result = result_map;
+  } else
+    abort();
+  return result;
+}
+
+template <typename ContiguousTensor, typename Scalar, int RowsAtCompileTime,
+          int ColsAtCompileTime, int Options, int MaxRowsAtCompileTime,
+          int MaxColsAtCompileTime,
+          typename = std::enable_if_t<
+              TiledArray::detail::is_contiguous_tensor_v<ContiguousTensor>>>
+auto from_eigen(
+    const Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, Options,
+                        MaxRowsAtCompileTime, MaxColsAtCompileTime>& A,
+    typename ContiguousTensor::range_type range = {}) {
+  using numeric_type = TiledArray::detail::numeric_t<ContiguousTensor>;
+  static_assert(
+      std::is_same_v<numeric_type, typename ContiguousTensor::value_type>,
+      "TA::lapack::{cholesky*} are only usable with a ContiguousTensor of "
+      "scalar types");
+  using range_type = typename ContiguousTensor::range_type;
+  if (range.rank() == 0)
+    range = range_type(A.rows(), A.cols());
+  else
+    TA_ASSERT(A.rows() * A.cols() == range.volume());
+  ContiguousTensor result(range);
+  auto result_map = eigen_map(result, A.rows(), A.cols());
+  result_map = A;
+  return result;
+}
+
+template <typename Derived>
+void zero_out_upper_triangle(Eigen::MatrixBase<Derived>& A) {
+  A.template triangularView<Eigen::StrictlyUpper>().setZero();
+}
+
+}  // namespace detail
+
+}  // namespace lapack
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_LAPACK_UTIL_H__INCLUDED
diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h
new file mode 100644
index 0000000000..330f49c8f7
--- /dev/null
+++ b/src/TiledArray/algebra/lu.h
@@ -0,0 +1,43 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  lu.h
+ *  Created:    16 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_LU_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_LU_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/lu.h>
+#endif
+#include <TiledArray/algebra/lapack/lu.h>
+
+namespace TiledArray {
+
+#if TILEDARRAY_HAS_SCALAPACK
+using scalapack::lu_inv;
+using scalapack::lu_solve;
+#else
+#endif
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_LU_H__INCLUDED
diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h
new file mode 100644
index 0000000000..fbba6826d0
--- /dev/null
+++ b/src/TiledArray/algebra/scalapack/all.h
@@ -0,0 +1,37 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  scalapack.h
+ *  Created:  25 May, 2020
+ *
+ */
+
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/cholesky.h>
+#include <TiledArray/algebra/scalapack/heig.h>
+#include <TiledArray/algebra/scalapack/lu.h>
+#include <TiledArray/algebra/scalapack/svd.h>
+#endif
+
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_ALL_H__INCLUDED
diff --git a/src/TiledArray/algebra/scalapack/cholesky.h b/src/TiledArray/algebra/scalapack/cholesky.h
new file mode 100644
index 0000000000..535db24d41
--- /dev/null
+++ b/src/TiledArray/algebra/scalapack/cholesky.h
@@ -0,0 +1,283 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  chol.h
+ *  Created:    8 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+
+#include <TiledArray/algebra/scalapack/util.h>
+#include <TiledArray/algebra/types.h>
+#include <TiledArray/conversions/block_cyclic.h>
+
+#include <scalapackpp/factorizations/potrf.hpp>
+#include <scalapackpp/linear_systems/posv.hpp>
+#include <scalapackpp/linear_systems/trtrs.hpp>
+#include <scalapackpp/matrix_inverse/trtri.hpp>
+
+namespace TiledArray {
+namespace scalapack {
+
+/**
+ *  @brief Compute the Cholesky factorization of a HPD rank-2 tensor
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto L = cholesky(A, ...)
+ *
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] l_trange    TiledRange for resulting Cholesky factor. If left
+ * empty, will default to array.trange()
+ *  @param[in] NB          ScaLAPACK block size. Defaults to 128
+ *
+ *  @returns The lower triangular Cholesky factor L in TA format
+ */
+template <typename Array>
+auto cholesky(const Array& A, TiledRange l_trange = TiledRange(),
+              size_t NB = default_block_size()) {
+  auto& world = A.world();
+  auto world_comm = world.mpi.comm().Get_mpi_comm();
+  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
+
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
+
+  auto [M, N] = matrix.dims();
+  if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky");
+
+  auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N);
+  auto desc = matrix.dist().descinit_noerror(N, N, Mloc);
+
+  auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N,
+                                  matrix.local_mat().data(), 1, 1, desc);
+  if (info) TA_EXCEPTION("Cholesky Failed");
+
+  // Zero out the upper triangle
+  zero_triangle(blacspp::Triangle::Upper, matrix);
+
+  if (l_trange.rank() == 0) l_trange = A.trange();
+
+  world.gop.fence();
+  auto L = scalapack::block_cyclic_to_array<Array>(matrix, l_trange);
+  world.gop.fence();
+
+  return L;
+}
+
+/**
+ *  @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor.
+ *  Optionally return the Cholesky factor itself
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv
+ *
+ *  Example Usage:
+ *
+ *  auto Linv     = cholesky_Linv(A, ...)
+ *  auto [L,Linv] = cholesky_Linv<decltype(A),true>(A, ...)
+ *
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
+ *  @tparam RetL  Whether or not to return the cholesky factor
+ *
+ *  @param[in] A           Input array to be diagonalized. Must be rank-2
+ *  @param[in] l_trange    TiledRange for resulting inverse Cholesky factor.
+ *                         If left empty, will default to array.trange()
+ *  @param[in] NB          ScaLAPACK block size. Defaults to 128
+ *
+ *  @returns The inverse lower triangular Cholesky factor in TA format
+ */
+template <typename Array, bool RetL = false>
+auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange(),
+                   size_t NB = default_block_size()) {
+  using value_type = typename Array::element_type;
+
+  auto& world = A.world();
+  auto world_comm = world.mpi.comm().Get_mpi_comm();
+  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
+
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
+
+  auto [M, N] = matrix.dims();
+  if (M != N) TA_EXCEPTION("Matrix must be square for Cholesky");
+
+  auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N);
+  auto desc = matrix.dist().descinit_noerror(N, N, Mloc);
+
+  auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N,
+                                  matrix.local_mat().data(), 1, 1, desc);
+  if (info) TA_EXCEPTION("Cholesky Failed");
+
+  // Zero out the upper triangle
+  zero_triangle(blacspp::Triangle::Upper, matrix);
+
+  // Copy L if needed
+  std::shared_ptr<scalapack::BlockCyclicMatrix<value_type>> L_sca = nullptr;
+  if constexpr (RetL) {
+    L_sca = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>(
+        world, grid, N, N, NB, NB);
+    L_sca->local_mat() = matrix.local_mat();
+  }
+
+  // Compute inverse
+  info =
+      scalapackpp::ptrtri(blacspp::Triangle::Lower, blacspp::Diagonal::NonUnit,
+                          N, matrix.local_mat().data(), 1, 1, desc);
+  if (info) TA_EXCEPTION("TRTRI Failed");
+
+  if (l_trange.rank() == 0) l_trange = A.trange();
+
+  world.gop.fence();
+  auto Linv = scalapack::block_cyclic_to_array<Array>(matrix, l_trange);
+  world.gop.fence();
+
+  if constexpr (RetL) {
+    auto L = scalapack::block_cyclic_to_array<Array>(*L_sca, l_trange);
+    world.gop.fence();
+    return std::tuple(L, Linv);
+  } else {
+    return Linv;
+  }
+}
+
+template <typename Array>
+auto cholesky_solve(const Array& A, const Array& B,
+                    TiledRange x_trange = TiledRange(),
+                    size_t NB = default_block_size()) {
+  auto& world = A.world();
+  /*
+  if( world != B.world() ) {
+    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+  }
+  */
+  auto world_comm = world.mpi.comm().Get_mpi_comm();
+  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
+
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
+
+  auto [M, N] = A_sca.dims();
+  if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve");
+
+  auto [B_N, NRHS] = B_sca.dims();
+  if (B_N != N) TA_EXCEPTION("A and B dims must agree");
+
+  scalapackpp::scalapack_desc desc_a, desc_b;
+  {
+    auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N);
+    desc_a = A_sca.dist().descinit_noerror(N, N, Mloc);
+  }
+
+  {
+    auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS);
+    desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc);
+  }
+
+  auto info = scalapackpp::pposv(blacspp::Triangle::Lower, N, NRHS,
+                                 A_sca.local_mat().data(), 1, 1, desc_a,
+                                 B_sca.local_mat().data(), 1, 1, desc_b);
+  if (info) TA_EXCEPTION("Cholesky Solve Failed");
+
+  if (x_trange.rank() == 0) x_trange = B.trange();
+
+  world.gop.fence();
+  auto X = scalapack::block_cyclic_to_array<Array>(B_sca, x_trange);
+  world.gop.fence();
+
+  return X;
+}
+
+template <typename Array>
+auto cholesky_lsolve(TransposeFlag trans, const Array& A, const Array& B,
+                     TiledRange l_trange = TiledRange(),
+                     TiledRange x_trange = TiledRange(),
+                     size_t NB = default_block_size()) {
+  auto& world = A.world();
+  /*
+  if( world != B.world() ) {
+    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+  }
+  */
+  auto world_comm = world.mpi.comm().Get_mpi_comm();
+  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
+
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
+
+  auto [M, N] = A_sca.dims();
+  if (M != N) TA_EXCEPTION("A must be square for Cholesky Solve");
+
+  auto [B_N, NRHS] = B_sca.dims();
+  if (B_N != N) TA_EXCEPTION("A and B dims must agree");
+
+  scalapackpp::scalapack_desc desc_a, desc_b;
+  {
+    auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N);
+    desc_a = A_sca.dist().descinit_noerror(N, N, Mloc);
+  }
+
+  {
+    auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS);
+    desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc);
+  }
+
+  auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N,
+                                  A_sca.local_mat().data(), 1, 1, desc_a);
+  if (info) TA_EXCEPTION("Cholesky Failed");
+
+  info = scalapackpp::ptrtrs(
+      blacspp::Triangle::Lower, to_scalapackpp_transposeflag(trans),
+      blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1,
+      desc_a, B_sca.local_mat().data(), 1, 1, desc_b);
+  if (info) TA_EXCEPTION("TRTRS Failed");
+
+  // Zero out the upper triangle
+  zero_triangle(blacspp::Triangle::Upper, A_sca);
+
+  if (l_trange.rank() == 0) l_trange = A.trange();
+  if (x_trange.rank() == 0) x_trange = B.trange();
+
+  world.gop.fence();
+  auto L = scalapack::block_cyclic_to_array<Array>(A_sca, l_trange);
+  auto X = scalapack::block_cyclic_to_array<Array>(B_sca, x_trange);
+  world.gop.fence();
+
+  return std::tuple(L, X);
+}
+
+}  // namespace scalapack
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_CHOL_H__INCLUDED
diff --git a/src/TiledArray/math/scalapack/heig.h b/src/TiledArray/algebra/scalapack/heig.h
similarity index 57%
rename from src/TiledArray/math/scalapack/heig.h
rename to src/TiledArray/algebra/scalapack/heig.h
index e51361c651..488867b857 100644
--- a/src/TiledArray/math/scalapack/heig.h
+++ b/src/TiledArray/algebra/scalapack/heig.h
@@ -23,17 +23,18 @@
  *  Edited:    8 June, 2020
  *
  */
-#ifndef TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED
-#define TILEDARRAY_MATH_SCALAPACK_HEIG_H__INCLUDED
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED
 
 #include <TiledArray/config.h>
 #if TILEDARRAY_HAS_SCALAPACK
 
 #include <TiledArray/conversions/block_cyclic.h>
-#include <scalapackpp/eigenvalue_problem/sevp.hpp>
 #include <scalapackpp/eigenvalue_problem/gevp.hpp>
+#include <scalapackpp/eigenvalue_problem/sevp.hpp>
 
 namespace TiledArray {
+namespace scalapack {
 
 /**
  *  @brief Solve the standard eigenvalue problem with ScaLAPACK
@@ -44,62 +45,55 @@ namespace TiledArray {
  *
  *  auto [E, X] = heig(A, ...)
  *
- *  @tparam Array Input array type, must be convertable to BlockCyclicMatrix
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
  *
  *  @param[in] A           Input array to be diagonalized. Must be rank-2
- *  @param[in] NB          ScaLAPACK blocking factor. Defaults to 128
  *  @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty,
  *                         will default to array.trange()
+ *  @param[in] NB          ScaLAPACK block size. Defaults to 128
  *
  *  @returns A tuple containing the eigenvalues and eigenvectors of input array
  *  as std::vector and in TA format, respectively.
  */
 template <typename Array>
-auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange() ) {
-
+auto heig(const Array& A, TiledRange evec_trange = TiledRange(),
+          size_t NB = default_block_size()) {
   using value_type = typename Array::element_type;
-  using real_type  = scalapackpp::detail::real_t<value_type>;
+  using real_type = scalapackpp::detail::real_t<value_type>;
 
   auto& world = A.world();
   auto world_comm = world.mpi.comm().Get_mpi_comm();
-  //auto world_comm = MPI_COMM_WORLD;
+  // auto world_comm = MPI_COMM_WORLD;
   blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
 
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto matrix = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
 
   auto [M, N] = matrix.dims();
-  if( M != N )
-    TA_EXCEPTION("Matrix must be square for EVP");
+  if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
 
   auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N);
   auto desc = matrix.dist().descinit_noerror(N, N, Mloc);
 
-  std::vector<real_type>        evals( N );
-  scalapack::BlockCyclicMatrix<value_type> evecs( world, grid, N, N, NB, NB );
+  std::vector<real_type> evals(N);
+  scalapack::BlockCyclicMatrix<value_type> evecs(world, grid, N, N, NB, NB);
 
   auto info = scalapackpp::hereig(
-    scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N,
-    matrix.local_mat().data(), 1, 1, desc, evals.data(),
-    evecs.local_mat().data(), 1, 1, desc );
+      scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N,
+      matrix.local_mat().data(), 1, 1, desc, evals.data(),
+      evecs.local_mat().data(), 1, 1, desc);
   if (info) TA_EXCEPTION("EVP Failed");
 
-  if( evec_trange.rank() == 0 ) evec_trange = A.trange();
+  if (evec_trange.rank() == 0) evec_trange = A.trange();
 
   world.gop.fence();
-  auto evecs_ta = scalapack::block_cyclic_to_array<Array>( evecs, evec_trange );
+  auto evecs_ta = scalapack::block_cyclic_to_array<Array>(evecs, evec_trange);
   world.gop.fence();
 
-
-  return std::tuple( evals, evecs_ta );
-
+  return std::tuple(evals, evecs_ta);
 }
 
-
-
-
-
 /**
  *  @brief Solve the generalized eigenvalue problem with ScaLAPACK
  *
@@ -113,69 +107,66 @@ auto heig( const Array& A, size_t NB = 128, TiledRange evec_trange = TiledRange(
  *
  *  auto [E, X] = heig(A, B, ...)
  *
- *  @tparam Array Input array type, must be convertable to BlockCyclicMatrix
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
  *
  *  @param[in] A           Input array to be diagonalized. Must be rank-2
  *  @param[in] B           Metric
- *  @param[in] NB          ScaLAPACK blocking factor. Defaults to 128
  *  @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty,
  *                         will default to array.trange()
+ *  @param[in] NB          ScaLAPACK block size. Defaults to 128
  *
  *  @returns A tuple containing the eigenvalues and eigenvectors of input array
  *  as std::vector and in TA format, respectively.
  */
 template <typename ArrayA, typename ArrayB, typename EVecType = ArrayA>
-auto heig( const ArrayA& A, const ArrayB& B, 
-  size_t NB = 128, TiledRange evec_trange = TiledRange() ) {
-
+auto heig(const ArrayA& A, const ArrayB& B,
+          TiledRange evec_trange = TiledRange(),
+          size_t NB = default_block_size()) {
   using value_type = typename ArrayA::element_type;
-  static_assert( std::is_same_v<typename ArrayB::element_type, value_type> );
-  using real_type  = scalapackpp::detail::real_t<value_type>;
+  static_assert(std::is_same_v<typename ArrayB::element_type, value_type>);
+  using real_type = scalapackpp::detail::real_t<value_type>;
 
   auto& world = A.world();
   auto world_comm = world.mpi.comm().Get_mpi_comm();
-  //auto world_comm = MPI_COMM_WORLD;
+  // auto world_comm = MPI_COMM_WORLD;
   blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
 
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto A_sca = scalapack::array_to_block_cyclic(A, grid, NB, NB);
+  auto B_sca = scalapack::array_to_block_cyclic(B, grid, NB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
 
   auto [M, N] = A_sca.dims();
-  if( M != N )
-    TA_EXCEPTION("Matrix must be square for EVP");
+  if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
 
   auto [B_M, B_N] = B_sca.dims();
-  if( B_M != M or B_N != N )
+  if (B_M != M or B_N != N)
     TA_EXCEPTION("A and B must have the same dimensions");
 
   auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N);
   auto desc = A_sca.dist().descinit_noerror(N, N, Mloc);
 
-  std::vector<real_type>        evals( N );
-  scalapack::BlockCyclicMatrix<value_type> evecs( world, grid, N, N, NB, NB );
+  std::vector<real_type> evals(N);
+  scalapack::BlockCyclicMatrix<value_type> evecs(world, grid, N, N, NB, NB);
 
   auto info = scalapackpp::hereig_gen(
-    scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N,
-    A_sca.local_mat().data(), 1, 1, desc, 
-    B_sca.local_mat().data(), 1, 1, desc, 
-    evals.data(),
-    evecs.local_mat().data(), 1, 1, desc );
+      scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N,
+      A_sca.local_mat().data(), 1, 1, desc, B_sca.local_mat().data(), 1, 1,
+      desc, evals.data(), evecs.local_mat().data(), 1, 1, desc);
   if (info) TA_EXCEPTION("EVP Failed");
 
-  if( evec_trange.rank() == 0 ) evec_trange = A.trange();
+  if (evec_trange.rank() == 0) evec_trange = A.trange();
 
   world.gop.fence();
-  auto evecs_ta = scalapack::block_cyclic_to_array<EVecType>( evecs, evec_trange );
+  auto evecs_ta =
+      scalapack::block_cyclic_to_array<EVecType>(evecs, evec_trange);
   world.gop.fence();
 
-
-  return std::tuple( evals, evecs_ta );
-
+  return std::tuple(evals, evecs_ta);
 }
 
-} // namespace TiledArray
+}  // namespace scalapack
+}  // namespace TiledArray
 
-#endif // TILEDARRAY_HAS_SCALAPACK
-#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED
diff --git a/src/TiledArray/math/scalapack/lu.h b/src/TiledArray/algebra/scalapack/lu.h
similarity index 50%
rename from src/TiledArray/math/scalapack/lu.h
rename to src/TiledArray/algebra/scalapack/lu.h
index 7581425f2c..8c65383846 100644
--- a/src/TiledArray/math/scalapack/lu.h
+++ b/src/TiledArray/algebra/scalapack/lu.h
@@ -22,46 +22,46 @@
  *  Created:    19 June, 2020
  *
  */
-#ifndef TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED
-#define TILEDARRAY_MATH_SCALAPACK_LU_H__INCLUDED
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED
 
 #include <TiledArray/config.h>
 #if TILEDARRAY_HAS_SCALAPACK
 
+#include <TiledArray/algebra/scalapack/util.h>
 #include <TiledArray/conversions/block_cyclic.h>
-#include <TiledArray/math/scalapack/util.h>
 
 #include <scalapackpp/factorizations/getrf.hpp>
 #include <scalapackpp/linear_systems/gesv.hpp>
 #include <scalapackpp/matrix_inverse/getri.hpp>
 
 namespace TiledArray {
+namespace scalapack {
 
 /**
  *  @brief Solve a linear system via LU factorization
  */
 template <typename ArrayA, typename ArrayB>
-auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 128,
-  TiledRange x_trange = TiledRange() ) {
-
+auto lu_solve(const ArrayA& A, const ArrayB& B,
+              TiledRange x_trange = TiledRange(),
+              size_t NB = default_block_size(),
+              size_t MB = default_block_size()) {
   using value_type = typename ArrayA::element_type;
-  static_assert(std::is_same_v<value_type,typename ArrayB::element_type>);
+  static_assert(std::is_same_v<value_type, typename ArrayB::element_type>);
 
   auto& world = A.world();
   auto world_comm = world.mpi.comm().Get_mpi_comm();
   blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
 
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto A_sca = scalapack::array_to_block_cyclic( A, grid, MB, NB );
-  auto B_sca = scalapack::array_to_block_cyclic( B, grid, MB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB);
+  auto B_sca = scalapack::array_to_block_cyclic(B, grid, MB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
 
-  auto [M, N]      = A_sca.dims();
-  if( M != N )
-    TA_EXCEPTION("A must be square for LU Solve");
+  auto [M, N] = A_sca.dims();
+  if (M != N) TA_EXCEPTION("A must be square for LU Solve");
   auto [B_N, NRHS] = B_sca.dims();
-  if( B_N != N )
-    TA_EXCEPTION("A and B dims must agree");
+  if (B_N != N) TA_EXCEPTION("A and B dims must agree");
 
   auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N);
   auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc);
@@ -69,77 +69,68 @@ auto lu_solve( const ArrayA& A, const ArrayB& B, size_t NB = 128, size_t MB = 12
   auto [B_Mloc, B_Nloc] = B_sca.dist().get_local_dims(N, NRHS);
   auto desc_b = B_sca.dist().descinit_noerror(N, NRHS, B_Mloc);
 
-  std::vector<scalapackpp::scalapack_int> IPIV( A_Mloc + MB );
+  std::vector<scalapackpp::scalapack_int> IPIV(A_Mloc + MB);
 
-  auto info = scalapackpp::pgesv( N, NRHS,
-    A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data(),
-    B_sca.local_mat().data(), 1, 1, desc_b );
+  auto info =
+      scalapackpp::pgesv(N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a,
+                         IPIV.data(), B_sca.local_mat().data(), 1, 1, desc_b);
   if (info) TA_EXCEPTION("LU Solve Failed");
 
-  if( x_trange.rank() == 0 ) x_trange = B.trange();
+  if (x_trange.rank() == 0) x_trange = B.trange();
 
   world.gop.fence();
-  auto X = scalapack::block_cyclic_to_array<ArrayB>( B_sca, x_trange );
+  auto X = scalapack::block_cyclic_to_array<ArrayB>(B_sca, x_trange);
   world.gop.fence();
 
   return X;
-
 }
 
 /**
  *  @brief Invert a matrix via LU
  */
 template <typename Array>
-auto lu_inv( const Array& A, size_t NB = 128, size_t MB = 128,
-  TiledRange ainv_trange = TiledRange() ) {
-
+auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange(),
+            size_t NB = default_block_size(),
+            size_t MB = default_block_size()) {
   auto& world = A.world();
   auto world_comm = world.mpi.comm().Get_mpi_comm();
   blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
 
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto A_sca = scalapack::array_to_block_cyclic( A, grid, MB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto A_sca = scalapack::array_to_block_cyclic(A, grid, MB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
 
-  auto [M, N]      = A_sca.dims();
-  if( M != N )
-    TA_EXCEPTION("A must be square for LU Inverse");
+  auto [M, N] = A_sca.dims();
+  if (M != N) TA_EXCEPTION("A must be square for LU Inverse");
 
   auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N);
   auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc);
 
-
-  std::vector<scalapackpp::scalapack_int> IPIV( A_Mloc + MB );
+  std::vector<scalapackpp::scalapack_int> IPIV(A_Mloc + MB);
 
   {
-  auto info = scalapackpp::pgetrf( N, N,
-    A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() );
-  if (info) TA_EXCEPTION("LU Failed");
+    auto info = scalapackpp::pgetrf(N, N, A_sca.local_mat().data(), 1, 1,
+                                    desc_a, IPIV.data());
+    if (info) TA_EXCEPTION("LU Failed");
   }
 
   {
-  auto info = scalapackpp::pgetri( N,
-    A_sca.local_mat().data(), 1, 1, desc_a, IPIV.data() );
-  if (info) TA_EXCEPTION("LU Inverse Failed");
+    auto info = scalapackpp::pgetri(N, A_sca.local_mat().data(), 1, 1, desc_a,
+                                    IPIV.data());
+    if (info) TA_EXCEPTION("LU Inverse Failed");
   }
 
-  if( ainv_trange.rank() == 0 ) ainv_trange = A.trange();
+  if (ainv_trange.rank() == 0) ainv_trange = A.trange();
 
   world.gop.fence();
-  auto Ainv = scalapack::block_cyclic_to_array<Array>( A_sca, ainv_trange );
+  auto Ainv = scalapack::block_cyclic_to_array<Array>(A_sca, ainv_trange);
   world.gop.fence();
 
   return Ainv;
-
 }
 
+}  // namespace scalapack
+}  // namespace TiledArray
 
-
-
-
-} // namespace TiledArray
-
-#endif // TILEDARRAY_HAS_SCALAPACK
-#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
-
-
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_LU_H__INCLUDED
diff --git a/src/TiledArray/algebra/scalapack/svd.h b/src/TiledArray/algebra/scalapack/svd.h
new file mode 100644
index 0000000000..d73f9e05dd
--- /dev/null
+++ b/src/TiledArray/algebra/scalapack/svd.h
@@ -0,0 +1,155 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  svd.h
+ *  Created:    12 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+
+#include <TiledArray/algebra/types.h>
+#include <TiledArray/conversions/block_cyclic.h>
+#include <scalapackpp/svd.hpp>
+
+namespace TiledArray {
+namespace scalapack {
+
+/**
+ *  @brief Compute the singular value decomposition (SVD) via ScaLAPACK
+ *
+ *  A(i,j) = S(k) U(i,k) conj(V(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto S          = svd<SVDValuesOnly>  (A, ...)
+ *  auto [S, U]     = svd<SVDLeftVectors> (A, ...)
+ *  auto [S, VT]    = svd<SVDRightVectors>(A, ...)
+ *  auto [S, U, VT] = svd<SVDAllVectors>  (A, ...)
+ *
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
+ *
+ *  @param[in] A           Input array to be decomposed. Must be rank-2
+ *  @param[in] u_trange    TiledRange for resulting left singular vectors.
+ *  @param[in] vt_trange   TiledRange for resulting right singular vectors
+ * (transposed).
+ *  @param[in] MB          ScaLAPACK row block size. Defaults to 128
+ *  @param[in] NB          ScaLAPACK column block size. Defaults to 128
+ *
+ *  @returns A tuple containing the eigenvalues and eigenvectors of input array
+ *  as std::vector and in TA format, respectively.
+ */
+template <typename SVDType, typename Array,
+          typename = TiledArray::detail::enable_if_svd_return_type<SVDType>>
+auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange,
+         size_t MB = default_block_size(), size_t NB = default_block_size()) {
+  using value_type = typename Array::element_type;
+  using real_type = scalapackpp::detail::real_t<value_type>;
+
+  auto& world = A.world();
+  auto world_comm = world.mpi.comm().Get_mpi_comm();
+  // auto world_comm = MPI_COMM_WORLD;
+  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
+
+  world.gop.fence();  // stage ScaLAPACK execution
+  auto matrix = scalapack::array_to_block_cyclic(A, grid, MB, NB);
+  world.gop.fence();  // stage ScaLAPACK execution
+
+  auto [M, N] = matrix.dims();
+  auto SVD_SIZE = std::min(M, N);
+
+  auto [AMloc, ANloc] = matrix.dist().get_local_dims(M, N);
+  auto [UMloc, UNloc] = matrix.dist().get_local_dims(M, SVD_SIZE);
+  auto [VTMloc, VTNloc] = matrix.dist().get_local_dims(SVD_SIZE, N);
+
+  auto desc_a = matrix.dist().descinit_noerror(M, N, AMloc);
+  auto desc_u = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc);
+  auto desc_vt = matrix.dist().descinit_noerror(SVD_SIZE, N, VTMloc);
+
+  std::vector<real_type> S(SVD_SIZE);
+
+  constexpr bool need_uv = std::is_same_v<SVDType, SVDAllVectors>;
+  constexpr bool need_u = std::is_same_v<SVDType, SVDLeftVectors> or need_uv;
+  constexpr bool need_vt = std::is_same_v<SVDType, SVDRightVectors> or need_uv;
+
+  std::shared_ptr<scalapack::BlockCyclicMatrix<value_type>> U = nullptr,
+                                                            VT = nullptr;
+
+  scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors;
+  scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors;
+
+  value_type* U_ptr = nullptr;
+  value_type* VT_ptr = nullptr;
+
+  if constexpr (need_u) {
+    JOBU = scalapackpp::VectorFlag::Vectors;
+    U = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>(
+        world, grid, M, SVD_SIZE, MB, NB);
+
+    U_ptr = U->local_mat().data();
+  }
+
+  if constexpr (need_vt) {
+    JOBVT = scalapackpp::VectorFlag::Vectors;
+    VT = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>(
+        world, grid, SVD_SIZE, N, MB, NB);
+
+    VT_ptr = VT->local_mat().data();
+  }
+
+  auto info = scalapackpp::pgesvd(JOBU, JOBVT, M, N, matrix.local_mat().data(),
+                                  1, 1, desc_a, S.data(), U_ptr, 1, 1, desc_u,
+                                  VT_ptr, 1, 1, desc_vt);
+  if (info) TA_EXCEPTION("SVD Failed");
+
+  world.gop.fence();
+
+  if constexpr (need_uv) {
+    auto U_ta = scalapack::block_cyclic_to_array<Array>(*U, u_trange);
+    auto VT_ta = scalapack::block_cyclic_to_array<Array>(*VT, vt_trange);
+    world.gop.fence();
+
+    return std::tuple(S, U_ta, VT_ta);
+
+  } else if constexpr (need_u) {
+    auto U_ta = scalapack::block_cyclic_to_array<Array>(*U, u_trange);
+    world.gop.fence();
+
+    return std::tuple(S, U_ta);
+
+  } else if constexpr (need_vt) {
+    auto VT_ta = scalapack::block_cyclic_to_array<Array>(*VT, vt_trange);
+    world.gop.fence();
+
+    return std::tuple(S, VT_ta);
+
+  } else {
+    return S;
+  }
+}
+
+}  // namespace scalapack
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_SVD_H__INCLUDED
diff --git a/src/TiledArray/algebra/scalapack/util.h b/src/TiledArray/algebra/scalapack/util.h
new file mode 100644
index 0000000000..439d2fa590
--- /dev/null
+++ b/src/TiledArray/algebra/scalapack/util.h
@@ -0,0 +1,104 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  util.h
+ *  Created:    19 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SCALAPACK
+
+#include <TiledArray/algebra/types.h>
+#include <TiledArray/conversions/block_cyclic.h>
+
+namespace TiledArray {
+
+namespace scalapack {
+
+inline scalapackpp::TransposeFlag to_scalapackpp_transposeflag(
+    TransposeFlag t) {
+  switch (t) {
+    case TransposeFlag::NoTranspose:
+      return scalapackpp::TransposeFlag::NoTranspose;
+    case TransposeFlag::Transpose:
+      return scalapackpp::TransposeFlag::Transpose;
+    case TransposeFlag::ConjTranspose:
+      return scalapackpp::TransposeFlag::ConjTranspose;
+    default:
+      abort();
+  }
+}
+
+template <typename T>
+void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix<T>& A,
+                   bool zero_diag = false) {
+  auto zero_el = [&](size_t I, size_t J) {
+    if (A.dist().i_own(I, J)) {
+      auto [i, j] = A.dist().local_indx(I, J);
+      A.local_mat()(i, j) = 0.;
+    }
+  };
+
+  auto [M, N] = A.dims();
+
+  // Zero the lower triangle
+  if (tri == blacspp::Triangle::Lower) {
+    if (zero_diag)
+      for (size_t j = 0; j < N; ++j)
+        for (size_t i = j; i < M; ++i) zero_el(i, j);
+    else
+      for (size_t j = 0; j < N; ++j)
+        for (size_t i = j + 1; i < M; ++i) zero_el(i, j);
+
+    // Zero the upper triangle
+  } else {
+    if (zero_diag)
+      for (size_t j = 0; j < N; ++j)
+        for (size_t i = 0; i <= std::min(j, M); ++i) zero_el(i, j);
+    else
+      for (size_t j = 0; j < N; ++j)
+        for (size_t i = 0; i < std::min(j, M); ++i) zero_el(i, j);
+  }
+}
+
+namespace detail {
+inline std::size_t& default_block_size_accessor() {
+  static std::size_t block_size = 128;
+  return block_size;
+}
+}  // namespace detail
+
+inline std::size_t default_block_size() {
+  return detail::default_block_size_accessor();
+}
+
+inline void set_default_block_size(std::size_t NB) {
+  TA_ASSERT(NB > 0);
+  detail::default_block_size_accessor() = NB;
+}
+
+}  // namespace scalapack
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_ALGEBRA_SCALAPACK_UTIL_H__INCLUDED
diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h
new file mode 100644
index 0000000000..3c87fbc087
--- /dev/null
+++ b/src/TiledArray/algebra/svd.h
@@ -0,0 +1,48 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Eduard Valeyev
+ *
+ *  svd.h
+ *  Created:    16 October, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_SVD_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SVD_H__INCLUDED
+
+#include <TiledArray/config.h>
+#ifdef TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/svd.h>
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/lapack/svd.h>
+
+namespace TiledArray {
+
+template <typename SVDType, typename Array,
+          typename = TiledArray::detail::enable_if_svd_return_type<SVDType>>
+auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) {
+#if TILEDARRAY_HAS_SCALAPACK
+  if (A.world().size() > 1 && A.range().volume() > 10000000) {
+    return scalapack::svd<SVDType>(A, u_trange, vt_trange);
+  }
+#endif
+  return lapack::svd<SVDType>(A, u_trange, vt_trange);
+}
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_SVD_H__INCLUDED
diff --git a/src/TiledArray/algebra/types.h b/src/TiledArray/algebra/types.h
new file mode 100644
index 0000000000..1cc0a50e57
--- /dev/null
+++ b/src/TiledArray/algebra/types.h
@@ -0,0 +1,66 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  svd.h
+ *  Created:    12 June, 2020
+ *
+ */
+#ifndef TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED
+#define TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED
+
+#include <TiledArray/config.h>
+#include <type_traits>
+
+namespace TiledArray {
+
+enum TransposeFlag { NoTranspose, Transpose, ConjTranspose };
+
+struct SVDReturnType {};
+struct SVDValuesOnly : public SVDReturnType {};
+struct SVDLeftVectors : public SVDReturnType {};
+struct SVDRightVectors : public SVDReturnType {};
+struct SVDAllVectors : public SVDReturnType {};
+
+namespace detail {
+
+template <typename SVDType, typename = void>
+struct is_svd_return_type : public std::false_type {};
+
+template <typename SVDType>
+struct is_svd_return_type<
+    SVDType, std::enable_if_t<std::is_base_of_v<SVDReturnType, SVDType>>>
+    : public std::true_type {};
+
+template <typename SVDType>
+inline constexpr bool is_svd_return_type_v = is_svd_return_type<SVDType>::value;
+
+template <typename SVDType, typename U = void>
+struct enable_if_svd_return_type
+    : public std::enable_if<is_svd_return_type_v<SVDType>, U> {};
+
+template <typename SVDType, typename U = void>
+using enable_if_svd_return_type_t =
+    typename enable_if_svd_return_type<SVDType, U>::type;
+
+}  // namespace detail
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_ALGEBRA_SVD_UTILS_H__INCLUDED
diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h
index 7380a1faf1..b43602c98e 100644
--- a/src/TiledArray/conversions/eigen.h
+++ b/src/TiledArray/conversions/eigen.h
@@ -61,53 +61,57 @@ typedef Eigen::Matrix<long, Eigen::Dynamic, 1> EigenVectorXl;
 
 /// Construct a const Eigen::Map object for a given Tensor object
 
-/// \tparam T A tensor type, e.g. TiledArray::Tensor
-/// \param tensor The tensor object
+/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c
+/// TiledArray::detail::is_contiguous_tensor_v<T> must be true \tparam Storage
+/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor
+/// \param tensor The tensor object, laid out according to Storage
 /// \param m The number of rows in the result matrix
 /// \param n The number of columns in the result matrix
 /// \return An m x n Eigen matrix map for \c tensor
 /// \throw TiledArray::Exception When m * n is not equal to \c tensor size
-template <typename T,
+template <typename T, Eigen::StorageOptions Storage = Eigen::RowMajor,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<const Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                      Eigen::Dynamic, Eigen::RowMajor>,
+                                      Eigen::Dynamic, Storage>,
                   Eigen::AutoAlign>
 eigen_map(const T& tensor, const std::size_t m, const std::size_t n) {
   TA_ASSERT((m * n) == tensor.size());
 
   return Eigen::Map<const Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                        Eigen::Dynamic, Eigen::RowMajor>,
+                                        Eigen::Dynamic, Storage>,
                     Eigen::AutoAlign>(tensor.data(), m, n);
 }
 
 /// Construct an Eigen::Map object for a given Tensor object
 
-/// \tparam T A tensor type, e.g. TiledArray::Tensor
-/// \param tensor The tensor object
+/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c
+/// TiledArray::detail::is_contiguous_tensor_v<T> must be true \tparam Storage
+/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor
+/// \param tensor The tensor object, laid out according to Storage
 /// \param m The number of rows in the result matrix
 /// \param n The number of columns in the result matrix
 /// \return An m x n Eigen matrix map for \c tensor
 /// \throw TiledArray::Exception When m * n is not equal to \c tensor size
-template <typename T,
+template <typename T, Eigen::StorageOptions Storage = Eigen::RowMajor,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                Eigen::Dynamic, Eigen::RowMajor>,
+                                Eigen::Dynamic, Storage>,
                   Eigen::AutoAlign>
 eigen_map(T& tensor, const std::size_t m, const std::size_t n) {
   TA_ASSERT((m * n) == tensor.size());
 
   return Eigen::Map<Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                  Eigen::Dynamic, Eigen::RowMajor>,
+                                  Eigen::Dynamic, Storage>,
                     Eigen::AutoAlign>(tensor.data(), m, n);
 }
 
 /// Construct a const Eigen::Map object for a given Tensor object
 
-/// \tparam T A tensor type, e.g. TiledArray::Tensor
-/// \param tensor The tensor object
-/// \param n The number of elements in the result matrix
-/// \return An n element Eigen vector map for \c tensor
-/// \throw TiledArray::Exception When n is not equal to \c tensor size
+/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c
+/// TiledArray::detail::is_contiguous_tensor_v<T> must be true \param tensor The
+/// tensor object \param n The number of elements in the result matrix \return
+/// An n element Eigen vector map for \c tensor \throw TiledArray::Exception
+/// When n is not equal to \c tensor size
 template <typename T,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<
@@ -127,7 +131,7 @@ eigen_map(const T& tensor, const std::size_t n) {
 /// \param tensor The tensor object
 /// \param n The number of elements in the result matrix
 /// \return An n element Eigen vector map for \c tensor
-/// \throw TiledArray::Exception When m * n is not equal to \c tensor size
+/// \throw TiledArray::Exception When n is not equal to \c tensor size
 template <typename T,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<Eigen::Matrix<typename T::value_type, Eigen::Dynamic, 1>,
@@ -141,42 +145,49 @@ eigen_map(T& tensor, const std::size_t n) {
 
 /// Construct a const Eigen::Map object for a given Tensor object
 
-/// The dimensions of the result tensor
-/// \tparam T A tensor type, e.g. TiledArray::Tensor
-/// \param tensor The tensor object
+/// The dimensions of the result tensor are extracted from the tensor itself
+/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c
+/// TiledArray::detail::is_contiguous_tensor_v<T> must be true \tparam Storage
+/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor
+/// \param tensor The tensor object, laid out according to Storage
 /// \return An Eigen matrix map for \c tensor
 /// \throw TiledArray::Exception When \c tensor dimensions are not equal to 2
 /// or 1.
-template <typename T,
+template <typename T, Eigen::StorageOptions Storage = Eigen::RowMajor,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<const Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                      Eigen::Dynamic, Eigen::RowMajor>,
+                                      Eigen::Dynamic, Storage>,
                   Eigen::AutoAlign>
 eigen_map(const T& tensor) {
   TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u));
   const auto* MADNESS_RESTRICT const tensor_extent =
       tensor.range().extent_data();
-  return eigen_map(tensor, tensor_extent[0],
-                   (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul));
+  return eigen_map<T, Storage>(
+      tensor, tensor_extent[0],
+      (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul));
 }
 
 /// Construct an Eigen::Map object for a given Tensor object
 
-/// \tparam T A tensor type, e.g. TiledArray::Tensor
-/// \param tensor The tensor object
+/// The dimensions of the result tensor are extracted from the tensor itself
+/// \tparam T A contiguous tensor type, e.g. TiledArray::Tensor ; namely, \c
+/// TiledArray::detail::is_contiguous_tensor_v<T> must be true \tparam Storage
+/// the tensor layout, either Eigen::RowMajor (default) or Eigen::ColMajor
+/// \param tensor The tensor object, laid out according to Storage
 /// \return An Eigen matrix map for \c tensor
 /// \throw When \c tensor dimensions are not equal to 2 or 1.
-template <typename T,
+template <typename T, Eigen::StorageOptions Storage = Eigen::RowMajor,
           std::enable_if_t<detail::is_contiguous_tensor_v<T>>* = nullptr>
 inline Eigen::Map<Eigen::Matrix<typename T::value_type, Eigen::Dynamic,
-                                Eigen::Dynamic, Eigen::RowMajor>,
+                                Eigen::Dynamic, Storage>,
                   Eigen::AutoAlign>
 eigen_map(T& tensor) {
   TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u));
   const auto* MADNESS_RESTRICT const tensor_extent =
       tensor.range().extent_data();
-  return eigen_map(tensor, tensor_extent[0],
-                   (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul));
+  return eigen_map<T, Storage>(
+      tensor, tensor_extent[0],
+      (tensor.range().rank() == 2u ? tensor_extent[1] : 1ul));
 }
 
 /// Copy a block of an Eigen matrix into a tensor
@@ -351,15 +362,19 @@ void counted_tensor_to_eigen_submatrix(const T& tensor,
 
 }  // namespace detail
 
+// clang-format off
 /// Convert an Eigen matrix into an Array object
 
 /// This function will copy the content of \c matrix into an \c Array object
 /// that is tiled according to the \c trange object. The copy operation is
 /// done in parallel, and this function will block until all elements of
-/// \c matrix have been copied into the result array tiles. The size of
-/// \c world.size() must be equal to 1 or \c replicate must be equal to
-/// \c true . If \c replicate is \c true, it is your responsibility to ensure
-/// that the data in matrix is identical on all nodes.
+/// \c matrix have been copied into the result array tiles.
+/// Each tile is created
+/// using the local contents of \c matrix, hence
+/// it is your responsibility to ensure that the data in \c matrix
+/// is distributed correctly among the ranks. If in doubt, you should replicate
+/// \c matrix among the ranks prior to calling this.
+///
 /// Usage:
 /// \code
 /// Eigen::MatrixXd m(100, 100);
@@ -383,16 +398,18 @@ void counted_tensor_to_eigen_submatrix(const T& tensor,
 /// \param world The world where the array will live
 /// \param trange The tiled range of the new array
 /// \param matrix The Eigen matrix to be copied
-/// \param replicated \c true indicates that the result array should be a
-/// replicated array [default = false].
+/// \param replicated if true, the result will be replicated
+/// [default = true].
+/// \param pmap the process map object [default=null]; initialized to the
+/// default if \p replicated is false, or a replicated pmap if \p replicated
+/// is true; ignored if \p replicated is true and \c world.size()>1
 /// \return An \c Array object that is a copy of \c matrix
-/// \throw TiledArray::Exception When world size is greater than 1
-/// \note If using 2 or more World ranks, set \c replicated=true and make sure
-/// \c matrix is the same on each rank!
+// clang-format on
 template <typename A, typename Derived>
 A eigen_to_array(World& world, const typename A::trange_type& trange,
                  const Eigen::MatrixBase<Derived>& matrix,
-                 bool replicated = false) {
+                 bool replicated = false,
+                 std::shared_ptr<typename A::pmap_interface> pmap = {}) {
   typedef typename A::index1_type size_type;
   // Check that trange matches the dimensions of other
   if ((matrix.cols() > 1) && (matrix.rows() > 1)) {
@@ -417,36 +434,35 @@ A eigen_to_array(World& world, const typename A::trange_type& trange,
         "matrix size.");
   }
 
-  // Check that this is not a distributed computing environment
-  if (!replicated)
-    TA_USER_ASSERT(world.size() == 1,
-                   "An array cannot be assigned with an Eigen::Matrix when the "
-                   "number of World ranks is greater than 1.");
-
   // Create a new tensor
-  A array = (replicated && (world.size() > 1)
-                 ? A(world, trange,
-                     std::static_pointer_cast<typename A::pmap_interface>(
-                         std::make_shared<detail::ReplicatedPmap>(
-                             world, trange.tiles_range().volume())))
-                 : A(world, trange));
+  if (replicated && (world.size() > 1))
+    pmap = std::static_pointer_cast<typename A::pmap_interface>(
+        std::make_shared<detail::ReplicatedPmap>(
+            world, trange.tiles_range().volume()));
+  A array = (pmap ? A(world, trange, pmap) : A(world, trange));
 
   // Spawn tasks to copy Eigen to an Array
   madness::AtomicInt counter;
   counter = 0;
   std::int64_t n = 0;
   for (std::size_t i = 0; i < array.size(); ++i) {
-    world.taskq.add(&detail::counted_eigen_submatrix_to_tensor<A, Derived>,
-                    &matrix, &array, i, &counter);
-    ++n;
+    if (array.is_local(i)) {
+      world.taskq.add(&detail::counted_eigen_submatrix_to_tensor<A, Derived>,
+                      &matrix, &array, i, &counter);
+      ++n;
+    }
   }
 
   // Wait until the write tasks are complete
   array.world().await([&counter, n]() { return counter == n; });
 
+  // truncate, n.b. this can replace the wait above
+  array.truncate();
+
   return array;
 }
 
+// clang-format off
 /// Convert an Array object into an Eigen matrix object
 
 /// This function will copy the content of an \c Array object into matrix. The
@@ -465,10 +481,14 @@ A eigen_to_array(World& world, const typename A::trange_type& trange,
 /// \tparam EigenStorageOrder The storage order of the resulting Eigen::Matrix
 ///      object; the default is Eigen::ColMajor, i.e. the column-major storage
 /// \param array The array to be converted. It must be replicated if using 2 or
-/// more World ranks. \return an Eigen matrix; it will contain same data on each
-/// World rank. \throw TiledArray::Exception When world size is greater than 1
-/// and \c array is not replicated. \throw TiledArray::Exception When the number
+/// more World ranks.
+/// \return an Eigen matrix; it will contain same data on each
+/// World rank.
+/// \throw TiledArray::Exception When world size is greater than 1
+/// and \c array is not replicated.
+/// \throw TiledArray::Exception When the number
 /// of dimensions of \c array is not equal to 1 or 2.
+// clang-format on
 template <typename Tile, typename Policy,
           unsigned int EigenStorageOrder = Eigen::ColMajor>
 Eigen::Matrix<typename Tile::value_type, Eigen::Dynamic, Eigen::Dynamic,
@@ -526,10 +546,13 @@ array_to_eigen(const DistArray<Tile, Policy>& array) {
 /// This function will copy the content of \c buffer into an \c Array object
 /// that is tiled according to the \c trange object. The copy operation is
 /// done in parallel, and this function will block until all elements of
-/// \c matrix have been copied into the result array tiles. The size of
-/// \c world.size() must be equal to 1 or \c replicate must be equal to
-/// \c true . If \c replicate is \c true, it is your responsibility to ensure
-/// that the data in \c buffer is identical on all nodes.
+/// \c matrix have been copied into the result array tiles.
+/// Each tile is created
+/// using the local contents of \c matrix, hence
+/// it is your responsibility to ensure that the data in \c matrix
+/// is distributed correctly among the ranks. If in doubt, you should replicate
+/// \c matrix among the ranks prior to calling this.
+///
 /// Usage:
 /// \code
 /// double* buffer = new double[100 * 100];
@@ -557,8 +580,11 @@ array_to_eigen(const DistArray<Tile, Policy>& array) {
 /// \param buffer The row-major matrix buffer to be copied
 /// \param m The number of rows in the matrix
 /// \param n The number of columns in the matrix
-/// \param replicated \c true indicates that the result array should be a
-/// replicated array [default = false].
+/// \param replicated if true, the result will be replicated
+/// [default = true].
+/// \param pmap the process map object [default=null]; initialized to the
+/// default if \p replicated is false, or a replicated pmap if \p replicated
+/// is true; ignored if \p replicated is true and \c world.size()>1
 /// \return An \c Array object that is a copy of \c matrix
 /// \throw TiledArray::Exception When \c m and \c n are not equal to the
 /// number of rows or columns in tiled range.
@@ -566,7 +592,8 @@ template <typename A>
 inline A row_major_buffer_to_array(
     World& world, const typename A::trange_type& trange,
     const typename A::value_type::value_type* buffer, const std::size_t m,
-    const std::size_t n, const bool replicated = false) {
+    const std::size_t n, const bool replicated = false,
+    std::shared_ptr<typename A::pmap_interface> pmap = {}) {
   TA_USER_ASSERT(trange.elements_range().extent(0) == m,
                  "TiledArray::eigen_to_array(): The number of rows in trange "
                  "is not equal to m.");
@@ -579,8 +606,8 @@ inline A row_major_buffer_to_array(
       matrix_type;
   return eigen_to_array(
       world, trange,
-      Eigen::Map<const matrix_type, Eigen::AutoAlign>(buffer, m, n),
-      replicated);
+      Eigen::Map<const matrix_type, Eigen::AutoAlign>(buffer, m, n), replicated,
+      pmap);
 }
 
 /// Convert a column-major matrix buffer into an Array object
@@ -588,10 +615,13 @@ inline A row_major_buffer_to_array(
 /// This function will copy the content of \c buffer into an \c Array object
 /// that is tiled according to the \c trange object. The copy operation is
 /// done in parallel, and this function will block until all elements of
-/// \c matrix have been copied into the result array tiles. The size of
-/// \c world.size() must be equal to 1 or \c replicate must be equal to
-/// \c true . If \c replicate is \c true, it is your responsibility to ensure
-/// that the data in \c buffer is identical on all nodes.
+/// \c matrix have been copied into the result array tiles.
+/// Each tile is created
+/// using the local contents of \c matrix, hence
+/// it is your responsibility to ensure that the data in \c matrix
+/// is distributed correctly among the ranks. If in doubt, you should replicate
+/// \c matrix among the ranks prior to calling this.
+///
 /// Usage:
 /// \code
 /// double* buffer = new double[100 * 100];
@@ -619,8 +649,11 @@ inline A row_major_buffer_to_array(
 /// \param buffer The row-major matrix buffer to be copied
 /// \param m The number of rows in the matrix
 /// \param n The number of columns in the matrix
-/// \param replicated \c true indicates that the result array should be a
-/// replicated array [default = false].
+/// \param replicated if true, the result will be replicated
+/// [default = true].
+/// \param pmap the process map object [default=null]; initialized to the
+/// default if \p replicated is false, or a replicated pmap if \p replicated
+/// is true; ignored if \p replicated is true and \c world.size()>1
 /// \return An \c Array object that is a copy of \c matrix
 /// \throw TiledArray::Exception When \c m and \c n are not equal to the
 /// number of rows or columns in tiled range.
@@ -628,7 +661,8 @@ template <typename A>
 inline A column_major_buffer_to_array(
     World& world, const typename A::trange_type& trange,
     const typename A::value_type::value_type* buffer, const std::size_t m,
-    const std::size_t n, const bool replicated = false) {
+    const std::size_t n, const bool replicated = false,
+    std::shared_ptr<typename A::pmap_interface> pmap = {}) {
   TA_USER_ASSERT(trange.elements_range().extent(0) == m,
                  "TiledArray::eigen_to_array(): The number of rows in trange "
                  "is not equal to m.");
@@ -641,8 +675,8 @@ inline A column_major_buffer_to_array(
       matrix_type;
   return eigen_to_array(
       world, trange,
-      Eigen::Map<const matrix_type, Eigen::AutoAlign>(buffer, m, n),
-      replicated);
+      Eigen::Map<const matrix_type, Eigen::AutoAlign>(buffer, m, n), replicated,
+      pmap);
 }
 
 }  // namespace TiledArray
diff --git a/src/TiledArray/external/btas.h b/src/TiledArray/external/btas.h
index 51da57337c..ffa6e72f22 100644
--- a/src/TiledArray/external/btas.h
+++ b/src/TiledArray/external/btas.h
@@ -768,6 +768,13 @@ struct is_btas_tensor<btas::Tensor<T, Args...>> : public std::true_type {};
 template <typename T>
 constexpr const bool is_btas_tensor_v = is_btas_tensor<T>::value;
 
+/// btas::RangeNd can be col or row-major
+template <CBLAS_ORDER _Order, typename _Index, typename _Ordinal>
+struct ordinal_traits<btas::RangeNd<_Order, _Index, _Ordinal>> {
+  static constexpr const auto type =
+      _Order == CblasRowMajor ? OrdinalType::RowMajor : OrdinalType::ColMajor;
+};
+
 }  // namespace detail
 }  // namespace TiledArray
 
diff --git a/src/TiledArray/external/eigen.h b/src/TiledArray/external/eigen.h
index b296c56f3a..90e8196c2d 100644
--- a/src/TiledArray/external/eigen.h
+++ b/src/TiledArray/external/eigen.h
@@ -86,4 +86,56 @@ TILEDARRAY_PRAGMA_GCC(system_header)
 
 TILEDARRAY_PRAGMA_GCC(diagnostic pop)
 
+namespace madness {
+namespace archive {
+
+template <class>
+class archive_array;
+template <class T>
+inline archive_array<T> wrap(const T*, unsigned int);
+template <class Archive, typename Data>
+struct ArchiveStoreImpl;
+template <class Archive, typename Data>
+struct ArchiveLoadImpl;
+
+template <class Archive, typename Scalar, int RowsAtCompileTime,
+          int ColsAtCompileTime, int Options, int MaxRowsAtCompileTime,
+          int MaxColsAtCompileTime>
+struct ArchiveStoreImpl<
+    Archive,
+    Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, Options,
+                  MaxRowsAtCompileTime, MaxColsAtCompileTime>> {
+  static inline void store(
+      const Archive& ar,
+      const Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, Options,
+                          MaxRowsAtCompileTime, MaxColsAtCompileTime>& t) {
+    ar& t.rows() & t.cols();
+    if (t.size()) ar& madness::archive::wrap(t.data(), t.size());
+  }
+};
+
+template <class Archive, typename Scalar, int RowsAtCompileTime,
+          int ColsAtCompileTime, int Options, int MaxRowsAtCompileTime,
+          int MaxColsAtCompileTime>
+struct ArchiveLoadImpl<
+    Archive,
+    Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, Options,
+                  MaxRowsAtCompileTime, MaxColsAtCompileTime>> {
+  static inline void load(
+      const Archive& ar,
+      Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, Options,
+                    MaxRowsAtCompileTime, MaxColsAtCompileTime>& t) {
+    typename Eigen::Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime,
+                           Options, MaxRowsAtCompileTime,
+                           MaxColsAtCompileTime>::Index nrows(0),
+        ncols(0);
+    ar& nrows& ncols;
+    t.resize(nrows, ncols);
+    if (t.size()) ar& madness::archive::wrap(t.data(), t.size());
+  }
+};
+
+}  // namespace archive
+}  // namespace madness
+
 #endif  // TILEDARRAY_EXTERNAL_EIGEN_H__INCLUDED
diff --git a/src/TiledArray/math/scalapack.h b/src/TiledArray/math/scalapack.h
index d2e3a65afe..c7ed2c3a73 100644
--- a/src/TiledArray/math/scalapack.h
+++ b/src/TiledArray/math/scalapack.h
@@ -25,14 +25,9 @@
 #ifndef TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
 #define TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
 
-#if TILEDARRAY_HAS_SCALAPACK
+#warning \
+    "TiledArray/math/scalapack.h header is deprecated, please include TiledArray/algebra/scalapack/all.h"
 
-#include <TiledArray/math/scalapack/heig.h>
-#include <TiledArray/math/scalapack/chol.h>
-#include <TiledArray/math/scalapack/svd.h>
-#include <TiledArray/math/scalapack/lu.h>
+#include <TiledArray/algebra/scalapack/all.h>
 
 #endif
-
-#endif
-
diff --git a/src/TiledArray/math/scalapack/chol.h b/src/TiledArray/math/scalapack/chol.h
deleted file mode 100644
index eff1d4b20a..0000000000
--- a/src/TiledArray/math/scalapack/chol.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- *  This file is a part of TiledArray.
- *  Copyright (C) 2020 Virginia Tech
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  David Williams-Young
- *  Computational Research Division, Lawrence Berkeley National Laboratory
- *
- *  chol.h
- *  Created:    8 June, 2020
- *
- */
-#ifndef TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED
-#define TILEDARRAY_MATH_SCALAPACK_CHOL_H__INCLUDED
-
-#include <TiledArray/config.h>
-#if TILEDARRAY_HAS_SCALAPACK
-
-#include <TiledArray/conversions/block_cyclic.h>
-#include <TiledArray/math/scalapack/util.h>
-
-#include <scalapackpp/factorizations/potrf.hpp>
-#include <scalapackpp/matrix_inverse/trtri.hpp>
-#include <scalapackpp/linear_systems/posv.hpp>
-#include <scalapackpp/linear_systems/trtrs.hpp>
-
-namespace TiledArray {
-
-/**
- *  @brief Compute the Cholesky factorization of a HPD rank-2 tensor
- *
- *  A(i,j) = L(i,k) * conj(L(j,k))
- *
- *  Example Usage:
- *
- *  auto L = cholesky(A, ...)
- *
- *  @tparam Array Input array type, must be convertable to BlockCyclicMatrix
- *
- *  @param[in] A           Input array to be diagonalized. Must be rank-2
- *  @param[in] NB          ScaLAPACK blocking factor. Defaults to 128
- *  @param[in] l_trange    TiledRange for resulting Cholesky factor. If left empty,
- *                         will default to array.trange()
- *
- *  @returns The lower triangular Cholesky factor L in TA format
- */
-template <typename Array>
-auto cholesky( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) {
-
-  auto& world = A.world();
-  auto world_comm = world.mpi.comm().Get_mpi_comm();
-  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
-
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
-
-  auto [M, N] = matrix.dims();
-  if( M != N )
-    TA_EXCEPTION("Matrix must be square for Cholesky");
-
-  auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N);
-  auto desc = matrix.dist().descinit_noerror(N, N, Mloc);
-
-  auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N,
-    matrix.local_mat().data(), 1, 1, desc );
-  if (info) TA_EXCEPTION("Cholesky Failed");
-
-  // Zero out the upper triangle
-  detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix );
-
-  if( l_trange.rank() == 0 ) l_trange = A.trange();
-
-  world.gop.fence();
-  auto L = scalapack::block_cyclic_to_array<Array>( matrix, l_trange );
-  world.gop.fence();
-
-
-  return L;
-
-}
-
-
-
-
-
-
-
-/**
- *  @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor.
- *  Optinally return the Cholesky factor itself
- *
- *  A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv
- *
- *  Example Usage:
- *
- *  auto Linv     = cholesky_Linv(A, ...)
- *  auto [L,Linv] = cholesky_Linv<decltype(A),true>(A, ...)
- *
- *  @tparam Array Input array type, must be convertable to BlockCyclicMatrix
- *  @tparam RetL  Whether or not to return the cholesky factor
- *
- *  @param[in] A           Input array to be diagonalized. Must be rank-2
- *  @param[in] NB          ScaLAPACK blocking factor. Defaults to 128
- *  @param[in] l_trange    TiledRange for resulting inverse Cholesky factor.
- *                         If left empty, will default to array.trange()
- *
- *  @returns The inverse lower triangular Cholesky factor in TA format
- */
-template <typename Array, bool RetL = false>
-auto cholesky_linv( const Array& A, size_t NB = 128, TiledRange l_trange = TiledRange() ) {
-
-  using value_type = typename Array::element_type;
-
-  auto& world = A.world();
-  auto world_comm = world.mpi.comm().Get_mpi_comm();
-  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
-
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto matrix = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
-
-  auto [M, N] = matrix.dims();
-  if( M != N )
-    TA_EXCEPTION("Matrix must be square for Cholesky");
-
-  auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N);
-  auto desc = matrix.dist().descinit_noerror(N, N, Mloc);
-
-  auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N,
-    matrix.local_mat().data(), 1, 1, desc );
-  if (info) TA_EXCEPTION("Cholesky Failed");
-
-  // Zero out the upper triangle
-  detail::scalapack_zero_triangle( blacspp::Triangle::Upper, matrix );
-
-  // Copy L if needed
-  std::shared_ptr<scalapack::BlockCyclicMatrix<value_type>> L_sca = nullptr;
-  if constexpr (RetL) {
-    L_sca = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>(
-      world, grid, N, N, NB, NB
-    );
-    L_sca->local_mat() = matrix.local_mat();
-  }
-
-  // Compute inverse
-  info = scalapackpp::ptrtri( blacspp::Triangle::Lower,
-    blacspp::Diagonal::NonUnit, N, matrix.local_mat().data(), 1, 1, desc );
-  if (info) TA_EXCEPTION("TRTRI Failed");
-
-
-  if( l_trange.rank() == 0 ) l_trange = A.trange();
-
-  world.gop.fence();
-  auto Linv = scalapack::block_cyclic_to_array<Array>( matrix, l_trange );
-  world.gop.fence();
-
-
-  if constexpr (RetL) {
-    auto L = scalapack::block_cyclic_to_array<Array>( *L_sca, l_trange);
-    world.gop.fence();
-    return std::tuple( L, Linv );
-  } else {
-    return Linv;
-  }
-
-}
-
-
-template <typename Array>
-auto cholesky_solve( const Array& A, const Array& B, size_t NB = 128,
-  TiledRange x_trange = TiledRange() ) {
-
-  auto& world = A.world();
-  /*
-  if( world != B.world() ) {
-    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
-  }
-  */
-  auto world_comm = world.mpi.comm().Get_mpi_comm();
-  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
-
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
-
-  auto [M, N] = A_sca.dims();
-  if( M != N )
-    TA_EXCEPTION("A must be square for Cholesky Solve");
-
-  auto [B_N, NRHS] = B_sca.dims();
-  if( B_N != N )
-    TA_EXCEPTION("A and B dims must agree");
-
-
-  scalapackpp::scalapack_desc desc_a, desc_b;
-  {
-  auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N);
-  desc_a = A_sca.dist().descinit_noerror(N, N, Mloc);
-  }
-
-  {
-  auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS);
-  desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc);
-  }
-
-  auto info = scalapackpp::pposv( blacspp::Triangle::Lower, N, NRHS,
-    A_sca.local_mat().data(), 1, 1, desc_a, B_sca.local_mat().data(),
-    1, 1, desc_b );
-  if (info) TA_EXCEPTION("Cholesky Solve Failed");
-
-  if( x_trange.rank() == 0 ) x_trange = B.trange();
-
-  world.gop.fence();
-  auto X = scalapack::block_cyclic_to_array<Array>( B_sca, x_trange );
-  world.gop.fence();
-
-  return X;
-}
-
-
-
-
-template <typename Array>
-auto cholesky_lsolve( scalapackpp::TransposeFlag trans,
-  const Array& A, const Array& B, size_t NB = 128,
-  TiledRange l_trange = TiledRange(),
-  TiledRange x_trange = TiledRange() ) {
-
-  auto& world = A.world();
-  /*
-  if( world != B.world() ) {
-    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
-  }
-  */
-  auto world_comm = world.mpi.comm().Get_mpi_comm();
-  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
-
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto A_sca = scalapack::array_to_block_cyclic( A, grid, NB, NB );
-  auto B_sca = scalapack::array_to_block_cyclic( B, grid, NB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
-
-  auto [M, N] = A_sca.dims();
-  if( M != N )
-    TA_EXCEPTION("A must be square for Cholesky Solve");
-
-  auto [B_N, NRHS] = B_sca.dims();
-  if( B_N != N )
-    TA_EXCEPTION("A and B dims must agree");
-
-
-  scalapackpp::scalapack_desc desc_a, desc_b;
-  {
-  auto [Mloc, Nloc] = A_sca.dist().get_local_dims(N, N);
-  desc_a = A_sca.dist().descinit_noerror(N, N, Mloc);
-  }
-
-  {
-  auto [Mloc, Nloc] = B_sca.dist().get_local_dims(N, NRHS);
-  desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc);
-  }
-
-  auto info = scalapackpp::ppotrf( blacspp::Triangle::Lower, N,
-    A_sca.local_mat().data(), 1, 1, desc_a );
-  if (info) TA_EXCEPTION("Cholesky Failed");
-
-  info = scalapackpp::ptrtrs( blacspp::Triangle::Lower, trans,
-    blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a,
-    B_sca.local_mat().data(), 1, 1, desc_b );
-  if (info) TA_EXCEPTION("TRTRS Failed");
-
-  // Zero out the upper triangle
-  detail::scalapack_zero_triangle( blacspp::Triangle::Upper, A_sca );
-
-  if( l_trange.rank() == 0 ) l_trange = A.trange();
-  if( x_trange.rank() == 0 ) x_trange = B.trange();
-
-  world.gop.fence();
-  auto L = scalapack::block_cyclic_to_array<Array>( A_sca, l_trange );
-  auto X = scalapack::block_cyclic_to_array<Array>( B_sca, x_trange );
-  world.gop.fence();
-
-  return std::tuple(L, X);
-}
-
-} // namespace TiledArray
-
-#endif // TILEDARRAY_HAS_SCALAPACK
-#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
-
diff --git a/src/TiledArray/math/scalapack/svd.h b/src/TiledArray/math/scalapack/svd.h
deleted file mode 100644
index da8274acd5..0000000000
--- a/src/TiledArray/math/scalapack/svd.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- *  This file is a part of TiledArray.
- *  Copyright (C) 2020 Virginia Tech
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  David Williams-Young
- *  Computational Research Division, Lawrence Berkeley National Laboratory
- *
- *  svd.h
- *  Created:    12 June, 2020
- *
- */
-#ifndef TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED
-#define TILEDARRAY_MATH_SCALAPACK_SVD_H__INCLUDED
-
-#include <TiledArray/config.h>
-#if TILEDARRAY_HAS_SCALAPACK
-
-#include <TiledArray/conversions/block_cyclic.h>
-#include <scalapackpp/svd.hpp>
-
-namespace TiledArray {
-
-
-struct SVDReturnType{ };
-struct SVDValuesOnly   : public SVDReturnType { };
-struct SVDLeftVectors  : public SVDReturnType { };
-struct SVDRightVectors : public SVDReturnType { };
-struct SVDAllVectors   : public SVDReturnType { };
-
-namespace detail {
-
-template <typename SVDType, typename = void>
-struct is_svd_return_type : public std::false_type { };
-
-template <typename SVDType>
-struct is_svd_return_type<
-  SVDType, 
-  std::enable_if_t<std::is_base_of_v<SVDReturnType, SVDType>>
-> : public std::true_type { };
-
-template <typename SVDType>
-inline constexpr bool is_svd_return_type_v = is_svd_return_type<SVDType>::value;
-
-template <typename SVDType, typename U = void>
-struct enable_if_svd_return_type : 
-  public std::enable_if< is_svd_return_type_v<SVDType>, U > { };
-
-template <typename SVDType, typename U = void>
-using enable_if_svd_return_type_t = 
-  typename enable_if_svd_return_type<SVDType,U>::type;
-
-}
-
-/**
- *  @brief Compute the singular value decomposition (SVD) via ScaLAPACK
- *
- *  A(i,j) = S(k) U(i,k) conj(V(j,k))
- *
- *  Example Usage:
- *
- *  auto S          = svd<SVDValuesOnly>  (A, ...)
- *  auto [S, U]     = svd<SVDLeftVectors> (A, ...)
- *  auto [S, VT]    = svd<SVDRightVectors>(A, ...)
- *  auto [S, U, VT] = svd<SVDAllVectors>  (A, ...)
- *
- *  @tparam Array Input array type, must be convertable to BlockCyclicMatrix
- *
- *  @param[in] A           Input array to be decomposed. Must be rank-2
- *  @param[in] MB          ScaLAPACK row blocking factor. Defaults to 128
- *  @param[in] NB          ScaLAPACK column blocking factor. Defaults to 128
- *  @param[in] u_trange    TiledRange for resulting left singlar vectors. 
- *  @param[in] vt_trange   TiledRange for resulting right singlar vectors (transposed).
- *
- *  @returns A tuple containing the eigenvalues and eigenvectors of input array
- *  as std::vector and in TA format, respectively.
- */
-template <typename SVDType, typename Array,
-  typename = detail::enable_if_svd_return_type<SVDType>
->
-auto svd( const Array& A, TiledRange u_trange, TiledRange vt_trange,
-  size_t MB = 128, size_t NB = 128 
-) {
-
-  using value_type = typename Array::element_type;
-  using real_type  = scalapackpp::detail::real_t<value_type>;
-
-  auto& world = A.world();
-  auto world_comm = world.mpi.comm().Get_mpi_comm();
-  //auto world_comm = MPI_COMM_WORLD;
-  blacspp::Grid grid = blacspp::Grid::square_grid(world_comm);
-
-  world.gop.fence(); // stage ScaLAPACK execution
-  auto matrix = scalapack::array_to_block_cyclic( A, grid, MB, NB );
-  world.gop.fence(); // stage ScaLAPACK execution
-
-  auto [M, N] = matrix.dims();
-  auto SVD_SIZE = std::min(M,N);
-
-  auto [AMloc, ANloc]   = matrix.dist().get_local_dims(M, N);
-  auto [UMloc, UNloc]   = matrix.dist().get_local_dims(M, SVD_SIZE);
-  auto [VTMloc, VTNloc] = matrix.dist().get_local_dims(SVD_SIZE, N);
-
-
-  auto desc_a  = matrix.dist().descinit_noerror(M, N,        AMloc );
-  auto desc_u  = matrix.dist().descinit_noerror(M, SVD_SIZE, UMloc );
-  auto desc_vt = matrix.dist().descinit_noerror(SVD_SIZE, N, VTMloc);
-
-  std::vector<real_type>        S( SVD_SIZE );
-
-  constexpr bool need_uv = std::is_same_v< SVDType, SVDAllVectors   >;
-  constexpr bool need_u  = std::is_same_v< SVDType, SVDLeftVectors  > or need_uv;
-  constexpr bool need_vt = std::is_same_v< SVDType, SVDRightVectors > or need_uv;
-
-  std::shared_ptr<scalapack::BlockCyclicMatrix<value_type>> U = nullptr, VT = nullptr;
-
-  scalapackpp::VectorFlag JOBU  = scalapackpp::VectorFlag::NoVectors;
-  scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors;
-
-  value_type* U_ptr  = nullptr;
-  value_type* VT_ptr = nullptr;
-
-  if constexpr (need_u) {
-    JOBU = scalapackpp::VectorFlag::Vectors;
-    U = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>( 
-      world, grid, M, SVD_SIZE, MB, NB 
-    );
-
-    U_ptr = U->local_mat().data();
-  }
-
-  if constexpr (need_vt) {
-    JOBVT = scalapackpp::VectorFlag::Vectors;
-    VT = std::make_shared<scalapack::BlockCyclicMatrix<value_type>>( 
-      world, grid, SVD_SIZE, N, MB, NB
-    );
-
-    VT_ptr = VT->local_mat().data();
-  }
-
-
-  auto info = scalapackpp::pgesvd( JOBU, JOBVT, M, N,
-    matrix.local_mat().data(), 1, 1, desc_a, S.data(),
-    U_ptr, 1, 1, desc_u, VT_ptr, 1, 1, desc_vt );
-  if (info) TA_EXCEPTION("SVD Failed");
-
-  world.gop.fence();
-
-
-  if constexpr (need_uv) {
-
-    auto U_ta  = scalapack::block_cyclic_to_array<Array>( *U,  u_trange  );
-    auto VT_ta = scalapack::block_cyclic_to_array<Array>( *VT, vt_trange );
-    world.gop.fence();
-
-    return std::tuple( S, U_ta, VT_ta );
-
-  } else if constexpr (need_u) {
-
-    auto U_ta  = scalapack::block_cyclic_to_array<Array>( *U,  u_trange  );
-    world.gop.fence();
-
-    return std::tuple( S, U_ta );
-
-  } else if constexpr (need_vt) {
-
-    auto VT_ta  = scalapack::block_cyclic_to_array<Array>( *VT,  vt_trange  );
-    world.gop.fence();
-
-    return std::tuple( S, VT_ta );
-
-  } else {
-
-    return S;
-
-  }
-
-
-
-}
-
-} // namespace TiledArray
-
-#endif // TILEDARRAY_HAS_SCALAPACK
-#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
-
diff --git a/src/TiledArray/math/scalapack/util.h b/src/TiledArray/math/scalapack/util.h
deleted file mode 100644
index fe750f61e2..0000000000
--- a/src/TiledArray/math/scalapack/util.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- *  This file is a part of TiledArray.
- *  Copyright (C) 2020 Virginia Tech
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  David Williams-Young
- *  Computational Research Division, Lawrence Berkeley National Laboratory
- *
- *  util.h
- *  Created:    19 June, 2020
- *
- */
-#ifndef TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED
-#define TILEDARRAY_MATH_SCALAPACK_UTIL_H__INCLUDED
-
-#include <TiledArray/config.h>
-#if TILEDARRAY_HAS_SCALAPACK
-
-#include <TiledArray/conversions/block_cyclic.h>
-
-namespace TiledArray {
-
-namespace detail {
-
-template <typename T>
-void scalapack_zero_triangle( 
-  blacspp::Triangle tri, scalapack::BlockCyclicMatrix<T>& A, bool zero_diag = false 
-) {
-
-  auto zero_el = [&]( size_t I, size_t J ) {
-    if( A.dist().i_own(I,J) ) {
-      auto [i,j] = A.dist().local_indx(I,J);
-      A.local_mat()(i,j) = 0.;
-    }
-  };
-
-  auto [M,N] = A.dims();
-
-  // Zero the lower triangle
-  if( tri == blacspp::Triangle::Lower ) {
-
-    if( zero_diag )
-      for( size_t j = 0; j < N; ++j )
-      for( size_t i = j; i < M; ++i )
-        zero_el( i,j );
-    else
-      for( size_t j = 0;   j < N; ++j )
-      for( size_t i = j+1; i < M; ++i )
-        zero_el( i,j );
-
-  // Zero the upper triangle
-  } else {
-
-    if( zero_diag )
-      for( size_t j = 0; j < N;  ++j )
-      for( size_t i = 0; i <= std::min(j,M); ++i )
-        zero_el( i,j );
-    else
-      for( size_t j = 0; j < N; ++j )
-      for( size_t i = 0; i < std::min(j,M); ++i )
-        zero_el( i,j );
-
-  }
-}
-
-}
-}
-
-#endif // TILEDARRAY_HAS_SCALAPACK
-#endif // TILEDARRAY_MATH_SCALAPACK_H__INCLUDED
-
diff --git a/src/TiledArray/tensor/type_traits.h b/src/TiledArray/tensor/type_traits.h
index 65c775c6d8..6690eb1f61 100644
--- a/src/TiledArray/tensor/type_traits.h
+++ b/src/TiledArray/tensor/type_traits.h
@@ -310,6 +310,34 @@ static constexpr const auto is_bipartite_permutable_v =
         const T&, const TiledArray::BipartitePermutation&>;
 
 }  // namespace detail
+
+/// Specifies how coordinates are mapped to ordinal values
+/// - RowMajor: stride decreases as mode index increases
+/// - ColMajor: stride increases with the mode index
+/// - Other: unknown or dynamic order
+enum class OrdinalType { RowMajor = -1, ColMajor = 1, Other = 0, Invalid };
+
+namespace detail {
+
+/// ordinal trait specifies properties of the ordinal
+template <typename Ordinal, typename Enabler = void>
+struct ordinal_traits;
+
+/// TA::Range is hardwired to row-major
+template <>
+struct ordinal_traits<Range> {
+  static constexpr const auto type = OrdinalType::RowMajor;
+};
+
+/// ordinal traits of contiguous tensors are defined by their range type
+template <typename T>
+struct ordinal_traits<T, std::enable_if_t<is_contiguous_tensor_v<T>>> {
+  static constexpr const auto type = ordinal_traits<
+      std::decay_t<decltype(std::declval<const T&>().range())>>::type;
+};
+
+}  // namespace detail
+
 }  // namespace TiledArray
 
 #endif  // TILEDARRAY_TENSOR_TYPE_TRAITS_H__INCLUDED
diff --git a/src/tiledarray.h b/src/tiledarray.h
index 274958ec38..a20d1c86bc 100644
--- a/src/tiledarray.h
+++ b/src/tiledarray.h
@@ -60,8 +60,8 @@
 
 // ScaLAPACK functions
 #ifdef TILEDARRAY_HAS_SCALAPACK
+#include <TiledArray/algebra/scalapack/all.h>
 #include <TiledArray/conversions/block_cyclic.h>
-#include <TiledArray/math/scalapack.h>
 #endif
 
 #endif  // TILEDARRAY_H__INCLUDED
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8799ff95ee..75889908de 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -32,97 +32,98 @@ set(executable ta_test)
 # N.B.: The order of files here represents the order in which the tests are run.
 # N.B. 2: if you want to trim this down you may need to resolve linker errors due to missing fixture deps manually
 set(ta_test_src_files  ta_test.cpp
-    range.cpp
-    btas.cpp
-    meta.cpp
-    type_traits.cpp
-    utility.cpp
-    permutation.cpp
-    symm_permutation_group.cpp
-    symm_irrep.cpp
-    symm_representation.cpp
-    block_range.cpp
-    perm_index.cpp
-    transform_iterator.cpp
-    bitset.cpp
-    math_outer.cpp
-    math_partial_reduce.cpp
-    math_transpose.cpp
-    math_blas.cpp
-    tensor.cpp
-    tensor_of_tensor.cpp
-    tensor_tensor_view.cpp
-    tensor_shift_wrapper.cpp
-    tiled_range1.cpp
-    tiled_range.cpp
-    blocked_pmap.cpp
-    hash_pmap.cpp
-    cyclic_pmap.cpp
-    replicated_pmap.cpp
-    dense_shape.cpp
-    sparse_shape.cpp
-    distributed_storage.cpp
-    tensor_impl.cpp
-    array_impl.cpp
-    index_list.cpp
-    bipartite_index_list.cpp
-    dist_array.cpp
-    conversions.cpp
-    eigen.cpp
-    dist_op_dist_cache.cpp
-    dist_op_group.cpp
-    dist_op_communicator.cpp
-    tile_op_noop.cpp
-    tile_op_scal.cpp
-    dist_eval_array_eval.cpp
-    dist_eval_unary_eval.cpp
-    tile_op_add.cpp
-    tile_op_scal_add.cpp
-    tile_op_subt.cpp
-    tile_op_scal_subt.cpp
-    dist_eval_binary_eval.cpp
-    tile_op_mult.cpp
-    tile_op_scal_mult.cpp
-    tile_op_contract_reduce.cpp
-    reduce_task.cpp
-    proc_grid.cpp
-    dist_eval_contraction_eval.cpp
-    expressions.cpp
-    expressions_sparse.cpp
-    expressions_complex.cpp
-    expressions_btas.cpp
-    expressions_mixed.cpp
-    foreach.cpp
-    solvers.cpp
-    initializer_list.cpp
-    diagonal_array.cpp
-    retile.cpp
-    tot_dist_array_part1.cpp
-    tot_dist_array_part2.cpp
-    random.cpp
-    trace.cpp
-    tot_expressions.cpp
-    annotation.cpp
-    diagonal_array.cpp
-    contraction_helpers.cpp
-    s_t_t_contract_.cpp
-    t_t_t_contract_.cpp
-    t_s_t_contract_.cpp
-    t_tot_tot_contract_.cpp
-    tot_tot_tot_contract_.cpp
-    einsum.cpp
+    # range.cpp
+    # btas.cpp
+    # meta.cpp
+    # type_traits.cpp
+    # utility.cpp
+    # permutation.cpp
+    # symm_permutation_group.cpp
+    # symm_irrep.cpp
+    # symm_representation.cpp
+    # block_range.cpp
+    # perm_index.cpp
+    # transform_iterator.cpp
+    # bitset.cpp
+    # math_outer.cpp
+    # math_partial_reduce.cpp
+    # math_transpose.cpp
+    # math_blas.cpp
+    # tensor.cpp
+    # tensor_of_tensor.cpp
+    # tensor_tensor_view.cpp
+    # tensor_shift_wrapper.cpp
+    # tiled_range1.cpp
+    # tiled_range.cpp
+    # blocked_pmap.cpp
+    # hash_pmap.cpp
+    # cyclic_pmap.cpp
+    # replicated_pmap.cpp
+    # dense_shape.cpp
+    # sparse_shape.cpp
+    # distributed_storage.cpp
+    # tensor_impl.cpp
+    # array_impl.cpp
+    # index_list.cpp
+    # bipartite_index_list.cpp
+    # dist_array.cpp
+    # conversions.cpp
+    # eigen.cpp
+    # dist_op_dist_cache.cpp
+    # dist_op_group.cpp
+    # dist_op_communicator.cpp
+    # tile_op_noop.cpp
+    # tile_op_scal.cpp
+    # dist_eval_array_eval.cpp
+    # dist_eval_unary_eval.cpp
+    # tile_op_add.cpp
+    # tile_op_scal_add.cpp
+    # tile_op_subt.cpp
+    # tile_op_scal_subt.cpp
+    # dist_eval_binary_eval.cpp
+    # tile_op_mult.cpp
+    # tile_op_scal_mult.cpp
+    # tile_op_contract_reduce.cpp
+    # reduce_task.cpp
+    # proc_grid.cpp
+    # dist_eval_contraction_eval.cpp
+    # expressions.cpp
+    # expressions_sparse.cpp
+    # expressions_complex.cpp
+    # expressions_btas.cpp
+    # expressions_mixed.cpp
+    # foreach.cpp
+    # solvers.cpp
+    # initializer_list.cpp
+    # diagonal_array.cpp
+    # retile.cpp
+    # tot_dist_array_part1.cpp
+    # tot_dist_array_part2.cpp
+    # random.cpp
+    # trace.cpp
+    # tot_expressions.cpp
+    # annotation.cpp
+    # diagonal_array.cpp
+    # contraction_helpers.cpp
+    # s_t_t_contract_.cpp
+    # t_t_t_contract_.cpp
+    # t_s_t_contract_.cpp
+    # t_tot_tot_contract_.cpp
+    # tot_tot_tot_contract_.cpp
+    # einsum.cpp
+    linear_algebra.cpp
 )
 
 if(CUDA_FOUND)
     list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp)
 endif()
 
-if (TARGET TiledArray_SCALAPACK)
-  list(APPEND ta_test_src_files scalapack.cpp)
-endif(TARGET TiledArray_SCALAPACK)
+# if (TARGET TiledArray_SCALAPACK)
+#   list(APPEND ta_test_src_files scalapack.cpp)
+# endif(TARGET TiledArray_SCALAPACK)
 
 # if tiledarray library was compiled without exceptions, use TA header-only (see below)
-if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND)
+if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE)
   add_ta_executable(${executable} "${ta_test_src_files}" "MADworld;${TILEDARRAY_PRIVATE_LINK_LIBRARIES}")
   target_compile_definitions(${executable} PRIVATE TILEDARRAY_HEADER_ONLY=1)
   if (LAPACK_INCLUDE_DIRS)
diff --git a/tests/btas.cpp b/tests/btas.cpp
index 99cf14ec97..df9122695a 100644
--- a/tests/btas.cpp
+++ b/tests/btas.cpp
@@ -21,6 +21,7 @@
  */
 
 #include <TiledArray/config.h>
+#include <TiledArray/tensor/type_traits.h>
 
 #ifdef TILEDARRAY_HAS_BTAS
 
@@ -32,6 +33,28 @@
 
 using namespace TiledArray;
 
+static_assert(detail::ordinal_traits<btas::RangeNd<>>::type ==
+                  OrdinalType::RowMajor,
+              "btas::RangeNd<> is row-major");
+static_assert(detail::ordinal_traits<btas::RangeNd<CblasRowMajor>>::type ==
+                  OrdinalType::RowMajor,
+              "btas::RangeNd<CblasRowMajor> is row-major");
+static_assert(detail::ordinal_traits<btas::RangeNd<CblasColMajor>>::type ==
+                  OrdinalType::ColMajor,
+              "btas::RangeNd<CblasColMajor> is col-major");
+static_assert(detail::ordinal_traits<btas::Tensor<double>>::type ==
+                  OrdinalType::RowMajor,
+              "btas::Tenspr<T> is row-major");
+static_assert(
+    detail::ordinal_traits<btas::Tensor<double, TiledArray::Range>>::type ==
+        OrdinalType::RowMajor,
+    "btas::Tenspr<T, TA::Range> is row-major");
+static_assert(
+    detail::ordinal_traits<
+        TiledArray::Tile<btas::Tensor<double, TiledArray::Range>>>::type ==
+        OrdinalType::RowMajor,
+    "TA::Tile<btas::Tenspr<T, TA::Range>> is row-major");
+
 // test both bare (deep-copy) BTAS tensor as well as its shallow-copy wrap in
 // Tile<>, using both btas::RangeNd<> and TiledArray::Range as the range type
 typedef boost::mpl::list<
diff --git a/tests/eigen.cpp b/tests/eigen.cpp
index 96e128c66e..37eef88220 100644
--- a/tests/eigen.cpp
+++ b/tests/eigen.cpp
@@ -158,26 +158,9 @@ BOOST_AUTO_TEST_CASE(matrix_to_array) {
   // Fill the matrix with random data
   matrix = decltype(matrix)::Random(matrix.rows(), matrix.cols());
 
-  if (GlobalFixture::world->size() == 1) {
-    // Copy matrix to array
-    BOOST_CHECK_NO_THROW((array = eigen_to_array<TArrayI>(*GlobalFixture::world,
-                                                          trange, matrix)));
-  } else {
-    // Check that eigen_to_array does not work in distributed environments
-#if !defined(TA_USER_ASSERT_DISABLED)
-    BOOST_CHECK_THROW(
-        (eigen_to_array<TArrayI>(*GlobalFixture::world, trange, matrix)),
-        TiledArray::Exception);
-#endif
-
-    // Note: The following tests constructs a replicated array, but the data may
-    // not be identical. That is OK here since we are check the local data, but
-    // in real applications the data should be identical.
-
-    // Copy matrix to a replicated array
-    BOOST_CHECK_NO_THROW((array = eigen_to_array<TArrayI>(
-                              *GlobalFixture::world, trange, matrix, true)));
-  }
+  // Copy matrix to array
+  BOOST_CHECK_NO_THROW(
+      (array = eigen_to_array<TArrayI>(*GlobalFixture::world, trange, matrix)));
 
   // Check that the data in array is equal to that in matrix
   for (Range::const_iterator it = array.range().begin();
@@ -195,27 +178,9 @@ BOOST_AUTO_TEST_CASE(vector_to_array) {
   // Fill the vector with random data
   vector = Eigen::VectorXi::Random(vector.size());
 
-  if (GlobalFixture::world->size() == 1) {
-    // Convert the vector to an array
-    BOOST_CHECK_NO_THROW((array1 = eigen_to_array<TArrayI>(
-                              *GlobalFixture::world, trange1, vector)));
-
-  } else {
-    // Check that eigen_to_array does not work in distributed environments
-#if !defined(TA_USER_ASSERT_DISABLED)
-    BOOST_CHECK_THROW(
-        (eigen_to_array<TArrayI>(*GlobalFixture::world, trange1, vector)),
-        TiledArray::Exception);
-#endif
-
-    // Note: The following tests constructs a replicated array, but the data may
-    // not be identical. That is OK here since we are check the local data, but
-    // in real applications the data should be identical.
-
-    // Convert the vector to an array
-    BOOST_CHECK_NO_THROW((array1 = eigen_to_array<TArrayI>(
-                              *GlobalFixture::world, trange1, vector, true)));
-  }
+  // Convert the vector to an array
+  BOOST_CHECK_NO_THROW((array1 = eigen_to_array<TArrayI>(*GlobalFixture::world,
+                                                         trange1, vector)));
 
   // Check that the data in array matches the data in vector
   for (Range::const_iterator it = array1.range().begin();
diff --git a/tests/scalapack.cpp b/tests/linear_algebra.cpp
similarity index 50%
rename from tests/scalapack.cpp
rename to tests/linear_algebra.cpp
index 1554dc8529..07f9a58be2 100644
--- a/tests/scalapack.cpp
+++ b/tests/linear_algebra.cpp
@@ -1,87 +1,119 @@
 #include <tiledarray.h>
 #include <random>
 #include "TiledArray/config.h"
-#include "range_fixture.h"
+//#include "range_fixture.h"
 #include "unit_test_config.h"
 
-#include "TiledArray/math/scalapack.h"
-
-struct ScaLAPACKFixture {
-  blacspp::Grid grid;
-  scalapack::BlockCyclicMatrix<double> ref_matrix;  // XXX: Just double is fine?
+#include "TiledArray/algebra/lapack/cholesky.h"
+#include "TiledArray/algebra/lapack/heig.h"
+#include "TiledArray/algebra/lapack/lu.h"
+#include "TiledArray/algebra/lapack/svd.h"
+
+#include "TiledArray/algebra/cholesky.h"
+#include "TiledArray/algebra/heig.h"
+#include "TiledArray/algebra/lu.h"
+#include "TiledArray/algebra/svd.h"
+
+namespace TA = TiledArray;
+namespace lapack = TA::lapack;
+
+#if TILEDARRAY_HAS_SCALAPACK
+
+#include "TiledArray/algebra/scalapack/all.h"
+namespace scalapack = TA::scalapack;
+#define TILEDARRAY_SCALAPACK_TEST(F, E)                         \
+  GlobalFixture::world->gop.fence();                            \
+  compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \
+  GlobalFixture::world->gop.fence();                            \
+  compare("TiledArray", lapack::F, TiledArray::F, E);
+#else
+#define TILEDARRAY_SCALAPACK_TEST(...)
+#endif
 
+struct ReferenceFixture {
+  size_t N;
   std::vector<double> htoeplitz_vector;
   std::vector<double> exact_evals;
 
-  inline
-  double matrix_element_generator( int64_t i, int64_t j ) {
-    #if 0
+  inline double matrix_element_generator(int64_t i, int64_t j) {
+#if 0
     // Generates a Hankel matrix: absurd condition number
     return i+j;
-    #else
+#else
     // Generates a Circulant matrix: good condition number
-    return htoeplitz_vector[std::abs(i-j)];
-    #endif
+    return htoeplitz_vector[std::abs(i - j)];
+#endif
   }
 
-  inline
-  double make_ta_reference(TA::Tensor<double>& t,
+  inline double make_ta_reference(TA::Tensor<double>& t,
                                   TA::Range const& range) {
     t = TA::Tensor<double>(range, 0.0);
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m) {
       for (auto n = lo[1]; n < up[1]; ++n) {
-        t(m, n) = matrix_element_generator(m,n);
+        t(m, n) = matrix_element_generator(m, n);
       }
     }
 
     return t.norm();
   };
 
-  inline void construct_scalapack( scalapack::BlockCyclicMatrix<double>& A ) {
-    auto [M,N] = A.dims();
-    for (size_t i = 0; i < M; ++i)
-    for (size_t j = 0; j < N; ++j)
-    if (A.dist().i_own(i, j)) {
-      auto [i_local, j_local] = A.dist().local_indx(i, j);
-      A.local_mat()(i_local, j_local) = matrix_element_generator(i,j);
-    }
-
-  }
-
-  ScaLAPACKFixture(int64_t N, int64_t NB)
-      : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)),  // XXX: Is this safe?
-        ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB),
-        htoeplitz_vector( N ), exact_evals( N )	{
-
+  ReferenceFixture(int64_t N = 1000)
+      : N(N), htoeplitz_vector(N), exact_evals(N) {
     // Generate an hermitian Circulant vector
-    std::fill( htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0 );
+    std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0);
     htoeplitz_vector[0] = 100;
     std::default_random_engine gen(0);
     std::uniform_real_distribution<> dist(0., 1.);
-    for( int64_t i = 1; i <= (N/2); ++i ) {
+    for (int64_t i = 1; i <= (N / 2); ++i) {
       double val = dist(gen);
-      htoeplitz_vector[i]   = val;
-      htoeplitz_vector[N-i] = val;
+      htoeplitz_vector[i] = val;
+      htoeplitz_vector[N - i] = val;
     }
 
     // Compute exact eigenvalues
     const double ff = 2. * M_PI / N;
-    for( int64_t j = 0; j < N; ++j ) {
-      double val = htoeplitz_vector[0];;
-      for( int64_t k = 1; k < N; ++k )
-        val += htoeplitz_vector[N-k] * std::cos( ff * j * k );
+    for (int64_t j = 0; j < N; ++j) {
+      double val = htoeplitz_vector[0];
+      ;
+      for (int64_t k = 1; k < N; ++k)
+        val += htoeplitz_vector[N - k] * std::cos(ff * j * k);
       exact_evals[j] = val;
     }
 
-    std::sort( exact_evals.begin(), exact_evals.end() );
-
-    // Fill reference matrix
-    construct_scalapack( ref_matrix );
+    std::sort(exact_evals.begin(), exact_evals.end());
   }
+};
+
+struct LinearAlgebraFixture : ReferenceFixture {
+#if TILEDARRAY_HAS_SCALAPACK
+
+  blacspp::Grid grid;
+  scalapack::BlockCyclicMatrix<double> ref_matrix;  // XXX: Just double is fine?
 
-  ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {}
+  LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128)
+      : ReferenceFixture(N),
+        grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)),  // XXX: Is this safe?
+        ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) {
+    for (size_t i = 0; i < N; ++i) {
+      for (size_t j = 0; j < N; ++j) {
+        if (ref_matrix.dist().i_own(i, j)) {
+          auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j);
+          ref_matrix.local_mat()(i_local, j_local) =
+              matrix_element_generator(i, j);
+        }
+      }
+    }
+  }
+  template <class A>
+  static void compare(const char* context, const A& lapack, const A& result,
+                      double e) {
+    BOOST_TEST_CONTEXT(context);
+    auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get();
+    BOOST_CHECK_SMALL(diff_with_lapack, e);
+  }
+#endif
 };
 
 TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
@@ -106,7 +138,9 @@ TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
   return TA::TiledRange(ranges.begin(), ranges.end());
 };
 
-BOOST_FIXTURE_TEST_SUITE(scalapack_suite, ScaLAPACKFixture)
+BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture)
+
+#if TILEDARRAY_HAS_SCALAPACK
 
 BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) {
   GlobalFixture::world->gop.fence();
@@ -118,27 +152,25 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) {
 
   auto trange = gen_trange(N, {static_cast<size_t>(NB)});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_ta = TA::scalapack::block_cyclic_to_array<TA::TArray<double>>( ref_matrix, trange );
+  auto test_ta =
+      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
   GlobalFixture::world->gop.fence();
 
   auto norm_diff =
-	  (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
+      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
 BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) {
   GlobalFixture::world->gop.fence();
 
@@ -147,29 +179,27 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) {
 
   auto NB = ref_matrix.dist().nb();
 
-  auto trange = gen_trange(N, {static_cast<size_t>(NB/2)});
+  auto trange = gen_trange(N, {static_cast<size_t>(NB / 2)});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_ta = TA::scalapack::block_cyclic_to_array<TA::TArray<double>>( ref_matrix, trange );
+  auto test_ta =
+      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
   GlobalFixture::world->gop.fence();
 
   auto norm_diff =
-	  (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
+      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
 BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) {
   GlobalFixture::world->gop.fence();
 
@@ -180,15 +210,14 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) {
 
   auto trange = gen_trange(N, {static_cast<size_t>(NB)});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB );
+  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
   GlobalFixture::world->gop.fence();
 
   double local_norm_diff =
@@ -201,47 +230,40 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) {
 
   norm_diff = std::sqrt(norm_diff);
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
-
-
-
 BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) {
   GlobalFixture::world->gop.fence();
 
   auto [M, N] = ref_matrix.dims();
   BOOST_REQUIRE_EQUAL(M, N);
 
-  auto NB = ref_matrix.dist().nb();
+  [[maybe_unused]] auto NB = ref_matrix.dist().nb();
 
   auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_ta = TA::scalapack::block_cyclic_to_array<TA::TArray<double>>( ref_matrix, trange );
+  auto test_ta =
+      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
   GlobalFixture::world->gop.fence();
 
   auto norm_diff =
-	  (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
+      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
 BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) {
   GlobalFixture::world->gop.fence();
 
@@ -252,15 +274,14 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) {
 
   auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB );
+  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
   GlobalFixture::world->gop.fence();
 
   double local_norm_diff =
@@ -273,14 +294,11 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) {
 
   norm_diff = std::sqrt(norm_diff);
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
-
 BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) {
   GlobalFixture::world->gop.fence();
 
@@ -291,21 +309,21 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) {
 
   auto trange = gen_trange(N, {static_cast<size_t>(NB)});
 
-  auto ref_ta = TA::make_array<TA::TSpArray<double> >(
+  auto ref_ta = TA::make_array<TA::TSpArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_ta = TA::scalapack::block_cyclic_to_array<TA::TSpArray<double>>( ref_matrix, trange );
+  auto test_ta = scalapack::block_cyclic_to_array<TA::TSpArray<double>>(
+      ref_matrix, trange);
   GlobalFixture::world->gop.fence();
 
   auto norm_diff =
-	  (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
+      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
@@ -320,15 +338,14 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {
 
   auto trange = gen_trange(N, {static_cast<size_t>(NB)});
 
-  auto ref_ta = TA::make_array<TA::TSpArray<double> >(
+  auto ref_ta = TA::make_array<TA::TSpArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB );
+  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
   GlobalFixture::world->gop.fence();
 
   double local_norm_diff =
@@ -341,14 +358,11 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {
 
   norm_diff = std::sqrt(norm_diff);
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
-
-
-
 BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
   GlobalFixture::world->gop.fence();
 
@@ -359,15 +373,14 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
 
   auto trange = gen_trange(N, {static_cast<size_t>(NB)});
 
-  const TA::TArray<double> ref_ta = TA::make_array<TA::TArray<double> >(
+  const TA::TArray<double> ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-
   GlobalFixture::world->gop.fence();
-  auto test_matrix = TA::scalapack::array_to_block_cyclic( ref_ta, grid, NB, NB );
+  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
   GlobalFixture::world->gop.fence();
 
   double local_norm_diff =
@@ -380,481 +393,473 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
 
   norm_diff = std::sqrt(norm_diff);
 
-  BOOST_CHECK_SMALL( norm_diff, std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 };
 
+#endif  // TILEDARRAY_HAS_SCALAPACK
 
-BOOST_AUTO_TEST_CASE( sca_heig_same_tiling ) {
-
+BOOST_AUTO_TEST_CASE(heig_same_tiling) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  const auto ref_ta = TA::make_array<TA::TArray<double> >(
+  const auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto [evals, evecs] = heig( ref_ta );
-  //auto evals = heig( ref_ta );
+  auto [evals, evecs] = lapack::heig(ref_ta);
+  auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta);
+  // auto evals = heig( ref_ta );
 
-  BOOST_CHECK( evecs.trange() == ref_ta.trange() );
+  BOOST_CHECK(evecs.trange() == ref_ta.trange());
 
-  // TODO: Check validity of eigenvectors, not crutial for the time being
+  // check eigenvectors against lapack only, for now ...
+  decltype(evecs) evecs_error;
+  evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j");
+  // TODO need to fix phases of the eigenvectors to be able to compare ...
+  // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
+  //                  N * N * std::numeric_limits<double>::epsilon());
 
   // Check eigenvalue correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i) {
+    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
+    BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol);
+  }
 
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_heig_diff_tiling ) {
-
+BOOST_AUTO_TEST_CASE(heig_diff_tiling) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
-      *GlobalFixture::world, trange, 
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
+      *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
   auto new_trange = gen_trange(N, {64ul});
-  auto [evals, evecs] = heig( ref_ta, 128, new_trange );
+  auto [evals, evecs] = lapack::heig(ref_ta, new_trange);
+  auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange);
 
-  BOOST_CHECK( evecs.trange() == new_trange );
+  BOOST_CHECK(evecs.trange() == new_trange);
 
-  // TODO: Check validity of eigenvectors, not crutial for the time being
+  // check eigenvectors against lapack only, for now ...
+  decltype(evecs) evecs_error;
+  evecs_error("i,j") = evecs_lapack("i,j") - evecs("i,j");
+  // TODO need to fix phases of the eigenvectors to be able to compare ...
+  // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
+  //                  N * N * std::numeric_limits<double>::epsilon());
 
   // Check eigenvalue correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i) {
+    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
+    BOOST_CHECK_SMALL(std::abs(evals_lapack[i] - exact_evals[i]), tol);
+  }
 
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_heig_generalized ) {
-
+BOOST_AUTO_TEST_CASE(heig_generalized) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TSpArray<double> >(
+  auto ref_ta = TA::make_array<TA::TSpArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto dense_iden = TA::make_array<TA::TArray<double> >(
+  auto dense_iden = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [](TA::Tensor<double>& t, TA::Range const& range) -> double {
         t = TA::Tensor<double>(range, 0.0);
         auto lo = range.lobound_data();
         auto up = range.upbound_data();
-        for (auto m = lo[0]; m < up[0]; ++m) 
-        for (auto n = lo[1]; n < up[1]; ++n) 
-	if( m == n )
-          t(m, n) = 1.;
-	
-	return t.norm();
+        for (auto m = lo[0]; m < up[0]; ++m)
+          for (auto n = lo[1]; n < up[1]; ++n)
+            if (m == n) t(m, n) = 1.;
+
+        return t.norm();
       });
 
   GlobalFixture::world->gop.fence();
-  auto [evals, evecs] = heig( ref_ta, dense_iden );
-  //auto evals = heig( ref_ta );
+  auto [evals, evecs] = lapack::heig(ref_ta, dense_iden);
+  // auto evals = heig( ref_ta );
 
-  BOOST_CHECK( evecs.trange() == ref_ta.trange() );
+  BOOST_CHECK(evecs.trange() == ref_ta.trange());
 
-  // TODO: Check validity of eigenvectors, not crutial for the time being
+  // TODO: Check validity of eigenvectors, not crucial for the time being
 
   // Check eigenvalue correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(evals[i] - exact_evals[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i)
+    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
 
   GlobalFixture::world->gop.fence();
 }
 
-
-
-BOOST_AUTO_TEST_CASE( sca_chol ) {
-
+BOOST_AUTO_TEST_CASE(cholesky) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto A = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto L = cholesky( ref_ta );
-
-  BOOST_CHECK( L.trange() == ref_ta.trange() );
+  auto L = lapack::cholesky(A);
 
-  ref_ta("i,j") -= L("i,k") * L("j,k").conj();
+  BOOST_CHECK(L.trange() == A.trange());
 
-  double diff_norm = ref_ta("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( diff_norm, N*N*std::numeric_limits<double>::epsilon() );
+  decltype(A) A_minus_LLt;
+  A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj();
 
-  GlobalFixture::world->gop.fence();
-}
+  BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(),
+                    N * N * std::numeric_limits<double>::epsilon());
 
+  // check against LAPACK also
+  auto L_ref = TiledArray::lapack::cholesky(A);
+  decltype(L) L_diff;
+  L_diff("i,j") = L("i,j") - L_ref("i,j");
 
+  BOOST_CHECK_SMALL(L_diff("i,j").norm().get(),
+                    N * N * std::numeric_limits<double>::epsilon());
 
-BOOST_AUTO_TEST_CASE( sca_chol_linv ) {
+  GlobalFixture::world->gop.fence();
+}
 
+BOOST_AUTO_TEST_CASE(cholesky_linv) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto A = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
+  decltype(A) Acopy = A.clone();
 
-     
-  auto Linv = cholesky_linv( ref_ta );
+  auto Linv = lapack::cholesky_linv(A);
 
-  BOOST_CHECK( Linv.trange() == ref_ta.trange() );
+  BOOST_CHECK(Linv.trange() == A.trange());
 
-  TA::TArray<double> tmp( *GlobalFixture::world, trange );
-  tmp("i,j")    = Linv("i,k") * ref_ta("k,j");
-  ref_ta("i,j") = tmp ("i,k") * Linv  ("j,k");
+  TA::TArray<double> tmp(*GlobalFixture::world, trange);
+  tmp("i,j") = Linv("i,k") * A("k,j");
+  A("i,j") = tmp("i,k") * Linv("j,k");
 
-  TA::foreach_inplace( ref_ta, []( TA::Tensor<double>& tile ) {
+  TA::foreach_inplace(A, [](TA::Tensor<double>& tile) {
     auto range = tile.range();
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m)
-    for (auto n = lo[1]; n < up[1]; ++n) 
-    if( m == n ) {
-      tile(m,n) -= 1.;
-    }
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) {
+          tile(m, n) -= 1.;
+        }
   });
 
-  double norm = ref_ta("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
-
-  GlobalFixture::world->gop.fence();
-}
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = A("i,j").norm().get();
 
+  BOOST_CHECK_SMALL(norm, epsilon);
 
+  TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon);
 
+  GlobalFixture::world->gop.fence();
+}
 
-BOOST_AUTO_TEST_CASE( sca_chol_linv_retl ) {
-
+BOOST_AUTO_TEST_CASE(cholesky_linv_retl) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto A = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-     
-  auto [L, Linv] = cholesky_linv<decltype(ref_ta),true>( ref_ta );
+  auto [L, Linv] = lapack::cholesky_linv<decltype(A), true>(A);
 
-  BOOST_CHECK( Linv.trange() == ref_ta.trange() );
-  BOOST_CHECK( L.trange()    == ref_ta.trange() );
+  BOOST_CHECK(Linv.trange() == A.trange());
+  BOOST_CHECK(L.trange() == A.trange());
 
-  TA::TArray<double> tmp( *GlobalFixture::world, trange );
+  TA::TArray<double> tmp(*GlobalFixture::world, trange);
   tmp("i,j") = Linv("i,k") * L("k,j");
 
-  TA::foreach_inplace( tmp, []( TA::Tensor<double>& tile ) {
+  TA::foreach_inplace(tmp, [](TA::Tensor<double>& tile) {
     auto range = tile.range();
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m)
-    for (auto n = lo[1]; n < up[1]; ++n) 
-    if( m == n ) {
-      tile(m,n) -= 1.;
-    }
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) {
+          tile(m, n) -= 1.;
+        }
   });
 
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
   double norm = tmp("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
 
-  GlobalFixture::world->gop.fence();
-}
+  BOOST_CHECK_SMALL(norm, epsilon);
 
+  TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon);
 
+  GlobalFixture::world->gop.fence();
+}
 
-BOOST_AUTO_TEST_CASE( sca_chol_solve ) {
-
+BOOST_AUTO_TEST_CASE(cholesky_solve) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto A = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-     
-  auto iden = cholesky_solve( ref_ta, ref_ta );
+  auto iden = lapack::cholesky_solve(A, A);
+  BOOST_CHECK(iden.trange() == A.trange());
 
-  BOOST_CHECK( iden.trange() == ref_ta.trange() );
+  auto iden_lapack = lapack::cholesky_solve(A, A);
+  decltype(iden) iden_error;
+  iden_error("i,j") = iden("i,j") - iden_lapack("i,j");
+  BOOST_CHECK_SMALL(iden_error("i,j").norm().get(),
+                    N * N * std::numeric_limits<double>::epsilon());
 
-  TA::foreach_inplace( iden, []( TA::Tensor<double>& tile ) {
+  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
     auto range = tile.range();
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m)
-    for (auto n = lo[1]; n < up[1]; ++n) 
-    if( m == n ) {
-      tile(m,n) -= 1.;
-    }
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) {
+          tile(m, n) -= 1.;
+        }
   });
 
   double norm = iden("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 }
 
-
-
-
-
-BOOST_AUTO_TEST_CASE( sca_chol_lsolve ) {
-
+BOOST_AUTO_TEST_CASE(cholesky_lsolve) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto A = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-     
   // Should produce X = L**H
-  auto [L, X] = cholesky_lsolve( scalapackpp::TransposeFlag::NoTranspose,
-    ref_ta, ref_ta );
-
-  BOOST_CHECK( X.trange() == ref_ta.trange() );
-  BOOST_CHECK( L.trange() == ref_ta.trange() );
+  auto [L, X] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A);
+  BOOST_CHECK(X.trange() == A.trange());
+  BOOST_CHECK(L.trange() == A.trange());
+
+  // first, test against LAPACK
+  auto [L_lapack, X_lapack] =
+      lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A);
+  decltype(L) L_error;
+  L_error("i,j") = L("i,j") - L_lapack("i,j");
+  BOOST_CHECK_SMALL(L_error("i,j").norm().get(),
+                    N * N * std::numeric_limits<double>::epsilon());
+  decltype(X) X_error;
+  X_error("i,j") = X("i,j") - X_lapack("i,j");
+  BOOST_CHECK_SMALL(X_error("i,j").norm().get(),
+                    N * N * std::numeric_limits<double>::epsilon());
 
   X("i,j") -= L("j,i");
 
   double norm = X("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
+  BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits<double>::epsilon());
 
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_lu_solve ) {
-
+BOOST_AUTO_TEST_CASE(lu_solve) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-     
-  auto iden = lu_solve( ref_ta, ref_ta );
+  auto iden = lapack::lu_solve(ref_ta, ref_ta);
 
-  BOOST_CHECK( iden.trange() == ref_ta.trange() );
+  BOOST_CHECK(iden.trange() == ref_ta.trange());
 
-  TA::foreach_inplace( iden, []( TA::Tensor<double>& tile ) {
+  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
     auto range = tile.range();
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m)
-    for (auto n = lo[1]; n < up[1]; ++n) 
-    if( m == n ) {
-      tile(m,n) -= 1.;
-    }
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) {
+          tile(m, n) -= 1.;
+        }
   });
 
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
   double norm = iden("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
+
+  BOOST_CHECK_SMALL(norm, epsilon);
+  TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon);
 
   GlobalFixture::world->gop.fence();
 }
 
-
-BOOST_AUTO_TEST_CASE( sca_lu_inv ) {
-
+BOOST_AUTO_TEST_CASE(lu_inv) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  TA::TArray<double> iden( *GlobalFixture::world, trange );
-     
-  auto Ainv = lu_inv( ref_ta );
-  iden("i,j") = Ainv("i,k") * ref_ta("k,j");
+  TA::TArray<double> iden(*GlobalFixture::world, trange);
 
+  auto Ainv = lapack::lu_inv(ref_ta);
+  iden("i,j") = Ainv("i,k") * ref_ta("k,j");
 
-  BOOST_CHECK( iden.trange() == ref_ta.trange() );
+  BOOST_CHECK(iden.trange() == ref_ta.trange());
 
-  TA::foreach_inplace( iden, []( TA::Tensor<double>& tile ) {
+  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
     auto range = tile.range();
     auto lo = range.lobound_data();
     auto up = range.upbound_data();
     for (auto m = lo[0]; m < up[0]; ++m)
-    for (auto n = lo[1]; n < up[1]; ++n) 
-    if( m == n ) {
-      tile(m,n) -= 1.;
-    }
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) {
+          tile(m, n) -= 1.;
+        }
   });
 
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
   double norm = iden("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL( norm, N*N*std::numeric_limits<double>::epsilon() );
+
+  BOOST_CHECK_SMALL(norm, epsilon);
+  TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon);
 
   GlobalFixture::world->gop.fence();
 }
 
-
 #if 1
-BOOST_AUTO_TEST_CASE( sca_svd_values_only ) {
-
+BOOST_AUTO_TEST_CASE(svd_values_only) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto S = svd<SVDValuesOnly>( ref_ta, trange, trange );
+  auto S = lapack::svd<TA::SVDValuesOnly>(ref_ta, trange, trange);
 
   std::vector exact_singular_values = exact_evals;
-  std::sort( exact_singular_values.begin(), exact_singular_values.end(),
-    std::greater<double>() );
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
 
   // Check singular value correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i)
+    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_svd_leftvectors ) {
-
+BOOST_AUTO_TEST_CASE(svd_leftvectors) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto [S,U] = svd<SVDLeftVectors>( ref_ta, trange, trange );
+  auto [S, U] = lapack::svd<TA::SVDLeftVectors>(ref_ta, trange, trange);
 
   std::vector exact_singular_values = exact_evals;
-  std::sort( exact_singular_values.begin(), exact_singular_values.end(),
-    std::greater<double>() );
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
 
   // Check singular value correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i)
+    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_svd_rightvectors ) {
-
+BOOST_AUTO_TEST_CASE(svd_rightvectors) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto [S,VT] = svd<SVDRightVectors>( ref_ta, trange, trange );
+  auto [S, VT] = lapack::svd<TA::SVDRightVectors>(ref_ta, trange, trange);
 
   std::vector exact_singular_values = exact_evals;
-  std::sort( exact_singular_values.begin(), exact_singular_values.end(),
-    std::greater<double>() );
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
 
   // Check singular value correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i)
+    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
   GlobalFixture::world->gop.fence();
 }
 
-BOOST_AUTO_TEST_CASE( sca_svd_allvectors ) {
-
+BOOST_AUTO_TEST_CASE(svd_allvectors) {
   GlobalFixture::world->gop.fence();
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
 
   auto trange = gen_trange(N, {128ul});
 
-  auto ref_ta = TA::make_array<TA::TArray<double> >(
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
       *GlobalFixture::world, trange,
       [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
         return this->make_ta_reference(t, range);
       });
 
-  auto [S,U,VT] = svd<SVDAllVectors>( ref_ta, trange, trange );
+  auto [S, U, VT] = lapack::svd<TA::SVDAllVectors>(ref_ta, trange, trange);
 
   std::vector exact_singular_values = exact_evals;
-  std::sort( exact_singular_values.begin(), exact_singular_values.end(),
-    std::greater<double>() );
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
 
   // Check singular value correctness
-  double tol = N*N*std::numeric_limits<double>::epsilon();
-  for( int64_t i = 0; i < N; ++i )
-    BOOST_CHECK_SMALL( std::abs(S[i] - exact_singular_values[i]), tol );
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  for (int64_t i = 0; i < N; ++i)
+    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
   GlobalFixture::world->gop.fence();
 }
 #endif