diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a972c76b4..382f4abb56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,12 +26,30 @@ cmake_minimum_required (VERSION 3.15.0) # need list(PREPEND for toolchains +# Set TiledArray version ======================================================= + +# see https://semver.org/ +set(TILEDARRAY_MAJOR_VERSION 1) +set(TILEDARRAY_MINOR_VERSION 0) +set(TILEDARRAY_MICRO_VERSION 0) +set(TILEDARRAY_PRERELEASE_ID ) + +set(TILEDARRAY_VERSION "${TILEDARRAY_MAJOR_VERSION}.${TILEDARRAY_MINOR_VERSION}.${TILEDARRAY_MICRO_VERSION}") +if (TILEDARRAY_PRERELEASE_ID) + set(TILEDARRAY_EXT_VERSION "${TILEDARRAY_VERSION}-${TILEDARRAY_PRERELEASE_ID}") +else(TILEDARRAY_PRERELEASE_ID) + set(TILEDARRAY_EXT_VERSION "${TILEDARRAY_VERSION}") +endif(TILEDARRAY_PRERELEASE_ID) + # Preload versions/tags of all dependencies ==================================== include(external/versions.cmake) -############################################################################### -# Bring ValeevGroup cmake toolkit -############################################################################### +# Safety net for dev workflow: accidental install will not affect FindOrFetch* +if (NOT DEFINED CACHE{CMAKE_FIND_NO_INSTALL_PREFIX}) + set(CMAKE_FIND_NO_INSTALL_PREFIX ON CACHE BOOL "Whether find_* commands will search CMAKE_INSTALL_PREFIX and CMAKE_STAGING_PREFIX; see https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_NO_INSTALL_PREFIX.html#variable:CMAKE_FIND_NO_INSTALL_PREFIX") +endif() + +# Bring ValeevGroup cmake toolkit ============================================== include(FetchContent) if (DEFINED PROJECT_BINARY_DIR) set(VG_CMAKE_KIT_PREFIX_DIR PROJECT_BINARY_DIR) @@ -50,23 +68,13 @@ FetchContent_Declare( FetchContent_MakeAvailable(vg_cmake_kit) list(APPEND CMAKE_MODULE_PATH "${vg_cmake_kit_SOURCE_DIR}/modules") -project (TiledArray LANGUAGES) -enable_language(CXX) - -# Set TiledArray version ======================================================= - -# see https://semver.org/ -set(TILEDARRAY_MAJOR_VERSION 1) -set(TILEDARRAY_MINOR_VERSION 0) -set(TILEDARRAY_MICRO_VERSION 0) -set(TILEDARRAY_PRERELEASE_ID ) - -set(TILEDARRAY_VERSION "${TILEDARRAY_MAJOR_VERSION}.${TILEDARRAY_MINOR_VERSION}.${TILEDARRAY_MICRO_VERSION}") -if (TILEDARRAY_PRERELEASE_ID) - set(TILEDARRAY_EXT_VERSION "${TILEDARRAY_VERSION}-${TILEDARRAY_PRERELEASE_ID}") -else(TILEDARRAY_PRERELEASE_ID) - set(TILEDARRAY_EXT_VERSION "${TILEDARRAY_VERSION}") -endif(TILEDARRAY_PRERELEASE_ID) +# Declare ourselves ============================================================ +project(TiledArray + VERSION ${TILEDARRAY_VERSION} + DESCRIPTION "TiledArray: block-sparse tensor framework for modern (distributed-memory and heterogeneous) computing" + LANGUAGES CXX + HOMEPAGE_URL "https://valeevgroup.github.io/tiledarray/") +enable_language(C) # C needed even for basic platform introspection # Set install paths ============================================================ @@ -157,8 +165,11 @@ if(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL) set(TA_TILE_OPS_LOG_LEVEL 1) endif(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL) -option(TA_ENABLE_RANGEV3 "Enable Range-V3 library" OFF) -add_feature_info(ENABLE_RANGEV3 TA_ENABLE_RANGEV3 "Range-V3 ranges library") +option(TA_RANGEV3 "Enable Range-V3 library" OFF) +add_feature_info(TA_RANGEV3 TA_RANGEV3 "Range-V3 ranges library") + +option(TA_TTG "Enable search/build of TTG library" OFF) +add_feature_info(TA_TTG TA_TTG "TTG library") # Enable shared library support options redefaultable_option(TA_ASSUMES_ASLR_DISABLED "TiledArray assumes the Address Space Layout Randomization (ASLR) to be disabled" OFF) @@ -303,6 +314,9 @@ add_custom_target(External-tiledarray) if(ENABLE_CUDA) include(external/cuda.cmake) endif() +if (TA_TTG) + include(FindOrFetchTTG) +endif(TA_TTG) include(FindOrFetchMADWorld) detect_MADNESS_configuration() include(external/eigen.cmake) @@ -328,9 +342,6 @@ include(FindOrFetchBoost) if(ENABLE_SCALAPACK) include(external/scalapackpp.cmake) endif() -if (TA_ENABLE_RANGEV3) - include(FindOrFetchRangeV3) -endif(TA_ENABLE_RANGEV3) # optional deps: # 1. ccache @@ -340,6 +351,15 @@ if(CCACHE) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE}) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE}) endif(CCACHE) +# 2. range-v3 +if (TA_RANGEV3) + include(FindOrFetchRangeV3) +endif(TA_RANGEV3) +# 3. TTG +# N.B. make sure TA configures MADNESS correctly +#if (TA_TTG) +# include(FindOrFetchTTG) +#endif(TA_TTG) ########################## # sources diff --git a/INSTALL.md b/INSTALL.md index 3606a2bd25..4d3b748722 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -40,9 +40,9 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Container: header-only - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 242871710dabd5ef337e5253000d3e38c1d977ba . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS), tag 240b49b033864b34d74f2b8d6dd55f2ab524eae3 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. -- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 997e8b458c4234fb6c8c2781a5df59cb14b7e700 . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 20d4d7cffbbc98dd7b7d6a5feb7d68f1df067cd6 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. @@ -72,12 +72,13 @@ Optional prerequisites: - [blacspp](https://github.com/wavefunction91/blacspp.git) -- a modern C++ (C++17) wrapper for BLACS - Python3 interpreter -- to test (optionally-built) Python bindings - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20; only used for some unit testing of the functionality anticipated to be supported by future C++ standards. +- [TTG](https://github.com/TESSEorg/ttg.git) -- C++ implementation of the Template Task Graph programming model for fine-grained flow-graph composition of distributed memory programs. -Most of the dependencies (except for MADNESS and BTAS) can be installed with a package manager, +Many of these dependencies can be installed with a package manager, such as Homebrew on OS X or apt-get on Debian Linux distributions; -this is the preferred method. Since on some systems configuring -and building MADNESS can be difficult even for experts, we recommend letting the -TiledArray download and build MADNESS for you. +this is the preferred method. Since configuring and building other +dependencies (such as MADNESS) can be difficult even for experts, +we recommend letting the TiledArray download and build them for you. ## Obtain TiledArray source code @@ -411,7 +412,8 @@ support may be added. * `TA_ASSERT_POLICY` -- Set to `TA_ASSERT_IGNORE` to disable `TA_ASSERT` assertions, `TA_ASSERT_THROW` to cause `TA_ASSERT` assertions to throw, `TA_ASSERT_ABORT` to cause `TA_ASSERT` assertions to abort. The default is `TA_ASSERT_IGNORE` if CMake uses a single-configuration generator and`CMAKE_BUILD_TYPE` is set to `Release` or `MinSizeRel`, else the default is `TA_ASSERT_THROW`. * `BUILD_TESTING` -- Set of `OFF` to disable building unit tests. The default is `ON`. * `TA_TRACE_TASKS` -- Set to `ON` to enable tracing of MADNESS tasks using custom task tracer. Note that standard profilers/tracers are generally useless (except in the trivial cases) with MADWorld-based programs since the submission context of tasks is not captured by standard tracing tools; this makes it impossible in a nontrivial program to attribute tasks to source code. WARNING: task tracing his will greatly increase the memory requirements. [Default=OFF]. -* `TA_ENABLE_RANGEV3` -- Set to `ON` to find or fetch the Range-V3 library and enable additional tests of TA components with constructs anticipated to be supported in the future. [Default=OFF]. +* `TA_RANGEV3` -- Set to `ON` to find or fetch the Range-V3 library and enable additional tests of TA components with constructs anticipated to be supported in the future. [Default=OFF]. +* `TA_TTG` -- Set to `ON` to find or fetch the TTG library. [Default=OFF]. * `TA_SIGNED_1INDEX_TYPE` -- Set to `OFF` to use unsigned 1-index coordinate type (default for TiledArray 1.0.0-alpha.2 and older). The default is `ON`, which enables the use of negative indices in coordinates. * `TA_MAX_SOO_RANK_METADATA` -- Specifies the maximum rank for which to use Small Object Optimization (hence, avoid the use of the heap) for metadata. The default is `8`. * `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile memory allocations in TA::Tensor. diff --git a/cmake/modules/FindOrFetchBoost.cmake b/cmake/modules/FindOrFetchBoost.cmake index 13b63d3a68..0b47bd10e5 100644 --- a/cmake/modules/FindOrFetchBoost.cmake +++ b/cmake/modules/FindOrFetchBoost.cmake @@ -11,11 +11,15 @@ if (NOT TARGET Boost::boost) message(STATUS "Found Boost ${Boost_VERSION}: ${Boost_INCLUDE_DIRS}") endif(TARGET Boost::boost) - # Boost::* targets by default are not GLOBAL, so to allow users of LINALG_LIBRARIES to safely use them we need to make them global + # Boost::* targets by default are not GLOBAL, so to allow users of TA to safely use them we need to make them global # more discussion here: https://gitlab.kitware.com/cmake/cmake/-/issues/17256 - foreach(tgt boost;headers) + foreach(tgt boost;headers;${Boost_BTAS_DEPS_LIBRARIES}) if (TARGET Boost::${tgt}) - set_target_properties(Boost::${tgt} PROPERTIES IMPORTED_GLOBAL TRUE) + get_target_property(_boost_tgt_${tgt}_is_imported_global Boost::${tgt} IMPORTED_GLOBAL) + if (NOT _boost_tgt_${tgt}_is_imported_global) + set_target_properties(Boost::${tgt} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() + unset(_boost_tgt_${tgt}_is_imported_global) endif() endforeach() diff --git a/cmake/modules/FindOrFetchTTG.cmake b/cmake/modules/FindOrFetchTTG.cmake new file mode 100644 index 0000000000..a940656a6a --- /dev/null +++ b/cmake/modules/FindOrFetchTTG.cmake @@ -0,0 +1,28 @@ +if (NOT TARGET ttg-parsec) + find_package(ttg CONFIG) +endif(NOT TARGET ttg-parsec) + +if (TARGET ttg-parsec) + message(STATUS "Found ttg CONFIG at ${ttg_CONFIG}") +else (TARGET ttg-parsec) + + include(FetchContent) + FetchContent_Declare( + ttg + GIT_REPOSITORY ${TA_TRACKED_TTG_URL} + GIT_TAG ${TA_TRACKED_TTG_TAG} + ) + FetchContent_MakeAvailable(ttg) + FetchContent_GetProperties(ttg + SOURCE_DIR TTG_SOURCE_DIR + BINARY_DIR TTG_BINARY_DIR + ) + +endif(TARGET ttg-parsec) + +# postcond check +if (NOT TARGET ttg-parsec) + message(FATAL_ERROR "FindOrFetchTTG could not make ttg-parsec target available") +else() + set(TILEDARRAY_HAS_TTG 1) +endif() diff --git a/external/versions.cmake b/external/versions.cmake index 4ac855e249..83405dfdb3 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,13 +19,13 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 997e8b458c4234fb6c8c2781a5df59cb14b7e700) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG fae8081179b9d074968b08e064a32e3ca07ab0f1) +set(TA_TRACKED_MADNESS_TAG 20d4d7cffbbc98dd7b7d6a5feb7d68f1df067cd6) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 851ef271858129c47a912a088cc833b085c382aa) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba) -set(TA_TRACKED_BTAS_PREVIOUS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2) +set(TA_TRACKED_BTAS_TAG 240b49b033864b34d74f2b8d6dd55f2ab524eae3) +set(TA_TRACKED_BTAS_PREVIOUS_TAG ab866a760b72ff23053266bf46b87f19d3df44b7) set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09) set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3) @@ -38,3 +38,7 @@ set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 043f85d7f31ec6009740ab466bcb5008af7b0814 set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864) set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006) + +set(TA_TRACKED_TTG_URL https://github.com/therault/ttg.git) +set(TA_TRACKED_TTG_TAG bb5309a5224e2546a5316daf7fc5c143f450f17b) +set(TA_TRACKED_TTG_PREVIOUS_TAG 5107143b418384c44587c2776a9e87065d33d670) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5ed90793b..2578b4eb1a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,7 @@ TiledArray/perm_index.h TiledArray/permutation.h TiledArray/proc_grid.h TiledArray/range.h +TiledArray/range1.h TiledArray/range_iterator.h TiledArray/reduce_task.h TiledArray/replicator.h @@ -58,6 +59,7 @@ TiledArray/val_array.h ${PROJECT_BINARY_DIR}/src/TiledArray/version.h TiledArray/zero_tensor.h TiledArray/math/linalg/forward.h +TiledArray/math/linalg/basic.h TiledArray/math/linalg/conjgrad.h TiledArray/math/linalg/diis.h TiledArray/math/linalg/util.h @@ -73,6 +75,8 @@ TiledArray/math/linalg/scalapack/heig.h TiledArray/math/linalg/scalapack/lu.h TiledArray/math/linalg/scalapack/qr.h TiledArray/math/linalg/scalapack/svd.h +TiledArray/math/linalg/ttg/cholesky.h +TiledArray/math/linalg/ttg/util.h TiledArray/conversions/btas.h TiledArray/conversions/clone.h TiledArray/conversions/dense_to_sparse.h @@ -223,6 +227,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp +TiledArray/math/linalg/basic.cpp TiledArray/math/linalg/rank-local.cpp ) @@ -254,6 +259,10 @@ if( TARGET TiledArray_SCALAPACK ) endif() list(APPEND _TILEDARRAY_DEPENDENCIES "${LAPACK_LIBRARIES}") +if( TARGET ttg-parsec ) + list(APPEND _TILEDARRAY_DEPENDENCIES ttg-parsec) +endif() + # cache deps as TILEDARRAY_PRIVATE_LINK_LIBRARIES set(TILEDARRAY_PRIVATE_LINK_LIBRARIES ${_TILEDARRAY_DEPENDENCIES} CACHE STRING "List of libraries on which TiledArray depends on") @@ -295,6 +304,10 @@ add_library(tiledarray ${TILEDARRAY_SOURCE_FILES} ${TILEDARRAY_HEADER_FILES}) target_compile_definitions(${targetname} PUBLIC ${LAPACK_COMPILE_DEFINITIONS}) endif (LAPACK_COMPILE_DEFINITIONS) +if( TARGET ttg-parsec ) + target_compile_definitions(${targetname} PUBLIC TTG_USE_PARSEC=1) +endif() + # Add library to the list of installed components install(TARGETS tiledarray EXPORT tiledarray COMPONENT tiledarray LIBRARY DESTINATION "${TILEDARRAY_INSTALL_LIBDIR}" diff --git a/src/TiledArray/array_impl.h b/src/TiledArray/array_impl.h index 25ed454949..beb8ba3e09 100644 --- a/src/TiledArray/array_impl.h +++ b/src/TiledArray/array_impl.h @@ -451,7 +451,7 @@ class ArrayImpl : public TensorImpl { /// \throw TiledArray::Exception When the size of shape is not equal to /// zero ArrayImpl(World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap) + const std::shared_ptr& pmap) : TensorImpl_(world, trange, shape, pmap), data_(world, trange.tiles_range().volume(), pmap) {} @@ -636,9 +636,7 @@ class ArrayImpl : public TensorImpl { /// DistributedStorage /// @return const reference to the atomic counter of live DelayedSet requests - const madness::AtomicInt& num_live_ds() const { - return data_.num_live_ds(); - } + const madness::AtomicInt& num_live_ds() const { return data_.num_live_ds(); } }; // class ArrayImpl diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index 86fef14166..a2e4f06ce4 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -81,6 +81,9 @@ /* Is TA::Tensor memory profiling enabled? */ #cmakedefine TA_TENSOR_MEM_PROFILE 1 +/* Is TTG available? */ +#cmakedefine TILEDARRAY_HAS_TTG 1 + /* Use preprocessor to check if BTAS is available */ #ifndef TILEDARRAY_HAS_BTAS #ifdef __has_include diff --git a/src/TiledArray/conversions/make_array.h b/src/TiledArray/conversions/make_array.h index db03091d3c..cc2216e58a 100644 --- a/src/TiledArray/conversions/make_array.h +++ b/src/TiledArray/conversions/make_array.h @@ -70,9 +70,9 @@ namespace TiledArray { /// \return An array object of type `Array` template ::value>::type* = nullptr> -inline Array make_array(World& world, const detail::trange_t& trange, - const std::shared_ptr >& pmap, - Op&& op) { +inline Array make_array( + World& world, const detail::trange_t& trange, + const std::shared_ptr >& pmap, Op&& op) { typedef typename Array::value_type value_type; typedef typename value_type::range_type range_type; @@ -134,9 +134,9 @@ inline Array make_array(World& world, const detail::trange_t& trange, /// \return An array object of type `Array` template ::value>::type* = nullptr> -inline Array make_array(World& world, const detail::trange_t& trange, - const std::shared_ptr >& pmap, - Op&& op) { +inline Array make_array( + World& world, const detail::trange_t& trange, + const std::shared_ptr >& pmap, Op&& op) { typedef typename Array::value_type value_type; typedef typename Array::ordinal_type ordinal_type; typedef std::pair > datum_type; @@ -146,8 +146,8 @@ inline Array make_array(World& world, const detail::trange_t& trange, tiles.reserve(pmap->size()); // Construct a tensor to hold updated tile norms for the result shape. - TiledArray::Tensor::value_type> - tile_norms(trange.tiles_range(), 0); + TiledArray::Tensor::value_type> tile_norms( + trange.tiles_range(), 0); // Construct the task function used to construct the result tiles. madness::AtomicInt counter; diff --git a/src/TiledArray/conversions/sparse_to_dense.h b/src/TiledArray/conversions/sparse_to_dense.h index 6791cc315a..c5bdd812c5 100644 --- a/src/TiledArray/conversions/sparse_to_dense.h +++ b/src/TiledArray/conversions/sparse_to_dense.h @@ -39,7 +39,7 @@ to_dense(DistArray const& sparse_array) { ArrayType dense_array(sparse_array.world(), sparse_array.trange()); typedef typename ArrayType::pmap_interface pmap_interface; - std::shared_ptr const& pmap = dense_array.pmap(); + auto& pmap = dense_array.pmap(); typename pmap_interface::const_iterator end = pmap->end(); diff --git a/src/TiledArray/dense_shape.h b/src/TiledArray/dense_shape.h index 68f5c85a8a..1e1c3eda5a 100644 --- a/src/TiledArray/dense_shape.h +++ b/src/TiledArray/dense_shape.h @@ -387,6 +387,16 @@ constexpr inline bool is_replicated(World& world, const DenseShape& t) { return true; } +/// Add the shape to an output stream + +/// \param os The output stream +/// \param shape the DenseShape object +/// \return A reference to the output stream +inline std::ostream& operator<<(std::ostream& os, const DenseShape& shape) { + os << "DenseShape:" << std::endl; + return os; +} + } // namespace TiledArray #endif // TILEDARRAY_DENSE_SHAPE_H__INCLUDED diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 207af4ffdc..4d84ee1a80 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -81,8 +81,8 @@ class DistArray : public madness::archive::ParallelSerializableObject { typedef typename impl_type::ordinal_type ordinal_type; ///< Ordinal type typedef typename impl_type::value_type value_type; ///< Tile type typedef - typename impl_type::eval_type eval_type; ///< The tile evaluation type - typedef typename impl_type::reference future; ///< Future of \c value_type + typename impl_type::eval_type eval_type; ///< The tile evaluation type + typedef typename impl_type::future future; ///< Future of \c value_type typedef typename impl_type::reference reference; ///< \c future type typedef typename impl_type::const_reference const_reference; ///< \c future type @@ -198,10 +198,9 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \param trange The tiled range object that will be used to set the array /// tiling. \param shape The array shape that defines zero and non-zero tiles /// \param pmap The tile index -> process map - static std::shared_ptr init(World& world, - const trange_type& trange, - const shape_type& shape, - std::shared_ptr pmap) { + static std::shared_ptr init( + World& world, const trange_type& trange, const shape_type& shape, + std::shared_ptr pmap) { // User level validation of input if (!pmap) { @@ -261,10 +260,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// assigned by the user. /// \param world The world where the array will live. /// \param trange The tiled range object that will be used to set the array - /// tiling. \param pmap The tile index -> process map + /// tiling. + /// \param pmap The tile index -> process map DistArray(World& world, const trange_type& trange, - const std::shared_ptr& pmap = - std::shared_ptr()) + const std::shared_ptr& pmap = {}) : pimpl_(init(world, trange, shape_type(1, trange), pmap)) {} /// Sparse array constructor @@ -277,8 +276,8 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// tiling. \param shape The array shape that defines zero and non-zero tiles /// \param pmap The tile index -> process map DistArray(World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap = - std::shared_ptr()) + const std::shared_ptr& pmap = + std::shared_ptr()) : pimpl_(init(world, trange, shape, pmap)) {} /// \name Initializer list constructors @@ -440,17 +439,20 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// This is a distributed lazy destructor. The object will only be deleted /// after the last reference to the world object on all nodes has been - /// destroyed and there are no outstanding references to the object's data. - /// Use defer_deleter_to_next_fence() to defer the deletion of the destructor - /// to the next fence. + /// destroyed and there are no outstanding references to the object's data + /// (e.g., obtained by calling `this->find(idx)` for any `idx` with + /// `this->is_local(idx)==false`). However other types of references to + /// the object may exist (e.g. using references to local tiles), + /// and these are generally not tracked automatically. + /// In such case to defer the release of the object's data to the next fence + /// call `this->defer_deleter_to_next_fence()`. /// \sa defer_deleter_to_next_fence ~DistArray() { reset_pimpl(); } /// Defers deletion to the next fence - /// By default the destruction of the object's data occurs lazily, when - /// all local references to the object are gone and all _remote_ references - /// to the local object's data are gone. This is not always sufficient; + /// By default the destruction of the object's data occurs lazily; + /// see DistArray::~DistArray(). This is not always sufficient; /// call this at any point during object's lifetime to ensure that the /// lifetime of the object lasts to (just past)the next fence. void defer_deleter_to_next_fence() { defer_deleter_to_next_fence_ = true; } @@ -521,10 +523,14 @@ class DistArray : public madness::archive::ParallelSerializableObject { DistArray::wait_for_lazy_cleanup(world()); } + // clang-format off /// Copy assignment /// This is a shallow copy, that is no data is copied. /// \param other The array to be copied + /// \post `other.pimpl()==this->pimpl() && other.deleter_deferred_to_next_fence()==this->deleter_deferred_to_next_fence()` + /// \note Hes the same semantics as the destructor w.r.t. when the data destruction occurs: if `this->deleter_deferred_to_next_fence()==true` this object's data will not be deleted until the next fence of this object's world. + // clang-format on DistArray& operator=(const DistArray& other) { // N.B. reset pimpl_ respecting defer_deleter_to_next_fence_ reset_pimpl(); @@ -916,16 +922,9 @@ class DistArray : public madness::archive::ParallelSerializableObject { if (fut.probe()) continue; } Future tile = pimpl_->world().taskq.add( - [pimpl = this->weak_pimpl(), index = ordinal_type(index), + [pimpl = pimpl_, index = ordinal_type(index), op_shared_handle]() -> value_type { - auto pimpl_ptr = pimpl.lock(); - if (pimpl_ptr) - return op_shared_handle( - pimpl_ptr->trange().make_tile_range(index)); - else { - TA_ASSERT(false); - return {}; - } + return op_shared_handle(pimpl->trange().make_tile_range(index)); }); set(index, std::move(tile)); } @@ -1080,16 +1079,27 @@ class DistArray : public madness::archive::ParallelSerializableObject { World& world() const { return impl_ref().world(); } /// \deprecated use DistArray::pmap() - [[deprecated]] const std::shared_ptr& get_pmap() const { + [[deprecated]] const std::shared_ptr& get_pmap() const { return pmap(); } /// Process map accessor - /// \return A reference to the process map that owns this array. + /// \return A shared_ptr to the process map that owns this array. + /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong + /// throw guarantee. + const std::shared_ptr& pmap() const { + return impl_ref().pmap(); + } + + /// Process map accessor + + /// \note alias to pmap() that mirrors shape_shared(). The use of this + /// alias is recommended + /// \return A shared_ptr to the process map that owns this array. /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarantee. - const std::shared_ptr& pmap() const { + const std::shared_ptr& pmap_shared() const { return impl_ref().pmap(); } @@ -1111,6 +1121,16 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. inline const shape_type& shape() const { return impl_ref().shape(); } + /// Shape accessor + + /// Returns shared_ptr to shape object. No communication is required. + /// \return shared_ptr to the shape object. + /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong throw + /// guarantee. + inline const std::shared_ptr& shape_shared() const { + return impl_ref().shape_shared(); + } + /// Tile ownership /// \tparam Index An integral type, a type satisfying container of diff --git a/src/TiledArray/dist_eval/array_eval.h b/src/TiledArray/dist_eval/array_eval.h index 525ba055fd..c9f3daf195 100644 --- a/src/TiledArray/dist_eval/array_eval.h +++ b/src/TiledArray/dist_eval/array_eval.h @@ -212,8 +212,8 @@ class ArrayEvalImpl TiledArray::detail::is_permutation_v>> ArrayEvalImpl(const array_type& array, World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, const Perm& perm, - const op_type& op) + const std::shared_ptr& pmap, + const Perm& perm, const op_type& op) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), array_(array), op_(std::make_shared(op)), @@ -239,8 +239,8 @@ class ArrayEvalImpl TiledArray::detail::is_permutation_v>> ArrayEvalImpl(const array_type& array, World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, const Perm& perm, - const op_type& op, const Index1& lower_bound, + const std::shared_ptr& pmap, + const Perm& perm, const op_type& op, const Index1& lower_bound, const Index2& upper_bound) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), array_(array), diff --git a/src/TiledArray/dist_eval/binary_eval.h b/src/TiledArray/dist_eval/binary_eval.h index 20e97c55b9..a4c203d3dd 100644 --- a/src/TiledArray/dist_eval/binary_eval.h +++ b/src/TiledArray/dist_eval/binary_eval.h @@ -83,8 +83,8 @@ class BinaryEvalImpl : public DistEvalImpl, TiledArray::detail::is_permutation_v>> BinaryEvalImpl(const left_type& left, const right_type& right, World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, const Perm& perm, - const op_type& op) + const std::shared_ptr& pmap, + const Perm& perm, const op_type& op) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), left_(left), right_(right), diff --git a/src/TiledArray/dist_eval/contraction_eval.h b/src/TiledArray/dist_eval/contraction_eval.h index af363caecb..ae3b456bc0 100644 --- a/src/TiledArray/dist_eval/contraction_eval.h +++ b/src/TiledArray/dist_eval/contraction_eval.h @@ -1533,7 +1533,7 @@ class Summa TiledArray::detail::is_permutation_v>> Summa(const left_type& left, const right_type& right, World& world, const trange_type trange, const shape_type& shape, - const std::shared_ptr& pmap, const Perm& perm, + const std::shared_ptr& pmap, const Perm& perm, const op_type& op, const ordinal_type k, const ProcGrid& proc_grid) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), left_(left), diff --git a/src/TiledArray/dist_eval/dist_eval.h b/src/TiledArray/dist_eval/dist_eval.h index ec020b8259..b1056e0ac1 100644 --- a/src/TiledArray/dist_eval/dist_eval.h +++ b/src/TiledArray/dist_eval/dist_eval.h @@ -107,7 +107,7 @@ class DistEvalImpl : public TensorImpl, /// \note \c trange and \c shape will be permuted by \c perm before /// storing the data. DistEvalImpl(World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, + const std::shared_ptr& pmap, const Permutation& perm) : TensorImpl_(world, trange, shape, pmap), id_(world.unique_obj_id()), @@ -329,7 +329,9 @@ class DistEval { /// Tensor process map accessor /// \return A shared pointer to the process map of this tensor - const std::shared_ptr& pmap() const { return pimpl_->pmap(); } + const std::shared_ptr& pmap() const { + return pimpl_->pmap(); + } /// Query the density of the tensor diff --git a/src/TiledArray/dist_eval/unary_eval.h b/src/TiledArray/dist_eval/unary_eval.h index bb565a33db..b3707b92c2 100644 --- a/src/TiledArray/dist_eval/unary_eval.h +++ b/src/TiledArray/dist_eval/unary_eval.h @@ -70,8 +70,8 @@ class UnaryEvalImpl TiledArray::detail::is_permutation_v>> UnaryEvalImpl(const arg_type& arg, World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, const Perm& perm, - const op_type& op) + const std::shared_ptr& pmap, + const Perm& perm, const op_type& op) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), arg_(arg), op_(op) {} diff --git a/src/TiledArray/distributed_storage.h b/src/TiledArray/distributed_storage.h index a8ae5eb0d6..27c2885dcd 100644 --- a/src/TiledArray/distributed_storage.h +++ b/src/TiledArray/distributed_storage.h @@ -62,7 +62,7 @@ class DistributedStorage : public madness::WorldObject > { private: const size_type max_size_; ///< The maximum number of elements that can be ///< stored by this container - std::shared_ptr + std::shared_ptr pmap_; ///< The process map that defines the element distribution mutable container_type data_; ///< The local data container madness::AtomicInt num_live_ds_; ///< Number of live DelayedSet objects @@ -72,8 +72,8 @@ class DistributedStorage : public madness::WorldObject > { DistributedStorage_& operator=(const DistributedStorage_&); template - std::enable_if_t,value_type>, void> - set_handler(const size_type i, Value&& value) { + std::enable_if_t, value_type>, void> + set_handler(const size_type i, Value&& value) { future& f = get_local(i); // Check that the future has not been set already. @@ -90,10 +90,13 @@ class DistributedStorage : public madness::WorldObject > { } template - std::enable_if_t,value_type> || std::is_same_v,future>, void> + std::enable_if_t, value_type> || + std::is_same_v, future>, + void> set_remote(const size_type i, Value&& value) { - WorldObject_::task(owner(i), &DistributedStorage_::set_handler&>, i, std::forward(value), - madness::TaskAttributes::hipri()); + WorldObject_::task( + owner(i), &DistributedStorage_::set_handler&>, + i, std::forward(value), madness::TaskAttributes::hipri()); } struct DelayedSet : public madness::CallbackInterface { @@ -129,7 +132,7 @@ class DistributedStorage : public madness::WorldObject > { /// \param max_size The maximum capacity of this container /// \param pmap The process map for the container (default = null pointer) DistributedStorage(World& world, size_type max_size, - const std::shared_ptr& pmap) + const std::shared_ptr& pmap) : WorldObject_(world), max_size_(max_size), pmap_(pmap), @@ -146,7 +149,8 @@ class DistributedStorage : public madness::WorldObject > { virtual ~DistributedStorage() { if (num_live_ds_ != 0) { madness::print_error( - "DistributedStorage (object id=", this->id(), ") destroyed while " + "DistributedStorage (object id=", this->id(), + ") destroyed while " "outstanding tasks exist. Add a fence() to extend the lifetime of " "this object."); abort(); @@ -159,7 +163,7 @@ class DistributedStorage : public madness::WorldObject > { /// \return A shared pointer to the process map. /// \throw nothing - const std::shared_ptr& pmap() const { return pmap_; } + const std::shared_ptr& pmap() const { return pmap_; } /// Element owner @@ -317,9 +321,7 @@ class DistributedStorage : public madness::WorldObject > { /// Reports the number of live DelayedSet requests /// @return const reference to the atomic counter of live DelayedSet requests - const madness::AtomicInt& num_live_ds() const { - return num_live_ds_; - } + const madness::AtomicInt& num_live_ds() const { return num_live_ds_; } }; // class DistributedStorage } // namespace detail diff --git a/src/TiledArray/expressions/binary_engine.h b/src/TiledArray/expressions/binary_engine.h index 5b610202c2..4758ab0069 100644 --- a/src/TiledArray/expressions/binary_engine.h +++ b/src/TiledArray/expressions/binary_engine.h @@ -259,7 +259,7 @@ class BinaryEngine : public ExprEngine { /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles void init_distribution(World* world, - const std::shared_ptr& pmap) { + const std::shared_ptr& pmap) { left_.init_distribution(world, pmap); right_.init_distribution(world, left_.pmap()); ExprEngine_::init_distribution(world, left_.pmap()); diff --git a/src/TiledArray/expressions/blk_tsr_engine.h b/src/TiledArray/expressions/blk_tsr_engine.h index c0dfe5d658..2d16172dbe 100644 --- a/src/TiledArray/expressions/blk_tsr_engine.h +++ b/src/TiledArray/expressions/blk_tsr_engine.h @@ -249,7 +249,7 @@ class BlkTsrEngineBase : public LeafEngine { } void init_distribution(World* world, - const std::shared_ptr& pmap) { + const std::shared_ptr& pmap) { ExprEngine_::init_distribution( world, (pmap ? pmap diff --git a/src/TiledArray/expressions/cont_engine.h b/src/TiledArray/expressions/cont_engine.h index f71e44d869..35c2f34199 100644 --- a/src/TiledArray/expressions/cont_engine.h +++ b/src/TiledArray/expressions/cont_engine.h @@ -301,7 +301,8 @@ class ContEngine : public BinaryEngine { /// tensor. /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles - void init_distribution(World* world, std::shared_ptr pmap) { + void init_distribution(World* world, + std::shared_ptr pmap) { const unsigned int inner_rank = op_.gemm_helper().num_contract_ranks(); const unsigned int left_rank = op_.gemm_helper().left_rank(); const unsigned int right_rank = op_.gemm_helper().right_rank(); diff --git a/src/TiledArray/expressions/contraction_helpers.h b/src/TiledArray/expressions/contraction_helpers.h index 92696a365f..c85019aa0b 100644 --- a/src/TiledArray/expressions/contraction_helpers.h +++ b/src/TiledArray/expressions/contraction_helpers.h @@ -57,7 +57,7 @@ auto range_from_annotation(const IndexList_& target_idxs, RHSType&& rhs) { using range_type = std::decay_t; using size_type = typename range_type::size_type; - using extent_type = std::pair; + using extent_type = Range1; container::svector ranges; // Will be the ranges for each extent const auto& lrange = lhs.range(); diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index d0d84804bf..3b50373541 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -61,7 +61,7 @@ struct EngineParamOverride { pmap_interface; ///< Process map interface type World* world; - std::shared_ptr pmap; + std::shared_ptr pmap; const shape_type* shape; }; @@ -130,7 +130,8 @@ class Expr { } /// \param pmap the Pmap object to use for the result Expr& set_pmap( - const std::shared_ptr pmap) { + const std::shared_ptr + pmap) { if (override_ptr_) { override_ptr_->pmap = pmap; } else { @@ -385,7 +386,8 @@ class Expr { // Get the output process map. // If result's pmap is assigned use it as the initial guess // it will be assigned in engine.init - std::shared_ptr::array_type::pmap_interface> + std::shared_ptr< + const typename TsrExpr::array_type::pmap_interface> pmap; if (tsr.array().is_initialized()) pmap = tsr.array().pmap(); diff --git a/src/TiledArray/expressions/expr_engine.h b/src/TiledArray/expressions/expr_engine.h index 7b183a191a..bd4dbd9ccd 100644 --- a/src/TiledArray/expressions/expr_engine.h +++ b/src/TiledArray/expressions/expr_engine.h @@ -80,7 +80,7 @@ class ExprEngine : private NO_DEFAULTS { BipartitePermutation perm_; trange_type trange_; ///< The tiled range of the result tensor shape_type shape_; ///< The shape of the result tensor - std::shared_ptr + std::shared_ptr pmap_; ///< The process map for the result tensor std::shared_ptr > override_ptr_; ///< The engine params overriding the default @@ -109,7 +109,7 @@ class ExprEngine : private NO_DEFAULTS { /// \param world The world where the expression will be evaluated /// \param pmap The process map for the result tensor (may be NULL) /// \param target_indices The target index list of the result tensor - void init(World& world, std::shared_ptr pmap, + void init(World& world, std::shared_ptr pmap, const BipartiteIndexList& target_indices) { if (target_indices.size()) { derived().init_indices(target_indices); @@ -169,7 +169,7 @@ class ExprEngine : private NO_DEFAULTS { /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles void init_distribution(World* world, - const std::shared_ptr& pmap) { + const std::shared_ptr& pmap) { TA_ASSERT(world); TA_ASSERT(pmap); TA_ASSERT(pmap->procs() == @@ -241,7 +241,7 @@ class ExprEngine : private NO_DEFAULTS { /// Process map accessor /// \return A const reference to the process map - const std::shared_ptr& pmap() const { return pmap_; } + const std::shared_ptr& pmap() const { return pmap_; } /// Set the permute tiles flag diff --git a/src/TiledArray/expressions/leaf_engine.h b/src/TiledArray/expressions/leaf_engine.h index 2acbaa54ee..a69a4cd2c5 100644 --- a/src/TiledArray/expressions/leaf_engine.h +++ b/src/TiledArray/expressions/leaf_engine.h @@ -127,7 +127,7 @@ class LeafEngine : public ExprEngine { void init_indices() {} void init_distribution(World* world, - const std::shared_ptr& pmap) { + const std::shared_ptr& pmap) { ExprEngine_::init_distribution(world, (pmap ? pmap : array_.pmap())); } diff --git a/src/TiledArray/expressions/mult_engine.h b/src/TiledArray/expressions/mult_engine.h index 8d4e33de6f..19788505fd 100644 --- a/src/TiledArray/expressions/mult_engine.h +++ b/src/TiledArray/expressions/mult_engine.h @@ -346,7 +346,8 @@ class MultEngine : public ContEngine> { /// tensor. /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles - void init_distribution(World* world, std::shared_ptr pmap) { + void init_distribution(World* world, + std::shared_ptr pmap) { if (this->product_type() == TensorProduct::Contraction) ContEngine_::init_distribution(world, pmap); else @@ -599,7 +600,8 @@ class ScalMultEngine /// tensor. /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles - void init_distribution(World* world, std::shared_ptr pmap) { + void init_distribution(World* world, + std::shared_ptr pmap) { if (this->product_type() == TensorProduct::Contraction) ContEngine_::init_distribution(world, pmap); else diff --git a/src/TiledArray/expressions/unary_engine.h b/src/TiledArray/expressions/unary_engine.h index 81f0908146..621c4a71b3 100644 --- a/src/TiledArray/expressions/unary_engine.h +++ b/src/TiledArray/expressions/unary_engine.h @@ -136,7 +136,7 @@ class UnaryEngine : ExprEngine { /// \param world The world were the result will be distributed /// \param pmap The process map for the result tensor tiles void init_distribution(World* world, - const std::shared_ptr& pmap) { + const std::shared_ptr& pmap) { arg_.init_distribution(world, pmap); ExprEngine_::init_distribution(world, arg_.pmap()); } diff --git a/src/TiledArray/initialize.h b/src/TiledArray/initialize.h index 5c2647e5b5..c86fa1d151 100644 --- a/src/TiledArray/initialize.h +++ b/src/TiledArray/initialize.h @@ -17,23 +17,37 @@ bool initialized(); /// @return true if TiledArray has been finalized at least once bool finalized(); +// clang-format off /// @name TiledArray initialization. /// These functions initialize TiledArray and (if needed) MADWorld /// runtime. -/// @note the default World object is set to the object returned by these. -/// @warning MADWorld can only be initialized/finalized once, hence if -/// TiledArray initializes MADWorld -/// it can also be initialized/finalized only once. +// clang-format on /// @{ +// clang-format off +/// @param[in] argc the number of non-null strings pointed to by @p argv +/// @param[in] argv array of `argc+1` pointers to strings (the last of which is null) +/// specifying arguments passed to madness::initialize, MPI_Init_threads, and other similar initializers; +/// @param[in] comm the MADNESS communicator (an madness::SafeMPI::Intracomm object) to use for TiledArray computation +/// @param[in] quiet if true, will prevent initializers from writing to standard streams, if possible; the default is true /// @throw TiledArray::Exception if TiledArray initialized MADWorld and /// TiledArray::finalize() had been called -World& initialize( - int& argc, char**& argv, - const SafeMPI::Intracomm& comm, - bool quiet = true -); +/// @note - `argc` and `argv` are typically the values received by the main() function of the application +/// @note - variants of initialize that do not take `comm` will construct default communicator +/// @note - the default World object is set to the object returned by these. +/// @note - The following environment variables can be used to control TiledArray +/// initialization: +/// | Environment Variable | Default| Description | +/// |----------------------|--------|-------------| +/// | `TA_LINALG_BACKEND` | none | If set, chooses the linear algebra backend to use; valid values are `scalapack` (distributed library ScaLAPACK, only available if configured with `ENABLE_SCALAPACK=ON`), `lapack` (non-distributed library LAPACK, always available), and `ttg` (experimental [TTG](https://github.com/TESSEorg/TTG) backend, only implements Cholesky); the default is to choose best available backend automatically (recommended) | +/// | `TA_LINALG_DISTRIBUTED_MINSIZE` | 4194304 | Unless `TA_LINALG_BACKEND` is set, this controls the minimum matrix size (#rows times #columns) for which the distributed backend if chosen when selecting the best available backend | +/// @warning MADWorld can only be initialized/finalized once, hence if +/// TiledArray initializes MADWorld +/// it can also be initialized/finalized only once. +// clang-format on +World& initialize(int& argc, char**& argv, const SafeMPI::Intracomm& comm, + bool quiet = true); inline World& initialize(int& argc, char**& argv, bool quiet = true) { return TiledArray::initialize(argc, argv, SafeMPI::COMM_WORLD, quiet); diff --git a/src/TiledArray/math/linalg/basic.cpp b/src/TiledArray/math/linalg/basic.cpp new file mode 100644 index 0000000000..84535f42a3 --- /dev/null +++ b/src/TiledArray/math/linalg/basic.cpp @@ -0,0 +1,73 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * util.h + * Aug 8, 2022 + * + */ + +#include "TiledArray/external/madness.h" +#include "TiledArray/math/linalg/forward.h" + +namespace TiledArray::math::linalg { + +namespace detail { + +LinearAlgebraBackend& linalg_backend_accessor() { + static LinearAlgebraBackend backend = LinearAlgebraBackend::BestAvailable; + return backend; +} + +std::size_t& linalg_crossover_to_distributed_accessor() { + static std::size_t crossover_to_distributed = 2048 * 2048; + return crossover_to_distributed; +} + +} // namespace detail + +/// @return the linear algebra backend to use for matrix algebra +/// @note the default is LinearAlgebraBackend::BestAvailable +LinearAlgebraBackend get_linalg_backend() { + return detail::linalg_backend_accessor(); +} + +/// @param[in] b the linear algebra backend to use after this +/// @note this is a collective call over the default world +void set_linalg_backend(LinearAlgebraBackend b) { + get_default_world().gop.fence(); + detail::linalg_backend_accessor() = b; +} + +/// @return the crossover-to-distributed threshold specifies the matrix volume +/// for which to switch to the distributed-memory backend (if available) for +/// linear algebra solvers +std::size_t get_linalg_crossover_to_distributed() { + return detail::linalg_crossover_to_distributed_accessor(); +} + +/// @param[in] c the crossover-to-distributed threshold to use following this +/// call +/// @note this is a collective call over the default world +void set_linalg_crossover_to_distributed(std::size_t c) { + get_default_world().gop.fence(); + detail::linalg_crossover_to_distributed_accessor() = c; +} + +} // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/basic.h b/src/TiledArray/math/linalg/basic.h index 88f995f8ea..c245889e6a 100644 --- a/src/TiledArray/math/linalg/basic.h +++ b/src/TiledArray/math/linalg/basic.h @@ -26,11 +26,44 @@ #ifndef TILEDARRAY_MATH_LINALG_BASIC_H__INCLUDED #define TILEDARRAY_MATH_LINALG_BASIC_H__INCLUDED +#include "TiledArray/math/linalg/forward.h" + #include "TiledArray/dist_array.h" #include "TiledArray/external/eigen.h" namespace TiledArray::math::linalg { +namespace detail { + +template +inline bool prefer_distributed(const DistArray& matrix) { + const auto prefer_distributed = + get_linalg_backend() == LinearAlgebraBackend::BestAvailable && + matrix.elements_range().volume() >= + get_linalg_crossover_to_distributed() && + matrix.world().size() > 1; + return prefer_distributed; +} + +template +struct symmetric_matrix_shape { + symmetric_matrix_shape(T v) : v_(v) {} + T operator()(const Range::index_type& idx) const { + TA_ASSERT(idx.size() == 2); + if constexpr (Uplo == lapack::Uplo::Lower) { + return (idx[0] >= idx[1]) ? v_ : 0.; + } else if constexpr (Uplo == lapack::Uplo::Upper) { + return (idx[0] <= idx[1]) ? v_ : 0.; + } else { // Uplo == lapack::Uplo::General + return v_; + } + } + + T v_; +}; + +} // namespace detail + // freestanding adaptors for DistArray needed by solvers like DIIS template @@ -60,8 +93,20 @@ inline void axpy(DistArray& y, S alpha, y(vars) = y(vars) + numeric_type(alpha) * x(vars); } +namespace non_distributed {} +namespace scalapack {} +namespace ttg {} + } // namespace TiledArray::math::linalg +namespace TiledArray { +using TiledArray::math::linalg::get_linalg_backend; +using TiledArray::math::linalg::get_linalg_crossover_to_distributed; +using TiledArray::math::linalg::LinearAlgebraBackend; +using TiledArray::math::linalg::set_linalg_backend; +using TiledArray::math::linalg::set_linalg_crossover_to_distributed; +} // namespace TiledArray + namespace Eigen { // freestanding adaptors for Eigen::MatrixBase needed by solvers like DIIS @@ -109,4 +154,88 @@ inline auto norm2(const Eigen::MatrixBase& m) { } // namespace Eigen +#ifndef TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG +#if (TILEDARRAY_HAS_TTG && TILEDARRAY_HAS_SCALAPACK) +#define TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return TiledArray::math::linalg::ttg::FN; \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return scalapack::FN; \ + return non_distributed::FN; +#elif (TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK) +#define TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return TiledArray::math::linalg::ttg::FN; \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK) \ + TA_EXCEPTION("ScaLAPACK lineear algebra backend is not available"); \ + return non_distributed::FN; +#elif !TILEDARRAY_HAS_TTG && TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION("TTG linear algebra backend is not available"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return scalapack::FN; \ + return non_distributed::FN; +#else // !TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION("TTG linear algebra backend is not available"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK) \ + TA_EXCEPTION("ScaLAPACK lineear algebra backend is not available"); \ + return non_distributed::FN; +#endif // !TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK +#endif // defined(TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG) + +#ifndef TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG_STRINGIFY +#define TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG_STRINGIFY(FN) #FN +#endif // defined(TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG_STRINGIFY) + +#ifndef TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG +#if TILEDARRAY_HAS_TTG && TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION(TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG_STRINGIFY( \ + FN) " is not provided by the TTG backend"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return scalapack::FN; \ + return non_distributed::FN; +#elif TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION(TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG_STRINGIFY( \ + FN) " is not provided by the TTG backend"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK) \ + TA_EXCEPTION("ScaLAPACK lineear algebra backend is not available"); \ + return non_distributed::FN; +#elif !TILEDARRAY_HAS_TTG && TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION("TTG linear algebra backend is not available"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK || \ + TiledArray::math::linalg::detail::prefer_distributed(MATRIX)) \ + return scalapack::FN; \ + return non_distributed::FN; +#else // !TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK +#define TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(FN, MATRIX) \ + TA_MAX_THREADS; \ + if (get_linalg_backend() == LinearAlgebraBackend::TTG) \ + TA_EXCEPTION("TTG linear algebra backend is not available"); \ + if (get_linalg_backend() == LinearAlgebraBackend::ScaLAPACK) \ + TA_EXCEPTION("ScaLAPACK lineear algebra backend is not available"); \ + return non_distributed::FN; +#endif // !TILEDARRAY_HAS_TTG && !TILEDARRAY_HAS_SCALAPACK +#endif // defined(TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG) + #endif // TILEDARRAY_MATH_LINALG_BASIC_H__INCLUDED diff --git a/src/TiledArray/math/linalg/cholesky.h b/src/TiledArray/math/linalg/cholesky.h index 512d2fffcc..9ebc298354 100644 --- a/src/TiledArray/math/linalg/cholesky.h +++ b/src/TiledArray/math/linalg/cholesky.h @@ -28,6 +28,9 @@ #if TILEDARRAY_HAS_SCALAPACK #include #endif +#if TILEDARRAY_HAS_TTG +#include +#endif #include #include @@ -35,46 +38,26 @@ namespace TiledArray::math::linalg { template auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) - return scalapack::cholesky(A, l_trange); -#endif - return non_distributed::cholesky(A, l_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(cholesky(A, l_trange), A) } template auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) - return scalapack::cholesky_linv(A, l_trange); -#endif - return non_distributed::cholesky_linv(A, l_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_W_TTG(cholesky_linv(A, l_trange), A); } template auto cholesky_solve(const Array& A, const Array& B, TiledRange x_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) - return scalapack::cholesky_solve(A, B, x_trange); -#endif - return non_distributed::cholesky_solve(A, B, x_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(cholesky_solve(A, B, x_trange), A); } template auto cholesky_lsolve(Op transpose, const Array& A, const Array& B, TiledRange l_trange = TiledRange(), TiledRange x_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) - return scalapack::cholesky_lsolve(transpose, A, B, l_trange, - x_trange); -#endif - return non_distributed::cholesky_lsolve(transpose, A, B, l_trange, x_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG( + cholesky_lsolve(transpose, A, B, l_trange, x_trange), A); } } // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/forward.h b/src/TiledArray/math/linalg/forward.h index d22df73567..28c7e3c352 100644 --- a/src/TiledArray/math/linalg/forward.h +++ b/src/TiledArray/math/linalg/forward.h @@ -52,6 +52,25 @@ struct SVD { enum Vectors { ValuesOnly, LeftVectors, RightVectors, AllVectors }; }; +/// known linear algebra backends +enum LinearAlgebraBackend { + /// choose the best that's available, taking into consideration the problem + /// size and # of ranks + BestAvailable, + /// LAPACK on rank 0, followed by broadcast + LAPACK, + /// ScaLAPACK + ScaLAPACK, + /// TTG (currently only provides cholesky and cholesky_linv) + TTG +}; + +LinearAlgebraBackend get_linalg_backend(); +void set_linalg_backend(LinearAlgebraBackend b); + +std::size_t get_linalg_crossover_to_distributed(); +void set_linalg_crossover_to_distributed(std::size_t c); + } // namespace TiledArray::math::linalg namespace TiledArray { diff --git a/src/TiledArray/math/linalg/heig.h b/src/TiledArray/math/linalg/heig.h index 4ab01dd760..6a0b4e938b 100644 --- a/src/TiledArray/math/linalg/heig.h +++ b/src/TiledArray/math/linalg/heig.h @@ -35,25 +35,13 @@ namespace TiledArray::math::linalg { template auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) { - return scalapack::heig(A, evec_trange); - } -#endif - return non_distributed::heig(A, evec_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(heig(A, evec_trange), A); } template auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) { - return scalapack::heig(A, B, evec_trange); - } -#endif - return non_distributed::heig(A, B, evec_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(heig(A, B, evec_trange), A); } } // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/lu.h b/src/TiledArray/math/linalg/lu.h index 6147d4f2c9..04e5545854 100644 --- a/src/TiledArray/math/linalg/lu.h +++ b/src/TiledArray/math/linalg/lu.h @@ -36,24 +36,12 @@ namespace TiledArray::math::linalg { template auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) { - return scalapack::lu_solve(A, B, x_trange); - } -#endif - return non_distributed::lu_solve(A, B, x_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(lu_solve(A, B, x_trange), A); } template auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) { - return scalapack::lu_inv(A, ainv_trange); - } -#endif - return non_distributed::lu_inv(A, ainv_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(lu_inv(A, ainv_trange), A); } } // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/non-distributed/cholesky.h b/src/TiledArray/math/linalg/non-distributed/cholesky.h index 7f333b2617..4196002533 100644 --- a/src/TiledArray/math/linalg/non-distributed/cholesky.h +++ b/src/TiledArray/math/linalg/non-distributed/cholesky.h @@ -66,6 +66,7 @@ auto rank_local_cholesky(const DistArray& A) { * * @returns The lower triangular Cholesky factor L in TA format * @note this is a collective operation with respect to the world of @p A + * @throw TiledArray::Exception if A is not HPD */ template >> @@ -89,9 +90,10 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { * is_contiguous_tensor_v is true) * * @param[in] A Input array to be diagonalized. Must be rank-2 - * @returns The lower triangular Cholesky factor L as a ContiguousTensor + * @return The lower triangular Cholesky factor L as a DistArray * @note this is a non-collective operation, only computes on the rank on which * invoked + * @throw TiledArray::Exception if A is not HPD */ template >> diff --git a/src/TiledArray/math/linalg/qr.h b/src/TiledArray/math/linalg/qr.h index 1683e6792c..5d7f705b41 100644 --- a/src/TiledArray/math/linalg/qr.h +++ b/src/TiledArray/math/linalg/qr.h @@ -13,41 +13,39 @@ namespace TiledArray::math::linalg { template -auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), - TiledRange r_trange = TiledRange() ) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (V.world().size() > 1 && V.elements_range().volume() > 10000000) { - return scalapack::householder_qr( V, q_trange, r_trange ); - } -#endif - return non_distributed::householder_qr( V, q_trange, r_trange ); +auto householder_qr(const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange()) { + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG( + householder_qr(V, q_trange, r_trange), V); } template -auto cholesky_qr( const ArrayV& V, TiledRange r_trange = TiledRange() ) { +auto cholesky_qr(const ArrayV& V, TiledRange r_trange = TiledRange()) { TA_MAX_THREADS; // Form Grammian - ArrayV G; G("i,j") = V("k,i").conj() * V("k,j"); + ArrayV G; + G("i,j") = V("k,i").conj() * V("k,j"); // Obtain Cholesky L and its inverse - auto [L, Linv] = cholesky_linv( G, r_trange ); + auto [L, Linv] = cholesky_linv(G, r_trange); // Q = V * L**-H - ArrayV Q; Q("i,j") = V("i,k") * Linv("j,k").conj(); + ArrayV Q; + Q("i,j") = V("i,k") * Linv("j,k").conj(); if constexpr (not QOnly) { // R = L**H - ArrayV R; R("i,j") = L("j,i"); - return std::make_tuple( Q, R ); - } else return Q; - + ArrayV R; + R("i,j") = L("j,i"); + return std::make_tuple(Q, R); + } else + return Q; } } // namespace TiledArray::math::linalg namespace TiledArray { - using TiledArray::math::linalg::householder_qr; - using TiledArray::math::linalg::cholesky_qr; +using TiledArray::math::linalg::cholesky_qr; +using TiledArray::math::linalg::householder_qr; } // namespace TiledArray #endif diff --git a/src/TiledArray/math/linalg/svd.h b/src/TiledArray/math/linalg/svd.h index 2eaf7a8598..7a843c6fb6 100644 --- a/src/TiledArray/math/linalg/svd.h +++ b/src/TiledArray/math/linalg/svd.h @@ -25,7 +25,7 @@ #define TILEDARRAY_MATH_LINALG_SVD_H__INCLUDED #include -#ifdef TILEDARRAY_HAS_SCALAPACK +#if TILEDARRAY_HAS_SCALAPACK #include #endif // TILEDARRAY_HAS_SCALAPACK #include @@ -36,13 +36,8 @@ namespace TiledArray::math::linalg { template auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { - TA_MAX_THREADS; -#if TILEDARRAY_HAS_SCALAPACK - if (A.world().size() > 1 && A.elements_range().volume() > 10000000) { - return scalapack::svd(A, u_trange, vt_trange); - } -#endif - return non_distributed::svd(A, u_trange, vt_trange); + TILEDARRAY_MATH_LINALG_DISPATCH_WO_TTG(svd(A, u_trange, vt_trange), + A); } } // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/ttg/cholesky.h b/src/TiledArray/math/linalg/ttg/cholesky.h new file mode 100644 index 0000000000..66a67a8034 --- /dev/null +++ b/src/TiledArray/math/linalg/ttg/cholesky.h @@ -0,0 +1,201 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * cholesky.h + * Created: 26 July, 2022 + * + */ + +#ifndef TILEDARRAY_MATH_LINALG_TTG_CHOL_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_TTG_CHOL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_TTG + +#include +#include + +#include +#include + +namespace TiledArray::math::linalg::ttg { + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting Cholesky factor. If left + * empty, will default to array.trange() + * @param[in] NB ScaLAPACK block size. Defaults to 128 + * + * @returns The lower triangular Cholesky factor L in TA format + */ +template +auto cholesky(const Array& A, TiledRange l_trange = {}, + size_t NB = default_block_size()) { + using value_type = typename Array::element_type; + using Tile = typename Array::value_type; + using Policy = typename Array::policy_type; + using shape_type = typename Policy::shape_type; + constexpr auto is_dense_policy = is_dense_v; + + ::ttg::Edge> input; + ::ttg::Edge> output; + const auto A_descr = MatrixDescriptor( + A); // make_potrf_ttg keeps references to this, must outlive all work + auto potrf_ttg = potrf::make_potrf_ttg(A_descr, input, output, + /* defer_write = */ true); + + // make result + shape_type L_shape; + if constexpr (!is_dense_policy) + L_shape = shape_type( + math::linalg::detail::symmetric_matrix_shape(1), + A.trange()); + Array L(A.world(), A.trange(), L_shape, + A.pmap()); // potrf produces a dense result for now + auto store_potrf_ttg = + make_writer_ttg( + L, output, /* defer_write = */ true); + + [[maybe_unused]] auto connected = make_graph_executable(potrf_ttg.get()); + + // uncomment to trace + ::ttg::trace_on(); + + // start + ::ttg::execute(); + + // *now* "connect" input data to TTG + // TTG expect lower triangle of the matrix, and col-major tiles + flow_matrix_to_tt( + A, potrf_ttg.get()); + + A.world().gop.fence(); + ::ttg::fence(); + + return L; +} + +/** + * @brief Compute the inverse of the Cholesky factor of an HPD rank-2 tensor. + * Optionally return the Cholesky factor itself + * + * A(i,j) = L(i,k) * conj(L(j,k)) -> compute Linv + * + * Example Usage: + * + * auto Linv = cholesky_Linv(A, ...) + * auto [L,Linv] = cholesky_Linv(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * @tparam Both Whether or not to return the cholesky factor + * + * @param[in] A Input array to be diagonalized. Must be rank-2 + * @param[in] l_trange TiledRange for resulting inverse Cholesky factor. + * If left empty, will default to array.trange() + * @param[in] NB target block size. Defaults to 128 + * + * @returns The inverse lower triangular Cholesky factor in TA format + */ +template +auto cholesky_linv(const Array& A, TiledRange l_trange = {}, + size_t NB = default_block_size()) { + using value_type = typename Array::element_type; + using Tile = typename Array::value_type; + using Policy = typename Array::policy_type; + using shape_type = typename Policy::shape_type; + constexpr auto is_dense_policy = is_dense_v; + + ::ttg::Edge> input; + ::ttg::Edge> potrf2trtri; + ::ttg::Edge> potrf_output; + ::ttg::Edge> trtri_output; + const auto A_descr = MatrixDescriptor( + A); // make_potrf_ttg and make_trtri_ttg keep references to this, must + // outlive all work + auto portf_out_edge = + Both ? ::ttg::fuse(potrf2trtri, potrf_output) : potrf2trtri; + auto potrf_ttg = potrf::make_potrf_ttg(A_descr, input, portf_out_edge, + /* defer_write = */ true); + auto trtri_ttg = trtri_LOWER::make_trtri_ttg(A_descr, lapack::Diag::NonUnit, + potrf2trtri, trtri_output, + /* defer_write = */ true); + + // make result(s) + shape_type L_shape; + if constexpr (!is_dense_policy) + L_shape = shape_type( + math::linalg::detail::symmetric_matrix_shape(1), + A.trange(), + /* per-element norm values already */ true); + Array L; + if constexpr (Both) + L = Array(A.world(), A.trange(), L_shape, + A.pmap()); // trtri produces a dense result for now + Array Linv(A.world(), A.trange(), L_shape, // Linv has same shape as L + A.pmap()); // trtri produces a dense result for now + std::unique_ptr<::ttg::TTBase> store_potrf_tt_ptr; + if constexpr (Both) { + auto store_potrf_ttg = + make_writer_ttg( + L, potrf_output, /* defer_write = */ true); + store_potrf_tt_ptr = std::move(store_potrf_ttg); + } + auto store_trtri_ttg = + make_writer_ttg( + Linv, trtri_output, /* defer_write = */ true); + + [[maybe_unused]] auto connected = make_graph_executable(trtri_ttg.get()); + + // uncomment to trace + ::ttg::trace_on(); + + // start + ::ttg::execute(); + + // *now* "connect" input data to TTG + // TTG expect lower triangle of the matrix, and col-major tiles + flow_matrix_to_tt( + A, potrf_ttg.get()); + + A.world().gop.fence(); + ::ttg::fence(); + + // Copy L if needed + if constexpr (Both) { + return std::tuple(L, Linv); + } else + return Linv; +} + +} // namespace TiledArray::math::linalg::ttg + +#endif // TILEDARRAY_HAS_TTG +#endif // TILEDARRAY_MATH_LINALG_TTG_CHOL_H__INCLUDED diff --git a/src/TiledArray/math/linalg/ttg/util.h b/src/TiledArray/math/linalg/ttg/util.h new file mode 100644 index 0000000000..1a9e60b7ba --- /dev/null +++ b/src/TiledArray/math/linalg/ttg/util.h @@ -0,0 +1,318 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * util.h + * Created: 26 July, 2022 + * + */ + +#ifndef TILEDARRAY_MATH_LINALG_TTG_UTIL_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_TTG_UTIL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_TTG + +#include +#include + +#include + +#include +#include + +namespace TiledArray::math::linalg::ttg { + +namespace detail { +inline std::size_t& default_block_size_accessor() { + static std::size_t block_size = 128; + return block_size; +} +} // namespace detail + +inline std::size_t default_block_size() { + return detail::default_block_size_accessor(); +} + +inline void set_default_block_size(std::size_t NB) { + TA_ASSERT(NB > 0); + detail::default_block_size_accessor() = NB; +} + +template +class MatrixDescriptor { + public: + using element_type = typename Tile::value_type; + + MatrixDescriptor(const DistArray& da) + : trange_(da.trange()), pmap_(da.pmap()), shape_(da.shape_shared()) { + TA_ASSERT(trange_.rank() == 2); + } + + /** Number of tiled rows **/ + auto rows(void) const { return trange_.dim(0).tile_extent(); } + + /** Number of rows in tile */ + // int rows_in_tile(void) const { + // return pm->super.mb; + // } + + /** Number of rows in the matrix */ + auto rows_in_matrix(void) const { return trange_.dim(0).extent(); } + + /** Number of tiled columns **/ + auto cols(void) const { return trange_.dim(1).tile_extent(); } + + /** Number of columns in tile */ + // int cols_in_tile(void) const { + // return pm->super.nb; + // } + + /** Number of columns in the matrix */ + auto cols_in_matrix(void) const { return trange_.dim(1).extent(); } + + /* The rank storing the tile at {row, col} */ + int rank_of(int row, int col) const { + const auto ord = trange_.tiles_range().ordinal({row, col}); + return pmap_->owner(ord); + } + + bool is_local(int row, int col) const { + return ::ttg::default_execution_context().rank() == rank_of(row, col); + } + + private: + TiledRange trange_; + std::shared_ptr pmap_; + std::shared_ptr shape_; +}; + +namespace detail { + +/// sends ptr to managed data as MatrixTile to a TT +template +struct ForwardMatrixTile : public madness::CallbackInterface { + using index1_type = TiledRange1::index1_type; + using tile_type = Tile; + using element_type = typename tile_type::value_type; + + ForwardMatrixTile(::ttg::TerminalBase* term, index1_type r, index1_type c, + index1_type r_extent, index1_type c_extent, + const madness::Future* rc_fut) + : in(static_cast<::ttg::In>*>(term)), + r(r), + c(c), + r_extent(r_extent), + c_extent(c_extent), + rc_fut(rc_fut) {} + + void notify() override { + const auto& tile = rc_fut->get(); + const auto tile_range = tile.range(); + TA_ASSERT(r_extent == tile_range.dim(0).extent()); + TA_ASSERT(c_extent == tile_range.dim(1).extent()); + + // currently TTG assumes col-major tiles + if constexpr (Layout == lapack::Layout::RowMajor) { + in->send(Key2(r, c), + MatrixTile( + r_extent, c_extent, + // WARNING: const-smell since all interfaces uses + // MatrixTile + const_cast(tile.data()), c_extent)); + } else { + auto tile_permuted = permute(tile, Permutation{1, 0}); + in->send(Key2(r, c), + MatrixTile(r_extent, c_extent, + tile_permuted.data_shared(), r_extent)); + } + delete this; + } + + ::ttg::In>* in; + index1_type r; + index1_type c; + index1_type r_extent; + index1_type c_extent; + const madness::Future* rc_fut; +}; + +} // namespace detail + +// clang-format off +/// connects a matrix represented by DistArray to the in terminals of a TT/TTG + +/// @tparam Layout specifies the layout of the tiles expected by the receiving TT/TTG +/// @tparam Uplo specifies whether to write: +/// - `lapack::Uplo::General`: all data +/// - `lapack::Uplo::Lower`: lower triangle only (upper triangle of the diagonal tiles is untouched) +/// - `lapack::Uplo::Upper`: upper triangle only (lower triangle of the diagonal tiles is untouched) +/// @param A the DistArray object whose data will flow to @p tt +/// @param tt the TT/TTG object that will receive the data +// clang-format on +template +void flow_matrix_to_tt(const DistArray& A, ::ttg::TTBase* tt) { + using element_type = typename Tile::value_type; + const auto ntiles_row = TiledArray::extent(A.trange().tiles_range().dim(0)); + const auto ntiles_col = TiledArray::extent(A.trange().tiles_range().dim(1)); + for (Range1::index1_type r = 0; r < ntiles_row; r++) { + const auto r_extent = TiledArray::extent(A.trange().dim(0).tile(r)); + const Range1::index1_type cbegin = + Uplo == lapack::Uplo::Upper ? std::min(r + 1, ntiles_col) : 0; + const Range1::index1_type cfence = + Uplo == lapack::Uplo::Lower ? std::min(r + 1, ntiles_col) : ntiles_col; + for (Range1::index1_type c = cbegin; c < cfence; c++) { + if (A.is_local({r, c})) { + const auto c_extent = TiledArray::extent(A.trange().dim(1).tile(c)); + if (!A.is_zero({r, c})) { + auto& rc_fut = A.find_local({r, c}); + const_cast&>(rc_fut).register_callback( + new detail::ForwardMatrixTile( + tt->template in<0>(), r, c, r_extent, c_extent, &rc_fut)); + } else { + static_cast<::ttg::In>*>( + tt->template in<0>()) + ->send( + Key2(r, c), + MatrixTile( + r_extent, c_extent, + Layout == lapack::Layout::RowMajor ? c_extent : r_extent) + .fill(0.)); + } + } + } + } +} + +// clang-format off +/// creates a TTG that will write MatrixTile's to a DistArray + +/// @tparam Layout specifies the layout of the incoming tiles +/// @tparam Uplo specifies whether to write: +/// - `lapack::Uplo::General`: all data +/// - `lapack::Uplo::Lower`: lower triangle only (upper triangle of the diagonal tiles is zeroed out) +/// - `lapack::Uplo::Upper`: upper triangle only (lower triangle of the diagonal tiles is zeroed out) +/// @tparam Tile a tile type +/// @tparam Policy a policy type +/// @param A the DistArray object that will contain the data +/// @param result the Edge that connects the resulting TTG to the source +/// @param defer_write +/// @return a unique_ptr to the TTG object that will write MatrixTile's to @p A +// clang-format on +template +auto make_writer_ttg( + DistArray& A, + ::ttg::Edge>& result, + bool defer_write) { + using T = typename Tile::value_type; + constexpr auto is_dense_policy = is_dense_v; + constexpr auto is_sparse_policy = !is_dense_policy; + + auto keymap2 = [pmap = A.pmap_shared(), + range = A.trange().tiles_range()](const Key2& key) { + const auto IJ = range.ordinal({key.I, key.J}); + return pmap->owner(IJ); + }; + + auto f = [&A](const Key2& key, MatrixTile&& tile, std::tuple<>& out) { + TA_ASSERT( + tile.lda() == + (Layout == lapack::Layout::ColMajor + ? tile.rows() + : tile.cols())); // the code below only works if tile's LD == rows + const int I = key.I; + const int J = key.J; + auto rng = A.trange().make_tile_range({I, J}); + if constexpr (Uplo != lapack::Uplo::General) { + if (I != J && + (is_dense_policy || + (is_sparse_policy && + !A.template is_zero( + {J, I})))) { // write zero tiles that A does not know are zero + if constexpr (Uplo == lapack::Uplo::Lower) { // zero out upper + TA_ASSERT(I > J); + A.set({J, I}, Tile(A.trange().make_tile_range({J, I}), 0.0)); + } + if constexpr (Uplo == lapack::Uplo::Upper) { // zero out lower + TA_ASSERT(I < J); + A.set({I, J}, Tile(rng, 0.0)); + } + } + } + + // incoming data is moved if RowMajor, else need to permute + auto tile_IJ = Layout == lapack::Layout::ColMajor + ? permute(Tile(A.trange().make_tile_range({J, I}), 1, + std::move(std::move(tile).yield_data())), + Permutation{1, 0}) + : Tile(rng, 1, std::move(std::move(tile).yield_data())); + // zero out the lower/upper triangle of the diagonal + // tiles + if constexpr (Uplo != lapack::Uplo::General) { + if (I == J) { + const auto lo = rng.lobound_data()[0]; + const auto up = rng.upbound_data()[0]; + if constexpr (Uplo == lapack::Uplo::Lower) { // zero out upper + Range1::index1_type ij = 1; + auto* tile_IJ_data = tile_IJ.data(); + for (auto i = lo; i != up; ++i, ij += (1 + i - lo)) { + for (auto j = i + 1; j != up; ++j, ++ij) { + tile_IJ_data[ij] = 0.0; + } + } + } + if constexpr (Uplo == lapack::Uplo::Upper) { // zero out lower + Range1::index1_type ij = 0; + auto* tile_IJ_data = tile_IJ.data(); + for (auto i = lo; i != up; ++i) { + for (auto j = lo; j != i; ++j, ++ij) { + tile_IJ_data[ij] = 0.0; + } + ij += up - i; + } + } + } + } + TA_ASSERT(!A.template is_zero( + {I, J})); // make sure that the nonzero tile produced by TTG is known + // to be non-zero by A + A.set({I, J}, std::move(tile_IJ)); + if (::ttg::tracing()) ::ttg::print("WRITE2TA(", key, ")"); + }; + + auto result_tt = ::ttg::make_tt(f, ::ttg::edges(result), ::ttg::edges(), + "Final Output", {"result"}, {}); + result_tt->set_keymap(keymap2); + result_tt->set_defer_writer(defer_write); + + auto ins = std::make_tuple(result_tt->template in<0>()); + std::vector> ops(1); + ops[0] = std::move(result_tt); + + return make_ttg(std::move(ops), ins, std::make_tuple(), "Result Writer"); +} + +} // namespace TiledArray::math::linalg::ttg + +#endif // TILEDARRAY_HAS_TTG + +#endif // TILEDARRAY_MATH_LINALG_TTG_UTIL_H__INCLUDED diff --git a/src/TiledArray/policies/dense_policy.h b/src/TiledArray/policies/dense_policy.h index d64088f5c7..39247f368c 100644 --- a/src/TiledArray/policies/dense_policy.h +++ b/src/TiledArray/policies/dense_policy.h @@ -50,9 +50,9 @@ class DensePolicy { /// \param world The world of the process map /// \param size The number of tiles in the array /// \return A shared pointer to a process map - static std::shared_ptr default_pmap(World& world, - const std::size_t size) { - return std::make_shared(world, size); + static std::shared_ptr default_pmap( + World& world, const std::size_t size) { + return std::make_shared(world, size); } }; // class DensePolicy diff --git a/src/TiledArray/policies/sparse_policy.h b/src/TiledArray/policies/sparse_policy.h index 8e6ea0524a..181c40ad84 100644 --- a/src/TiledArray/policies/sparse_policy.h +++ b/src/TiledArray/policies/sparse_policy.h @@ -47,9 +47,9 @@ class SparsePolicy { /// \param world The world of the process map /// \param size The number of tiles in the array /// \return A shared pointer to a process map - static std::shared_ptr default_pmap(World& world, - const std::size_t size) { - return std::make_shared(world, size); + static std::shared_ptr default_pmap( + World& world, const std::size_t size) { + return std::make_shared(world, size); } }; // class SparsePolicy diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index cad65bf229..0b35f1e951 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -21,6 +21,7 @@ #define TILEDARRAY_RANGE_H__INCLUDED #include +#include #include #include #include @@ -672,9 +673,9 @@ class Range { /// \param d the dimension index, a nonnegative integer less than rank() /// \return the pair of {lower,upper} bounds for dimension \c d - std::pair dim(std::size_t d) const { + Range1 dim(std::size_t d) const { TA_ASSERT(d < rank()); - return std::make_pair(lobound(d), upbound(d)); + return Range1(lobound(d), upbound(d)); } /// Range lower bound data accessor diff --git a/src/TiledArray/range1.h b/src/TiledArray/range1.h new file mode 100644 index 0000000000..ee58032ff1 --- /dev/null +++ b/src/TiledArray/range1.h @@ -0,0 +1,247 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#ifndef TILEDARRAY_RANGE1_H__INCLUDED +#define TILEDARRAY_RANGE1_H__INCLUDED + +#include + +#include +#include + +#include + +namespace TiledArray { + +/// an integer range `[first,second)` +/// @note previously represented by std::pair, hence the design +struct Range1 { + typedef TA_1INDEX_TYPE index1_type; + index1_type first = 0; + index1_type second = 0; //< N.B. second >= first + + Range1() = default; + Range1(Range1 const&) = default; + Range1(Range1&&) = default; + Range1& operator=(Range1 const&) = default; + Range1& operator=(Range1&&) = default; + + /// constructs `[0,end)` + /// \param[in] end the value immediately past the last value in the range + /// \pre 0 <= end + template , Range1> && + std::is_integral_v>> + explicit Range1(I end) : second(static_cast(end)) { + TA_ASSERT(second >= first); + } + + /// constructs `[begin,end)` + /// \param[in] begin the first value in the range + /// \param[in] end the value immediately past the last value in the range + /// \pre begin <= end + template && + std::is_integral_v>> + explicit Range1(I1 begin, I2 end) + : first(static_cast(begin)), + second(static_cast(end)) { + TA_ASSERT(second >= first); + } + + /// @return the lower bound of this range, i.e. first + auto lobound() const noexcept { return first; } + + /// @return the upper bound of this range, i.e. second + auto upbound() const noexcept { return second; } + + /// @return the extent of this range, i.e. second - first + auto extent() const noexcept { return second - first; } + + /// swaps `*this` with @p other + /// @p other a Range1 object + void swap(Range1& other) noexcept { + std::swap(first, other.first); + std::swap(second, other.second); + } + + /// converts this to a std::pair + /// @return std::pair representation of this + explicit operator std::pair() const { + return std::make_pair(first, second); + } + + /// \brief Range1 iterator type + /// + /// Iterates over Range1 + class Iterator + : public boost::iterator_facade { + public: + Iterator(value_type v) : v(v) {} + + private: + value_type v; + + friend class boost::iterator_core_access; + + /// \brief increments this iterator + void increment() { ++v; } + /// \brief decrements this iterator + void decrement() { --v; } + + /// \brief advances this iterator by `n` positions + void advance(difference_type n) { v += n; } + + /// \brief advances this iterator by `n` positions + auto distance_to(Iterator other) { return other.v - v; } + + /// \brief Iterator comparer + /// \return true, if \c `*this==*other` + bool equal(Iterator const& other) const { return this->v == other.v; } + + /// \brief dereferences this iterator + /// \return const reference to the current index + const auto& dereference() const { return v; } + }; + friend class Iterator; + + typedef Iterator const_iterator; ///< Iterator type + + /// Begin local element iterator + + /// \return An iterator that points to the beginning of the local element set + virtual const_iterator begin() const { return Iterator{first}; } + + /// End local element iterator + + /// \return An iterator that points to the beginning of the local element set + virtual const_iterator end() const { return Iterator{second}; } + + /// Begin local element iterator + + /// \return An iterator that points to the beginning of the local element set + const const_iterator cbegin() const { return begin(); } + + /// End local element iterator + + /// \return An iterator that points to the beginning of the local element set + const const_iterator cend() const { return end(); } + + /// @} + + template >>::type* = nullptr> + void serialize(Archive& ar) { + ar& first& second; + } + + template >>::type* = nullptr> + void serialize(Archive& ar) const { + ar& first& second; + } +}; + +inline bool operator==(const Range1& x, const Range1& y) { + return x.first == y.first && x.second == y.second; +} + +inline bool operator!=(const Range1& x, const Range1& y) { return !(x == y); } + +/// Exchange the data of the two given ranges. +inline void swap(Range1& r0, Range1& r1) { // no throw + r0.swap(r1); +} + +/// Test that two Range1 objects are congruent + +/// This function tests that the sizes of the two Range1 objects coincide. +/// \param r1 an Range1 object +/// \param r2 an Range1 object +inline bool is_congruent(const Range1& r1, const Range1& r2) { + return r1.extent() == r2.extent(); +} + +inline auto extent(const Range1& r) { return r.extent(); } + +template +constexpr auto get(const Range1& r) { + static_assert(I < 2, ""); + if constexpr (I == 0) + return r.first; + else + return r.second; +} + +/// constructs `[0,end)` +/// \param[in] end the value immediately past the last value in the range +/// \pre 0 <= end +/// \note syntactic sugar a la boost::irange +template +Range1 irange(I end) { + return Range1{end}; +} + +/// constructs `[begin,end)` +/// \param[in] begin the first value in the range +/// \param[in] end the value immediately past the last value in the range +/// \pre begin <= end +/// \note syntactic sugar a la boost::irange +template && + std::is_integral_v>> +Range1 irange(I1 begin, I2 end) { + return Range1{begin, end}; +} + +} // namespace TiledArray + +// Range1 is tuple-like +namespace std { +template <> +struct tuple_size<::TiledArray::Range1> { + static constexpr std::size_t value = 2; +}; + +template <> +struct tuple_element<0, ::TiledArray::Range1> { + using type = decltype(::TiledArray::Range1::first); +}; +template <> +struct tuple_element<1, ::TiledArray::Range1> { + using type = decltype(::TiledArray::Range1::second); +}; + +} // namespace std + +namespace boost { +template +constexpr auto get(const ::TiledArray::Range1& r) { + static_assert(I < 2, ""); + if constexpr (I == 0) + return r.first; + else + return r.second; +} +} // namespace boost + +#endif // TILEDARRAY_RANGE1_H__INCLUDED diff --git a/src/TiledArray/sparse_shape.h b/src/TiledArray/sparse_shape.h index b63820af32..c9fb723378 100644 --- a/src/TiledArray/sparse_shape.h +++ b/src/TiledArray/sparse_shape.h @@ -296,11 +296,36 @@ class SparseShape { zero_tile_count_(tile_norm < threshold_ ? trange.tiles_range().area() : 0ul) {} - /// "Dense" constructor + /// Constructs a SparseShape from a functor returning norm values - /// This constructor will scale the tile norms, i.e. multiply each tile norm - /// by the inverse of its volume. - /// \param tile_norms The Frobenius norm of tiles by default + /// \tparam Op callable of signature `value_type(const Range::index&)` + /// \param tile_norm_op a functor that returns Frobenius norms of tiles + /// \param trange The tiled range of the tensor + /// \param do_not_scale if true, assume that the tile norms in \c tile_norms + /// are already scaled + template < + typename Op, + typename = std::enable_if_t, const Range::index_type&>>> + SparseShape(Op&& tile_norm_op, const TiledRange& trange, + bool do_not_scale = false) + : tile_norms_(trange.tiles_range(), std::forward(tile_norm_op)), + size_vectors_(initialize_size_vectors(trange)), + zero_tile_count_(0ul) { + TA_ASSERT(!tile_norms_.empty()); + TA_ASSERT(tile_norms_.range() == trange.tiles_range()); + + if (!do_not_scale) { + zero_tile_count_ = scale_tile_norms( + tile_norms_, size_vectors_.get()); + } else { + zero_tile_count_ = screen_out_zero_tiles(); + } + } + + /// Constructor from a tensor of (scaled/unscaled) norm values + + /// \param tile_norms The Frobenius norm of tiles /// \param trange The tiled range of the tensor /// \param do_not_scale if true, assume that the tile norms in \c tile_norms /// are already scaled diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index f6106a13b7..5fe2a3cb21 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -190,21 +190,11 @@ class Tensor { this->data_ = std::shared_ptr(ptr, std::move(deleter)); } - /// Construct a tensor with a range equal to \c range. The data is - /// uninitialized. - /// \param range The range of the tensor - /// \param batch_size The batch size - /// \param data shared pointer to the data - Tensor(const range_type& range, size_t batch_size, - std::shared_ptr data) - : range_(range), batch_size_(batch_size), data_(data) {} - range_type range_; ///< range size_t batch_size_ = 1; std::shared_ptr data_; ///< Shared pointer to the data public: - // Compiler generated functions Tensor() = default; /// Construct a tensor with a range equal to \c range. The data is @@ -241,6 +231,22 @@ class Tensor { detail::tensor_init([value]() -> Value { return value; }, *this); } + /// Construct a tensor with a fill op that takes an element index + + /// \tparam ElementIndexOp callable of signature `value_type(const + /// Range::index_type&)` \param range An array with the size of of each + /// dimension \param element_idx_op a callable of type ElementIndexOp + template >> + Tensor(const range_type& range, const ElementIndexOp& element_idx_op) + : Tensor(range, 1, default_construct{false}) { + auto* data_ptr = data_.get(); + for (auto&& element_idx : range) { + data_ptr[range.ordinal(element_idx)] = element_idx_op(element_idx); + } + } + /// Construct an evaluated tensor template data) + : range_(range), batch_size_(batch_size), data_(data) {} + /// The batch size accessor /// @return the size of tensor batch represented by `*this` @@ -663,9 +677,19 @@ class Tensor { /// Mutable access to the data - /// \return A const pointer to the tensor data + /// \return A mutable pointer to the tensor data pointer data() { return this->data_.get(); } + /// Read-only shared_ptr to the data + + /// \return A const shared_ptr to the tensor data + std::shared_ptr data_shared() const { return this->data_; } + + /// Mutable shared_ptr to the data + + /// \return A mutable shared_ptr to the tensor data + std::shared_ptr data_shared() { return this->data_; } + /// Test if the tensor is empty /// \return \c true if this tensor was default constructed (contains no diff --git a/src/TiledArray/tensor_impl.h b/src/TiledArray/tensor_impl.h index 05374834f6..f13be27e01 100644 --- a/src/TiledArray/tensor_impl.h +++ b/src/TiledArray/tensor_impl.h @@ -49,10 +49,10 @@ class TensorImpl : private NO_DEFAULTS { pmap_interface; ///< Process map interface type private: - World& world_; ///< World that contains - const trange_type trange_; ///< Tiled range type - const shape_type shape_; ///< Tensor shape - std::shared_ptr pmap_; ///< Process map for tiles + World& world_; ///< World that contains + const trange_type trange_; ///< Tiled range type + std::shared_ptr shape_; ///< Tensor shape + std::shared_ptr pmap_; ///< Process map for tiles public: /// Constructor @@ -68,14 +68,17 @@ class TensorImpl : private NO_DEFAULTS { /// \throw TiledArray::Exception When the size of shape is not equal to /// zero TensorImpl(World& world, const trange_type& trange, const shape_type& shape, - const std::shared_ptr& pmap, + const std::shared_ptr& pmap, bool replicate_shape = true) - : world_(world), trange_(trange), shape_(shape), pmap_(pmap) { + : world_(world), + trange_(trange), + shape_(std::make_shared(shape)), + pmap_(pmap) { // ensure that shapes are identical on every rank if (replicate_shape) - world.gop.broadcast_serializable(shape_, 0); + world.gop.broadcast_serializable(*shape_, 0); else - TA_ASSERT(is_replicated(world, shape_)); + TA_ASSERT(is_replicated(world, *shape_)); // Validate input data. TA_ASSERT(pmap_); TA_ASSERT(pmap_->size() == trange_.tiles_range().volume()); @@ -83,7 +86,7 @@ class TensorImpl : private NO_DEFAULTS { typename pmap_interface::size_type(world_.rank())); TA_ASSERT(pmap_->procs() == typename pmap_interface::size_type(world_.size())); - TA_ASSERT(shape_.validate(trange_.tiles_range())); + TA_ASSERT(shape_->validate(trange_.tiles_range())); } /// Virtual destructor @@ -93,7 +96,16 @@ class TensorImpl : private NO_DEFAULTS { /// \return A shared pointer to the process map of this tensor /// \throw nothing - const std::shared_ptr& pmap() const { return pmap_; } + const std::shared_ptr& pmap() const { return pmap_; } + + /// Tensor process map accessor + + /// \return A shared pointer to the process map of this tensor + /// \note mirrors the shape_shared , the use of this name is recommended, but + /// pmap() is not deprecated + const std::shared_ptr& pmap_shared() const { + return pmap_; + } /// Tiles range accessor @@ -153,20 +165,26 @@ class TensorImpl : private NO_DEFAULTS { template bool is_zero(const Index& i) const { TA_ASSERT(trange_.tiles_range().includes(i)); - return shape_.is_zero(trange_.tiles_range().ordinal(i)); + return shape_->is_zero(trange_.tiles_range().ordinal(i)); } /// Query the density of the tensor /// \return \c true if the tensor is dense, otherwise false /// \throw nothing - bool is_dense() const { return shape_.is_dense(); } + bool is_dense() const { return shape_->is_dense(); } /// Tensor shape accessor /// \return A reference to the tensor shape map - /// \throw TiledArray::Exception When this tensor is dense - const shape_type& shape() const { return shape_; } + const shape_type& shape() const { return *shape_; } + + /// Tensor shape accessor + + /// \return A reference to the tensor shape shared_ptr object + const std::shared_ptr& shape_shared() const { + return shape_; + } /// Tiled range accessor diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 4fe2cbac85..a116c087e1 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -20,7 +20,10 @@ #ifndef TILEDARRAY_TILED_RANGE1_H__INCLUDED #define TILEDARRAY_TILED_RANGE1_H__INCLUDED +#include + #include +#include #include #include #include @@ -41,9 +44,9 @@ class TiledRange1 { struct Enabler {}; public: - typedef TA_1INDEX_TYPE index1_type; - typedef std::pair range_type; - typedef std::vector::const_iterator const_iterator; + using range_type = Range1; + using index1_type = range_type::index1_type; + using const_iterator = std::vector::const_iterator; /// Default constructor creates an empty range (tile and element ranges are /// both [0,0)) . @@ -148,14 +151,55 @@ class TiledRange1 { const range_type& elements_range() const { return elements_range_; } /// Tile range extent accessor - index1_type tile_extent() const { return range_.second - range_.first; } + /// \return the number of tiles in the range + index1_type tile_extent() const { return TiledArray::extent(range_); } /// Elements range extent accessor - index1_type extent() const { - return elements_range_.second - elements_range_.first; + /// \return the number of elements in the range + index1_type extent() const { return TiledArray::extent(elements_range_); } + + /// Computes hashmarks + /// \return the hashmarks of the tiled range, consisting of the following + /// values: + /// `{ tile(0).first, tile(0).second, tile(1).second, tile(2).second + /// ... }` + std::vector hashmarks() const { + std::vector result; + result.reserve(tile_extent() + 1); + result.push_back(elements_range().first); + for (auto& t : tiles_ranges_) { + result.push_back(t.second); + } + return result; } - /// Tile range accessor + /// \return the size of the largest tile in the range + /// \pre `this->tile_extent() > 0` + index1_type largest_tile_extent() const { + TA_ASSERT(tile_extent() > 0); + using TiledArray::extent; + auto largest_tile_it = + std::max_element(tiles_ranges_.begin(), tiles_ranges_.end(), + [](const auto& tile1, const auto& tile2) { + return extent(tile1) < extent(tile2); + }); + return extent(*largest_tile_it); + } + + /// \return the size of the smallest tile in the range + /// \pre `this->tile_extent() > 0` + index1_type smallest_tile_extent() const { + TA_ASSERT(tile_extent() > 0); + using TiledArray::extent; + auto smallest_tile_it = + std::min_element(tiles_ranges_.begin(), tiles_ranges_.end(), + [](const auto& tile1, const auto& tile2) { + return extent(tile1) < extent(tile2); + }); + return extent(*smallest_tile_it); + } + + /// Accesses range of a particular tile /// \param i tile index /// \return A const reference to the range of tile \c i @@ -262,16 +306,17 @@ class TiledRange1 { /// Initialize elem2tile void init_elem2tile_() const { + using TiledArray::extent; // check for 0 size range. - if ((elements_range_.second - elements_range_.first) == 0) return; + if (extent(elements_range_) == 0) return; static std::mutex mtx; { std::lock_guard lock(mtx); if (elem2tile_.empty()) { // initialize elem2tile map - elem2tile_.resize(elements_range_.second - elements_range_.first); - const auto end = range_.second - range_.first; + elem2tile_.resize(extent(elements_range_)); + const auto end = extent(range_); for (index1_type t = 0; t < end; ++t) for (index1_type e = tiles_ranges_[t].first; e < tiles_ranges_[t].second; ++e) @@ -340,7 +385,7 @@ inline TiledRange1 concat(const TiledRange1& r1, const TiledRange1& r2) { hashmarks.push_back(tile.second); } for (const auto& tile : r2) { - hashmarks.push_back(hashmarks.back() + tile.second - tile.first); + hashmarks.push_back(hashmarks.back() + TiledArray::extent(tile)); } return TiledRange1(hashmarks.begin(), hashmarks.end()); } else { @@ -357,8 +402,8 @@ inline TiledRange1 concat(const TiledRange1& r1, const TiledRange1& r2) { inline bool is_congruent(const TiledRange1& r1, const TiledRange1& r2) { return std::equal(r1.begin(), r1.end(), r2.begin(), [](const auto& tile1, const auto& tile2) { - return (tile1.second - tile1.first) == - (tile2.second - tile2.first); + return TiledArray::extent(tile1) == + TiledArray::extent(tile2); }); } diff --git a/src/TiledArray/tiledarray.cpp b/src/TiledArray/tiledarray.cpp index 29b60a61d6..67f4b62caa 100644 --- a/src/TiledArray/tiledarray.cpp +++ b/src/TiledArray/tiledarray.cpp @@ -2,6 +2,8 @@ #include #include +#include + #include #ifdef TILEDARRAY_HAS_CUDA @@ -10,6 +12,13 @@ #include #endif +#if TILEDARRAY_HAS_TTG +#include +#endif + +#include +#include + namespace TiledArray { namespace { @@ -98,6 +107,44 @@ TiledArray::World& TiledArray::initialize(int& argc, char**& argv, TiledArray::set_num_threads(1); madness::print_meminfo_disable(); initialized_accessor() = true; + + // if have TTG initialize it also +#if TILEDARRAY_HAS_TTG + ttg::initialize(argc, argv, -1, madness::ParsecRuntime::context()); +#endif + + // check if user specified linear algebra backend + params + auto* linalg_backend_cstr = std::getenv("TA_LINALG_BACKEND"); + if (linalg_backend_cstr) { + using namespace std::literals::string_literals; + if ("scalapack"s == linalg_backend_cstr) { + TiledArray::set_linalg_backend( + TiledArray::LinearAlgebraBackend::ScaLAPACK); + } else if ("ttg"s == linalg_backend_cstr) { + TiledArray::set_linalg_backend(TiledArray::LinearAlgebraBackend::TTG); + } else if ("lapack"s == linalg_backend_cstr) { + TiledArray::set_linalg_backend( + TiledArray::LinearAlgebraBackend::LAPACK); + } else + TA_EXCEPTION( + "TiledArray::initialize: invalid value of environment variable " + "TA_LINALG_BACKEND, valid values are \"scalapack\", \"ttg\", and " + "\"lapack\""); + } + const char* linalg_distributed_minsize_cstr = + std::getenv("TA_LINALG_DISTRIBUTED_MINSIZE"); + if (linalg_distributed_minsize_cstr) { + char* end; + const auto linalg_distributed_minsize = + std::strtoul(linalg_distributed_minsize_cstr, &end, 10); + if (errno == ERANGE) + TA_EXCEPTION( + "TiledArray::initialize: invalid value of environment variable " + "TA_LINALG_DISTRIBUTED_MINSIZE"); + TiledArray::set_linalg_crossover_to_distributed( + linalg_distributed_minsize); + } + return default_world; } else throw Exception("TiledArray already initialized"); @@ -106,6 +153,11 @@ TiledArray::World& TiledArray::initialize(int& argc, char**& argv, /// Finalizes TiledArray (and MADWorld runtime, if it had not been initialized /// when TiledArray::initialize was called). void TiledArray::finalize() { + // finalize in the reverse order of initialize +#if TILEDARRAY_HAS_TTG + ttg::finalize(); +#endif + // reset number of threads TiledArray::set_num_threads(TiledArray::max_threads); #ifdef TILEDARRAY_HAS_CUDA diff --git a/src/TiledArray/type_traits.h b/src/TiledArray/type_traits.h index 1de19bb316..41c5e077d8 100644 --- a/src/TiledArray/type_traits.h +++ b/src/TiledArray/type_traits.h @@ -73,6 +73,7 @@ class Future; } // namespace madness namespace TiledArray { +class Range1; template class Tile; class DensePolicy; @@ -878,6 +879,12 @@ constexpr const bool is_gettable_v = is_gettable::value; template struct is_gettable_pair : std::false_type {}; +// specialize for Range1 to avoid dependence on the header order inclusion +template <> +struct is_gettable_pair : std::true_type {}; +template <> +struct is_gettable_pair : std::true_type {}; + template struct is_gettable_pair< T, std::enable_if_t && is_gettable_v<1, T>>> @@ -897,6 +904,8 @@ struct is_integral_pair_< typename std::enable_if::value && std::is_integral::value>::type> : std::true_type {}; +template <> +struct is_integral_pair_ : std::true_type {}; /// @tparam T a type /// @c is_integral_pair::value is true if @c T is an @c std::pair and /// both @c std::is_integral::value and @c std::is_integral::value are @@ -904,6 +913,9 @@ struct is_integral_pair_< template struct is_integral_pair : is_integral_pair_ {}; +template +struct is_integral_pair : is_integral_pair {}; + /// \c is_integral_pair_v is an alias for \c is_integral_pair::value template constexpr const bool is_integral_pair_v = is_integral_pair::value; @@ -1264,7 +1276,8 @@ static constexpr bool is_gpair_range_v = is_gpair_range::value; /// \c at(pair, i) extracts i-th element from gpair template >> + typename = std::enable_if_t< + is_gpair_v>>> decltype(auto) at(GeneralizedPair&& v, std::size_t idx) { assert(idx == 0 || idx == 1); if constexpr (is_gettable_pair_v>) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0fccf921b5..1cb3d06bb4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,6 +32,7 @@ set(executable ta_test) # N.B.: The order of files here represents the order in which the tests are run. # N.B. 2: if you want to trim this down you may need to resolve linker errors due to missing fixture deps manually set(ta_test_src_files ta_test.cpp + range1.cpp range.cpp btas.cpp meta.cpp diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index f36155156a..4f2e1dbe9b 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -507,7 +507,7 @@ BOOST_AUTO_TEST_CASE(truncate) { BOOST_AUTO_TEST_CASE(make_replicated) { // Get a copy of the original process map - std::shared_ptr distributed_pmap = a.pmap(); + std::shared_ptr distributed_pmap = a.pmap(); // Convert array to a replicated array. BOOST_REQUIRE_NO_THROW(a.make_replicated()); diff --git a/tests/dist_eval_array_eval.cpp b/tests/dist_eval_array_eval.cpp index e70f2839be..fa120dae1d 100644 --- a/tests/dist_eval_array_eval.cpp +++ b/tests/dist_eval_array_eval.cpp @@ -76,9 +76,8 @@ struct ArrayEvalImplFixture : public TiledRangeFixture { const DistArray& array, TiledArray::World& world, const typename TiledArray::detail::DistEval::shape_type& shape, - const std::shared_ptr< - typename TiledArray::detail::DistEval::pmap_interface>& - pmap, + const std::shared_ptr::pmap_interface>& pmap, const Permutation& perm, const Op& op) { typedef TiledArray::detail::ArrayEvalImpl, Op, Policy> diff --git a/tests/dist_eval_binary_eval.cpp b/tests/dist_eval_binary_eval.cpp index 5e8368fd81..6451c457f6 100644 --- a/tests/dist_eval_binary_eval.cpp +++ b/tests/dist_eval_binary_eval.cpp @@ -83,9 +83,8 @@ struct BinaryEvalFixture : public TiledRangeFixture { const DistArray& array, TiledArray::World& world, const typename TiledArray::detail::DistEval::shape_type& shape, - const std::shared_ptr< - typename TiledArray::detail::DistEval::pmap_interface>& - pmap, + const std::shared_ptr::pmap_interface>& pmap, const Permutation& perm, const Op& op) { typedef TiledArray::detail::ArrayEvalImpl, Op, Policy> @@ -100,14 +99,15 @@ struct BinaryEvalFixture : public TiledRangeFixture { template static TiledArray::detail::DistEval - make_binary_eval(const TiledArray::detail::DistEval& left, - const TiledArray::detail::DistEval& right, - TiledArray::World& world, - const typename TiledArray::detail::DistEval< - typename Op::result_type, Policy>::shape_type& shape, - const std::shared_ptr::pmap_interface>& pmap, - const Permutation& perm, const Op& op) { + make_binary_eval( + const TiledArray::detail::DistEval& left, + const TiledArray::detail::DistEval& right, + TiledArray::World& world, + const typename TiledArray::detail::DistEval::shape_type& shape, + const std::shared_ptr::pmap_interface>& pmap, + const Permutation& perm, const Op& op) { typedef TiledArray::detail::BinaryEvalImpl< TiledArray::detail::DistEval, TiledArray::detail::DistEval, Op, Policy> diff --git a/tests/dist_eval_contraction_eval.cpp b/tests/dist_eval_contraction_eval.cpp index 6e59e2f93b..0e4373486b 100644 --- a/tests/dist_eval_contraction_eval.cpp +++ b/tests/dist_eval_contraction_eval.cpp @@ -243,9 +243,8 @@ struct ContractionEvalFixture : public SparseShapeFixture { const DistArray& array, TiledArray::World& world, const typename TiledArray::detail::DistEval::shape_type& shape, - const std::shared_ptr< - typename TiledArray::detail::DistEval::pmap_interface>& - pmap, + const std::shared_ptr::pmap_interface>& pmap, const Permutation& perm, const Op& op) { typedef TiledArray::detail::ArrayEvalImpl, Op, Policy> diff --git a/tests/dist_eval_unary_eval.cpp b/tests/dist_eval_unary_eval.cpp index 7f07f7ee81..9e2a60ae3f 100644 --- a/tests/dist_eval_unary_eval.cpp +++ b/tests/dist_eval_unary_eval.cpp @@ -89,9 +89,8 @@ struct UnaryEvalImplFixture : public TiledRangeFixture { const DistArray& array, TiledArray::World& world, const typename TiledArray::detail::DistEval::shape_type& shape, - const std::shared_ptr< - typename TiledArray::detail::DistEval::pmap_interface>& - pmap, + const std::shared_ptr::pmap_interface>& pmap, const Permutation& perm, const Op& op) { typedef TiledArray::detail::ArrayEvalImpl, Op, Policy> @@ -111,9 +110,8 @@ struct UnaryEvalImplFixture : public TiledRangeFixture { TiledArray::World& world, const typename TiledArray::detail::DistEval::shape_type& shape, - const std::shared_ptr< - typename TiledArray::detail::DistEval::pmap_interface>& - pmap, + const std::shared_ptr::pmap_interface>& pmap, const Permutation& perm, const Op& op) { typedef TiledArray::detail::UnaryEvalImpl< TiledArray::detail::DistEval, Op, Policy> diff --git a/tests/linalg.cpp b/tests/linalg.cpp index 3e031b5e72..5759d3753f 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -29,6 +29,15 @@ namespace scalapack = TA::math::linalg::scalapack; #define TILEDARRAY_SCALAPACK_TEST(...) #endif +#if TILEDARRAY_HAS_TTG +#include "TiledArray/math/linalg/ttg/cholesky.h" +#define TILEDARRAY_TTG_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E); +#else +#define TILEDARRAY_TTG_TEST(...) +#endif + struct ReferenceFixture { size_t N; std::vector htoeplitz_vector; @@ -105,15 +114,40 @@ struct LinearAlgebraFixture : ReferenceFixture { } } } +#endif + template static void compare(const char* context, const A& non_dist, const A& result, double e) { - BOOST_TEST_CONTEXT(context) - ; + BOOST_TEST_CONTEXT(context); + // std::cout << "context=" << context << ": should have obtained:\n" + // << std::setprecision(15) << non_dist << std::endl; + // std::cout << "context=" << context << ": what actually obtained:\n" + // << std::setprecision(15) << result << std::endl; auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get(); BOOST_CHECK_SMALL(diff_with_non_dist, e); } -#endif + + template + static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f, + std::integer_sequence) { + auto l = {(f(std::get(t1), std::get(t2)), 0)...}; + } + + template + static void for_each_pair_of_tuples(std::tuple const& t1, + std::tuple const& t2, F f) { + for_each_pair_of_tuples_impl( + t1, t2, f, std::make_integer_sequence()); + } + + template + static void compare(const char* context, const std::tuple& non_dist, + const std::tuple& result, double e) { + for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) { + compare(context, arg1, arg2, e); + }); + } }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -550,18 +584,20 @@ BOOST_AUTO_TEST_CASE(cholesky) { decltype(A) A_minus_LLt; A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj(); - BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + const double epsilon = N * N * std::numeric_limits::epsilon(); + + BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon); // check against NON_DIST also auto L_ref = non_dist::cholesky(A); decltype(L) L_diff; L_diff("i,j") = L("i,j") - L_ref("i,j"); - BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), epsilon); - GlobalFixture::world->gop.fence(); + TILEDARRAY_SCALAPACK_TEST(cholesky(A), epsilon); + + TILEDARRAY_TTG_TEST(cholesky(A), epsilon); } BOOST_AUTO_TEST_CASE(cholesky_linv) { @@ -602,7 +638,7 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); - GlobalFixture::world->gop.fence(); + TILEDARRAY_TTG_TEST(cholesky_linv(Acopy), epsilon); } BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { @@ -640,9 +676,9 @@ BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { BOOST_CHECK_SMALL(norm, epsilon); - // TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); - GlobalFixture::world->gop.fence(); + TILEDARRAY_TTG_TEST(cholesky_linv(A), epsilon); } BOOST_AUTO_TEST_CASE(cholesky_solve) { @@ -890,50 +926,48 @@ BOOST_AUTO_TEST_CASE(svd_allvectors) { #endif template -void householder_qr_q_only_test( const ArrayT& A, double tol ) { - +void householder_qr_q_only_test(const ArrayT& A, double tol) { using value_type = typename ArrayT::element_type; - #if TILEDARRAY_HAS_SCALAPACK - auto Q = use_scalapack ? scalapack::householder_qr( A ) : - non_dist:: householder_qr( A ); - #else +#if TILEDARRAY_HAS_SCALAPACK + auto Q = use_scalapack ? scalapack::householder_qr(A) + : non_dist::householder_qr(A); +#else static_assert(not use_scalapack); - auto Q = non_dist::householder_qr( A ); - #endif - + auto Q = non_dist::householder_qr(A); +#endif // Make sure the Q is orthogonal at least - TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); - + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); } template -void householder_qr_test( const ArrayT& A, double tol ) { - - #if TILEDARRAY_HAS_SCALAPACK - auto [Q,R] = use_scalapack ? scalapack::householder_qr( A ) : - non_dist:: householder_qr( A ); - #else +void householder_qr_test(const ArrayT& A, double tol) { +#if TILEDARRAY_HAS_SCALAPACK + auto [Q, R] = use_scalapack ? scalapack::householder_qr(A) + : non_dist::householder_qr(A); +#else static_assert(not use_scalapack); - auto [Q,R] = non_dist::householder_qr( A ); - #endif + auto [Q, R] = non_dist::householder_qr(A); +#endif // Check reconstruction error TA::TArray QR_ERROR; QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); - BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol); // Check orthonormality of Q - TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); } BOOST_AUTO_TEST_CASE(householder_qr_q_only) { @@ -948,11 +982,10 @@ BOOST_AUTO_TEST_CASE(householder_qr_q_only) { }); double tol = N * N * std::numeric_limits::epsilon(); - householder_qr_q_only_test( ref_ta, tol ); - #if TILEDARRAY_HAS_SCALAPACK - householder_qr_q_only_test( ref_ta, tol ); - #endif - + householder_qr_q_only_test(ref_ta, tol); +#if TILEDARRAY_HAS_SCALAPACK + householder_qr_q_only_test(ref_ta, tol); +#endif GlobalFixture::world->gop.fence(); } @@ -968,54 +1001,46 @@ BOOST_AUTO_TEST_CASE(householder_qr) { return this->make_ta_reference(t, range); }); - double tol = N * N * std::numeric_limits::epsilon(); - householder_qr_test( ref_ta, tol ); - #if TILEDARRAY_HAS_SCALAPACK - householder_qr_test( ref_ta, tol ); - #endif + householder_qr_test(ref_ta, tol); +#if TILEDARRAY_HAS_SCALAPACK + householder_qr_test(ref_ta, tol); +#endif GlobalFixture::world->gop.fence(); } - - - - - template -void cholesky_qr_q_only_test( const ArrayT& A, double tol ) { - +void cholesky_qr_q_only_test(const ArrayT& A, double tol) { using value_type = typename ArrayT::element_type; - auto Q = TiledArray::math::linalg::cholesky_qr( A ); - + auto Q = TiledArray::math::linalg::cholesky_qr(A); // Make sure the Q is orthogonal at least - TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); - + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); } template -void cholesky_qr_test( const ArrayT& A, double tol ) { - - auto [Q,R] = TiledArray::math::linalg::cholesky_qr( A ); +void cholesky_qr_test(const ArrayT& A, double tol) { + auto [Q, R] = TiledArray::math::linalg::cholesky_qr(A); // Check reconstruction error TA::TArray QR_ERROR; QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); - BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol); // Check orthonormality of Q - TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); } BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { @@ -1030,7 +1055,7 @@ BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { }); double tol = N * N * std::numeric_limits::epsilon(); - cholesky_qr_q_only_test( ref_ta, tol ); + cholesky_qr_q_only_test(ref_ta, tol); GlobalFixture::world->gop.fence(); } @@ -1046,9 +1071,8 @@ BOOST_AUTO_TEST_CASE(cholesky_qr) { return this->make_ta_reference(t, range); }); - double tol = N * N * std::numeric_limits::epsilon(); - cholesky_qr_test( ref_ta, tol ); + cholesky_qr_test(ref_ta, tol); GlobalFixture::world->gop.fence(); } diff --git a/tests/range1.cpp b/tests/range1.cpp new file mode 100644 index 0000000000..bc2fabdd6c --- /dev/null +++ b/tests/range1.cpp @@ -0,0 +1,177 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include "TiledArray/range1.h" +#include "TiledArray/range.h" +#include "TiledArray/type_traits.h" + +#include +#include + +#include "unit_test_config.h" + +using namespace TiledArray; + +BOOST_AUTO_TEST_SUITE(range1_suite, TA_UT_LABEL_SERIAL) + +BOOST_AUTO_TEST_CASE(traits) { + static_assert(detail::is_integral_range_v); + static_assert(detail::is_integral_range_v); + static_assert(detail::is_integral_pair_v); + static_assert(detail::is_integral_pair_v); + static_assert(detail::is_gettable_pair_v); + static_assert(detail::is_gettable_pair_v); + static_assert(detail::is_range_v); + static_assert(detail::is_range_v); + static_assert(!detail::is_contiguous_range_v< + Range1>); // it's not a container! so not contiguous according + // to the defintion of std::contiguous_range + static_assert( + detail::is_gpair_range_v>); + static_assert(detail::is_gpair_range_v< + boost::container::small_vector>); + static_assert(detail::is_gpair_range_v>); +} + +BOOST_AUTO_TEST_CASE(constructors) { + BOOST_CHECK_NO_THROW(Range1{}); + BOOST_CHECK_EQUAL(Range1{}.first, 0); + BOOST_CHECK_EQUAL(Range1{}.second, 0); + // decomposable via structure bindings; + auto [first, second] = Range1{}; + BOOST_CHECK_EQUAL(first, Range1{}.first); + BOOST_CHECK_EQUAL(second, Range1{}.second); + + BOOST_CHECK_NO_THROW(Range1{1}); + BOOST_CHECK_EQUAL(Range1{1}.first, 0); + BOOST_CHECK_EQUAL(Range1{1}.second, 1); + + BOOST_CHECK_NO_THROW((Range1{1, 1})); + BOOST_CHECK_NO_THROW(Range1(1, 1)); + BOOST_CHECK_EQUAL(Range1(1, 1).first, 1); + BOOST_CHECK_EQUAL(Range1(1, 1).first, 1); + + BOOST_CHECK_NO_THROW((Range1{-11, 13})); + BOOST_CHECK_EQUAL(Range1(-11, 13).first, -11); + BOOST_CHECK_EQUAL(Range1(-11, 13).second, 13); + + Range1 rng{-11, 13}; + // copy + BOOST_CHECK_NO_THROW((Range1{rng})); + // move + BOOST_CHECK_NO_THROW(Range1(Range1{-11, 13})); +} + +BOOST_AUTO_TEST_CASE(accessors) { + Range1 r{1, 10}; + BOOST_CHECK_NO_THROW(r.lobound()); + BOOST_CHECK_EQUAL(r.lobound(), 1); + BOOST_CHECK_NO_THROW(r.upbound()); + BOOST_CHECK_EQUAL(r.upbound(), 10); + BOOST_CHECK_NO_THROW(r.extent()); + BOOST_CHECK_EQUAL(r.extent(), 9); +} + +BOOST_AUTO_TEST_CASE(iteration) { + Range1 r{0, 10}; + + BOOST_CHECK_NO_THROW(r.begin()); + BOOST_CHECK_NO_THROW(r.cbegin()); + BOOST_CHECK_NO_THROW(r.end()); + BOOST_CHECK_NO_THROW(r.cend()); + + auto it = r.begin(); + BOOST_CHECK_EQUAL(*it, 0); + BOOST_CHECK_NO_THROW(++it); + BOOST_CHECK_EQUAL(*it, 1); + BOOST_CHECK_NO_THROW(it++); + BOOST_CHECK_EQUAL(*it, 2); + BOOST_CHECK_NO_THROW(--it); + BOOST_CHECK_EQUAL(*it, 1); + BOOST_CHECK_NO_THROW(it--); + BOOST_CHECK_EQUAL(*it, 0); + BOOST_CHECK_NO_THROW(it += 2); + BOOST_CHECK_EQUAL(*it, 2); + BOOST_CHECK_NO_THROW(it -= 2); + BOOST_CHECK_EQUAL(*it, 0); + BOOST_CHECK(it == r.begin()); + + auto end = r.end(); + BOOST_CHECK_EQUAL(*end, 10); + BOOST_CHECK(it != end); +} + +BOOST_AUTO_TEST_CASE(comparison) { + Range1 r1{0, 10}; + Range1 r2{0, 10}; + Range1 r3{1, 10}; + Range1 r4{0, 9}; + BOOST_CHECK(r1 == r1); + BOOST_CHECK(r1 == r2); + BOOST_CHECK(r1 != r3); + BOOST_CHECK(r1 != r4); +} + +BOOST_AUTO_TEST_CASE(serialization) { + Range1 r{1, 10}; + + std::size_t buf_size = sizeof(Range1); + unsigned char* buf = new unsigned char[buf_size]; + madness::archive::BufferOutputArchive oar(buf, buf_size); + oar& r; + std::size_t nbyte = oar.size(); + oar.close(); + + Range1 rs; + madness::archive::BufferInputArchive iar(buf, nbyte); + iar& rs; + iar.close(); + + delete[] buf; + + BOOST_CHECK(rs == r); +} + +BOOST_AUTO_TEST_CASE(swap) { + Range1 r{0, 10}; + Range1 empty_range; + + // swap with empty range + BOOST_CHECK_NO_THROW(r.swap(empty_range)); + BOOST_CHECK(r == Range1{}); + + // Check that empty_range contains the data of r. + BOOST_CHECK(empty_range == Range1(0, 10)); + + // Swap the data back + BOOST_CHECK_NO_THROW(r.swap(empty_range)); + BOOST_CHECK(empty_range == Range1{}); + BOOST_CHECK(r == Range1(0, 10)); +} + +BOOST_AUTO_TEST_CASE(irange) { + // to avoid ambiguity between test scope and fn + BOOST_CHECK_NO_THROW(TA::irange(1)); + BOOST_CHECK(TA::irange(1) == Range1(1)); + + BOOST_CHECK_NO_THROW(TA::irange(1, 2)); + BOOST_CHECK(TA::irange(1, 2) == Range1(1, 2)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index 0833b948f4..312389ff84 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -25,7 +25,7 @@ using namespace TiledArray; -BOOST_FIXTURE_TEST_SUITE(range1_suite, Range1Fixture, TA_UT_LABEL_SERIAL) +BOOST_FIXTURE_TEST_SUITE(tiled_range1_suite, Range1Fixture, TA_UT_LABEL_SERIAL) BOOST_AUTO_TEST_CASE(range_accessor) { BOOST_CHECK_EQUAL(tr1.tiles_range().first, tiles.first); @@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE(constructor) { BOOST_CHECK_EQUAL(r.elements_range().first, -1); BOOST_CHECK_EQUAL(r.elements_range().second, 28); } -#else // TA_SIGNED_1INDEX_TYPE +#else // TA_SIGNED_1INDEX_TYPE BOOST_CHECK_THROW(TiledRange1 r({-1, 0, 2, 5, 10, 17, 28}), TiledArray::Exception); #endif // TA_SIGNED_1INDEX_TYPE