From e29d4e6041e94908b5aaf2aa640b402634523eea Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 31 Dec 2022 16:51:01 -0500 Subject: [PATCH 1/7] implement support for tracing memory used by TA::Tensor --- CMakeLists.txt | 5 +- INSTALL.md | 1 + src/CMakeLists.txt | 2 + src/TiledArray/config.h.in | 3 + src/TiledArray/tensor/tensor.h | 225 ++++++++++++++++++++++++++- src/TiledArray/util/ptr_registry.cpp | 183 ++++++++++++++++++++++ src/TiledArray/util/ptr_registry.h | 171 ++++++++++++++++++++ tests/ta_test.cpp | 33 ++++ 8 files changed, 617 insertions(+), 6 deletions(-) create mode 100644 src/TiledArray/util/ptr_registry.cpp create mode 100644 src/TiledArray/util/ptr_registry.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 02a8012b99..06eb7fb533 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,10 @@ if((ENABLE_GPERFTOOLS OR ENABLE_TCMALLOC_MINIMAL) AND CMAKE_SYSTEM_NAME MATCHES add_feature_info(Libunwind ENABLE_LIBUNWIND "Libunwind provides stack unwinding") endif() -option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" OFF) +option(TA_TENSOR_MEM_TRACE "Turn on instrumented tracing of TA::Tensor memory use" OFF) +add_feature_info(TENSOR_MEM_TRACE TA_TENSOR_MEM_TRACE "instrumented tracing of TA::Tensor memory use") + +option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" ${TA_TENSOR_MEM_TRACE}) add_feature_info(TENSOR_MEM_PROFILE TA_TENSOR_MEM_PROFILE "instrumented profiling of TA::Tensor memory use") option(TA_EXPERT "TiledArray Expert mode: disables automatically downloading or building dependencies" OFF) diff --git a/INSTALL.md b/INSTALL.md index fb459d7690..5be4905635 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -417,6 +417,7 @@ support may be added. * `TA_SIGNED_1INDEX_TYPE` -- Set to `OFF` to use unsigned 1-index coordinate type (default for TiledArray 1.0.0-alpha.2 and older). The default is `ON`, which enables the use of negative indices in coordinates. * `TA_MAX_SOO_RANK_METADATA` -- Specifies the maximum rank for which to use Small Object Optimization (hence, avoid the use of the heap) for metadata. The default is `8`. * `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile host memory allocations used by TA::Tensor. This causes the use of Umpire for host memory allocation. This also enables additional tracing facilities provided by Umpire; these can be controlled via [environment variable `UMPIRE_LOG_LEVEL`](https://umpire.readthedocs.io/en/develop/sphinx/features/logging_and_replay.html), but note that the default is to log Umpire info into a file rather than stdout. +* `TA_TENSOR_MEM_TRACE` -- Set to `ON` to *trace* host memory allocations used by TA::Tensor. This turns on support for tracking memory used by `Tensor` objects; such tracking must be enabled programmatically. This can greatly increase memory consumption by the application and is only intended for expert developers troubleshooting memory use by TiledArray. * `TA_UT_CTEST_TIMEOUT` -- The value (in seconds) of the timeout to use for running the TA unit tests via CTest when building the `check`/`check-tiledarray` targets. The default timeout is 1500s. 
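As the `TA_TENSOR_MEM_TRACE` entry above notes, the CMake option only compiles the tracing hooks in; tracking must still be enabled programmatically, per `Tensor<T>` instantiation. A minimal sketch of doing so (it mirrors the unit-test fixture later in this series; `<tiledarray.h>` as the umbrella header, the 1-byte threshold, and the stdout logger are illustrative assumptions, not defaults):

```cpp
#include <iostream>
#include <tiledarray.h>  // assumed umbrella header; adjust to your include layout

void enable_tensor_mem_tracing() {
#ifdef TA_TENSOR_MEM_TRACE
  // track every Tensor<double> whose data occupies at least 1 byte ...
  TiledArray::Tensor<double>::trace_if_larger_than(1);
  // ... and log its registry insert/erase events to stdout
  TiledArray::Tensor<double>::ptr_registry()->log(&std::cout);
#endif
}
```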
# Build TiledArray diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7b3b05d003..73da617ef2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -189,6 +189,8 @@ TiledArray/util/bug.h TiledArray/util/function.h TiledArray/util/initializer_list.h TiledArray/util/logger.h +TiledArray/util/ptr_registry.cpp +TiledArray/util/ptr_registry.h TiledArray/util/random.h TiledArray/util/singleton.h TiledArray/util/threads.h diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index 68f5dc1374..0c4d5d5cbc 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -81,6 +81,9 @@ /* Is TA::Tensor memory profiling enabled? */ #cmakedefine TA_TENSOR_MEM_PROFILE 1 +/* Is TA::Tensor memory tracing enabled? */ +#cmakedefine TA_TENSOR_MEM_TRACE 1 + /* Is TTG available? */ #cmakedefine TILEDARRAY_HAS_TTG 1 diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 94e59425cf..a778450ab5 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -20,6 +20,8 @@ #ifndef TILEDARRAY_TENSOR_TENSOR_H__INCLUDED #define TILEDARRAY_TENSOR_TENSOR_H__INCLUDED +#include "TiledArray/config.h" + #include "TiledArray/host/allocator.h" #include "TiledArray/math/blas.h" @@ -30,6 +32,7 @@ #include "TiledArray/tile_interface/permute.h" #include "TiledArray/tile_interface/trace.h" #include "TiledArray/util/logger.h" +#include "TiledArray/util/ptr_registry.h" namespace TiledArray { @@ -61,6 +64,15 @@ class Tensor { std::is_assignable, T>::value, "Tensor: T must be an assignable type (e.g. cannot be const)"); +#ifdef TA_TENSOR_MEM_TRACE + template + std::string make_string(Ts&&... ts) { + std::ostringstream oss; + (oss << ... << ts); + return oss.str(); + } +#endif + public: typedef Range range_type; ///< Tensor range type typedef typename range_type::index1_type index1_type; ///< 1-index type @@ -107,12 +119,33 @@ class Tensor { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); } - auto deleter = [allocator = std::move(allocator), - size](auto&& ptr) mutable { + auto deleter = [ +#ifdef TA_TENSOR_MEM_TRACE + this, +#endif + allocator = std::move(allocator), + size](auto&& ptr) mutable { std::destroy_n(ptr, size); + // N.B. deregister ptr *before* deallocating to avoid possible race + // between reallocation and deregistering +#ifdef TA_TENSOR_MEM_TRACE + const auto nbytes = size * sizeof(T); + if (nbytes >= trace_if_larger_than_) { + ptr_registry()->erase(ptr, nbytes, + make_string("created by TA::Tensor*=", this)); + } +#endif allocator.deallocate(ptr, size); }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor::data_.get()=", data_.get())); + ptr_registry()->insert(data_.get(), nbytes(), + make_string("created by TA::Tensor*=", this)); + } +#endif } Tensor(range_type&& range, size_t batch_size, bool default_construct) @@ -124,12 +157,33 @@ class Tensor { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); } - auto deleter = [allocator = std::move(allocator), - size](auto&& ptr) mutable { + auto deleter = [ +#ifdef TA_TENSOR_MEM_TRACE + this, +#endif + allocator = std::move(allocator), + size](auto&& ptr) mutable { std::destroy_n(ptr, size); + // N.B. 
deregister ptr *before* deallocating to avoid possible race + // between reallocation and deregistering +#ifdef TA_TENSOR_MEM_TRACE + const auto nbytes = size * sizeof(T); + if (nbytes >= trace_if_larger_than_) { + ptr_registry()->erase(ptr, nbytes, + make_string("created by TA::Tensor*=", this)); + } +#endif allocator.deallocate(ptr, size); }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor::data_.get()=", data_.get())); + ptr_registry()->insert(data_.get(), nbytes(), + make_string("created by TA::Tensor*=", this)); + } +#endif } range_type range_; ///< range @@ -138,6 +192,41 @@ class Tensor { public: Tensor() = default; + Tensor(const Tensor& other) + : range_(other.range_), + batch_size_(other.batch_size_), + data_(other.data_) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor(const Tensor& other)::data_.get()=", + data_.get())); + } +#endif + } + Tensor(Tensor&& other) + : range_(std::move(other.range_)), + batch_size_(std::move(other.batch_size_)), + data_(std::move(other.data_)) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + &other, + make_string("TA::Tensor(Tensor&& other)::data_.get()=", data_.get())); + ptr_registry()->insert( + this, + make_string("TA::Tensor(Tensor&& other)::data_.get()=", data_.get())); + } +#endif + } + ~Tensor() { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + this, make_string("TA::~Tensor()::data_.get()=", data_.get())); + } +#endif + } /// Construct a tensor with a range equal to \c range. The data is /// uninitialized. 
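The ordering inside the deleter above is deliberate: the pointer is deregistered *before* `deallocate()`, since once the memory is returned to the allocator another thread can be handed the same address and attempt to register it while the stale entry still exists. A standalone sketch of that pattern, simplified to buffer-side registration only (`ToyRegistry` is a hypothetical stand-in for `PtrRegistry`, not TiledArray API):

```cpp
#include <cstddef>
#include <iostream>
#include <memory>

// Hypothetical stand-in for TiledArray::PtrRegistry; illustration only.
struct ToyRegistry {
  void insert(void* p, std::size_t nbytes) {
    std::cout << "insert " << p << " (" << nbytes << " bytes)\n";
  }
  void erase(void* p, std::size_t nbytes) {
    std::cout << "erase " << p << " (" << nbytes << " bytes)\n";
  }
};

inline ToyRegistry toy_registry;

template <typename T>
std::shared_ptr<T> make_traced_buffer(std::size_t size) {
  std::allocator<T> allocator;
  T* ptr = allocator.allocate(size);
  std::uninitialized_default_construct_n(ptr, size);
  auto deleter = [allocator, size](T* p) mutable {
    std::destroy_n(p, size);
    // N.B. deregister *before* deallocating: after deallocate() returns,
    // another thread may receive this very address from the allocator and
    // insert it while the stale entry is still in the registry
    toy_registry.erase(p, size * sizeof(T));
    allocator.deallocate(p, size);
  };
  toy_registry.insert(ptr, size * sizeof(T));
  return std::shared_ptr<T>(ptr, std::move(deleter));
}
```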
@@ -359,7 +448,15 @@ class Tensor { /// \param data shared pointer to the data Tensor(const range_type& range, size_t batch_size, std::shared_ptr data) - : range_(range), batch_size_(batch_size), data_(data) {} + : range_(range), batch_size_(batch_size), data_(data) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor(range, batch_size, data)::data_.get()=", + data_.get())); + } +#endif + } /// The batch size accessor @@ -412,6 +509,55 @@ class Tensor { return *this; } + Tensor& operator=(const Tensor& other) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + this, + make_string("TA::Tensor::operator=(const Tensor&)::data_.get()=", + data_.get())); + } +#endif + range_ = other.range_; + batch_size_ = other.batch_size_; + data_ = other.data_; +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, + make_string("TA::Tensor::operator=(const Tensor&)::data_.get()=", + data_.get())); + } +#endif + return *this; + } + + Tensor& operator=(Tensor&& other) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + this, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=", + data_.get())); + } + if (other.nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + &other, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=", + data_.get())); + } +#endif + range_ = std::move(other.range_); + batch_size_ = std::move(other.batch_size_); + data_ = std::move(other.data_); +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=", + data_.get())); + } +#endif + return *this; + } + /// Tensor range object accessor /// \return The tensor range object @@ -422,6 +568,14 @@ class Tensor { /// \return The number of elements in the tensor ordinal_type size() const { return (this->range().volume()); } + /// Tensor data size (in bytes) accessor + + /// \return The number of bytes occupied by this tensor's data + /// \warning this only returns valid value if this is a tensor of scalars + std::size_t nbytes() const { + return this->range().volume() * this->batch_size_ * sizeof(T); + } + /// Const element accessor /// \tparam Ordinal an integer type that represents an ordinal @@ -668,9 +822,35 @@ class Tensor { /// \param other The tensor to swap with this void swap(Tensor& other) { +#ifdef TA_TENSOR_MEM_TRACE + bool this_to_be_traced = false; + bool other_to_be_traced = false; + if (nbytes() >= trace_if_larger_than_) { + this_to_be_traced = true; + ptr_registry()->erase( + this, make_string("TA::Tensor::swap()::data_.get()=", data_.get())); + } + if (other.nbytes() >= trace_if_larger_than_) { + other_to_be_traced = true; + ptr_registry()->erase( + &other, + make_string("TA::Tensor::swap()::data_.get()=", other.data_.get())); + } +#endif std::swap(data_, other.data_); std::swap(range_, other.range_); std::swap(batch_size_, other.batch_size_); +#ifdef TA_TENSOR_MEM_TRACE + if (other_to_be_traced) { + ptr_registry()->insert( + this, make_string("TA::Tensor::swap()::data_.get()=", data_.get())); + } + if (this_to_be_traced) { + ptr_registry()->insert( + &other, + make_string("TA::Tensor::swap()::data_.get()=", other.data_.get())); + } +#endif } // clang-format off @@ -1995,8 +2175,43 @@ class Tensor { return reduce(other, mult_add_op, add_op, numeric_type(0)); } + /// @return 
pointer to the PtrRegistry object used for tracing TA::Tensor + /// lifetime + /// @warning only nonnull if configured with `TA_TENSOR_MEM_TRACE=ON` + static PtrRegistry* ptr_registry() { +#ifdef TA_TENSOR_MEM_TRACE + static PtrRegistry registry; + return ®istry; +#else + return nullptr; +#endif + } + +#ifdef TA_TENSOR_MEM_TRACE + /// @param nbytes sets the minimum size of TA::Tensor objects whose lifetime + /// will be tracked; must be greater or equal to 1 + static void trace_if_larger_than(std::size_t nbytes) { + TA_ASSERT(nbytes >= 1); + trace_if_larger_than_ = nbytes; + } + /// @return the minimum size of TA::Tensor objects whose lifetime + /// will be tracked + static std::size_t trace_if_larger_than() { return trace_if_larger_than_; } +#endif + + private: +#ifdef TA_TENSOR_MEM_TRACE + static std::size_t trace_if_larger_than_; +#endif + }; // class Tensor +#ifdef TA_TENSOR_MEM_TRACE +template +std::size_t Tensor::trace_if_larger_than_ = + std::numeric_limits::max(); +#endif + template Tensor operator*(const Permutation& p, const Tensor& t) { return t.permute(p); diff --git a/src/TiledArray/util/ptr_registry.cpp b/src/TiledArray/util/ptr_registry.cpp new file mode 100644 index 0000000000..31e22cc4dc --- /dev/null +++ b/src/TiledArray/util/ptr_registry.cpp @@ -0,0 +1,183 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * util/ptr_registry.cpp + * December 31, 2022 + * + */ + +#include + +#include "TiledArray/util/ptr_registry.h" + +#include "TiledArray/host/env.h" +#include "TiledArray/util/backtrace.h" + +namespace TiledArray { + +namespace detail { +void remove_linebreaks(std::string& str) { + auto it = str.begin(); + while (it != str.end()) { + if (*it == '\n') + it = str.erase(it); + else + ++it; + } +} +} // namespace detail + +PtrRegistry::PtrRegistry() = default; + +PtrRegistry::PtrRegistry(std::ostream& os) : log_(&os) {} + +PtrRegistry::~PtrRegistry() = default; + +PtrRegistry& PtrRegistry::log(std::ostream* os_ptr) { + log_ = os_ptr; + return *this; +} + +std::ostream* PtrRegistry::log() const { return log_; } + +const PtrRegistry::sized_ptr_container_type& PtrRegistry::sized_ptrs() const { + return ptrs_; +} +const PtrRegistry::ptr_container_type& PtrRegistry::unsized_ptrs() const { + if (unsized_ptrs_ == nullptr) { + unsized_ptrs_ = &(const_cast(ptrs_)[0]); + } + return *unsized_ptrs_; +} + +std::size_t PtrRegistry::size() const { + return std::accumulate(ptrs_.begin(), ptrs_.end(), 0, + [](const auto& total_size, const auto& sz_ptrs) { + auto&& [sz, ptrs] = sz_ptrs; + return total_size + ptrs.size(); + }); +} + +void PtrRegistry::insert(void* ptr, std::size_t sz, const std::string& context, + bool backtrace) { + std::scoped_lock lock(this->mtx_); + std::string creation_context = context; + if (backtrace) { + detail::Backtrace bt; + auto bt_str = bt.str(0); + detail::remove_linebreaks(bt_str); + creation_context += ":::::" + bt_str; + } + if (log_) { + *log_ << "PtrRegistry::insert():::::" << ptr << ":::::" << creation_context + << std::endl +#ifdef TA_TENSOR_MEM_PROFILE + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl +#endif // TA_TENSOR_MEM_PROFILE + ; + } + auto& sz_ptrs = ptrs_[sz]; + TA_ASSERT(sz_ptrs.find(ptr) == sz_ptrs.end()); + sz_ptrs.emplace(ptr, std::move(creation_context)); +} + +void PtrRegistry::erase(void* ptr, std::size_t sz, const std::string& context, + bool backtrace) { + std::scoped_lock lock(this->mtx_); + if (log_) { + std::string erasure_context = context; + if (backtrace) { + detail::Backtrace bt; + auto bt_str = bt.str(0); + detail::remove_linebreaks(bt_str); + erasure_context += ":::::" + bt_str; + } + *log_ << "PtrRegistry::erase():::::" << ptr << ":::::" << erasure_context + << std::endl +#ifdef TA_TENSOR_MEM_PROFILE + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl +#endif // TA_TENSOR_MEM_PROFILE + ; + } + auto& sz_ptrs = ptrs_[sz]; + auto it = sz_ptrs.find(ptr); + TA_ASSERT(it != sz_ptrs.end()); + sz_ptrs.erase(it); +} + +PtrRegistry& PtrRegistry::insert(void* ptr, std::size_t sz, + const std::string& context) { + this->insert(ptr, sz, context, /* backtrace = */ false); + return *this; +} + +PtrRegistry& PtrRegistry::insert(void* ptr, const std::string& context) { + this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ false); + return *this; +} + +PtrRegistry& PtrRegistry::insert_bt(void* 
ptr, std::size_t sz, + const std::string& context) { + this->insert(ptr, sz, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::insert_bt(void* ptr, const std::string& context) { + this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase(void* ptr, std::size_t sz, + const std::string& context) { + this->erase(ptr, sz, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase(void* ptr, const std::string& context) { + this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase_bt(void* ptr, std::size_t sz, + const std::string& context) { + this->erase(ptr, sz, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase_bt(void* ptr, const std::string& context) { + this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ true); + return *this; +} + +} // namespace TiledArray diff --git a/src/TiledArray/util/ptr_registry.h b/src/TiledArray/util/ptr_registry.h new file mode 100644 index 0000000000..e5c1de049e --- /dev/null +++ b/src/TiledArray/util/ptr_registry.h @@ -0,0 +1,171 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * util/ptr_registry.h + * December 31, 2022 + * + */ + +#ifndef TILEDARRAY_UTIL_PTR_REGISTRY_H +#define TILEDARRAY_UTIL_PTR_REGISTRY_H + +#include +#include +#include +#include + +namespace TiledArray { + +/// Registry of pointers + +/// Stores {pointer,creation_context} pairs in a hash table (one table per +/// pointer "size"; hash tables are stored in the order of increasing size). 
+///
+/// @details Useful for capturing graphs of pointers to detect issues
+/// with smart pointer lifetimes (e.g., stray refs and/or cycles) and
+/// tracing memory use
+struct PtrRegistry {
+  using ptr_container_type = std::unordered_map;
+  using sized_ptr_container_type = std::map;
+
+  /// constructs an empty registry
+  PtrRegistry();
+  PtrRegistry(const PtrRegistry&) = delete;
+  PtrRegistry(PtrRegistry&&) = delete;
+  PtrRegistry& operator=(const PtrRegistry&) = delete;
+  PtrRegistry& operator=(PtrRegistry&&) = delete;
+
+  /// constructs an empty registry configured to log insert/erase
+  /// events to \p log
+  /// \param log an ostream for logging insert/erase events
+  PtrRegistry(std::ostream& log);
+
+  ~PtrRegistry();
+
+  /// sets the active logger to `*log`
+  /// \param log pointer to an std::ostream to use for logging insert/erase
+  /// events; if null, will do no logging
+  PtrRegistry& log(std::ostream* log);
+
+  /// @return pointer to the active logger; if null, no logging is performed
+  std::ostream* log() const;
+
+  /// @return reference to the size-ordered container of sized pointers
+  const sized_ptr_container_type& sized_ptrs() const;
+
+  /// @return reference to the container of unsized (0-sized) pointers
+  const ptr_container_type& unsized_ptrs() const;
+
+  /// @return total number of pointers in the registry
+  std::size_t size() const;
+
+  /// inserts \p ptr associated with size \p sz to the registry
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  PtrRegistry& insert(void* ptr, std::size_t sz,
+                      const std::string& context = "");
+
+  /// inserts \p ptr without associated size (i.e. `sz=0`) to the registry
+  /// \param ptr pointer to register
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  /// \note equivalent to `this->insert(ptr, 0, context);`
+  PtrRegistry& insert(void* ptr, const std::string& context = "");
+
+  /// inserts \p ptr associated with size \p sz to the registry,
+  /// appends backtrace of the caller to context separated by `:::::`
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  PtrRegistry& insert_bt(void* ptr, std::size_t sz,
+                         const std::string& context = "");
+
+  /// inserts \p ptr without associated size (i.e.
with `sz=0`) to the registry,
+  /// appends backtrace of the caller to context separated by `:::::`
+  /// \param ptr pointer to register
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  PtrRegistry& insert_bt(void* ptr, const std::string& context = "");
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this to the log
+  /// \return `*this`
+  PtrRegistry& erase(void* ptr, std::size_t sz,
+                     const std::string& context = "");
+
+  /// erases \p ptr without associated size (i.e., `sz=0`) from the registry
+  /// \param ptr pointer to erase
+  /// \param context erasure context; if logging, will append this to the log
+  /// \return `*this`
+  /// \note equivalent to `this->erase(ptr, 0, context);`
+  PtrRegistry& erase(void* ptr, const std::string& context = "");
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \details introduced for symmetry with insert_bt()
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this and the
+  /// backtrace of the caller to the log
+  /// \return `*this`
+  PtrRegistry& erase_bt(void* ptr, std::size_t sz,
+                        const std::string& context = "");
+
+  /// erases \p ptr without associated size (i.e. with `sz=0`) from the registry
+  /// \details introduced for symmetry with insert_bt()
+  /// \param ptr pointer to erase
+  /// \param context erasure context; if logging, will append this and the
+  /// backtrace of the caller to the log
+  /// \return `*this`
+  /// \note equivalent to `this->erase_bt(ptr, 0, context);`
+  PtrRegistry& erase_bt(void* ptr, const std::string& context = "");
+
+ private:
+  std::ostream* log_ = nullptr;
+  sized_ptr_container_type ptrs_;
+  mutable ptr_container_type* unsized_ptrs_ = nullptr;  // &(ptrs_[0])
+  std::mutex mtx_;
+
+  /// inserts \p ptr associated with size \p sz to the registry
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context string context to attach to the pointer
+  /// \param backtrace if true, appends backtrace of the caller to \p context
+  /// separated by `:::::`
+  void insert(void* ptr, std::size_t sz, const std::string& context,
+              bool backtrace);
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \details introduced for symmetry with insert_bt()
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this to the log
+  /// \param backtrace if true and logging, appends the backtrace of the caller
+  /// to the log
+  void erase(void* ptr, std::size_t sz, const std::string& context,
+             bool backtrace);
+};
+
+}  // namespace TiledArray
+
+#endif  // TILEDARRAY_UTIL_PTR_REGISTRY_H
diff --git a/tests/ta_test.cpp b/tests/ta_test.cpp
index a7141c96a9..8d81e66849 100644
--- a/tests/ta_test.cpp
+++ b/tests/ta_test.cpp
@@ -23,6 +23,10 @@
 #include "TiledArray/external/madness.h"
 #include "unit_test_config.h"
 
+#ifdef TA_TENSOR_MEM_TRACE
+#include
+#endif
+
 #include
 #if (TA_ASSERT_POLICY != TA_ASSERT_THROW)
 #error "TiledArray unit tests require TA_ASSERT_POLICY=TA_ASSERT_THROW"
 #endif
@@ -40,6 +44,23 @@ GlobalFixture::GlobalFixture() {
                                   boost::unit_test::log_all_errors);
   }
 
+#ifdef TA_TENSOR_MEM_TRACE
+  {
+
TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor>::trace_if_larger_than(1); + // TiledArray::Tensor>::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor>::trace_if_larger_than(1); + // TiledArray::Tensor>::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::trace_if_larger_than(1); + } +#endif + // uncomment to create or create+launch debugger // TiledArray::create_debugger("gdb_xterm", "ta_test"); // TiledArray::create_debugger("lldb_xterm", "ta_test"); @@ -53,6 +74,18 @@ GlobalFixture::~GlobalFixture() { TiledArray::finalize(); world = nullptr; } +#ifdef TA_TENSOR_MEM_TRACE + { + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor>::ptr_registry()->size() == + 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT( + TiledArray::Tensor>::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + } +#endif } TiledArray::World* GlobalFixture::world = nullptr; From 499a69850ff92249fb8372fcbcb33509a7b3c3ab Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 1 Jan 2023 01:32:54 -0500 Subject: [PATCH 2/7] disable backtrace appendage in PtrRegistry::erase --- src/TiledArray/util/ptr_registry.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/util/ptr_registry.cpp b/src/TiledArray/util/ptr_registry.cpp index 31e22cc4dc..0c52cde048 100644 --- a/src/TiledArray/util/ptr_registry.cpp +++ b/src/TiledArray/util/ptr_registry.cpp @@ -160,12 +160,12 @@ PtrRegistry& PtrRegistry::insert_bt(void* ptr, const std::string& context) { PtrRegistry& PtrRegistry::erase(void* ptr, std::size_t sz, const std::string& context) { - this->erase(ptr, sz, context, /* backtrace = */ true); + this->erase(ptr, sz, context, /* backtrace = */ false); return *this; } PtrRegistry& PtrRegistry::erase(void* ptr, const std::string& context) { - this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ true); + this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ false); return *this; } From f81aa816754290d9eafcba99de7278346deae90b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 1 Jan 2023 02:12:16 -0500 Subject: [PATCH 3/7] can control whether backtrace is appended in PtrRegistry::{insert,erase} on per-registry rather than per-call basis --- src/TiledArray/util/ptr_registry.cpp | 15 +++++++++++---- src/TiledArray/util/ptr_registry.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/util/ptr_registry.cpp b/src/TiledArray/util/ptr_registry.cpp index 0c52cde048..e309713391 100644 --- a/src/TiledArray/util/ptr_registry.cpp +++ b/src/TiledArray/util/ptr_registry.cpp @@ -57,6 +57,13 @@ PtrRegistry& PtrRegistry::log(std::ostream* os_ptr) { std::ostream* PtrRegistry::log() const { return log_; } +PtrRegistry& PtrRegistry::append_backtrace(bool bt) { + append_backtrace_ = bt; + return *this; +} + +bool PtrRegistry::append_backtrace() const { return append_backtrace_; } + const PtrRegistry::sized_ptr_container_type& PtrRegistry::sized_ptrs() const { return ptrs_; } @@ -138,12 +145,12 @@ void PtrRegistry::erase(void* ptr, std::size_t sz, const 
std::string& context,
 PtrRegistry& PtrRegistry::insert(void* ptr, std::size_t sz,
                                  const std::string& context) {
-  this->insert(ptr, sz, context, /* backtrace = */ false);
+  this->insert(ptr, sz, context, /* backtrace = */ append_backtrace_);
   return *this;
 }
 
 PtrRegistry& PtrRegistry::insert(void* ptr, const std::string& context) {
-  this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ false);
+  this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ append_backtrace_);
   return *this;
 }
 
@@ -160,12 +167,12 @@ PtrRegistry& PtrRegistry::insert_bt(void* ptr, const std::string& context) {
 
 PtrRegistry& PtrRegistry::erase(void* ptr, std::size_t sz,
                                 const std::string& context) {
-  this->erase(ptr, sz, context, /* backtrace = */ false);
+  this->erase(ptr, sz, context, /* backtrace = */ append_backtrace_);
   return *this;
 }
 
 PtrRegistry& PtrRegistry::erase(void* ptr, const std::string& context) {
-  this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ false);
+  this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ append_backtrace_);
   return *this;
 }
 
diff --git a/src/TiledArray/util/ptr_registry.h b/src/TiledArray/util/ptr_registry.h
index e5c1de049e..22a62c424e 100644
--- a/src/TiledArray/util/ptr_registry.h
+++ b/src/TiledArray/util/ptr_registry.h
@@ -67,6 +67,15 @@ struct PtrRegistry {
   /// @return pointer to the active logger; if null, no logging is performed
   std::ostream* log() const;
 
+  /// specifies whether to append backtrace to context provided to insert/erase
+  /// \param bt if true, calls to insert/erase will append backtrace to the
+  /// provided context by default
+  PtrRegistry& append_backtrace(bool bt);
+
+  /// @return true if backtrace will be appended to context provided to
+  /// insert/erase
+  bool append_backtrace() const;
+
   /// @return reference to the size-ordered container of sized pointers
   const sized_ptr_container_type& sized_ptrs() const;
 
@@ -144,6 +153,7 @@ struct PtrRegistry {
   std::ostream* log_ = nullptr;
   sized_ptr_container_type ptrs_;
   mutable ptr_container_type* unsized_ptrs_ = nullptr;  // &(ptrs_[0])
+  bool append_backtrace_ = false;
   std::mutex mtx_;
 
   /// inserts \p ptr associated with size \p sz to the registry

From 717fdad13644f8f0366bbfd19e5a16fc976e70b5 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev
Date: Sun, 1 Jan 2023 12:41:29 -0500
Subject: [PATCH 4/7] added missing #include to ptr_registry.h

---
 src/TiledArray/util/ptr_registry.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/TiledArray/util/ptr_registry.h b/src/TiledArray/util/ptr_registry.h
index 22a62c424e..3baf3219f9 100644
--- a/src/TiledArray/util/ptr_registry.h
+++ b/src/TiledArray/util/ptr_registry.h
@@ -28,6 +28,7 @@
 
 #include
 #include
+#include
 #include
 #include

From 614c9eccfe0d2636dd47c7eff9fe489151ea7a25 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev
Date: Mon, 2 Jan 2023 09:30:23 -0500
Subject: [PATCH 5/7] dox++

---
 src/TiledArray/range.h         |  7 ++--
 src/TiledArray/tensor/tensor.h | 63 +++++++++++++++++++++++++++++++---
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h
index 0b35f1e951..9c0e54a757 100644
--- a/src/TiledArray/range.h
+++ b/src/TiledArray/range.h
@@ -591,7 +591,9 @@ class Range {
 
   /// Move Constructor
 
-  /// \param other The range to be copied
+  /// \param[in,out] other The range to be copied; set to default (null) state
+  /// on return
+  /// \post `other == Range{}`
   Range(Range_&& other)
       : datavec_(std::move(other.datavec_)),
         offset_(other.offset_),
@@ -639,7 +641,8 @@ class Range {
 
   /// Move assignment operator
 
-  /// \param other The range to be copied
+  /// \param[in,out] other The range to be copied; set to default (null) state
+  /// on return
   /// \return A reference to this object
   /// \throw nothing
   Range_& operator=(Range_&& other) {
diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h
index a778450ab5..5009391c16 100644
--- a/src/TiledArray/tensor/tensor.h
+++ b/src/TiledArray/tensor/tensor.h
@@ -51,7 +51,7 @@ struct TraceIsDefined, enable_if_numeric_t> : std::true_type {};
 
 /// An N-dimensional tensor object
 
-/// A contiguous row-major tensor with shallow-copy semantics.
+/// A contiguous row-major tensor with __shallow-copy__ semantics.
 /// As of TiledArray 1.1 Tensor represents a batch of tensors with same Range
 /// (the default batch size = 1).
 /// \tparam T the value type of this tensor
@@ -186,12 +186,22 @@ class Tensor {
 #endif
   }
 
-  range_type range_;  ///< range
+  range_type range_;  ///< Range
+  /// Number of `range_`-sized blocks in `data_`
+  /// \note this is not used for (in)equality comparison
   size_t batch_size_ = 1;
   std::shared_ptr data_;  ///< Shared pointer to the data
 
 public:
+  /// constructs an empty (null) Tensor
+  /// \post `this->empty()`
   Tensor() = default;
+
+  /// copy constructor
+
+  /// \param[in] other an object to copy data from
+  /// \post `*this` is a shallow copy of \p other,
+  /// i.e. `*this == other && this->data()==other.data()`
   Tensor(const Tensor& other)
       : range_(other.range_),
         batch_size_(other.batch_size_),
@@ -204,6 +214,13 @@ class Tensor {
   }
 #endif
   }
+
+  /// move constructor
+
+  /// \param[in,out] other an object to move data from;
+  /// on return \p other is in an empty (null) but not
+  /// necessarily default state
+  /// \post `other.empty()`
   Tensor(Tensor&& other)
       : range_(std::move(other.range_)),
         batch_size_(std::move(other.batch_size_)),
@@ -219,6 +236,7 @@ class Tensor {
   }
 #endif
   }
+
   ~Tensor() {
 #ifdef TA_TENSOR_MEM_TRACE
     if (nbytes() >= trace_if_larger_than_) {
@@ -509,6 +527,11 @@ class Tensor {
     return *this;
   }
 
+  /// copy assignment operator
+
+  /// \param[in] other an object to copy data from
+  /// \post `*this` is a shallow copy of \p other,
+  /// i.e. `*this == other && this->data()==other.data()`
   Tensor& operator=(const Tensor& other) {
 #ifdef TA_TENSOR_MEM_TRACE
     if (nbytes() >= trace_if_larger_than_) {
@@ -532,6 +555,12 @@ class Tensor {
     return *this;
   }
 
+  /// move assignment operator
+
+  /// \param[in,out] other an object to move data from;
+  /// on return \p other is in an empty (null) but not
+  /// necessarily default state
+  /// \post `other.empty()`
   Tensor& operator=(Tensor&& other) {
 #ifdef TA_TENSOR_MEM_TRACE
     if (nbytes() >= trace_if_larger_than_) {
@@ -788,9 +817,21 @@ class Tensor {
 
   /// Test if the tensor is empty
 
-  /// \return \c true if this tensor was default constructed (contains no
-  /// data), otherwise \c false.
-  bool empty() const { return this->data_.use_count() == 0; }
+  /// \return \c true if this tensor contains no
+  /// data, otherwise \c false.
+  /// \note Empty Tensor is *default-ish*, i.e.
it is *equal* to + /// a default-constructed Tensor + /// (`this->empty()` is equivalent to `*this == Tensor{}`), + /// but is not identical + /// to a default-constructed Tensor (e.g., `this->empty()` does not + /// imply `this->batch_size() == Tensor{}.batch_size()`) + bool empty() const { + // empty data_ implies default values for range_ (but NOT batch_size_) + TA_ASSERT( + (this->data_.use_count() == 0 && !this->range_) || + (this->data_.use_count() != 0 && this->range_)); // range is empty + return this->data_.use_count() == 0; + } /// MADNESS serialization function @@ -2403,11 +2444,23 @@ void gemm(Alpha alpha, const Tensor& A, const Tensor& B, // template // const typename Tensor::range_type Tensor::empty_range_; +/// equality comparison +/// \param[in] a a Tensor object +/// \param[in] b another Tensor object +/// \return true if ranges and data of \p a and \p b are equal +/// \internal this does not compare batch_size so any +/// 2 empty tensors are equal even if their batch_size +/// differ template bool operator==(const Tensor& a, const Tensor& b) { return a.range() == b.range() && std::equal(a.data(), a.data() + a.size(), b.data()); } + +/// inequality comparison +/// \param[in] a a Tensor object +/// \param[in] b another Tensor object +/// \return true if ranges and data of \p a and \p b are not equal template bool operator!=(const Tensor& a, const Tensor& b) { return !(a == b); From 6f869b4ca3296aba3e057c7d20b7734a3e42a648 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 2 Jan 2023 09:31:19 -0500 Subject: [PATCH 6/7] [unit] added move copy/assignment tests to tensor_suite --- tests/tensor.cpp | 68 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/tests/tensor.cpp b/tests/tensor.cpp index dabdc6f934..b329b5af44 100644 --- a/tests/tensor.cpp +++ b/tests/tensor.cpp @@ -116,8 +116,7 @@ BOOST_AUTO_TEST_CASE(copy_constructor) { BOOST_CHECK_EQUAL(tc.empty(), t.empty()); - // Check that range data is correct - BOOST_CHECK_EQUAL(tc.data(), t.data()); + BOOST_CHECK_EQUAL(tc.data(), t.data()); // N.B. shallow copy! BOOST_CHECK_EQUAL(tc.size(), t.size()); BOOST_CHECK_EQUAL(tc.range(), t.range()); BOOST_CHECK_EQUAL(tc.begin(), t.begin()); @@ -129,6 +128,28 @@ BOOST_AUTO_TEST_CASE(copy_constructor) { BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); } +BOOST_AUTO_TEST_CASE(move_constructor) { + TensorN tc(t); + TensorN td(std::move(tc)); + + BOOST_CHECK_EQUAL(td.empty(), t.empty()); + + // Check that range data is correct + BOOST_CHECK_EQUAL(td.data(), t.data()); + BOOST_CHECK_EQUAL(td.size(), t.size()); + BOOST_CHECK_EQUAL(td.range(), t.range()); + BOOST_CHECK_EQUAL(td.begin(), t.begin()); + BOOST_CHECK_EQUAL(td.end(), t.end()); + BOOST_CHECK_EQUAL(const_cast(td).begin(), + const_cast(t).begin()); + BOOST_CHECK_EQUAL(const_cast(td).end(), + const_cast(t).end()); + BOOST_CHECK_EQUAL_COLLECTIONS(td.begin(), td.end(), t.begin(), t.end()); + + // check that moved-from object is empty + BOOST_CHECK(tc.empty()); +} + BOOST_AUTO_TEST_CASE(permute_constructor) { Permutation perm = make_perm(); @@ -289,6 +310,46 @@ BOOST_AUTO_TEST_CASE(clone) { BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); } +BOOST_AUTO_TEST_CASE(copy_assignment_operator) { + TensorN tc; + tc = t; + + BOOST_CHECK_EQUAL(tc.empty(), t.empty()); + + BOOST_CHECK_EQUAL(tc.data(), t.data()); // N.B. shallow copy! 
+  BOOST_CHECK_EQUAL(tc.size(), t.size());
+  BOOST_CHECK_EQUAL(tc.range(), t.range());
+  BOOST_CHECK_EQUAL(tc.begin(), t.begin());
+  BOOST_CHECK_EQUAL(tc.end(), t.end());
+  BOOST_CHECK_EQUAL(const_cast(tc).begin(),
+                    const_cast(t).begin());
+  BOOST_CHECK_EQUAL(const_cast(tc).end(),
+                    const_cast(t).end());
+  BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end());
+}
+
+BOOST_AUTO_TEST_CASE(move_assignment_operator) {
+  TensorN td(t);
+  TensorN tc;
+  tc = std::move(td);
+
+  BOOST_CHECK_EQUAL(tc.empty(), t.empty());
+
+  // Check that range data is correct
+  BOOST_CHECK_EQUAL(tc.data(), t.data());
+  BOOST_CHECK_EQUAL(tc.size(), t.size());
+  BOOST_CHECK_EQUAL(tc.range(), t.range());
+  BOOST_CHECK_EQUAL(tc.begin(), t.begin());
+  BOOST_CHECK_EQUAL(tc.end(), t.end());
+  BOOST_CHECK_EQUAL(const_cast(tc).begin(),
+                    const_cast(t).begin());
+  BOOST_CHECK_EQUAL(const_cast(tc).end(),
+                    const_cast(t).end());
+  BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end());
+  // moved-from object is empty
+  BOOST_CHECK(td.empty());
+}
+
 BOOST_AUTO_TEST_CASE(range_accessor) {
   BOOST_CHECK_EQUAL_COLLECTIONS(
       t.range().lobound_data(), t.range().lobound_data() + t.range().rank(),
@@ -532,8 +593,7 @@ BOOST_AUTO_TEST_CASE(gemm) {
     integer m = 1, n = 1, k = 1;
     gemm_helper_nt.compute_matrix_sizes(m, n, k, x.range(), y.range());
     math::blas::gemm(TiledArray::math::blas::Op::NoTrans,
-                     TiledArray::math::blas::Op::Trans,
-                     m, n, k, alpha,
+                     TiledArray::math::blas::Op::Trans, m, n, k, alpha,
                      x.data(), k, y.data(), k, 0, z0_ref.data(), n);
   }
   for (std::size_t i = 0ul; i < z0.size(); ++i)

From 848af23515fe4327e3648179ffd150e86516505e Mon Sep 17 00:00:00 2001
From: Eduard Valeyev
Date: Mon, 2 Jan 2023 14:23:43 -0500
Subject: [PATCH 7/7] PtrRegistry can skip registration (this avoids locking
 in insert/erase) and supports per-thread logging

---
 src/TiledArray/util/ptr_registry.cpp | 110 +++++++++++++++++++--------
 src/TiledArray/util/ptr_registry.h   |  33 ++++++++
 2 files changed, 112 insertions(+), 31 deletions(-)

diff --git a/src/TiledArray/util/ptr_registry.cpp b/src/TiledArray/util/ptr_registry.cpp
index e309713391..6f8f96a41c 100644
--- a/src/TiledArray/util/ptr_registry.cpp
+++ b/src/TiledArray/util/ptr_registry.cpp
@@ -57,6 +57,26 @@ PtrRegistry& PtrRegistry::log(std::ostream* os_ptr) {
 
 std::ostream* PtrRegistry::log() const { return log_; }
 
+PtrRegistry& PtrRegistry::log_only(bool tf) {
+  log_only_ = tf;
+  return *this;
+}
+
+bool PtrRegistry::log_only() const { return log_only_; }
+
+PtrRegistry& PtrRegistry::thread_local_logging(bool tf) {
+  thread_local_logging_ = tf;
+  return *this;
+}
+
+bool PtrRegistry::thread_local_logging() const { return thread_local_logging_; }
+
+PtrRegistry& PtrRegistry::thread_local_log_filename_prefix(
+    const std::string& pfx) {
+  thread_local_log_filename_prefix_ = pfx;
+  return *this;
+}
+
 PtrRegistry& PtrRegistry::append_backtrace(bool bt) {
   append_backtrace_ = bt;
   return *this;
@@ -84,7 +104,11 @@ std::size_t PtrRegistry::size() const {
 
 void PtrRegistry::insert(void* ptr, std::size_t sz, const std::string& context,
                          bool backtrace) {
-  std::scoped_lock lock(this->mtx_);
+  auto* log = thread_local_logging_ ?
thread_local_log() : log_; + + // early exit + if (log_only_ && !log) return; + std::string creation_context = context; if (backtrace) { detail::Backtrace bt; @@ -92,30 +116,39 @@ void PtrRegistry::insert(void* ptr, std::size_t sz, const std::string& context, detail::remove_linebreaks(bt_str); creation_context += ":::::" + bt_str; } - if (log_) { - *log_ << "PtrRegistry::insert():::::" << ptr << ":::::" << creation_context - << std::endl + if (log) { + *log << "PtrRegistry::insert():::::" << ptr << ":::::" << creation_context + << std::endl #ifdef TA_TENSOR_MEM_PROFILE - << " TA::Tensor allocator status {" - << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() - << "," - << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() - << "," - << "act=" << hostEnv::instance()->host_allocator().getActualSize() - << "}" - << " bytes" << std::endl + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl #endif // TA_TENSOR_MEM_PROFILE ; } - auto& sz_ptrs = ptrs_[sz]; - TA_ASSERT(sz_ptrs.find(ptr) == sz_ptrs.end()); - sz_ptrs.emplace(ptr, std::move(creation_context)); + + // track unless log_only_=true + if (!log_only_) { + std::scoped_lock lock(this->mtx_); + auto& sz_ptrs = ptrs_[sz]; + TA_ASSERT(sz_ptrs.find(ptr) == sz_ptrs.end()); + sz_ptrs.emplace(ptr, std::move(creation_context)); + } } void PtrRegistry::erase(void* ptr, std::size_t sz, const std::string& context, bool backtrace) { - std::scoped_lock lock(this->mtx_); - if (log_) { + auto* log = thread_local_logging_ ? thread_local_log() : log_; + + // early exit + if (log_only_ && !log) return; + + if (log) { std::string erasure_context = context; if (backtrace) { detail::Backtrace bt; @@ -123,24 +156,29 @@ void PtrRegistry::erase(void* ptr, std::size_t sz, const std::string& context, detail::remove_linebreaks(bt_str); erasure_context += ":::::" + bt_str; } - *log_ << "PtrRegistry::erase():::::" << ptr << ":::::" << erasure_context - << std::endl + *log << "PtrRegistry::erase():::::" << ptr << ":::::" << erasure_context + << std::endl #ifdef TA_TENSOR_MEM_PROFILE - << " TA::Tensor allocator status {" - << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() - << "," - << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() - << "," - << "act=" << hostEnv::instance()->host_allocator().getActualSize() - << "}" - << " bytes" << std::endl + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl #endif // TA_TENSOR_MEM_PROFILE ; } - auto& sz_ptrs = ptrs_[sz]; - auto it = sz_ptrs.find(ptr); - TA_ASSERT(it != sz_ptrs.end()); - sz_ptrs.erase(it); + + // track unless log_only=true + if (!log_only_) { + std::scoped_lock lock(this->mtx_); + auto& sz_ptrs = ptrs_[sz]; + auto it = sz_ptrs.find(ptr); + TA_ASSERT(it != sz_ptrs.end()); + sz_ptrs.erase(it); + } } PtrRegistry& PtrRegistry::insert(void* ptr, std::size_t sz, @@ -187,4 +225,14 @@ PtrRegistry& PtrRegistry::erase_bt(void* ptr, const std::string& context) { return *this; } +std::ostream* PtrRegistry::thread_local_log() { + static thread_local std::shared_ptr 
thread_local_log_ =
+      std::make_shared(
+          thread_local_log_filename_prefix_ + ".thread_id=" +
+          std::to_string(
+              std::hash{}(std::this_thread::get_id())) +
+          ".trace");
+  return thread_local_log_.get();
+}
+
 }  // namespace TiledArray
diff --git a/src/TiledArray/util/ptr_registry.h b/src/TiledArray/util/ptr_registry.h
index 3baf3219f9..17d33dce9d 100644
--- a/src/TiledArray/util/ptr_registry.h
+++ b/src/TiledArray/util/ptr_registry.h
@@ -68,6 +68,26 @@ struct PtrRegistry {
   /// @return pointer to the active logger; if null, no logging is performed
   std::ostream* log() const;
 
+  /// controls whether this will only do logging
+
+  /// \param tf if true, this will only perform logging and not track the
+  /// pointers
+  /// \note turning this on avoids locking in insert/erase
+  PtrRegistry& log_only(bool tf);
+
+  /// @return true if this will only perform logging and not track the pointers
+  bool log_only() const;
+
+  /// controls whether logging will be done on a per-thread basis
+  /// \param tf if true, this will perform logging on a per-thread basis
+  PtrRegistry& thread_local_logging(bool tf);
+
+  /// @return true if this will perform logging on a per-thread basis
+  bool thread_local_logging() const;
+
+  /// controls the filename prefix used for storing per-thread logs
+  /// \param pfx specifies the filename prefix for per-thread logs
+  PtrRegistry& thread_local_log_filename_prefix(const std::string& pfx);
+
   /// specifies whether to append backtrace to context provided to insert/erase
   /// \param bt if true, calls to insert/erase will append backtrace to the
   /// provided context by default
@@ -91,6 +111,7 @@ struct PtrRegistry {
   /// \param sz size of the object pointed to by \p ptr
   /// \param context creation context; stored alongside the pointer
   /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
   PtrRegistry& erase(void* ptr, const std::string& context = "");
 
   /// erases \p ptr associated with size \p sz from the registry
@@ -138,6 +164,7 @@ struct PtrRegistry {
   /// \param sz size of the object pointed to by \p ptr
   /// \param context erasure context; if logging, will append this and the
   /// backtrace of the caller to the log
   /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
   PtrRegistry& erase_bt(void* ptr, std::size_t sz,
                         const std::string& context = "");
@@ -148,6 +175,7 @@ struct PtrRegistry {
   /// \param context erasure context; if logging, will append this and the
   /// backtrace of the caller to the log
   /// \return `*this`
   /// \note equivalent to `this->erase_bt(ptr, 0, context);`
+  /// \note calls to this are serialized unless `this->log_only()==true`
   PtrRegistry& erase_bt(void* ptr, const std::string& context = "");
 
  private:
@@ -156,6 +184,11 @@ struct PtrRegistry {
   mutable ptr_container_type* unsized_ptrs_ = nullptr;  // &(ptrs_[0])
   bool append_backtrace_ = false;
   std::mutex mtx_;
+  bool log_only_ = false;
+  bool thread_local_logging_ = false;
+  std::string thread_local_log_filename_prefix_;
+
+  std::ostream* thread_local_log();
 
   /// inserts \p ptr associated with size \p sz to the registry
   /// \param ptr pointer to register
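Tying the knobs of the last patch together, a hedged usage sketch (assumes a build configured with `TA_TENSOR_MEM_TRACE=ON`; the prefix string and contexts are made up, while the chained setters are exactly the ones declared above):

```cpp
#include "TiledArray/util/ptr_registry.h"

int main() {
  TiledArray::PtrRegistry registry;
  // log-only mode: events go to per-thread files named
  // "<prefix>.thread_id=<hashed id>.trace"; the pointer table is not
  // maintained, so insert()/erase() never take the mutex
  registry.log_only(true)
      .thread_local_logging(true)
      .thread_local_log_filename_prefix("ta_mem")
      .append_backtrace(true);

  int x = 0;
  registry.insert(&x, sizeof(x), "example insert");
  registry.erase(&x, sizeof(x), "example erase");
}
```

The trade-off of log-only mode is that the registry no longer knows what is live, so leak checks of the kind the unit-test fixture performs (`ptr_registry()->size() == 0`) become unavailable; the per-thread trace files must be post-processed instead.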