diff --git a/CMakeLists.txt b/CMakeLists.txt index 02a8012b99..06eb7fb533 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,10 @@ if((ENABLE_GPERFTOOLS OR ENABLE_TCMALLOC_MINIMAL) AND CMAKE_SYSTEM_NAME MATCHES add_feature_info(Libunwind ENABLE_LIBUNWIND "Libunwind provides stack unwinding") endif() -option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" OFF) +option(TA_TENSOR_MEM_TRACE "Turn on instrumented tracing of TA::Tensor memory use" OFF) +add_feature_info(TENSOR_MEM_TRACE TA_TENSOR_MEM_TRACE "instrumented tracing of TA::Tensor memory use") + +option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" ${TA_TENSOR_MEM_TRACE}) add_feature_info(TENSOR_MEM_PROFILE TA_TENSOR_MEM_PROFILE "instrumented profiling of TA::Tensor memory use") option(TA_EXPERT "TiledArray Expert mode: disables automatically downloading or building dependencies" OFF) diff --git a/INSTALL.md b/INSTALL.md index fb459d7690..5be4905635 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -417,6 +417,7 @@ support may be added. * `TA_SIGNED_1INDEX_TYPE` -- Set to `OFF` to use unsigned 1-index coordinate type (default for TiledArray 1.0.0-alpha.2 and older). The default is `ON`, which enables the use of negative indices in coordinates. * `TA_MAX_SOO_RANK_METADATA` -- Specifies the maximum rank for which to use Small Object Optimization (hence, avoid the use of the heap) for metadata. The default is `8`. * `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile host memory allocations used by TA::Tensor. This causes the use of Umpire for host memory allocation. This also enables additional tracing facilities provided by Umpire; these can be controlled via [environment variable `UMPIRE_LOG_LEVEL`](https://umpire.readthedocs.io/en/develop/sphinx/features/logging_and_replay.html), but note that the default is to log Umpire info into a file rather than stdout. +* `TA_TENSOR_MEM_TRACE` -- Set to `ON` to *trace* host memory allocations used by TA::Tensor. This turns on support for tracking memory used by `Tensor` objects; such tracking must be enabled programmatically. This can greatly increase memory consumption by the application and is only intended for expert developers troubleshooting memory use by TiledArray. * `TA_UT_CTEST_TIMEOUT` -- The value (in seconds) of the timeout to use for running the TA unit tests via CTest when building the `check`/`check-tiledarray` targets. The default timeout is 1500s. # Build TiledArray diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7b3b05d003..73da617ef2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -189,6 +189,8 @@ TiledArray/util/bug.h TiledArray/util/function.h TiledArray/util/initializer_list.h TiledArray/util/logger.h +TiledArray/util/ptr_registry.cpp +TiledArray/util/ptr_registry.h TiledArray/util/random.h TiledArray/util/singleton.h TiledArray/util/threads.h diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index 68f5dc1374..0c4d5d5cbc 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -81,6 +81,9 @@ /* Is TA::Tensor memory profiling enabled? */ #cmakedefine TA_TENSOR_MEM_PROFILE 1 +/* Is TA::Tensor memory tracing enabled? */ +#cmakedefine TA_TENSOR_MEM_TRACE 1 + /* Is TTG available? 
*/ #cmakedefine TILEDARRAY_HAS_TTG 1 diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index 0b35f1e951..9c0e54a757 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -591,7 +591,9 @@ class Range { /// Move Constructor - /// \param other The range to be copied + /// \param[in,out] other The range to be copied; set to default (null) state + /// on return + /// \post `other == Range{}` Range(Range_&& other) : datavec_(std::move(other.datavec_)), offset_(other.offset_), @@ -639,7 +641,8 @@ class Range { /// Move assignment operator - /// \param other The range to be copied + /// \param[in,out] other The range to be copied; set to default (null) state + /// on return /// \return A reference to this object /// \throw nothing Range_& operator=(Range_&& other) { diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 94e59425cf..5009391c16 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -20,6 +20,8 @@ #ifndef TILEDARRAY_TENSOR_TENSOR_H__INCLUDED #define TILEDARRAY_TENSOR_TENSOR_H__INCLUDED +#include "TiledArray/config.h" + #include "TiledArray/host/allocator.h" #include "TiledArray/math/blas.h" @@ -30,6 +32,7 @@ #include "TiledArray/tile_interface/permute.h" #include "TiledArray/tile_interface/trace.h" #include "TiledArray/util/logger.h" +#include "TiledArray/util/ptr_registry.h" namespace TiledArray { @@ -48,7 +51,7 @@ struct TraceIsDefined, enable_if_numeric_t> : std::true_type {}; /// An N-dimensional tensor object -/// A contiguous row-major tensor with shallow-copy semantics. +/// A contiguous row-major tensor with __shallow-copy__ semantics. /// As of TiledArray 1.1 Tensor represents a batch of tensors with same Range /// (the default batch size = 1). /// \tparam T the value type of this tensor @@ -61,6 +64,15 @@ class Tensor { std::is_assignable, T>::value, "Tensor: T must be an assignable type (e.g. cannot be const)"); +#ifdef TA_TENSOR_MEM_TRACE + template + std::string make_string(Ts&&... ts) { + std::ostringstream oss; + (oss << ... << ts); + return oss.str(); + } +#endif + public: typedef Range range_type; ///< Tensor range type typedef typename range_type::index1_type index1_type; ///< 1-index type @@ -107,12 +119,33 @@ class Tensor { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); } - auto deleter = [allocator = std::move(allocator), - size](auto&& ptr) mutable { + auto deleter = [ +#ifdef TA_TENSOR_MEM_TRACE + this, +#endif + allocator = std::move(allocator), + size](auto&& ptr) mutable { std::destroy_n(ptr, size); + // N.B. 
deregister ptr *before* deallocating to avoid possible race + // between reallocation and deregistering +#ifdef TA_TENSOR_MEM_TRACE + const auto nbytes = size * sizeof(T); + if (nbytes >= trace_if_larger_than_) { + ptr_registry()->erase(ptr, nbytes, + make_string("created by TA::Tensor*=", this)); + } +#endif allocator.deallocate(ptr, size); }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor::data_.get()=", data_.get())); + ptr_registry()->insert(data_.get(), nbytes(), + make_string("created by TA::Tensor*=", this)); + } +#endif } Tensor(range_type&& range, size_t batch_size, bool default_construct) @@ -124,21 +157,95 @@ class Tensor { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); } - auto deleter = [allocator = std::move(allocator), - size](auto&& ptr) mutable { + auto deleter = [ +#ifdef TA_TENSOR_MEM_TRACE + this, +#endif + allocator = std::move(allocator), + size](auto&& ptr) mutable { std::destroy_n(ptr, size); + // N.B. deregister ptr *before* deallocating to avoid possible race + // between reallocation and deregistering +#ifdef TA_TENSOR_MEM_TRACE + const auto nbytes = size * sizeof(T); + if (nbytes >= trace_if_larger_than_) { + ptr_registry()->erase(ptr, nbytes, + make_string("created by TA::Tensor*=", this)); + } +#endif allocator.deallocate(ptr, size); }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor::data_.get()=", data_.get())); + ptr_registry()->insert(data_.get(), nbytes(), + make_string("created by TA::Tensor*=", this)); + } +#endif } - range_type range_; ///< range + range_type range_; ///< Range + /// Number of `range_`-sized blocks in `data_` + /// \note this is not used for (in)equality comparison size_t batch_size_ = 1; std::shared_ptr data_; ///< Shared pointer to the data public: + /// constructs an empty (null) Tensor + /// \post `this->empty()` Tensor() = default; + /// copy constructor + + /// \param[in] other an object to copy data from + /// \post `*this` is a shallow copy of \p other , + /// i.e. `*this == other && this->data()==other.data()` + Tensor(const Tensor& other) + : range_(other.range_), + batch_size_(other.batch_size_), + data_(other.data_) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->insert( + this, make_string("TA::Tensor(const Tensor& other)::data_.get()=", + data_.get())); + } +#endif + } + + /// move constructor + + /// \param[in,out] other an object to move data from; + /// on return \p other is in empty (null) but not + /// necessarily default state + /// \post `other.empty()` + Tensor(Tensor&& other) + : range_(std::move(other.range_)), + batch_size_(std::move(other.batch_size_)), + data_(std::move(other.data_)) { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + &other, + make_string("TA::Tensor(Tensor&& other)::data_.get()=", data_.get())); + ptr_registry()->insert( + this, + make_string("TA::Tensor(Tensor&& other)::data_.get()=", data_.get())); + } +#endif + } + + ~Tensor() { +#ifdef TA_TENSOR_MEM_TRACE + if (nbytes() >= trace_if_larger_than_) { + ptr_registry()->erase( + this, make_string("TA::~Tensor()::data_.get()=", data_.get())); + } +#endif + } + /// Construct a tensor with a range equal to \c range. 
The data is
   /// uninitialized.
   /// \param range The range of the tensor
@@ -359,7 +466,15 @@ class Tensor {
   /// \param data shared pointer to the data
   Tensor(const range_type& range, size_t batch_size,
          std::shared_ptr data)
-      : range_(range), batch_size_(batch_size), data_(data) {}
+      : range_(range), batch_size_(batch_size), data_(data) {
+#ifdef TA_TENSOR_MEM_TRACE
+    if (nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->insert(
+          this, make_string("TA::Tensor(range, batch_size, data)::data_.get()=",
+                            data_.get()));
+    }
+#endif
+  }
 
   /// The batch size accessor
 
@@ -412,6 +527,66 @@ class Tensor {
     return *this;
   }
 
+  /// copy assignment operator
+
+  /// \param[in] other an object to copy data from
+  /// \post `*this` is a shallow copy of \p other ,
+  /// i.e. `*this == other && this->data()==other.data()`
+  Tensor& operator=(const Tensor& other) {
+#ifdef TA_TENSOR_MEM_TRACE
+    if (nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->erase(
+          this,
+          make_string("TA::Tensor::operator=(const Tensor&)::data_.get()=",
+                      data_.get()));
+    }
+#endif
+    range_ = other.range_;
+    batch_size_ = other.batch_size_;
+    data_ = other.data_;
+#ifdef TA_TENSOR_MEM_TRACE
+    if (nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->insert(
+          this,
+          make_string("TA::Tensor::operator=(const Tensor&)::data_.get()=",
+                      data_.get()));
+    }
+#endif
+    return *this;
+  }
+
+  /// move assignment operator
+
+  /// \param[in,out] other an object to move data from;
+  /// on return \p other is left in an empty (null) but not
+  /// necessarily default state
+  /// \post `other.empty()`
+  Tensor& operator=(Tensor&& other) {
+#ifdef TA_TENSOR_MEM_TRACE
+    if (nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->erase(
+          this, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=",
+                            data_.get()));
+    }
+    if (other.nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->erase(
+          &other, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=",
+                              other.data_.get()));
+    }
+#endif
+    range_ = std::move(other.range_);
+    batch_size_ = std::move(other.batch_size_);
+    data_ = std::move(other.data_);
+#ifdef TA_TENSOR_MEM_TRACE
+    if (nbytes() >= trace_if_larger_than_) {
+      ptr_registry()->insert(
+          this, make_string("TA::Tensor::operator=(Tensor&&)::data_.get()=",
+                            data_.get()));
+    }
+#endif
+    return *this;
+  }
+
   /// Tensor range object accessor
 
   /// \return The tensor range object
@@ -422,6 +597,14 @@ class Tensor {
   /// \return The number of elements in the tensor
   ordinal_type size() const { return (this->range().volume()); }
 
+  /// Tensor data size (in bytes) accessor
+
+  /// \return The number of bytes occupied by this tensor's data
+  /// \warning this only returns a valid value if this is a tensor of scalars
+  std::size_t nbytes() const {
+    return this->range().volume() * this->batch_size_ * sizeof(T);
+  }
+
   /// Const element accessor
 
   /// \tparam Ordinal an integer type that represents an ordinal
@@ -634,9 +817,21 @@ class Tensor {
 
   /// Test if the tensor is empty
 
-  /// \return \c true if this tensor was default constructed (contains no
-  /// data), otherwise \c false.
-  bool empty() const { return this->data_.use_count() == 0; }
+  /// \return \c true if this tensor contains no
+  /// data, otherwise \c false.
+  /// \note an empty Tensor is *default-like*, i.e.
it is *equal* to + /// a default-constructed Tensor + /// (`this->empty()` is equivalent to `*this == Tensor{}`), + /// but is not identical + /// to a default-constructed Tensor (e.g., `this->empty()` does not + /// imply `this->batch_size() == Tensor{}.batch_size()`) + bool empty() const { + // empty data_ implies default values for range_ (but NOT batch_size_) + TA_ASSERT( + (this->data_.use_count() == 0 && !this->range_) || + (this->data_.use_count() != 0 && this->range_)); // range is empty + return this->data_.use_count() == 0; + } /// MADNESS serialization function @@ -668,9 +863,35 @@ class Tensor { /// \param other The tensor to swap with this void swap(Tensor& other) { +#ifdef TA_TENSOR_MEM_TRACE + bool this_to_be_traced = false; + bool other_to_be_traced = false; + if (nbytes() >= trace_if_larger_than_) { + this_to_be_traced = true; + ptr_registry()->erase( + this, make_string("TA::Tensor::swap()::data_.get()=", data_.get())); + } + if (other.nbytes() >= trace_if_larger_than_) { + other_to_be_traced = true; + ptr_registry()->erase( + &other, + make_string("TA::Tensor::swap()::data_.get()=", other.data_.get())); + } +#endif std::swap(data_, other.data_); std::swap(range_, other.range_); std::swap(batch_size_, other.batch_size_); +#ifdef TA_TENSOR_MEM_TRACE + if (other_to_be_traced) { + ptr_registry()->insert( + this, make_string("TA::Tensor::swap()::data_.get()=", data_.get())); + } + if (this_to_be_traced) { + ptr_registry()->insert( + &other, + make_string("TA::Tensor::swap()::data_.get()=", other.data_.get())); + } +#endif } // clang-format off @@ -1995,8 +2216,43 @@ class Tensor { return reduce(other, mult_add_op, add_op, numeric_type(0)); } + /// @return pointer to the PtrRegistry object used for tracing TA::Tensor + /// lifetime + /// @warning only nonnull if configured with `TA_TENSOR_MEM_TRACE=ON` + static PtrRegistry* ptr_registry() { +#ifdef TA_TENSOR_MEM_TRACE + static PtrRegistry registry; + return ®istry; +#else + return nullptr; +#endif + } + +#ifdef TA_TENSOR_MEM_TRACE + /// @param nbytes sets the minimum size of TA::Tensor objects whose lifetime + /// will be tracked; must be greater or equal to 1 + static void trace_if_larger_than(std::size_t nbytes) { + TA_ASSERT(nbytes >= 1); + trace_if_larger_than_ = nbytes; + } + /// @return the minimum size of TA::Tensor objects whose lifetime + /// will be tracked + static std::size_t trace_if_larger_than() { return trace_if_larger_than_; } +#endif + + private: +#ifdef TA_TENSOR_MEM_TRACE + static std::size_t trace_if_larger_than_; +#endif + }; // class Tensor +#ifdef TA_TENSOR_MEM_TRACE +template +std::size_t Tensor::trace_if_larger_than_ = + std::numeric_limits::max(); +#endif + template Tensor operator*(const Permutation& p, const Tensor& t) { return t.permute(p); @@ -2188,11 +2444,23 @@ void gemm(Alpha alpha, const Tensor& A, const Tensor& B, // template // const typename Tensor::range_type Tensor::empty_range_; +/// equality comparison +/// \param[in] a a Tensor object +/// \param[in] b another Tensor object +/// \return true if ranges and data of \p a and \p b are equal +/// \internal this does not compare batch_size so any +/// 2 empty tensors are equal even if their batch_size +/// differ template bool operator==(const Tensor& a, const Tensor& b) { return a.range() == b.range() && std::equal(a.data(), a.data() + a.size(), b.data()); } + +/// inequality comparison +/// \param[in] a a Tensor object +/// \param[in] b another Tensor object +/// \return true if ranges and data of \p a and \p b are not equal template bool 
operator!=(const Tensor& a, const Tensor& b) { return !(a == b); diff --git a/src/TiledArray/util/ptr_registry.cpp b/src/TiledArray/util/ptr_registry.cpp new file mode 100644 index 0000000000..6f8f96a41c --- /dev/null +++ b/src/TiledArray/util/ptr_registry.cpp @@ -0,0 +1,238 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * util/ptr_registry.cpp + * December 31, 2022 + * + */ + +#include + +#include "TiledArray/util/ptr_registry.h" + +#include "TiledArray/host/env.h" +#include "TiledArray/util/backtrace.h" + +namespace TiledArray { + +namespace detail { +void remove_linebreaks(std::string& str) { + auto it = str.begin(); + while (it != str.end()) { + if (*it == '\n') + it = str.erase(it); + else + ++it; + } +} +} // namespace detail + +PtrRegistry::PtrRegistry() = default; + +PtrRegistry::PtrRegistry(std::ostream& os) : log_(&os) {} + +PtrRegistry::~PtrRegistry() = default; + +PtrRegistry& PtrRegistry::log(std::ostream* os_ptr) { + log_ = os_ptr; + return *this; +} + +std::ostream* PtrRegistry::log() const { return log_; } + +PtrRegistry& PtrRegistry::log_only(bool tf) { + log_only_ = tf; + return *this; +} + +bool PtrRegistry::log_only() const { return log_only_; } + +PtrRegistry& PtrRegistry::thread_local_logging(bool tf) { + thread_local_logging_ = tf; + return *this; +} + +bool PtrRegistry::thread_local_logging() const { return thread_local_logging_; } + +PtrRegistry& PtrRegistry::thread_local_log_filename_prefix( + const std::string& pfx) { + thread_local_log_filename_prefix_ = pfx; + return *this; +} + +PtrRegistry& PtrRegistry::append_backtrace(bool bt) { + append_backtrace_ = bt; + return *this; +} + +bool PtrRegistry::append_backtrace() const { return append_backtrace_; } + +const PtrRegistry::sized_ptr_container_type& PtrRegistry::sized_ptrs() const { + return ptrs_; +} +const PtrRegistry::ptr_container_type& PtrRegistry::unsized_ptrs() const { + if (unsized_ptrs_ == nullptr) { + unsized_ptrs_ = &(const_cast(ptrs_)[0]); + } + return *unsized_ptrs_; +} + +std::size_t PtrRegistry::size() const { + return std::accumulate(ptrs_.begin(), ptrs_.end(), 0, + [](const auto& total_size, const auto& sz_ptrs) { + auto&& [sz, ptrs] = sz_ptrs; + return total_size + ptrs.size(); + }); +} + +void PtrRegistry::insert(void* ptr, std::size_t sz, const std::string& context, + bool backtrace) { + auto* log = thread_local_logging_ ? 
thread_local_log() : log_; + + // early exit + if (log_only_ && !log) return; + + std::string creation_context = context; + if (backtrace) { + detail::Backtrace bt; + auto bt_str = bt.str(0); + detail::remove_linebreaks(bt_str); + creation_context += ":::::" + bt_str; + } + if (log) { + *log << "PtrRegistry::insert():::::" << ptr << ":::::" << creation_context + << std::endl +#ifdef TA_TENSOR_MEM_PROFILE + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl +#endif // TA_TENSOR_MEM_PROFILE + ; + } + + // track unless log_only_=true + if (!log_only_) { + std::scoped_lock lock(this->mtx_); + auto& sz_ptrs = ptrs_[sz]; + TA_ASSERT(sz_ptrs.find(ptr) == sz_ptrs.end()); + sz_ptrs.emplace(ptr, std::move(creation_context)); + } +} + +void PtrRegistry::erase(void* ptr, std::size_t sz, const std::string& context, + bool backtrace) { + auto* log = thread_local_logging_ ? thread_local_log() : log_; + + // early exit + if (log_only_ && !log) return; + + if (log) { + std::string erasure_context = context; + if (backtrace) { + detail::Backtrace bt; + auto bt_str = bt.str(0); + detail::remove_linebreaks(bt_str); + erasure_context += ":::::" + bt_str; + } + *log << "PtrRegistry::erase():::::" << ptr << ":::::" << erasure_context + << std::endl +#ifdef TA_TENSOR_MEM_PROFILE + << " TA::Tensor allocator status {" + << "hw=" << hostEnv::instance()->host_allocator().getHighWatermark() + << "," + << "cur=" << hostEnv::instance()->host_allocator().getCurrentSize() + << "," + << "act=" << hostEnv::instance()->host_allocator().getActualSize() + << "}" + << " bytes" << std::endl +#endif // TA_TENSOR_MEM_PROFILE + ; + } + + // track unless log_only=true + if (!log_only_) { + std::scoped_lock lock(this->mtx_); + auto& sz_ptrs = ptrs_[sz]; + auto it = sz_ptrs.find(ptr); + TA_ASSERT(it != sz_ptrs.end()); + sz_ptrs.erase(it); + } +} + +PtrRegistry& PtrRegistry::insert(void* ptr, std::size_t sz, + const std::string& context) { + this->insert(ptr, sz, context, /* backtrace = */ append_backtrace_); + return *this; +} + +PtrRegistry& PtrRegistry::insert(void* ptr, const std::string& context) { + this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ append_backtrace_); + return *this; +} + +PtrRegistry& PtrRegistry::insert_bt(void* ptr, std::size_t sz, + const std::string& context) { + this->insert(ptr, sz, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::insert_bt(void* ptr, const std::string& context) { + this->insert(ptr, /* sz = */ 0, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase(void* ptr, std::size_t sz, + const std::string& context) { + this->erase(ptr, sz, context, /* backtrace = */ append_backtrace_); + return *this; +} + +PtrRegistry& PtrRegistry::erase(void* ptr, const std::string& context) { + this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ append_backtrace_); + return *this; +} + +PtrRegistry& PtrRegistry::erase_bt(void* ptr, std::size_t sz, + const std::string& context) { + this->erase(ptr, sz, context, /* backtrace = */ true); + return *this; +} + +PtrRegistry& PtrRegistry::erase_bt(void* ptr, const std::string& context) { + this->erase(ptr, /* sz = */ 0, context, /* backtrace = */ true); + return *this; +} + +std::ostream* PtrRegistry::thread_local_log() { + static thread_local 
std::shared_ptr thread_local_log_ = + std::make_shared( + thread_local_log_filename_prefix_ + ".thread_id=" + + std::to_string( + std::hash{}(std::this_thread::get_id())) + + ".trace"); + return thread_local_log_.get(); +} + +} // namespace TiledArray diff --git a/src/TiledArray/util/ptr_registry.h b/src/TiledArray/util/ptr_registry.h new file mode 100644 index 0000000000..17d33dce9d --- /dev/null +++ b/src/TiledArray/util/ptr_registry.h @@ -0,0 +1,215 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2022 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * + * util/ptr_registry.h + * December 31, 2022 + * + */ + +#ifndef TILEDARRAY_UTIL_PTR_REGISTRY_H +#define TILEDARRAY_UTIL_PTR_REGISTRY_H + +#include +#include +#include +#include +#include + +namespace TiledArray { + +/// Registry of pointers + +/// Stores {pointer,creation_context} pairs in a hash table (one table per +/// pointer "size"; hash tables are stored in the order of increasing size). +/// +/// @details Useful for capturing graphs of pointers to detect issues +/// with smart pointer lifetimes (e.g., stray refs and/or cycles) and +/// tracing memory use +struct PtrRegistry { + using ptr_container_type = std::unordered_map; + using sized_ptr_container_type = std::map; + + /// constructs empty registry + PtrRegistry(); + PtrRegistry(const PtrRegistry&) = delete; + PtrRegistry(PtrRegistry&&) = delete; + PtrRegistry& operator=(const PtrRegistry&) = delete; + PtrRegistry& operator=(PtrRegistry&&) = delete; + + /// constructs an empty registry configured to log insert/erase + /// events to \p log + /// \param log an ostream for logging insert/erase events + PtrRegistry(std::ostream& log); + + ~PtrRegistry(); + + /// sets the active logger to `*log` + /// \param log pointer to an std::ostream to use for logging insert/erase + /// events; if null, will do no logging + PtrRegistry& log(std::ostream* log); + + /// @return pointer to the active logger; if null, no logging is performed + std::ostream* log() const; + + /// controls whether this will only do logging + + /// \param tf if true, this will only perform logging and not track the + /// pointers \note turning this on will avoid locking in insert/erase + PtrRegistry& log_only(bool tf); + + /// @return true, if this will only perform logging and not track the pointers + bool log_only() const; + + /// controls whether logging will be on per-thread basis + /// \param tf if true, this will perform logging on per-thread basis + PtrRegistry& thread_local_logging(bool tf); + + /// @return true, if this will perform logging on per-thread basis + bool thread_local_logging() const; + + /// controls filename for storing per-thread logs + /// \param pfx specifies the filename prefix for per-thread logs + PtrRegistry& thread_local_log_filename_prefix(const std::string& pfx); + + /// specifies whether to append backtrace to context 
provided to insert/erase
+  /// \param bt if true, calls to insert/erase will append backtrace to the
+  /// provided context by default
+  PtrRegistry& append_backtrace(bool bt);
+
+  /// @return true if backtrace will be appended to context provided to
+  /// insert/erase
+  bool append_backtrace() const;
+
+  /// @return reference to the size-ordered containers of sized pointers
+  const sized_ptr_container_type& sized_ptrs() const;
+
+  /// @return reference to the container of unsized (0-sized) pointers
+  const ptr_container_type& unsized_ptrs() const;
+
+  /// @return total number of pointers in the registry
+  std::size_t size() const;
+
+  /// inserts \p ptr associated with size \p sz into the registry
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& insert(void* ptr, std::size_t sz,
+                      const std::string& context = "");
+
+  /// inserts \p ptr without associated size (i.e. `sz=0`) into the registry
+  /// \param ptr pointer to register
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  /// \note equivalent to `this->insert(ptr, 0, context)`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& insert(void* ptr, const std::string& context = "");
+
+  /// inserts \p ptr associated with size \p sz into the registry,
+  /// appends backtrace of the caller to context separated by `:::::`
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& insert_bt(void* ptr, std::size_t sz,
+                         const std::string& context = "");
+
+  /// inserts \p ptr without associated size (i.e. with `sz=0`) into the registry,
+  /// appends backtrace of the caller to context separated by `:::::`
+  /// \param ptr pointer to register
+  /// \param context creation context; stored alongside the pointer
+  /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& insert_bt(void* ptr, const std::string& context = "");
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this to the log
+  /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& erase(void* ptr, std::size_t sz,
+                     const std::string& context = "");
+
+  /// erases \p ptr without associated size (i.e., `sz=0`) from the registry
+  /// \param ptr pointer to erase
+  /// \param context erasure context; if logging, will append this to the log
+  /// \return `*this`
+  /// \note equivalent to `this->erase(ptr, 0, context)`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& erase(void* ptr, const std::string& context = "");
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \details introduced for symmetry with insert_bt()
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this and the
+  /// backtrace of the caller to the log
+  /// \return `*this`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& erase_bt(void* ptr, std::size_t sz,
+                        const std::string& context = "");
+
+  /// erases \p ptr without associated size (i.e. with `sz=0`) from the registry
+  /// \details introduced for symmetry with insert_bt()
+  /// \param ptr pointer to erase
+  /// \param context erasure context; if logging, will append this and the
+  /// backtrace of the caller to the log
+  /// \return `*this`
+  /// \note equivalent to `this->erase_bt(ptr, 0, context)`
+  /// \note calls to this are serialized unless `this->log_only()==true`
+  PtrRegistry& erase_bt(void* ptr, const std::string& context = "");
+
+ private:
+  std::ostream* log_ = nullptr;
+  sized_ptr_container_type ptrs_;
+  mutable ptr_container_type* unsized_ptrs_ = nullptr;  // &(ptrs_[0])
+  bool append_backtrace_ = false;
+  std::mutex mtx_;
+  bool log_only_ = false;
+  bool thread_local_logging_ = false;
+  std::string thread_local_log_filename_prefix_;
+
+  std::ostream* thread_local_log();
+
+  /// inserts \p ptr associated with size \p sz into the registry
+  /// \param ptr pointer to register
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context string context to attach to the pointer
+  /// \param backtrace if true, appends backtrace of the caller to \p context
+  /// separated by `:::::`
+  void insert(void* ptr, std::size_t sz, const std::string& context,
+              bool backtrace);
+
+  /// erases \p ptr associated with size \p sz from the registry
+  /// \param ptr pointer to erase
+  /// \param sz size of the object pointed to by \p ptr
+  /// \param context erasure context; if logging, will append this to the log
+  /// \param backtrace if true and logging, appends the backtrace of caller to
+  /// the log
+  void erase(void* ptr, std::size_t sz, const std::string& context,
+             bool backtrace);
+};
+
+} // namespace TiledArray
+
+#endif // 
TILEDARRAY_UTIL_PTR_REGISTRY_H diff --git a/tests/ta_test.cpp b/tests/ta_test.cpp index a7141c96a9..8d81e66849 100644 --- a/tests/ta_test.cpp +++ b/tests/ta_test.cpp @@ -23,6 +23,10 @@ #include "TiledArray/external/madness.h" #include "unit_test_config.h" +#ifdef TA_TENSOR_MEM_TRACE +#include +#endif + #include #if (TA_ASSERT_POLICY != TA_ASSERT_THROW) #error "TiledArray unit tests require TA_ASSERT_POLICY=TA_ASSERT_THROW" @@ -40,6 +44,23 @@ GlobalFixture::GlobalFixture() { boost::unit_test::log_all_errors); } +#ifdef TA_TENSOR_MEM_TRACE + { + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor>::trace_if_larger_than(1); + // TiledArray::Tensor>::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor>::trace_if_larger_than(1); + // TiledArray::Tensor>::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::ptr_registry()->log(&std::cout); + TiledArray::Tensor::trace_if_larger_than(1); + // TiledArray::Tensor::trace_if_larger_than(1); + } +#endif + // uncomment to create or create+launch debugger // TiledArray::create_debugger("gdb_xterm", "ta_test"); // TiledArray::create_debugger("lldb_xterm", "ta_test"); @@ -53,6 +74,18 @@ GlobalFixture::~GlobalFixture() { TiledArray::finalize(); world = nullptr; } +#ifdef TA_TENSOR_MEM_TRACE + { + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor>::ptr_registry()->size() == + 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT( + TiledArray::Tensor>::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + TA_ASSERT(TiledArray::Tensor::ptr_registry()->size() == 0); + } +#endif } TiledArray::World* GlobalFixture::world = nullptr; diff --git a/tests/tensor.cpp b/tests/tensor.cpp index dabdc6f934..b329b5af44 100644 --- a/tests/tensor.cpp +++ b/tests/tensor.cpp @@ -116,8 +116,7 @@ BOOST_AUTO_TEST_CASE(copy_constructor) { BOOST_CHECK_EQUAL(tc.empty(), t.empty()); - // Check that range data is correct - BOOST_CHECK_EQUAL(tc.data(), t.data()); + BOOST_CHECK_EQUAL(tc.data(), t.data()); // N.B. shallow copy! 
BOOST_CHECK_EQUAL(tc.size(), t.size()); BOOST_CHECK_EQUAL(tc.range(), t.range()); BOOST_CHECK_EQUAL(tc.begin(), t.begin()); @@ -129,6 +128,28 @@ BOOST_AUTO_TEST_CASE(copy_constructor) { BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); } +BOOST_AUTO_TEST_CASE(move_constructor) { + TensorN tc(t); + TensorN td(std::move(tc)); + + BOOST_CHECK_EQUAL(td.empty(), t.empty()); + + // Check that range data is correct + BOOST_CHECK_EQUAL(td.data(), t.data()); + BOOST_CHECK_EQUAL(td.size(), t.size()); + BOOST_CHECK_EQUAL(td.range(), t.range()); + BOOST_CHECK_EQUAL(td.begin(), t.begin()); + BOOST_CHECK_EQUAL(td.end(), t.end()); + BOOST_CHECK_EQUAL(const_cast(td).begin(), + const_cast(t).begin()); + BOOST_CHECK_EQUAL(const_cast(td).end(), + const_cast(t).end()); + BOOST_CHECK_EQUAL_COLLECTIONS(td.begin(), td.end(), t.begin(), t.end()); + + // check that moved-from object is empty + BOOST_CHECK(tc.empty()); +} + BOOST_AUTO_TEST_CASE(permute_constructor) { Permutation perm = make_perm(); @@ -289,6 +310,46 @@ BOOST_AUTO_TEST_CASE(clone) { BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); } +BOOST_AUTO_TEST_CASE(copy_assignment_operator) { + TensorN tc; + tc = t; + + BOOST_CHECK_EQUAL(tc.empty(), t.empty()); + + BOOST_CHECK_EQUAL(tc.data(), t.data()); // N.B. shallow copy! + BOOST_CHECK_EQUAL(tc.size(), t.size()); + BOOST_CHECK_EQUAL(tc.range(), t.range()); + BOOST_CHECK_EQUAL(tc.begin(), t.begin()); + BOOST_CHECK_EQUAL(tc.end(), t.end()); + BOOST_CHECK_EQUAL(const_cast(tc).begin(), + const_cast(t).begin()); + BOOST_CHECK_EQUAL(const_cast(tc).end(), + const_cast(t).end()); + BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); +} + +BOOST_AUTO_TEST_CASE(move_assignment_operator) { + TensorN td(t); + TensorN tc; + tc = std::move(td); + + BOOST_CHECK_EQUAL(tc.empty(), t.empty()); + + // Check that range data is correct + BOOST_CHECK_EQUAL(tc.data(), t.data()); + BOOST_CHECK_EQUAL(tc.size(), t.size()); + BOOST_CHECK_EQUAL(tc.range(), t.range()); + BOOST_CHECK_EQUAL(tc.begin(), t.begin()); + BOOST_CHECK_EQUAL(tc.end(), t.end()); + BOOST_CHECK_EQUAL(const_cast(tc).begin(), + const_cast(t).begin()); + BOOST_CHECK_EQUAL(const_cast(tc).end(), + const_cast(t).end()); + BOOST_CHECK_EQUAL_COLLECTIONS(tc.begin(), tc.end(), t.begin(), t.end()); + // moved-from object is empty + BOOST_CHECK(td.empty()); +} + BOOST_AUTO_TEST_CASE(range_accessor) { BOOST_CHECK_EQUAL_COLLECTIONS( t.range().lobound_data(), t.range().lobound_data() + t.range().rank(), @@ -532,8 +593,7 @@ BOOST_AUTO_TEST_CASE(gemm) { integer m = 1, n = 1, k = 1; gemm_helper_nt.compute_matrix_sizes(m, n, k, x.range(), y.range()); math::blas::gemm(TiledArray::math::blas::Op::NoTrans, - TiledArray::math::blas::Op::Trans, - m, n, k, alpha, + TiledArray::math::blas::Op::Trans, m, n, k, alpha, x.data(), k, y.data(), k, 0, z0_ref.data(), n); } for (std::size_t i = 0ul; i < z0.size(); ++i)
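Usage sketch (illustrative; assumes a build configured with -DTA_TENSOR_MEM_TRACE=ON and mirrors the tests/ta_test.cpp changes above; the double instantiation, the include, the helper names, and the std::cout log target are assumptions, not part of the patch):

#include <cassert>
#include <iostream>

#include "TiledArray/tensor/tensor.h"

// Hypothetical helper: call once near startup (e.g., right after
// TiledArray::initialize()) to trace every TA::Tensor<double> whose data
// block occupies at least 1 byte, i.e. effectively all non-empty tensors.
void enable_tensor_mem_trace() {
  TiledArray::Tensor<double>::trace_if_larger_than(1);
  // Optional: also log every registry insert/erase event to stdout;
  // without a log stream the registry only tracks the live pointers.
  TiledArray::Tensor<double>::ptr_registry()->log(&std::cout);
}

// Hypothetical helper: call near shutdown, after all tensors should have
// been destroyed; a nonzero count indicates leaked or still-referenced
// Tensor objects (cf. the checks in GlobalFixture::~GlobalFixture() above).
void check_tensor_mem_trace() {
  assert(TiledArray::Tensor<double>::ptr_registry()->size() == 0);
}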