From dbe8b9b3d30bd41b65df20c86e5b3bb413811d49 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 28 May 2024 19:21:06 +1200 Subject: [PATCH 1/9] [ML] Experiment with different allocator types --- bin/autodetect/Main.cc | 3 +++ include/api/CJsonOutputWriter.h | 4 +-- include/core/CBoostJsonPoolAllocator.h | 37 ++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index a39e25f600..0286207967 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -60,6 +60,9 @@ int main(int argc, char** argv) { + char c; + std::cin >> c; + // Register the set of counters in which this program is interested const ml::counter_t::TCounterTypeSet counters{ ml::counter_t::E_TSADNumberNewPeopleNotAllowed, diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index d5bae88698..ffeb149733 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -333,13 +333,13 @@ class API_EXPORT CJsonOutputWriter { //! The documents in this vector will reference memory owned by //! m_JsonPoolAllocator. (Hence this is declared after the memory pool //! so that it's destroyed first when the destructor runs.) - TDocumentWeakPtrVec m_NestedDocs; + TDocumentWeakPtrVec m_NestedDocs; // XXX //! Bucket data waiting to be written. The map is keyed on bucket time. //! The documents in this map will reference memory owned by //! m_JsonPoolAllocator. (Hence this is declared after the memory pool //! so that it's destroyed first when the destructor runs.) - TTimeBucketDataMap m_BucketDataByTime; + TTimeBucketDataMap m_BucketDataByTime; // XXX }; } } diff --git a/include/core/CBoostJsonPoolAllocator.h b/include/core/CBoostJsonPoolAllocator.h index 53788e65e3..b8a425048f 100644 --- a/include/core/CBoostJsonPoolAllocator.h +++ b/include/core/CBoostJsonPoolAllocator.h @@ -20,6 +20,36 @@ namespace json = boost::json; namespace ml { namespace core { + +namespace { +//! Size of the fixed buffer to allocate for parsing JSON +static const size_t FIXED_BUFFER_SIZE = 2*1024*1024; + +class custom_resource : public boost::container::pmr::memory_resource +{ +private: + void* do_allocate( std::size_t bytes, std::size_t /*align*/ ) override + { + return ::operator new( bytes ); + } + + void do_deallocate( void* ptr, std::size_t /*bytes*/, std::size_t /*align*/ ) override + { + return ::operator delete( ptr ); + } + + bool do_is_equal( memory_resource const& other ) const noexcept override + { + // since the global allocation and deallocation functions are used, + // any instance of a custom_resource can deallocate memory allocated + // by another instance of a logging_resource + return dynamic_cast< custom_resource const* >( &other ) != nullptr; + } + +public: + custom_resource(unsigned char [FIXED_BUFFER_SIZE]) {} +}; +} //! \brief //! A boost::json memory allocator using a fixed size buffer //! @@ -56,16 +86,13 @@ class CBoostJsonPoolAllocator { //! \return reference to the underlying storage pointer json::storage_ptr& get() { return m_JsonStoragePointer; } - -private: - //! Size of the fixed buffer to allocate for parsing JSON - static const size_t FIXED_BUFFER_SIZE = 4096; - private: //! fixed size memory buffer used to optimize allocator performance unsigned char m_FixedBuffer[FIXED_BUFFER_SIZE]; //! storage pointer to use for allocating boost::json objects +// json::storage_ptr m_JsonStoragePointer{ +// json::make_shared_resource(m_FixedBuffer)}; json::storage_ptr m_JsonStoragePointer{ json::make_shared_resource(m_FixedBuffer)}; From 4ecf14e19111bf92fb609e589fbd2bb06fb32ea9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 29 May 2024 14:42:08 +1200 Subject: [PATCH 2/9] Tidy up --- bin/autodetect/Main.cc | 4 --- include/core/CBoostJsonPoolAllocator.h | 40 ++++++++++---------------- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 0286207967..ed5dd1f134 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -59,10 +59,6 @@ #include int main(int argc, char** argv) { - - char c; - std::cin >> c; - // Register the set of counters in which this program is interested const ml::counter_t::TCounterTypeSet counters{ ml::counter_t::E_TSADNumberNewPeopleNotAllowed, diff --git a/include/core/CBoostJsonPoolAllocator.h b/include/core/CBoostJsonPoolAllocator.h index b8a425048f..099e5eff13 100644 --- a/include/core/CBoostJsonPoolAllocator.h +++ b/include/core/CBoostJsonPoolAllocator.h @@ -22,42 +22,32 @@ namespace ml { namespace core { namespace { -//! Size of the fixed buffer to allocate for parsing JSON -static const size_t FIXED_BUFFER_SIZE = 2*1024*1024; -class custom_resource : public boost::container::pmr::memory_resource -{ +class custom_resource : public boost::container::pmr::memory_resource { private: - void* do_allocate( std::size_t bytes, std::size_t /*align*/ ) override - { - return ::operator new( bytes ); + void* do_allocate(std::size_t bytes, std::size_t /*align*/) override { + return ::operator new(bytes); } - void do_deallocate( void* ptr, std::size_t /*bytes*/, std::size_t /*align*/ ) override - { - return ::operator delete( ptr ); + void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*align*/) override { + return ::operator delete(ptr); } - bool do_is_equal( memory_resource const& other ) const noexcept override - { - // since the global allocation and deallocation functions are used, + bool do_is_equal(memory_resource const& other) const noexcept override { + // since the global allocation and de-allocation functions are used, // any instance of a custom_resource can deallocate memory allocated // by another instance of a logging_resource - return dynamic_cast< custom_resource const* >( &other ) != nullptr; + return dynamic_cast(&other) != nullptr; } - -public: - custom_resource(unsigned char [FIXED_BUFFER_SIZE]) {} }; } //! \brief -//! A boost::json memory allocator using a fixed size buffer +//! A custom boost::json memory allocator //! //! DESCRIPTION:\n -//! Encapsulates a boost::json monotonic_resource optimized with a fixed size buffer, see https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/allocators/storage_ptr.html +//! Encapsulates a custom boost::json memory_resource, see https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/allocators/storage_ptr.html //! //! IMPLEMENTATION DECISIONS:\n -//! Use a fixed size buffer for the allocator for performance reasons //! //! Retain documents created to ensure that the associated memory allocator exists for the documents //! lifetime @@ -87,14 +77,14 @@ class CBoostJsonPoolAllocator { //! \return reference to the underlying storage pointer json::storage_ptr& get() { return m_JsonStoragePointer; } private: - //! fixed size memory buffer used to optimize allocator performance - unsigned char m_FixedBuffer[FIXED_BUFFER_SIZE]; //! storage pointer to use for allocating boost::json objects -// json::storage_ptr m_JsonStoragePointer{ -// json::make_shared_resource(m_FixedBuffer)}; + //! We use a custom resource allocator for more predictable + //! and timely allocation/de-allocations, see + //! https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/allocators/storage_ptr.html#json.allocators.storage_ptr.user_defined_resource + //! for more details. json::storage_ptr m_JsonStoragePointer{ - json::make_shared_resource(m_FixedBuffer)}; + json::make_shared_resource()}; //! Container used to persist boost::json documents TDocumentPtrVec m_JsonDocumentStore; From 707886e461b69a976a927c01aa95c465ba593b2b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 29 May 2024 15:15:03 +1200 Subject: [PATCH 3/9] Formatting --- include/core/CBoostJsonPoolAllocator.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/core/CBoostJsonPoolAllocator.h b/include/core/CBoostJsonPoolAllocator.h index 099e5eff13..ff16d724ae 100644 --- a/include/core/CBoostJsonPoolAllocator.h +++ b/include/core/CBoostJsonPoolAllocator.h @@ -76,15 +76,14 @@ class CBoostJsonPoolAllocator { //! \return reference to the underlying storage pointer json::storage_ptr& get() { return m_JsonStoragePointer; } -private: +private: //! storage pointer to use for allocating boost::json objects //! We use a custom resource allocator for more predictable //! and timely allocation/de-allocations, see //! https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/allocators/storage_ptr.html#json.allocators.storage_ptr.user_defined_resource //! for more details. - json::storage_ptr m_JsonStoragePointer{ - json::make_shared_resource()}; + json::storage_ptr m_JsonStoragePointer{json::make_shared_resource()}; //! Container used to persist boost::json documents TDocumentPtrVec m_JsonDocumentStore; From 2e4b1ac1c5d3e4ac7818d3a3d0007f10001a71e3 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 29 May 2024 15:34:17 +1200 Subject: [PATCH 4/9] Update changelog --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 4b0ff237d3..31a742dfd2 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Bug Fixes * Remove ineffective optimizations for duplicate strings. (See {ml-pull}2652[#2652], issue: {ml-issue}2130[#2130].) +* Use custom Boost.JSON resource allocator. (See {ml-pull}2674[#2674].) == {es} version 8.13.0 From 36fedd531c0f7428d638f3b8e9622f36ca623590 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 29 May 2024 15:38:36 +1200 Subject: [PATCH 5/9] Further tidy up --- bin/autodetect/Main.cc | 1 + include/api/CJsonOutputWriter.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index ed5dd1f134..a39e25f600 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -59,6 +59,7 @@ #include int main(int argc, char** argv) { + // Register the set of counters in which this program is interested const ml::counter_t::TCounterTypeSet counters{ ml::counter_t::E_TSADNumberNewPeopleNotAllowed, diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index ffeb149733..d5bae88698 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -333,13 +333,13 @@ class API_EXPORT CJsonOutputWriter { //! The documents in this vector will reference memory owned by //! m_JsonPoolAllocator. (Hence this is declared after the memory pool //! so that it's destroyed first when the destructor runs.) - TDocumentWeakPtrVec m_NestedDocs; // XXX + TDocumentWeakPtrVec m_NestedDocs; //! Bucket data waiting to be written. The map is keyed on bucket time. //! The documents in this map will reference memory owned by //! m_JsonPoolAllocator. (Hence this is declared after the memory pool //! so that it's destroyed first when the destructor runs.) - TTimeBucketDataMap m_BucketDataByTime; // XXX + TTimeBucketDataMap m_BucketDataByTime; }; } } From eaee6ef59b0c209b1f0d6386fca7cc304bbaed91 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:09:19 +0200 Subject: [PATCH 6/9] add allocation release --- include/api/CJsonOutputWriter.h | 3 +++ include/core/CBoostJsonWriterBase.h | 11 ++++++++--- include/core/CScopedBoostJsonPoolAllocator.h | 9 +++++++-- lib/api/CJsonOutputWriter.cc | 4 ++++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index d5bae88698..e89e259c7c 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -273,6 +273,9 @@ class API_EXPORT CJsonOutputWriter { //! \p allocatorName A unique identifier for the allocator void pushAllocator(const std::string& allocatorName); + //! release the allocator + void releaseAllocator(const std::string& allocatorName); + //! revert to using the previous allocator for JSON output processing void popAllocator(); diff --git a/include/core/CBoostJsonWriterBase.h b/include/core/CBoostJsonWriterBase.h index 1cd9a76c0e..49becdbd4b 100644 --- a/include/core/CBoostJsonWriterBase.h +++ b/include/core/CBoostJsonWriterBase.h @@ -24,10 +24,7 @@ #include #include -#include -#include #include -#include #include namespace json = boost::json; @@ -134,6 +131,14 @@ class CBoostJsonWriterBase { return this->getAllocator()->get(); } + void releaseAllocator(const std::string& allocatorName) { + if (m_AllocatorCache.find(allocatorName) != m_AllocatorCache.end()) { + TPoolAllocatorPtr allocator = m_AllocatorCache[allocatorName]; + allocator.reset(); + m_AllocatorCache.erase(allocatorName); + } + } + bool isComplete() const { bool ret = m_Levels.empty() || m_Levels.top() == 0; return ret; diff --git a/include/core/CScopedBoostJsonPoolAllocator.h b/include/core/CScopedBoostJsonPoolAllocator.h index 6b29f0e4f9..78ab678b47 100644 --- a/include/core/CScopedBoostJsonPoolAllocator.h +++ b/include/core/CScopedBoostJsonPoolAllocator.h @@ -12,6 +12,7 @@ #define INCLUDED_ml_core_CScopedBoostJsonPoolAllocator_h #include +#include namespace ml { namespace core { @@ -31,14 +32,18 @@ class CScopedBoostJsonPoolAllocator { //! \p allocatorName Unique identifier for the allocator //! \p jsonOutputWriter JSON output writer that will make use of the allocator CScopedBoostJsonPoolAllocator(const std::string& allocatorName, T& writer) - : m_Writer(writer) { + : m_Writer(writer), m_AllocatorName(allocatorName) { m_Writer.pushAllocator(allocatorName); } - ~CScopedBoostJsonPoolAllocator() { m_Writer.popAllocator(); } + ~CScopedBoostJsonPoolAllocator() { + m_Writer.popAllocator(); + m_Writer.releaseAllocator(m_AllocatorName); + } private: T& m_Writer; + std::string m_AllocatorName; }; } } diff --git a/lib/api/CJsonOutputWriter.cc b/lib/api/CJsonOutputWriter.cc index 2a7a987c23..44e5a5699d 100644 --- a/lib/api/CJsonOutputWriter.cc +++ b/lib/api/CJsonOutputWriter.cc @@ -850,6 +850,10 @@ void CJsonOutputWriter::pushAllocator(const std::string& allocatorName) { m_Writer.pushAllocator(allocatorName); } +void CJsonOutputWriter::releaseAllocator(const std::string& allocatorName) { + m_Writer.releaseAllocator(allocatorName); +} + void CJsonOutputWriter::popAllocator() { m_Writer.popAllocator(); } From 4f51f207db9767d25ba12368dc5227d1ca4e9678 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 4 Jun 2024 09:47:28 +0200 Subject: [PATCH 7/9] formatting --- include/core/CScopedBoostJsonPoolAllocator.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/core/CScopedBoostJsonPoolAllocator.h b/include/core/CScopedBoostJsonPoolAllocator.h index 78ab678b47..5c7dbdd41d 100644 --- a/include/core/CScopedBoostJsonPoolAllocator.h +++ b/include/core/CScopedBoostJsonPoolAllocator.h @@ -36,10 +36,10 @@ class CScopedBoostJsonPoolAllocator { m_Writer.pushAllocator(allocatorName); } - ~CScopedBoostJsonPoolAllocator() { - m_Writer.popAllocator(); + ~CScopedBoostJsonPoolAllocator() { + m_Writer.popAllocator(); m_Writer.releaseAllocator(m_AllocatorName); - } + } private: T& m_Writer; From 784159b5fa10c4d238d834ee5a0fb98a1207ef91 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:10:30 +0200 Subject: [PATCH 8/9] Documentation --- docs/CHANGELOG.asciidoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 31a742dfd2..6e83419c34 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,13 @@ //=== Regressions +== {es} version 8.14.1 + +=== Bug Fixes + +* Improve memory allocation management for JSON processing to reduce memory usage. + (See {ml-pull}2676[#2676].) + == {es} version 8.14.0 === Bug Fixes From ae4ad65e9d7b55b8eb12d467d4631e26afcbcda2 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:11:11 +0200 Subject: [PATCH 9/9] Documentation --- docs/CHANGELOG.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 6e83419c34..a50b860ad9 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -30,7 +30,7 @@ == {es} version 8.14.1 -=== Bug Fixes +=== Enhancements * Improve memory allocation management for JSON processing to reduce memory usage. (See {ml-pull}2676[#2676].)