From a6fc86739de6d04a23a8537470e8a487d5d48428 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 11:40:07 +0100 Subject: [PATCH 01/11] write memory in the new format --- .../api/CDataFrameAnalysisInstrumentation.h | 13 ++++++-- lib/api/CDataFrameAnalysisInstrumentation.cc | 33 ++++++++++++------- lib/api/CDataFrameOutliersRunner.cc | 3 +- lib/api/CDataFrameTrainBoostedTreeRunner.cc | 2 +- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/include/api/CDataFrameAnalysisInstrumentation.h b/include/api/CDataFrameAnalysisInstrumentation.h index 4a0ccdedd4..5acebea9b0 100644 --- a/include/api/CDataFrameAnalysisInstrumentation.h +++ b/include/api/CDataFrameAnalysisInstrumentation.h @@ -30,7 +30,7 @@ class API_EXPORT CDataFrameAnalysisInstrumentation : public maths::CDataFrameAnalysisInstrumentationInterface { public: - CDataFrameAnalysisInstrumentation(); + explicit CDataFrameAnalysisInstrumentation(const std::string& jobId); //! Adds \p delta to the memory usage statistics. void updateMemoryUsage(std::int64_t delta) override; @@ -73,7 +73,7 @@ class API_EXPORT CDataFrameAnalysisInstrumentation private: void writeProgress(std::uint32_t step); - void writeMemory(std::uint32_t step); + void writeMemory(std::int64_t timestamp); void writeState(std::uint32_t step); private: @@ -81,16 +81,25 @@ class API_EXPORT CDataFrameAnalysisInstrumentation std::atomic_size_t m_FractionalProgress; std::atomic m_Memory; core::CRapidJsonConcurrentLineWriter* m_Writer; + std::string m_JobId; }; class API_EXPORT CDataFrameOutliersInstrumentation final : public CDataFrameAnalysisInstrumentation { +public: + explicit CDataFrameOutliersInstrumentation(const std::string& jobId) + : CDataFrameAnalysisInstrumentation(jobId){}; + protected: counter_t::ECounterTypes memoryCounterType() override; }; class API_EXPORT CDataFrameTrainBoostedTreeInstrumentation final : public CDataFrameAnalysisInstrumentation { +public: + explicit CDataFrameTrainBoostedTreeInstrumentation(const std::string& jobId) + : CDataFrameAnalysisInstrumentation(jobId){}; + protected: counter_t::ECounterTypes memoryCounterType() override; }; diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index 62502f83e8..d14abf5db8 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -3,16 +3,21 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ - #include +#include + namespace ml { namespace api { namespace { const std::string STEP_TAG{"step"}; const std::string PROGRESS_TAG{"progress"}; -const std::string PEAK_MEMORY_USAGE_TAG{"peak_memory_usage"}; +const std::string PEAK_MEMORY_USAGE_TAG{"peak_usage_bytes"}; +const std::string TYPE_TAG{"type"}; +const std::string JOB_ID_TAG{"job_id"}; +const std::string TIMESTAMP_TAG{"timestamp"}; +const std::string MEMORY_TYPE{"analytics_memory_usage"}; const std::size_t MAXIMUM_FRACTIONAL_PROGRESS{std::size_t{1} << ((sizeof(std::size_t) - 2) * 8)}; @@ -51,8 +56,8 @@ double CDataFrameAnalysisInstrumentation::progress() const { static_cast(MAXIMUM_FRACTIONAL_PROGRESS); } -CDataFrameAnalysisInstrumentation::CDataFrameAnalysisInstrumentation() - : m_Finished{false}, m_FractionalProgress{0}, m_Memory{0}, m_Writer{nullptr} { +CDataFrameAnalysisInstrumentation::CDataFrameAnalysisInstrumentation(const std::string& jobId) + : m_Finished{false}, m_FractionalProgress{0}, m_Memory{0}, m_Writer{nullptr}, m_JobId{jobId} { } void CDataFrameAnalysisInstrumentation::resetProgress() { @@ -64,14 +69,14 @@ void CDataFrameAnalysisInstrumentation::writer(core::CRapidJsonConcurrentLineWri m_Writer = writer; } -void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t /*step*/) { - // TODO reactivate state writing, once the Java backend can accept it - // this->writeState(step); +void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t step) { + this->writeState(step); } void CDataFrameAnalysisInstrumentation::writeState(std::uint32_t step) { - this->writeProgress(step); - this->writeMemory(step); + // this->writeProgress(step); + int64_t timestamp = core::CTimeUtils::toEpochMs(core::CTimeUtils::now()); + this->writeMemory(timestamp); } std::int64_t CDataFrameAnalysisInstrumentation::memory() const { @@ -89,11 +94,15 @@ void CDataFrameAnalysisInstrumentation::writeProgress(std::uint32_t step) { } } -void CDataFrameAnalysisInstrumentation::writeMemory(std::uint32_t step) { +void CDataFrameAnalysisInstrumentation::writeMemory(std::int64_t timestamp) { if (m_Writer != nullptr) { m_Writer->StartObject(); - m_Writer->Key(STEP_TAG); - m_Writer->Uint(step); + m_Writer->Key(TYPE_TAG); + m_Writer->String(MEMORY_TYPE); + m_Writer->Key(JOB_ID_TAG); + m_Writer->String(m_JobId); + m_Writer->Key(TIMESTAMP_TAG); + m_Writer->Int64(timestamp); m_Writer->Key(PEAK_MEMORY_USAGE_TAG); m_Writer->Int64(m_Memory.load()); m_Writer->EndObject(); diff --git a/lib/api/CDataFrameOutliersRunner.cc b/lib/api/CDataFrameOutliersRunner.cc index 406d23b7a8..7e5d698292 100644 --- a/lib/api/CDataFrameOutliersRunner.cc +++ b/lib/api/CDataFrameOutliersRunner.cc @@ -73,7 +73,8 @@ CDataFrameOutliersRunner::CDataFrameOutliersRunner(const CDataFrameAnalysisSpeci CDataFrameOutliersRunner::CDataFrameOutliersRunner(const CDataFrameAnalysisSpecification& spec) : CDataFrameAnalysisRunner{spec}, m_Method{static_cast( - maths::COutliers::E_Ensemble)} { + maths::COutliers::E_Ensemble)}, + m_Instrumentation{spec.jobId()} { } std::size_t CDataFrameOutliersRunner::numberExtraColumns() const { diff --git a/lib/api/CDataFrameTrainBoostedTreeRunner.cc b/lib/api/CDataFrameTrainBoostedTreeRunner.cc index d4ad4fd8fc..ef7d4fc59c 100644 --- a/lib/api/CDataFrameTrainBoostedTreeRunner.cc +++ b/lib/api/CDataFrameTrainBoostedTreeRunner.cc @@ -68,7 +68,7 @@ CDataFrameTrainBoostedTreeRunner::CDataFrameTrainBoostedTreeRunner( const CDataFrameAnalysisSpecification& spec, const CDataFrameAnalysisParameters& parameters, TLossFunctionUPtr loss) - : CDataFrameAnalysisRunner{spec} { + : CDataFrameAnalysisRunner{spec}, m_Instrumentation{spec.jobId()} { m_DependentVariableFieldName = parameters[DEPENDENT_VARIABLE_NAME].as(); From 2aceced4dc950d193aebd97ea131784fa102345c Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 12:24:57 +0100 Subject: [PATCH 02/11] add unit test --- .../CDataFrameAnalysisInstrumentationTest.cc | 46 +++++++++++++++++++ .../CDataFrameAnalyzerTrainingTest.cc | 2 +- .../unittest/CDataFrameMockAnalysisRunner.cc | 2 +- .../unittest/CDataFrameMockAnalysisRunner.h | 8 +++- 4 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc new file mode 100644 index 0000000000..96b129d5c4 --- /dev/null +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +#include + +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(CDataFrameAnalysisInstrumentationTest) + +using namespace ml; + +BOOST_AUTO_TEST_CASE(testMemoryState) { + std::string jobId("JOB123"); + std::int64_t memoryUsage = 1000; + std::int64_t timestamp = core::CTimeUtils::toEpochMs(core::CTimeUtils::now()); + std::stringstream s_Output; + { + core::CJsonOutputStreamWrapper streamWrapper(s_Output); + core::CRapidJsonConcurrentLineWriter writer(streamWrapper); + api::CDataFrameTrainBoostedTreeInstrumentation instrumentation(jobId); + instrumentation.updateMemoryUsage(memoryUsage); + instrumentation.writer(&writer); + instrumentation.nextStep(0); + s_Output.flush(); + } + + rapidjson::Document results; + rapidjson::ParseResult ok(results.Parse(s_Output.str())); + BOOST_TEST_REQUIRE(static_cast(ok) == true); + BOOST_TEST_REQUIRE(results.IsArray() == true); + + const auto& result{results[0]}; + BOOST_TEST_REQUIRE(result["job_id"].GetString() == jobId); + BOOST_TEST_REQUIRE(result["type"].GetString() == "analytics_memory_usage"); + BOOST_TEST_REQUIRE(result["peak_usage_bytes"].GetInt64() == memoryUsage); + BOOST_REQUIRE_SMALL(result["timestamp"].GetInt64() - timestamp, 10l); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc index b80144af63..c2752a53a7 100644 --- a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc +++ b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc @@ -265,7 +265,7 @@ void addPredictionTestData(EPredictionType type, treeFactory.featureBagFraction(featureBagFraction); } - ml::api::CDataFrameTrainBoostedTreeInstrumentation instrumentation; + ml::api::CDataFrameTrainBoostedTreeInstrumentation instrumentation("testJob"); treeFactory.analysisInstrumentation(instrumentation); auto tree = treeFactory.buildFor(*frame, weights.size()); diff --git a/lib/api/unittest/CDataFrameMockAnalysisRunner.cc b/lib/api/unittest/CDataFrameMockAnalysisRunner.cc index b573e8299f..13bca6ec82 100644 --- a/lib/api/unittest/CDataFrameMockAnalysisRunner.cc +++ b/lib/api/unittest/CDataFrameMockAnalysisRunner.cc @@ -10,7 +10,7 @@ #include CDataFrameMockAnalysisRunner::CDataFrameMockAnalysisRunner(const ml::api::CDataFrameAnalysisSpecification& spec) - : ml::api::CDataFrameAnalysisRunner{spec} { + : ml::api::CDataFrameAnalysisRunner{spec}, m_Instrumentation{spec.jobId()} { } std::size_t CDataFrameMockAnalysisRunner::numberExtraColumns() const { diff --git a/lib/api/unittest/CDataFrameMockAnalysisRunner.h b/lib/api/unittest/CDataFrameMockAnalysisRunner.h index f74d24985a..cfa648abfd 100644 --- a/lib/api/unittest/CDataFrameMockAnalysisRunner.h +++ b/lib/api/unittest/CDataFrameMockAnalysisRunner.h @@ -16,8 +16,12 @@ #include class CDataFrameMockAnalysisState final : public ml::api::CDataFrameAnalysisInstrumentation { -protected: - ml::counter_t::ECounterTypes memoryCounterType() override; + public: + CDataFrameMockAnalysisState(const std::string& jobId) + : ml::api::CDataFrameAnalysisInstrumentation(jobId) {} + + protected: + ml::counter_t::ECounterTypes memoryCounterType() override; }; class CDataFrameMockAnalysisRunner final : public ml::api::CDataFrameAnalysisRunner { From ac75f2b1e30da53980ce6054ec040a380fa95b4b Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 12:47:51 +0100 Subject: [PATCH 03/11] formatting --- .../CDataFrameAnalysisInstrumentationTest.cc | 14 +++++++------- lib/api/unittest/CDataFrameMockAnalysisRunner.h | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc index 96b129d5c4..fbb75842ac 100644 --- a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -22,13 +22,13 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { std::int64_t timestamp = core::CTimeUtils::toEpochMs(core::CTimeUtils::now()); std::stringstream s_Output; { - core::CJsonOutputStreamWrapper streamWrapper(s_Output); - core::CRapidJsonConcurrentLineWriter writer(streamWrapper); - api::CDataFrameTrainBoostedTreeInstrumentation instrumentation(jobId); - instrumentation.updateMemoryUsage(memoryUsage); - instrumentation.writer(&writer); - instrumentation.nextStep(0); - s_Output.flush(); + core::CJsonOutputStreamWrapper streamWrapper(s_Output); + core::CRapidJsonConcurrentLineWriter writer(streamWrapper); + api::CDataFrameTrainBoostedTreeInstrumentation instrumentation(jobId); + instrumentation.updateMemoryUsage(memoryUsage); + instrumentation.writer(&writer); + instrumentation.nextStep(0); + s_Output.flush(); } rapidjson::Document results; diff --git a/lib/api/unittest/CDataFrameMockAnalysisRunner.h b/lib/api/unittest/CDataFrameMockAnalysisRunner.h index cfa648abfd..b35f9e4a5f 100644 --- a/lib/api/unittest/CDataFrameMockAnalysisRunner.h +++ b/lib/api/unittest/CDataFrameMockAnalysisRunner.h @@ -16,12 +16,12 @@ #include class CDataFrameMockAnalysisState final : public ml::api::CDataFrameAnalysisInstrumentation { - public: - CDataFrameMockAnalysisState(const std::string& jobId) - : ml::api::CDataFrameAnalysisInstrumentation(jobId) {} +public: + CDataFrameMockAnalysisState(const std::string& jobId) + : ml::api::CDataFrameAnalysisInstrumentation(jobId) {} - protected: - ml::counter_t::ECounterTypes memoryCounterType() override; +protected: + ml::counter_t::ECounterTypes memoryCounterType() override; }; class CDataFrameMockAnalysisRunner final : public ml::api::CDataFrameAnalysisRunner { From 16c7147c1fad3e50b69602702d66d0bf21bcb394 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 13:50:22 +0100 Subject: [PATCH 04/11] reviewers comments --- lib/api/CDataFrameAnalysisInstrumentation.cc | 2 +- .../unittest/CDataFrameAnalysisInstrumentationTest.cc | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index d14abf5db8..1c36fcd8d1 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -75,7 +75,7 @@ void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t step) { void CDataFrameAnalysisInstrumentation::writeState(std::uint32_t step) { // this->writeProgress(step); - int64_t timestamp = core::CTimeUtils::toEpochMs(core::CTimeUtils::now()); + std::int64_t timestamp{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; this->writeMemory(timestamp); } diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc index fbb75842ac..842f2d2aa9 100644 --- a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -17,9 +17,9 @@ BOOST_AUTO_TEST_SUITE(CDataFrameAnalysisInstrumentationTest) using namespace ml; BOOST_AUTO_TEST_CASE(testMemoryState) { - std::string jobId("JOB123"); - std::int64_t memoryUsage = 1000; - std::int64_t timestamp = core::CTimeUtils::toEpochMs(core::CTimeUtils::now()); + std::string jobId{"JOB123"}; + std::int64_t memoryUsage{1000}; + std::int64_t timeBefore{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; std::stringstream s_Output; { core::CJsonOutputStreamWrapper streamWrapper(s_Output); @@ -30,6 +30,7 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { instrumentation.nextStep(0); s_Output.flush(); } + std::int64_t timeAfter{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; rapidjson::Document results; rapidjson::ParseResult ok(results.Parse(s_Output.str())); @@ -40,7 +41,8 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { BOOST_TEST_REQUIRE(result["job_id"].GetString() == jobId); BOOST_TEST_REQUIRE(result["type"].GetString() == "analytics_memory_usage"); BOOST_TEST_REQUIRE(result["peak_usage_bytes"].GetInt64() == memoryUsage); - BOOST_REQUIRE_SMALL(result["timestamp"].GetInt64() - timestamp, 10l); + BOOST_TEST_REQUIRE(result["timestamp"].GetInt64() >= timeBefore); + BOOST_TEST_REQUIRE(result["timestamp"].GetInt64() <= timeAfter); } BOOST_AUTO_TEST_SUITE_END() From d429b69e36e39442b0dcbdd4e6728640854e5dcb Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 13:51:25 +0100 Subject: [PATCH 05/11] variable renaming --- lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc index 842f2d2aa9..5e030c1d20 100644 --- a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -20,20 +20,20 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { std::string jobId{"JOB123"}; std::int64_t memoryUsage{1000}; std::int64_t timeBefore{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; - std::stringstream s_Output; + std::stringstream outpustStream; { - core::CJsonOutputStreamWrapper streamWrapper(s_Output); + core::CJsonOutputStreamWrapper streamWrapper(outpustStream); core::CRapidJsonConcurrentLineWriter writer(streamWrapper); api::CDataFrameTrainBoostedTreeInstrumentation instrumentation(jobId); instrumentation.updateMemoryUsage(memoryUsage); instrumentation.writer(&writer); instrumentation.nextStep(0); - s_Output.flush(); + outpustStream.flush(); } std::int64_t timeAfter{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; rapidjson::Document results; - rapidjson::ParseResult ok(results.Parse(s_Output.str())); + rapidjson::ParseResult ok(results.Parse(outpustStream.str())); BOOST_TEST_REQUIRE(static_cast(ok) == true); BOOST_TEST_REQUIRE(results.IsArray() == true); From 5f62945472e1ecdc64dfd4a053f48edef13dad3a Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 24 Feb 2020 13:54:06 +0100 Subject: [PATCH 06/11] add forgottent unittest Makefile --- lib/api/unittest/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/api/unittest/Makefile b/lib/api/unittest/Makefile index 18e64a2cd4..34bcc8ba4e 100644 --- a/lib/api/unittest/Makefile +++ b/lib/api/unittest/Makefile @@ -26,6 +26,7 @@ SRCS=\ CConfigUpdaterTest.cc \ CCsvInputParserTest.cc \ CCsvOutputWriterTest.cc \ + CDataFrameAnalysisInstrumentationTest.cc \ CDataFrameAnalysisRunnerTest.cc \ CDataFrameAnalysisSpecificationTest.cc \ CDataFrameAnalyzerFeatureImportanceTest.cc \ From 4b404b335c2d9b78bd1f85fa5816cb3b45fbb22c Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 25 Feb 2020 13:49:33 +0100 Subject: [PATCH 07/11] change memory output format --- lib/api/CDataFrameAnalysisInstrumentation.cc | 5 +++-- .../CDataFrameAnalysisInstrumentationTest.cc | 20 +++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index 1c36fcd8d1..6ba32736b2 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -97,8 +97,8 @@ void CDataFrameAnalysisInstrumentation::writeProgress(std::uint32_t step) { void CDataFrameAnalysisInstrumentation::writeMemory(std::int64_t timestamp) { if (m_Writer != nullptr) { m_Writer->StartObject(); - m_Writer->Key(TYPE_TAG); - m_Writer->String(MEMORY_TYPE); + m_Writer->Key(MEMORY_TYPE); + m_Writer->StartObject(); m_Writer->Key(JOB_ID_TAG); m_Writer->String(m_JobId); m_Writer->Key(TIMESTAMP_TAG); @@ -106,6 +106,7 @@ void CDataFrameAnalysisInstrumentation::writeMemory(std::int64_t timestamp) { m_Writer->Key(PEAK_MEMORY_USAGE_TAG); m_Writer->Int64(m_Memory.load()); m_Writer->EndObject(); + m_Writer->EndObject(); } } diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc index 5e030c1d20..d0b47bdc69 100644 --- a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -20,29 +20,29 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { std::string jobId{"JOB123"}; std::int64_t memoryUsage{1000}; std::int64_t timeBefore{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; - std::stringstream outpustStream; + std::stringstream outputStream; { - core::CJsonOutputStreamWrapper streamWrapper(outpustStream); + core::CJsonOutputStreamWrapper streamWrapper(outputStream); core::CRapidJsonConcurrentLineWriter writer(streamWrapper); api::CDataFrameTrainBoostedTreeInstrumentation instrumentation(jobId); instrumentation.updateMemoryUsage(memoryUsage); instrumentation.writer(&writer); instrumentation.nextStep(0); - outpustStream.flush(); + outputStream.flush(); } std::int64_t timeAfter{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; - rapidjson::Document results; - rapidjson::ParseResult ok(results.Parse(outpustStream.str())); + rapidjson::ParseResult ok(results.Parse(outputStream.str())); BOOST_TEST_REQUIRE(static_cast(ok) == true); BOOST_TEST_REQUIRE(results.IsArray() == true); const auto& result{results[0]}; - BOOST_TEST_REQUIRE(result["job_id"].GetString() == jobId); - BOOST_TEST_REQUIRE(result["type"].GetString() == "analytics_memory_usage"); - BOOST_TEST_REQUIRE(result["peak_usage_bytes"].GetInt64() == memoryUsage); - BOOST_TEST_REQUIRE(result["timestamp"].GetInt64() >= timeBefore); - BOOST_TEST_REQUIRE(result["timestamp"].GetInt64() <= timeAfter); + BOOST_TEST_REQUIRE(result["analytics_memory_usage"].IsObject() == true); + BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["job_id"].GetString() == jobId); + BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["peak_usage_bytes"].GetInt64() == + memoryUsage); + BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["timestamp"].GetInt64() >= timeBefore); + BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["timestamp"].GetInt64() <= timeAfter); } BOOST_AUTO_TEST_SUITE_END() From 53632f747eba225bbb35b8deed34c15f5f21bbf2 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Fri, 28 Feb 2020 15:12:41 +0100 Subject: [PATCH 08/11] Add enhancement note --- docs/CHANGELOG.asciidoc | 2 ++ lib/api/CDataFrameAnalysisInstrumentation.cc | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 05c035cb0b..5d745f3c14 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -56,6 +56,8 @@ necessary. This will improve the allocation of data frame analyses to cluster no (See {ml-pull}1003[#1003].) * Upgrade the compiler used on Linux from gcc 7.3 to gcc 7.5, and the binutils used in the build from version 2.20 to 2.34. (See {ml-pull}1013[#1013].) +* Add instrumentation of the peak memory consumption for data frame analytics jobs. +(See {ml-pull}1022[#1022].) === Bug Fixes diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index 6ba32736b2..239eadd9db 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -70,7 +70,8 @@ void CDataFrameAnalysisInstrumentation::writer(core::CRapidJsonConcurrentLineWri } void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t step) { - this->writeState(step); + // TODO uncomment, once Java code becomes available + // this->writeState(step); } void CDataFrameAnalysisInstrumentation::writeState(std::uint32_t step) { From 42afc23ce303cf220baaddfae95a9d89d3cf716f Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 5 Mar 2020 10:23:51 +0000 Subject: [PATCH 09/11] Fix tests and warnings --- .../api/CDataFrameAnalysisInstrumentation.h | 10 ++++++---- lib/api/CDataFrameAnalysisInstrumentation.cc | 5 ++--- .../CDataFrameAnalysisInstrumentationTest.cc | 20 +++++++++++-------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/include/api/CDataFrameAnalysisInstrumentation.h b/include/api/CDataFrameAnalysisInstrumentation.h index 5acebea9b0..637ebc1730 100644 --- a/include/api/CDataFrameAnalysisInstrumentation.h +++ b/include/api/CDataFrameAnalysisInstrumentation.h @@ -84,23 +84,25 @@ class API_EXPORT CDataFrameAnalysisInstrumentation std::string m_JobId; }; +//! \brief Outlier instrumentation. class API_EXPORT CDataFrameOutliersInstrumentation final : public CDataFrameAnalysisInstrumentation { public: explicit CDataFrameOutliersInstrumentation(const std::string& jobId) - : CDataFrameAnalysisInstrumentation(jobId){}; + : CDataFrameAnalysisInstrumentation(jobId) {} -protected: +private: counter_t::ECounterTypes memoryCounterType() override; }; +//! \brief Predictive model training instrumentation. class API_EXPORT CDataFrameTrainBoostedTreeInstrumentation final : public CDataFrameAnalysisInstrumentation { public: explicit CDataFrameTrainBoostedTreeInstrumentation(const std::string& jobId) - : CDataFrameAnalysisInstrumentation(jobId){}; + : CDataFrameAnalysisInstrumentation(jobId) {} -protected: +private: counter_t::ECounterTypes memoryCounterType() override; }; } diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index 239eadd9db..394f9e031b 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -70,12 +70,11 @@ void CDataFrameAnalysisInstrumentation::writer(core::CRapidJsonConcurrentLineWri } void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t step) { - // TODO uncomment, once Java code becomes available - // this->writeState(step); + this->writeState(step); } void CDataFrameAnalysisInstrumentation::writeState(std::uint32_t step) { - // this->writeProgress(step); + this->writeProgress(step); std::int64_t timestamp{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; this->writeMemory(timestamp); } diff --git a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc index d0b47bdc69..f6e4988794 100644 --- a/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisInstrumentationTest.cc @@ -31,18 +31,22 @@ BOOST_AUTO_TEST_CASE(testMemoryState) { outputStream.flush(); } std::int64_t timeAfter{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; + rapidjson::Document results; rapidjson::ParseResult ok(results.Parse(outputStream.str())); + BOOST_TEST_REQUIRE(static_cast(ok) == true); BOOST_TEST_REQUIRE(results.IsArray() == true); - - const auto& result{results[0]}; - BOOST_TEST_REQUIRE(result["analytics_memory_usage"].IsObject() == true); - BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["job_id"].GetString() == jobId); - BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["peak_usage_bytes"].GetInt64() == - memoryUsage); - BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["timestamp"].GetInt64() >= timeBefore); - BOOST_TEST_REQUIRE(result["analytics_memory_usage"]["timestamp"].GetInt64() <= timeAfter); + for (auto i = results.Begin(); i != results.End(); ++i) { + if (i->HasMember("analytics_memory_usage")) { + BOOST_TEST_REQUIRE((*i)["analytics_memory_usage"].IsObject() == true); + BOOST_TEST_REQUIRE((*i)["analytics_memory_usage"]["job_id"].GetString() == jobId); + BOOST_TEST_REQUIRE( + (*i)["analytics_memory_usage"]["peak_usage_bytes"].GetInt64() == memoryUsage); + BOOST_TEST_REQUIRE((*i)["analytics_memory_usage"]["timestamp"].GetInt64() >= timeBefore); + BOOST_TEST_REQUIRE((*i)["analytics_memory_usage"]["timestamp"].GetInt64() <= timeAfter); + } + } } BOOST_AUTO_TEST_SUITE_END() From 70d6a25ae8057c29ec1ffb9089633cf80bb51f2a Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 5 Mar 2020 10:48:51 +0000 Subject: [PATCH 10/11] Progress reporting isn't wanted from here yet --- lib/api/CDataFrameAnalysisInstrumentation.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index 394f9e031b..d6b22f83a7 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -74,7 +74,7 @@ void CDataFrameAnalysisInstrumentation::nextStep(std::uint32_t step) { } void CDataFrameAnalysisInstrumentation::writeState(std::uint32_t step) { - this->writeProgress(step); + //this->writeProgress(step); std::int64_t timestamp{core::CTimeUtils::toEpochMs(core::CTimeUtils::now())}; this->writeMemory(timestamp); } From 94ac1ef47d1cf86d70901c3e38ac1299bc679351 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 5 Mar 2020 11:14:16 +0000 Subject: [PATCH 11/11] Keep class parameters at start --- include/api/CDataFrameAnalysisInstrumentation.h | 2 +- lib/api/CDataFrameAnalysisInstrumentation.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/api/CDataFrameAnalysisInstrumentation.h b/include/api/CDataFrameAnalysisInstrumentation.h index 637ebc1730..48f45731b4 100644 --- a/include/api/CDataFrameAnalysisInstrumentation.h +++ b/include/api/CDataFrameAnalysisInstrumentation.h @@ -77,11 +77,11 @@ class API_EXPORT CDataFrameAnalysisInstrumentation void writeState(std::uint32_t step); private: + std::string m_JobId; std::atomic_bool m_Finished; std::atomic_size_t m_FractionalProgress; std::atomic m_Memory; core::CRapidJsonConcurrentLineWriter* m_Writer; - std::string m_JobId; }; //! \brief Outlier instrumentation. diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc index d6b22f83a7..e58a667352 100644 --- a/lib/api/CDataFrameAnalysisInstrumentation.cc +++ b/lib/api/CDataFrameAnalysisInstrumentation.cc @@ -57,7 +57,7 @@ double CDataFrameAnalysisInstrumentation::progress() const { } CDataFrameAnalysisInstrumentation::CDataFrameAnalysisInstrumentation(const std::string& jobId) - : m_Finished{false}, m_FractionalProgress{0}, m_Memory{0}, m_Writer{nullptr}, m_JobId{jobId} { + : m_JobId{jobId}, m_Finished{false}, m_FractionalProgress{0}, m_Memory{0}, m_Writer{nullptr} { } void CDataFrameAnalysisInstrumentation::resetProgress() {