From 1c26b8db6e59f7148c7cff4a8305d60a27d69cba Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 17 Jan 2023 10:57:35 +0100 Subject: [PATCH 1/4] add multimodal identification --- include/maths/common/CModel.h | 5 ++++- include/maths/common/COneOfNPrior.h | 3 +++ include/maths/time_series/CTimeSeriesModel.h | 3 +++ lib/api/CJsonOutputWriter.cc | 6 ++++++ lib/maths/common/COneOfNPrior.cc | 13 +++++++++++++ lib/maths/common/unittest/COneOfNPriorTest.cc | 3 +++ lib/maths/time_series/CTimeSeriesModel.cc | 13 +++++++++++++ 7 files changed, 45 insertions(+), 1 deletion(-) diff --git a/include/maths/common/CModel.h b/include/maths/common/CModel.h index 4126ad4cd9..46385bfd45 100644 --- a/include/maths/common/CModel.h +++ b/include/maths/common/CModel.h @@ -260,6 +260,7 @@ struct MATHS_COMMON_EXPORT SAnomalyScoreExplanation { double s_UpperConfidenceBound{std::numeric_limits::quiet_NaN()}; bool s_HighVariancePenalty{false}; bool s_IncompleteBucketPenalty{false}; + bool s_MultimodalDistribution{false}; std::string print() const { return "Anomaly Score Explanation:\ntype: " + std::to_string(s_AnomalyType) + @@ -273,7 +274,9 @@ struct MATHS_COMMON_EXPORT SAnomalyScoreExplanation { "\n high variance penalty: " + std::to_string(static_cast(s_HighVariancePenalty)) + ", incomplete bucket_penalty: " + - std::to_string(static_cast(s_IncompleteBucketPenalty)); + std::to_string(static_cast(s_IncompleteBucketPenalty)) + + ", is multimodal distribution: " + + std::to_string(static_cast(s_MultimodalDistribution)); } }; diff --git a/include/maths/common/COneOfNPrior.h b/include/maths/common/COneOfNPrior.h index 692dce40f5..54127190dc 100644 --- a/include/maths/common/COneOfNPrior.h +++ b/include/maths/common/COneOfNPrior.h @@ -216,6 +216,9 @@ class MATHS_COMMON_EXPORT COneOfNPrior : public CPrior { //! \note \p numberSamples is truncated to the number of samples received. 
void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const override; + //! Determines whether the prior distribution is multimodal. + bool isMultimodal() const; + private: //! The common c.d.f. implementation. bool minusLogJointCdfImpl(bool complement, diff --git a/include/maths/time_series/CTimeSeriesModel.h b/include/maths/time_series/CTimeSeriesModel.h index 27c4c8bda5..f2c93860dd 100644 --- a/include/maths/time_series/CTimeSeriesModel.h +++ b/include/maths/time_series/CTimeSeriesModel.h @@ -272,6 +272,9 @@ class MATHS_TIME_SERIES_EXPORT CUnivariateTimeSeriesModel : public common::CMode //! Abort on failure. void checkRestoredInvariants() const; + //! Determines whether the residual model has a multimodal prior distribution. + bool multimodalPrior() const; + private: //! A unique identifier for this model. std::size_t m_Id; diff --git a/lib/api/CJsonOutputWriter.cc b/lib/api/CJsonOutputWriter.cc index 094976c5ba..e96c68d09c 100644 --- a/lib/api/CJsonOutputWriter.cc +++ b/lib/api/CJsonOutputWriter.cc @@ -95,6 +95,7 @@ const std::string TYPICAL_VALUE("typical_value"); const std::string UPPER_CONFIDENCE_BOUND("upper_confidence_bound"); const std::string HIGH_VARIANCE_PENALTY("high_variance_penalty"); const std::string INCOMPLETE_BUCKET_PENALTY("incomplete_bucket_penalty"); +const std::string MULTIMODAL_DISTRIBUTION("multimodal_distribution"); //! Get a numeric field from a JSON document. //! Assumes the document contains the field. 
@@ -1001,6 +1002,11 @@ void CJsonOutputWriter::writeAnomalyScoreExplanationObject( results.s_AnomalyScoreExplanation.s_IncompleteBucketPenalty, anomalyScoreExplanation); } + if (results.s_AnomalyScoreExplanation.s_MultimodalDistribution) { + m_Writer.addBoolFieldToObj(MULTIMODAL_DISTRIBUTION, + results.s_AnomalyScoreExplanation.s_MultimodalDistribution, + anomalyScoreExplanation); + } } } } diff --git a/lib/maths/common/COneOfNPrior.cc b/lib/maths/common/COneOfNPrior.cc index 2480c1ae80..81b4307187 100644 --- a/lib/maths/common/COneOfNPrior.cc +++ b/lib/maths/common/COneOfNPrior.cc @@ -769,6 +769,19 @@ void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "samples = " << samples); } +bool COneOfNPrior::isMultimodal() const { + double sumOfWeights{0.0}; + for (const auto& weight : this->weights()) { + sumOfWeights += weight; + } + for (const auto& model : m_Models) { + if (model.second->type() == EPrior::E_Multimodal && model.first / sumOfWeights > 0.1) { + return true; + } + } + return false; +} + bool COneOfNPrior::minusLogJointCdfImpl(bool complement, const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights, diff --git a/lib/maths/common/unittest/COneOfNPriorTest.cc b/lib/maths/common/unittest/COneOfNPriorTest.cc index 1d5fd0579d..2a116669b4 100644 --- a/lib/maths/common/unittest/COneOfNPriorTest.cc +++ b/lib/maths/common/unittest/COneOfNPriorTest.cc @@ -435,6 +435,7 @@ BOOST_AUTO_TEST_CASE(testModelSelection) { BOOST_TEST_REQUIRE(logWeightRatio > expectedLogWeightRatio); BOOST_TEST_REQUIRE(logWeightRatio < 0.95 * expectedLogWeightRatio); + BOOST_REQUIRE_EQUAL(filter.isMultimodal(), false); } { @@ -485,6 +486,7 @@ BOOST_AUTO_TEST_CASE(testModelSelection) { BOOST_TEST_REQUIRE(logWeightRatio > expectedLogWeightRatio); BOOST_TEST_REQUIRE(logWeightRatio < 0.75 * expectedLogWeightRatio); + BOOST_REQUIRE_EQUAL(filter.isMultimodal(), false); } { // Check we correctly select the multimodal model when the data have @@ -522,6 +524,7 @@ 
BOOST_AUTO_TEST_CASE(testModelSelection) { LOG_DEBUG(<< "logWeightRatio = " << logWeightRatio); BOOST_TEST_REQUIRE(std::exp(logWeightRatio) < 1e-6); + BOOST_REQUIRE_EQUAL(filter.isMultimodal(), true); } } diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index 99024df1ed..95ee9c741e 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1075,6 +1076,10 @@ bool CUnivariateTimeSeriesModel::uncorrelatedProbability( result.s_AnomalyScoreExplanation.s_UpperConfidenceBound = interval[2][0]; } + result.s_AnomalyScoreExplanation.s_MultimodalDistribution = this->multimodalPrior(); + LOG_DEBUG(<< "Multimodel distribution: " + << result.s_AnomalyScoreExplanation.s_MultimodalDistribution); + result.s_Probability = pOverall; result.s_FeatureProbabilities = std::move(featureProbabilities); result.s_Tail = {tail}; @@ -1082,6 +1087,14 @@ bool CUnivariateTimeSeriesModel::uncorrelatedProbability( return true; } +bool CUnivariateTimeSeriesModel::multimodalPrior() const { + LOG_DEBUG(<< "Residual model type " << m_ResidualModel->type()); + if (m_ResidualModel->type() == common::CPrior::E_OneOfN) { + return static_cast(m_ResidualModel.get())->isMultimodal(); + } + return false; +} + bool CUnivariateTimeSeriesModel::correlatedProbability( const common::CModelProbabilityParams& params, const TTime2Vec1Vec& time, From 8eb4196e3bea056159d9e56a47fdaa5574f0b7cd Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 17 Jan 2023 11:03:03 +0100 Subject: [PATCH 2/4] changelog --- docs/CHANGELOG.asciidoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 936b1c9ebb..b525e465c9 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,12 @@ //=== Regressions +== {es} version 8.7.0 + 
+=== Enhancements + +* Add identification of multimodal distribution to anomaly explanations. (See {ml-pull}2440[#2440].) + == {es} version 8.6.0 === Bug Fixes From 52a81444b83b1dc0a320f7de932f62d3fb2da38b Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 23 Jan 2023 14:01:59 +0100 Subject: [PATCH 3/4] review comments --- include/maths/common/COneOfNPrior.h | 4 ++-- include/maths/common/CPrior.h | 3 +++ include/maths/time_series/CTimeSeriesModel.h | 3 --- lib/maths/common/COneOfNPrior.cc | 11 ++++------- lib/maths/common/CPrior.cc | 5 +++++ lib/maths/common/unittest/COneOfNPriorTest.cc | 6 +++--- lib/maths/time_series/CTimeSeriesModel.cc | 11 ++--------- 7 files changed, 19 insertions(+), 24 deletions(-) diff --git a/include/maths/common/COneOfNPrior.h b/include/maths/common/COneOfNPrior.h index 54127190dc..87a9b703a8 100644 --- a/include/maths/common/COneOfNPrior.h +++ b/include/maths/common/COneOfNPrior.h @@ -216,8 +216,8 @@ class MATHS_COMMON_EXPORT COneOfNPrior : public CPrior { //! \note \p numberSamples is truncated to the number of samples received. void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const override; - //! Determines whether the prior distribution is multimodal. - bool isMultimodal() const; + //! Determines whether the selected distribution is multimodal. + bool isSelectedModelMultimodal() const override; private: //! The common c.d.f. implementation. diff --git a/include/maths/common/CPrior.h b/include/maths/common/CPrior.h index 3a0addf76f..2c33327aea 100644 --- a/include/maths/common/CPrior.h +++ b/include/maths/common/CPrior.h @@ -438,6 +438,9 @@ class MATHS_COMMON_EXPORT CPrior { TDouble1Vec& resamples, TDoubleWeightsAry1Vec& resamplesWeights) const; + //! Determines whether the selected distribution is multimodal. + virtual bool isSelectedModelMultimodal() const; + protected: //! \brief Defines a set of operations to adjust the offset parameter //! 
of those priors with non-negative support. diff --git a/include/maths/time_series/CTimeSeriesModel.h b/include/maths/time_series/CTimeSeriesModel.h index f2c93860dd..27c4c8bda5 100644 --- a/include/maths/time_series/CTimeSeriesModel.h +++ b/include/maths/time_series/CTimeSeriesModel.h @@ -272,9 +272,6 @@ class MATHS_TIME_SERIES_EXPORT CUnivariateTimeSeriesModel : public common::CMode //! Abort on failure. void checkRestoredInvariants() const; - //! Determines whether the residual model has a multimodal prior distribution. - bool multimodalPrior() const; - private: //! A unique identifier for this model. std::size_t m_Id; diff --git a/lib/maths/common/COneOfNPrior.cc b/lib/maths/common/COneOfNPrior.cc index 81b4307187..053c877911 100644 --- a/lib/maths/common/COneOfNPrior.cc +++ b/lib/maths/common/COneOfNPrior.cc @@ -769,13 +769,10 @@ void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "samples = " << samples); } -bool COneOfNPrior::isMultimodal() const { - double sumOfWeights{0.0}; - for (const auto& weight : this->weights()) { - sumOfWeights += weight; - } - for (const auto& model : m_Models) { - if (model.second->type() == EPrior::E_Multimodal && model.first / sumOfWeights > 0.1) { +bool COneOfNPrior::isSelectedModelMultimodal() const { + auto weights = this->weights(); + for (std::size_t i = 0; i < weights.size(); ++i) { + if (weights[i] > 0.1 && m_Models[i].second->type() == EPrior::E_Multimodal) { return true; } } diff --git a/lib/maths/common/CPrior.cc b/lib/maths/common/CPrior.cc index 1c81c3c323..c0b3010748 100644 --- a/lib/maths/common/CPrior.cc +++ b/lib/maths/common/CPrior.cc @@ -319,6 +319,11 @@ CPrior::TStrStrPr CPrior::printMarginalLikelihoodStatistics() const { return this->doPrintMarginalLikelihoodStatistics(); } + +bool CPrior::isSelectedModelMultimodal() const { + return false; +} + const double CPrior::FALLBACK_DECAY_RATE = 0.001; const std::size_t CPrior::ADJUST_OFFSET_SAMPLE_SIZE = 50; const std::string 
CPrior::UNKNOWN_VALUE_STRING = ""; diff --git a/lib/maths/common/unittest/COneOfNPriorTest.cc b/lib/maths/common/unittest/COneOfNPriorTest.cc index 2a116669b4..d3273bdc9d 100644 --- a/lib/maths/common/unittest/COneOfNPriorTest.cc +++ b/lib/maths/common/unittest/COneOfNPriorTest.cc @@ -435,7 +435,7 @@ BOOST_AUTO_TEST_CASE(testModelSelection) { BOOST_TEST_REQUIRE(logWeightRatio > expectedLogWeightRatio); BOOST_TEST_REQUIRE(logWeightRatio < 0.95 * expectedLogWeightRatio); - BOOST_REQUIRE_EQUAL(filter.isMultimodal(), false); + BOOST_REQUIRE_EQUAL(filter.isSelectedModelMultimodal(), false); } { @@ -486,7 +486,7 @@ BOOST_AUTO_TEST_CASE(testModelSelection) { BOOST_TEST_REQUIRE(logWeightRatio > expectedLogWeightRatio); BOOST_TEST_REQUIRE(logWeightRatio < 0.75 * expectedLogWeightRatio); - BOOST_REQUIRE_EQUAL(filter.isMultimodal(), false); + BOOST_REQUIRE_EQUAL(filter.isSelectedModelMultimodal(), false); } { // Check we correctly select the multimodal model when the data have @@ -524,7 +524,7 @@ BOOST_AUTO_TEST_CASE(testModelSelection) { LOG_DEBUG(<< "logWeightRatio = " << logWeightRatio); BOOST_TEST_REQUIRE(std::exp(logWeightRatio) < 1e-6); - BOOST_REQUIRE_EQUAL(filter.isMultimodal(), true); + BOOST_REQUIRE_EQUAL(filter.isSelectedModelMultimodal(), true); } } diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index 95ee9c741e..a0d35a34e9 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -1076,7 +1076,8 @@ bool CUnivariateTimeSeriesModel::uncorrelatedProbability( result.s_AnomalyScoreExplanation.s_UpperConfidenceBound = interval[2][0]; } - result.s_AnomalyScoreExplanation.s_MultimodalDistribution = this->multimodalPrior(); + result.s_AnomalyScoreExplanation.s_MultimodalDistribution = + m_ResidualModel->isSelectedModelMultimodal(); LOG_DEBUG(<< "Multimodel distribution: " << result.s_AnomalyScoreExplanation.s_MultimodalDistribution); @@ -1087,14 +1088,6 @@ bool 
CUnivariateTimeSeriesModel::uncorrelatedProbability( return true; } -bool CUnivariateTimeSeriesModel::multimodalPrior() const { - LOG_DEBUG(<< "Residual model type " << m_ResidualModel->type()); - if (m_ResidualModel->type() == common::CPrior::E_OneOfN) { - return static_cast(m_ResidualModel.get())->isMultimodal(); - } - return false; -} - bool CUnivariateTimeSeriesModel::correlatedProbability( const common::CModelProbabilityParams& params, const TTime2Vec1Vec& time, From 8e390c01740a815a515516b25ed1760796dcd492 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 23 Jan 2023 14:03:25 +0100 Subject: [PATCH 4/4] remove unused include --- lib/maths/time_series/CTimeSeriesModel.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index a0d35a34e9..7ff521d35e 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include