From 3c3bc123aaf336ee5b37bef5cdb7b4dadbc98e37 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 30 Sep 2022 10:26:28 +0100 Subject: [PATCH 1/2] [ML] Fix for 'No statistics' error message An anomaly detector job ignores records that fall in the initial, incomplete bucket. However, a model is still created with a start time of -1, in the expectation that more records will be received that fall into subsequent buckets. If no more records are received, the model continues to exist with a start time of -1. If a job is initially created as a lookback job, has such models with a start time of -1 in existence at the point the job closes, and is later re-opened as a realtime job, any attempt to sample the model will fail with the 'No statistics at ...' error message because the start time (-1) falls outside the current bucket. This change always updates the model's start time to that of the current record, even if the record data is not to be added to the associated data gatherer. 
--- devbin/model_extractor/Main.cc | 2 +- lib/model/CIndividualModel.cc | 6 ++++-- lib/model/CMetricModel.cc | 10 +++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/devbin/model_extractor/Main.cc b/devbin/model_extractor/Main.cc index 575f4e2d3b..9d0921e195 100644 --- a/devbin/model_extractor/Main.cc +++ b/devbin/model_extractor/Main.cc @@ -147,7 +147,7 @@ int main(int argc, char** argv) { while (restoredJob.restoreState(restoreSearcher, completeToTime)) { assert(completeToTime > prevCompleteToTime); prevCompleteToTime = completeToTime; - LOG_DEBUG(<< "Restore complete to time " << completeToTime << std::endl); + LOG_DEBUG(<< "Restore complete to time " << completeToTime); core::CNamedPipeFactory::TOStreamP persistStrm{&ioMgr.outputStream(), [](std::ostream*) {}}; diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 021e24be0a..1fb67d2422 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -133,8 +133,10 @@ bool CIndividualModel::isPopulation() const { CIndividualModel::TOptionalUInt64 CIndividualModel::currentBucketCount(std::size_t pid, core_t::TTime time) const { if (!this->bucketStatsAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time - << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time << " for " << this->description() + << ", current bucket = " << this->printCurrentBucket() + << ", partitionFieldValue = " << this->dataGatherer().partitionFieldValue() + << ", personName = " << this->dataGatherer().personName(pid)); return TOptionalUInt64(); } diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index 7791a771d1..b06731b419 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -189,13 +189,17 @@ void CMetricModel::sample(core_t::TTime startTime, CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if 
(!gatherer.validateSampleTimes(startTime, endTime)) { - return; - } + m_CurrentBucketStats.s_StartTime = std::max(m_CurrentBucketStats.s_StartTime, startTime); this->createUpdateNewModels(startTime, resourceMonitor); m_CurrentBucketStats.s_InterimCorrections.clear(); m_CurrentBucketStats.s_Annotations.clear(); + m_CurrentBucketStats.s_FeatureData.clear(); + m_CurrentBucketStats.s_PersonCounts.clear(); + + if (!gatherer.validateSampleTimes(startTime, endTime)) { + return; + } for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { LOG_TRACE(<< "Sampling [" << time << "," << time + bucketLength << ")"); From b1208a9a65cb6996a9c81d5b69b9c531193d12cd Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 30 Sep 2022 10:36:26 +0100 Subject: [PATCH 2/2] Update docs --- docs/CHANGELOG.asciidoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index bd4c1d64b6..fe31029069 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,12 @@ //=== Regressions +== {es} version 8.6.0 + +=== Bug Fixes + +* Fix for 'No statistics' error message. (See {ml-pull}2410[#2410].) + == {es} version 8.5.0 === Enhancements