diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index decc34c53a..95ebecaf43 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -42,6 +42,10 @@ * Improve accuracy of anomaly detection median estimation. (See {ml-pull}2367[#2367], issue: {ml-issue}2364[#2364].) +=== Bug Fixes + +* Fix potential cause of classification and regression job failures. (See {ml-pull}2385[#2385].) + == {es} version 8.3.0 === Enhancements diff --git a/lib/maths/analytics/CBoostedTreeImpl.cc b/lib/maths/analytics/CBoostedTreeImpl.cc index 55fc9d258d..3ad4b380b6 100644 --- a/lib/maths/analytics/CBoostedTreeImpl.cc +++ b/lib/maths/analytics/CBoostedTreeImpl.cc @@ -1663,16 +1663,17 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, this->nodeFeatureBag(treeFeatureBag, featureSampleProbabilities, nodeFeatureBag); nodeFeatureBag = merge(featuresToInclude, std::move(nodeFeatureBag)); - std::size_t numberSplittableLeaves{splittableLeaves.size()}; - std::size_t currentNumberInternalNodes{(tree.size() - 1) / 2}; + auto numberSplittableLeaves = + static_cast(splittableLeaves.size()); + auto currentNumberInternalNodes = static_cast(tree.size() - 1) / 2; + auto lastPotentialSplit = numberSplittableLeaves + currentNumberInternalNodes - + static_cast(maximumNumberInternalNodes); auto smallestCurrentCandidateGainIndex = - static_cast(numberSplittableLeaves) - - static_cast(maximumNumberInternalNodes - currentNumberInternalNodes); + std::min(lastPotentialSplit, numberSplittableLeaves - 1); double smallestCandidateGain{ - smallestCurrentCandidateGainIndex >= 0 - ? splittableLeaves[static_cast(smallestCurrentCandidateGainIndex)] - ->gain() - : 0.0}; + smallestCurrentCandidateGainIndex < 0 + ? 0.0 + : splittableLeaves[smallestCurrentCandidateGainIndex]->gain()}; TLeafNodeStatisticsPtr leftChild; TLeafNodeStatisticsPtr rightChild;