elastic · tveasey · Oct 25, 2019 · Oct 25, 2019 · Oct 25, 2019 · Oct 25, 2019
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
@@ -34,6 +34,8 @@
 
 * Improve performance of boosted tree training for both classification and regression.
 (See {ml-pull}775[#775].)
+* Reduce the peak memory used by boosted tree training and fix an overcounting bug
+in the estimate of maximum memory usage. (See {ml-pull}781[#781].)
 
 == {es} version 7.5.0
 

diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h
@@ -274,8 +274,8 @@ class MATHS_EXPORT CBoostedTreeImpl final {
                             const CDataFrameCategoryEncoder& encoder,
                             const TRegularization& regularization,
                             const TDoubleVecVec& candidateSplits,
+                            const TSizeVec& featureBag,
                             std::size_t depth,
-                            TSizeVec featureBag,
                             const core::CPackedBitVector& rowMask);
 
         //! Only called by split but is public so it's accessible to std::make_shared.
@@ -285,15 +285,17 @@ class MATHS_EXPORT CBoostedTreeImpl final {
                             const CDataFrameCategoryEncoder& encoder,
                             const TRegularization& regularization,
                             const TDoubleVecVec& candidateSplits,
-                            std::size_t depth,
-                            TSizeVec featureBag,
+                            const TSizeVec& featureBag,
                             bool isLeftChild,
+                            std::size_t depth,
                             const CBoostedTreeNode& split,
                             const core::CPackedBitVector& parentRowMask);
         //! Only called by split but is public so it's accessible to std::make_shared.
         CLeafNodeStatistics(std::size_t id,
                             const CLeafNodeStatistics& parent,
                             const CLeafNodeStatistics& sibling,
+                            const TRegularization& regularization,
+                            const TSizeVec& featureBag,
                             core::CPackedBitVector rowMask);
 
         CLeafNodeStatistics(const CLeafNodeStatistics&) = delete;
@@ -314,37 +316,34 @@ class MATHS_EXPORT CBoostedTreeImpl final {
                    const CDataFrameCategoryEncoder& encoder,
                    const TRegularization& regularization,
                    const TDoubleVecVec& candidateSplits,
-                   TSizeVec featureBag,
+                   const TSizeVec& featureBag,
                    const CBoostedTreeNode& split,
                    bool leftChildHasFewerRows);
 
         //! Order two leaves by decreasing gain in splitting them.
         bool operator<(const CLeafNodeStatistics& rhs) const {
-            return this->bestSplitStatistics() < rhs.bestSplitStatistics();
+            return m_BestSplit < rhs.m_BestSplit;
         }
 
         //! Get the gain in loss of the best split of this leaf.
-        double gain() const { return this->bestSplitStatistics().s_Gain; }
+        double gain() const { return m_BestSplit.s_Gain; }
 
-        double curvature() const {
-            return this->bestSplitStatistics().s_Curvature;
-        }
+        double curvature() const { return m_BestSplit.s_Curvature; }
 
         //! Get the best (feature, feature value) split.
         TSizeDoublePr bestSplit() const {
-            const auto& split = this->bestSplitStatistics();
-            return {split.s_Feature, split.s_SplitAt};
+            return {m_BestSplit.s_Feature, m_BestSplit.s_SplitAt};
         }
 
         //! Check if the left child has fewer rows than the right child.
         bool leftChildHasFewerRows() const {
-            return this->bestSplitStatistics().s_LeftChildHasFewerRows;
+            return m_BestSplit.s_LeftChildHasFewerRows;
         }
 
         //! Check if we should assign the missing feature rows to the left child
         //! of the split.
         bool assignMissingToLeft() const {
-            return this->bestSplitStatistics().s_AssignMissingToLeft;
+            return m_BestSplit.s_AssignMissingToLeft;
         }
 
         //! Get the node's identifier.
@@ -355,8 +354,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {
 
         //! Get the memory used by this object.
         std::size_t memoryUsage() const {
-            std::size_t mem{core::CMemory::dynamicSize(m_FeatureBag)};
-            mem += core::CMemory::dynamicSize(m_RowMask);
+            std::size_t mem{core::CMemory::dynamicSize(m_RowMask)};
             mem += core::CMemory::dynamicSize(m_Derivatives);
             mem += core::CMemory::dynamicSize(m_MissingDerivatives);
             return mem;
@@ -367,12 +365,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {
         //! and \p numberSplitsPerFeature.
         static std::size_t estimateMemoryUsage(std::size_t numberRows,
                                                std::size_t numberCols,
-                                               double featureBagFraction,
                                                std::size_t numberSplitsPerFeature) {
-            std::size_t featureBagSize{
-                static_cast<std::size_t>(std::ceil(
-                    featureBagFraction * static_cast<double>(numberCols - 1))) *
-                sizeof(std::size_t)};
             // We will typically get the close to the best compression for most of the
             // leaves when the set of splits becomes large, corresponding to the worst
             // case for memory usage. This is because the rows will be spread over many
@@ -381,13 +374,13 @@ class MATHS_EXPORT CBoostedTreeImpl final {
             std::size_t derivativesSize{(numberCols - 1) * numberSplitsPerFeature *
                                         sizeof(SAggregateDerivatives)};
             std::size_t missingDerivativesSize{(numberCols - 1) * sizeof(SAggregateDerivatives)};
-            return sizeof(CLeafNodeStatistics) + featureBagSize + rowMaskSize +
-                   derivativesSize + missingDerivativesSize;
+            return sizeof(CLeafNodeStatistics) + rowMaskSize + derivativesSize + missingDerivativesSize;
         }
 
     private:
         //! \brief Statistics relating to a split of the node.
         struct SSplitStatistics : private boost::less_than_comparable<SSplitStatistics> {
+            SSplitStatistics() = default;
             SSplitStatistics(double gain,
                              double curvature,
                              std::size_t feature,
@@ -410,12 +403,12 @@ class MATHS_EXPORT CBoostedTreeImpl final {
                 return result.str();
             }
 
-            double s_Gain;
-            double s_Curvature;
-            std::size_t s_Feature;
-            double s_SplitAt;
-            bool s_LeftChildHasFewerRows;
-            bool s_AssignMissingToLeft;
+            double s_Gain = -INF;
+            double s_Curvature = 0.0;
+            std::size_t s_Feature = -1;
+            double s_SplitAt = INF;
+            bool s_LeftChildHasFewerRows = true;
+            bool s_AssignMissingToLeft = true;
         };
 
         //! \brief Aggregate derivatives.
@@ -487,25 +480,17 @@ class MATHS_EXPORT CBoostedTreeImpl final {
         void addRowDerivatives(const CEncodedDataFrameRowRef& row,
                                SSplitAggregateDerivatives& splitAggregateDerivatives) const;
 
-        const SSplitStatistics& bestSplitStatistics() const {
-            if (m_BestSplit == boost::none) {
-                m_BestSplit = this->computeBestSplitStatistics();
-            }
-            return *m_BestSplit;
-        }
-
-        SSplitStatistics computeBestSplitStatistics() const;
+        SSplitStatistics computeBestSplitStatistics(const TRegularization& regularization,
+                                                    const TSizeVec& featureBag) const;
 
     private:
         std::size_t m_Id;
-        const TRegularization& m_Regularization;
         const TDoubleVecVec& m_CandidateSplits;
         std::size_t m_Depth;
-        TSizeVec m_FeatureBag;
         core::CPackedBitVector m_RowMask;
         TAggregateDerivativesVecVec m_Derivatives;
         TAggregateDerivativesVec m_MissingDerivatives;
-        mutable boost::optional<SSplitStatistics> m_BestSplit;
+        SSplitStatistics m_BestSplit;
     };
 
 private:

diff --git a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
@@ -398,7 +398,7 @@ void CDataFrameAnalyzerTrainingTest::testRunBoostedTreeRegressionTraining() {
               << "ms");
 
     CPPUNIT_ASSERT(core::CProgramCounters::counter(
-                       counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 3900000);
+                       counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2400000);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 300000);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);
@@ -636,7 +636,7 @@ void CDataFrameAnalyzerTrainingTest::testRunBoostedTreeClassifierTraining() {
     LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
               << "ms");
     CPPUNIT_ASSERT(core::CProgramCounters::counter(
-                       counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 3900000);
+                       counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2400000);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1200000);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
     CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);