Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

* Improve performance of boosted tree training for both classification and regression.
(See {ml-pull}775[#775].)
* Reduce the peak memory used by boosted tree training and fix an overcounting bug
in estimating maximum memory usage. (See {ml-pull}781[#781].)

== {es} version 7.5.0

Expand Down
63 changes: 24 additions & 39 deletions include/maths/CBoostedTreeImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,8 @@ class MATHS_EXPORT CBoostedTreeImpl final {
const CDataFrameCategoryEncoder& encoder,
const TRegularization& regularization,
const TDoubleVecVec& candidateSplits,
const TSizeVec& featureBag,
std::size_t depth,
TSizeVec featureBag,
const core::CPackedBitVector& rowMask);

//! Only called by split but is public so it's accessible to std::make_shared.
Expand All @@ -285,15 +285,17 @@ class MATHS_EXPORT CBoostedTreeImpl final {
const CDataFrameCategoryEncoder& encoder,
const TRegularization& regularization,
const TDoubleVecVec& candidateSplits,
std::size_t depth,
TSizeVec featureBag,
const TSizeVec& featureBag,
bool isLeftChild,
std::size_t depth,
const CBoostedTreeNode& split,
const core::CPackedBitVector& parentRowMask);
//! Only called by split but is public so it's accessible to std::make_shared.
CLeafNodeStatistics(std::size_t id,
const CLeafNodeStatistics& parent,
const CLeafNodeStatistics& sibling,
const TRegularization& regularization,
const TSizeVec& featureBag,
core::CPackedBitVector rowMask);

CLeafNodeStatistics(const CLeafNodeStatistics&) = delete;
Expand All @@ -314,37 +316,34 @@ class MATHS_EXPORT CBoostedTreeImpl final {
const CDataFrameCategoryEncoder& encoder,
const TRegularization& regularization,
const TDoubleVecVec& candidateSplits,
TSizeVec featureBag,
const TSizeVec& featureBag,
const CBoostedTreeNode& split,
bool leftChildHasFewerRows);

//! Order two leaves by decreasing gain in splitting them.
bool operator<(const CLeafNodeStatistics& rhs) const {
return this->bestSplitStatistics() < rhs.bestSplitStatistics();
return m_BestSplit < rhs.m_BestSplit;
}

//! Get the gain in loss of the best split of this leaf.
double gain() const { return this->bestSplitStatistics().s_Gain; }
double gain() const { return m_BestSplit.s_Gain; }

double curvature() const {
return this->bestSplitStatistics().s_Curvature;
}
double curvature() const { return this->m_BestSplit.s_Curvature; }

//! Get the best (feature, feature value) split.
TSizeDoublePr bestSplit() const {
const auto& split = this->bestSplitStatistics();
return {split.s_Feature, split.s_SplitAt};
return {m_BestSplit.s_Feature, m_BestSplit.s_SplitAt};
}

//! Check if the left child has fewer rows than the right child.
bool leftChildHasFewerRows() const {
return this->bestSplitStatistics().s_LeftChildHasFewerRows;
return m_BestSplit.s_LeftChildHasFewerRows;
}

//! Check if we should assign the missing feature rows to the left child
//! of the split.
bool assignMissingToLeft() const {
return this->bestSplitStatistics().s_AssignMissingToLeft;
return m_BestSplit.s_AssignMissingToLeft;
}

//! Get the node's identifier.
Expand All @@ -355,8 +354,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {

//! Get the memory used by this object.
std::size_t memoryUsage() const {
std::size_t mem{core::CMemory::dynamicSize(m_FeatureBag)};
mem += core::CMemory::dynamicSize(m_RowMask);
std::size_t mem{core::CMemory::dynamicSize(m_RowMask)};
mem += core::CMemory::dynamicSize(m_Derivatives);
mem += core::CMemory::dynamicSize(m_MissingDerivatives);
return mem;
Expand All @@ -367,12 +365,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {
//! and \p numberSplitsPerFeature.
static std::size_t estimateMemoryUsage(std::size_t numberRows,
std::size_t numberCols,
double featureBagFraction,
std::size_t numberSplitsPerFeature) {
std::size_t featureBagSize{
static_cast<std::size_t>(std::ceil(
featureBagFraction * static_cast<double>(numberCols - 1))) *
sizeof(std::size_t)};
// We will typically get close to the best compression for most of the
// leaves when the set of splits becomes large, corresponding to the worst
// case for memory usage. This is because the rows will be spread over many
Expand All @@ -381,13 +374,13 @@ class MATHS_EXPORT CBoostedTreeImpl final {
std::size_t derivativesSize{(numberCols - 1) * numberSplitsPerFeature *
sizeof(SAggregateDerivatives)};
std::size_t missingDerivativesSize{(numberCols - 1) * sizeof(SAggregateDerivatives)};
return sizeof(CLeafNodeStatistics) + featureBagSize + rowMaskSize +
derivativesSize + missingDerivativesSize;
return sizeof(CLeafNodeStatistics) + rowMaskSize + derivativesSize + missingDerivativesSize;
}

private:
//! \brief Statistics relating to a split of the node.
struct SSplitStatistics : private boost::less_than_comparable<SSplitStatistics> {
SSplitStatistics() = default;
SSplitStatistics(double gain,
double curvature,
std::size_t feature,
Expand All @@ -410,12 +403,12 @@ class MATHS_EXPORT CBoostedTreeImpl final {
return result.str();
}

double s_Gain;
double s_Curvature;
std::size_t s_Feature;
double s_SplitAt;
bool s_LeftChildHasFewerRows;
bool s_AssignMissingToLeft;
double s_Gain = -INF;
double s_Curvature = 0.0;
std::size_t s_Feature = -1;
double s_SplitAt = INF;
bool s_LeftChildHasFewerRows = true;
bool s_AssignMissingToLeft = true;
};

//! \brief Aggregate derivatives.
Expand Down Expand Up @@ -487,25 +480,17 @@ class MATHS_EXPORT CBoostedTreeImpl final {
void addRowDerivatives(const CEncodedDataFrameRowRef& row,
SSplitAggregateDerivatives& splitAggregateDerivatives) const;

const SSplitStatistics& bestSplitStatistics() const {
if (m_BestSplit == boost::none) {
m_BestSplit = this->computeBestSplitStatistics();
}
return *m_BestSplit;
}

SSplitStatistics computeBestSplitStatistics() const;
SSplitStatistics computeBestSplitStatistics(const TRegularization& regularization,
const TSizeVec& featureBag) const;

private:
std::size_t m_Id;
const TRegularization& m_Regularization;
const TDoubleVecVec& m_CandidateSplits;
std::size_t m_Depth;
TSizeVec m_FeatureBag;
core::CPackedBitVector m_RowMask;
TAggregateDerivativesVecVec m_Derivatives;
TAggregateDerivativesVec m_MissingDerivatives;
mutable boost::optional<SSplitStatistics> m_BestSplit;
SSplitStatistics m_BestSplit;
};

private:
Expand Down
4 changes: 2 additions & 2 deletions lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ void CDataFrameAnalyzerTrainingTest::testRunBoostedTreeRegressionTraining() {
<< "ms");

CPPUNIT_ASSERT(core::CProgramCounters::counter(
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 3900000);
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2400000);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 300000);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);
Expand Down Expand Up @@ -636,7 +636,7 @@ void CDataFrameAnalyzerTrainingTest::testRunBoostedTreeClassifierTraining() {
LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
<< "ms");
CPPUNIT_ASSERT(core::CProgramCounters::counter(
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 3900000);
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2400000);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1200000);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);
Expand Down
Loading