Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SYCL. Add loss-guided hist building #10251

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions plugin/sycl/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,93 @@ ::sycl::event SubtractionHist(::sycl::queue qu,
const GHistRow<GradientSumT, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv);

/*!
* \brief Histograms of gradient statistics for multiple nodes
*/
/*!
 * \brief Per-node storage of gradient-statistics histograms.
 *
 * Rows are created lazily via AddHistRow() and looked up by node id.
 */
template<typename GradientSumT, MemoryType memory_type = MemoryType::shared>
class HistCollection {
 public:
  using GHistRowT = GHistRow<GradientSumT, memory_type>;

  /*! \brief Access histogram of node `nid`; throws std::out_of_range if absent */
  GHistRowT& operator[](bst_uint nid) {
    return *(data_.at(nid));
  }

  const GHistRowT& operator[](bst_uint nid) const {
    return *(data_.at(nid));
  }

  /*!
   * \brief Bind the collection to a queue and bin count.
   * Cached rows are invalidated only when the bin count changes.
   */
  void Init(::sycl::queue qu, uint32_t nbins) {
    qu_ = qu;
    if (nbins != nbins_) {
      data_.clear();
      nbins_ = nbins;
    }
  }

  /*!
   * \brief Make sure a zero-filled histogram exists for node `nid`.
   * \return event of the asynchronous zero-initialization.
   */
  ::sycl::event AddHistRow(bst_uint nid) {
    ::sycl::event event;
    const auto zero = xgboost::detail::GradientPairInternal<GradientSumT>(0, 0);
    auto it = data_.find(nid);
    if (it != data_.end()) {
      // Row already exists: resize re-initializes it to zero.
      it->second->Resize(&qu_, nbins_, zero, &event);
    } else {
      data_[nid] = std::make_shared<GHistRowT>(&qu_, nbins_, zero, &event);
    }
    return event;
  }

 private:
  /*! \brief Number of all bins over all features */
  uint32_t nbins_ = 0;

  /*! \brief node id -> histogram row */
  std::unordered_map<uint32_t, std::shared_ptr<GHistRowT>> data_;

  ::sycl::queue qu_;
};

/*!
* \brief Stores temporary histograms to compute them in parallel
*/
/*!
 * \brief Holds scratch histograms so that histogram building can run in parallel
 */
template<typename GradientSumT>
class ParallelGHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT, MemoryType::on_device>;

  /*! \brief Bind to a queue; re-initializes buffers only when the bin count changes */
  void Init(::sycl::queue qu, size_t nbins) {
    qu_ = qu;
    if (nbins == nbins_) return;
    hist_buffer_.Init(qu_, nbins);
    nbins_ = nbins;
  }

  /*! \brief Resize the device scratch buffer for `nblocks` parallel blocks */
  void Reset(size_t nblocks) {
    // two gradient sums (grad, hess) per bin per block
    const size_t required = 2 * nblocks * nbins_;
    hist_device_buffer_.Resize(&qu_, required);
  }

  GHistRowT& GetDeviceBuffer() {
    return hist_device_buffer_;
  }

 protected:
  /*! \brief Number of bins in each histogram */
  size_t nbins_ = 0;
  /*! \brief Buffers for histograms for all nodes processed */
  HistCollection<GradientSumT> hist_buffer_;

  /*! \brief Buffer for additional histograms for Parallel processing */
  GHistRowT hist_device_buffer_;

  ::sycl::queue qu_;
};

/*!
* \brief Builder for histograms of gradient statistics
*/
Expand Down
6 changes: 6 additions & 0 deletions plugin/sycl/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ class USMVector {
qu->fill(data_.get(), v, size_).wait();
}

// Construct a vector of `size` elements, asynchronously filled with `v`.
// `event` is both an input and an output: the fill is ordered after the
// event passed in, and `*event` is then overwritten with the event of the
// fill operation itself so the caller can chain further work on it.
USMVector(::sycl::queue* qu, size_t size, T v,
          ::sycl::event* event) : size_(size), capacity_(size) {
  data_ = allocate_memory_(qu, size_);
  *event = qu->fill(data_.get(), v, size_, *event);
}

USMVector(::sycl::queue* qu, const std::vector<T> &vec) {
size_ = vec.size();
capacity_ = size_;
Expand Down
46 changes: 46 additions & 0 deletions plugin/sycl/tree/hist_row_adder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*!
* Copyright 2017-2024 by Contributors
* \file hist_row_adder.h
*/
#ifndef PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
#define PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_

#include <vector>
#include <algorithm>

namespace xgboost {
namespace sycl {
namespace tree {

// Strategy interface for allocating histogram rows on the builder for the
// nodes scheduled in the current iteration.
template <typename GradientSumT>
class HistRowsAdder {
 public:
  // `sync_ids` is an output parameter — presumably the node ids that later
  // need histogram synchronization; note BatchHistRowsAdder leaves it
  // untouched (TODO confirm intended contract for other implementations).
  virtual void AddHistRows(HistUpdater<GradientSumT>* builder,
                           std::vector<int>* sync_ids, RegTree *p_tree) = 0;
  virtual ~HistRowsAdder() = default;
};

// Batch implementation: allocates a histogram row for every node scheduled
// for explicit building and for every subtraction-trick node.
// Does not populate `sync_ids`.
template <typename GradientSumT>
class BatchHistRowsAdder: public HistRowsAdder<GradientSumT> {
 public:
  void AddHistRows(HistUpdater<GradientSumT>* builder,
                   std::vector<int>* sync_ids, RegTree *p_tree) override {
    builder->builder_monitor_.Start("AddHistRows");

    // The events returned by AddHistRow are intentionally discarded: the
    // caller (BuildHistogramsLossGuide) synchronizes the whole queue with
    // wait_and_throw() before the histograms are used.
    for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
      builder->hist_.AddHistRow(entry.nid);
    }
    for (auto const& node : builder->nodes_for_subtraction_trick_) {
      builder->hist_.AddHistRow(node.nid);
    }

    builder->builder_monitor_.Stop("AddHistRows");
  }
};

} // namespace tree
} // namespace sycl
} // namespace xgboost

#endif // PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
68 changes: 68 additions & 0 deletions plugin/sycl/tree/hist_synchronizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*!
* Copyright 2017-2024 by Contributors
* \file hist_synchronizer.h
*/
#ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
#define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_

#include <vector>

#include "../common/hist_util.h"
#include "expand_entry.h"

namespace xgboost {
namespace sycl {
namespace tree {

template <typename GradientSumT>
class HistUpdater;

// Strategy interface for reconciling per-node histograms after local
// building (e.g. deriving sibling histograms via the subtraction trick).
template <typename GradientSumT>
class HistSynchronizer {
 public:
  // `sync_ids` comes from the paired HistRowsAdder::AddHistRows call;
  // BatchHistSynchronizer ignores it.
  virtual void SyncHistograms(HistUpdater<GradientSumT>* builder,
                              const std::vector<int>& sync_ids,
                              RegTree *p_tree) = 0;
  virtual ~HistSynchronizer() = default;
};

// Batch implementation: for every non-root node that was built explicitly,
// computes its sibling's histogram as (parent - this) on the device.
// `sync_ids` is unused in batch mode.
template <typename GradientSumT>
class BatchHistSynchronizer: public HistSynchronizer<GradientSumT> {
 public:
  void SyncHistograms(HistUpdater<GradientSumT>* builder,
                      const std::vector<int>& sync_ids,
                      RegTree *p_tree) override {
    builder->builder_monitor_.Start("SyncHistograms");
    const size_t nbins = builder->hist_builder_.GetNumBins();

    hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());
    // size_t loop index: the original `int i < size()` mixed signed and
    // unsigned in the comparison.
    for (size_t i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {
      // Take a reference: the original copied the entry each iteration.
      const auto& entry = builder->nodes_for_explicit_hist_build_[i];
      auto& this_hist = builder->hist_[entry.nid];

      if (!(*p_tree)[entry.nid].IsRoot()) {
        const size_t parent_id = (*p_tree)[entry.nid].Parent();
        auto& parent_hist = builder->hist_[parent_id];
        auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)];
        // sibling = parent - this, computed asynchronously on the device
        hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
                                                       this_hist, nbins, ::sycl::event());
      }
    }
    builder->qu_.wait_and_throw();

    builder->builder_monitor_.Stop("SyncHistograms");
  }

  std::vector<::sycl::event> GetEvents() const {
    return hist_sync_events_;
  }

 private:
  std::vector<::sycl::event> hist_sync_events_;
};

} // namespace tree
} // namespace sycl
} // namespace xgboost

#endif // PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
93 changes: 93 additions & 0 deletions plugin/sycl/tree/hist_updater.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,69 @@

#include <oneapi/dpl/random>

#include "../common/hist_util.h"

namespace xgboost {
namespace sycl {
namespace tree {

// Install the histogram-synchronization strategy.
// Takes ownership of `sync`: the raw pointer is adopted by the updater's
// smart pointer via reset(), so callers must pass a heap-allocated object
// and must not delete it themselves.
template <typename GradientSumT>
void HistUpdater<GradientSumT>::SetHistSynchronizer(
    HistSynchronizer<GradientSumT> *sync) {
  hist_synchronizer_.reset(sync);
}

// Install the histogram-row-allocation strategy.
// Takes ownership of `adder`: the raw pointer is adopted by the updater's
// smart pointer via reset(), so callers must pass a heap-allocated object
// and must not delete it themselves.
template <typename GradientSumT>
void HistUpdater<GradientSumT>::SetHistRowsAdder(
    HistRowsAdder<GradientSumT> *adder) {
  hist_rows_adder_.reset(adder);
}

// Build the histogram for one expanded node (loss-guided growth policy).
// The node itself is built explicitly; if it has a sibling, the sibling's
// histogram is scheduled for the subtraction trick (parent - this) instead
// of a full rebuild.
template <typename GradientSumT>
void HistUpdater<GradientSumT>::BuildHistogramsLossGuide(
    ExpandEntry entry,
    const common::GHistIndexMatrix &gmat,
    RegTree *p_tree,
    const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
  nodes_for_explicit_hist_build_.clear();
  nodes_for_subtraction_trick_.clear();
  nodes_for_explicit_hist_build_.push_back(entry);

  // Non-root nodes have a sibling whose histogram can be derived cheaply.
  if (!(*p_tree)[entry.nid].IsRoot()) {
    auto sibling_id = entry.GetSiblingId(p_tree);
    nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id));
  }

  std::vector<int> sync_ids;
  hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);
  // Ensure row allocation/zeroing has finished before building into them.
  qu_.wait_and_throw();
  BuildLocalHistograms(gmat, p_tree, gpair_device);
  hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);
}

// Build histograms for all nodes scheduled for explicit construction.
// Submissions are chained through `event`, so the per-node builds are
// serialized on the device — presumably because they share the device
// scratch buffer from hist_buffer_ (TODO confirm).
template<typename GradientSumT>
void HistUpdater<GradientSumT>::BuildLocalHistograms(
    const common::GHistIndexMatrix &gmat,
    RegTree *p_tree,
    const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
  builder_monitor_.Start("BuildLocalHistograms");
  const size_t n_nodes = nodes_for_explicit_hist_build_.size();
  ::sycl::event event;

  for (size_t i = 0; i < n_nodes; i++) {
    const int32_t nid = nodes_for_explicit_hist_build_[i].nid;

    if (row_set_collection_[nid].Size() > 0) {
      // Node has rows: accumulate gradient pairs into its histogram.
      event = BuildHist(gpair_device, row_set_collection_[nid], gmat, &(hist_[nid]),
                        &(hist_buffer_.GetDeviceBuffer()), event);
    } else {
      // Empty node: just zero its histogram.
      common::InitHist(qu_, &(hist_[nid]), hist_[nid].Size(), &event);
    }
  }
  // Block until all histogram builds have completed.
  qu_.wait_and_throw();
  builder_monitor_.Stop("BuildLocalHistograms");
}

template<typename GradientSumT>
void HistUpdater<GradientSumT>::InitSampling(
const USMVector<GradientPair, MemoryType::on_device> &gpair,
Expand Down Expand Up @@ -70,6 +129,21 @@ void HistUpdater<GradientSumT>::InitData(
// initialize the row set
{
row_set_collection_.Clear();

// initialize histogram collection
uint32_t nbins = gmat.cut.Ptrs().back();
hist_.Init(qu_, nbins);

hist_buffer_.Init(qu_, nbins);
size_t buffer_size = kBufferSize;
if (buffer_size > info.num_row_ / kMinBlockSize + 1) {
buffer_size = info.num_row_ / kMinBlockSize + 1;
}
hist_buffer_.Reset(buffer_size);

// initialize histogram builder
hist_builder_ = common::GHistBuilder<GradientSumT>(qu_, nbins);

USMVector<size_t, MemoryType::on_device>* row_indices = &(row_set_collection_.Data());
row_indices->Resize(&qu_, info.num_row_);
size_t* p_row_indices = row_indices->Data();
Expand Down Expand Up @@ -122,6 +196,25 @@ void HistUpdater<GradientSumT>::InitData(
}
}
row_set_collection_.Init();

{
/* determine layout of data */
const size_t nrow = info.num_row_;
const size_t ncol = info.num_col_;
const size_t nnz = info.num_nonzero_;
// number of discrete bins for feature 0
const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];
if (nrow * ncol == nnz) {
// dense data with zero-based indexing
data_layout_ = kDenseDataZeroBased;
} else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {
// dense data with one-based indexing
data_layout_ = kDenseDataOneBased;
} else {
// sparse data
data_layout_ = kSparseData;
}
}
}

template class HistUpdater<float>;
Expand Down
Loading
Loading