Skip to content

Commit

Permalink
Changing omp_get_num_threads to omp_get_max_threads (#1831)
Browse files Browse the repository at this point in the history
* Updating dmlc-core

* Changing omp_get_num_threads to omp_get_max_threads
  • Loading branch information
sdenel authored and tqchen committed Dec 4, 2016
1 parent 47ba2de commit 7078c41
Show file tree
Hide file tree
Showing 10 changed files with 22 additions and 71 deletions.
6 changes: 1 addition & 5 deletions src/c_api/c_api.cc
Expand Up @@ -282,11 +282,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());

API_BEGIN();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
const int nthread = omp_get_max_threads();
data::SimpleCSRSource& mat = *source;
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
Expand Down
17 changes: 2 additions & 15 deletions src/data/simple_dmatrix.cc
Expand Up @@ -83,13 +83,8 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
// clear rowset
buffered_rowset_.clear();
// bit map
int nthread;
const int nthread = omp_get_max_threads();
std::vector<bool> bmap;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}

pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
Expand Down Expand Up @@ -204,15 +199,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
size_t buffer_begin,
const std::vector<bool>& enabled,
SparsePage* pcol) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
}
const int nthread = std::min(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 2, 1));
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
Expand Down
7 changes: 1 addition & 6 deletions src/data/sparse_page_dmatrix.cc
Expand Up @@ -169,12 +169,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
SparsePage *pcol) {
pcol->Clear();
pcol->min_index = buffered_rowset_[begin];
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
nthread = std::max(nthread, std::max(omp_get_num_procs() / 2 - 1, 1));
}
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info.num_col, nthread);
Expand Down
12 changes: 2 additions & 10 deletions src/gbm/gbtree.cc
Expand Up @@ -301,11 +301,7 @@ class GBTree : public GradientBooster {
void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit) override {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread);
this->PredPath(p_fmat, out_preds, ntree_limit);
}
Expand Down Expand Up @@ -365,11 +361,7 @@ class GBTree : public GradientBooster {
unsigned tree_begin,
unsigned tree_end) {
const MetaInfo& info = p_fmat->info();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, mparam.num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float> &preds = *out_preds;
Expand Down
11 changes: 1 addition & 10 deletions src/tree/updater_basemaker-inl.h
Expand Up @@ -118,15 +118,6 @@ class BaseMaker: public TreeUpdater {
}
return n.cdefault();
}
/*!
 * \brief Get the number of OpenMP threads used by a parallel region
 *        opened from the current context.
 * \return the team size reported by omp_get_num_threads() inside a
 *         freshly opened parallel region.
 */
inline static int get_nthread() {
  int nthread = 1;
  #pragma omp parallel
  {
    // Only one thread of the team stores the result: having every thread
    // write the shared variable concurrently is a data race, even though
    // they would all store the same value.
    #pragma omp single
    nthread = omp_get_num_threads();
  }
  return nthread;
}
// ------class member helpers---------
/*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair,
Expand Down Expand Up @@ -350,7 +341,7 @@ class BaseMaker: public TreeUpdater {
std::vector<TStats> *p_node_stats) {
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
const MetaInfo &info = fmat.info();
thread_temp.resize(this->get_nthread());
thread_temp.resize(omp_get_max_threads());
p_node_stats->resize(tree.param.num_nodes);
#pragma omp parallel
{
Expand Down
22 changes: 8 additions & 14 deletions src/tree/updater_colmaker.cc
Expand Up @@ -81,7 +81,7 @@ class ColMaker: public TreeUpdater {
struct Builder {
public:
// constructor
explicit Builder(const TrainParam& param) : param(param) {}
explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {}
// update one tree, growing
virtual void Update(const std::vector<bst_gpair>& gpair,
DMatrix* p_fmat,
Expand Down Expand Up @@ -166,10 +166,6 @@ class ColMaker: public TreeUpdater {
}
{
// setup temp space for each thread
#pragma omp parallel
{
this->nthread = omp_get_num_threads();
}
// reserve a small space
stemp.clear();
stemp.resize(this->nthread, std::vector<ThreadEntry>());
Expand Down Expand Up @@ -277,8 +273,7 @@ class ColMaker: public TreeUpdater {
for (size_t j = 0; j < qexpand.size(); ++j) {
temp[qexpand[j]].stats.Clear();
}
nthread = omp_get_num_threads();
bst_uint step = (col.length + nthread - 1) / nthread;
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
bst_uint end = std::min(col.length, step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
Expand All @@ -298,15 +293,15 @@ class ColMaker: public TreeUpdater {
for (bst_omp_uint j = 0; j < nnode; ++j) {
const int nid = qexpand[j];
TStats sum(param), tmp(param), c(param);
for (int tid = 0; tid < nthread; ++tid) {
for (int tid = 0; tid < this->nthread; ++tid) {
tmp = stemp[tid][nid].stats;
stemp[tid][nid].stats = sum;
sum.Add(tmp);
if (tid != 0) {
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
}
}
for (int tid = 0; tid < nthread; ++tid) {
for (int tid = 0; tid < this->nthread; ++tid) {
stemp[tid][nid].stats_extra = sum;
ThreadEntry &e = stemp[tid][nid];
bst_float fsplit;
Expand Down Expand Up @@ -341,7 +336,7 @@ class ColMaker: public TreeUpdater {
}
if (need_backward) {
tmp = sum;
ThreadEntry &e = stemp[nthread-1][nid];
ThreadEntry &e = stemp[this->nthread-1][nid];
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
Expand All @@ -357,8 +352,7 @@ class ColMaker: public TreeUpdater {
TStats c(param), cright(param);
const int tid = omp_get_thread_num();
std::vector<ThreadEntry> &temp = stemp[tid];
nthread = static_cast<bst_uint>(omp_get_num_threads());
bst_uint step = (col.length + nthread - 1) / nthread;
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
bst_uint end = std::min(col.length, step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
Expand Down Expand Up @@ -599,7 +593,7 @@ class ColMaker: public TreeUpdater {
#endif
int poption = param.parallel_option;
if (poption == 2) {
poption = static_cast<int>(nsize) * 2 < nthread ? 1 : 0;
poption = static_cast<int>(nsize) * 2 < this->nthread ? 1 : 0;
}
if (poption == 0) {
#pragma omp parallel for schedule(dynamic, batch_size)
Expand Down Expand Up @@ -760,7 +754,7 @@ class ColMaker: public TreeUpdater {
// --data fields--
const TrainParam& param;
// number of OpenMP threads used during training
int nthread;
const int nthread;
// Per feature: shuffle index of each feature index
std::vector<bst_uint> feat_index;
// Instance Data: current node position in the tree of each instance
Expand Down
8 changes: 4 additions & 4 deletions src/tree/updater_histmaker.cc
Expand Up @@ -336,7 +336,7 @@ class CQHistMaker: public HistMaker<TStats> {
auto lazy_get_hist = [&]()
#endif
{
thread_hist.resize(this->get_nthread());
thread_hist.resize(omp_get_max_threads());
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
iter->BeforeFirst();
Expand Down Expand Up @@ -410,7 +410,7 @@ class CQHistMaker: public HistMaker<TStats> {
}
{
// get summary
thread_sketch.resize(this->get_nthread());
thread_sketch.resize(omp_get_max_threads());

// TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree);
Expand Down Expand Up @@ -695,7 +695,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
this->wspace.Init(this->param, 1);
// to gain speedup in recovery
{
this->thread_hist.resize(this->get_nthread());
this->thread_hist.resize(omp_get_max_threads());

// TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree);
Expand Down Expand Up @@ -756,7 +756,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
const RegTree &tree) override {
const MetaInfo &info = p_fmat->info();
// initialize the data structure
int nthread = BaseMaker::get_nthread();
const int nthread = omp_get_max_threads();
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
Expand Down
6 changes: 1 addition & 5 deletions src/tree/updater_refresh.cc
Expand Up @@ -34,11 +34,7 @@ class TreeRefresher: public TreeUpdater {
std::vector<std::vector<TStats> > stemp;
std::vector<RegTree::FVec> fvec_temp;
// setup temp space for each thread
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
const int nthread = omp_get_max_threads();
fvec_temp.resize(nthread, RegTree::FVec());
stemp.resize(nthread, std::vector<TStats>());
#pragma omp parallel
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_skmaker.cc
Expand Up @@ -141,7 +141,7 @@ class SketchMaker: public BaseMaker {
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
}
thread_sketch.resize(this->get_nthread());
thread_sketch.resize(omp_get_max_threads());
// number of rows in the buffered row set
const size_t nrows = p_fmat->buffered_rowset().size();
// start accumulating statistics
Expand Down

0 comments on commit 7078c41

Please sign in to comment.