Permalink
Browse files

Changing omp_get_num_threads to omp_get_max_threads (#1831)

* Updating dmlc-core

* Changing omp_get_num_threads to omp_get_max_threads
  • Loading branch information...
1 parent 47ba2de commit 7078c41dad5c92da6a81afaecd49768c80c9242a @sdenel sdenel committed with tqchen Dec 4, 2016
View
@@ -282,11 +282,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
+ const int nthread = omp_get_max_threads();
data::SimpleCSRSource& mat = *source;
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
@@ -83,13 +83,8 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
// clear rowset
buffered_rowset_.clear();
// bit map
- int nthread;
+ const int nthread = omp_get_max_threads();
std::vector<bool> bmap;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
-
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
@@ -204,15 +199,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
size_t buffer_begin,
const std::vector<bool>& enabled,
SparsePage* pcol) {
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
- if (nthread > max_nthread) {
- nthread = max_nthread;
- }
- }
+ const int nthread = std::min(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 2, 1));
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
@@ -169,12 +169,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
SparsePage *pcol) {
pcol->Clear();
pcol->min_index = buffered_rowset_[begin];
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- nthread = std::max(nthread, std::max(omp_get_num_procs() / 2 - 1, 1));
- }
+ const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info.num_col, nthread);
View
@@ -301,11 +301,7 @@ class GBTree : public GradientBooster {
void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit) override {
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
+ const int nthread = omp_get_max_threads();
InitThreadTemp(nthread);
this->PredPath(p_fmat, out_preds, ntree_limit);
}
@@ -365,11 +361,7 @@ class GBTree : public GradientBooster {
unsigned tree_begin,
unsigned tree_end) {
const MetaInfo& info = p_fmat->info();
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
+ const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, mparam.num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float> &preds = *out_preds;
@@ -118,15 +118,6 @@ class BaseMaker: public TreeUpdater {
}
return n.cdefault();
}
- /*! \brief get number of omp thread in current context */
- inline static int get_nthread() {
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
- return nthread;
- }
// ------class member helpers---------
/*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair,
@@ -350,7 +341,7 @@ class BaseMaker: public TreeUpdater {
std::vector<TStats> *p_node_stats) {
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
const MetaInfo &info = fmat.info();
- thread_temp.resize(this->get_nthread());
+ thread_temp.resize(omp_get_max_threads());
p_node_stats->resize(tree.param.num_nodes);
#pragma omp parallel
{
@@ -81,7 +81,7 @@ class ColMaker: public TreeUpdater {
struct Builder {
public:
// constructor
- explicit Builder(const TrainParam& param) : param(param) {}
+ explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {}
// update one tree, growing
virtual void Update(const std::vector<bst_gpair>& gpair,
DMatrix* p_fmat,
@@ -166,10 +166,6 @@ class ColMaker: public TreeUpdater {
}
{
// setup temp space for each thread
- #pragma omp parallel
- {
- this->nthread = omp_get_num_threads();
- }
// reserve a small space
stemp.clear();
stemp.resize(this->nthread, std::vector<ThreadEntry>());
@@ -277,8 +273,7 @@ class ColMaker: public TreeUpdater {
for (size_t j = 0; j < qexpand.size(); ++j) {
temp[qexpand[j]].stats.Clear();
}
- nthread = omp_get_num_threads();
- bst_uint step = (col.length + nthread - 1) / nthread;
+ bst_uint step = (col.length + this->nthread - 1) / this->nthread;
bst_uint end = std::min(col.length, step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
@@ -298,15 +293,15 @@ class ColMaker: public TreeUpdater {
for (bst_omp_uint j = 0; j < nnode; ++j) {
const int nid = qexpand[j];
TStats sum(param), tmp(param), c(param);
- for (int tid = 0; tid < nthread; ++tid) {
+ for (int tid = 0; tid < this->nthread; ++tid) {
tmp = stemp[tid][nid].stats;
stemp[tid][nid].stats = sum;
sum.Add(tmp);
if (tid != 0) {
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
}
}
- for (int tid = 0; tid < nthread; ++tid) {
+ for (int tid = 0; tid < this->nthread; ++tid) {
stemp[tid][nid].stats_extra = sum;
ThreadEntry &e = stemp[tid][nid];
bst_float fsplit;
@@ -341,7 +336,7 @@ class ColMaker: public TreeUpdater {
}
if (need_backward) {
tmp = sum;
- ThreadEntry &e = stemp[nthread-1][nid];
+ ThreadEntry &e = stemp[this->nthread-1][nid];
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
@@ -357,8 +352,7 @@ class ColMaker: public TreeUpdater {
TStats c(param), cright(param);
const int tid = omp_get_thread_num();
std::vector<ThreadEntry> &temp = stemp[tid];
- nthread = static_cast<bst_uint>(omp_get_num_threads());
- bst_uint step = (col.length + nthread - 1) / nthread;
+ bst_uint step = (col.length + this->nthread - 1) / this->nthread;
bst_uint end = std::min(col.length, step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
@@ -599,7 +593,7 @@ class ColMaker: public TreeUpdater {
#endif
int poption = param.parallel_option;
if (poption == 2) {
- poption = static_cast<int>(nsize) * 2 < nthread ? 1 : 0;
+ poption = static_cast<int>(nsize) * 2 < this->nthread ? 1 : 0;
}
if (poption == 0) {
#pragma omp parallel for schedule(dynamic, batch_size)
@@ -760,7 +754,7 @@ class ColMaker: public TreeUpdater {
// --data fields--
const TrainParam& param;
// number of OpenMP threads used during training
- int nthread;
+ const int nthread;
// Per feature: shuffle index of each feature index
std::vector<bst_uint> feat_index;
// Instance Data: current node position in the tree of each instance
@@ -336,7 +336,7 @@ class CQHistMaker: public HistMaker<TStats> {
auto lazy_get_hist = [&]()
#endif
{
- thread_hist.resize(this->get_nthread());
+ thread_hist.resize(omp_get_max_threads());
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
iter->BeforeFirst();
@@ -410,7 +410,7 @@ class CQHistMaker: public HistMaker<TStats> {
}
{
// get summary
- thread_sketch.resize(this->get_nthread());
+ thread_sketch.resize(omp_get_max_threads());
// TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree);
@@ -695,7 +695,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
this->wspace.Init(this->param, 1);
// to gain speedup in recovery
{
- this->thread_hist.resize(this->get_nthread());
+ this->thread_hist.resize(omp_get_max_threads());
// TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree);
@@ -756,7 +756,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
const RegTree &tree) override {
const MetaInfo &info = p_fmat->info();
// initialize the data structure
- int nthread = BaseMaker::get_nthread();
+ const int nthread = omp_get_max_threads();
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
@@ -34,11 +34,7 @@ class TreeRefresher: public TreeUpdater {
std::vector<std::vector<TStats> > stemp;
std::vector<RegTree::FVec> fvec_temp;
// setup temp space for each thread
- int nthread;
- #pragma omp parallel
- {
- nthread = omp_get_num_threads();
- }
+ const int nthread = omp_get_max_threads();
fvec_temp.resize(nthread, RegTree::FVec());
stemp.resize(nthread, std::vector<TStats>());
#pragma omp parallel
@@ -141,7 +141,7 @@ class SketchMaker: public BaseMaker {
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
}
- thread_sketch.resize(this->get_nthread());
+ thread_sketch.resize(omp_get_max_threads());
// number of rows in
const size_t nrows = p_fmat->buffered_rowset().size();
// start accumulating statistics

0 comments on commit 7078c41

Please sign in to comment.