Skip to content

Commit

Permalink
Fix 630 (#747)
Browse files Browse the repository at this point in the history
* fix #630

* fix #630: critical section when inserting into a labels vector

* fix vector copy
  • Loading branch information
sh1ng committed Apr 5, 2019
1 parent 0dc49cd commit 9297d58
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
24 changes: 13 additions & 11 deletions src/gpu/kmeans/kmeans_h2o4gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -837,40 +837,42 @@ int kmeans_predict(int verbose, int gpu_idtry, int n_gputry, size_t rows,
thrust::device_vector<T> *d_centroids[n_gpu];
thrust::device_vector<T> *data_dots[n_gpu];
thrust::device_vector<T> *centroid_dots[n_gpu];
thrust::host_vector<int> *h_labels = new thrust::host_vector<int>(0);
thrust::host_vector<int> *h_labels = new thrust::host_vector<int>(rows);

#pragma omp parallel for
for (int q = 0; q < n_gpu; q++) {
// TODO: that may ignore up to n_gpu - 1 rows
const size_t chunk_size = rows / n_gpu;
CUDACHECK(cudaSetDevice(dList[q]));
kmeans::detail::labels_init();

data_dots[q] = new thrust::device_vector<T>(rows / n_gpu);
data_dots[q] = new thrust::device_vector<T>(chunk_size);
centroid_dots[q] = new thrust::device_vector<T>(k);

d_centroids[q] = new thrust::device_vector<T>(k * cols);
d_data[q] = new thrust::device_vector<T>(rows / n_gpu * cols);
d_data[q] = new thrust::device_vector<T>(chunk_size * cols);

copy_data(verbose, 'r', *d_centroids[q], &centroids[0], 0, k, k, cols);

copy_data(verbose, 'r', *d_data[q], &srcdata[0], q, rows, rows / n_gpu,
cols);
copy_data(verbose, 'r', *d_data[q], &srcdata[0], q, rows, chunk_size, cols);

kmeans::detail::make_self_dots(rows / n_gpu, cols, *d_data[q],
*data_dots[q]);
kmeans::detail::make_self_dots(chunk_size, cols, *d_data[q], *data_dots[q]);

thrust::device_vector<int> d_labels(rows / n_gpu);
thrust::device_vector<int> d_labels(chunk_size);

kmeans::detail::batch_calculate_distances(
verbose, q, rows / n_gpu, cols, k, *d_data[q], *d_centroids[q],
verbose, q, chunk_size, cols, k, *d_data[q], *d_centroids[q],
*data_dots[q], *centroid_dots[q],
[&](int n, size_t offset,
thrust::device_vector<T> &pairwise_distances) {
kmeans::detail::relabel(n, k, pairwise_distances, d_labels, offset);
});

h_labels->insert(h_labels->end(), d_labels.begin(), d_labels.end());
#pragma omp critical
thrust::copy(d_labels.begin(), d_labels.end(),
h_labels->begin() + q * chunk_size);
}

// TODO: check memory freeing
*pred_labels = h_labels->data();

#pragma omp parallel for
Expand Down
2 changes: 2 additions & 0 deletions src/interface_py/requirements_runtime_demos.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ seaborn==0.8.1
# for some demos/tests
feather-format==0.4.0
psutil==5.4.5
# for Jupyter notebooks
pillow==4.2.1

0 comments on commit 9297d58

Please sign in to comment.