Skip to content

Commit

Permalink
[SYCL][HIP] Fix infinite loop when parallel_for range exceeds INT_MAX (#5115)
Browse files Browse the repository at this point in the history

This is the equivalent for HIP of the changes in #5095.

It also fixes #4255 for the HIP plugin.
  • Loading branch information
npmiller authored Dec 17, 2021
1 parent e0e5336 commit fd0b108
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ int getAttribute(pi_device device, hipDeviceAttribute_t attribute) {
}
/// \endcond

void simpleGuessLocalWorkSize(int *threadsPerBlock,
void simpleGuessLocalWorkSize(size_t *threadsPerBlock,
const size_t *global_work_size,
const size_t maxThreadsPerBlock[3],
pi_kernel kernel) {
Expand All @@ -314,8 +314,7 @@ void simpleGuessLocalWorkSize(int *threadsPerBlock,

//(void)minGrid; // Not used, avoid warnings

threadsPerBlock[0] = std::min(static_cast<int>(maxThreadsPerBlock[0]),
static_cast<int>(global_work_size[0]));
threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);

// Find a local work group size that is a divisor of the global
// work group size to produce uniform work groups.
Expand Down Expand Up @@ -2501,7 +2500,7 @@ pi_result hip_piEnqueueKernelLaunch(

// Set the number of threads per block to the number of threads per warp
// by default unless user has provided a better number
int threadsPerBlock[3] = {32, 1, 1};
size_t threadsPerBlock[3] = {32u, 1u, 1u};
size_t maxWorkGroupSize = 0u;
size_t maxThreadsPerBlock[3] = {};
bool providedLocalWorkGroupSize = (local_work_size != nullptr);
Expand Down Expand Up @@ -2531,7 +2530,7 @@ pi_result hip_piEnqueueKernelLaunch(
return PI_INVALID_WORK_GROUP_SIZE;
if (0u != (global_work_size[dim] % local_work_size[dim]))
return PI_INVALID_WORK_GROUP_SIZE;
threadsPerBlock[dim] = static_cast<int>(local_work_size[dim]);
threadsPerBlock[dim] = local_work_size[dim];
return PI_SUCCESS;
};

Expand All @@ -2551,12 +2550,11 @@ pi_result hip_piEnqueueKernelLaunch(
return PI_INVALID_WORK_GROUP_SIZE;
}

int blocksPerGrid[3] = {1, 1, 1};
size_t blocksPerGrid[3] = {1u, 1u, 1u};

for (size_t i = 0; i < work_dim; i++) {
blocksPerGrid[i] =
static_cast<int>(global_work_size[i] + threadsPerBlock[i] - 1) /
threadsPerBlock[i];
(global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
}

pi_result retError = PI_SUCCESS;
Expand Down

0 comments on commit fd0b108

Please sign in to comment.