Description
While testing the Command Buffer Emulation layer, I noticed that clCommandNDRangeKernelKHR does not return CL_INVALID_WORK_GROUP_SIZE when an invalid work size is passed, whereas clEnqueueNDRangeKernel does.
When using the Command Buffer Emulation layer, this error code is only returned later, when calling clEnqueueCommandBufferKHR.
Test case
I set up a simple test based on the 04Julia sample code to demonstrate the problem:
#include <popl/popl.hpp>
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include <stb/stb_image_write.h>
#include <CL/opencl.hpp>
#include <chrono>
const char *filename = "julia.bmp";
const float cr = -0.123f;
const float ci = 0.745f;
static const char kernelString[] = R"CLC(
kernel void Julia( global uchar4* dst, float cr, float ci )
{
const float cMinX = -1.5f;
const float cMaxX = 1.5f;
const float cMinY = -1.5f;
const float cMaxY = 1.5f;
const int cWidth = get_global_size(0);
const int cIterations = 16;
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
float a = x * ( cMaxX - cMinX ) / cWidth + cMinX;
float b = y * ( cMaxY - cMinY ) / cWidth + cMinY;
float result = 0.0f;
const float thresholdSquared = cIterations * cIterations / 64.0f;
for( int i = 0; i < cIterations; i++ ) {
float aa = a * a;
float bb = b * b;
float magnitudeSquared = aa + bb;
if( magnitudeSquared >= thresholdSquared ) {
break;
}
result += 1.0f / cIterations;
b = 2 * a * b + ci;
a = aa - bb + cr;
}
result = max( result, 0.0f );
result = min( result, 1.0f );
// RGBA
float4 color = (float4)( result, sqrt(result), 1.0f, 1.0f );
dst[ y * cWidth + x ] = convert_uchar4(color * 255.0f);
}
)CLC";
void test(int platformIndex, int deviceIndex, size_t gwx, size_t gwy,
size_t lwx, size_t lwy) {
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
printf("Running on platform: %s\n",
platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str());
std::vector<cl::Device> devices;
platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
printf("Running on device: %s\n",
devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str());
cl::Context context{devices[deviceIndex]};
cl::CommandQueue commandQueue =
cl::CommandQueue{context, devices[deviceIndex]};
cl::Program program{context, kernelString};
program.build();
cl::Kernel kernel = cl::Kernel{program, "Julia"};
cl::Buffer deviceMemDst =
cl::Buffer{context, CL_MEM_ALLOC_HOST_PTR, gwx * gwy * sizeof(cl_uchar4)};
// execution
{
kernel.setArg(0, deviceMemDst);
kernel.setArg(1, cr);
kernel.setArg(2, ci);
auto ResEnqueue = commandQueue.enqueueNDRangeKernel(
kernel, cl::NullRange, cl::NDRange{gwx, gwy}, cl::NDRange{lwx, lwy});
std::cout << "Result enqueueNDRangeKernel = " << ResEnqueue << std::endl;
// Ensure the regular enqueue has completed before exercising the command buffer path.
commandQueue.finish();
cl_command_buffer_khr cmdbuf =
clCreateCommandBufferKHR(1, &commandQueue(), NULL, NULL);
cl_sync_point_khr sync_point;
auto ResAppend = clCommandNDRangeKernelKHR(
cmdbuf, NULL, NULL, kernel(), 2, NULL, cl::NDRange{gwx, gwy},
cl::NDRange{lwx, lwy}, 0, NULL, &sync_point, NULL);
clFinalizeCommandBufferKHR(cmdbuf);
auto ResEnqueueCB =
clEnqueueCommandBufferKHR(0, NULL, cmdbuf, 0, NULL, NULL);
std::cout << "Result clCommandNDRangeKernelKHR = " << ResAppend
<< std::endl;
std::cout << "Result clEnqueueCommandBufferKHR = " << ResEnqueueCB
<< std::endl;
}
}
int main(int argc, char **argv) {
int platformIndex = 0;
int deviceIndex = 0;
{
popl::OptionParser op("Supported Options");
op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex,
&platformIndex);
op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex,
&deviceIndex);
bool printUsage = false;
try {
op.parse(argc, argv);
} catch (std::exception &e) {
fprintf(stderr, "Error: %s\n\n", e.what());
printUsage = true;
}
if (printUsage || !op.unknown_options().empty() ||
!op.non_option_args().empty()) {
fprintf(stderr,
"Usage: julia [options]\n"
"%s",
op.help().c_str());
return -1;
}
}
std::cout << "Valid Sizes:" << std::endl;
test(platformIndex, deviceIndex, 512, 512, 8, 8);
std::cout << "Invalid Sizes:" << std::endl;
test(platformIndex, deviceIndex, 8, 8, 16, 16);
return 0;
}
Output
$> OPENCL_LAYERS=../../layers/10_cmdbufemu/libCmdBufEmu.so ./julia -p 3
**Valid Sizes:**
Running on platform: Intel(R) OpenCL
Running on device: 12th Gen Intel(R) Core(TM) i9-12900K
Result enqueueNDRangeKernel = 0
Result clCommandNDRangeKernelKHR = 0
Result clEnqueueCommandBufferKHR = 0
**Invalid Sizes:**
Running on platform: Intel(R) OpenCL
Running on device: 12th Gen Intel(R) Core(TM) i9-12900K
Result enqueueNDRangeKernel = **-54**
Result clCommandNDRangeKernelKHR = **0**
Result clEnqueueCommandBufferKHR = **-54**
Request
My understanding of the specification is that clCommandNDRangeKernelKHR should return the same errors as clEnqueueNDRangeKernel, with only a few exceptions, and CL_INVALID_WORK_GROUP_SIZE is not one of the exceptions.
If that reading is correct, could you please fix this issue?
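For illustration, here is a minimal sketch of the kind of eager check the emulation layer could perform when clCommandNDRangeKernelKHR records the command, so that the error is reported at record time instead of at clEnqueueCommandBufferKHR. The helper name and the exact set of checks are my assumptions, not the layer's actual code; a complete implementation would also need to account for reqd_work_group_size and non-uniform work-group support.

#include <CL/cl.h>
// Hypothetical sketch (not the actual layer code): validate the local work
// size against the kernel and device limits at command-record time.
static cl_int checkLocalWorkSize(cl_kernel kernel, cl_device_id device,
                                 cl_uint work_dim,
                                 const size_t* global_work_size,
                                 const size_t* local_work_size) {
  if (local_work_size == NULL) {
    return CL_SUCCESS;  // the implementation picks the local size
  }
  size_t kernelWGSize = 0;
  cl_int err = clGetKernelWorkGroupInfo(
      kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
      sizeof(kernelWGSize), &kernelWGSize, NULL);
  if (err != CL_SUCCESS) {
    return err;
  }
  size_t totalSize = 1;
  for (cl_uint d = 0; d < work_dim; d++) {
    // Without non-uniform work-groups, the local size must evenly divide
    // the global size in every dimension.
    if (local_work_size[d] == 0 ||
        (global_work_size != NULL &&
         global_work_size[d] % local_work_size[d] != 0)) {
      return CL_INVALID_WORK_GROUP_SIZE;
    }
    totalSize *= local_work_size[d];
  }
  // The work-group may not contain more work-items than the kernel
  // supports on this device.
  if (totalSize > kernelWGSize) {
    return CL_INVALID_WORK_GROUP_SIZE;
  }
  return CL_SUCCESS;
}

In the failing case above, a local size of {16, 16} does not evenly divide the global size of {8, 8}, so a check along these lines would return -54 (CL_INVALID_WORK_GROUP_SIZE) directly from clCommandNDRangeKernelKHR, matching what clEnqueueNDRangeKernel already reports.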