Skip to content

Commit

Permalink
add regression test for issue pocl#1435
Browse files Browse the repository at this point in the history
  • Loading branch information
franz committed Apr 15, 2024
1 parent 59160ab commit 3191f5b
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 2 deletions.
6 changes: 4 additions & 2 deletions tests/regression/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ set(PROGRAMS_TO_BUILD test_barrier_between_for_loops test_early_return
test_autolocals_in_constexprs test_issue_553 test_issue_577 test_issue_757
test_flatten_barrier_subs test_alignment_with_dynamic_wg
test_alignment_with_dynamic_wg2 test_alignment_with_dynamic_wg3
test_issue_893 test_builtin_args
test_issue_893 test_issue_1435 test_builtin_args
test_workitem_func_outside_kernel
)

Expand Down Expand Up @@ -83,6 +83,8 @@ add_test_pocl(NAME "regression/test_issue_577" COMMAND "test_issue_577")

add_test_pocl(NAME "regression/test_issue_757" COMMAND "test_issue_757")

add_test_pocl(NAME "regression/test_issue_1435" COMMAND "test_issue_1435")

add_test_pocl(NAME "regression/test_workitem_func_outside_kernel" COMMAND "test_workitem_func_outside_kernel")

if(OPENCL_HEADER_VERSION GREATER 299)
Expand Down Expand Up @@ -231,7 +233,7 @@ foreach(VARIANT ${VARIANTS})
"regression/test_issue_445_${VARIANT}" "regression/test_issue_553_${VARIANT}"
"regression/test_issue_577_${VARIANT}" "regression/test_issue_757_${VARIANT}"
"regression/test_llvm_segfault_issue_889_${VARIANT}"
"regression/test_issue_893_${VARIANT}"
"regression/test_issue_893_${VARIANT}" "regression/test_issue_1435_${VARIANT}"
"regression/test_flatten_barrier_subs_${VARIANT}"
"regression/test_workitem_func_outside_kernel_${VARIANT}"
${OCL_30_TESTS}
Expand Down
133 changes: 133 additions & 0 deletions tests/regression/test_issue_1435.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
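/*
Regression test for GitHub issue pocl#1435.

Builds the OpenCL kernel embedded below and queries the program binaries;
prints "OK" on success and "FAIL" otherwise.
*/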

#include "pocl_opencl.h"

#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_TARGET_OPENCL_VERSION 120
#include <CL/opencl.hpp>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>

using namespace std;

// OpenCL C source of the `medfilt2d` kernel used as the reproducer for
// GitHub issue #1435. main() hands this string to cl::Program and builds it
// with "-cl-std=CL1.2".
//
// The text inside the raw string literal is compiled at run time, so it is
// part of the test's behavior: keep it verbatim (including its barrier()
// placement inside the conditional/loop structure) unless the reproducer
// itself is being changed on purpose.
const char *SOURCE = R"RAW(
__kernel void medfilt2d(__global float *image, // input image
__global float *result, // output array
__local float4 *l_data,// local storage 4x the number of threads
int khs1, // Kernel half-size along dim1 (nb lines)
int khs2, // Kernel half-size along dim2 (nb columns)
int height, // Image size along dim1 (nb lines)
int width) // Image size along dim2 (nb columns)
{
int threadid = get_local_id(0);
int x = get_global_id(1);
if (x < width)
{
union
{
float ary[8];
float8 vec;
} output, input;
input.vec = (float8)(MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT);
int kfs1 = 2 * khs1 + 1;
int kfs2 = 2 * khs2 + 1;
int nbands = (kfs1 + 7) / 8;
for (int y=0; y<height; y++)
{
//Select only the active threads, some may remain inactive
int nb_threads = (nbands * kfs2);
int band_nr = threadid / kfs2;
int band_id = threadid % kfs2;
int pos_x = clamp((int)(x + band_id - khs2), (int) 0, (int) width-1);
int max_vec = clamp(kfs1 - 8 * band_nr, 0, 8);
if (y == 0)
{
for (int i=0; i<max_vec; i++)
{
if (threadid<nb_threads)
{
int pos_y = clamp((int)(y + 8 * band_nr + i - khs1), (int) 0, (int) height-1);
input.ary[i] = image[pos_x + width * pos_y];
}
}
}
else
{
//store storage.s0 to some shared memory to retrieve it from another thread.
l_data[threadid].s0 = input.vec.s0;
//Offset to the bottom
input.vec = (float8)(input.vec.s1,
input.vec.s2,
input.vec.s3,
input.vec.s4,
input.vec.s5,
input.vec.s6,
input.vec.s7,
MAXFLOAT);
barrier(CLK_LOCAL_MEM_FENCE);
int read_from = threadid + kfs2;
if (read_from < nb_threads)
input.vec.s7 = l_data[read_from].s0;
else if (threadid < nb_threads) //we are on the last band
{
int pos_y = clamp((int)(y + 8 * band_nr + max_vec - 1 - khs1), (int) 0, (int) height-1);
input.ary[max_vec - 1] = image[pos_x + width * pos_y];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
}
)RAW";

#if 0

// Disabled alternative: a shorter kernel that should trigger the same issue
// as the full `medfilt2d` source. It is excluded from compilation by the
// surrounding `#if 0`; enabling it requires disabling the other `SOURCE`
// definition, since both use the same name.
// NOTE(review): the kernel text looks like a machine-reduced test case;
// presumably it should be kept verbatim rather than cleaned up - confirm
// before editing.

const char *SOURCE = R"RAW(

__kernel void testkernel(__local float2 *b) {
struct {
int c[1];
float2 d;
} e;
for (int f = 0; f < 2; f++) {
if (f)
for (int g; g < (int)b[0].x; g++)
e.c[g] = 0;
else if (b)
e.d.s0 = b[0].s0;
barrier(0);
}
}
)RAW";

#endif

int main(int argc, char *argv[]) {
cl::Device device = cl::Device::getDefault();
cl::Program program(SOURCE);
program.build("-cl-std=CL1.2");

// This triggers compilation of dynamic WG binaries.
cl::Program::Binaries binaries{};
int err = program.getInfo<>(CL_PROGRAM_BINARIES, &binaries);
if (err == CL_SUCCESS) {
printf("OK\n");
return EXIT_SUCCESS;
} else {
printf("FAIL\n");
return EXIT_FAILURE;
}
}

0 comments on commit 3191f5b

Please sign in to comment.