@@ -1,8 +1,8 @@
 /*
 Increment a vector, one value per work item.
 
-It is useless to do this on a GPU, not enough work per IO,
-it is just a clEnqueueNDRangeKernel + get_global_id hello world.
+It is useless to do this on a GPU, not enough work / IO,
+it's just a clEnqueueNDRangeKernel + get_global_id hello world.
 
 - http://stackoverflow.com/questions/15194798/vector-step-addition-slower-on-cuda
 - http://stackoverflow.com/questions/22005405/how-to-add-up-the-elements-of-an-array-in-gpu-any-function-similar-to-cublasdas
@@ -11,47 +11,31 @@ it is just a clEnqueueNDRangeKernel + get_global_id hello world.
 
 #include "common.h"
 
-int main(int argc, char **argv) {
+int main(void) {
     const char *source =
-        "__kernel void kmain(__global int *io) {\n"
-        "    io[get_global_id(0)]++;\n"
+        "__kernel void kmain(__global int *out) {\n"
+        "    out[get_global_id(0)]++;\n"
         "}\n";
-    cl_int *io, *expected_output;
+    cl_int input[] = {1, 2};
     cl_mem buffer;
     Common common;
-    size_t i, n, io_sizeof;
+    const size_t global_work_size = sizeof(input) / sizeof(input[0]);
 
-    if (argc > 1) {
-        n = strtoul(argv[1], NULL, 10);
-    } else {
-        n = 2;
-    }
-
-    /* Initialize data. */
-    io_sizeof = n * sizeof(*io);
-    io = malloc(io_sizeof);
-    expected_output = malloc(n * sizeof(*expected_output));
-    for (i = 0; i < n; ++i) {
-        io[i] = i;
-        expected_output[i] = i + 1;
-    }
-
-    /* Run kernel. */
+    /* Run kernel. */
     common_init(&common, source);
-    buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
+    buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
     clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
-    clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &n, NULL, 0, NULL, NULL);
+    clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
     clFlush(common.command_queue);
     clFinish(common.command_queue);
-    clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, io_sizeof, io, 0, NULL, NULL);
+    clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL);
 
-    /* Assertions. */
-    common_vec_assert_eq_i(io, expected_output, n);
+    /* Assertions. */
+    assert(input[0] == 2);
+    assert(input[1] == 3);
 
-    /* Cleanup. */
+    /* Cleanup. */
     clReleaseMemObject(buffer);
     common_deinit(&common);
-    free(io);
-    free(expected_output);
     return EXIT_SUCCESS;
 }
0 commit comments