Skip to content

Commit 203bf34

Browse files
committed
Attempt binary shader
1 parent 8815ddb commit 203bf34

File tree

4 files changed

+76
-35
lines changed

4 files changed

+76
-35
lines changed

c/stdio_h.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,16 +75,16 @@ Same as `file_size`, but takes the path instead of a `FILE*`.
7575
*/
7676
long file_size(char *path) {
7777
FILE *fp;
78-
long retur_value;
78+
long return_value;
7979
fp = fopen(path, "r");
8080
if (fp == NULL) {
8181
return -1L;
8282
}
83-
retur_value = fget_file_size(fp);
83+
return_value = fget_file_size(fp);
8484
if (fclose(fp) == EOF) {
8585
return -1L;
8686
}
87-
return retur_value;
87+
return return_value;
8888
}
8989

9090
/*

opencl/binary_shader.c

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
/*
2+
*/
3+
4+
#include "common.h"
5+
6+
int main(void) {
7+
const char *source =
8+
"__kernel void kmain(__global int *out) {\n"
9+
" out[get_global_id(0)]++;\n"
10+
"}\n"
11+
;
12+
cl_int input[] = {1, 2};
13+
cl_kernel kernel;
14+
cl_mem buffer;
15+
cl_program program;
16+
Common common;
17+
const size_t global_work_size = sizeof(input) / sizeof(input[0]);
18+
19+
/* Run kernel. */
20+
common_init(&common, source);
21+
22+
unsigned char *binary;
23+
size_t binary_size;
24+
FILE *f;
25+
26+
clGetProgramInfo(common.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_size, NULL);
27+
binary = malloc(binary_size);
28+
clGetProgramInfo(common.program, CL_PROGRAM_BINARIES, binary_size, binary, NULL);
29+
30+
/* Not mandatory, but fun to reverse engineer their format later on. */
31+
f = fopen("a.bin.tmp", "w");
32+
fwrite(binary, binary_size, 1, f);
33+
fclose(f);
34+
35+
program = clCreateProgramWithBinary(common.context, 1, &common.device, &binary_size, (const unsigned char **)&binary, NULL, NULL);
36+
kernel = clCreateKernel(program, "kmain", NULL);
37+
free(binary);
38+
39+
40+
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
41+
clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
42+
clEnqueueNDRangeKernel(common.command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
43+
clFlush(common.command_queue);
44+
clFinish(common.command_queue);
45+
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL);
46+
47+
/* Assertions. */
48+
assert(input[0] == 2);
49+
assert(input[1] == 3);
50+
51+
/* Cleanup. */
52+
clReleaseKernel(kernel);
53+
clReleaseProgram(program);
54+
clReleaseMemObject(buffer);
55+
common_deinit(&common);
56+
return EXIT_SUCCESS;
57+
}

opencl/common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ void common_create_program(
5858
clGetProgramBuildInfo(*program, common->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &err_len);
5959
err = malloc(err_len);
6060
clGetProgramBuildInfo(*program, common->device, CL_PROGRAM_BUILD_LOG, err_len, err, NULL);
61-
fprintf(stderr, "error: kernel build:\n%s\n", err);
61+
fprintf(stderr, "error: clCreateProgramWithSource:\n%s\n", err);
6262
free(err);
6363
exit(EXIT_FAILURE);
6464
}

opencl/inc_vector.c

Lines changed: 15 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
/*
22
Increment a vector, one value per work item.
33
4-
It is useless to do this on a GPU, not enough work per IO,
5-
it is just a clEnqueueNDRangeKernel + get_global_id hello world.
4+
It is useless to do this on a GPU, not enough work / IO,
5+
it's just a clEnqueueNDRangeKernel + get_global_id hello world.
66
77
- http://stackoverflow.com/questions/15194798/vector-step-addition-slower-on-cuda
88
- http://stackoverflow.com/questions/22005405/how-to-add-up-the-elements-of-an-array-in-gpu-any-function-similar-to-cublasdas
@@ -11,47 +11,31 @@ it is just a clEnqueueNDRangeKernel + get_global_id hello world.
1111

1212
#include "common.h"
1313

14-
int main(int argc, char **argv) {
14+
int main(void) {
1515
const char *source =
16-
"__kernel void kmain(__global int *io) {\n"
17-
" io[get_global_id(0)]++;\n"
16+
"__kernel void kmain(__global int *out) {\n"
17+
" out[get_global_id(0)]++;\n"
1818
"}\n";
19-
cl_int *io, *expected_output;
19+
cl_int input[] = {1, 2};
2020
cl_mem buffer;
2121
Common common;
22-
size_t i, n, io_sizeof;
22+
const size_t global_work_size = sizeof(input) / sizeof(input[0]);
2323

24-
if (argc > 1) {
25-
n = strtoul(argv[1], NULL, 10);
26-
} else {
27-
n = 2;
28-
}
29-
30-
/* Initialize data. */
31-
io_sizeof = n * sizeof(*io);
32-
io = malloc(io_sizeof);
33-
expected_output = malloc(n * sizeof(*expected_output));
34-
for (i = 0; i < n; ++i) {
35-
io[i] = i;
36-
expected_output[i] = i + 1;
37-
}
38-
39-
/* Run kernel. */
24+
/* Run kernel. */
4025
common_init(&common, source);
41-
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
26+
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
4227
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
43-
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &n, NULL, 0, NULL, NULL);
28+
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
4429
clFlush(common.command_queue);
4530
clFinish(common.command_queue);
46-
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, io_sizeof, io, 0, NULL, NULL);
31+
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL);
4732

48-
/* Assertions. */
49-
common_vec_assert_eq_i(io, expected_output, n);
33+
/* Assertions. */
34+
assert(input[0] == 2);
35+
assert(input[1] == 3);
5036

51-
/* Cleanup. */
37+
/* Cleanup. */
5238
clReleaseMemObject(buffer);
5339
common_deinit(&common);
54-
free(io);
55-
free(expected_output);
5640
return EXIT_SUCCESS;
5741
}

0 commit comments

Comments
 (0)