This repository has been archived by the owner on Jul 25, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.c
103 lines (81 loc) · 3.08 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Note: Most of the code comes from the MacResearch OpenCL podcast
#include <stdio.h>
#include <stdlib.h>
#include <OpenCL/OpenCL.h>
#include "bmp.h"
#include "cl_helper.h"
/*
 * Render a width x height image with the kernel loaded from "mandelbrot.cl"
 * on every compute device in the context, then save it as "output.bmp".
 *
 * The image is stored as 3 bytes (red, green, blue) per pixel. Rows are
 * split into contiguous bands, one band per device; the last device also
 * takes any rows left over when height is not a multiple of the device
 * count.
 *
 * OpenCL errors are handed to check_succeeded() (declared in cl_helper.h;
 * what it does on failure is not visible from this file).
 *
 * Returns CL_SUCCESS on completion, or -1 if the dimensions are invalid,
 * no usable devices are found, or the host buffer cannot be allocated.
 */
int runCL(int width, int height)
{
    enum { MAX_DEVICES = 16 };

    cl_kernel kernel;
    cl_command_queue cmd_queue[MAX_DEVICES];
    cl_context context;
    cl_int err = 0;
    cl_uint num_devices = 0;
    cl_device_id devices[MAX_DEVICES];
    cl_mem image;
    cl_uint i;

    if (width <= 0 || height <= 0) {
        printf("Invalid image size %dx%d\n", width, height);
        return -1;
    }

    // Multiply by 3 here, since we need red, green and blue for each pixel
    size_t row_bytes = (size_t) width * 3;
    if ((size_t) height > (size_t) -1 / row_bytes) {
        printf("Image size %dx%d is too large\n", width, height);
        return -1;
    }
    size_t buffer_size = row_bytes * (size_t) height;

    context = create_context(&num_devices);
    if (num_devices == 0) {
        printf("No compute devices found\n");
        return -1;
    }
    if (num_devices > MAX_DEVICES) {
        // The fixed-size device and queue arrays cannot hold more than this.
        printf("Too many compute devices (%u, at most %d supported)\n",
               (unsigned) num_devices, (int) MAX_DEVICES);
        clReleaseContext(context);
        return -1;
    }

    // Allocate only after the early exits above so nothing leaks on them.
    char *host_image = malloc(buffer_size);
    if (host_image == NULL) {
        printf("Could not allocate %lu bytes for the image\n",
               (unsigned long) buffer_size);
        clReleaseContext(context);
        return -1;
    }

    print_debug_info(context);

    err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof devices,
                           devices, NULL);
    check_succeeded("Getting context info", err);

    for (i = 0; i < num_devices; i++) {
        cmd_queue[i] = clCreateCommandQueue(context, devices[i], 0, &err);
        check_succeeded("Creating command queue", err);
    }

    // Mark this write only, since the kernel does not have to read the image
    // it is writing.
    image = clCreateBuffer(context, CL_MEM_WRITE_ONLY, buffer_size, NULL, &err);
    check_succeeded("Creating buffer", err);

    // Load the program source from disk
    const char *filename = "mandelbrot.cl";
    kernel = load_kernel_from_file(context, filename);

    // The kernel's only argument is the output image buffer.
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image);
    check_succeeded("Setting kernel arg", err);

    // Block-allocate rows across the devices: device i renders rows
    // [first_row, first_row + rows).
    size_t rows_per_device = (size_t) height / num_devices;
    for (i = 0; i < num_devices; i++) {
        size_t first_row = rows_per_device * i;
        // The last device picks up the remainder rows.
        size_t rows = (i + 1 == num_devices) ? (size_t) height - first_row
                                             : rows_per_device;
        if (rows == 0) {
            // More devices than rows: an empty NDRange is not valid work.
            continue;
        }

        size_t work_offset[2] = {0, first_row};
        size_t work_size[2] = {(size_t) width, rows};
        size_t byte_offset = first_row * row_bytes;

        err = clEnqueueNDRangeKernel(cmd_queue[i], kernel, 2, work_offset,
                                     work_size, NULL, 0, NULL, NULL);
        check_succeeded("Running kernel", err);

        // Non-blocking read, so we can continue queuing up more kernels.
        // The destination must be advanced by the same byte offset as the
        // source, otherwise every band lands at the start of host_image.
        err = clEnqueueReadBuffer(cmd_queue[i], image, CL_FALSE, byte_offset,
                                  rows * row_bytes,
                                  host_image + byte_offset, 0, NULL, NULL);
        check_succeeded("Reading buffer", err);
    }

    // Wait for every device to finish its kernel and its read-back.
    for (i = 0; i < num_devices; i++) {
        err = clFinish(cmd_queue[i]);
        check_succeeded("Finishing command queue", err);
    }

    // Now write the file
    write_bmp("output.bmp", width, height, host_image);

    // Release OpenCL objects
    clReleaseKernel(kernel);
    clReleaseMemObject(image);
    for (i = 0; i < num_devices; i++) {
        clReleaseCommandQueue(cmd_queue[i]);
    }
    clReleaseContext(context);
    free(host_image);
    return CL_SUCCESS;
}
/*
 * Program entry point: renders a 1024x1024 image via runCL() and reports
 * failure through the process exit status instead of always returning 0.
 */
int main(int argc, const char * argv[]) {
    // Command-line arguments are not used.
    (void) argc;
    (void) argv;

    // runCL() returns CL_SUCCESS (0) on success and a non-zero value on error.
    if (runCL(1024, 1024) != 0) {
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}