Skip to content

Commit

Permalink
huge boost with cuda 3.0 opencl lib
Browse files Browse the repository at this point in the history
git-svn-id: https://orbit.nmr.mgh.harvard.edu/svn/mcxcl/mcxcl@14 4848f366-3658-0410-a320-90152d9d5b07
  • Loading branch information
Qianqian Fang committed Feb 26, 2010
1 parent cc58c47 commit dd2d8d6
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 26 deletions.
4 changes: 3 additions & 1 deletion example/quicktest/run_qtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ if [ ! -e semi60x60x60.bin ]; then
fi

# using -T 256 gives a huge speed boost on ATI cards
time ../../bin/mcxcl -t 2048 -T 256 -g 10 -m 1000000 -f qtest.inp -s qtest -r 1 -a 0 -b 0 -k ../../src/mcx_core.cl
#time ../../bin/mcxcl -t 2048 -T 128 -g 10 -m 1000000 -f qtest.inp -s qtest -r 1 -a 0 -b 0 -k ../../src/mcx_core.cl
time ../../bin/mcxcl -t 1024 -T 128 -g 10 -m 100000 -f qtest.inp -s qtest -r 1 -a 0 -b 0 -k ../../src/mcx_core.cl

# use the CPU backend; set CPU_MAX_COMPUTE_UNITS=n to specify the number of CPU cores
#time ../../bin/mcxcl -t 2048 -T 256 -g 10 -m 100000 -f qtest.inp -s qtest -r 1 -a 0 -b 0 -k ../../src/mcx_core.cl -c
5 changes: 2 additions & 3 deletions src/mcx_core.cl
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,15 @@ typedef struct PhotonData {
This is the core Monte Carlo simulation kernel; please see Fig. 1 in Fang2009.
*/
__kernel void mcx_main_loop( const int nphoton, const int ophoton,__global const uchar media[],
__global float field[], __global float genergy[], const float4 vsize, const float minstep,
__global float field[], __global float genergy[], const float minstep,
float twin0, float twin1, float tmax, uint4 dimlen,
uchar isrowmajor, uchar save2pt, float Rtstep,
const float4 p0, const float4 c0, const float4 maxidx,
const uint4 cp0, const uint4 cp1, const uint2 cachebox,
const uchar doreflect, const uchar doreflect3,
const float minenergy, const float sradius2, __global uint n_seed[],__global float4 n_pos[],
__global float4 n_dir[],__global float4 n_len[],__constant float4 gproperty[]){

int idx= get_local_size(0) * get_group_id(0)+ get_local_id(0);
int idx= get_global_id(0);

float4 npos=n_pos[idx]; //{x,y,z}: x,y,z coordinates,{w}:packet weight
float4 ndir=n_dir[idx]; //{x,y,z}: ix,iy,iz unitary direction vector, {w}:total scat event
Expand Down
40 changes: 18 additions & 22 deletions src/mcx_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,9 @@ void mcx_run_simulation(Config *cfg){

mcx_assess((program=clCreateProgramWithSource(context, 1,(const char **)&(cfg->clsource), NULL, &status),status));
if(cfg->iscpu && cfg->isverbose){
status=clBuildProgram(program, 0, NULL, "-D __DEVICE_EMULATION__ -cl-fast-relaxed-math", NULL, NULL);
status=clBuildProgram(program, 0, NULL, "-D __DEVICE_EMULATION__ -cl-mad-enable -cl-fast-relaxed-math", NULL, NULL);
}else{
status=clBuildProgram(program, 0, NULL, "-cl-fast-relaxed-math", NULL, NULL);
status=clBuildProgram(program, 0, NULL, "-cl-mad-enable -cl-fast-relaxed-math", NULL, NULL);
}
if(status!=CL_SUCCESS){
size_t len;
Expand All @@ -307,24 +307,20 @@ void mcx_run_simulation(Config *cfg){
mcx_assess(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&gmedia));
mcx_assess(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&gfield));
mcx_assess(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&genergy));
mcx_assess(clSetKernelArg(kernel, 5, sizeof(cl_float4), (void*)&(cfg->steps)));
mcx_assess(clSetKernelArg(kernel, 6, sizeof(cl_float), (void*)&(minstep)));
mcx_assess(clSetKernelArg(kernel, 9, sizeof(cl_float), (void*)&(cfg->tend)));
mcx_assess(clSetKernelArg(kernel,10, sizeof(cl_uint4), (void*)&(dimlen)));
mcx_assess(clSetKernelArg(kernel,11, sizeof(cl_uchar), (void*)&(cfg->isrowmajor)));
mcx_assess(clSetKernelArg(kernel,12, sizeof(cl_uchar), (void*)&(cfg->issave2pt)));
mcx_assess(clSetKernelArg(kernel,13, sizeof(cl_float), (void*)&(savefreq)));
mcx_assess(clSetKernelArg(kernel,14, sizeof(cl_float4), (void*)&(p0)));
mcx_assess(clSetKernelArg(kernel,15, sizeof(cl_float4), (void*)&(c0)));
mcx_assess(clSetKernelArg(kernel,16, sizeof(cl_float4), (void*)&(maxidx)));
mcx_assess(clSetKernelArg(kernel,17, sizeof(cl_uint4), (void*)&(cp0)));
mcx_assess(clSetKernelArg(kernel,18, sizeof(cl_uint4), (void*)&(cp1)));
mcx_assess(clSetKernelArg(kernel,19, sizeof(cl_uint2), (void*)&(cachebox)));
mcx_assess(clSetKernelArg(kernel,20, sizeof(cl_uchar), (void*)&(cfg->isreflect)));
mcx_assess(clSetKernelArg(kernel,21, sizeof(cl_uchar), (void*)&(cfg->isref3)));
mcx_assess(clSetKernelArg(kernel,22, sizeof(cl_float), (void*)&(cfg->minenergy)));
mcx_assess(clSetKernelArg(kernel,23, sizeof(cl_float), (void*)&(bubbler2)));
mcx_assess(clSetKernelArg(kernel,24, sizeof(cl_mem), (void*)&(gPseed)));
mcx_assess(clSetKernelArg(kernel, 5, sizeof(cl_float), (void*)&(minstep)));
mcx_assess(clSetKernelArg(kernel, 8, sizeof(cl_float), (void*)&(cfg->tend)));
mcx_assess(clSetKernelArg(kernel, 9, sizeof(cl_uint4), (void*)&(dimlen)));
mcx_assess(clSetKernelArg(kernel,10, sizeof(cl_uchar), (void*)&(cfg->isrowmajor)));
mcx_assess(clSetKernelArg(kernel,11, sizeof(cl_uchar), (void*)&(cfg->issave2pt)));
mcx_assess(clSetKernelArg(kernel,12, sizeof(cl_float), (void*)&(savefreq)));
mcx_assess(clSetKernelArg(kernel,13, sizeof(cl_float4), (void*)&(p0)));
mcx_assess(clSetKernelArg(kernel,14, sizeof(cl_float4), (void*)&(c0)));
mcx_assess(clSetKernelArg(kernel,15, sizeof(cl_float4), (void*)&(maxidx)));
mcx_assess(clSetKernelArg(kernel,16, sizeof(cl_uchar), (void*)&(cfg->isreflect)));
mcx_assess(clSetKernelArg(kernel,17, sizeof(cl_uchar), (void*)&(cfg->isref3)));
mcx_assess(clSetKernelArg(kernel,18, sizeof(cl_float), (void*)&(cfg->minenergy)));
mcx_assess(clSetKernelArg(kernel,19, sizeof(cl_float), (void*)&(bubbler2)));
mcx_assess(clSetKernelArg(kernel,20, sizeof(cl_mem), (void*)&(gPseed)));
mcx_assess(clSetKernelArg(kernel,25, sizeof(cl_mem), (void*)&gPpos));
mcx_assess(clSetKernelArg(kernel,26, sizeof(cl_mem), (void*)&gPdir));
mcx_assess(clSetKernelArg(kernel,27, sizeof(cl_mem), (void*)&gPlen));
Expand All @@ -345,8 +341,8 @@ void mcx_run_simulation(Config *cfg){

fprintf(cfg->flog,"simulation run#%2d ... \t",iter+1); fflush(cfg->flog);

mcx_assess(clSetKernelArg(kernel, 7, sizeof(cl_float), (void*)&(twindow0)));
mcx_assess(clSetKernelArg(kernel, 8, sizeof(cl_float), (void*)&(twindow1)));
mcx_assess(clSetKernelArg(kernel, 6, sizeof(cl_float), (void*)&(twindow0)));
mcx_assess(clSetKernelArg(kernel, 7, sizeof(cl_float), (void*)&(twindow1)));
// launch kernel
mcx_assess(clEnqueueNDRangeKernel(commands,kernel,1,NULL,mcgrid,mcblock, 0, NULL,
#ifndef USE_OS_TIMER
Expand Down

0 comments on commit dd2d8d6

Please sign in to comment.