Skip to content

Commit

Permalink
multiple GPU support! set fermi as default, set atomic as default, be…
Browse files Browse the repository at this point in the history
…tter mex compilation

git-svn-id: https://svn.code.sf.net/p/mcx/svn/mcextreme_cuda/trunk@338 373d029a-b463-0410-adc7-be649bff2a08
  • Loading branch information
fangq committed Jun 25, 2015
1 parent 6815577 commit 60238c4
Show file tree
Hide file tree
Showing 9 changed files with 472 additions and 230 deletions.
3 changes: 2 additions & 1 deletion mcxlab/examples/demo_mcxlab_basic.m
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
% This file is part of Monte Carlo eXtreme (MCX) URL:http://mcx.sf.net
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear cfg cfgs
cfg.nphoton=1e7;
cfg.vol=uint8(ones(60,60,60));
cfg.srcpos=[30 30 1];
cfg.srcdir=[0 0 1];
cfg.gpuid=1;
% cfg.gpuid='11'; % use two GPUs together
cfg.autopilot=1;
cfg.prop=[0 0 1 1;0.005 1 0 1.37];
cfg.tstart=0;
Expand All @@ -23,7 +25,6 @@
cfgs(2)=cfg;
cfgs(1).isreflect=0;
cfgs(2).isreflect=1;
cfgs(2).issavedet=1;
cfgs(2).detpos=[30 20 1 1;30 40 1 1;20 30 1 1;40 30 1 1];
% calculate the flux and partial path lengths for the two configurations
[fluxs,detps]=mcxlab(cfgs);
Expand Down
14 changes: 10 additions & 4 deletions mcxlab/mcxlab.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
%====================================================================
% MCXLAB - Monte Carlo eXtreme (MCX) for MATLAB/GNU Octave
%--------------------------------------------------------------------
%Copyright (c) 2010-2014 Qianqian Fang <fangq at nmr.mgh.harvard.edu>
%Copyright (c) 2010-2015 Qianqian Fang <fangq at nmr.mgh.harvard.edu>
% URL: http://mcx.sf.net
%====================================================================
%
Expand Down Expand Up @@ -41,6 +41,15 @@
% cfg.detpos: an N by 4 array, each row specifying a detector: [x,y,z,radius]
% cfg.respin: repeat simulation for the given time (integer) [1]
% cfg.gpuid: which GPU to use (run 'mcx -L' to list all GPUs) [1]
% if set to an integer, gpuid specifies the index (starts at 1)
% of the GPU for the simulation; if set to a binary string made
% of 1s and 0s, it enables multiple GPUs. For example, '1101'
% selects the 1st, 2nd and 4th GPUs to run together.
% cfg.workload: an array denoting the relative load of each selected GPU.
% For example, [50,20,30] allocates 50%, 20% and 30% of the photons to the
% 3 selected GPUs, respectively; [10,10] evenly divides the load
% between 2 active GPUs. A simple load-balancing strategy is to
% use the GPU core counts as the weights.
% cfg.isreflect: [1]-consider refractive index mismatch, 0-matched index
% cfg.isrefint: 1-ref. index mismatch at inner boundaries, [0]-matched index
% cfg.isnormalized:[1]-normalize the output flux to unitary source, 0-no normalization
Expand Down Expand Up @@ -108,9 +117,6 @@
% a byte array (uint8) for each detected photon. The column number
% of seed equals that of detphoton.
%
% if detphoton is ignored, the detected photons will be saved in a .mch file
% if cfg.issavedet=1; if no output is given, the flux will be saved to a
% .mc2 file if cfg.issave2pt=1 (which is true by default).
%
% Example:
% cfg.nphoton=1e7;
Expand Down
30 changes: 18 additions & 12 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ INCLUDEDIRS=-I/usr/local/cuda/include

ifeq ($(BACKEND),ocelot)
LINKOPT=-L/usr/local/lib `OcelotConfig -l`
CUCCOPT=-arch compute_20 #--maxrregcount 32
CUCCOPT=-g -arch compute_20 #--maxrregcount 32
else
LINKOPT=-L/usr/local/cuda/lib -lcudart -lm
CUCCOPT=#-arch compute_20 #--maxrregcount 32
CUCCOPT=-g #-arch compute_20 #--maxrregcount 32
endif

DLLFLAG=-fPIC

CPPOPT=-g -Wall -O3 -std=c99
CPPOPT=-g -Wall -O3 -std=c99 # -DUSE_OS_TIMER

OBJSUFFIX=.o
EXESUFFIX=
Expand All @@ -41,14 +41,14 @@ ifeq ($(findstring MINGW32,$(PLATFORM)), MINGW32)
CC=nvcc
LINKOPT=-L/c/CUDA/lib -lcudart --link
INCLUDEDIRS +=-I/c/CUDA/include
CPPOPT =-c -D_CRT_SECURE_NO_DEPRECATE -DWIN32
CPPOPT =-c -D_CRT_SECURE_NO_DEPRECATE -DWIN32
OBJSUFFIX=.obj
EXESUFFIX=.exe
DLLFLAG=
else
ifeq ($(findstring x86_64,$(ARCH)), x86_64)
CPPOPT += -m64
CUCCOPT += -m64
CPPOPT +=-m64
CUCCOPT +=-m64
ifeq "$(wildcard /usr/local/cuda/lib64)" "/usr/local/cuda/lib64"
ifeq ($(BACKEND),cuda)
LINKOPT=-L/usr/local/cuda/lib64 -lcudart -lm -lstdc++
Expand All @@ -57,6 +57,9 @@ else
endif
endif

CUCCOPT+=-Xcompiler -fopenmp
CPPOPT+=-fopenmp

all logfast:CUCCOPT+=-use_fast_math
mt: CUCCOPT+=-DUSE_MT_RAND
fast: CUCCOPT+=-DUSE_MT_RAND -use_fast_math
Expand All @@ -67,7 +70,7 @@ racing: CUCCOPT+=-DTEST_RACING
mtatomic: CUCCOPT+=-DUSE_MT_RAND -DUSE_ATOMIC -use_fast_math -arch compute_20 -DMCX_TARGET_NAME='"MT Atomic MCX"'
logatomic: CUCCOPT+=-DUSE_ATOMIC -use_fast_math -arch compute_20 -DMCX_TARGET_NAME='"LL5 Atomic MCX"'
fermi fermimex fermioct: CUCCOPT+=-DUSE_ATOMIC -use_fast_math
fermimex: CUCCOPT+=-DUSE_MT_RAND -use_fast_math
#fermimex: CUCCOPT+=-DUSE_MT_RAND -use_fast_math
mtbox logbox: CUCCOPT+=-DUSE_CACHEBOX -use_fast_math -arch compute_20 -DMCX_TARGET_NAME='"Cachebox MCX"'
debugmt debuglog: CUCCOPT+=-deviceemu
mtatomic logatomic: BINARY:=$(BINARY)_atomic
Expand All @@ -77,17 +80,20 @@ detbox mexbox octbox: CUCCOPT+=-DMCX_TARGET_NAME='"Cached Detective MCX"'
fermi fermimex fermioct: CUCCOPT+=-DMCX_TARGET_NAME='"Fermi MCX"'
det detbox: BINARY:=$(BINARY)_det
logbox detbox: BINARY:=$(BINARY)_cached
all mt fast log logfast racing mtatomic logatomic mtbox logbox debugmt debuglog \
det detbox fermi: LINKOPT+=-fopenmp

mexbox mex fermimex: AR=mex
mexbox mex fermimex: LINKOPT+= CXXFLAGS='$$CXXFLAGS -DSAVE_DETECTORS -DUSE_CACHEBOX'
mexbox mex oct octbox fermimex: OUTPUT_DIR=../mcxlab
mex fermimex fermioct: BINARY=mcxlab
mexbox mex fermimex: LINKOPT+= CXXFLAGS='$$CXXFLAGS -DSAVE_DETECTORS -DUSE_CACHEBOX -DMCX_CONTAINER -fopenmp' -lgomp LDFLAGS='$$LDFLAGS -fopenmp'
mexbox mex oct octbox fermimex fermioct: OUTPUT_DIR=../mcxlab
mex fermimex: BINARY=mcxlab
oct fermioct: BINARY=mcxlab.mex
mexbox: BINARY=mcxlab_atom
mexbox mex oct octbox fermimex fermioct: CUCCOPT+=--compiler-options "$(DLLFLAG) -Wall" -DMCX_CONTAINER
mexbox mex oct octbox fermimex fermioct: CUCCOPT+=--compiler-options "$(DLLFLAG)" -DMCX_CONTAINER
mexbox mex oct octbox fermimex fermioct: CPPOPT+=$(DLLFLAG) -DMCX_CONTAINER
mexbox mex fermimex: LINKOPT+=mcxlab.cpp -cxx -outdir $(OUTPUT_DIR) $(INCLUDEDIRS)

octbox oct fermioct: AR= CXXFLAGS='-DSAVE_DETECTORS -DUSE_CACHEBOX' mkoctfile
octbox oct fermioct: AR= CXXFLAGS='-DSAVE_DETECTORS -DUSE_CACHEBOX -fopenmp' LDFLAGS='-fopenmp' mkoctfile
oct: BINARY=mcxlab.mex
octbox: BINARY=mcxlab_atom.mex
octbox oct fermioct: LINKOPT+=--mex mcxlab.cpp $(INCLUDEDIRS)
Expand Down
18 changes: 16 additions & 2 deletions src/mcextreme.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,37 @@
#include "tictoc.h"
#include "mcx_utils.h"
#include "mcx_core.h"
#ifdef _OPENMP
#include <omp.h>
#endif

int main (int argc, char *argv[]) {
Config mcxconfig;
GPUInfo *gpuinfo=NULL;
unsigned int activedev=0;

mcx_initcfg(&mcxconfig);

// parse command line options to initialize the configurations
mcx_parsecmd(argc,argv,&mcxconfig);

// identify gpu number and set one gpu active
if(!mcx_set_gpu(&mcxconfig,&gpuinfo)){
if(!(activedev=mcx_list_gpu(&mcxconfig,&gpuinfo))){
mcx_error(-1,"No GPU device found\n",__FILE__,__LINE__);
}

#ifdef _OPENMP
omp_set_num_threads(activedev);
#pragma omp parallel
{
#endif
// this launches the MC simulation
mcx_run_simulation(&mcxconfig);
mcx_run_simulation(&mcxconfig,gpuinfo);

#ifdef _OPENMP
}
#endif

// clean up the allocated memory in config and gpuinfo
mcx_cleargpuinfo(&gpuinfo);
Expand Down
Loading

0 comments on commit 60238c4

Please sign in to comment.