Skip to content
Permalink
Browse files

Made KernelResource into a class for subclassing CUDA/OpenCL; fixed logical error in beagle.cpp
  • Loading branch information
msuchard committed Sep 7, 2009
1 parent 374d871 commit 22981db0eac15843b02a604156c49dbf0fd93f38
@@ -244,20 +244,6 @@
} \
}

// Plain record describing one GPU kernel configuration.
// The per-field notes below are derived from how GPUInterface::InitializeKernelMap,
// GPUInterface::SetDevice and KernelLauncher::SetupKernelBlocksAndGrids use the
// fields; anything beyond that is marked as an assumption.
typedef struct {
int paddedStateCount; // set to 4, 48 or 64 in InitializeKernelMap, matching the kernelMap key
int categoryCount; // assigned in SetDevice from its categoryCount argument
int patternCount; // assigned in SetDevice from its paddedPatternCount argument
char* kernelCode; // KERNELS_STRING_* image; passed to cuModuleLoadData in SetDevice
int patternBlockSize; // initialised from PATTERN_BLOCK_SIZE_*
int matrixBlockSize; // initialised from MATRIX_BLOCK_SIZE_*
int blockPeelingSize; // initialised from BLOCK_PEELING_SIZE_*
int isPowerOfTwo; // not assigned by the field-by-field initialisation code — TODO confirm who sets it
int smallestPowerOfTwo; // not assigned by the field-by-field initialisation code — TODO confirm who sets it
int slowReweighing; // initialised from SLOW_REWEIGHING_*
int multiplyBlockSize; // initialised from MULTIPLY_BLOCK_SIZE; both dimensions of the transition-probabilities block
} KernelResource;

typedef struct Dim3Int Dim3Int;

struct Dim3Int
@@ -31,6 +31,7 @@
#endif

#include "libhmsbeagle/GPU/GPUImplDefs.h"
#include "libhmsbeagle/GPU/KernelResource.h"

#ifdef CUDA
#include <cuda.h>
@@ -126,7 +127,7 @@ class GPUInterface {

void DestroyKernelMap();

KernelResource* kernel;
KernelResource* kernelResource;

protected:
void InitializeKernelMap();
@@ -38,9 +38,10 @@
#include "libhmsbeagle/GPU/GPUImplDefs.h"
#include "libhmsbeagle/GPU/GPUImplHelper.h"
#include "libhmsbeagle/GPU/GPUInterface.h"
#include "libhmsbeagle/GPU/KernelResource.h"


// File-scope lookup table from padded state count to kernel configuration.
// It is populated by GPUInterface::InitializeKernelMap (keys 4, 48 and 64) and
// consulted by GPUInterface::SetDevice.
// NOTE(review): two declarations with different mapped types (pointer vs. value)
// appear back to back; they look like the before/after lines of one change and
// cannot both be compiled — confirm which one is live.
std::map<int, KernelResource*> kernelMap;
std::map<int, KernelResource> kernelMap;


#define SAFE_CUDA(call) { \
@@ -66,6 +67,7 @@ GPUInterface::GPUInterface() {
cudaDevice = NULL;
cudaContext = NULL;
cudaModule = NULL;
kernelResource = NULL;

#ifdef BEAGLE_DEBUG_FLOW
fprintf(stderr,"\t\t\tLeaving GPUInterface::GPUInterface\n");
@@ -82,6 +84,10 @@ GPUInterface::~GPUInterface() {
SAFE_CUDA(cuCtxDetach(cudaContext));
}

if (kernelResource != NULL) {
delete kernelResource;
}

#ifdef BEAGLE_DEBUG_FLOW
fprintf(stderr,"\t\t\tLeaving GPUInterface::~GPUInterface\n");
#endif
@@ -129,55 +135,47 @@ int GPUInterface::GetDeviceCount() {
}

// Walks the file-scope kernelMap and deletes every KernelResource it points to.
// The map entries themselves are left in place (the clear() call is commented out).
// NOTE(review): after this runs, kernelMap still holds the deleted pointers and
// its size() stays non-zero, so the "kernelMap.size() == 0" check in SetDevice
// would not re-initialise it, and a second call here would delete the same
// pointers again — confirm that this is only reached at library teardown.
void GPUInterface::DestroyKernelMap() {

std::map<int, KernelResource*>::const_iterator itr;
for(itr = kernelMap.begin(); itr != kernelMap.end(); ++itr) {
KernelResource* rsrc = itr->second;
#ifdef BEAGLE_DEBUG_VALUES
// Debug trace: map key and the padded state count stored in the entry.
fprintf(stderr,"Key: %d %d\n",(*itr).first, rsrc->paddedStateCount);
// NOTE(review): the identifier after #endif below is an extra token, not a
// comment; compilers such as GCC warn about it.
#endif BEAGLE_DEBUG_VALUES
delete rsrc;
// delete (*itr).second;
}
//kernelMap.clear();
// No longer necessary as std::map automatically clears itself
// NOTE(review): std::map's destructor destroys the stored pointers, not the
// objects they point to; that is why the loop above deletes each one.
}

// Registers the kernel configurations for 4, 48 and 64 padded states in the
// file-scope kernelMap, keyed by padded state count.
// NOTE(review): two populations are present below — one that heap-allocates each
// KernelResource and fills it field by field, and one that builds each
// KernelResource through a nine-argument constructor and inserts it by value.
// Both declare kernel4/kernel48/kernel64, so they appear to be the before/after
// of a single change rather than code meant to coexist — confirm which is live.
void GPUInterface::InitializeKernelMap() {

#ifdef BEAGLE_DEBUG_FLOW
// Flow trace (the same message appears twice here).
fprintf(stderr,"\t\t\tLoading kernel information for CUDA!\n");
fprintf(stderr,"\t\t\tLoading kernel information for CUDA!\n");
#endif

// --- Pointer-based population: each entry is allocated with new and stored by pointer. ---
// categoryCount and patternCount are not set here; SetDevice assigns them later.
KernelResource* kernel4 = new KernelResource;
kernel4->kernelCode = (char*) KERNELS_STRING_4;
kernel4->paddedStateCount = 4;
kernel4->patternBlockSize = PATTERN_BLOCK_SIZE_4;
kernel4->matrixBlockSize = MATRIX_BLOCK_SIZE_4;
kernel4->blockPeelingSize = BLOCK_PEELING_SIZE_4;
kernel4->slowReweighing = SLOW_REWEIGHING_4;
kernel4->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
kernelMap.insert(std::make_pair(4,kernel4));

KernelResource* kernel48 = new KernelResource;
kernel48->kernelCode = (char*) KERNELS_STRING_48;
kernel48->paddedStateCount = 48;
kernel48->patternBlockSize = PATTERN_BLOCK_SIZE_48;
kernel48->matrixBlockSize = MATRIX_BLOCK_SIZE_48;
kernel48->blockPeelingSize = BLOCK_PEELING_SIZE_48;
kernel48->slowReweighing = SLOW_REWEIGHING_48;
kernel48->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
kernelMap.insert(std::make_pair(48,kernel48));

KernelResource* kernel64 = new KernelResource;
kernel64->kernelCode = (char*) KERNELS_STRING_64;
kernel64->paddedStateCount = 64;
kernel64->patternBlockSize = PATTERN_BLOCK_SIZE_64;
kernel64->matrixBlockSize = MATRIX_BLOCK_SIZE_64;
kernel64->blockPeelingSize = BLOCK_PEELING_SIZE_64;
kernel64->slowReweighing = SLOW_REWEIGHING_64;
kernel64->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
kernelMap.insert(std::make_pair(64,kernel64));


// --- Constructor-based population: each entry is built on the stack and copied into the map. ---
// Argument order as passed: state count, kernel code, pattern block size,
// matrix block size, block peeling size, slow-reweighing flag, multiply block
// size, then two zeros.
// NOTE(review): the meaning of the two trailing zeros is not visible here;
// presumably categoryCount and patternCount, which SetDevice overwrites — confirm
// against the KernelResource constructor.
KernelResource kernel4 = KernelResource(
4,
(char*) KERNELS_STRING_4,
PATTERN_BLOCK_SIZE_4,
MATRIX_BLOCK_SIZE_4,
BLOCK_PEELING_SIZE_4,
SLOW_REWEIGHING_4,
MULTIPLY_BLOCK_SIZE,
0,0);
kernelMap.insert(std::make_pair(4,kernel4));

KernelResource kernel48 = KernelResource(
48,
(char*) KERNELS_STRING_48,
PATTERN_BLOCK_SIZE_48,
MATRIX_BLOCK_SIZE_48,
BLOCK_PEELING_SIZE_48,
SLOW_REWEIGHING_48,
MULTIPLY_BLOCK_SIZE,
0,0);
kernelMap.insert(std::make_pair(48,kernel48));

KernelResource kernel64 = KernelResource(
64,
(char*) KERNELS_STRING_64,
PATTERN_BLOCK_SIZE_64,
MATRIX_BLOCK_SIZE_64,
BLOCK_PEELING_SIZE_64,
SLOW_REWEIGHING_64,
MULTIPLY_BLOCK_SIZE,
0,0);
kernelMap.insert(std::make_pair(64,kernel64));
}

void GPUInterface::SetDevice(int deviceNumber, int paddedStateCount, int categoryCount, int paddedPatternCount) {
@@ -190,20 +188,27 @@ void GPUInterface::SetDevice(int deviceNumber, int paddedStateCount, int categor
SAFE_CUDA(cuCtxCreate(&cudaContext, CU_CTX_SCHED_AUTO, cudaDevice));

if (kernelMap.size() == 0) {
// kernels have not yet been initialized; do so now. Hopefully, this only occurs once per library load.
InitializeKernelMap();
// kernels have not yet been initialized; do so now. Hopefully, this only occurs once per library load.
InitializeKernelMap();
}

if (kernelMap.count(paddedStateCount) == 0) {
fprintf(stderr,"Critical error: unable to find kernel code for %d states.\n",paddedStateCount);
exit(-1);
}

kernel = kernelMap[paddedStateCount];
kernel->categoryCount = categoryCount;
kernel->patternCount = paddedPatternCount;
// kernel.paddedStateCount = paddedStateCount;
// kernel.kernelCode = kernelMap[paddedStateCount].kernelCode;
// kernel.patternBlockSize = kernelMap[paddedStateCount].patternBlockSize;
// kernel.matrixBlockSize = kernelMap[paddedStateCount].matrixBlockSize;
// kernel.blockPeelingSize = kernelMap[paddedStateCount].blockPeelingSize;
// kernel.slowReweighing = kernelMap[paddedStateCount].slowReweighing;
// kernel.multiplyBlockSize = kernelMap[paddedStateCount].multiplyBlockSize;
kernelResource = kernelMap[paddedStateCount].copy();
kernelResource->categoryCount = categoryCount;
kernelResource->patternCount = paddedPatternCount;

SAFE_CUDA(cuModuleLoadData(&cudaModule, kernel->kernelCode));
SAFE_CUDA(cuModuleLoadData(&cudaModule, kernelResource->kernelCode));

SAFE_CUDA(cuCtxPopCurrent(&cudaContext));

@@ -49,13 +49,13 @@ KernelLauncher::~KernelLauncher() {

void KernelLauncher::SetupKernelBlocksAndGrids() {

kPaddedStateCount = gpu->kernel->paddedStateCount;
kCategoryCount = gpu->kernel->categoryCount;
kPatternCount = gpu->kernel->patternCount;
kMultiplyBlockSize = gpu->kernel->multiplyBlockSize;
kPatternBlockSize = gpu->kernel->patternBlockSize;
kSlowReweighing = gpu->kernel->slowReweighing;
kMatrixBlockSize = gpu->kernel->matrixBlockSize;
kPaddedStateCount = gpu->kernelResource->paddedStateCount;
kCategoryCount = gpu->kernelResource->categoryCount;
kPatternCount = gpu->kernelResource->patternCount;
kMultiplyBlockSize = gpu->kernelResource->multiplyBlockSize;
kPatternBlockSize = gpu->kernelResource->patternBlockSize;
kSlowReweighing = gpu->kernelResource->slowReweighing;
kMatrixBlockSize = gpu->kernelResource->matrixBlockSize;

// Set up block/grid for transition matrices computation
bgTransitionProbabilitiesBlock = Dim3Int(kMultiplyBlockSize, kMultiplyBlockSize);
@@ -6,7 +6,8 @@ if BUILDCUDA
noinst_LTLIBRARIES= libcuda.la
libcuda_la_SOURCES = \
BeagleGPUImpl.cpp BeagleGPUImpl.h GPUImplDefs.h GPUImplHelper.cpp \
GPUImplHelper.h GPUInterface.h GPUInterfaceCUDA.cpp KernelLauncher.cpp KernelLauncher.h
GPUImplHelper.h GPUInterface.h GPUInterfaceCUDA.cpp KernelLauncher.cpp \
KernelLauncher.h KernelResource.cpp KernelResource.h

libcuda_la_CFLAGS = $(CUDA_CFLAGS)
libcuda_la_CXXFLAGS = $(CUDA_CFLAGS)
@@ -17,7 +18,8 @@ if BUILDOPENCL
noinst_LTLIBRARIES= libopencl.la
libopencl_la_SOURCES = \
BeagleGPUImpl.cpp BeagleGPUImpl.h GPUImplDefs.h GPUImplHelper.cpp \
GPUImplHelper.h GPUInterface.h GPUInterfaceOpenCL.cpp KernelLauncher.cpp KernelLauncher.h
GPUImplHelper.h GPUInterface.h GPUInterfaceOpenCL.cpp KernelLauncher.cpp \
KernelLauncher.h KernelResource.cpp KernelResource.h

nodist_libopencl_la_SOURCES = BeagleOpenCL_Kernels.h
BUILT_SOURCES = BeagleOpenCL_Kernels.h
@@ -95,11 +95,11 @@ void beagle_library_initialize(void) {
void beagle_library_finalize(void) {

// Destroy GPU kernel info
#if defined(CUDA)
GPUInterface* gpu = new GPUInterface;
gpu->DestroyKernelMap();
delete gpu;
#endif
//#if defined(CUDA)
// GPUInterface* gpu = new GPUInterface;
// gpu->DestroyKernelMap();
// delete gpu;
//#endif

// Destroy implFactory
if (implFactory && loaded) {
@@ -141,7 +141,7 @@ void __attribute__ ((destructor)) beagle_gnu_finalize(void) {
#endif

// Public finalisation entry point: conditionally runs beagle_library_finalize()
// and always returns BEAGLE_SUCCESS.
// NOTE(review): two opposite guards on `loaded` follow. Read literally they nest
// (`if (!loaded) if (loaded) ...`), which contradicts itself, so the finalizer
// would not be reached. They look like the before/after lines of the
// "logical error" fix named in the commit title; presumably only `if (loaded)`
// is meant to remain — confirm.
int beagleFinalize() {
if (!loaded)
if (loaded)
beagle_library_finalize();
return BEAGLE_SUCCESS;
}
@@ -0,0 +1,16 @@
# Suppression for CUDA
{
<insert a suppression name>
Memcheck:Leak
...
obj:/usr/local/cuda/lib/libcuda.dylib
...
}

{
<insert a suppression name>
Memcheck:Cond
...
fun:cuGLUnregisterBufferObject
...
}

0 comments on commit 22981db

Please sign in to comment.
You can’t perform that action at this time.