Skip to content

Commit

Permalink
Re-enable texture object access in regions cuda kernel
Browse files Browse the repository at this point in the history
Remove pre-3.0-compute checks as we don't support 2.0 compute
capability anymore
  • Loading branch information
9prady9 committed Aug 11, 2017
1 parent cf521ad commit 0db6081
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 28 deletions.
23 changes: 9 additions & 14 deletions src/backend/cuda/kernel/regions.hpp
Expand Up @@ -23,8 +23,6 @@
#include <thrust/sort.h>
#include <thrust/transform_scan.h>

#if __CUDACC__

static const int THREADS_X = 16;
static const int THREADS_Y = 16;

Expand All @@ -35,19 +33,18 @@ __device__ static int continue_flag = 1;

// Wrapper function for texture fetch
template<typename T>
__device__ __inline__
static T fetch(const int n,
cuda::Param<T> equiv_map,
cudaTextureObject_t tex)
static inline __device__
T fetch(const int n, cuda::Param<T> equiv_map, cudaTextureObject_t tex)
{
// FIXME: Enable capability >= 3.0
//#if (__CUDA_ARCH__ >= 300)
#if 0
// Kepler bindless texture objects
return tex1Dfetch<T>(tex, n);
#else
}

template<> __device__
STATIC_ double fetch<double>(const int n,
cuda::Param<double> equiv_map,
cudaTextureObject_t tex)
{
return equiv_map.ptr[n];
#endif
}

// The initial label kernel distinguishes between valid (nonzero)
Expand Down Expand Up @@ -477,5 +474,3 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)

cuda::memFree(tmp);
}

#endif // __CUDACC__
41 changes: 27 additions & 14 deletions src/backend/cuda/regions.cu
Expand Up @@ -29,20 +29,28 @@ Array<T> regions(const Array<char> &in, af_connectivity connectivity)

// Create bindless texture object for the equiv map.
cudaTextureObject_t tex = 0;
// FIXME: Currently disabled, only supported on capaibility >= 3.0
//if (compute >= 3.0) {
// cudaResourceDesc resDesc;
// memset(&resDesc, 0, sizeof(resDesc));
// resDesc.resType = cudaResourceTypeLinear;
// resDesc.res.linear.devPtr = out->get();
// resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
// resDesc.res.linear.desc.x = 32; // bits per channel
// resDesc.res.linear.sizeInBytes = dims[0] * dims[1] * sizeof(float);
// cudaTextureDesc texDesc;
// memset(&texDesc, 0, sizeof(texDesc));
// texDesc.readMode = cudaReadModeElementType;
// CUDA_CHECK(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
//}

//Use texture objects with compute 3.0 or higher
if (!std::is_same<T,double>::value) {
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeLinear;
resDesc.res.linear.devPtr = out.get();

if (std::is_signed<T>::value)
resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
else if (std::is_unsigned<T>::value)
resDesc.res.linear.desc.f = cudaChannelFormatKindUnsigned;
else
resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;

resDesc.res.linear.desc.x = sizeof(T)*8; // bits per channel
resDesc.res.linear.sizeInBytes = dims[0] * dims[1] * sizeof(T);
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.readMode = cudaReadModeElementType;
CUDA_CHECK(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
}

switch(connectivity) {
case AF_CONNECTIVITY_4:
Expand All @@ -53,6 +61,11 @@ Array<T> regions(const Array<char> &in, af_connectivity connectivity)
break;
}

//Iterative procedure(while loop) in kernel::regions
//does stream synchronization towards loop end. So, it is
//safe to destroy the texture object
CUDA_CHECK(cudaDestroyTextureObject(tex));

This comment has been minimized.

Copy link
@pavanky

pavanky Aug 11, 2017

Member

👍 good comment.


return out;
}

Expand Down

0 comments on commit 0db6081

Please sign in to comment.