Skip to content

Commit

Permalink
fixing device only code that get called in the host side (codeplaysof…
Browse files Browse the repository at this point in the history
  • Loading branch information
mehdi-goli authored and jiyang1011 committed Apr 29, 2024
1 parent abeabd2 commit 41121f3
Showing 1 changed file with 52 additions and 23 deletions.
75 changes: 52 additions & 23 deletions include/cutlass/cutlass.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,120 +117,132 @@ static const int NumThreadsPerQuadPair = NumThreadsPerQuad * 2;
CUTLASS_HOST_DEVICE uint ThreadIdxX() {
#if defined(__CUDA_ARCH__)
return threadIdx.x;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::x();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint ThreadIdxY() {
#if defined(__CUDA_ARCH__)
return threadIdx.y;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::y();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint ThreadIdxZ() {
#if defined(__CUDA_ARCH__)
return threadIdx.z;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::z();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxX() {
#if defined(__CUDA_ARCH__)
return blockIdx.x;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::x();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxY() {
#if defined(__CUDA_ARCH__)
return blockIdx.y;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::y();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxZ() {
#if defined(__CUDA_ARCH__)
return blockIdx.z;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::z();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimX() {
#if defined(__CUDA_ARCH__)
return blockDim.x;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::x();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimY() {
#if defined(__CUDA_ARCH__)
return blockDim.y;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::y();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimZ() {
#if defined(__CUDA_ARCH__)
return blockDim.z;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::z();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimX() {
#if defined(__CUDA_ARCH__)
return gridDim.x;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::x();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimY() {
#if defined(__CUDA_ARCH__)
return gridDim.y;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::y();
#else
return 0;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimZ() {
#if defined(__CUDA_ARCH__)
return gridDim.z;
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::z();
#else
return 0;
return 0;
#endif
}

Expand All @@ -239,26 +251,27 @@ CUTLASS_HOST_DEVICE uint GridDimZ() {
CUTLASS_DEVICE void syncthreads() {
#if defined(__CUDA_ARCH__)
__syncthreads();
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
syclcompat::wg_barrier();
#endif
}

CUTLASS_DEVICE int syncthreads_and(int cond) {
#if defined(__CUDA_ARCH__)
return __syncthreads_and(cond);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return 0;
return 0;
#endif
}

CUTLASS_DEVICE void syncwarp() {
#if defined(__CUDA_ARCH__)
__syncwarp();
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#endif
Expand All @@ -267,7 +280,7 @@ CUTLASS_DEVICE void syncwarp() {
CUTLASS_DEVICE void threadfence() {
#if defined(__CUDA_ARCH__)
__threadfence();
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#endif
Expand All @@ -279,12 +292,14 @@ CUTLASS_DEVICE
uint byte_perm(uint x, uint y, uint s) {
#if defined(__CUDA_ARCH__)
return __byte_perm(x, y, s);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
return 0;
#else
return 0;
return 0;
#endif
}

Expand All @@ -294,38 +309,44 @@ CUTLASS_DEVICE
uint shfl_up_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(__CUDA_ARCH__)
return __shfl_up_sync(mask, var, delta, width);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
return 0;
#else
return 0;
return 0;
#endif
}

CUTLASS_DEVICE
uint shfl_down_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(__CUDA_ARCH__)
return __shfl_down_sync(mask, var, delta, width);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
return 0;
#else
return 0;
return 0;
#endif
}

CUTLASS_DEVICE
uint shfl_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(__CUDA_ARCH__)
return __shfl_sync(mask, var, delta, width);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
return 0;
#else
return 0;
return 0;
#endif
}

Expand All @@ -335,34 +356,42 @@ template <typename T>
CUTLASS_DEVICE T hfma2(const T a, const T b, const T c) {
#if defined(__CUDA_ARCH__)
return hfma2(a, b, c);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return T(0);
return T(0);
#else
return T(0);
return T(0);
#endif
}

// atomic

#if defined(CUTLASS_ENABLE_SYCL)
CUTLASS_DEVICE int atomicAdd(int *address, int val) {
#if defined(__SYCL_DEVICE_ONLY__)
#if defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return 0;
#endif
#else
return 0;
#endif
}

CUTLASS_DEVICE int atomicCAS(int *address, int compare, int val) {
#if defined(__SYCL_DEVICE_ONLY__)
#if defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return 0;
#endif
#else
return 0;
#endif
}
#endif

Expand Down

0 comments on commit 41121f3

Please sign in to comment.