No need for atomicAdd for float2, conflicts with CUDA 12.1
nshmyrev committed Apr 17, 2023
1 parent 0fee1c1 commit a25f216
Showing 1 changed file with 1 addition and 5 deletions.
6 changes: 1 addition & 5 deletions src/cudafeat/feature-online-batched-cmvn-cuda-kernels.cu
@@ -24,18 +24,14 @@ __host__ __device__ inline float2 operator-(const float2 &a, const float2 &b) {
   retval.y = a.y - b.y;
   return retval;
 }
 
 __host__ __device__ inline float2 operator+(const float2 &a, const float2 &b) {
   float2 retval;
   retval.x = a.x + b.x;
   retval.y = a.y + b.y;
   return retval;
 }
 
-__device__ inline void atomicAdd(float2 *addr, float2 val) {
-  atomicAdd(reinterpret_cast<float *>(addr), val.x);
-  atomicAdd(reinterpret_cast<float *>(addr) + 1, val.y);
-}
-
 __device__ inline void operator+=(float2 &a, float2 &b) {
   // overloading +=
   a.x += b.x;
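Context on the conflict: starting with the CUDA 12.1 toolkit, the device headers declare their own float2 overload of atomicAdd (documented as supported on devices of compute capability 9.x), with signature float2 atomicAdd(float2 *address, float2 val). The hand-rolled helper deleted above takes the same parameters but returns void, so the two overloads differ only in return type and the compiler rejects the redeclaration; removing the helper resolves the clash.

A minimal sketch of an alternative fix, not part of this commit: the fallback could instead be kept behind a toolkit-version guard, assuming CUDART_VERSION from cuda_runtime_api.h (which encodes CUDA 12.1 as 12010) is an acceptable way to detect the built-in overload.

#include <cuda_runtime_api.h>  // defines CUDART_VERSION (12010 for CUDA 12.1)

#if !defined(CUDART_VERSION) || (CUDART_VERSION < 12010)
// Fallback for toolkits that predate the built-in float2 atomicAdd.
// Two independent 32-bit float atomics: each component is updated
// atomically, but the pair is not modified as a single unit, which
// is sufficient for pure accumulation.
__device__ inline void atomicAdd(float2 *addr, float2 val) {
  atomicAdd(reinterpret_cast<float *>(addr), val.x);
  atomicAdd(reinterpret_cast<float *>(addr) + 1, val.y);
}
#endif

With a guard like this, a call site such as atomicAdd(&partial_sum, v) would resolve to the fallback on older toolkits and to the CUDA-provided overload on 12.1 and later; the commit opts for the simpler fix of deleting the helper outright.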
