Skip to content

Commit

Permalink
Fix mask in shfl sync (GitHub issue #2339)
Browse files Browse the repository at this point in the history
  • Loading branch information
Evgueni-Petrov-aka-espetrov committed May 3, 2023
1 parent b6ebc3a commit 4e49a45
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions catboost/cuda/methods/kernel/linear_solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ namespace NKernel {
}

float sum = ShuffleReduce(x, tmp, min(reduceSize, 32));
- sum = __shfl_sync(0xFFFFFF, sum, 0, logicalWarpSize);
+ sum = __shfl_sync(0xFFFFFFFF, sum, 0, logicalWarpSize);


const float ljj = Ljj[0];
Expand All @@ -158,7 +158,7 @@ namespace NKernel {
}

float sum = ShuffleReduce(x, tmp, min(reduceSize, 32));
- sum = __shfl_sync(0xFFFFFF, sum, 0, logicalWarpSize);
+ sum = __shfl_sync(0xFFFFFFFF, sum, 0, logicalWarpSize);

__syncwarp();

Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/targets/kernel/dcg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ __global__ void RemoveGroupMeanImpl(
}

T mean = ShuffleReduce<T>(localThreadIdx, localMean, LogicalWarpSize);
- mean = __shfl_sync(0xFFFFFF, mean, 0, LogicalWarpSize);
+ mean = __shfl_sync(0xFFFFFFFF, mean, 0, LogicalWarpSize);

for (ui32 i = localThreadIdx; i < groupSize; i += LogicalWarpSize) {
normalized[i] = __ldg(values + i) - mean;
Expand Down

0 comments on commit 4e49a45

Please sign in to comment.