Permalink
Browse files

GPU buffer oriented: avoid global atomics for every ray just to get the counter; 104 -> 125 Mray/s
  • Loading branch information...
aras-p committed Apr 24, 2018
1 parent aa80735 commit 9243f0a67ef9796e46a1456876e4b091f818a697
Showing with 16 additions and 7 deletions.
  1. +13 −4 Cpp/Windows/ComputeShaderBounce.hlsl
  2. +3 −3 Cpp/Windows/ComputeShaderCameraRays.hlsl
@@ -1,8 +1,15 @@
#include "ComputeShader.hlsl"
groupshared uint s_RayCounter;
[numthreads(kCSRayBatchSize, 1, 1)]
void main(uint3 gid : SV_DispatchThreadID)
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
if (tid.x == 0)
s_RayCounter = 0;
GroupMemoryBarrier();
Params params = g_Params[0];
uint rngState = (gid.x * 9781 + params.frames * 6271) | 1;
@@ -12,9 +19,6 @@ void main(uint3 gid : SV_DispatchThreadID)
float3 rdAtten = RayDataGetAtten(rd);
uint2 pixelCoord = uint2(pixelIndex>>11, pixelIndex & 0x7FF);
uint prevRayCount;
g_OutCounts.InterlockedAdd(0, 1, prevRayCount);
Hit rec;
int id = HitWorld(g_Spheres, params.sphereCount, rdRay, kMinT, kMaxT, rec);
float3 col;
@@ -47,4 +51,9 @@ void main(uint3 gid : SV_DispatchThreadID)
}
}
}
InterlockedAdd(s_RayCounter, 1);
GroupMemoryBarrierWithGroupSync();
if (tid.x == 0)
g_OutCounts.InterlockedAdd(0, s_RayCounter);
}
@@ -1,7 +1,7 @@
#include "ComputeShader.hlsl"
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID)
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
float3 col = 0;
Params params = g_Params[0];
@@ -33,6 +33,6 @@ void main(uint3 gid : SV_DispatchThreadID)
dstImage[gid.xy] += float4(col, 0);
}
uint prevRayCount;
g_OutCounts.InterlockedAdd(0, DO_SAMPLES_PER_PIXEL, prevRayCount);
if (tid.x == 0 && tid.y == 0)
g_OutCounts.InterlockedAdd(0, DO_SAMPLES_PER_PIXEL * kCSGroupSizeX * kCSGroupSizeY);
}

0 comments on commit 9243f0a

Please sign in to comment.