Permalink
Browse files

GPU buffer oriented: changes to both be correct and improve perf (put rays into groupshared buffers; also emit colors as point splats and rasterize regularly); 147 -> 611 Mray/s
  • Loading branch information...
aras-p committed Apr 24, 2018
1 parent 326ec92 commit 5f65d93a54b7fc31a1d380ce2baa58f92f199e72
@@ -15,8 +15,8 @@
// Should path tracing be done on the GPU with a compute shader?
#define DO_COMPUTE_GPU 1
#define kCSGroupSizeX 16
#define kCSGroupSizeY 16
#define kCSGroupSizeX 8
#define kCSGroupSizeY 8
#define kCSRayBatchSize 256
// Should float3 struct use SSE?
@@ -166,42 +166,55 @@ bool RayDataIsSkipEmission(RayData rd)
return (rd.flags & (1 << 31)) != 0;
}
// One color contribution destined for a single pixel. Records of this type are
// staged by PushSplat() and stored in the g_SplatBufferDst structured buffer.
// NOTE(review): field order presumably has to match whatever consumes the splat buffer -- confirm before reordering.
struct SplatData
{
float3 color; // RGB contribution for the target pixel
uint pixelIndex; // packed pixel coordinate; the camera-ray pass builds it as (x << 11) | y
};
// Builds a SplatData record from a color and the packed index of the pixel
// that color belongs to.
SplatData MakeSplatData(float3 color, uint pixelIndex)
{
    SplatData splat;
    splat.pixelIndex = pixelIndex;
    splat.color = color;
    return splat;
}
// Finds the closest sphere hit by ray r whose hit distance lies in (tMin, tMax).
// Returns the index of that sphere within `spheres`, or -1 when nothing is hit.
// On a hit, outHit receives the hit position, the surface normal and the distance t;
// on a miss outHit is not written.
// NOTE(review): the quadratic below has no dot(r.dir, r.dir) term, so it presumably
// expects r.dir to be unit length -- confirm.
// NOTE(review): several statements appear twice in this listing (they look like the
// before/after copies of whitespace-only changes) -- confirm before compiling.
int HitSpheres(Ray r, StructuredBuffer<Sphere> spheres, int sphereCount, float tMin, float tMax, inout Hit outHit)
{
float hitT = tMax;
int id = -1;
float hitT = tMax;
int id = -1;
for (int i = 0; i < sphereCount; ++i)
{
Sphere s = spheres[i];
float3 co = s.center - r.orig;
float nb = dot(co, r.dir);
float c = dot(co, co) - s.radius*s.radius;
float discr = nb * nb - c;
float3 co = s.center - r.orig;
float nb = dot(co, r.dir);
float c = dot(co, co) - s.radius*s.radius;
float discr = nb * nb - c;
// a non-positive discriminant is treated as a miss for this sphere
if (discr > 0)
{
float discrSq = sqrt(discr);
// Try earlier t
float t = nb - discrSq;
if (t <= tMin) // before min, try later t!
t = nb + discrSq;
if (t > tMin && t < hitT)
{
id = i;
hitT = t;
}
// Try earlier t
float t = nb - discrSq;
if (t <= tMin) // before min, try later t!
t = nb + discrSq;
// keep this sphere only if it is closer than the best hit found so far
if (t > tMin && t < hitT)
{
id = i;
hitT = t;
}
}
}
if (id != -1)
{
outHit.pos = RayPointAt(r, hitT);
outHit.normal = (outHit.pos - spheres[id].center) * spheres[id].invRadius;
outHit.t = hitT;
// fill the hit record only for the winning sphere, after the loop
if (id != -1)
{
outHit.pos = RayPointAt(r, hitT);
// offset from the sphere center scaled by the sphere's stored invRadius
outHit.normal = (outHit.pos - spheres[id].center) * spheres[id].invRadius;
outHit.t = hitT;
}
return id;
return id;
}
struct Params
@@ -228,11 +241,14 @@ StructuredBuffer<RayData> g_RayBufferSrc : register(t6);
// Output image; the camera-ray pass accumulates into it with +=.
RWTexture2D<float4> dstImage : register(u0);
// Shared counters, addressed by byte offset in the visible code:
//   0 - running count of rays processed
//   4 - number of entries appended to g_RayBufferDst
//   8 - number of entries appended to g_SplatBufferDst
RWByteAddressBuffer g_OutCounts : register(u1);
// Rays queued for the next pass; each group reserves a range via g_OutCounts offset 4.
RWStructuredBuffer<RayData> g_RayBufferDst : register(u2);
// Color splats emitted by the bounce pass; each group reserves a range via g_OutCounts offset 8.
RWStructuredBuffer<SplatData> g_SplatBufferDst : register(u3);
// Number of rays the thread group has tried to queue. It can exceed kMaxGroupRays:
// writers skip the store when their slot is out of range, and the count is clamped before flushing.
groupshared uint s_GroupRayCounter;
// Capacity of the per-group ray staging array below.
#define kMaxGroupRays 768
// Per-group staging area for newly generated rays, copied to g_RayBufferDst at the end of main().
groupshared RayData s_GroupRays[kMaxGroupRays];
// Hit-distance range passed to HitWorld() as tMin / tMax.
#define kMinT 0.001f
#define kMaxT 1.0e7f
// NOTE(review): presumably the maximum number of bounces; its use is not visible here -- confirm.
#define kMaxDepth 10
static int HitWorld(StructuredBuffer<Sphere> spheres, int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
@@ -313,7 +329,7 @@ static float3 SurfaceHit(
StructuredBuffer<int> emissives, int emissiveCount,
Ray r, float3 rayAtten,
uint pixelIndex, bool raySkipEmission, Hit hit, int id,
RWStructuredBuffer<RayData> buffer, inout uint state)
inout uint state)
{
Material mat = materials[id];
Ray scattered;
@@ -330,8 +346,9 @@ static float3 SurfaceHit(
#endif
uint rayIdx;
g_OutCounts.InterlockedAdd(4, 1, rayIdx);
buffer[rayIdx] = MakeRayData(scattered, atten * rayAtten, pixelIndex, 0, false, skipEmission);
InterlockedAdd(s_GroupRayCounter, 1, rayIdx);
if (rayIdx < kMaxGroupRays)
s_GroupRays[rayIdx] = MakeRayData(scattered, atten * rayAtten, pixelIndex, 0, false, skipEmission);
// sample lights
#if DO_LIGHT_SAMPLING
@@ -362,10 +379,11 @@ static float3 SurfaceHit(
float omega = 2 * 3.1415926 * (1 - cosAMax);
float3 nl = dot(hit.normal, r.dir) < 0 ? hit.normal : -hit.normal;
uint rayIdx;
g_OutCounts.InterlockedAdd(4, 1, rayIdx);
float3 shadowAtt = (mat.albedo * smat.emissive) * (max(0.0f, dot(l, nl)) * omega / 3.1415926);
buffer[rayIdx] = MakeRayData(MakeRay(hit.pos,l), shadowAtt * rayAtten, pixelIndex, i, true, false);
uint rayIdx;
InterlockedAdd(s_GroupRayCounter, 1, rayIdx);
if (rayIdx < kMaxGroupRays)
s_GroupRays[rayIdx] = MakeRayData(MakeRay(hit.pos, l), shadowAtt * rayAtten, pixelIndex, i, true, false);
}
}
#endif
@@ -1,13 +1,27 @@
#include "ComputeShader.hlsl"
// Rays processed by this thread group; added to g_OutCounts offset 0 by thread 0.
groupshared uint s_RayCounter;
// Number of splats currently staged in s_GroupSplats.
groupshared uint s_GroupSplatCounter;
// Per-group splat staging area with one slot per thread of the group.
// NOTE(review): in the visible code each thread calls PushSplat() at most once, which is
// what keeps the index in range -- confirm if more call sites are added.
groupshared SplatData s_GroupSplats[kCSRayBatchSize];
// Appends one color splat to this thread group's shared splat list.
// The destination slot is reserved with an atomic add on s_GroupSplatCounter,
// so several threads of the group can append at the same time.
void PushSplat(float3 col, uint pixelIndex)
{
    uint slot;
    InterlockedAdd(s_GroupSplatCounter, 1, slot); // slot receives the counter value before the add
    SplatData splat = MakeSplatData(col, pixelIndex);
    s_GroupSplats[slot] = splat;
}
[numthreads(kCSRayBatchSize, 1, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
if (tid.x == 0)
{
s_RayCounter = 0;
s_GroupRayCounter = 0;
s_GroupSplatCounter = 0;
}
GroupMemoryBarrierWithGroupSync();
Params params = g_Params[0];
@@ -17,43 +31,68 @@ void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
Ray rdRay = RayDataGetRay(rd);
uint pixelIndex = RayDataGetPixelIndex(rd);
float3 rdAtten = RayDataGetAtten(rd);
uint2 pixelCoord = uint2(pixelIndex>>11, pixelIndex & 0x7FF);
//uint2 pixelCoord = uint2(pixelIndex>>11, pixelIndex & 0x7FF);
Hit rec;
int id = HitWorld(g_Spheres, params.sphereCount, rdRay, kMinT, kMaxT, rec);
float3 col;
// Does not hit anything?
if (id < 0)
{
if (!RayDataIsShadow(rd))
{
// for non-shadow rays, evaluate and add sky
float3 col = SkyHit(rdRay) * rdAtten;
dstImage[pixelCoord] += float4(col, 0);
//dstImage[pixelCoord] += float4(col, 0);
PushSplat(col, pixelIndex);
}
}
else
{
if (!RayDataIsShadow(rd))
{
// A non-shadow ray hit something; evaluate material response (this can queue new rays for next bounce)
col = SurfaceHit(g_Spheres, g_Materials, params.sphereCount, g_Emissives, params.emissiveCount,
float3 col = SurfaceHit(g_Spheres, g_Materials, params.sphereCount, g_Emissives, params.emissiveCount,
rdRay, rdAtten, pixelIndex, RayDataIsSkipEmission(rd), rec, id,
g_RayBufferDst, rngState) * rdAtten;
dstImage[pixelCoord] += float4(col, 0);
rngState) * rdAtten;
//dstImage[pixelCoord] += float4(col, 0);
PushSplat(col, pixelIndex);
}
else
{
// A shadow ray; add illumination if we hit the needed light
if (id == RayDataGetLightID(rd))
{
dstImage[pixelCoord] += float4(rdAtten, 0);
float3 col = rdAtten;
//dstImage[pixelCoord] += float4(rdAtten, 0);
PushSplat(col, pixelIndex);
}
}
}
InterlockedAdd(s_RayCounter, 1);
GroupMemoryBarrierWithGroupSync();
// debugging; add green tint to any places where we didn't have enough space for new rays
//if (s_GroupRayCounter > kMaxGroupRays)
// PushSplat(float3(0, 2, 0), pixelIndex);
if (tid.x == 0)
{
g_OutCounts.InterlockedAdd(0, s_RayCounter);
s_GroupRayCounter = min(s_GroupRayCounter, kMaxGroupRays);
uint rayBufferStart;
g_OutCounts.InterlockedAdd(4, s_GroupRayCounter, rayBufferStart);
for (uint ir = 0; ir < s_GroupRayCounter; ++ir)
{
g_RayBufferDst[rayBufferStart + ir] = s_GroupRays[ir];
}
uint splatBufferStart;
g_OutCounts.InterlockedAdd(8, s_GroupSplatCounter, splatBufferStart);
for (uint is = 0; is < s_GroupSplatCounter; ++is)
{
g_SplatBufferDst[splatBufferStart + is] = s_GroupSplats[is];
}
}
}
@@ -3,7 +3,12 @@
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
float3 col = 0;
if (tid.x == 0)
{
s_GroupRayCounter = 0;
}
GroupMemoryBarrierWithGroupSync();
Params params = g_Params[0];
uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
@@ -28,11 +33,26 @@ void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
// Hit something; evaluate material response (this can queue new rays for next bounce)
col = SurfaceHit(g_Spheres, g_Materials, params.sphereCount, g_Emissives, params.emissiveCount,
r, float3(1,1,1), (gid.x<<11)|gid.y, false, rec, id,
g_RayBufferDst, rngState);
rngState);
}
dstImage[gid.xy] += float4(col, 0);
}
GroupMemoryBarrierWithGroupSync();
// debugging; add red tint to any places where we didn't have enough space for new rays
//if (s_GroupRayCounter > kMaxGroupRays)
// dstImage[gid.xy] += float4(2,0,0,0);
if (tid.x == 0 && tid.y == 0)
{
g_OutCounts.InterlockedAdd(0, DO_SAMPLES_PER_PIXEL * kCSGroupSizeX * kCSGroupSizeY);
uint rayCount = min(s_GroupRayCounter, kMaxGroupRays);
uint rayBufferStart;
g_OutCounts.InterlockedAdd(4, rayCount, rayBufferStart);
for (uint i = 0; i < rayCount; ++i)
{
g_RayBufferDst[rayBufferStart + i] = s_GroupRays[i];
}
}
}
@@ -0,0 +1,4 @@
// Pixel shader for the splat pass: outputs the color received in TEXCOORD0
// unchanged, with the fourth component set to zero.
float4 main(float3 color : TEXCOORD0) : SV_Target
{
    float4 result = float4(color, 0);
    return result;
}
@@ -286,6 +286,24 @@
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecode</VariableName>
</FxCompile>
<!-- Splat pixel shader: built as a Shader Model 5.0 pixel shader in every configuration/platform;
     the compiled output goes to CompiledPixelShaderSplat.h under the variable name g_PSBytecodeSplat. -->
<FxCompile Include="PixelShaderSplat.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Pixel</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Pixel</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_PSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_PSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecodeSplat</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledPixelShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledPixelShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledPixelShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledPixelShaderSplat.h</HeaderFileOutput>
</FxCompile>
<FxCompile Include="VertexShader.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
@@ -304,6 +322,24 @@
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecode</VariableName>
</FxCompile>
<!-- Splat vertex shader: built as a Shader Model 5.0 vertex shader in every configuration/platform;
     the compiled output goes to CompiledVertexShaderSplat.h under the variable name g_VSBytecodeSplat. -->
<FxCompile Include="VertexShaderSplat.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_VSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_VSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecodeSplat</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecodeSplat</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledVertexShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledVertexShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledVertexShaderSplat.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledVertexShaderSplat.h</HeaderFileOutput>
</FxCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
@@ -66,5 +66,7 @@
<FxCompile Include="ComputeShaderCameraRays.hlsl" />
<FxCompile Include="ComputeShaderFinal.hlsl" />
<FxCompile Include="ComputeShaderCopyCount.hlsl" />
<FxCompile Include="PixelShaderSplat.hlsl" />
<FxCompile Include="VertexShaderSplat.hlsl" />
</ItemGroup>
</Project>
Oops, something went wrong.

0 comments on commit 5f65d93

Please sign in to comment.