diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_a.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_a.png index 981fc7ccb1f..4f38f80f606 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_a.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_a.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e45e4b1c3e5f34c9d74e7bdafa0068df148df97ea17d973a23c9873387670d5e -size 237826 +oid sha256:1ab13e322c857384d29581d4f5e7dc2344458601b505ccd63c609b3b9cbe38d6 +size 248654 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_b.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_b.png index 7d328dea026..40b37677aba 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_b.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1303_StackLitSG_Testers_b.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:619e8b7d51809b4b9faaa99b6669450dfe2886b9ba2d4b76f697331650836bfc -size 264539 +oid sha256:469857bef2f12f3b24b98970eb3125264669d8440398df615a2e9f3c307d380b +size 277997 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1501_EyeTestSG.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1501_EyeTestSG.png index 7cf1d73c307..21e81ece8a9 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1501_EyeTestSG.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/1501_EyeTestSG.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1c2bf72c163965563b0a5dd51d131304fe25cfb6230eecde328ddc24a9c6780 -size 127138 +oid sha256:1865ed7f9ea0104218196f641c970407b582ca82424fcd730fa45a8e9bcce9d8 +size 139848 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2204_ReflectionProbes_Lights.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2204_ReflectionProbes_Lights.png index 837737e0782..e1032dfea07 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2204_ReflectionProbes_Lights.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2204_ReflectionProbes_Lights.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b276b9d47c142761d3b73e53446ba1b7a7a3d7628629b1972c54518f081445b -size 40489 +oid sha256:55ea2ee1127f6edbc7b0cf40ac8bf1cfee3e7ac7b4e0d11134bc746562a20795 +size 45156 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2305_Contact_Shadow_PointLight.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2305_Contact_Shadow_PointLight.png index 0db9cc11ae7..32b7a3b00fb 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2305_Contact_Shadow_PointLight.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2305_Contact_Shadow_PointLight.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9793cfdb451273d2e96965f1775acb767815d9640fc5182c260f86a18a3d0d97 -size 11936 +oid sha256:d53feb80168ce1ff3465dcdc022b550a4fce96abd852c0cf2400d75d08c239f8 +size 146112 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2306_Contact_Shadow_SpotLight.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2306_Contact_Shadow_SpotLight.png index 54cb60c1870..0a5750d07ce 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2306_Contact_Shadow_SpotLight.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2306_Contact_Shadow_SpotLight.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:486b8ec690808ea10872b811355c1b43edb3c1baf81837a32015dba01b33fe1d -size 11923 +oid sha256:440c5130d0c7be57661f8f3372dac90085d9278bde7c9f1c12be302ede803841 +size 143862 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2311_ShadowMaps.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2311_ShadowMaps.png index be7b0661406..38fe7474bf9 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2311_ShadowMaps.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/2311_ShadowMaps.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:819994e69eb8e5c3a473ed3cdc1ddc3907a62869d1e28aa9a0f98157b718b127 -size 124457 +oid sha256:79bcbc7e44ef35983e89ecec5707f5c8673cbef5db36354e04439f0e817670b1 +size 178659 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/5003_Fog_DensityVolumesShadows.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/5003_Fog_DensityVolumesShadows.png index 6feff7c3684..1e9625dd55d 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/5003_Fog_DensityVolumesShadows.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/5003_Fog_DensityVolumesShadows.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebe62a04958dff25b0461e43fbb668d191e483ad4bda110b336cb17220c5d7ed -size 106085 +oid sha256:f8960d432b7036b851fcecb1df3688be935170e74c0c8ce5b3a55365970d53a9 +size 107575 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9004_MultiViewport.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9004_MultiViewport.png index f186ee6dfe6..572ee3697c9 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9004_MultiViewport.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9004_MultiViewport.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b756114b2650b792acd32a37ff8881f25af2ba63373d2fd193509c3190aa2cc1 -size 112642 +oid sha256:2babc86b61aa9b3f633ad57179a0bb94df06aac1177c45a490e850e5dd9f2a3e +size 168579 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardBoth.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardBoth.png index 7ae4abef137..9658a485cfd 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardBoth.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardBoth.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa28088ccdd36a14ca35d7cbc44d4ddf09709667ad7a3ee4a8548db717078929 -size 134332 +oid sha256:ada3a1bc6d40c73c9ff811430068cbd45edb3c587a9593d9977698737a263580 +size 243546 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardOnly.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardOnly.png index 0b114ec5510..d762807fc1e 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardOnly.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9401_MSAAForwardOnly.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a40d948965426ff4603ce0bf20499ec73892130c373f3862a4e00d5bc1563ff -size 156342 +oid sha256:c305c887642c2551a67b6813590c36f3fa82024e04e59c59f5c4490fbdcc6463 +size 267981 diff --git a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9701_CustomPass_DrawRenderers.png b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9701_CustomPass_DrawRenderers.png index 4a34755117f..543c3337443 100644 --- a/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9701_CustomPass_DrawRenderers.png +++ b/TestProjects/HDRP_Tests/Assets/ReferenceImages/Linear/OSXEditor/Metal/None/9701_CustomPass_DrawRenderers.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01ab7b22ff9e62b9cf89f4de8627bcf9bca5998b5959df6db8decec9fd1d2dc9 -size 57772 +oid sha256:638db820b2e4801bfaba3a8de57627c48c4db2b31edf0cd4ec7600585fe0cf01 +size 60207 diff --git a/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl b/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl index c318581e16b..3981ef50a88 100644 --- a/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl +++ b/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl @@ -46,6 +46,7 @@ #define HALF_MIN_SQRT 0.0078125 // 2^-7 == sqrt(HALF_MIN), useful for ensuring HALF_MIN after x^2 #define HALF_MAX 65504.0 #define UINT_MAX 0xFFFFFFFFu +#define INT_MAX 0x7FFFFFFF #ifdef SHADER_API_GLES diff --git a/com.unity.render-pipelines.high-definition/CHANGELOG.md b/com.unity.render-pipelines.high-definition/CHANGELOG.md index 3bd45d2c6b3..077490b7f44 100644 --- a/com.unity.render-pipelines.high-definition/CHANGELOG.md +++ b/com.unity.render-pipelines.high-definition/CHANGELOG.md @@ -133,6 +133,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - On platforms that allow it skip the first mip of the depth pyramid and compute it alongside the depth buffer used for low res transparents. - When trying to install the local configuration package, if another one is already present the user is now asked whether they want to keep it or not. - Improved MSAA color resolve to fix issues when very bright and very dark samples are resolved together. +- Improve performance of GPU light AABB generation ## [10.0.0] - 2019-06-10 diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl index ea8d937ca7c..4a2a69df125 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl @@ -3,31 +3,33 @@ // Used to index into our SFiniteLightBound (g_data) and // LightVolumeData (_LightVolumeData) buffers. -int GenerateLightCullDataIndex(int lightIndex, uint numVisibleLights, uint eyeIndex) +uint GenerateLightCullDataIndex(uint lightIndex, uint numVisibleLights, uint eyeIndex) { + lightIndex = min(lightIndex, numVisibleLights - 1); // Stay within bounds + // For monoscopic, there is just one set of light cull data structs. // In stereo, all of the left eye structs are first, followed by the right eye structs. - const int perEyeBaseIndex = (int)eyeIndex * (int)numVisibleLights; + const uint perEyeBaseIndex = eyeIndex * numVisibleLights; return (perEyeBaseIndex + lightIndex); } struct ScreenSpaceBoundsIndices { - int min; - int max; + uint min; + uint max; }; // The returned values are used to index into our AABB screen space bounding box buffer // Usually named g_vBoundsBuffer. The two values represent the min/max indices. -ScreenSpaceBoundsIndices GenerateScreenSpaceBoundsIndices(int lightIndex, uint numVisibleLights, uint eyeIndex) +ScreenSpaceBoundsIndices GenerateScreenSpaceBoundsIndices(uint lightIndex, uint numVisibleLights, uint eyeIndex) { // In the monoscopic mode, there is one set of bounds (min,max -> 2 * g_iNrVisibLights) // In stereo, there are two sets of bounds (leftMin, leftMax, rightMin, rightMax -> 4 * g_iNrVisibLights) - const int eyeRelativeBase = (int)eyeIndex * 2 * (int)numVisibleLights; + const uint eyeRelativeBase = eyeIndex * 2 * numVisibleLights; ScreenSpaceBoundsIndices indices; indices.min = eyeRelativeBase + lightIndex; - indices.max = eyeRelativeBase + lightIndex + (int)numVisibleLights; + indices.max = indices.min + numVisibleLights; return indices; } diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs index 30671f5e233..13d77029386 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs @@ -130,8 +130,8 @@ struct SFiniteLightBound public Vector3 boxAxisY; // Scaled by the extents (half-size) public Vector3 boxAxisZ; // Scaled by the extents (half-size) public Vector3 center; // Center of the bounds (box) in camera space - public Vector2 scaleXY; // Scale applied to the top of the box to turn it into a truncated pyramid - public float radius; // Circumscribed sphere for the bounds (box) + public float scaleXY; // Scale applied to the top of the box to turn it into a truncated pyramid (X = Y) + public float radius; // Circumscribed sphere for the bounds (box) }; [GenerateHLSL] @@ -696,7 +696,6 @@ enum ClusterDepthSource : int { "TileLightListGen_NoDepthRT_SrcBigTile", "TileLightListGen_DepthRT_SrcBigTile_Oblique", "TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique" } }; - static int s_GenAABBKernel; static int s_GenListPerTileKernel; static int[,] s_ClusterKernels = new int[(int)ClusterPrepassSource.Count, (int)ClusterDepthSource.Count]; static int[,] s_ClusterObliqueKernels = new int[(int)ClusterPrepassSource.Count, (int)ClusterDepthSource.Count]; @@ -879,8 +878,6 @@ void InitializeLightLoop(IBLFilterBSDF[] iBLFilterBSDFArray) m_MaxLightsOnScreen = m_MaxDirectionalLightsOnScreen + m_MaxPunctualLightsOnScreen + m_MaxAreaLightsOnScreen + m_MaxEnvLightsOnScreen; m_MaxPlanarReflectionOnScreen = lightLoopSettings.maxPlanarReflectionOnScreen; - s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB"); - // Cluster { s_ClearVoxelAtomicKernel = clearClusterAtomicIndexShader.FindKernel("ClearAtomic"); @@ -1766,9 +1763,9 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig fAltDx *= range; fAltDy *= range; // Handle case of pyramid with this select (currently unused) - var altDist = Mathf.Sqrt(fAltDy * fAltDy + (true ? 1.0f : 2.0f) * fAltDx * fAltDx); - bound.radius = altDist > (0.5f * range) ? altDist : (0.5f * range); // will always pick fAltDist - bound.scaleXY = squeeze ? new Vector2(0.01f, 0.01f) : new Vector2(1.0f, 1.0f); + var altDist = Mathf.Sqrt(fAltDy * fAltDy + (true ? 1.0f : 2.0f) * fAltDx * fAltDx); + bound.radius = altDist > (0.5f * range) ? altDist : (0.5f * range); // will always pick fAltDist + bound.scaleXY = squeeze ? 0.01f : 1.0f; lightVolumeData.lightAxisX = vx; lightVolumeData.lightAxisY = vy; @@ -1780,16 +1777,19 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig } else if (gpuLightType == GPULightType.Point) { - Vector3 vx = xAxisVS; - Vector3 vy = yAxisVS; - Vector3 vz = zAxisVS; + // Construct a view-space axis-aligned bounding cube around the bounding sphere. + // This allows us to utilize the same polygon clipping technique for all lights. + // Non-axis-aligned vectors may result in a larger screen-space AABB. + Vector3 vx = new Vector3(1, 0, 0); + Vector3 vy = new Vector3(0, 1, 0); + Vector3 vz = new Vector3(0, 0, 1); bound.center = positionVS; bound.boxAxisX = vx * range; bound.boxAxisY = vy * range; bound.boxAxisZ = vz * range; - bound.scaleXY.Set(1.0f, 1.0f); - bound.radius = range; + bound.scaleXY = 1.0f; + bound.radius = range; // fill up ldata lightVolumeData.lightAxisX = vx; @@ -1810,7 +1810,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig bound.boxAxisY = extents.y * yAxisVS; bound.boxAxisZ = extents.z * zAxisVS; bound.radius = extents.magnitude; - bound.scaleXY.Set(1.0f, 1.0f); + bound.scaleXY = 1.0f; lightVolumeData.lightPos = centerVS; lightVolumeData.lightAxisX = xAxisVS; @@ -1830,7 +1830,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig bound.boxAxisY = extents.y * yAxisVS; bound.boxAxisZ = extents.z * zAxisVS; bound.radius = extents.magnitude; - bound.scaleXY.Set(1.0f, 1.0f); + bound.scaleXY = 1.0f; lightVolumeData.lightPos = centerVS; lightVolumeData.lightAxisX = xAxisVS; @@ -1850,7 +1850,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig bound.boxAxisY = extents.y * yAxisVS; bound.boxAxisZ = extents.z * zAxisVS; bound.radius = extents.magnitude; - bound.scaleXY.Set(1.0f, 1.0f); + bound.scaleXY = 1.0f; lightVolumeData.lightPos = centerVS; lightVolumeData.lightAxisX = xAxisVS; @@ -2065,8 +2065,8 @@ void GetEnvLightVolumeDataAndBound(HDProbe probe, LightVolumeType lightVolumeTyp bound.boxAxisX = influenceRightVS * influenceExtents.x; bound.boxAxisY = influenceUpVS * influenceExtents.x; bound.boxAxisZ = influenceForwardVS * influenceExtents.x; - bound.scaleXY.Set(1.0f, 1.0f); - bound.radius = influenceExtents.x; + bound.scaleXY = 1.0f; + bound.radius = influenceExtents.x; break; } case LightVolumeType.Box: @@ -2075,8 +2075,8 @@ void GetEnvLightVolumeDataAndBound(HDProbe probe, LightVolumeType lightVolumeTyp bound.boxAxisX = influenceExtents.x * influenceRightVS; bound.boxAxisY = influenceExtents.y * influenceUpVS; bound.boxAxisZ = influenceExtents.z * influenceForwardVS; - bound.scaleXY.Set(1.0f, 1.0f); - bound.radius = influenceExtents.magnitude; + bound.scaleXY = 1.0f; + bound.radius = influenceExtents.magnitude; // The culling system culls pixels that are further // than a threshold to the box influence extents. @@ -2124,7 +2124,7 @@ void CreateBoxVolumeDataAndBound(OrientedBBox obb, LightCategory category, Light bound.boxAxisY = extentConservativeY * upVS; bound.boxAxisZ = extentConservativeZ * forwardVS; bound.radius = extentConservativeMagnitude; - bound.scaleXY.Set(1.0f, 1.0f); + bound.scaleXY = 1.0f; // The culling system culls pixels that are further // than a threshold to the box influence extents. @@ -3087,13 +3087,21 @@ static void GenerateLightsScreenSpaceAABBs(in BuildGPULightListParameters parame { if (parameters.totalLightCount != 0) { - // With XR single-pass, we have one set of light bounds per view to iterate over (bounds are in view space for each view) - cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_data, resources.convexBoundsBuffer); - cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_vBoundsBuffer, resources.AABBBoundsBuffer); + using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.GenerateLightAABBs))) + { + // With XR single-pass, we have one set of light bounds per view to iterate over (bounds are in view space for each view) + cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_data, resources.convexBoundsBuffer); + cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_vBoundsBuffer, resources.AABBBoundsBuffer); + + ConstantBuffer.Push(cmd, parameters.lightListCB, parameters.screenSpaceAABBShader, HDShaderIDs._ShaderVariablesLightList); - ConstantBuffer.Push(cmd, parameters.lightListCB, parameters.screenSpaceAABBShader, HDShaderIDs._ShaderVariablesLightList); + const int threadsPerLight = 4; // Shader: THREADS_PER_LIGHT (4) + const int threadsPerGroup = 64; // Shader: THREADS_PER_GROUP (64) - cmd.DispatchCompute(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, (parameters.totalLightCount + 7) / 8, parameters.viewCount, 1); + int groupCount = HDUtils.DivRoundUp(parameters.totalLightCount * threadsPerLight, threadsPerGroup); + + cmd.DispatchCompute(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, groupCount, parameters.viewCount, 1); + } } } @@ -3408,12 +3416,7 @@ unsafe BuildGPULightListParameters PrepareBuildGPULightListParameters( HDCamera // Screen space AABB parameters.screenSpaceAABBShader = buildScreenAABBShader; - parameters.screenSpaceAABBShader.shaderKeywords = null; - if (isProjectionOblique) - { - parameters.screenSpaceAABBShader.EnableKeyword("USE_OBLIQUE_MODE"); - } - parameters.screenSpaceAABBKernel = s_GenAABBKernel; + parameters.screenSpaceAABBKernel = 0; // Big tile prepass parameters.runBigTilePrepass = hdCamera.frameSettings.IsEnabled(FrameSettingsField.BigTilePrepass); diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl index f4a18fa8f36..dc8f41cf636 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl @@ -74,7 +74,7 @@ struct SFiniteLightBound float3 boxAxisY; float3 boxAxisZ; float3 center; - float2 scaleXY; + float scaleXY; float radius; }; diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute index be07511307d..62ed317dad9 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute @@ -1,533 +1,734 @@ -// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7. -// https://github.com/wolfgangfengel/GPU-Pro-7 - -#pragma kernel ScreenBoundsAABB - -#pragma multi_compile _ USE_OBLIQUE_MODE +// #pragma enable_d3d11_debug_symbols +#pragma only_renderers d3d11 playstation xboxone vulkan metal switch +#pragma kernel main #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" #include "Packages/com.unity.render-pipelines.high-definition-config/Runtime/ShaderConfig.cs.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl" -#pragma only_renderers d3d11 playstation xboxone vulkan metal switch +/* ------------------------------ Inputs ------------------------------------ */ + +StructuredBuffer g_data : register(t0); -StructuredBuffer g_data : register( t0 ); +/* ------------------------------ Outputs ----------------------------------- */ -#define NR_THREADS 64 +RWStructuredBuffer g_vBoundsBuffer : register(u0); -// output buffer -RWStructuredBuffer g_vBoundsBuffer : register( u0 ); +/* ------------------------------ Utilities --------------------------------- */ -#define MAX_PNTS 9 // strictly this should be 10=6+4 but we get more wavefronts and 10 seems to never hit (fingers crossed) - // However, worst case the plane that would be skipped if such an extreme case ever happened would be backplane - // clipping gets skipped which doesn't cause any errors. +// Returns the location of the N-th set bit starting from the lowest order bit and working upward. +// Slow implementation - do not use for large bit sets. +// Could be optimized - see https://graphics.stanford.edu/~seander/bithacks.html +uint NthBitLow(uint value, uint n) +{ + uint b = -1; // Consistent with the behavior of firstbitlow() + uint c = countbits(value); + if (n < c) // Validate inputs + { + uint r = n + 1; // Compute the number of remaining bits -// LDS (2496 bytes) -groupshared float posX[MAX_PNTS*8*2]; -groupshared float posY[MAX_PNTS*8*2]; -groupshared float posZ[MAX_PNTS*8*2]; -groupshared float posW[MAX_PNTS*8*2]; -groupshared unsigned int clipFlags[48]; + do + { + uint f = firstbitlow(value >> (b + 1)); // Find the next set bit + b += f + r; // Make a guess (assume all [b+f+1,b+f+r] bits are set) + c = countbits(value << (32 - (b + 1))); // Count the number of bits actually set + r = (n + 1) - c; // Compute the number of remaining bits + } while (r > 0); + } + return b; +} -unsigned int GetClip(const float4 P); -int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p); -void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r); +float4x4 Translation4x4(float3 d) +{ + float4x4 M = k_identity4x4; -#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightingConvexHullUtils.hlsl" + M._14_24_34 = d; // Last column + return M; +} -[numthreads(NR_THREADS, 1, 1)] -void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) +// Scale followed by rotation (scaled axes). +float3x3 ScaledRotation3x3(float3 xAxis, float3 yAxis, float3 zAxis) { - uint groupID = u3GroupID.x; - uint eyeIndex = u3GroupID.y; // currently, can only be 0 or 1 + float3x3 R = float3x3(xAxis, yAxis, zAxis); + float3x3 C = transpose(R); // Row to column - // The g_ is preserved in order to make cross-pipeline (FPTL) updates easier - float4x4 g_mInvProjection = g_mInvProjectionArr[eyeIndex]; - float4x4 g_mProjection = g_mProjectionArr[eyeIndex]; + return C; +} - //uint vindex = groupID * NR_THREADS + threadID; - unsigned int g = groupID; - unsigned int t = threadID; +float3x3 Invert3x3(float3x3 R) +{ + float3x3 C = transpose(R); // Row to column + float det = dot(C[0], cross(C[1], C[2])); + float3x3 adj = float3x3(cross(C[1], C[2]), + cross(C[2], C[0]), + cross(C[0], C[1])); + return rcp(det) * adj; +} - const int subLigt = (int) (t/8); - const int lgtIndex = subLigt+(int) g*8; - const int sideIndex = (int) (t%8); +float4x4 Homogenize3x3(float3x3 R) +{ + float4x4 M = float4x4(float4(R[0], 0), + float4(R[1], 0), + float4(R[2], 0), + float4(0,0,0,1)); + return M; +} - const int eyeAdjustedLgtIndex = GenerateLightCullDataIndex(lgtIndex, g_iNrVisibLights, eyeIndex); - SFiniteLightBound lgtDat = g_data[eyeAdjustedLgtIndex]; +float4x4 PerspectiveProjection4x4(float a, float g, float n, float f) +{ + float b = (f + n) * rcp(f - n); // Z in [-1, 1] + float c = -2 * f * n * rcp(f - n); // No Z-reversal - const float3 boxX = lgtDat.boxAxisX.xyz; - const float3 boxY = lgtDat.boxAxisY.xyz; - const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light) - const float3 center = lgtDat.center.xyz; - const float radius = lgtDat.radius; - const float2 scaleXY = lgtDat.scaleXY; + return float4x4(g/a, 0, 0, 0, + 0, g, 0, 0, + 0, 0, b, c, + 0, 0, 1, 0); +} - { - if(sideIndex<6 && lgtIndex<(int) g_iNrVisibLights) // mask 2 out of 8 threads - { - float3 q0, q1, q2, q3; - GetHullQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, sideIndex); +/* ------------------------------ Implementation ---------------------------- */ +// Improve the quality of generated code at the expense of readability. +// Remove when the shader compiler is clever enough to perform this optimization for us. +#define OBTUSE_COMPILER - const float4 vP0 = mul(g_mProjection, float4(q0, 1)); - const float4 vP1 = mul(g_mProjection, float4(q1, 1)); - const float4 vP2 = mul(g_mProjection, float4(q2, 1)); - const float4 vP3 = mul(g_mProjection, float4(q3, 1)); +#ifdef SHADER_API_XBOXONE +// The Xbox shader compiler expects the lane swizzle mask to be a compile-time constant. +// In our case, the mask is a compile-time constant, but it is defined inside a loop +// that is unrolled at the compile time, and the constants are generated during the +// constant propagation pass of the optimizer. This works fine on PlayStation, but does not work +// on Xbox. In order to avoid writing hideous code specifically for Xbox, we disable the support +// of wave intrinsics on Xbox until the Xbox compiler is fixed. +#undef PLATFORM_SUPPORTS_WAVE_INTRINSICS +#endif - // test vertices of one quad (of the convex hull) for intersection - const unsigned int uFlag0 = GetClip(vP0); - const unsigned int uFlag1 = GetClip(vP1); - const unsigned int uFlag2 = GetClip(vP2); - const unsigned int uFlag3 = GetClip(vP3); +#define CLEAR_SIGN_BIT(X) (asint(X) & INT_MAX) +#define DIV_ROUND_UP(N, D) (((N) + (D) - 1) / (D)) // No division by 0 checks + +// Clipping a plane by a cube may produce a hexagon (6-gon). +// Clipping a hexagon by 4 planes may produce a decagon (10-gon). +#define MAX_CLIP_VERTS (10) +#define NUM_VERTS (8) +#define NUM_FACES (6) +#define NUM_PLANES (6) +#define THREADS_PER_GROUP (64) +#define THREADS_PER_LIGHT (4) // Set to 1 for debugging +#define LIGHTS_PER_GROUP (THREADS_PER_GROUP / THREADS_PER_LIGHT) +#define VERTS_PER_GROUP (NUM_VERTS * LIGHTS_PER_GROUP) +#define VERTS_PER_THREAD (NUM_VERTS / THREADS_PER_LIGHT) +#define FACES_PER_THREAD DIV_ROUND_UP(NUM_FACES, THREADS_PER_LIGHT) + +// All planes and faces are always in the standard order (see below). +// Near and far planes are swapped in the case of Z-reversal, but it does not change the algorithm. +#define FACE_LEFT (1 << 0) // -X z +#define FACE_RIGHT (1 << 1) // +X / +#define FACE_TOP (1 << 2) // -Y 0 -- x +#define FACE_BOTTOM (1 << 3) // +Y | +#define FACE_FRONT (1 << 4) // -Z y +#define FACE_BACK (1 << 5) // +Z +#define FACE_MASK ((1 << NUM_FACES) - 1) + +// A list of vertices for each face (CCW order w.r.t. its normal, starting from the LSB). +#define VERT_LIST_LEFT ((2) << 9 | (6) << 6 | (4) << 3 | (0) << 0) +#define VERT_LIST_RIGHT ((5) << 9 | (7) << 6 | (3) << 3 | (1) << 0) +#define VERT_LIST_TOP ((1) << 9 | (3) << 6 | (2) << 3 | (0) << 0) +#define VERT_LIST_BOTTOM ((6) << 9 | (7) << 6 | (5) << 3 | (4) << 0) +#define VERT_LIST_FRONT ((4) << 9 | (5) << 6 | (1) << 3 | (0) << 0) +#define VERT_LIST_BACK ((3) << 9 | (7) << 6 | (6) << 3 | (2) << 0) + +// All vertices are always in the standard order (see below). +uint GetFaceMaskOfVertex(uint v) +{ + // 0: (-1, -1, -1) -> { FACE_LEFT | FACE_TOP | FACE_FRONT } + // 1: (+1, -1, -1) -> { FACE_RIGHT | FACE_TOP | FACE_FRONT } + // 2: (-1, +1, -1) -> { FACE_LEFT | FACE_BOTTOM | FACE_FRONT } + // 3: (+1, +1, -1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_FRONT } + // 4: (-1, -1, +1) -> { FACE_LEFT | FACE_TOP | FACE_BACK } + // 5: (+1, -1, +1) -> { FACE_RIGHT | FACE_TOP | FACE_BACK } + // 6: (-1, +1, +1) -> { FACE_LEFT | FACE_BOTTOM | FACE_BACK } + // 7: (+1, +1, +1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_BACK } + // ((v & 1) == 0) ? 1 : 2) | ((v & 2) == 0) ? 4 : 8) | ((v & 4) == 0) ? 16 : 32) + uint f = (FACE_LEFT << BitFieldExtract(v, 0, 1)) + | (FACE_TOP << BitFieldExtract(v, 1, 1)) + | (FACE_FRONT << BitFieldExtract(v, 2, 1)); + + return f; +}; + +float3 GenerateVertexOfStandardCube(uint v) +{ + float3 p; - const float4 vPnts[] = {vP0, vP1, vP2, vP3}; + p.x = ((v & 1) == 0) ? -1 : 1; + p.y = ((v & 2) == 0) ? -1 : 1; + p.z = ((v & 4) == 0) ? -1 : 1; - // screen-space AABB of one quad (assuming no intersection) - float3 vMin, vMax; - for(int k=0; k<4; k++) - { - float fW = vPnts[k].w; - float fS = fW<0 ? -1 : 1; - float fWabs = fW<0 ? (-fW) : fW; - fW = fS * (fWabs> 1], 12 * (f & 1), 12); +} - posX[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.x; - posY[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.y; - posZ[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.z; - } +// 5 arrays * 128 elements * 4 bytes each = 2560 bytes. +groupshared float gs_HapVertsX[VERTS_PER_GROUP]; +groupshared float gs_HapVertsY[VERTS_PER_GROUP]; +groupshared float gs_HapVertsZ[VERTS_PER_GROUP]; +groupshared float gs_HapVertsW[VERTS_PER_GROUP]; +groupshared uint gs_BehindMasksOfVerts[VERTS_PER_GROUP]; // 6 planes each (HLSL does not support small data types) + +#ifndef PLATFORM_SUPPORTS_WAVE_INTRINSICS +// 1 array * 16 elements * 4 bytes each = 64 bytes. +groupshared uint gs_CullClipFaceMasks[LIGHTS_PER_GROUP]; // 6 faces each (HLSL does not support small data types) + +// 8 arrays * 16 elements * 4 bytes each = 512 bytes. +// These are actually floats reinterpreted as uints. +// The reason is because floating-point atomic operations are not supported. +groupshared uint gs_NdcAaBbMinPtX[LIGHTS_PER_GROUP]; +groupshared uint gs_NdcAaBbMaxPtX[LIGHTS_PER_GROUP]; +groupshared uint gs_NdcAaBbMinPtY[LIGHTS_PER_GROUP]; +groupshared uint gs_NdcAaBbMaxPtY[LIGHTS_PER_GROUP]; +groupshared uint gs_NdcAaBbMinPtZ[LIGHTS_PER_GROUP]; // Note that min-max Z cannot be trivially reconstructed +groupshared uint gs_NdcAaBbMaxPtZ[LIGHTS_PER_GROUP]; // from min-max W if the projection is oblique. +groupshared uint gs_NdcAaBbMinPtW[LIGHTS_PER_GROUP]; // View-space Z coordinate +groupshared uint gs_NdcAaBbMaxPtW[LIGHTS_PER_GROUP]; // View-space Z coordinate +#endif // PLATFORM_SUPPORTS_WAVE_INTRINSICS + +// Returns 'true' if it manages to cull the face. +bool TryCullFace(uint f, uint behindMasksOfVerts[NUM_VERTS]) +{ + uint cullMaskOfFace = FACE_MASK; // Initially behind + uint vertListOfFace = GetVertexListOfFace(f); + + for (uint j = 0; j < 4; j++) + { + uint v = BitFieldExtract(vertListOfFace, 3 * j, 3); + // Non-zero if ALL the vertices are behind any of the planes. + cullMaskOfFace &= behindMasksOfVerts[v]; } - // if not XBONE and not PLAYSTATION4 we need a memorybarrier here - // since we can't rely on the gpu cores being 64 wide. - // We need a pound define around this. - GroupMemoryBarrierWithGroupSync(); + return (cullMaskOfFace != 0); +} +struct ClipVertex +{ + float4 pt; // Homogeneous coordinate after perspective + float bc; // Boundary coordinate with respect to the plane 'p' +}; - { - int f=0; +ClipVertex CreateClipVertex(uint p, float4 v) +{ + bool evenPlane = (p & 1) == 0; - if(sideIndex==0 && lgtIndex<(int) g_iNrVisibLights) - { - // quick acceptance or rejection - unsigned int uCollectiveAnd = (unsigned int) -1; - unsigned int uCollectiveOr = 0; - for(f=0; f<6; f++) - { - unsigned int uFlagAnd = clipFlags[subLigt*6+f]&0x3f; - unsigned int uFlagOr = uFlagAnd; - for(int i=1; i<4; i++) - { - unsigned int uClipBits = (clipFlags[subLigt*6+f]>>(i*6))&0x3f; - uFlagAnd &= uClipBits; - uFlagOr |= uClipBits; - } + float c = v[p >> 1]; + float w = v.w; - uCollectiveAnd &= uFlagAnd; - uCollectiveOr |= uFlagOr; - } + ClipVertex cv; - bool bSetBoundYet = false; - float3 vMin=0.0, vMax=0.0; - if(uCollectiveAnd!=0 || uCollectiveOr==0) // all invisible or all visible (early out) - { - if(uCollectiveOr==0) // all visible - { - for(f=0; f<6; f++) - { - const int sideIndex = f; - - float3 vFaceMi = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 0], posY[subLigt*MAX_PNTS*2 + sideIndex + 0], posZ[subLigt*MAX_PNTS*2 + sideIndex + 0]); - float3 vFaceMa = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 6], posY[subLigt*MAX_PNTS*2 + sideIndex + 6], posZ[subLigt*MAX_PNTS*2 + sideIndex + 6]); - - for(int k=0; k<2; k++) - { - float3 vP = k==0 ? vFaceMi : vFaceMa; - if(f==0 && k==0) { vMin=vP; vMax=vP; } - - vMax = max(vMax, vP); vMin = min(vMin, vP); - } - } - bSetBoundYet=true; - } - } - else // :( need true clipping - { + cv.pt = v; + cv.bc = evenPlane ? c : w - c; // dot(PlaneEquation, HapVertex); - for(f=0; f<6; f++) - { - float3 q0, q1, q2, q3; - GetHullQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, f); + return cv; +} - // 4 vertices to a quad of the convex hull in post projection space - const float4 vP0 = mul(g_mProjection, float4(q0, 1)); - const float4 vP1 = mul(g_mProjection, float4(q1, 1)); - const float4 vP2 = mul(g_mProjection, float4(q2, 1)); - const float4 vP3 = mul(g_mProjection, float4(q3, 1)); +float4 IntersectEdgeAgainstPlane(ClipVertex v0, ClipVertex v1) +{ + float alpha = saturate(v0.bc * rcp(v0.bc - v1.bc)); // Guaranteed to lie between 0 and 1 + return lerp(v0.pt, v1.pt, alpha); +} - int iSrcIndex = 0; +void ClipPolygonAgainstPlane(uint p, uint srcBegin, uint srcSize, + inout float4 vertRingBuffer[MAX_CLIP_VERTS], + out uint dstBegin, out uint dstSize) +{ + dstBegin = srcBegin + srcSize; // Start at the end; we don't use modular arithmetic here + dstSize = 0; - int offs = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; + ClipVertex tailVert = CreateClipVertex(p, vertRingBuffer[(srcBegin + srcSize - 1) % MAX_CLIP_VERTS]); - // fill up source clip buffer with the quad - posX[offs+0]=vP0.x; posX[offs+1]=vP1.x; posX[offs+2]=vP2.x; posX[offs+3]=vP3.x; - posY[offs+0]=vP0.y; posY[offs+1]=vP1.y; posY[offs+2]=vP2.y; posY[offs+3]=vP3.y; - posZ[offs+0]=vP0.z; posZ[offs+1]=vP1.z; posZ[offs+2]=vP2.z; posZ[offs+3]=vP3.z; - posW[offs+0]=vP0.w; posW[offs+1]=vP1.w; posW[offs+2]=vP2.w; posW[offs+3]=vP3.w; +#ifdef OBTUSE_COMPILER + uint modSrcIdx = srcBegin % MAX_CLIP_VERTS; + uint modDstIdx = dstBegin % MAX_CLIP_VERTS; +#endif - int iNrSrcVerts = 4; + for (uint j = srcBegin; j < (srcBegin + srcSize); j++) + { + #ifndef OBTUSE_COMPILER + uint modSrcIdx = j % MAX_CLIP_VERTS; + #endif + ClipVertex leadVert = CreateClipVertex(p, vertRingBuffer[modSrcIdx]); + + // Execute Blinn's line clipping algorithm. + // Classify the line segment. 4 cases: + // 0. v0 out, v1 out -> add nothing + // 1. v0 in, v1 out -> add intersection + // 2. v0 out, v1 in -> add intersection, add v1 + // 3. v0 in, v1 in -> add v1 + // (bc >= 0) <-> in, (bc < 0) <-> out. Beware of -0. + + if ((tailVert.bc >= 0) != (leadVert.bc >= 0)) + { + // The line segment is guaranteed to cross the plane. + float4 clipVert = IntersectEdgeAgainstPlane(tailVert, leadVert); + #ifndef OBTUSE_COMPILER + uint modDstIdx = (dstBegin + dstSize++) % MAX_CLIP_VERTS; + #endif + vertRingBuffer[modDstIdx] = clipVert; + #ifdef OBTUSE_COMPILER + dstSize++; + modDstIdx++; + modDstIdx = (modDstIdx == MAX_CLIP_VERTS) ? 0 : modDstIdx; + #endif + } - // do true clipping - for(int p=0; p<6; p++) - { - const int nrVertsDst = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p); + if (leadVert.bc >= 0) + { + #ifndef OBTUSE_COMPILER + uint modDstIdx = (dstBegin + dstSize++) % MAX_CLIP_VERTS; + #endif + vertRingBuffer[modDstIdx] = leadVert.pt; + #ifdef OBTUSE_COMPILER + dstSize++; + modDstIdx++; + modDstIdx = (modDstIdx == MAX_CLIP_VERTS) ? 0 : modDstIdx; + #endif + } - iSrcIndex = 1-iSrcIndex; - iNrSrcVerts = nrVertsDst; + #ifdef OBTUSE_COMPILER + modSrcIdx++; + modSrcIdx = (modSrcIdx == MAX_CLIP_VERTS) ? 0 : modSrcIdx; + #endif + tailVert = leadVert; // Avoid recomputation and overwriting the vertex in the ring buffer + } +} - if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break; - } +void ClipFaceAgainstViewVolume(uint f, uint behindMasksOfVerts[NUM_VERTS], uint baseVertexOffset, + out uint srcBegin, out uint srcSize, + out float4 vertRingBuffer[MAX_CLIP_VERTS]) +{ + srcBegin = 0; + srcSize = 4; - // final clipped convex primitive is in src buffer - if(iNrSrcVerts>2) - { - int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; - for(int k=0; kradius) - { - float2 vMi, vMa; - bool2 bMi, bMa; - CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, center, radius); + uint i; // Avoid multiply-declared variable warning - vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy; - vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy; - } - else if(g_isOrthographic!=0) - { - float2 vMi = mul(g_mProjection, float4(center.xyz-radius,1)).xy; // no division needed for ortho - float2 vMa = mul(g_mProjection, float4(center.xyz+radius,1)).xy; // no division needed for ortho - vMin.xy = max(vMin.xy, vMi); - vMax.xy = min(vMax.xy, vMa); - } -#ifndef USE_OBLIQUE_MODE -#if USE_LEFT_HAND_CAMERA_SPACE - if((center.z-radius)>0.0) - { - float4 vPosF = mul(g_mProjection, float4(0,0,center.z-radius,1)); - vMin.z = max(vMin.z, vPosF.z/vPosF.w); - } - if((center.z+radius)>0.0) - { - float4 vPosB = mul(g_mProjection, float4(0,0,center.z+radius,1)); - vMax.z = min(vMax.z, vPosB.z/vPosB.w); - } -#else - if((center.z+radius)<0.0) - { - float4 vPosF = mul(g_mProjection, float4(0,0,center.z+radius,1)); - vMin.z = max(vMin.z, vPosF.z/vPosF.w); - } - if((center.z-radius)<0.0) - { - float4 vPosB = mul(g_mProjection, float4(0,0,center.z-radius,1)); - vMax.z = min(vMax.z, vPosB.z/vPosB.w); - } -#endif - else - { - vMin = float3(-3,-3,-3); - vMax = float3(-2,-2,-2); - } -#endif - } + // (1) Compute the vertices of the light volume. + for (i = 0; i < VERTS_PER_THREAD; i++) + { + uint v = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT; + + // rbpVerts[0] = rbpC - rbpX * scale - rbpY * scale - rbpZ; (-s, -s, -1) + // rbpVerts[1] = rbpC + rbpX * scale - rbpY * scale - rbpZ; (+s, -s, -1) + // rbpVerts[2] = rbpC - rbpX * scale + rbpY * scale - rbpZ; (-s, +s, -1) + // rbpVerts[3] = rbpC + rbpX * scale + rbpY * scale - rbpZ; (+s, +s, -1) + // rbpVerts[4] = rbpC - rbpX - rbpY + rbpZ; (-1, -1, +1) + // rbpVerts[5] = rbpC + rbpX - rbpY + rbpZ; (+1, -1, +1) + // rbpVerts[6] = rbpC - rbpX + rbpY + rbpZ; (-1, +1, +1) + // rbpVerts[7] = rbpC + rbpX + rbpY + rbpZ; (+1, +1, +1) + + float3 m = GenerateVertexOfStandardCube(v); + m.xy *= ((v & 4) == 0) ? scale : 1; // X, Y in [-scale, scale] + + float3 rbpVertVS = rbpC + m.x * rbpX + m.y * rbpY + m.z * rbpZ; + // Avoid generating (w = 0). + rbpVertVS.z = (abs(rbpVertVS.z) > FLT_MIN) ? rbpVertVS.z : FLT_MIN; + + float4 hapVert = mul(projMat, float4(rbpVertVS, 1)); + + // Warning: the W component may be negative. + // Flipping the -W pyramid by negating all coordinates is incorrect + // and will break both classification and clipping. + // For the orthographic projection, (w = 1). + + // Transform the X and Y components: [-w, w] -> [0, w]. + hapVert.xy = 0.5 * hapVert.xy + (0.5 * hapVert.w); + + // For each vertex, we must determine whether it is within the bounds. + // For culling and clipping, we must know, per culling plane, whether the vertex + // is in the positive or the negative half-space. + uint behindMask = 0; // Initially in front + + // Consider the vertex to be inside the view volume if: + // 0 <= x <= w + // 0 <= y <= w <-- include boundary points to avoid clipping them later + // 0 <= z <= w + // w is always valid + // TODO: epsilon for numerical robustness? + + for (uint j = 0; j < (NUM_PLANES / 2); j++) + { + float w = hapVert.w; + + behindMask |= (hapVert[j] < 0 ? 1 : 0) << (2 * j + 0); // Planes crossing '0' + behindMask |= (hapVert[j] > w ? 1 : 0) << (2 * j + 1); // Planes crossing 'w' + } + if (behindMask == 0) // Inside? + { + // Clamp to the bounds in case of numerical errors (may still generate -0). + float3 rapVertNDC = saturate(hapVert.xyz * rcp(hapVert.w)); - // we should consider doing a look-up here into a max depth mip chain - // to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth - //g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z); - //g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z); + ndcAaBbMinPt = min(ndcAaBbMinPt, float4(rapVertNDC, rbpVertVS.z)); + ndcAaBbMaxPt = max(ndcAaBbMaxPt, float4(rapVertNDC, rbpVertVS.z)); + } + else // Outside + { + cullClipFaceMask |= GetFaceMaskOfVertex(v); + } - // changed for unity + gs_HapVertsX[baseVertexOffset + v] = hapVert.x; + gs_HapVertsY[baseVertexOffset + v] = hapVert.y; + gs_HapVertsZ[baseVertexOffset + v] = hapVert.z; + gs_HapVertsW[baseVertexOffset + v] = hapVert.w; + gs_BehindMasksOfVerts[baseVertexOffset + v] = behindMask; + } - // Each light's AABB is represented by two float3s, the min and max of the box. - // And for stereo, we have two sets of lights. Therefore, each eye has a set of mins, followed by - // a set of maxs, and each set is equal to g_iNrVisibLights. - const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(lgtIndex, g_iNrVisibLights, eyeIndex); +#ifdef PLATFORM_SUPPORTS_WAVE_INTRINSICS + for (i = 0; i < FastLog2(THREADS_PER_LIGHT); i++) + { + uint andMask = PLATFORM_LANE_COUNT - 1; // All lanes + uint orMask = 0; // Plays no role + uint xorMask = 1 << i; // Flip bits one by one starting from the LSB - // build a linear (in camera space) min/max Z for the aabb. This is needed for clustered when oblique is active - float linMiZ, linMaZ; -#ifndef USE_OBLIQUE_MODE - float2 vMiZW = mul(g_mInvProjection, float4(vMin,1)).zw; - float2 vMaZW = mul(g_mInvProjection, float4(vMax,1)).zw; - linMiZ = vMiZW.x/vMiZW.y; linMaZ = vMaZW.x/vMaZW.y; + cullClipFaceMask |= LaneSwizzle(cullClipFaceMask, andMask, orMask, xorMask); + } #else - for(int i=0; i<8; i++) // establish 8 aabb points in camera space. - { - float3 vP = float3((i&1)!=0 ? vMax.x : vMin.x, (i&2)!=0 ? vMax.y : vMin.y, (i&4)!=0 ? vMax.z : vMin.z); + InterlockedOr(gs_CullClipFaceMasks[intraGroupLightIndex], cullClipFaceMask); - float2 v2Pc = mul(g_mInvProjection, float4(vP,1)).zw; - float linZ = v2Pc.x/v2Pc.y; + GroupMemoryBarrierWithGroupSync(); - if(i==0) { linMiZ=linZ; linMaZ=linZ; } -#if USE_LEFT_HAND_CAMERA_SPACE - linMiZ = min(linMiZ, linZ); linMaZ = max(linMaZ, linZ); -#else - linMiZ = max(linMiZ, linZ); linMaZ = min(linMaZ, linZ); + cullClipFaceMask = gs_CullClipFaceMasks[intraGroupLightIndex]; #endif - } - float z0 = center.z-radius, z1 = center.z+radius; -#if USE_LEFT_HAND_CAMERA_SPACE - linMiZ = max(linMiZ, z0); linMaZ = min(linMaZ, z1); -#else - linMiZ = min(linMiZ, z1); linMaZ = max(linMaZ, z0); -#endif + // (2) Test the corners of the view volume. + if (cullClipFaceMask != 0) + { + // The light is partially outside the view volume. + // Therefore, some of the corners of the view volume may be inside the light volume. + // We perform aggressive culling, so we must make sure they are accounted for. + // The light volume is a special type of cuboid - a right frustum. + // We can exploit this fact by building a light-space projection matrix. + float4x4 invTranslateToLightSpace = Translation4x4(-rbpC); + float4x4 invRotateAndScaleInLightSpace = Homogenize3x3(Invert3x3(ScaledRotation3x3(rbpX, rbpY, rbpZ))); + // TODO: avoid full inversion by using unit vectors and passing magnitudes explicitly. + + // This (orthographic) projection matrix maps a view-space point to a light-space [-1, 1]^3 cube. + float4x4 lightSpaceMatrix = mul(invRotateAndScaleInLightSpace, invTranslateToLightSpace); + + if (scale != 1) // Perspective light space? + { + // Compute the parameters of the perspective projection. + float s = scale; + float e = -1 - 2 * (s * rcp(1 - s)); // Signed distance from the origin to the eye + float n = -e - 1; // Distance from the eye to the near plane + float f = -e + 1; // Distance from the eye to the far plane + float g = f; // Distance from the eye to the projection plane -#endif + float4x4 invTranslateEye = Translation4x4(float3(0, 0, -e)); + float4x4 perspProjMatrix = PerspectiveProjection4x4(1, g, n, f); - g_vBoundsBuffer[boundsIndices.min] = float4(0.5*vMin.x + 0.5, 0.5*vMin.y + 0.5, vMin.z*VIEWPORT_SCALE_Z, linMiZ); - g_vBoundsBuffer[boundsIndices.max] = float4(0.5*vMax.x + 0.5, 0.5*vMax.y + 0.5, vMax.z*VIEWPORT_SCALE_Z, linMaZ); + lightSpaceMatrix = mul(mul(perspProjMatrix, invTranslateEye), lightSpaceMatrix); } - } -} + for (i = 0; i < VERTS_PER_THREAD; i++) + { + uint v = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT; -float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p); + float3 rapVertCS = GenerateVertexOfStandardCube(v); + rapVertCS.z = rapVertCS.z * 0.5 + 0.5; // View's projection matrix MUST map Z to [0, 1] -int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p) -{ - int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; - int offs_dst = (1-iSrcIndex)*MAX_PNTS+subLigt*MAX_PNTS*2; + float4 hbpVertVS = mul(invProjMat, float4(rapVertCS, 1)); // Clip to view space + float4 hapVertLS = mul(lightSpaceMatrix, hbpVertVS); // View to light space - float4 vPrev = float4(posX[offs_src+(iNrSrcVerts-1)], posY[offs_src+(iNrSrcVerts-1)], posZ[offs_src+(iNrSrcVerts-1)], posW[offs_src+(iNrSrcVerts-1)]); + // Consider the vertex to be inside the light volume if: + // -w < x < w + // -w < y < w <-- exclude boundary points, as we will not clip using these vertices + // -w < z < w <-- assume that Z-precision is not very important here + // 0 < w + // TODO: epsilon for numerical robustness? - int nrVertsDst = 0; + bool inside = Max3(abs(hapVertLS.x), abs(hapVertLS.y), abs(hapVertLS.z)) < hapVertLS.w; - unsigned int uMask = (1<P.w)?2:0) | ((P.y<-P.w)?4:0) | ((P.y>P.w)?8:0) | ((P.z<0)?16:0) | ((P.z>P.w)?32:0)) & (bIsObliqueClipPlane ? 0x1f : 0x3f); -} - -float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p) -{ - const float fS = p==4 ? 0 : ((p&1)==0 ? -1 : 1); - const int index = ((uint) p)/2; - float x1 = index==0 ? vVisib.x : (index==1 ? vVisib.y : vVisib.z); - float x0 = index==0 ? vInvisib.x : (index==1 ? vInvisib.y : vInvisib.z); - - //fS*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (x1-x0)*t + x0; - - const float fT = (fS*vInvisib.w-x0)/((x1-x0) - fS*(vVisib.w-vInvisib.w)); - float4 vNew = vVisib*fT + vInvisib*(1-fT); - - // just to be really anal we make sure the clipped against coordinate is precise - if(index==0) vNew.x = fS*vNew.w; - else if(index==1) vNew.y = fS*vNew.w; - else vNew.z = fS*vNew.w; - - return vNew; -} + uint behindMasksOfVerts[NUM_VERTS]; + for (i = 0; i < NUM_VERTS; i++) + { + behindMasksOfVerts[i] = gs_BehindMasksOfVerts[baseVertexOffset + i]; + } -float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane) -{ - return mul(plane, InvProjection); -} + // (3) Cull the faces. + { + const uint cullFaceMask = cullClipFaceMask; + const uint numFacesToCull = countbits(cullFaceMask); // [0, 6] -float4 EvalPlanePair(out bool validPlanes, float2 posXY_in, float r) -{ - // rotate by 90 degrees to avoid potential division by zero - bool bMustFlip = abs(posXY_in.y)0.0; + cullClipFaceMask &= LaneSwizzle(cullClipFaceMask, andMask, orMask, xorMask); + } +#else + InterlockedAnd(gs_CullClipFaceMasks[intraGroupLightIndex], cullClipFaceMask); - return res; -} + GroupMemoryBarrierWithGroupSync(); -void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r) -{ - bool validX, validY; - float4 planeX = EvalPlanePair(validX, float2(pos_view_space.x, pos_view_space.z), r); - float4 planeY = EvalPlanePair(validY, float2(pos_view_space.y, pos_view_space.z), r); + cullClipFaceMask = gs_CullClipFaceMasks[intraGroupLightIndex]; +#endif + // (4) Clip the faces. + { + const uint clipFaceMask = cullClipFaceMask; + const uint numFacesToClip = countbits(clipFaceMask); // [0, 6] -#if USE_LEFT_HAND_CAMERA_SPACE - planeX = planeX.zwxy; // need to swap left/right and top/bottom planes when using left hand system - planeY = planeY.zwxy; -#endif + for (i = 0; i < FACES_PER_THREAD; i++) + { + uint n = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT; - bIsMinValid = bool2(planeX.z<0, planeY.z<0) && bool2(validX,validY); - bIsMaxValid = bool2((-planeX.x)<0, (-planeY.x)<0) && bool2(validX,validY); + if (n < numFacesToClip) + { + uint f = NthBitLow(clipFaceMask, n); + + uint srcBegin, srcSize; + float4 vertRingBuffer[MAX_CLIP_VERTS]; + ClipFaceAgainstViewVolume(f, behindMasksOfVerts, baseVertexOffset, + srcBegin, srcSize, vertRingBuffer); + UpdateAaBb(srcBegin, srcSize, vertRingBuffer, g_isOrthographic != 0, invProjMat, + ndcAaBbMinPt, ndcAaBbMaxPt); + } + } + } - // hopefully the compiler takes zeros into account - // should be the case since the transformation in TransformPlaneToPostSpace() - // is done using multiply-adds and not dot product instructions. - float4 planeX0 = TransformPlaneToPostSpace(InvProjection, float4(planeX.x, 0, planeX.y, 0)); - float4 planeX1 = TransformPlaneToPostSpace(InvProjection, float4(planeX.z, 0, planeX.w, 0)); - float4 planeY0 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.x, planeY.y, 0)); - float4 planeY1 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.z, planeY.w, 0)); +#ifdef PLATFORM_SUPPORTS_WAVE_INTRINSICS + for (i = 0; i < FastLog2(THREADS_PER_LIGHT); i++) + { + uint andMask = PLATFORM_LANE_COUNT - 1; // All lanes + uint orMask = 0; // Plays no role + uint xorMask = 1 << i; // Flip bits one by one starting from the LSB + + ndcAaBbMinPt.x = min(ndcAaBbMinPt.x, LaneSwizzle(ndcAaBbMinPt.x, andMask, orMask, xorMask)); + ndcAaBbMaxPt.x = max(ndcAaBbMaxPt.x, LaneSwizzle(ndcAaBbMaxPt.x, andMask, orMask, xorMask)); + ndcAaBbMinPt.y = min(ndcAaBbMinPt.y, LaneSwizzle(ndcAaBbMinPt.y, andMask, orMask, xorMask)); + ndcAaBbMaxPt.y = max(ndcAaBbMaxPt.y, LaneSwizzle(ndcAaBbMaxPt.y, andMask, orMask, xorMask)); + ndcAaBbMinPt.z = min(ndcAaBbMinPt.z, LaneSwizzle(ndcAaBbMinPt.z, andMask, orMask, xorMask)); + ndcAaBbMaxPt.z = max(ndcAaBbMaxPt.z, LaneSwizzle(ndcAaBbMaxPt.z, andMask, orMask, xorMask)); + ndcAaBbMinPt.w = min(ndcAaBbMinPt.w, LaneSwizzle(ndcAaBbMinPt.w, andMask, orMask, xorMask)); + ndcAaBbMaxPt.w = max(ndcAaBbMaxPt.w, LaneSwizzle(ndcAaBbMaxPt.w, andMask, orMask, xorMask)); + } +#else + // Integer comparison works for floating-point numbers as long as the sign bit is 0. + // We must take care of -0 ourselves. saturate() does not help. + InterlockedMin(gs_NdcAaBbMinPtX[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMinPt.x))); + InterlockedMax(gs_NdcAaBbMaxPtX[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMaxPt.x))); + InterlockedMin(gs_NdcAaBbMinPtY[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMinPt.y))); + InterlockedMax(gs_NdcAaBbMaxPtY[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMaxPt.y))); + InterlockedMin(gs_NdcAaBbMinPtZ[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMinPt.z))); + InterlockedMax(gs_NdcAaBbMaxPtZ[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMaxPt.z))); + InterlockedMin(gs_NdcAaBbMinPtW[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMinPt.w))); + InterlockedMax(gs_NdcAaBbMaxPtW[intraGroupLightIndex], asuint(CLEAR_SIGN_BIT(ndcAaBbMaxPt.w))); + GroupMemoryBarrierWithGroupSync(); - // convert planes to the forms (1,0,0,D) and (0,1,0,D) - // 2D bound is given by -D components - float2 A = -float2(planeX0.w / planeX0.x, planeY0.w / planeY0.y); - float2 B = -float2(planeX1.w / planeX1.x, planeY1.w / planeY1.y); + ndcAaBbMinPt.x = asfloat(gs_NdcAaBbMinPtX[intraGroupLightIndex]); + ndcAaBbMaxPt.x = asfloat(gs_NdcAaBbMaxPtX[intraGroupLightIndex]); + ndcAaBbMinPt.y = asfloat(gs_NdcAaBbMinPtY[intraGroupLightIndex]); + ndcAaBbMaxPt.y = asfloat(gs_NdcAaBbMaxPtY[intraGroupLightIndex]); + ndcAaBbMinPt.z = asfloat(gs_NdcAaBbMinPtZ[intraGroupLightIndex]); + ndcAaBbMaxPt.z = asfloat(gs_NdcAaBbMaxPtZ[intraGroupLightIndex]); + ndcAaBbMinPt.w = asfloat(gs_NdcAaBbMinPtW[intraGroupLightIndex]); + ndcAaBbMaxPt.w = asfloat(gs_NdcAaBbMaxPtW[intraGroupLightIndex]); +#endif // PLATFORM_SUPPORTS_WAVE_INTRINSICS + + if ((globalLightIndex < g_iNrVisibLights) && (t % THREADS_PER_LIGHT == 0)) // Avoid bank conflicts + { + // For stereo, we have two sets of lights. Therefore, each eye has a set of mins + // followed by a set of maxs, and each set is equal to g_iNrVisibLights. + const ScreenSpaceBoundsIndices eyeAdjustedOutputOffsets = GenerateScreenSpaceBoundsIndices(globalLightIndex, g_iNrVisibLights, eyeIndex); - // Bound is complete - vMin = B; - vMax = A; + g_vBoundsBuffer[eyeAdjustedOutputOffsets.min] = ndcAaBbMinPt; + g_vBoundsBuffer[eyeAdjustedOutputOffsets.max] = ndcAaBbMaxPt; + } } diff --git a/com.unity.render-pipelines.high-definition/Runtime/Material/Decal/DecalSystem.cs b/com.unity.render-pipelines.high-definition/Runtime/Material/Decal/DecalSystem.cs index 509ae94928a..ec262a398f7 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Material/Decal/DecalSystem.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Material/Decal/DecalSystem.cs @@ -629,12 +629,12 @@ private void GetDecalVolumeDataAndBound(Matrix4x4 decalToWorld, Matrix4x4 worldT var influenceForwardVS = worldToView.MultiplyVector(influenceZ / influenceExtents.z); var influencePositionVS = worldToView.MultiplyPoint(pos); // place the mesh pivot in the center - m_Bounds[m_DecalDatasCount].center = influencePositionVS; + m_Bounds[m_DecalDatasCount].center = influencePositionVS; m_Bounds[m_DecalDatasCount].boxAxisX = influenceRightVS * influenceExtents.x; m_Bounds[m_DecalDatasCount].boxAxisY = influenceUpVS * influenceExtents.y; m_Bounds[m_DecalDatasCount].boxAxisZ = influenceForwardVS * influenceExtents.z; - m_Bounds[m_DecalDatasCount].scaleXY.Set(1.0f, 1.0f); - m_Bounds[m_DecalDatasCount].radius = influenceExtents.magnitude; + m_Bounds[m_DecalDatasCount].scaleXY = 1.0f; + m_Bounds[m_DecalDatasCount].radius = influenceExtents.magnitude; // The culling system culls pixels that are further // than a threshold to the box influence extents. diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDProfileId.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDProfileId.cs index 03c1236f7b4..0dfc16607b3 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDProfileId.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDProfileId.cs @@ -18,6 +18,7 @@ internal enum HDProfileId ScreenSpaceShadows, ScreenSpaceShadowsDebug, BuildLightList, + GenerateLightAABBs, ContactShadows, BlitToFinalRTDevBuildOnly, Distortion,