Unity-Technologies · EvgeniiG · Jul 20, 2020 · Aug 5, 2020 · Aug 5, 2020 · Aug 5, 2020
diff --git a/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl b/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl
@@ -852,11 +852,11 @@ void CompositeOver(real3 colorFront, real3 alphaFront,
 // Space transformations
 // ----------------------------------------------------------------------------
 
-static const float3x3 k_identity3x3 = {1, 0, 0,
+static const float3x3 k_Identity3x3 = {1, 0, 0,
                                        0, 1, 0,
                                        0, 0, 1};
 
-static const float4x4 k_identity4x4 = {1, 0, 0, 0,
+static const float4x4 k_Identity4x4 = {1, 0, 0, 0,
                                        0, 1, 0, 0,
                                        0, 0, 1, 0,
                                        0, 0, 0, 1};
@@ -880,7 +880,7 @@ float4 ComputeClipSpacePosition(float2 positionNDC, float deviceDepth)
 // (position = positionCS) => (clipSpaceTransform = use default)
 // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
 // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
-float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
+float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_Identity4x4)
 {
     return mul(clipSpaceTransform, float4(position, 1.0));
 }
@@ -890,7 +890,7 @@ float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k
 // (position = positionCS) => (clipSpaceTransform = use default)
 // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
 // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
-float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
+float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_Identity4x4)
 {
     float4 positionCS = ComputeClipSpacePosition(position, clipSpaceTransform);
 
@@ -912,7 +912,7 @@ float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpa
 // (position = positionCS) => (clipSpaceTransform = use default)
 // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
 // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
-float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
+float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_Identity4x4)
 {
     return ComputeNormalizedDeviceCoordinatesWithZ(position, clipSpaceTransform).xy;
 }

diff --git a/com.unity.render-pipelines.core/ShaderLibrary/ImageBasedLighting.hlsl b/com.unity.render-pipelines.core/ShaderLibrary/ImageBasedLighting.hlsl
@@ -183,7 +183,7 @@ void SampleVisibleAnisoGGXDir(real2 u,
     real3x3 viewToLocal;
     if (VeqN)
     {
-        viewToLocal = k_identity3x3;
+        viewToLocal = k_Identity3x3;
     }
     else
     {
@@ -366,7 +366,7 @@ real4 IntegrateGGXAndDisneyDiffuseFGD(real NdotV, real roughness, uint sampleCou
     real3 V   = real3(sqrt(1 - NdotV * NdotV), 0, NdotV);
     real4 acc = real4(0.0, 0.0, 0.0, 0.0);
 
-    real3x3 localToWorld = k_identity3x3;
+    real3x3 localToWorld = k_Identity3x3;
 
     for (uint i = 0; i < sampleCount; ++i)
     {

diff --git a/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl b/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl
@@ -43,6 +43,7 @@
 #define HALF_MIN 6.103515625e-5  // 2^-14, the same value for 10, 11 and 16-bit: https://www.khronos.org/opengl/wiki/Small_Float_Formats
 #define HALF_MAX 65504.0
 #define UINT_MAX 0xFFFFFFFFu
+#define INT_MAX  0x7FFFFFFF
 
 
 #ifdef SHADER_API_GLES

diff --git a/com.unity.render-pipelines.high-definition/Editor/ShaderGraph/SharedCode.template.hlsl b/com.unity.render-pipelines.high-definition/Editor/ShaderGraph/SharedCode.template.hlsl
@@ -7,7 +7,7 @@
         // Init to some default value to make the computer quiet (else it output 'divide by zero' warning even if value is not used).
         // TODO: this is a really poor workaround, but the variable is used in a bunch of places
         // to compute normals which are then passed on elsewhere to compute other values...
-        output.tangentToWorld = k_identity3x3;
+        output.tangentToWorld = k_Identity3x3;
         output.positionSS = input.positionCS;       // input.positionCS is SV_Position
 
         $FragInputs.positionRWS:        output.positionRWS = input.positionRWS;

diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl
@@ -3,31 +3,33 @@
 
 // Used to index into our SFiniteLightBound (g_data) and
 // LightVolumeData (_LightVolumeData) buffers.
-int GenerateLightCullDataIndex(int lightIndex, uint numVisibleLights, uint eyeIndex)
+uint GenerateLightCullDataIndex(uint lightIndex, uint numVisibleLights, uint eyeIndex)
 {
+    lightIndex = min(lightIndex, numVisibleLights - 1); // Stay within bounds
+
     // For monoscopic, there is just one set of light cull data structs.
     // In stereo, all of the left eye structs are first, followed by the right eye structs.
-    const int perEyeBaseIndex = (int)eyeIndex * (int)numVisibleLights;
+    const uint perEyeBaseIndex = eyeIndex * numVisibleLights;
     return (perEyeBaseIndex + lightIndex);
 }
 
 struct ScreenSpaceBoundsIndices
 {
-    int min;
-    int max;
+    uint min;
+    uint max;
 };
 
 // The returned values are used to index into our AABB screen space bounding box buffer
 // Usually named g_vBoundsBuffer.  The two values represent the min/max indices.
-ScreenSpaceBoundsIndices GenerateScreenSpaceBoundsIndices(int lightIndex, uint numVisibleLights, uint eyeIndex)
+ScreenSpaceBoundsIndices GenerateScreenSpaceBoundsIndices(uint lightIndex, uint numVisibleLights, uint eyeIndex)
 {
     // In the monoscopic mode, there is one set of bounds (min,max -> 2 * g_iNrVisibLights)
     // In stereo, there are two sets of bounds (leftMin, leftMax, rightMin, rightMax -> 4 * g_iNrVisibLights)
-    const int eyeRelativeBase = (int)eyeIndex * 2 * (int)numVisibleLights;
+    const uint eyeRelativeBase = eyeIndex * 2 * numVisibleLights;
 
     ScreenSpaceBoundsIndices indices;
     indices.min = eyeRelativeBase + lightIndex;
-    indices.max = eyeRelativeBase + lightIndex + (int)numVisibleLights;
+    indices.max = indices.min + numVisibleLights;
 
     return indices;
 }

diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs
@@ -128,8 +128,8 @@ struct SFiniteLightBound
         public Vector3 boxAxisY; // Scaled by the extents (half-size)
         public Vector3 boxAxisZ; // Scaled by the extents (half-size)
         public Vector3 center;   // Center of the bounds (box) in camera space
-        public Vector2 scaleXY;  // Scale applied to the top of the box to turn it into a truncated pyramid
-        public float radius;     // Circumscribed sphere for the bounds (box)
+        public float   scaleXY;  // Scale applied to the top of the box to turn it into a truncated pyramid (X = Y)
+        public float   radius;     // Circumscribed sphere for the bounds (box)
     };
 
     [GenerateHLSL]
@@ -564,8 +564,6 @@ public void Allocate()
         Shader deferredTilePixelShader { get { return defaultResources.shaders.deferredTilePS; } }
 
 
-        static int s_GenAABBKernel;
-        static int s_GenAABBKernel_Oblique;
         static int s_GenListPerTileKernel;
         static int s_GenListPerTileKernel_Oblique;
         static int s_GenListPerVoxelKernel;
@@ -782,9 +780,6 @@ void InitializeLightLoop(IBLFilterBSDF[] iBLFilterBSDFArray)
             m_MaxLightsOnScreen = m_MaxDirectionalLightsOnScreen + m_MaxPunctualLightsOnScreen + m_MaxAreaLightsOnScreen + m_MaxEnvLightsOnScreen;
             m_MaxPlanarReflectionOnScreen = lightLoopSettings.maxPlanarReflectionOnScreen;
 
-            s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB");
-            s_GenAABBKernel_Oblique = buildScreenAABBShader.FindKernel("ScreenBoundsAABB_Oblique");
-
             // Cluster
             {
                 s_ClearVoxelAtomicKernel = buildPerVoxelLightListShader.FindKernel("ClearAtomic");
@@ -1628,9 +1623,9 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 fAltDx *= range; fAltDy *= range;
 
                 // Handle case of pyramid with this select (currently unused)
-                var altDist = Mathf.Sqrt(fAltDy * fAltDy + (true ? 1.0f : 2.0f) * fAltDx * fAltDx);
-                bound.radius = altDist > (0.5f * range) ? altDist : (0.5f * range);       // will always pick fAltDist
-                bound.scaleXY = squeeze ? new Vector2(0.01f, 0.01f) : new Vector2(1.0f, 1.0f);
+                var altDist   = Mathf.Sqrt(fAltDy * fAltDy + (true ? 1.0f : 2.0f) * fAltDx * fAltDx);
+                bound.radius  = altDist > (0.5f * range) ? altDist : (0.5f * range);       // will always pick fAltDist
+                bound.scaleXY = squeeze ? 0.01f : 1.0f;
 
                 lightVolumeData.lightAxisX = vx;
                 lightVolumeData.lightAxisY = vy;
@@ -1642,16 +1637,19 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
             }
             else if (gpuLightType == GPULightType.Point)
             {
-                Vector3 vx = xAxisVS;
-                Vector3 vy = yAxisVS;
-                Vector3 vz = zAxisVS;
+                // Construct a view-space axis-aligned bounding cube around the bounding sphere.
+                // This allows us to utilize the same polygon clipping technique for all lights.
+                // Non-axis-aligned vectors may result in a larger screen-space AABB.
+                Vector3 vx = new Vector3(1, 0, 0);
+                Vector3 vy = new Vector3(0, 1, 0);
+                Vector3 vz = new Vector3(0, 0, 1);
 
                 bound.center = positionVS;
                 bound.boxAxisX = vx * range;
                 bound.boxAxisY = vy * range;
                 bound.boxAxisZ = vz * range;
-                bound.scaleXY.Set(1.0f, 1.0f);
-                bound.radius = range;
+                bound.scaleXY  = 1.0f;
+                bound.radius   = range;
 
                 // fill up ldata
                 lightVolumeData.lightAxisX = vx;
@@ -1672,7 +1670,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 bound.boxAxisY = extents.y * yAxisVS;
                 bound.boxAxisZ = extents.z * zAxisVS;
                 bound.radius   = extents.magnitude;
-                bound.scaleXY.Set(1.0f, 1.0f);
+                bound.scaleXY  = 1.0f;
 
                 lightVolumeData.lightPos   = centerVS;
                 lightVolumeData.lightAxisX = xAxisVS;
@@ -1692,7 +1690,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 bound.boxAxisY = extents.y * yAxisVS;
                 bound.boxAxisZ = extents.z * zAxisVS;
                 bound.radius   = extents.magnitude;
-                bound.scaleXY.Set(1.0f, 1.0f);
+                bound.scaleXY  = 1.0f;
 
                 lightVolumeData.lightPos   = centerVS;
                 lightVolumeData.lightAxisX = xAxisVS;
@@ -1712,7 +1710,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 bound.boxAxisY = extents.y * yAxisVS;
                 bound.boxAxisZ = extents.z * zAxisVS;
                 bound.radius   = extents.magnitude;
-                bound.scaleXY.Set(1.0f, 1.0f);
+                bound.scaleXY  = 1.0f;
 
                 lightVolumeData.lightPos   = centerVS;
                 lightVolumeData.lightAxisX = xAxisVS;
@@ -1891,8 +1889,8 @@ void GetEnvLightVolumeDataAndBound(HDProbe probe, LightVolumeType lightVolumeTyp
                     bound.boxAxisX = influenceRightVS * influenceExtents.x;
                     bound.boxAxisY = influenceUpVS * influenceExtents.x;
                     bound.boxAxisZ = influenceForwardVS * influenceExtents.x;
-                    bound.scaleXY.Set(1.0f, 1.0f);
-                    bound.radius = influenceExtents.x;
+                    bound.scaleXY  = 1.0f;
+                    bound.radius   = influenceExtents.x;
                     break;
                 }
                 case LightVolumeType.Box:
@@ -1901,8 +1899,8 @@ void GetEnvLightVolumeDataAndBound(HDProbe probe, LightVolumeType lightVolumeTyp
                     bound.boxAxisX = influenceExtents.x * influenceRightVS;
                     bound.boxAxisY = influenceExtents.y * influenceUpVS;
                     bound.boxAxisZ = influenceExtents.z * influenceForwardVS;
-                    bound.scaleXY.Set(1.0f, 1.0f);
-                    bound.radius = influenceExtents.magnitude;
+                    bound.scaleXY  = 1.0f;
+                    bound.radius   = influenceExtents.magnitude;
 
                     // The culling system culls pixels that are further
                     //   than a threshold to the box influence extents.
@@ -1942,7 +1940,7 @@ void AddBoxVolumeDataAndBound(OrientedBBox obb, LightCategory category, LightFea
             bound.boxAxisY = obb.extentY * upVS;
             bound.boxAxisZ = obb.extentZ * forwardVS;
             bound.radius   = extents.magnitude;
-            bound.scaleXY.Set(1.0f, 1.0f);
+            bound.scaleXY  = 1.0f;
 
             // The culling system culls pixels that are further
             //   than a threshold to the box influence extents.
@@ -2771,19 +2769,27 @@ static void GenerateLightsScreenSpaceAABBs(in BuildGPULightListParameters parame
         {
             if (parameters.totalLightCount != 0)
             {
-                var tileAndCluster = resources.tileAndClusterData;
+                using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.GenerateLightAABBs)))
+                {
+                    var tileAndCluster = resources.tileAndClusterData;
+
+                    cmd.SetComputeIntParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_isOrthographic, parameters.isOrthographic ? 1 : 0);
 
-                cmd.SetComputeIntParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_isOrthographic, parameters.isOrthographic ? 1 : 0);
+                    // With XR single-pass, we have one set of light bounds per view to iterate over (bounds are in view space for each view)
+                    cmd.SetComputeIntParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_iNrVisibLights, parameters.totalLightCount);
+                    cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_data, tileAndCluster.convexBoundsBuffer);
+                    cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_vBoundsBuffer, tileAndCluster.AABBBoundsBuffer);
 
-                // With XR single-pass, we have one set of light bounds per view to iterate over (bounds are in view space for each view)
-                cmd.SetComputeIntParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_iNrVisibLights, parameters.totalLightCount);
-                cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_data, tileAndCluster.convexBoundsBuffer);
-                cmd.SetComputeBufferParam(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, HDShaderIDs.g_vBoundsBuffer, tileAndCluster.AABBBoundsBuffer);
+                    cmd.SetComputeMatrixArrayParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_mProjectionArr, parameters.lightListProjHMatrices);
+                    cmd.SetComputeMatrixArrayParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_mInvProjectionArr, parameters.lightListInvProjHMatrices);
 
-                cmd.SetComputeMatrixArrayParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_mProjectionArr, parameters.lightListProjHMatrices);
-                cmd.SetComputeMatrixArrayParam(parameters.screenSpaceAABBShader, HDShaderIDs.g_mInvProjectionArr, parameters.lightListInvProjHMatrices);
+                    const int threadsPerLight = 4;  // Shader: THREADS_PER_LIGHT (4)
+                    const int threadsPerGroup = 64; // Shader: THREADS_PER_GROUP (64)
 
-                cmd.DispatchCompute(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, (parameters.totalLightCount + 7) / 8, parameters.viewCount, 1);
+                    int groupCount = HDUtils.DivRoundUp(parameters.totalLightCount * threadsPerLight, threadsPerGroup);
+
+                    cmd.DispatchCompute(parameters.screenSpaceAABBShader, parameters.screenSpaceAABBKernel, groupCount, parameters.viewCount, 1);
+                }
             }
         }
 
@@ -3067,7 +3073,7 @@ BuildGPULightListParameters PrepareBuildGPULightListParameters(HDCamera hdCamera
 
             // Screen space AABB
             parameters.screenSpaceAABBShader = buildScreenAABBShader;
-            parameters.screenSpaceAABBKernel = isProjectionOblique ? s_GenAABBKernel_Oblique : s_GenAABBKernel;
+            parameters.screenSpaceAABBKernel = 0;
             // camera to screen matrix (and it's inverse)
             for (int viewIndex = 0; viewIndex < hdCamera.viewCount; ++viewIndex)
             {

diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl
@@ -66,7 +66,7 @@ struct SFiniteLightBound
     float3 boxAxisY;
     float3 boxAxisZ;
     float3 center;
-    float2 scaleXY;
+    float scaleXY;
     float radius;
 };