diff --git a/com.unity.render-pipelines.high-definition/CHANGELOG.md b/com.unity.render-pipelines.high-definition/CHANGELOG.md index 230984eda52..25f1e0857e0 100644 --- a/com.unity.render-pipelines.high-definition/CHANGELOG.md +++ b/com.unity.render-pipelines.high-definition/CHANGELOG.md @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Fix API warnings in Matcap mode on Metal. - Fix D3D validation layer errors w.r.t shadow textures when an atlas is not used. - Fixed anchor position offset property for the Light Anchor component. (case 1362809) +- Fixed minor performance issues in SSGI (case 1367144). ## [13.0.0] - 2021-09-01 diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.compute index 0fd78a21bcd..27c460d12ce 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.compute @@ -1,4 +1,3 @@ -#pragma kernel BilateralUpSampleSingle BILATERAL_UPSAMPLE=BilateralUpSampleSingle SINGLE_CHANNEL #pragma kernel BilateralUpSampleColor BILATERAL_UPSAMPLE=BilateralUpSampleColor //#pragma enable_d3d11_debug_symbols @@ -10,78 +9,103 @@ #include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl" +#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl" // Mip chain depth buffer TEXTURE2D_X(_DepthTexture); // The half resolution texture 
that needs to be upscaled TEXTURE2D_X(_LowResolutionTexture); -// Constant buffer where all variables should land -CBUFFER_START(UnityScreenSpaceGlobalIllumination) - float4 _HalfScreenSize; - float2 _DepthPyramidFirstMipLevelOffset; -CBUFFER_END +// LDS that stores the half resolution data (a 6x6 block of half-res texels) +groupshared float3 gs_cacheLighting[36]; +groupshared float gs_cacheDepth[36]; + +void FillUpsampleDataLDS(uint groupIndex, uint2 groupOrigin) +{ + // Define which value we will be accessing with this worker thread + int acessCoordX = groupIndex % 6; + int acessCoordY = groupIndex / 6; + + // Everything we are accessing is in intermediate res (half res). + uint2 traceGroupOrigin = groupOrigin / 2; + + // The initial position of the access + int2 originXY = traceGroupOrigin - int2(1, 1); + + // Compute the sample position + int2 sampleCoord = int2(clamp(originXY.x + acessCoordX, 0, _HalfScreenSize.x - 1), clamp(originXY.y + acessCoordY, 0, _HalfScreenSize.y - 1)); + + // Sample and store into the LDS + gs_cacheLighting[groupIndex] = LOAD_TEXTURE2D_X(_LowResolutionTexture, sampleCoord).xyz; + // As an input we are not using the depth pyramid, but the full resolution depth (so we need to make sure to read from there for the upsample as well). 
+ gs_cacheDepth[groupIndex] = LOAD_TEXTURE2D_X(_DepthTexture, sampleCoord * 2).x; +} + +uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset) +{ + // Compute the tap coordinate in the 6x6 grid (the +1 accounts for the one-texel border loaded around the tile) + uint2 tapAddress = (uint2)((int2)(groupThreadId / 2 + 1) + offset); + return clamp((uint)(tapAddress.x) % 6 + tapAddress.y * 6, 0, 35); +} + +// Function that fills the struct as we cannot use arrays +void FillUpsampleNeighborhoodData_2x2(int2 groupThreadId, int subRegionIdx, out NeighborhoodUpsampleData2x2_RGB neighborhoodData) +{ + // Fill the sample data + int tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx].x, (int)_TapOffsets[2 * subRegionIdx].y)); + neighborhoodData.lowValue0 = max(0, (gs_cacheLighting[tapIdx])); + neighborhoodData.lowDepth.x = gs_cacheDepth[tapIdx]; + neighborhoodData.lowWeight.x = _DistanceBasedWeights[subRegionIdx].x; + + tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx].z, (int)_TapOffsets[2 * subRegionIdx].w)); + neighborhoodData.lowValue1 = max(0, (gs_cacheLighting[tapIdx])); + neighborhoodData.lowDepth.y = gs_cacheDepth[tapIdx]; + neighborhoodData.lowWeight.y = _DistanceBasedWeights[subRegionIdx].y; + + tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx + 1].x, (int)_TapOffsets[2 * subRegionIdx + 1].y)); + neighborhoodData.lowValue2 = max(0, (gs_cacheLighting[tapIdx])); + neighborhoodData.lowDepth.z = gs_cacheDepth[tapIdx]; + neighborhoodData.lowWeight.z = _DistanceBasedWeights[subRegionIdx].z; + + tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx + 1].z, (int)_TapOffsets[2 * subRegionIdx + 1].w)); + neighborhoodData.lowValue3 = max(0, (gs_cacheLighting[tapIdx])); + neighborhoodData.lowDepth.w = gs_cacheDepth[tapIdx]; + neighborhoodData.lowWeight.w = _DistanceBasedWeights[subRegionIdx].w; +} // The output of our upscaling pass -RW_TEXTURE2D_X(float4, _OutputUpscaledTexture); +RW_TEXTURE2D_X(float3, 
_OutputUpscaledTexture); [numthreads(BILATERAL_UPSAMPLE_TILE_SIZE, BILATERAL_UPSAMPLE_TILE_SIZE, 1)] -void BILATERAL_UPSAMPLE(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID) +void BILATERAL_UPSAMPLE(uint3 currentCoord : SV_DispatchThreadID, + int groupIndex : SV_GroupIndex, + uint2 groupThreadId : SV_GroupThreadID, + uint2 groupId : SV_GroupID) { - UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z); + UNITY_XR_ASSIGN_VIEW_INDEX(currentCoord.z); + + // Only 36 workers of the 64 region do the pre-fetching + if (groupIndex < 36) + { + // Load 1 value per thread + FillUpsampleDataLDS(groupIndex, groupId * 8); + } + + // Make sure all values are loaded in LDS by now (the barrier has to stay before the out-of-bounds early-out so that every thread of the group reaches it). + GroupMemoryBarrierWithGroupSync(); // If out of bounds, discard - if (any(dispatchThreadId.xy > uint2(_ScreenSize.xy))) + if (any(currentCoord.xy >= uint2(_ScreenSize.xy))) return; - // The pixel position to process - const uint2 outputCoord = dispatchThreadId.xy; - // Read the depth value as early as possible and use it as late as possible - float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, outputCoord).x; - - // Define what is the half resolution of this pixel - int2 halfResolution = (int2)(outputCoord / 2); - - // Define what is the half resolution of this pixel - int2 coordRepresenatative = halfResolution * 2; - - // Compute the shift within the half res - int2 halfResShift = outputCoord - coordRepresenatative; - - // Compute the shift of the pixel in the group - int shiftIndex = halfResShift.y * 2 + halfResShift.x; - - // Compute the shift in the upscale table - int offsetInCoordTable = shiftIndex * 4; - - // Compute the half resolution coordinates we should tap from - int2 halfResTap0 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable], _HalfScreenSize.xy - 1); - int2 halfResTap1 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 1], _HalfScreenSize.xy - 1); - int2 halfResTap2 = clamp(0, 
halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 2], _HalfScreenSize.xy - 1); - int2 halfResTap3 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 3], _HalfScreenSize.xy - 1); - - // Grab the depth of all the half resolution pixels - float4 lowDepths = float4(LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap0).x - , LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap1).x - , LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap2).x - , LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap3).x); - -#if SINGLE_CHANNEL - // Grab all the scalar values required for upscale - float4 lowRes = float4(_LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)].x - , _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)].x - , _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)].x - , _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)].x); - // Upscale and output - _OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpSingle(hiResDepth, lowDepths, lowRes); -#else - // Grab all the color values required for upscale - float4 lowResCol0 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)]); - float4 lowResCol1 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)]); - float4 lowResCol2 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)]); - float4 lowResCol3 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)]); - - _OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpColor(hiResDepth, lowDepths, lowResCol0, lowResCol1, lowResCol2, lowResCol3); -#endif + float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord.xy).x; + + // Tap the neighborhood data from + NeighborhoodUpsampleData2x2_RGB upsampleData; + int localIndex = (currentCoord.x & 1) + (currentCoord.y & 1) * 2; + FillUpsampleNeighborhoodData_2x2(groupThreadId, localIndex, upsampleData); + + // 
Upscale and return the result + _OutputUpscaledTexture[COORD_TEXTURE2D_X(currentCoord.xy)] = BilUpColor2x2_RGB(hiResDepth, upsampleData); } diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl index fb784dfec71..afc31779d15 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl @@ -173,3 +173,32 @@ float4 BilUpColor3x3(float highDepth, in NeighborhoodUpsampleData3x3 data) + float4(_NoiseFilterStrength, _NoiseFilterStrength, _NoiseFilterStrength, 0.0); return WeightedSum / TotalWeight; } + +// Due to compiler issues, it is not possible to use arrays to store the neighborhood values, we then store them in this structure +struct NeighborhoodUpsampleData2x2_RGB +{ + // Low resolution depths + float4 lowDepth; + + // The low resolution values + float3 lowValue0; + float3 lowValue1; + float3 lowValue2; + float3 lowValue3; + + // Weights used to combine the neighborhood + float4 lowWeight; +}; + +// The bilateral upscale function (2x2 neighborhood, RGB values) +float3 BilUpColor2x2_RGB(float highDepth, in NeighborhoodUpsampleData2x2_RGB data) +{ + float4 combinedWeights = data.lowWeight / (abs(highDepth - data.lowDepth) + _UpsampleTolerance); + float TotalWeight = combinedWeights.x + combinedWeights.y + combinedWeights.z + combinedWeights.w + _NoiseFilterStrength; + float3 WeightedSum = data.lowValue0.xyz * combinedWeights.x + + data.lowValue1.xyz * combinedWeights.y + + data.lowValue2.xyz * combinedWeights.z + + data.lowValue3.xyz * combinedWeights.w + + _NoiseFilterStrength; + return WeightedSum / TotalWeight; +} diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs 
b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs new file mode 100644 index 00000000000..99d69795296 --- /dev/null +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs @@ -0,0 +1,68 @@ +using System; +using UnityEngine.Experimental.Rendering; +using UnityEngine.Experimental.Rendering.RenderGraphModule; + +namespace UnityEngine.Rendering.HighDefinition +{ + internal class BilateralUpsample + { + // This is the representation of the half resolution neighborhood + // |-----|-----|-----| + // | | | | + // |-----|-----|-----| + // | | | | + // |-----|-----|-----| + // | | | | + // |-----|-----|-----| + + // This is the representation of the full resolution neighborhood + // |-----|-----|-----| + // | | | | + // |-----|--|--|-----| + // | |--|--| | + // |-----|--|--|-----| + // | | | | + // |-----|-----|-----| + + // The base is centered at (0, 0) at the center of the center pixel: + // The 4 full res pixels are centered {L->R, T->B} at {-0.25, -0.25}, {0.25, -0.25} + // {-0.25, 0.25}, {0.25, 0.25} + // + // The 9 half res pixels are placed {L->R, T->B} at {-1.0, -1.0}, {0.0, -1.0}, {1.0, -1.0} + // {-1.0, 0.0}, {0.0, 0.0}, {1.0, 0.0} + // {-1.0, 1.0}, {0.0, 1.0}, {1.0, 1.0} + + // Set of pre-generated weights (L->R, T->B). After experimentation, the final weighting function is exp(-distance^2) + static internal float[] distanceBasedWeights_3x3 = new float[] { 0.324652f, 0.535261f, 0.119433f, 0.535261f, 0.882497f, 0.196912f, 0.119433f, 0.196912f, 0.0439369f, + 0.119433f, 0.535261f, 0.324652f, 0.196912f, 0.882497f, 0.535261f, 0.0439369f, 0.196912f, 0.119433f, + 0.119433f, 0.196912f, 0.0439369f, 0.535261f, 0.882497f, 0.196912f, 0.324652f, 0.535261f, 0.119433f, + 0.0439369f, 0.196912f, 0.119433f, 0.196912f, 0.882497f, 0.535261f, 0.119433f, 0.535261f, 0.324652f}; + + // Set of pre-generated weights (L->R, T->B). 
After experimentation, the final weighting function is exp(-distance^2) + static internal float[] distanceBasedWeights_2x2 = new float[] { 0.324652f, 0.535261f, 0.535261f, 0.882497f, + 0.535261f, 0.324652f, 0.882497f, 0.535261f, + 0.535261f, 0.882497f, 0.324652f, 0.535261f, + 0.882497f, 0.535261f, 0.535261f, 0.324652f}; + + static internal float[] tapOffsets_2x2 = new float[] { -1.0f, -1.0f, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 1.0f, -1.0f, 0.0f, 0.0f, 1.0f, 0.0f, + -1.0f, 0.0f, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f, + 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f}; + } + + + [GenerateHLSL(needAccessors = false, generateCBuffer = true)] + unsafe struct ShaderVariablesBilateralUpsample + { + // Half resolution we are up sampling from + public Vector4 _HalfScreenSize; + + // Weights used for the bilateral up sample + [HLSLArray(3 * 4, typeof(Vector4))] + public fixed float _DistanceBasedWeights[12 * 4]; + + // Offsets used to tap into the half resolution neighbors + [HLSLArray(2 * 4, typeof(Vector4))] + public fixed float _TapOffsets[8 * 4]; + } +} diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl new file mode 100644 index 00000000000..cbe3ec1027b --- /dev/null +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl @@ -0,0 +1,16 @@ +// +// This file was automatically generated. Please don't edit by hand. 
Execute Editor command [ Edit > Rendering > Generate Shader Includes ] instead +// + +#ifndef BILATERALUPSAMPLEDEF_CS_HLSL +#define BILATERALUPSAMPLEDEF_CS_HLSL +// Generated from UnityEngine.Rendering.HighDefinition.ShaderVariablesBilateralUpsample +// PackingRules = Exact +CBUFFER_START(ShaderVariablesBilateralUpsample) + float4 _HalfScreenSize; + float4 _DistanceBasedWeights[12]; + float4 _TapOffsets[8]; +CBUFFER_END + + +#endif diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl.meta new file mode 100644 index 00000000000..8f2c4c89d36 --- /dev/null +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: f7ac8eb9772bfe6438d5ddd73c5e4fcb +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.meta new file mode 100644 index 00000000000..9b4fc869e7c --- /dev/null +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 60f785490b2403b479ba132fc159b661 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/HDRenderPipeline.ScreenSpaceGlobalIllumination.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/HDRenderPipeline.ScreenSpaceGlobalIllumination.cs index 
0397335857a..a6830857cd3 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/HDRenderPipeline.ScreenSpaceGlobalIllumination.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/HDRenderPipeline.ScreenSpaceGlobalIllumination.cs @@ -217,7 +217,7 @@ TextureHandle TraceSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllumi // Output textures passData.outputBuffer = builder.WriteTexture(renderGraph.CreateTexture(new TextureDesc(Vector2.one, true, true) - { colorFormat = GraphicsFormat.R16G16B16A16_SFloat, enableRandomWrite = true, name = "SSGI Color" })); + { colorFormat = GraphicsFormat.B10G11R11_UFloatPack32, enableRandomWrite = true, name = "SSGI Color" })); builder.SetRenderFunc( (TraceSSGIPassData data, RenderGraphContext ctx) => @@ -291,9 +291,7 @@ class UpscaleSSGIPassData public int texHeight; public int viewCount; public Vector4 halfScreenSize; - - // Generation parameters - public Vector2 firstMipOffset; + public ShaderVariablesBilateralUpsample shaderVariablesBilateralUpsampleCB; // Compute Shader public ComputeShader bilateralUpsampleCS; @@ -314,10 +312,16 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu passData.texWidth = hdCamera.actualWidth; passData.texHeight = hdCamera.actualHeight; passData.viewCount = hdCamera.viewCount; - passData.halfScreenSize.Set(passData.texWidth / 2, passData.texHeight / 2, 1.0f / (passData.texWidth * 0.5f), 1.0f / (passData.texHeight * 0.5f)); - // Set the generation parameters - passData.firstMipOffset.Set(HDShadowUtils.Asfloat((uint)info.mipLevelOffsets[1].x), HDShadowUtils.Asfloat((uint)info.mipLevelOffsets[1].y)); + passData.shaderVariablesBilateralUpsampleCB._HalfScreenSize = new Vector4(passData.texWidth / 2, passData.texHeight / 2, 1.0f / (passData.texWidth * 0.5f), 1.0f / (passData.texHeight * 0.5f)); + unsafe + { + for (int i = 0; i < 16; ++i) + 
passData.shaderVariablesBilateralUpsampleCB._DistanceBasedWeights[i] = BilateralUpsample.distanceBasedWeights_2x2[i]; + + for (int i = 0; i < 32; ++i) + passData.shaderVariablesBilateralUpsampleCB._TapOffsets[i] = BilateralUpsample.tapOffsets_2x2[i]; + } // Grab the right kernel passData.bilateralUpsampleCS = m_Asset.renderPipelineResources.shaders.bilateralUpsampleCS; @@ -326,7 +330,7 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu passData.depthTexture = builder.ReadTexture(depthPyramid); passData.inputBuffer = builder.ReadTexture(inputBuffer); passData.outputBuffer = builder.WriteTexture(renderGraph.CreateTexture(new TextureDesc(Vector2.one, true, true) - { colorFormat = GraphicsFormat.R16G16B16A16_SFloat, enableRandomWrite = true, name = "SSGI Final" })); + { colorFormat = GraphicsFormat.B10G11R11_UFloatPack32, enableRandomWrite = true, name = "SSGI Final" })); builder.SetRenderFunc( (UpscaleSSGIPassData data, RenderGraphContext ctx) => @@ -336,9 +340,7 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu int numTilesXHR = (data.texWidth + (ssgiTileSize - 1)) / ssgiTileSize; int numTilesYHR = (data.texHeight + (ssgiTileSize - 1)) / ssgiTileSize; - // Inject the input scalars - ctx.cmd.SetComputeVectorParam(data.bilateralUpsampleCS, HDShaderIDs._HalfScreenSize, data.halfScreenSize); - ctx.cmd.SetComputeVectorParam(data.bilateralUpsampleCS, HDShaderIDs._DepthPyramidFirstMipLevelOffset, data.firstMipOffset); + ConstantBuffer.PushGlobal(ctx.cmd, data.shaderVariablesBilateralUpsampleCB, HDShaderIDs._ShaderVariablesBilateralUpsample); // Inject all the input buffers ctx.cmd.SetComputeTextureParam(data.bilateralUpsampleCS, data.upscaleKernel, HDShaderIDs._DepthTexture, data.depthTexture); diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceGlobalIllumination.compute 
b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceGlobalIllumination.compute index 5a9c81382ee..e2b68e42c5a 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceGlobalIllumination.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceGlobalIllumination.compute @@ -135,16 +135,27 @@ void TraceReflectionProbes(PositionInputs posInput, float3 normalWS, float3 rayD GetCountAndStart(posInput, LIGHTCATEGORY_ENV, envLightStart, envLightCount); totalWeight = 0.0f; + uint envStartFirstLane; + bool fastPath = IsFastPath(envLightStart, envStartFirstLane); + + if (fastPath) + envLightStart = envStartFirstLane; + // Scalarized loop, same rationale of the punctual light version uint v_envLightListOffset = 0; uint v_envLightIdx = envLightStart; while (v_envLightListOffset < envLightCount) { v_envLightIdx = FetchIndex(envLightStart, v_envLightListOffset); - uint s_envLightIdx = v_envLightIdx; + uint s_envLightIdx = ScalarizeElementIndex(v_envLightIdx, fastPath); if (s_envLightIdx == -1) break; + // Compiler has a tendency to bypass the scalarization, we force it again here. + #ifdef PLATFORM_SUPPORTS_WAVE_INTRINSICS + s_envLightIdx = WaveReadLaneFirst(s_envLightIdx); + #endif + EnvLightData envLightData = FetchEnvLight(s_envLightIdx); // Scalar load. // If current scalar and vector light index match, we process the light. The v_envLightListOffset for current thread is increased. 
@@ -175,7 +186,7 @@ void TraceReflectionProbes(PositionInputs posInput, float3 normalWS, float3 rayD TEXTURE2D_X(_IndirectDiffuseHitPointTexture); // Depth buffer of the previous frame (full resolution) TEXTURE2D_X(_HistoryDepthTexture); -RW_TEXTURE2D_X(float4, _IndirectDiffuseTextureRW); +RW_TEXTURE2D_X(float3, _IndirectDiffuseTextureRW); // The maximal difference in depth that is considered acceptable to read from the color pyramid #define DEPTH_DIFFERENCE_THRESHOLD 0.1 @@ -252,7 +263,7 @@ void REPROJECT_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, if (!invalid) { // The intersection was considered valid, we can read from the color pyramid - color = SAMPLE_TEXTURE2D_X_LOD(_ColorPyramidTexture, s_linear_clamp_sampler, prevFrameUV, SSGI_MIP_OFFSET).rgb * GetInversePreviousExposureMultiplier() * GetCurrentExposureMultiplier(); + color = SAMPLE_TEXTURE2D_X_LOD(_ColorPyramidTexture, s_linear_clamp_sampler, prevFrameUV, SSGI_MIP_OFFSET).rgb * GetInversePreviousExposureMultiplier(); } #if defined(PROBE_VOLUMES_L1) || defined(PROBE_VOLUMES_L2) else if(_EnableProbeVolumes) @@ -267,8 +278,7 @@ void REPROJECT_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, posInput.positionSS, apvBuiltinData.bakeDiffuseLighting, apvBuiltinData.backBakeDiffuseLighting); // Not used - color = apvBuiltinData.bakeDiffuseLighting * GetCurrentExposureMultiplier(); - invalid = false; + color = apvBuiltinData.bakeDiffuseLighting; } #endif else @@ -282,9 +292,6 @@ void REPROJECT_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, color += SAMPLE_TEXTURECUBE_ARRAY_LOD(_SkyTexture, s_trilinear_clamp_sampler, sampleDir, 0.0, 0).xyz * (1.0 - weight); weight = 1.0; } - - invalid = false; - color *= GetCurrentExposureMultiplier(); } // TODO: Remove me when you can find where the nans come from @@ -292,14 +299,12 @@ void REPROJECT_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, color = 0.0f; // Convert to HSV space - color = 
RgbToHsv(color); + color = RgbToHsv(color * GetCurrentExposureMultiplier()); // Expose and clamp the final color color.z = clamp(color.z, 0.0, SSGI_CLAMP_VALUE); // Convert back to HSV space color = HsvToRgb(color); - // We are simply interested to know if the intersected pixel was moving, so we multiply it by a big number - // TODO: make this process not binary // Write the output to the target pixel - _IndirectDiffuseTextureRW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float4(color, 1.0f); + _IndirectDiffuseTextureRW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = color; } diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/HDRenderPipeline.VolumetricClouds.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/HDRenderPipeline.VolumetricClouds.cs index 5af2b2c415c..68b65d2c71c 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/HDRenderPipeline.VolumetricClouds.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/HDRenderPipeline.VolumetricClouds.cs @@ -36,38 +36,6 @@ public partial class HDRenderPipeline // Combine pass via hardware blending, used in case of MSAA color target. 
Material m_CloudCombinePass; - // This is the representation of the half resolution neighborhood - // |-----|-----|-----| - // | | | | - // |-----|-----|-----| - // | | | | - // |-----|-----|-----| - // | | | | - // |-----|-----|-----| - - // This is the representation of the full resolution neighborhood - // |-----|-----|-----| - // | | | | - // |-----|--|--|-----| - // | |--|--| | - // |-----|--|--|-----| - // | | | | - // |-----|-----|-----| - - // The base is centered at (0, 0) at the center of the center pixel: - // The 4 full res pixels are centered {L->R, T->B} at {-0.25, -0.25}, {0.25, -0.25} - // {-0.25, 0.25}, {0.25, 0.25} - // - // The 9 half res pixels are placed {L->R, T->B} at {-1.0, -1.0}, {0.0, -1.0}, {1.0, -1.0} - // {-1.0, 0.0}, {0.0, 0.0}, {1.0, 0.0} - // {-1.0, 1.0}, {0.0, 1.0}, {1.0, 1.0} - - // Set of pre-generated weights (L->R, T->B). After experimentation, the final weighting function is exp(-distance^2) - static float[] m_DistanceBasedWeights = new float[] { 0.324652f, 0.535261f, 0.119433f, 0.535261f, 0.882497f, 0.196912f, 0.119433f, 0.196912f, 0.0439369f, - 0.119433f, 0.535261f, 0.324652f, 0.196912f, 0.882497f, 0.535261f, 0.0439369f, 0.196912f, 0.119433f, - 0.119433f, 0.196912f, 0.0439369f, 0.535261f, 0.882497f, 0.196912f, 0.324652f, 0.535261f, 0.119433f, - 0.0439369f, 0.196912f, 0.119433f, 0.196912f, 0.882497f, 0.535261f, 0.119433f, 0.535261f, 0.324652f}; - struct VolumetricCloudsCameraData { public TVolumetricCloudsCameraType cameraType; @@ -547,7 +515,7 @@ void UpdateShaderVariableslClouds(ref ShaderVariablesClouds cb, HDCamera hdCamer { for (int p = 0; p < 4; ++p) for (int i = 0; i < 9; ++i) - cb._DistanceBasedWeights[12 * p + i] = m_DistanceBasedWeights[9 * p + i]; + cb._DistanceBasedWeights[12 * p + i] = BilateralUpsample.distanceBasedWeights_3x3[9 * p + i]; } } diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStringConstants.cs 
b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStringConstants.cs index fdcff625eea..bfc53d28c00 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStringConstants.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStringConstants.cs @@ -372,6 +372,7 @@ static class HDShaderIDs public static readonly int _ShaderVariablesVolumetric = Shader.PropertyToID("ShaderVariablesVolumetric"); public static readonly int _ShaderVariablesLightList = Shader.PropertyToID("ShaderVariablesLightList"); public static readonly int _ShaderVariablesRaytracing = Shader.PropertyToID("ShaderVariablesRaytracing"); + public static readonly int _ShaderVariablesBilateralUpsample = Shader.PropertyToID("ShaderVariablesBilateralUpsample"); public static readonly int _ShaderVariablesRaytracingLightLoop = Shader.PropertyToID("ShaderVariablesRaytracingLightLoop"); public static readonly int _ShaderVariablesDebugDisplay = Shader.PropertyToID("ShaderVariablesDebugDisplay"); public static readonly int _ShaderVariablesClouds = Shader.PropertyToID("ShaderVariablesClouds"); @@ -636,6 +637,7 @@ static class HDShaderIDs public static readonly int _SampleCountTextureRW = Shader.PropertyToID("_SampleCountTextureRW"); public static readonly int _AffectSmoothSurfaces = Shader.PropertyToID("_AffectSmoothSurfaces"); public static readonly int _ObjectMotionStencilBit = Shader.PropertyToID("_ObjectMotionStencilBit"); + public static readonly int _PointDistribution = Shader.PropertyToID("_PointDistribution"); public static readonly int _DenoiseInputArrayTexture = Shader.PropertyToID("_DenoiseInputArrayTexture"); public static readonly int _ValidityInputArrayTexture = Shader.PropertyToID("_ValidityInputArrayTexture"); diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/HDDiffuseDenoiser.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/HDDiffuseDenoiser.cs index 
645cd1a1bd7..6c9a59503d3 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/HDDiffuseDenoiser.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/HDDiffuseDenoiser.cs @@ -6,7 +6,7 @@ namespace UnityEngine.Rendering.HighDefinition { class HDDiffuseDenoiser { - // Resources used for the denoiser + // Resources used for the de-noiser ComputeShader m_DiffuseDenoiser; Texture m_OwenScrambleRGBA; @@ -17,6 +17,15 @@ class HDDiffuseDenoiser int m_BilateralFilterColorKernel; int m_GatherSingleKernel; int m_GatherColorKernel; + ComputeBuffer m_PointDistribution; + static internal float[] pointDistribution = new float[] { 0.647285104f, -0.534139216f, 0.201738372f, 0.260410696f, + -0.443308681f, 0.259598345f, 0.0f, 0.0f, + 0.851900041f, 0.214261428f, 0.0376310274f, -0.406103343f, + -0.357411921f, -0.525219262f, -0.00147355383f, 0.239211172f, + -0.463947058f, 0.646911025f, -0.0379408896f, -0.291660219f, + 0.405679494f, -0.473511368f, 0.0473965593f, 0.0411158539f, + -0.963973522f, -0.155723229f, -0.444706231f, 0.141471207f, + 0.0980135575f, 0.687162697f, 0.156328082f, -0.0518609099f}; public void Init(HDRenderPipelineRuntimeResources rpResources, HDRenderPipeline renderPipeline) { @@ -31,10 +40,13 @@ public void Init(HDRenderPipelineRuntimeResources rpResources, HDRenderPipeline m_BilateralFilterColorKernel = m_DiffuseDenoiser.FindKernel("BilateralFilterColor"); m_GatherSingleKernel = m_DiffuseDenoiser.FindKernel("GatherSingle"); m_GatherColorKernel = m_DiffuseDenoiser.FindKernel("GatherColor"); + m_PointDistribution = new ComputeBuffer(16, 2 * sizeof(float)); // 16 two-component points (same 128 bytes); the stride has to match the float2 element the shader indexes + m_PointDistribution.SetData(pointDistribution); } public void Release() { + CoreUtils.SafeRelease(m_PointDistribution); } class DiffuseDenoiserPassData @@ -57,7 +69,7 @@ class DiffuseDenoiserPassData public int gatherKernel; // Other parameters - public Texture owenScrambleRGBA; + public ComputeBufferHandle pointDistribution; public ComputeShader 
diffuseDenoiserCS; public TextureHandle depthStencilBuffer; @@ -98,7 +110,7 @@ public TextureHandle Denoise(RenderGraph renderGraph, HDCamera hdCamera, Diffuse } passData.viewCount = hdCamera.viewCount; - // Denoising parameters + // Parameters passData.pixelSpreadTangent = HDRenderPipeline.GetPixelSpreadTangent(hdCamera.camera.fieldOfView, passData.texWidth, passData.texHeight); passData.kernelSize = denoiserParams.kernelSize; passData.halfResolutionFilter = denoiserParams.halfResolutionFilter; @@ -111,13 +123,13 @@ public TextureHandle Denoise(RenderGraph renderGraph, HDCamera hdCamera, Diffuse passData.gatherKernel = denoiserParams.singleChannel ? m_GatherSingleKernel : m_GatherColorKernel; // Other parameters - passData.owenScrambleRGBA = m_OwenScrambleRGBA; passData.diffuseDenoiserCS = m_DiffuseDenoiser; + passData.pointDistribution = builder.ReadComputeBuffer(renderGraph.ImportComputeBuffer(m_PointDistribution)); passData.depthStencilBuffer = builder.ReadTexture(depthBuffer); passData.normalBuffer = builder.ReadTexture(normalBuffer); passData.noisyBuffer = builder.ReadTexture(noisyBuffer); - passData.intermediateBuffer = builder.CreateTransientTexture(new TextureDesc(Vector2.one, true, true) { colorFormat = GraphicsFormat.R16G16B16A16_SFloat, enableRandomWrite = true, name = "DiffuseDenoiserIntermediate" }); + passData.intermediateBuffer = builder.CreateTransientTexture(new TextureDesc(Vector2.one, true, true) { colorFormat = GraphicsFormat.B10G11R11_UFloatPack32, enableRandomWrite = true, name = "DiffuseDenoiserIntermediate" }); passData.outputBuffer = builder.WriteTexture(outputBuffer); builder.SetRenderFunc( @@ -129,8 +141,8 @@ public TextureHandle Denoise(RenderGraph renderGraph, HDCamera hdCamera, Diffuse int numTilesY = (data.texHeight + (areaTileSize - 1)) / areaTileSize; // Request the intermediate buffers that we need - ctx.cmd.SetGlobalTexture(HDShaderIDs._OwenScrambledRGTexture, data.owenScrambleRGBA); 
ctx.cmd.SetComputeFloatParam(data.diffuseDenoiserCS, HDShaderIDs._DenoiserFilterRadius, data.kernelSize); + ctx.cmd.SetComputeBufferParam(data.diffuseDenoiserCS, data.bilateralFilterKernel, HDShaderIDs._PointDistribution, data.pointDistribution); ctx.cmd.SetComputeTextureParam(data.diffuseDenoiserCS, data.bilateralFilterKernel, HDShaderIDs._DenoiseInputTexture, data.noisyBuffer); ctx.cmd.SetComputeTextureParam(data.diffuseDenoiserCS, data.bilateralFilterKernel, HDShaderIDs._DepthTexture, data.depthStencilBuffer); ctx.cmd.SetComputeTextureParam(data.diffuseDenoiserCS, data.bilateralFilterKernel, HDShaderIDs._NormalBufferTexture, data.normalBuffer); diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DiffuseDenoiser.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DiffuseDenoiser.compute index 2ec49f2a023..a4727584be4 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DiffuseDenoiser.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DiffuseDenoiser.compute @@ -19,7 +19,6 @@ #include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.cs.hlsl" // Ray Tracing includes -#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/BilateralFilter.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DenoisingUtils.hlsl" @@ -30,7 +29,8 @@ // Noisy Input Buffer TEXTURE2D_X(_DenoiseInputTexture); - +// Buffer used for point 
sampling +StructuredBuffer<float2> _PointDistribution; // Filtered Output buffer (depends on the singel or color variant of the denoiser) #if SINGLE_CHANNEL RW_TEXTURE2D_X(float, _DenoiseOutputTextureRW); @@ -117,15 +117,7 @@ void BILATERAL_FILTER(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupT for (uint sampleIndex = 0; sampleIndex < (uint)numSamples; ++sampleIndex) { // Fetch the noise value for the current sample - float2 newSample; - newSample.x = GetLDSequenceSampleFloat(sampleIndex + sampleOffset, 0); - newSample.y = GetLDSequenceSampleFloat(sampleIndex + sampleOffset, 1); - - // Convert the sample to a local unit disk - newSample = SampleDiskCubic(newSample.x, newSample.y); - - // Distribute them according a square profile - newSample *= denoisingRadius; + float2 newSample = _PointDistribution[sampleIndex + sampleOffset] * denoisingRadius; // Convert the point to hemogenous clip space float3 wsPos = center.position + localToWorld[0] * newSample.x + localToWorld[1] * newSample.y;