diff --git a/com.unity.render-pipelines.high-definition/CHANGELOG.md b/com.unity.render-pipelines.high-definition/CHANGELOG.md index 6d7893cba8b..0375781de12 100644 --- a/com.unity.render-pipelines.high-definition/CHANGELOG.md +++ b/com.unity.render-pipelines.high-definition/CHANGELOG.md @@ -43,6 +43,7 @@ The version number for this package has increased due to a version update of a r - Fixed issue where Default Volume Profile Asset change in project settings was not added to the undo stack (case 1285268). - Fixed undo after enabling compositor. - Fixed the ray tracing shadow UI being displayed while it shouldn't (case 1286391). +- Fixed issues with physically-based DoF, improved speed and robustness. ### Changed - Combined occlusion meshes into one to reduce draw calls and state changes with XR single-pass. diff --git a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.RenderGraph.cs b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.RenderGraph.cs index 40a8b8e0127..957629a4f79 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.RenderGraph.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.RenderGraph.cs @@ -431,6 +431,13 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu // map rather than having to deal with all the implications of doing it before TAA if (m_DepthOfField.IsActive() && !isSceneView && m_DepthOfFieldFS && !isDoFPathTraced) { + // If we switch DoF modes and the old one was not using TAA, make sure we invalidate the history + // Note: for Rendergraph the m_IsDoFHisotoryValid perhaps should be moved to the "pass data" struct + if (taaEnabled && m_IsDoFHisotoryValid != m_DepthOfField.physicallyBased) + { + hdCamera.resetPostProcessingHistory = true; + } + var dofParameters = PrepareDoFParameters(hdCamera); bool useHistoryMips = m_DepthOfField.physicallyBased; @@ 
-438,12 +445,6 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu var prevCoCHandle = renderGraph.ImportTexture(prevCoC); var nextCoCHandle = renderGraph.ImportTexture(nextCoC); - // If we switch DoF modes and the old one was not using TAA, make sure we invalidate the history - if (taaEnabled && m_IsDoFHisotoryValid != m_DepthOfField.physicallyBased) - { - hdCamera.resetPostProcessingHistory = true; - } - using (var builder = renderGraph.AddRenderPass("Depth of Field", out var passData, ProfilingSampler.Get(HDProfileId.DepthOfField))) { passData.source = builder.ReadTexture(source); @@ -458,6 +459,7 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu TextureHandle dest = GetPostprocessOutputHandle(renderGraph, "DoF Destination"); passData.destination = builder.WriteTexture(dest); passData.motionVecTexture = builder.ReadTexture(motionVectors); + passData.taaEnabled = taaEnabled; if (!m_DepthOfField.physicallyBased) { @@ -531,8 +533,6 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu }); } - passData.taaEnabled = taaEnabled; - passData.bokehNearKernel = builder.CreateTransientComputeBuffer(new ComputeBufferDesc(dofParameters.nearSampleCount * dofParameters.nearSampleCount, sizeof(uint)) { name = "Bokeh Near Kernel" }); passData.bokehFarKernel = builder.CreateTransientComputeBuffer(new ComputeBufferDesc(dofParameters.farSampleCount * dofParameters.farSampleCount, sizeof(uint)) { name = "Bokeh Far Kernel" }); passData.bokehIndirectCmd = builder.CreateTransientComputeBuffer(new ComputeBufferDesc(3 * 2, sizeof(uint), ComputeBufferType.IndirectArguments) { name = "Bokeh Indirect Cmd" }); @@ -565,10 +565,13 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu passData.fullresCoC = builder.CreateTransientTexture(new TextureDesc(Vector2.one, true, true) { colorFormat = k_CoCFormat, enableRandomWrite = true, useMipMap = true, name = "Full res CoC" 
}); + passData.pingFarRGB = builder.CreateTransientTexture(new TextureDesc(Vector2.one, true, true) + { colorFormat = m_ColorFormat, useMipMap = true, enableRandomWrite = true, name = "DoF Source Pyramid" }); + builder.SetRenderFunc( (DepthofFieldData data, RenderGraphContext ctx) => { - DoPhysicallyBasedDepthOfField(data.parameters, ctx.cmd, data.source, data.destination, data.fullresCoC, data.prevCoC, data.nextCoC, data.motionVecTexture, data.taaEnabled); + DoPhysicallyBasedDepthOfField(data.parameters, ctx.cmd, data.source, data.destination, data.fullresCoC, data.prevCoC, data.nextCoC, data.motionVecTexture, data.pingFarRGB, data.taaEnabled); }); source = passData.destination; @@ -580,15 +583,13 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu if (taaEnabled && m_DepthOfField.physicallyBased) { bool postDof = true; - var taaParams = PrepareTAAParameters(hdCamera, postDof); - using (var builder = renderGraph.AddRenderPass("Temporal Anti-Aliasing", out var passData, ProfilingSampler.Get(HDProfileId.TemporalAntialiasing))) { GrabTemporalAntialiasingHistoryTextures(hdCamera, out var prevHistory, out var nextHistory, postDof); passData.source = builder.ReadTexture(source); - passData.parameters = PrepareTAAParameters(hdCamera); + passData.parameters = PrepareTAAParameters(hdCamera, postDof); passData.depthBuffer = builder.ReadTexture(depthBuffer); passData.motionVecTexture = builder.ReadTexture(motionVectors); passData.depthMipChain = builder.ReadTexture(depthBufferMipChain); @@ -616,12 +617,21 @@ TextureHandle DepthOfFieldPass(RenderGraph renderGraph, HDCamera hdCamera, Textu data.nextHistory, data.prevMVLen, data.nextMVLen); + + // Temporary hack to make post-dof TAA work with rendergraph (still the first frame flashes black). We need a better solution. 
+ m_IsDoFHisotoryValid = true; }); source = passData.destination; } postDoFTAAEnabled = true; + + } + else + { + // Temporary hack to make post-dof TAA work with rendergraph (still the first frame flashes black). We need a better solution. + m_IsDoFHisotoryValid = false; } if (!postDoFTAAEnabled) diff --git a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.cs b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.cs index 5d7be02279f..5386b270f4c 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/PostProcessSystem.cs @@ -706,10 +706,11 @@ void PoolSource(ref RTHandle src, RTHandle dst) GrabCoCHistory(camera, out prevCoC, out nextCoC, useMips: true); var fullresCoC = m_Pool.Get(Vector2.one, k_CoCFormat, true); - - DoPhysicallyBasedDepthOfField(dofParameters, cmd, source, destination, fullresCoC, prevCoC, nextCoC, motionVecTexture, taaEnabled); + var colorPyramid = m_Pool.Get(Vector2.one, m_ColorFormat, true); + DoPhysicallyBasedDepthOfField(dofParameters, cmd, source, destination, fullresCoC, prevCoC, nextCoC, motionVecTexture, colorPyramid, taaEnabled); m_Pool.Recycle(fullresCoC); + m_Pool.Recycle(colorPyramid); } PoolSource(ref source, destination); @@ -1825,6 +1826,7 @@ struct DepthOfFieldParameters public bool farLayerActive; public bool highQualityFiltering; public bool useTiles; + public bool resetPostProcessingHistory; public DepthOfFieldResolution resolution; public DepthOfFieldMode focusMode; @@ -1863,7 +1865,14 @@ DepthOfFieldParameters PrepareDoFParameters(HDCamera camera) parameters.dofDilateCS = m_Resources.shaders.depthOfFieldDilateCS; parameters.dofDilateKernel = parameters.dofDilateCS.FindKernel("KMain"); parameters.dofMipCS = m_Resources.shaders.depthOfFieldMipCS; - parameters.dofMipColorKernel = parameters.dofMipCS.FindKernel(m_EnableAlpha ? 
"KMainColorAlpha" : "KMainColor"); + if (!m_DepthOfField.physicallyBased) + { + parameters.dofMipColorKernel = parameters.dofMipCS.FindKernel(m_EnableAlpha ? "KMainColorAlpha" : "KMainColor"); + } + else + { + parameters.dofMipColorKernel = parameters.dofMipCS.FindKernel(m_EnableAlpha ? "KMainColorCopyAlpha" : "KMainColorCopy"); + } parameters.dofMipCoCKernel = parameters.dofMipCS.FindKernel("KMainCoC"); parameters.dofMipSafeCS = m_Resources.shaders.depthOfFieldMipSafeCS; parameters.dofPrefilterCS = m_Resources.shaders.depthOfFieldPrefilterCS; @@ -1886,6 +1895,7 @@ DepthOfFieldParameters PrepareDoFParameters(HDCamera camera) parameters.pbDoFGatherKernel = parameters.pbDoFGatherCS.FindKernel("KMain"); parameters.camera = camera; + parameters.resetPostProcessingHistory = camera.resetPostProcessingHistory; parameters.nearLayerActive = m_DepthOfField.IsNearLayerActive(); parameters.farLayerActive = m_DepthOfField.IsFarLayerActive(); @@ -2512,7 +2522,7 @@ static void ReprojectCoCHistory(in DepthOfFieldParameters parameters, CommandBuf //Note: this reprojection creates some ghosting, we should replace it with something based on the new TAA ComputeShader cs = parameters.dofCoCReprojectCS; int kernel = parameters.dofCoCReprojectKernel; - cmd.SetComputeVectorParam(cs, HDShaderIDs._Params, new Vector4(camera.resetPostProcessingHistory ? 0f : 0.91f, cocHistoryScale.x, cocHistoryScale.y, 0f)); + cmd.SetComputeVectorParam(cs, HDShaderIDs._Params, new Vector4(parameters.resetPostProcessingHistory ? 
0f : 0.91f, cocHistoryScale.x, cocHistoryScale.y, 0f)); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputCoCTexture, fullresCoC); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputHistoryCoCTexture, prevCoC); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputCoCTexture, nextCoC); @@ -2536,7 +2546,7 @@ static void GetMipMapDimensions(RTHandle texture, int lod, out int width, out in } } - static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameters, CommandBuffer cmd, RTHandle source, RTHandle destination, RTHandle fullresCoC, RTHandle prevCoCHistory, RTHandle nextCoCHistory, RTHandle motionVecTexture, bool taaEnabled) + static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameters, CommandBuffer cmd, RTHandle source, RTHandle destination, RTHandle fullresCoC, RTHandle prevCoCHistory, RTHandle nextCoCHistory, RTHandle motionVecTexture, RTHandle sourcePyramid, bool taaEnabled) { float scale = 1f / (float)dofParameters.resolution; int targetWidth = Mathf.RoundToInt(dofParameters.camera.actualWidth * scale); @@ -2544,7 +2554,7 @@ static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameter // Map the old "max radius" parameters to a bigger range, so we can work on more challenging scenes float maxRadius = Mathf.Max(dofParameters.farMaxBlur, dofParameters.nearMaxBlur); - float cocLimit = Mathf.Clamp(4 * maxRadius, 1, 64); + float cocLimit = Mathf.Clamp(8 * maxRadius, 1, 128); //[1, 16] --> [1, 128] ComputeShader cs; int kernel; @@ -2602,6 +2612,8 @@ static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameter cs = dofParameters.dofCoCPyramidCS; kernel = dofParameters.dofCoCPyramidKernel; + float numMips = Mathf.Floor(Mathf.Log(Mathf.Max(dofParameters.camera.actualWidth, dofParameters.camera.actualHeight), 2)); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputTexture, fullresCoC); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip1, fullresCoC, 1); 
cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip2, fullresCoC, 2); @@ -2609,7 +2621,62 @@ static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameter cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip4, fullresCoC, 4); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip5, fullresCoC, 5); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip6, fullresCoC, 6); + cmd.SetComputeVectorParam(cs, HDShaderIDs._Params, new Vector4(numMips, 0, 0, 0)); cmd.DispatchCompute(cs, kernel, (dofParameters.camera.actualWidth + 31) / 32, (dofParameters.camera.actualHeight + 31) / 32, dofParameters.camera.viewCount); + + // do we need a second pass for the rest? + if (numMips > 6.0f && cocLimit > 32) + { + GetMipMapDimensions(fullresCoC, 6, out var mipMapWidth, out var mipMapHeight); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputTexture, fullresCoC, 6); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip1, fullresCoC, 7); + + if (numMips > 7) + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip2, fullresCoC, 8); + else + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip2, fullresCoC, 1); // we will never write on this, but still need to bind something + + if (numMips > 8) + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip3, fullresCoC, 9); + else + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip3, fullresCoC, 2); // we will never write on this, but still need to bind something + + if (numMips > 9) + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip4, fullresCoC, 10); + else + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip4, fullresCoC, 3); // we will never write on this, but still need to bind something + + if (numMips > 10) + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip5, fullresCoC, 11); + else + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip5, fullresCoC, 4); // we will never write on 
this, but still need to bind something + + if (numMips > 11) + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip6, fullresCoC, 12); + else + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip6, fullresCoC, 5); // we will never write on this, but still need to bind something + + cmd.SetComputeVectorParam(cs, HDShaderIDs._Params, new Vector4(numMips - 6.0f, 0, 0, 0)); + cmd.DispatchCompute(cs, kernel, (mipMapWidth + 31) / 32, (mipMapHeight + 31) / 32, dofParameters.camera.viewCount); + } + + // DoF color pyramid + if (sourcePyramid != null) + { + cs = dofParameters.dofMipCS; + kernel = dofParameters.dofMipColorKernel; + + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputTexture, source, 0); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputTexture, sourcePyramid, 0); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip1, sourcePyramid, 1); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip2, sourcePyramid, 2); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip3, sourcePyramid, 3); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputMip4, sourcePyramid, 4); + + int tx = ((dofParameters.camera.actualWidth >> 1) + 7) / 8; + int ty = ((dofParameters.camera.actualHeight >> 1) + 7) / 8; + cmd.DispatchCompute(cs, kernel, tx, ty, dofParameters.camera.viewCount); + } } using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.DepthOfFieldCombine))) @@ -2618,12 +2685,11 @@ static void DoPhysicallyBasedDepthOfField(in DepthOfFieldParameters dofParameter kernel = dofParameters.pbDoFGatherKernel; float sampleCount = Mathf.Max(dofParameters.nearSampleCount, dofParameters.farSampleCount); - // We only have up to 6 mip levels - float mipLevel = Mathf.Min(6, 1 + Mathf.Ceil(Mathf.Log(cocLimit, 2))); + float mipLevel = 1 + Mathf.Ceil(Mathf.Log(cocLimit, 2)); GetMipMapDimensions(fullresCoC, (int)mipLevel, out var mipMapWidth, out var mipMapHeight); cmd.SetComputeVectorParam(cs, 
HDShaderIDs._Params, new Vector4(sampleCount, cocLimit, 0.0f, 0.0f)); cmd.SetComputeVectorParam(cs, HDShaderIDs._Params2, new Vector4(mipLevel, mipMapWidth, mipMapHeight, 0.0f)); - cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputTexture, source); + cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputTexture, sourcePyramid != null ? sourcePyramid : source); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._InputCoCTexture, fullresCoC); cmd.SetComputeTextureParam(cs, kernel, HDShaderIDs._OutputTexture, destination); cmd.DispatchCompute(cs, kernel, (dofParameters.camera.actualWidth + 7) / 8, (dofParameters.camera.actualHeight + 7) / 8, dofParameters.camera.viewCount); diff --git a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldMip.compute b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldMip.compute index 1613d7d7b77..40fecffdef9 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldMip.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldMip.compute @@ -3,12 +3,18 @@ #pragma only_renderers d3d11 playstation xboxone vulkan metal switch -#pragma kernel KMainColor MAIN=KMainColor CTYPE=float3 -#pragma kernel KMainColorAlpha MAIN=KMainColorAlpha CTYPE=float4 -#pragma kernel KMainCoC MAIN=KMainCoC CTYPE=float +#pragma kernel KMainColor MAIN=KMainColor CTYPE=float3 +#pragma kernel KMainColorAlpha MAIN=KMainColorAlpha CTYPE=float4 +#pragma kernel KMainCoC MAIN=KMainCoC CTYPE=float +#pragma kernel KMainColorCopy MAIN=KMainColorCopy CTYPE=float3 COPY_MIP0 +#pragma kernel KMainColorCopyAlpha MAIN=KMainColorCopyAlpha CTYPE=float4 COPY_MIP0 RW_TEXTURE2D_X(CTYPE, _InputTexture); +#ifdef COPY_MIP0 +RW_TEXTURE2D_X(CTYPE, _OutputTexture); +#endif + RW_TEXTURE2D_X(CTYPE, _OutputMip1); RW_TEXTURE2D_X(CTYPE, _OutputMip2); RW_TEXTURE2D_X(CTYPE, _OutputMip3); @@ -38,6 +44,13 @@ void MAIN(uint3 
dispatchThreadId : SV_DispatchThreadID, uint groupIndex : SV_Gro // First mip CTYPE color = _InputTexture[COORD_TEXTURE2D_X(ul)]; +#ifdef COPY_MIP0 + _OutputTexture[COORD_TEXTURE2D_X(ul)] = _InputTexture[COORD_TEXTURE2D_X(ul)]; + _OutputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 0u))] = _InputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 0u))]; + _OutputTexture[COORD_TEXTURE2D_X(ul + uint2(0u, 1u))] = _InputTexture[COORD_TEXTURE2D_X(ul + uint2(0u, 1u))]; + _OutputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 1u))] = _InputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 1u))]; +#endif + color += _InputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 0u))]; color += _InputTexture[COORD_TEXTURE2D_X(ul + uint2(0u, 1u))]; color += _InputTexture[COORD_TEXTURE2D_X(ul + uint2(1u, 1u))]; diff --git a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFCoCPyramid.compute b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFCoCPyramid.compute index c4436f59369..e6dc4e7ee0c 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFCoCPyramid.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFCoCPyramid.compute @@ -5,6 +5,12 @@ #pragma kernel KMainCoCPyramid +CBUFFER_START(cb0) +float4 _Params; +CBUFFER_END + +#define NumMips _Params.x + RW_TEXTURE2D_X(float, _InputTexture); RW_TEXTURE2D_X(float, _OutputMip1); @@ -52,6 +58,9 @@ void KMainCoCPyramid(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupInd _OutputMip1[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = color; + if (NumMips <= 1.0) + return; + GroupMemoryBarrierWithGroupSync(); // Second mip - checks that X and Y are even @@ -66,6 +75,9 @@ void KMainCoCPyramid(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupInd _OutputMip2[COORD_TEXTURE2D_X(dispatchThreadId.xy / 2u)] = color; } + if (NumMips <= 2.0) + return; + GroupMemoryBarrierWithGroupSync(); // Third mip - checks that X and Y are multiples of four @@ -79,6 +91,9 @@ 
void KMainCoCPyramid(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupInd _OutputMip3[COORD_TEXTURE2D_X(dispatchThreadId.xy / 4u)] = color; } + if (NumMips <= 3.0) + return; + GroupMemoryBarrierWithGroupSync(); // Fourth mip - checks that X and Y are multiples of 8 @@ -92,6 +107,9 @@ void KMainCoCPyramid(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupInd _OutputMip4[COORD_TEXTURE2D_X(dispatchThreadId.xy / 8u)] = color; } + if (NumMips <= 4.0) + return; + GroupMemoryBarrierWithGroupSync(); // Fifth mip - checks that X and Y are multiples of 16 @@ -104,6 +122,9 @@ void KMainCoCPyramid(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupInd _OutputMip5[COORD_TEXTURE2D_X(dispatchThreadId.xy / 16u)] = color; } + if (NumMips <= 5.0) + return; + // Last mip - only one thread if (groupIndex == 0) { diff --git a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFGather.compute b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFGather.compute index bce710f195f..6685908cb7f 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFGather.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DoFGather.compute @@ -29,7 +29,6 @@ RW_TEXTURE2D_X(CTYPE, _OutputTexture); // A set of Defines to fine-tune the algorithm #define NUM_BUCKETS 3 -#define GRADIENT_NOISE #define RING_DENSITY 8.0 #define ADAPTIVE_RADIUS //#define OCTAWEB_SORTING @@ -49,33 +48,6 @@ RW_TEXTURE2D_X(CTYPE, _OutputTexture); #define GROUP_RES 8u #define GROUP_SIZE (GROUP_RES * GROUP_RES) - -int GetCoCBucket(float cocSize) -{ -#if (NUM_BUCKETS == 1) - return 0; -#endif - -#ifdef UNIFORM_WEIGHTS - return 0; -#endif - - // Threshold to split near, far and in-focus range. 
We don't need to expose this parameter, as the algorithm is robust against it - const float dofThreshold = 6; - - if (cocSize > dofThreshold) - { - return 0; - } - else if (abs(cocSize) < dofThreshold) - { - return 1; - } - else - { - return 2; - } -} float GetBucketWeight(float sampleCoC, float noise, int bucket) { // Note: we could potentially get higher quality by allowing artists to fine-tune the bucket/cascade transitions, @@ -99,10 +71,9 @@ float GetBucketWeight(float sampleCoC, float noise, int bucket) } } -float GetCoCRadius(int2 positionSS, out int bucketIndex) +float GetCoCRadius(int2 positionSS) { float CoCRadius = LOAD_TEXTURE2D_X(_InputCoCTexture, positionSS).x; - bucketIndex = GetCoCBucket(CoCRadius); return CoCRadius; } @@ -156,28 +127,6 @@ float GetNumSamples(float radius, float maxRadius) return 1.0 + seriesSum * RING_DENSITY; } -float GetRingWeight(int index, float dR) -{ - if (index == 0) - { - return 1.0f; - } - else - { - float ringRadius = index * dR; - float ringArea = PI * ringRadius * ringRadius; - - float prevRingRadius = (index - 1) * dR; - float prevRingArea = PI * prevRingRadius * prevRingRadius; - - ringArea = ringArea - prevRingArea; - - float ringSamples = RING_DENSITY * index; - ringSamples = (index == 1) ? ringSamples + 1.0f : ringSamples; - return ringArea * rcp(ringSamples); - } -} - [numthreads(GROUP_RES, GROUP_RES, 1)] void KMain(uint3 dispatchThreadId : SV_DispatchThreadID) { @@ -208,31 +157,30 @@ void KMain(uint3 dispatchThreadId : SV_DispatchThreadID) float maxRadius = GetCoCMaxRadius(posInputs.positionSS); - float dR = maxRadius * rcp(NumRings); - float stratum = fmod(posInputs.positionSS.x + posInputs.positionSS.y, 2.0f); + float dR = rcp(NumRings); int sampleOffset = _TaaFrameInfo.w != 0.0 ? 
_TaaFrameInfo.z : 0; + // Note: adjusting pseudo-randomly the sampleOffset for each pixel seems to considerably improve the sampling float noise = InterleavedGradientNoise(posInputs.positionSS.xy, sampleOffset); - float noise2 = InterleavedGradientNoise(posInputs.positionSS.xy, 8 + sampleOffset); + float noise2 = InterleavedGradientNoise(posInputs.positionSS.xy, 3 * posInputs.positionSS.x + posInputs.positionSS.y + sampleOffset); // Note: For zero radius we still go through the loop (for one iteration). We can avoid the cost of computing weigths if we early exit (at the cost of extra code complexity). float totalRings = maxRadius > 0 ? NumRings : 0; +#ifdef ENABLE_ALPHA + CTYPE originalColor = LOAD_TEXTURE2D_X(_InputTexture, posInputs.positionSS).CTYPE_SWIZZLE;; +#endif + // Iterate over the octaweb pattern and gather the DoF samples for (float ring = totalRings; ring >= 0; ring -= 1.0) { - float numSamples = max(ring * RING_DENSITY, 1); + float numSamples = (ring > 0) ? RING_DENSITY : 1; float dAng = 2.0f * PI / numSamples; -#ifndef GRADIENT_NOISE - float radius = max((ring - 1) * dR + stratum * dR, 0); - float ringOffset = 0.5 * fmod(ring, 2.0f) * dAng; -#else - float radius = max((ring - 1) * dR + noise2 * dR, 0); - float ringOffset = noise * dAng; -#endif + float stratum = max((ring - 1) * dR, 0); + float normalizedRadius = (ring > 0) ? 
stratum + noise2 * dR : 0; - float ringWeight = GetRingWeight(max(0, ring), dR); + float ringOffset = noise * dAng; #ifdef OCTAWEB_SORTING float4 ringColor[NUM_BUCKETS]; @@ -255,19 +203,22 @@ void KMain(uint3 dispatchThreadId : SV_DispatchThreadID) for (float ang = ringOffset; ang < 2 * PI; ang += dAng) { - float2 sampleTC = posInputs.positionSS + radius * PointInCircle(ang); + float sampleRadius = sqrt(normalizedRadius) * maxRadius; + float2 sampleTC = posInputs.positionSS + sampleRadius * PointInCircle(ang); sampleTC = clamp(sampleTC, float2(0, 0), _ScreenSize.xy - float2(1, 1)); - int sampleBucket = 0; - CTYPE sampleColor = LOAD_TEXTURE2D_X(_InputTexture, sampleTC).CTYPE_SWIZZLE; - float sampleCoC = GetCoCRadius(sampleTC, sampleBucket); + // Select the appropriate mip to sample based on the amount of samples. Lower sample counts will be faster at the cost of "leaking" + // Note: We have generated only 4 additional mip levels, so don't go any deeper than that + float lod = min(4, log2(2 * PI * sampleRadius * rcp(numSamples))); + CTYPE sampleColor = SAMPLE_TEXTURE2D_X_LOD(_InputTexture, s_trilinear_clamp_sampler, ClampAndScaleUVForBilinear(sampleTC * _ScreenSize.zw), lod).CTYPE_SWIZZLE; + float sampleCoC = GetCoCRadius(sampleTC); - if (abs(sampleCoC) >= radius) + if (abs(sampleCoC) >= sampleRadius) { for (int j = 0; j < NUM_BUCKETS; ++j) { - float weight = ringWeight * GetSampleWeight(abs(sampleCoC)); - float bucketWeight = GetBucketWeight(sampleCoC, noise, j); + float weight = GetSampleWeight(abs(sampleCoC)); + float bucketWeight = GetBucketWeight(sampleCoC, noise2, j); weight *= bucketWeight; RING_COLOR[j] += float4(sampleColor.xyz * weight, weight); RING_MAXCOC[j] = max(RING_MAXCOC[j] * bucketWeight, abs(sampleCoC)); @@ -275,7 +226,7 @@ void KMain(uint3 dispatchThreadId : SV_DispatchThreadID) ringHits[j] += bucketWeight; #endif #ifdef ENABLE_ALPHA - RING_ALPHA[j] = sampleColor.w * weight; + RING_ALPHA[j] += sampleColor.w * weight; #endif } } @@ -307,18 +258,21 
@@ void KMain(uint3 dispatchThreadId : SV_DispatchThreadID) // back to front alpha blending of the near, far and in-focus buckets for (int j = 0; j < NUM_BUCKETS; ++j) { - // Reference: "Next Generation Post-Processing in Call of Duty Advanced Warfare", Advances in real-time rendering, Siggraph 2014 - // See slide 102 for the normalization factor for the alpha. The 2x factor (see slide 103) also helps with the far layers for - // large amounts of defocus blur, but this might need more investigation. - float alpha = saturate(totalColor[j].w * 2 * rcp(GetNumSamples(maxCoC[j], maxRadius))* rcp(GetSampleWeight(maxCoC[j]))); - outColor = (1.0 - alpha) * outColor + alpha * totalColor[j]; + // Note: earlier we were also using a normalization term for the alpha from the CoD slides, but it created some artifacts. + // Perhaps this requires more investigation. + float alpha = saturate(totalColor[j].w); + outColor = (1.0 - alpha) * outColor + totalColor[j]; #ifdef ENABLE_ALPHA - outAlpha = (1.0 - alpha) * outAlpha + alpha * totalAlpha[j]; + outAlpha = (1.0 - alpha) * outAlpha + totalAlpha[j]; #endif } outColor.xyz = outColor.xyz * rcp(outColor.w); #ifdef ENABLE_ALPHA + // Preserve the original value of the pixels with zero alpha. + // The second line with the lerp+smoothstep combination avoids a hard transition in edge cases + //outColor.xyz = outAlpha > 0 ? outColor.xyz : originalColor.xyz; + outColor.xyz = lerp(originalColor.xyz, outColor.xyz, smoothstep(0, 0.01, outAlpha)); _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = float4(outColor.xyz, outAlpha * rcp(outColor.w)); #else _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = outColor.xyz;