Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions com.unity.render-pipelines.high-definition/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Fix API warnings in Matcap mode on Metal.
- Fix D3D validation layer errors w.r.t shadow textures when an atlas is not used.
- Fixed anchor position offset property for the Light Anchor component. (case 1362809)
- Fixed minor performance issues in SSGI (case 1367144).

## [13.0.0] - 2021-09-01

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#pragma kernel BilateralUpSampleSingle BILATERAL_UPSAMPLE=BilateralUpSampleSingle SINGLE_CHANNEL
#pragma kernel BilateralUpSampleColor BILATERAL_UPSAMPLE=BilateralUpSampleColor

//#pragma enable_d3d11_debug_symbols
Expand All @@ -10,78 +9,103 @@
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl"

// Mip chain depth buffer
TEXTURE2D_X(_DepthTexture);
// The half resolution texture that needs to be upscaled
TEXTURE2D_X(_LowResolutionTexture);

// Constant buffer where all variables should land
CBUFFER_START(UnityScreenSpaceGlobalIllumination)
float4 _HalfScreenSize;
float2 _DepthPyramidFirstMipLevelOffset;
CBUFFER_END
// LDS that store the half resolution data
groupshared float3 gs_cacheLighting[36];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compressing the result to 11 11 10 makes the kernel overall slower unfortunately.

groupshared float gs_cacheDepth[36];

// Loads one half resolution lighting value and its matching depth into the LDS cache.
// groupIndex: flat index of the worker thread, used as the cache cell index.
// groupOrigin: full resolution coordinate of the first pixel handled by the group.
void FillUpsampleDataLDS(uint groupIndex, uint2 groupOrigin)
{
    // Map the flat worker index onto one cell of the 6x6 cache (row-major)
    int cellX = groupIndex % 6;
    int cellY = groupIndex / 6;

    // The cache lives in half resolution space and starts one pixel before the
    // half resolution origin of the group, so the first row/column act as a border
    uint2 halfResGroupOrigin = groupOrigin / 2;
    int2 cacheOrigin = halfResGroupOrigin - int2(1, 1);

    // Keep the tap inside [0, _HalfScreenSize.xy - 1] so cells that fall outside of the texture read the closest valid pixel
    int2 tapCoord = int2(
        clamp(cacheOrigin.x + cellX, 0, _HalfScreenSize.x - 1),
        clamp(cacheOrigin.y + cellY, 0, _HalfScreenSize.y - 1));

    // The depth input is the full resolution depth and not the depth pyramid, so the
    // half resolution coordinate is scaled by 2 to read the matching full resolution pixel
    gs_cacheDepth[groupIndex] = LOAD_TEXTURE2D_X(_DepthTexture, tapCoord * 2).x;

    // The lighting is read directly at the half resolution coordinate
    gs_cacheLighting[groupIndex] = LOAD_TEXTURE2D_X(_LowResolutionTexture, tapCoord).xyz;
}

// Converts a thread position inside the group plus a half resolution tap offset
// into a linear index of the 6x6 LDS cache. The returned index is always in [0, 35].
uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset)
{
    // Half resolution cell of this thread inside the 6x6 grid (shifted by one cell), moved by the requested offset
    int2 centerCell = (int2)(groupThreadId / 2 + 1);
    uint2 tapCell = (uint2)(centerCell + offset);

    // Row-major linear address, wrapped on x and clamped to the size of the cache
    uint linearAddress = tapCell.x % 6 + tapCell.y * 6;
    return clamp(linearAddress, 0, 35);
}

// Function that fills the struct as we cannot use arrays
// Gathers the four taps (lighting, depth and weight) of a pixel from the LDS cache.
// The data is returned through a structure as arrays cannot be used here.
// groupThreadId: position of the thread inside the group.
// subRegionIdx: index used to select the tap offsets and the weights of this pixel.
void FillUpsampleNeighborhoodData_2x2(int2 groupThreadId, int subRegionIdx, out NeighborhoodUpsampleData2x2_RGB neighborhoodData)
{
    // Each sub-region owns two consecutive float4 of _TapOffsets, i.e. four (x, y) offsets
    float4 offsets01 = _TapOffsets[2 * subRegionIdx];
    float4 offsets23 = _TapOffsets[2 * subRegionIdx + 1];

    // Resolve the LDS address of the four taps
    int tap0 = OffsetToLDSAdress(groupThreadId, int2((int)offsets01.x, (int)offsets01.y));
    int tap1 = OffsetToLDSAdress(groupThreadId, int2((int)offsets01.z, (int)offsets01.w));
    int tap2 = OffsetToLDSAdress(groupThreadId, int2((int)offsets23.x, (int)offsets23.y));
    int tap3 = OffsetToLDSAdress(groupThreadId, int2((int)offsets23.z, (int)offsets23.w));

    // Lighting values (negative values are clamped to zero)
    neighborhoodData.lowValue0 = max(0, gs_cacheLighting[tap0]);
    neighborhoodData.lowValue1 = max(0, gs_cacheLighting[tap1]);
    neighborhoodData.lowValue2 = max(0, gs_cacheLighting[tap2]);
    neighborhoodData.lowValue3 = max(0, gs_cacheLighting[tap3]);

    // Depths, one component per tap
    neighborhoodData.lowDepth = float4(gs_cacheDepth[tap0], gs_cacheDepth[tap1], gs_cacheDepth[tap2], gs_cacheDepth[tap3]);

    // Distance based weights, one component per tap
    neighborhoodData.lowWeight = _DistanceBasedWeights[subRegionIdx];
}

// The output of our upscaling pass
RW_TEXTURE2D_X(float4, _OutputUpscaledTexture);
RW_TEXTURE2D_X(float3, _OutputUpscaledTexture);

[numthreads(BILATERAL_UPSAMPLE_TILE_SIZE, BILATERAL_UPSAMPLE_TILE_SIZE, 1)]
void BILATERAL_UPSAMPLE(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
void BILATERAL_UPSAMPLE(uint3 currentCoord : SV_DispatchThreadID,
int groupIndex : SV_GroupIndex,
uint2 groupThreadId : SV_GroupThreadID,
uint2 groupId : SV_GroupID)
{
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
UNITY_XR_ASSIGN_VIEW_INDEX(currentCoord.z);

// Only 36 workers of the 64 region do the pre-fetching
if (groupIndex < 36)
{
// Load 1 value per thread
FillUpsampleDataLDS(groupIndex, groupId * 8);
}

// Make sure all values are loaded in LDS by now.
GroupMemoryBarrierWithGroupSync();

// If out of bounds, discard
if (any(dispatchThreadId.xy > uint2(_ScreenSize.xy)))
if (any(currentCoord.xy > uint2(_ScreenSize.xy)))
return;

// The pixel position to process
const uint2 outputCoord = dispatchThreadId.xy;

// Read the depth value as early as possible and use it as late as possible
float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, outputCoord).x;

// Define what is the half resolution of this pixel
int2 halfResolution = (int2)(outputCoord / 2);

// Define the full resolution coordinate of the representative (top-left) pixel of this half resolution pixel
int2 coordRepresenatative = halfResolution * 2;

// Compute the shift within the half res
int2 halfResShift = outputCoord - coordRepresenatative;

// Compute the shift of the pixel in the group
int shiftIndex = halfResShift.y * 2 + halfResShift.x;

// Compute the shift in the upscale table
int offsetInCoordTable = shiftIndex * 4;

// Compute the half resolution coordinates we should tap from
int2 halfResTap0 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable], _HalfScreenSize.xy - 1);
int2 halfResTap1 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 1], _HalfScreenSize.xy - 1);
int2 halfResTap2 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 2], _HalfScreenSize.xy - 1);
int2 halfResTap3 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 3], _HalfScreenSize.xy - 1);

// Grab the depth of all the half resolution pixels
float4 lowDepths = float4(LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap0).x
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a bug here due to using the first mip instead of 1 every 4 pixels

, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap1).x
, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap2).x
, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap3).x);

#if SINGLE_CHANNEL
// Grab all the scalar values required for upscale
float4 lowRes = float4(_LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)].x
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)].x
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)].x
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)].x);
// Upscale and output
_OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpSingle(hiResDepth, lowDepths, lowRes);
#else
// Grab all the color values required for upscale
float4 lowResCol0 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)]);
float4 lowResCol1 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)]);
float4 lowResCol2 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)]);
float4 lowResCol3 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)]);

_OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpColor(hiResDepth, lowDepths, lowResCol0, lowResCol1, lowResCol2, lowResCol3);
#endif
float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord.xy).x;

// Tap the neighborhood data from the LDS cache
NeighborhoodUpsampleData2x2_RGB upsampleData;
int localIndex = (currentCoord.x & 1) + (currentCoord.y & 1) * 2;
FillUpsampleNeighborhoodData_2x2(groupThreadId, localIndex, upsampleData);

// Upscale and return the result
_OutputUpscaledTexture[COORD_TEXTURE2D_X(currentCoord.xy)] = BilUpColor2x2_RGB(hiResDepth, upsampleData);
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,32 @@ float4 BilUpColor3x3(float highDepth, in NeighborhoodUpsampleData3x3 data)
+ float4(_NoiseFilterStrength, _NoiseFilterStrength, _NoiseFilterStrength, 0.0);
return WeightedSum / TotalWeight;
}

// Due to compiler issues, it is not possible to use arrays to store the neighborhood values, we then store them in this structure
// It holds the four low resolution taps (depth, RGB value and weight) that BilUpColor2x2_RGB combines.
struct NeighborhoodUpsampleData2x2_RGB
{
// Low resolution depths, one component per tap (x -> lowValue0, y -> lowValue1, z -> lowValue2, w -> lowValue3)
float4 lowDepth;

// The low resolution values (RGB), one per tap
float3 lowValue0;
float3 lowValue1;
float3 lowValue2;
float3 lowValue3;

// Weights used to combine the neighborhood, one component per tap (same order as lowDepth)
float4 lowWeight;
};

// The bilateral upscale function (2x2 neighborhood, RGB only)
// highDepth: depth of the full resolution pixel being reconstructed.
// data: the four low resolution taps (values, depths and weights).
// Returns the weighted average of the four taps.
float3 BilUpColor2x2_RGB(float highDepth, in NeighborhoodUpsampleData2x2_RGB data)
{
    // A tap weighs less the further its depth is from the full resolution depth,
    // _UpsampleTolerance is added to the denominator of the weight
    float4 depthDelta = abs(highDepth - data.lowDepth);
    float4 tapWeights = data.lowWeight / (depthDelta + _UpsampleTolerance);

    // _NoiseFilterStrength is added to both the weighted sum and the total weight
    float weightSum = tapWeights.x + tapWeights.y + tapWeights.z + tapWeights.w + _NoiseFilterStrength;
    float3 valueSum = data.lowValue0.xyz * tapWeights.x
        + data.lowValue1.xyz * tapWeights.y
        + data.lowValue2.xyz * tapWeights.z
        + data.lowValue3.xyz * tapWeights.w
        + _NoiseFilterStrength;

    return valueSum / weightSum;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using System;
using UnityEngine.Experimental.Rendering;
using UnityEngine.Experimental.Rendering.RenderGraphModule;

namespace UnityEngine.Rendering.HighDefinition
{
internal class BilateralUpsample
{
    // Half resolution neighborhood (3x3 half resolution pixels):
    // |-----|-----|-----|
    // |     |     |     |
    // |-----|-----|-----|
    // |     |     |     |
    // |-----|-----|-----|
    // |     |     |     |
    // |-----|-----|-----|
    //
    // Full resolution neighborhood (the center half resolution pixel is split into 2x2 full resolution pixels):
    // |-----|-----|-----|
    // |     |     |     |
    // |-----|--|--|-----|
    // |     |--|--|     |
    // |-----|--|--|-----|
    // |     |     |     |
    // |-----|-----|-----|
    //
    // The origin (0, 0) is the center of the center half resolution pixel.
    // The 4 full resolution pixels are centered {L->R, T->B} at:
    //   {-0.25, -0.25}, {0.25, -0.25}
    //   {-0.25,  0.25}, {0.25,  0.25}
    //
    // The 9 half resolution pixels are centered {L->R, T->B} at:
    //   {-1.0, -1.0}, {0.0, -1.0}, {1.0, -1.0}
    //   {-1.0,  0.0}, {0.0,  0.0}, {1.0,  0.0}
    //   {-1.0,  1.0}, {0.0,  1.0}, {1.0,  1.0}

    // Pre-generated weights for the 3x3 neighborhood. After experimentation, the final weighting function is exp(-distance^2).
    // One row of 9 weights (L->R, T->B) per full resolution pixel.
    internal static float[] distanceBasedWeights_3x3 =
    {
        // Top-left full resolution pixel
        0.324652f, 0.535261f, 0.119433f, 0.535261f, 0.882497f, 0.196912f, 0.119433f, 0.196912f, 0.0439369f,
        // Top-right full resolution pixel
        0.119433f, 0.535261f, 0.324652f, 0.196912f, 0.882497f, 0.535261f, 0.0439369f, 0.196912f, 0.119433f,
        // Bottom-left full resolution pixel
        0.119433f, 0.196912f, 0.0439369f, 0.535261f, 0.882497f, 0.196912f, 0.324652f, 0.535261f, 0.119433f,
        // Bottom-right full resolution pixel
        0.0439369f, 0.196912f, 0.119433f, 0.196912f, 0.882497f, 0.535261f, 0.119433f, 0.535261f, 0.324652f
    };

    // Pre-generated weights for the 2x2 neighborhood. After experimentation, the final weighting function is exp(-distance^2).
    // One row of 4 weights (L->R, T->B) per full resolution pixel, matching the taps of tapOffsets_2x2.
    internal static float[] distanceBasedWeights_2x2 =
    {
        // Top-left full resolution pixel
        0.324652f, 0.535261f, 0.535261f, 0.882497f,
        // Top-right full resolution pixel
        0.535261f, 0.324652f, 0.882497f, 0.535261f,
        // Bottom-left full resolution pixel
        0.535261f, 0.882497f, 0.324652f, 0.535261f,
        // Bottom-right full resolution pixel
        0.882497f, 0.535261f, 0.535261f, 0.324652f
    };

    // Half resolution offsets of the 2x2 neighborhood taps.
    // One row of 4 (x, y) pairs per full resolution pixel.
    internal static float[] tapOffsets_2x2 =
    {
        // Top-left full resolution pixel
        -1.0f, -1.0f, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, 0.0f,
        // Top-right full resolution pixel
        0.0f, -1.0f, 1.0f, -1.0f, 0.0f, 0.0f, 1.0f, 0.0f,
        // Bottom-left full resolution pixel
        -1.0f, 0.0f, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f,
        // Bottom-right full resolution pixel
        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f
    };
}


// CPU side layout of the constant buffer used by the bilateral upsample shader.
// The matching HLSL cbuffer is generated from this struct (generateCBuffer = true), so the
// order and size of the fields must not be changed without regenerating the shader include.
[GenerateHLSL(needAccessors = false, generateCBuffer = true)]
unsafe struct ShaderVariablesBilateralUpsample
{
// Half resolution we are up sampling from
public Vector4 _HalfScreenSize;

// Weights used for the bilateral up sample
// Exposed to HLSL as an array of 3 * 4 = 12 float4, stored here as 12 * 4 = 48 floats
[HLSLArray(3 * 4, typeof(Vector4))]
public fixed float _DistanceBasedWeights[12 * 4];

// Offsets used to tap into the half resolution neighbors
// Exposed to HLSL as an array of 2 * 4 = 8 float4, stored here as 8 * 4 = 32 floats
[HLSLArray(2 * 4, typeof(Vector4))]
public fixed float _TapOffsets[8 * 4];
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//
// This file was automatically generated. Please don't edit by hand. Execute Editor command [ Edit > Rendering > Generate Shader Includes ] instead
//

#ifndef BILATERALUPSAMPLEDEF_CS_HLSL
#define BILATERALUPSAMPLEDEF_CS_HLSL
// Generated from UnityEngine.Rendering.HighDefinition.ShaderVariablesBilateralUpsample
// PackingRules = Exact
CBUFFER_START(ShaderVariablesBilateralUpsample)
float4 _HalfScreenSize;
float4 _DistanceBasedWeights[12];
float4 _TapOffsets[8];
CBUFFER_END


#endif

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ TextureHandle TraceSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllumi

// Output textures
passData.outputBuffer = builder.WriteTexture(renderGraph.CreateTexture(new TextureDesc(Vector2.one, true, true)
{ colorFormat = GraphicsFormat.R16G16B16A16_SFloat, enableRandomWrite = true, name = "SSGI Color" }));
{ colorFormat = GraphicsFormat.B10G11R11_UFloatPack32, enableRandomWrite = true, name = "SSGI Color" }));

builder.SetRenderFunc(
(TraceSSGIPassData data, RenderGraphContext ctx) =>
Expand Down Expand Up @@ -291,9 +291,7 @@ class UpscaleSSGIPassData
public int texHeight;
public int viewCount;
public Vector4 halfScreenSize;

// Generation parameters
public Vector2 firstMipOffset;
public ShaderVariablesBilateralUpsample shaderVariablesBilateralUpsampleCB;

// Compute Shader
public ComputeShader bilateralUpsampleCS;
Expand All @@ -314,10 +312,16 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu
passData.texWidth = hdCamera.actualWidth;
passData.texHeight = hdCamera.actualHeight;
passData.viewCount = hdCamera.viewCount;
passData.halfScreenSize.Set(passData.texWidth / 2, passData.texHeight / 2, 1.0f / (passData.texWidth * 0.5f), 1.0f / (passData.texHeight * 0.5f));

// Set the generation parameters
passData.firstMipOffset.Set(HDShadowUtils.Asfloat((uint)info.mipLevelOffsets[1].x), HDShadowUtils.Asfloat((uint)info.mipLevelOffsets[1].y));
passData.shaderVariablesBilateralUpsampleCB._HalfScreenSize = new Vector4(passData.texWidth / 2, passData.texHeight / 2, 1.0f / (passData.texWidth * 0.5f), 1.0f / (passData.texHeight * 0.5f));
unsafe
{
for (int i = 0; i < 16; ++i)
passData.shaderVariablesBilateralUpsampleCB._DistanceBasedWeights[i] = BilateralUpsample.distanceBasedWeights_2x2[i];

for (int i = 0; i < 32; ++i)
passData.shaderVariablesBilateralUpsampleCB._TapOffsets[i] = BilateralUpsample.tapOffsets_2x2[i];
}

// Grab the right kernel
passData.bilateralUpsampleCS = m_Asset.renderPipelineResources.shaders.bilateralUpsampleCS;
Expand All @@ -326,7 +330,7 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu
passData.depthTexture = builder.ReadTexture(depthPyramid);
passData.inputBuffer = builder.ReadTexture(inputBuffer);
passData.outputBuffer = builder.WriteTexture(renderGraph.CreateTexture(new TextureDesc(Vector2.one, true, true)
{ colorFormat = GraphicsFormat.R16G16B16A16_SFloat, enableRandomWrite = true, name = "SSGI Final" }));
{ colorFormat = GraphicsFormat.B10G11R11_UFloatPack32, enableRandomWrite = true, name = "SSGI Final" }));

builder.SetRenderFunc(
(UpscaleSSGIPassData data, RenderGraphContext ctx) =>
Expand All @@ -336,9 +340,7 @@ TextureHandle UpscaleSSGI(RenderGraph renderGraph, HDCamera hdCamera, GlobalIllu
int numTilesXHR = (data.texWidth + (ssgiTileSize - 1)) / ssgiTileSize;
int numTilesYHR = (data.texHeight + (ssgiTileSize - 1)) / ssgiTileSize;

// Inject the input scalars
ctx.cmd.SetComputeVectorParam(data.bilateralUpsampleCS, HDShaderIDs._HalfScreenSize, data.halfScreenSize);
ctx.cmd.SetComputeVectorParam(data.bilateralUpsampleCS, HDShaderIDs._DepthPyramidFirstMipLevelOffset, data.firstMipOffset);
ConstantBuffer.PushGlobal(ctx.cmd, data.shaderVariablesBilateralUpsampleCB, HDShaderIDs._ShaderVariablesBilateralUpsample);

// Inject all the input buffers
ctx.cmd.SetComputeTextureParam(data.bilateralUpsampleCS, data.upscaleKernel, HDShaderIDs._DepthTexture, data.depthTexture);
Expand Down
Loading