-
Notifications
You must be signed in to change notification settings - Fork 855
Minor performance improvements to SSGI (case 1367144). #5921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
#pragma kernel BilateralUpSampleSingle BILATERAL_UPSAMPLE=BilateralUpSampleSingle SINGLE_CHANNEL | ||
#pragma kernel BilateralUpSampleColor BILATERAL_UPSAMPLE=BilateralUpSampleColor | ||
|
||
//#pragma enable_d3d11_debug_symbols | ||
|
@@ -10,78 +9,103 @@ | |
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl" | ||
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl" | ||
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl" | ||
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsampleDef.cs.hlsl" | ||
|
||
// Mip chain depth buffer | ||
TEXTURE2D_X(_DepthTexture); | ||
// The half resolution texture that needs to be upscaled | ||
TEXTURE2D_X(_LowResolutionTexture); | ||
|
||
// Constant buffer where all variables should land | ||
CBUFFER_START(UnityScreenSpaceGlobalIllumination) | ||
float4 _HalfScreenSize; | ||
float2 _DepthPyramidFirstMipLevelOffset; | ||
CBUFFER_END | ||
// LDS that stores the half resolution data: one entry per texel of the 6x6 half resolution region (36 entries) pre-fetched by a thread group | ||
groupshared float3 gs_cacheLighting[36]; | ||
groupshared float gs_cacheDepth[36]; | ||
|
||
// Pre-fetches one half resolution lighting value and its matching depth into the LDS. | ||
// groupIndex: flattened index of the calling thread, mapped to one cell of the 6x6 region (the kernel only calls this for indices below 36). | ||
// groupOrigin: full resolution coordinate of the thread group origin (the kernel passes groupId * 8). | ||
void FillUpsampleDataLDS(uint groupIndex, uint2 groupOrigin) | ||
{ | ||
// Define which cell of the 6x6 region this worker thread will be accessing | ||
int acessCoordX = groupIndex % 6; | ||
int acessCoordY = groupIndex / 6; | ||
|
||
// Everything we are accessing is in intermediate resolution (half res). | ||
uint2 traceGroupOrigin = groupOrigin / 2; | ||
|
||
// The initial position of the access: one texel before the group origin so the region includes a one texel border | ||
int2 originXY = traceGroupOrigin - int2(1, 1); | ||
|
||
// Compute the sample position, clamped to [0, _HalfScreenSize - 1] on both axes | ||
int2 sampleCoord = int2(clamp(originXY.x + acessCoordX, 0, _HalfScreenSize.x - 1), clamp(originXY.y + acessCoordY, 0, _HalfScreenSize.y - 1)); | ||
|
||
// Sample and store into the LDS | ||
gs_cacheLighting[groupIndex] = LOAD_TEXTURE2D_X(_LowResolutionTexture, sampleCoord).xyz; | ||
// As an input we are not using the depth pyramid, but the full resolution depth (so we need to make sure to read from there for the upsample as well). | ||
gs_cacheDepth[groupIndex] = LOAD_TEXTURE2D_X(_DepthTexture, sampleCoord * 2).x; | ||
} | ||
|
||
// Converts a thread position within the group and a half resolution tap offset into an index into the 36-entry LDS arrays. | ||
uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset) | ||
{ | ||
// Compute the tap coordinate in the 6x6 grid: halve the thread position, shift by 1 for the border texel stored ahead of the group origin, then apply the offset | ||
uint2 tapAddress = (uint2)((int2)(groupThreadId / 2 + 1) + offset); | ||
// Flatten to a row-major index (6 entries per row) and clamp it to the valid range [0, 35] | ||
return clamp((uint)(tapAddress.x) % 6 + tapAddress.y * 6, 0, 35); | ||
} | ||
|
||
// Function that fills the neighborhood struct field by field, as we cannot use arrays. | ||
// groupThreadId: position of the thread inside its group. | ||
// subRegionIdx: selects the set of tap offsets and weights to use (the kernel passes the position of the pixel inside its 2x2 block, in [0, 3]). | ||
// neighborhoodData: receives the value, depth and weight of the four half resolution taps read from the LDS. | ||
void FillUpsampleNeighborhoodData_2x2(int2 groupThreadId, int subRegionIdx, out NeighborhoodUpsampleData2x2_RGB neighborhoodData) | ||
{ | ||
// Tap 0: offset stored in the xy components of _TapOffsets[2 * subRegionIdx]; lighting is clamped to be non-negative | ||
int tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx].x, (int)_TapOffsets[2 * subRegionIdx].y)); | ||
neighborhoodData.lowValue0 = max(0, (gs_cacheLighting[tapIdx])); | ||
neighborhoodData.lowDepth.x = gs_cacheDepth[tapIdx]; | ||
neighborhoodData.lowWeight.x = _DistanceBasedWeights[subRegionIdx].x; | ||
|
||
// Tap 1: offset stored in the zw components of _TapOffsets[2 * subRegionIdx] | ||
tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx].z, (int)_TapOffsets[2 * subRegionIdx].w)); | ||
neighborhoodData.lowValue1 = max(0, (gs_cacheLighting[tapIdx])); | ||
neighborhoodData.lowDepth.y = gs_cacheDepth[tapIdx]; | ||
neighborhoodData.lowWeight.y = _DistanceBasedWeights[subRegionIdx].y; | ||
|
||
// Tap 2: offset stored in the xy components of _TapOffsets[2 * subRegionIdx + 1] | ||
tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx + 1].x, (int)_TapOffsets[2 * subRegionIdx + 1].y)); | ||
neighborhoodData.lowValue2 = max(0, (gs_cacheLighting[tapIdx])); | ||
neighborhoodData.lowDepth.z = gs_cacheDepth[tapIdx]; | ||
neighborhoodData.lowWeight.z = _DistanceBasedWeights[subRegionIdx].z; | ||
|
||
// Tap 3: offset stored in the zw components of _TapOffsets[2 * subRegionIdx + 1] | ||
tapIdx = OffsetToLDSAdress(groupThreadId, int2((int)_TapOffsets[2 * subRegionIdx + 1].z, (int)_TapOffsets[2 * subRegionIdx + 1].w)); | ||
neighborhoodData.lowValue3 = max(0, (gs_cacheLighting[tapIdx])); | ||
neighborhoodData.lowDepth.w = gs_cacheDepth[tapIdx]; | ||
neighborhoodData.lowWeight.w = _DistanceBasedWeights[subRegionIdx].w; | ||
} | ||
|
||
// The output of our upscaling pass | ||
RW_TEXTURE2D_X(float4, _OutputUpscaledTexture); | ||
RW_TEXTURE2D_X(float3, _OutputUpscaledTexture); | ||
|
||
// Kernel entry point: upscales the half resolution texture into _OutputUpscaledTexture, one output pixel per thread. | ||
[numthreads(BILATERAL_UPSAMPLE_TILE_SIZE, BILATERAL_UPSAMPLE_TILE_SIZE, 1)] | ||
void BILATERAL_UPSAMPLE(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID) | ||
void BILATERAL_UPSAMPLE(uint3 currentCoord : SV_DispatchThreadID, | ||
int groupIndex : SV_GroupIndex, | ||
uint2 groupThreadId : SV_GroupThreadID, | ||
uint2 groupId : SV_GroupID) | ||
{ | ||
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z); | ||
UNITY_XR_ASSIGN_VIEW_INDEX(currentCoord.z); | ||
|
||
// Only 36 workers of the 64 region do the pre-fetching | ||
if (groupIndex < 36) | ||
{ | ||
// Load 1 value per thread | ||
FillUpsampleDataLDS(groupIndex, groupId * 8); | ||
} | ||
|
||
// Make sure all values are loaded in LDS by now. This barrier sits before the early return below so every thread of the group reaches it. | ||
GroupMemoryBarrierWithGroupSync(); | ||
|
||
// If out of bounds, discard. NOTE(review): the > comparison lets a coordinate equal to _ScreenSize through; >= looks intended - confirm | ||
if (any(dispatchThreadId.xy > uint2(_ScreenSize.xy))) | ||
if (any(currentCoord.xy > uint2(_ScreenSize.xy))) | ||
return; | ||
|
||
// The pixel position to process | ||
const uint2 outputCoord = dispatchThreadId.xy; | ||
|
||
// Read the depth value as early as possible and use it as late as possible | ||
float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, outputCoord).x; | ||
|
||
// Half resolution coordinate of this pixel | ||
int2 halfResolution = (int2)(outputCoord / 2); | ||
|
||
// Full resolution coordinate of the first pixel of the 2x2 block this pixel belongs to | ||
int2 coordRepresenatative = halfResolution * 2; | ||
|
||
// Compute the shift within the half res | ||
int2 halfResShift = outputCoord - coordRepresenatative; | ||
|
||
// Flatten the shift of the pixel inside its 2x2 block into a single index | ||
int shiftIndex = halfResShift.y * 2 + halfResShift.x; | ||
|
||
// Compute the shift in the upscale table | ||
int offsetInCoordTable = shiftIndex * 4; | ||
|
||
// Compute the half resolution coordinates we should tap from | ||
int2 halfResTap0 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable], _HalfScreenSize.xy - 1); | ||
int2 halfResTap1 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 1], _HalfScreenSize.xy - 1); | ||
int2 halfResTap2 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 2], _HalfScreenSize.xy - 1); | ||
int2 halfResTap3 = clamp(0, halfResolution + UpscaleBilateralPixels[offsetInCoordTable + 3], _HalfScreenSize.xy - 1); | ||
|
||
// Grab the depth of all the half resolution pixels | ||
float4 lowDepths = float4(LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap0).x | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a bug here due to using the first mip instead of 1 every 4 pixels |
||
, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap1).x | ||
, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap2).x | ||
, LOAD_TEXTURE2D_X(_DepthTexture, asuint(_DepthPyramidFirstMipLevelOffset) + halfResTap3).x); | ||
|
||
#if SINGLE_CHANNEL | ||
// Grab all the scalar values required for upscale | ||
float4 lowRes = float4(_LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)].x | ||
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)].x | ||
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)].x | ||
, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)].x); | ||
// Upscale and output | ||
_OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpSingle(hiResDepth, lowDepths, lowRes); | ||
#else | ||
// Grab all the color values required for upscale | ||
float4 lowResCol0 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap0)]); | ||
float4 lowResCol1 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap1)]); | ||
float4 lowResCol2 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap2)]); | ||
float4 lowResCol3 = max(0, _LowResolutionTexture[COORD_TEXTURE2D_X(halfResTap3)]); | ||
|
||
_OutputUpscaledTexture[COORD_TEXTURE2D_X(outputCoord)] = BilUpColor(hiResDepth, lowDepths, lowResCol0, lowResCol1, lowResCol2, lowResCol3); | ||
#endif | ||
float hiResDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord.xy).x; | ||
|
||
// Tap the neighborhood data from the LDS, using the position of the pixel inside its 2x2 block to pick the offsets and weights | ||
NeighborhoodUpsampleData2x2_RGB upsampleData; | ||
int localIndex = (currentCoord.x & 1) + (currentCoord.y & 1) * 2; | ||
FillUpsampleNeighborhoodData_2x2(groupThreadId, localIndex, upsampleData); | ||
|
||
// Upscale and return the result | ||
_OutputUpscaledTexture[COORD_TEXTURE2D_X(currentCoord.xy)] = BilUpColor2x2_RGB(hiResDepth, upsampleData); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System; | ||
using UnityEngine.Experimental.Rendering; | ||
using UnityEngine.Experimental.Rendering.RenderGraphModule; | ||
|
||
namespace UnityEngine.Rendering.HighDefinition | ||
{ | ||
// Pre-generated weight and tap offset tables used by the bilateral upsample. | ||
internal class BilateralUpsample | ||
{ | ||
// This is the representation of the half resolution neighborhood | ||
// |-----|-----|-----| | ||
// | | | | | ||
// |-----|-----|-----| | ||
// | | | | | ||
// |-----|-----|-----| | ||
// | | | | | ||
// |-----|-----|-----| | ||
|
||
// This is the representation of the full resolution neighborhood | ||
// |-----|-----|-----| | ||
// | | | | | ||
// |-----|--|--|-----| | ||
// | |--|--| | | ||
// |-----|--|--|-----| | ||
// | | | | | ||
// |-----|-----|-----| | ||
|
||
// The base is centered at (0, 0) at the center of the center pixel: | ||
// The 4 full res pixels are centered {L->R, T->B} at {-0.25, -0.25}, {0.25, -0.25} | ||
// {-0.25, 0.25}, {0.25, 0.25} | ||
// | ||
// The 9 half res pixels are placed {L->R, T->B} at {-1.0, -1.0}, {0.0, -1.0}, {1.0, -1.0} | ||
// {-1.0, 0.0}, {0.0, 0.0}, {1.0, 0.0} | ||
// {-1.0, 1.0}, {0.0, 1.0}, {1.0, 1.0} | ||
|
||
// Set of pre-generated weights for the 3x3 neighborhood: one row of 9 weights (L->R, T->B) per full resolution pixel. After experimentation, the final weighting function is exp(-distance^2) | ||
static internal float[] distanceBasedWeights_3x3 = new float[] { 0.324652f, 0.535261f, 0.119433f, 0.535261f, 0.882497f, 0.196912f, 0.119433f, 0.196912f, 0.0439369f, | ||
0.119433f, 0.535261f, 0.324652f, 0.196912f, 0.882497f, 0.535261f, 0.0439369f, 0.196912f, 0.119433f, | ||
0.119433f, 0.196912f, 0.0439369f, 0.535261f, 0.882497f, 0.196912f, 0.324652f, 0.535261f, 0.119433f, | ||
0.0439369f, 0.196912f, 0.119433f, 0.196912f, 0.882497f, 0.535261f, 0.119433f, 0.535261f, 0.324652f}; | ||
|
||
// Set of pre-generated weights for the 2x2 neighborhood: one row of 4 weights per full resolution pixel (L->R, T->B), one weight per tap listed in tapOffsets_2x2. Same weighting function, exp(-distance^2) | ||
static internal float[] distanceBasedWeights_2x2 = new float[] { 0.324652f, 0.535261f, 0.535261f, 0.882497f, | ||
0.535261f, 0.324652f, 0.882497f, 0.535261f, | ||
0.535261f, 0.882497f, 0.324652f, 0.535261f, | ||
0.882497f, 0.535261f, 0.535261f, 0.324652f}; | ||
|
||
// Half resolution tap offsets for the 2x2 neighborhood: one row per full resolution pixel (L->R, T->B), each row holding four (x, y) offsets | ||
static internal float[] tapOffsets_2x2 = new float[] { -1.0f, -1.0f, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, 0.0f, | ||
0.0f, -1.0f, 1.0f, -1.0f, 0.0f, 0.0f, 1.0f, 0.0f, | ||
-1.0f, 0.0f, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f, | ||
0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f}; | ||
} | ||
|
||
|
||
// Constant buffer layout shared with the bilateral upsample shader (an HLSL cbuffer is generated from this struct). | ||
[GenerateHLSL(needAccessors = false, generateCBuffer = true)] | ||
unsafe struct ShaderVariablesBilateralUpsample | ||
{ | ||
// Half resolution we are up sampling from | ||
public Vector4 _HalfScreenSize; | ||
|
||
// Weights used for the bilateral up sample, exposed to the shader as 12 float4 (48 floats) | ||
[HLSLArray(3 * 4, typeof(Vector4))] | ||
public fixed float _DistanceBasedWeights[12 * 4]; | ||
|
||
// Offsets used to tap into the half resolution neighbors, exposed to the shader as 8 float4 (32 floats); the shader reads two (x, y) offsets from each float4 | ||
[HLSLArray(2 * 4, typeof(Vector4))] | ||
public fixed float _TapOffsets[8 * 4]; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// | ||
// This file was automatically generated. Please don't edit by hand. Execute Editor command [ Edit > Rendering > Generate Shader Includes ] instead | ||
// | ||
|
||
#ifndef BILATERALUPSAMPLEDEF_CS_HLSL | ||
#define BILATERALUPSAMPLEDEF_CS_HLSL | ||
// Generated from UnityEngine.Rendering.HighDefinition.ShaderVariablesBilateralUpsample | ||
// PackingRules = Exact | ||
CBUFFER_START(ShaderVariablesBilateralUpsample) | ||
float4 _HalfScreenSize; | ||
float4 _DistanceBasedWeights[12]; | ||
float4 _TapOffsets[8]; | ||
CBUFFER_END | ||
|
||
|
||
#endif |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately, compressing the result to an 11-11-10 bit format makes the kernel slower overall.