-
Notifications
You must be signed in to change notification settings - Fork 774
/
DiffuseDenoiser.compute
240 lines (194 loc) · 10.5 KB
/
DiffuseDenoiser.compute
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#pragma kernel BilateralFilterSingle BILATERAL_FILTER=BilateralFilterSingle SINGLE_CHANNEL
#pragma kernel BilateralFilterColor BILATERAL_FILTER=BilateralFilterColor
#pragma kernel GatherSingle GATHER_FILTER=GatherSingle SINGLE_CHANNEL
#pragma kernel GatherColor GATHER_FILTER=GatherColor
#pragma only_renderers d3d11
// Common includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonLighting.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Sampling/Sampling.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
// HDRP includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.cs.hlsl"
// Ray Tracing includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/BilateralFilter.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DenoisingUtils.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl"
// Tile size (threads per axis) of the kernels in this compute shader
#define DIFFUSE_DENOISER_TILE_SIZE 8
// Noisy input buffer
TEXTURE2D_X(_DenoiseInputTexture);
// Filtered output buffer (its format depends on the single-channel or color variant of the denoiser)
#if SINGLE_CHANNEL
RW_TEXTURE2D_X(float, _DenoiseOutputTextureRW);
#else
RW_TEXTURE2D_X(float4, _DenoiseOutputTextureRW);
#endif
// Scale applied to the distance-based filter radius (see ComputeMaxDenoisingRadius)
float _DenoiserFilterRadius;
// Forwarded to ComputeMaxReprojectionWorldRadius when the filter radius is evaluated
float _PixelSpreadAngleTangent;
// When different from -1, the sample sequence is offset by _JitterFramePeriod * 16 samples
int _JitterFramePeriod;
// Forwarded to ComputeMaxReprojectionWorldRadius as its last argument
#define PIXEL_RADIUS_TOLERANCE_THRESHOLD 2
// Flag used to do a half resolution filter
int _HalfResolutionFilter;
// Distance-based upper bound of the denoising radius for a pixel.
//   positionRWS: position of the pixel; its length is used as the distance to the pixel.
// The distance is scaled by _DenoiserFilterRadius and divided by a factor that grows linearly
// from 5.0 (distance 0) to 50.0 (distance 500.0), and stays at 50.0 beyond that distance.
// These constants are purely empirical: they were picked because they give good visual results.
// TODO: make this less empirical.
float ComputeMaxDenoisingRadius(float3 positionRWS)
{
    const float pixelDistance = length(positionRWS);

    // 0 at the origin, 1 at 500.0 and beyond
    const float distanceRatio = saturate(pixelDistance / 500.0);
    const float radiusDivisor = lerp(5.0, 50.0, distanceRatio);

    return pixelDistance * _DenoiserFilterRadius / radiusDivisor;
}
// Bilateral filter kernel (single-channel or color variant, selected by SINGLE_CHANNEL).
// One thread per pixel, dispatched in DIFFUSE_DENOISER_TILE_SIZE x DIFFUSE_DENOISER_TILE_SIZE groups;
// dispatchThreadId.z selects the XR view.
// For each pixel, low-discrepancy samples are placed on a disk lying in the plane defined by the
// pixel normal, projected back to the screen, and the input values found there are accumulated with
// a gaussian * bilateral weight. Background pixels are cleared and skipped.
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void BILATERAL_FILTER(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Fetch the current pixel coordinate
    uint2 centerCoord = groupId * DIFFUSE_DENOISER_TILE_SIZE + groupThreadId;

    // Read the central position
    const BilateralData center = TapBilateralData(centerCoord);

    // If this is a background pixel, we are done
    if (center.z01 == 1.0)
    {
    #if SINGLE_CHANNEL
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord)] = 0.0;
    #else
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord)] = float4(0.0, 0.0, 0.0, 1.0);
    #endif
        // Without this early-out the filter below would still run and its final store
        // would overwrite the cleared value written just above.
        return;
    }

    // Create the local ortho basis for our sampling
    float3x3 localToWorld = GetLocalFrame(center.normal);

    // Initialize the accumulation values
    #if SINGLE_CHANNEL
    float colorSum = 0.0;
    float wSum = 0.0;
    #else
    float3 colorSum = 0.0;
    float wSum = 0.0;
    #endif

    // Compute the radius of the filter from the distance-based radius, the pixel spread angle and the tolerance threshold
    const float denoisingRadius = ComputeMaxReprojectionWorldRadius(center.position, center.normal, _PixelSpreadAngleTangent, ComputeMaxDenoisingRadius(center.position), PIXEL_RADIUS_TOLERANCE_THRESHOLD);

    // Compute the sigma value
    const float sigma = 0.9 * denoisingRadius;

    // Index of the pixel in the 2x2 group that is used for the half res filter
    int localIndex = (centerCoord.x & 1) + (centerCoord.y & 1) * 2;

    // Define the sample count for this pixel: 16 samples per pixel at full res, 4 at half resolution
    const int numSamples = _HalfResolutionFilter ? 4 : 16;

    // At half resolution each pixel of the 2x2 group reads its own slice of the sample sequence
    int sampleOffset = (_HalfResolutionFilter != 0 ? localIndex * numSamples : 0);

    // Optional per-frame jitter of the sample sequence (-1 disables it)
    if (_JitterFramePeriod != -1)
        sampleOffset += _JitterFramePeriod * 16;

    // Loop through the samples that we need to aggregate
    for (uint sampleIndex = 0; sampleIndex < (uint)numSamples; ++sampleIndex)
    {
        // Fetch the noise value for the current sample
        float2 newSample;
        newSample.x = GetLDSequenceSampleFloat(sampleIndex + sampleOffset, 0);
        newSample.y = GetLDSequenceSampleFloat(sampleIndex + sampleOffset, 1);

        // Convert the sample to a local unit disk
        newSample = SampleDiskCubic(newSample.x, newSample.y);

        // Scale the disk sample to the filter radius
        newSample *= denoisingRadius;

        // Convert the point to homogeneous clip space
        float3 wsPos = center.position + localToWorld[0] * newSample.x + localToWorld[1] * newSample.y;
        float4 hClip = TransformWorldToHClip(wsPos);
        hClip.xyz /= hClip.w;

        // Is the target pixel in the screen?
        if (hClip.x > 1.0 || hClip.x < -1.0 || hClip.y > 1.0 || hClip.y < -1.0 )
            continue;

        // Convert it to screen sample space
        float2 nDC = hClip.xy * 0.5 + 0.5;
    #if UNITY_UV_STARTS_AT_TOP
        nDC.y = 1.0 - nDC.y;
    #endif

        // Tap the data for this pixel
        uint2 tapCoord = nDC * _ScreenSize.xy;
        const BilateralData tapData = TapBilateralData(tapCoord);

        // If the tapped pixel is a background pixel or too far from the center pixel
        // NOTE(review): the center pixel is tested with z01 == 1.0 while the tap is tested against
        // UNITY_RAW_FAR_CLIP_VALUE - confirm which convention z01 follows.
        if (tapData.z01 == UNITY_RAW_FAR_CLIP_VALUE || abs(tapData.zNF - hClip.w) > 0.1)
            continue;

        // Compute the radius of the sample
        float r = length(newSample);

        // Compute the weight (skip computation for the center)
        const float w = r > 0.001f ? gaussian(r, sigma) * ComputeBilateralWeight(center, tapData) : 1.0;

        // Accumulate the new sample
        #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, tapCoord).x * w;
        #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, tapCoord).xyz * w;
        #endif
        wSum += w;
    }

    // If no samples were found, we take the center pixel only
    if (wSum == 0.0)
    {
        #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, centerCoord).x;
        #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, centerCoord).xyz;
        #endif
        wSum += 1.0;
    }

    // Normalize the result
    #if SINGLE_CHANNEL
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord)] = colorSum / wSum;
    #else
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord)] = float4(colorSum / wSum, 1.0);
    #endif
}
// The gather kernel caches one entry per thread of the tile
#define GATHER_REGION_SIZE DIFFUSE_DENOISER_TILE_SIZE
// Number of entries in each LDS cache (GATHER_REGION_SIZE squared)
#define GATHER_REGION_SIZE_2 (GATHER_REGION_SIZE * GATHER_REGION_SIZE)
// Lighting packed with PackToR11G11B10f (filled by the color variant only)
groupshared uint gs_cacheLighting[GATHER_REGION_SIZE_2];
// Single-channel signal (filled by the single-channel variant only)
groupshared float gs_cacheLuminance[GATHER_REGION_SIZE_2];
// Depth loaded from _DepthTexture (filled by both variants)
groupshared float gs_cacheDepth[GATHER_REGION_SIZE_2];
// Loads the input signal and the depth of one pixel into the LDS caches.
//   groupIndex: entry of the caches written by the calling thread.
//   pixelCoord: pixel to load; it is clamped to the screen bounds before any load.
// The single-channel variant fills gs_cacheLuminance, the color variant fills gs_cacheLighting
// (packed as R11G11B10f); gs_cacheDepth is filled in both cases.
void FillGatherDataLDS(uint groupIndex, uint2 pixelCoord)
{
    // Keep the loads inside the screen
    const int2 clampedCoord = int2(clamp(pixelCoord.x, 0, _ScreenSize.x - 1),
                                   clamp(pixelCoord.y, 0, _ScreenSize.y - 1));

    // Depth is cached for both variants
    gs_cacheDepth[groupIndex] = LOAD_TEXTURE2D_X(_DepthTexture, clampedCoord).x;

    // Signal to upsample
#ifdef SINGLE_CHANNEL
    gs_cacheLuminance[groupIndex] = LOAD_TEXTURE2D_X(_DenoiseInputTexture, clampedCoord).x;
#else
    gs_cacheLighting[groupIndex] = PackToR11G11B10f(LOAD_TEXTURE2D_X(_DenoiseInputTexture, clampedCoord).xyz);
#endif
}
// Converts a thread coordinate inside the tile plus an offset into a flat index of the LDS caches.
//   groupThreadId: 2D coordinate inside the GATHER_REGION_SIZE x GATHER_REGION_SIZE tile.
//   offset:        signed 2D offset added to that coordinate.
// Returns x + y * GATHER_REGION_SIZE, clamped to [0, GATHER_REGION_SIZE_2 - 1].
uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset)
{
    // Offset coordinate in the 8x8 grid
    const uint2 tap = (uint2)((int2)(groupThreadId) + offset);

    // Row-major flattening of the coordinate
    const uint flatIndex = (uint)(tap.x) + tap.y * GATHER_REGION_SIZE;

    // Never index outside of the caches
    return clamp(flatIndex, 0, GATHER_REGION_SIZE_2 - 1);
}
// Gather kernel (single-channel or color variant, selected by SINGLE_CHANNEL).
// One thread per pixel, dispatched in DIFFUSE_DENOISER_TILE_SIZE x DIFFUSE_DENOISER_TILE_SIZE groups;
// centerCoord.z selects the XR view.
// Every thread first caches its pixel (signal + depth) in LDS. After the group barrier, each pixel
// reads the four entries of the 2x2 block it belongs to and combines them with the *_Uniform
// bilateral upsample helper, using its own cached depth as the reference depth.
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void GATHER_FILTER(uint3 centerCoord : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(centerCoord.z);

    // Cache this pixel, then wait until the whole tile is available in LDS
    FillGatherDataLDS(groupIndex, centerCoord.xy);
    GroupMemoryBarrierWithGroupSync();

    // Depth of the pixel being written
    int selfIdx = OffsetToLDSAdress(groupThreadId, int2(0, 0));
    float referenceDepth = gs_cacheDepth[selfIdx];

    // Top-left pixel of the 2x2 block that contains this pixel, expressed inside the tile
    uint2 blockOrigin = centerCoord.xy - uint2(centerCoord.x & 1, centerCoord.y & 1);
    uint2 blockOriginInTile = blockOrigin - groupId * DIFFUSE_DENOISER_TILE_SIZE;

    // LDS entries of the four pixels of the block
    int idx00 = OffsetToLDSAdress(blockOriginInTile, int2(0, 0));
    int idx10 = OffsetToLDSAdress(blockOriginInTile, int2(1, 0));
    int idx01 = OffsetToLDSAdress(blockOriginInTile, int2(0, 1));
    int idx11 = OffsetToLDSAdress(blockOriginInTile, int2(1, 1));

    // Depths of the four pixels of the block
    float4 blockDepths = float4(gs_cacheDepth[idx00], gs_cacheDepth[idx10], gs_cacheDepth[idx01], gs_cacheDepth[idx11]);

#if SINGLE_CHANNEL
    float4 blockValues = float4(gs_cacheLuminance[idx00], gs_cacheLuminance[idx10], gs_cacheLuminance[idx01], gs_cacheLuminance[idx11]);
    float upsampled = BilUpSingle_Uniform(referenceDepth, blockDepths, blockValues);
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = upsampled;
#else
    float3 lighting00 = UnpackFromR11G11B10f(gs_cacheLighting[idx00]);
    float3 lighting10 = UnpackFromR11G11B10f(gs_cacheLighting[idx10]);
    float3 lighting01 = UnpackFromR11G11B10f(gs_cacheLighting[idx01]);
    float3 lighting11 = UnpackFromR11G11B10f(gs_cacheLighting[idx11]);
    float3 upsampled = BilUpColor3_Uniform(referenceDepth, blockDepths, lighting00, lighting10, lighting01, lighting11);
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = float4(upsampled, 1.0);
#endif
}