From c5e0ab5e7d39f13a4c285726000b08f4618b1dcf Mon Sep 17 00:00:00 2001 From: Jacco Bikker Date: Wed, 4 Nov 2020 20:25:35 +0100 Subject: [PATCH] Merged experimental lighttree code. --- apps/benchmarkapp/camera.xml | 6 +- lib/CUDA/shared_kernel_code/lights_shared.h | 163 ++++++++++++++++++ .../kernels/.cuda.cu | 5 +- lib/RenderCore_Optix7Filter/kernels/.cuda.cu | 5 +- lib/RenderCore_Optix7Guiding/kernels/.cuda.cu | 5 +- .../kernels/pathtracer.h | 8 +- lib/RenderCore_OptixPrime_B/kernels/.cuda.cu | 5 +- .../kernels/pathtracer.h | 2 +- .../kernels/.cuda.cu | 2 + .../kernels/.cuda.cu | 2 + lib/RenderCore_PrimeAdaptive/kernels/.cuda.cu | 5 +- lib/rendercore_optix7/core_settings.h | 1 + lib/rendercore_optix7/kernels/.cuda.cu | 5 +- lib/rendercore_optix7/kernels/pathtracer.h | 2 + lib/rendercore_optix7/rendercore.cpp | 116 +++++++++++++ lib/rendercore_optix7/rendercore.h | 4 + 16 files changed, 322 insertions(+), 14 deletions(-) diff --git a/apps/benchmarkapp/camera.xml b/apps/benchmarkapp/camera.xml index 37a06cf95..967a04b38 100644 --- a/apps/benchmarkapp/camera.xml +++ b/apps/benchmarkapp/camera.xml @@ -1,12 +1,12 @@ - + 40 0 0 2.2 - 0.0089872945 + 0.0089999996 0.050000001 - 5.0255618 + 21.969528 2.5 3 diff --git a/lib/CUDA/shared_kernel_code/lights_shared.h b/lib/CUDA/shared_kernel_code/lights_shared.h index 9c87793b3..ec551a033 100644 --- a/lib/CUDA/shared_kernel_code/lights_shared.h +++ b/lib/CUDA/shared_kernel_code/lights_shared.h @@ -22,6 +22,7 @@ #include "noerrors.h" #define ISLIGHTS +// #define LIGHTTREE #define MAXISLIGHTS 64 #define TRILIGHTCOUNT (lightCounts.x & 0xffff) @@ -112,6 +113,59 @@ LH2_DEVFUNC float CalculateLightPDF( const float3& D, const float t, const float return (t * t) / (abs( dot( D, lightNormal ) ) * lightArea); } +// +-----------------------------------------------------------------------------+ +// | CalculateChildNodeWeights | +// | Helper to compute child node weights. LH2'20| +// +-----------------------------------------------------------------------------+ +LH2_DEVFUNC float CalculateChildNodeWeights( const int node, const float3& I, const float3& N, uint& seed, const bool debug = false ) +{ + const int left = lightTree[node].left; + const int right = lightTree[node].right; + const float3 b1j = make_float3( lightTree[left].bmin ), b1k = make_float3( lightTree[right].bmin ); + const float3 b2j = make_float3( lightTree[left].bmax ), b2k = make_float3( lightTree[right].bmax ); + const float3 diag_j = b2j - b1j; + const float3 diag_k = b2k - b1k; + const float3 LN = make_float3( lightTree[node].N ); + // calculate (squared) minimum and maximum distance from I to aabb + // method: GPU-Accelerated Minimum Distance and Clearance Queries, Krishnamurthy et al., 2011 + const float3 Bj = 0.5f * diag_j; + const float3 Bk = 0.5f * diag_k; + const float3 Cj = (b1j + b2j) * 0.5f; + const float3 Ck = (b1k + b2k) * 0.5f; + const float3 Dj = Cj - I; + const float3 Dk = Ck - I; + const float3 min_j = make_float3( max( Dj.x - Bj.x, 0.0f ), max( Dj.y - Bj.y, 0.0f ), max( Dj.z - Bj.z, 0.0f ) ); + const float3 min_k = make_float3( max( Dk.x - Bk.x, 0.0f ), max( Dk.y - Bk.y, 0.0f ), max( Dk.z - Bk.z, 0.0f ) ); + const float dist2j = dot( min_j, min_j ); + const float dist2k = dot( min_k, min_k ); + const float3 max_j = Dj + Bj; + const float3 max_k = Dk + Bk; + const float dist2j_max = dot( max_j, max_j ); + const float dist2k_max = dot( max_k, max_k ); + // get the left and right cluster intensities + const float Ij = lightTree[left].intensity; + const float Ik = lightTree[right].intensity; + // get a reasonable value for F using the normals at I and the light + const float3 Rj = b1j + (b2j - b1j) * make_float3( RandomFloat( seed ), RandomFloat( seed ), RandomFloat( seed ) ); + const float3 Rk = b1k + (b2k - b1k) * make_float3( RandomFloat( seed ), RandomFloat( seed ), RandomFloat( seed ) ); + const float3 Lj = normalize( Rj - I ); + const float3 Lk = normalize( Rk - I ); + float Fj = max( 0.001f, dot( N, Lj ) ); + float Fk = max( 0.001f, dot( N, Lk ) ); + if (dot( LN, LN ) > 0.001f) + Fj *= max( 0.001f, dot( LN, Lj * -1.0f ) ), + Fk *= max( 0.001f, dot( LN, Lk * -1.0f ) ); + // calculate final probabilities according to the realtime stochastic lightcuts paper + const bool insideBoth = dist2j == 0 && dist2k == 0; + const float wmin_j = (Fj * Ij) / (insideBoth ? 1 : max( 0.0001f, dist2j) ); + const float wmin_k = (Fk * Ik) / (insideBoth ? 1 : max( 0.0001f, dist2k) ); + const float wmax_j = (Fj * Ij) / max( 0.0001f, dist2j_max ); + const float wmax_k = (Fj * Ij) / max( 0.0001f, dist2k_max ); + const float pmin_j = wmin_j / (wmin_j + wmin_k); + const float pmax_j = wmax_j / (wmax_j + wmax_k); + return 0.5f * (pmin_j + pmax_j); +} + // +-----------------------------------------------------------------------------+ // | LightPickProb | // | Calculates the probability with which the specified light woukd be picked | @@ -135,6 +189,33 @@ LH2_DEVFUNC float LightPickProb( int idx, const float3& O, const float3& N, cons #endif } +// +-----------------------------------------------------------------------------+ +// | LightPickProbLTree | +// | Calculates the probability with which the specified light woukd be picked | +// | from the specified world space location and normal using the stochastic | +// | lightcuts approach. LH2'20| +// +-----------------------------------------------------------------------------+ +LH2_DEVFUNC float LightPickProbLTree( int idx, const float3& O, const float3& N, const float3& I, uint& seed ) +{ +#ifndef LIGHTTREE + return LightPickProb( idx, O, N, I ); +#else + LightCluster* tree = lightTree; + int node = idx + 1; // leaf for light i is at index i + 1, see UpdateLightTree in rendercore.cpp. + float pickProb = 1; + while (1) + { + if (node == 0) break; // we are the root node + // determine probability of selecting the current node over its sibling + int parent = __float_as_int( tree[node].N.w /* we abused N.w to store the parent node index */ ); + const float p = CalculateChildNodeWeights( parent, I, N, seed ); + if (tree[parent].left == node) pickProb *= p /* we are the left child */; else pickProb *= 1 - p; + node = parent; + } + return pickProb; +#endif +} + // +-----------------------------------------------------------------------------+ // | RandomPointOnLight | // | Selects a random point on a random light. Returns a position, a normal on | @@ -231,6 +312,88 @@ LH2_DEVFUNC float3 RandomPointOnLight( float r0, float r1, const float3& I, cons } } +// +-----------------------------------------------------------------------------+ +// | RandomPointOnLightLTree | +// | Selects a random point on a random light, using the stochastic lightcuts | +// | approach, via a binary light tree. Default method for the Optix7 core. | +// | Returns a position, a normal on the light source, the pick probability, | +// | and the importance of the explicit connection. LH2'20| +// +-----------------------------------------------------------------------------+ +LH2_DEVFUNC float3 RandomPointOnLightLTree( float r0, float r1, uint& seed, const float3& I, const float3& N, float& pickProb, float& lightPdf, float3& lightColor, const bool debug = false ) +{ +#ifndef LIGHTTREE + return RandomPointOnLight( r0, r1, I, N, pickProb, lightPdf, lightColor ); +#else + LightCluster* tree = lightTree; + int node = 0; // index of root node + int lightIdx = 0; + pickProb = 1; + while (1) + { + if (tree[node].left == -1) + { + // reached a leaf, use this light + lightIdx = tree[node].light; + break; + } + // interior node; randomly pick a child + const float p_j = CalculateChildNodeWeights( node, I, N, seed, debug ); + // decide + if (r1 < p_j) + node = tree[node].left, r1 *= 1.0f / p_j, pickProb *= p_j; + else + node = tree[node].right, r1 = (r1 - p_j) / (1 - p_j), pickProb *= 1 - p_j; + } + if (lightIdx & (1 << 30)) + { + // pick a pointlight + const CorePointLight4& light = (const CorePointLight4&)pointLights[lightIdx - (1 << 30)]; + const float3 P = make_float3( light.data0 ); // position + const float3 L = P - I; + const float sqDist = dot( L, L ); + lightColor = make_float3( light.data1 ) / sqDist; // radiance + lightPdf = dot( L, N ) > 0 ? 1 : 0; + return P; + } + else if (lightIdx & (1 << 29)) + { + // spotlight + const CoreSpotLight4& light = (const CoreSpotLight4&)spotLights[lightIdx - (1 << 29)]; + const float4 V0 = light.data0; // position + cos_inner + const float4 V1 = light.data1; // radiance + cos_outer + const float4 D = light.data2; // direction + const float3 P = make_float3( V0 ); + float3 L = I - P; + const float sqDist = dot( L, L ); + L = normalize( L ); + float d = (max( 0.0f, L.x * D.x + L.y * D.y + L.z * D.z ) - V1.w) / (V0.w - V1.w); + const float LNdotL = min( 1.0f, d ); + lightPdf = (LNdotL > 0 && dot( L, N ) < 0) ? (sqDist / LNdotL) : 0; + lightColor = make_float3( V1 ); + return P; + } + else + { + // light triangle + float3 bary = RandomBarycentrics( r0 ); + const CoreLightTri4& light = (const CoreLightTri4&)triLights[lightIdx]; + const float4 V0 = light.data3; // vertex0 + const float4 V1 = light.data4; // vertex1 + const float4 V2 = light.data5; // vertex2 + lightColor = make_float3( light.data2 ); // radiance + const float4 LN = light.data1; // N + const float3 P = make_float3( bary.x * V0 + bary.y * V1 + bary.z * V2 ); + float3 L = I - P; // reversed: from light to intersection point + const float sqDist = dot( L, L ); + L = normalize( L ); + const float LNdotL = L.x * LN.x + L.y * LN.y + L.z * LN.z; + const float reciSolidAngle = sqDist / (LN.w * LNdotL); // LN.w contains area + lightPdf = (LNdotL > 0 && dot( L, N ) < 0) ? reciSolidAngle : 0; + return P; + } +#endif +} + // +-----------------------------------------------------------------------------+ // | Sample_Le | // | Part of the BDPT core. LH2'19| diff --git a/lib/RenderCore_Optix7Adaptive/kernels/.cuda.cu b/lib/RenderCore_Optix7Adaptive/kernels/.cuda.cu index 2cb154d23..5f416b592 100644 --- a/lib/RenderCore_Optix7Adaptive/kernels/.cuda.cu +++ b/lib/RenderCore_Optix7Adaptive/kernels/.cuda.cu @@ -34,6 +34,7 @@ __constant__ int skywidth; __constant__ int skyheight; __constant__ PathState* pathStates; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -44,7 +45,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -74,6 +75,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) { @@ -151,6 +153,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_Optix7Filter/kernels/.cuda.cu b/lib/RenderCore_Optix7Filter/kernels/.cuda.cu index 65ae74a6a..0a054c690 100644 --- a/lib/RenderCore_Optix7Filter/kernels/.cuda.cu +++ b/lib/RenderCore_Optix7Filter/kernels/.cuda.cu @@ -34,6 +34,7 @@ __constant__ int skywidth; __constant__ int skyheight; __constant__ PathState* pathStates; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -44,7 +45,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -74,6 +75,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) { @@ -117,6 +119,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_Optix7Guiding/kernels/.cuda.cu b/lib/RenderCore_Optix7Guiding/kernels/.cuda.cu index d1de27803..fafe533bf 100644 --- a/lib/RenderCore_Optix7Guiding/kernels/.cuda.cu +++ b/lib/RenderCore_Optix7Guiding/kernels/.cuda.cu @@ -36,6 +36,7 @@ __constant__ int skywidth; __constant__ int skyheight; __constant__ PathState* pathStates; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -46,7 +47,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, GUIDANCE, PMIN, PEXT }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, GUIDANCE, PMIN, PEXT, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -78,6 +79,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); if (id == GUIDANCE) cudaMemcpyToSymbol( guidance, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) @@ -170,6 +172,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_Optix7Guiding/kernels/pathtracer.h b/lib/RenderCore_Optix7Guiding/kernels/pathtracer.h index 26cba752b..918124242 100644 --- a/lib/RenderCore_Optix7Guiding/kernels/pathtracer.h +++ b/lib/RenderCore_Optix7Guiding/kernels/pathtracer.h @@ -65,7 +65,7 @@ LH2_DEVFUNC float3 RandomPointOnLightPNEE( float r0, float r1, const float3& I, if (g1.x == 0) { // nothing here; sample lights uniformly - lightIdx = (int)(r1 * (TRILIGHTCOUNT + POINTLIGHTCOUNT)); + lightIdx = min( (int)(r1 * (TRILIGHTCOUNT + POINTLIGHTCOUNT)), TRILIGHTCOUNT + POINTLIGHTCOUNT - 1 ); pickProb = 1.0f / (TRILIGHTCOUNT + POINTLIGHTCOUNT); } else @@ -374,9 +374,9 @@ void shadeKernel( float4* accumulator, const uint stride, // next event estimation: connect eye path to light if ((FLAGS & S_SPECULAR) == 0 && connections != 0) // skip for specular vertices { - float pickProb, lightPdf = 0; + float pickProb, lightPdf = (pixelIdx == probePixelIdx && pathLength == 1) ? -999 : 0; float3 lightColor, L; - if (pixelIdx % SCRWIDTH < SCRWIDTH / 2) + if (1) // pixelIdx % SCRWIDTH < SCRWIDTH / 2) { float3 jitter = make_float3( 1.0f / PNEErext.x, 1.0f / PNEErext.y, 1.0f / PNEErext.z ); float3 II = make_float3( @@ -410,7 +410,7 @@ void shadeKernel( float4* accumulator, const uint stride, const uint shadowRayIdx = atomicAdd( &counters->shadowRays, 1 ); // compaction connections[shadowRayIdx] = make_float4( SafeOrigin( I, L, N, geometryEpsilon ), 0 ); // O4 connections[shadowRayIdx + stride * 2] = make_float4( L, dist - 2 * geometryEpsilon ); // D4 - connections[shadowRayIdx + stride * 2 * 2] = make_float4( contribution, __int_as_float( pixelIdx ) ); // E4 + connections[shadowRayIdx + stride * 2 * 2] = make_float4( contribution, __int_as_float( 0 ) ); // E4 } } } diff --git a/lib/RenderCore_OptixPrime_B/kernels/.cuda.cu b/lib/RenderCore_OptixPrime_B/kernels/.cuda.cu index ce28fa683..67e111e05 100644 --- a/lib/RenderCore_OptixPrime_B/kernels/.cuda.cu +++ b/lib/RenderCore_OptixPrime_B/kernels/.cuda.cu @@ -33,6 +33,7 @@ __constant__ float4* skyPixels; __constant__ int skywidth; __constant__ int skyheight; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -43,7 +44,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -73,6 +74,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) { @@ -150,6 +152,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_OptixPrime_B/kernels/pathtracer.h b/lib/RenderCore_OptixPrime_B/kernels/pathtracer.h index adbb1f928..566cb7e8c 100644 --- a/lib/RenderCore_OptixPrime_B/kernels/pathtracer.h +++ b/lib/RenderCore_OptixPrime_B/kernels/pathtracer.h @@ -1,4 +1,4 @@ -/* pathtracer.cu - Copyright 2019/2020 Utrecht University +/* pathtracer.h - Copyright 2019/2020 Utrecht University Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/lib/RenderCore_OptixPrime_BDPT/kernels/.cuda.cu b/lib/RenderCore_OptixPrime_BDPT/kernels/.cuda.cu index 6c2b6914e..331c958ae 100644 --- a/lib/RenderCore_OptixPrime_BDPT/kernels/.cuda.cu +++ b/lib/RenderCore_OptixPrime_BDPT/kernels/.cuda.cu @@ -33,6 +33,7 @@ __constant__ float4* skyPixels; __constant__ int skywidth; __constant__ int skyheight; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -50,6 +51,7 @@ __host__ void stageTriLights( CoreLightTri* p ) { stagedcpy( triLights, p ); } __host__ void stagePointLights( CorePointLight* p ) { stagedcpy( pointLights, p ); } __host__ void stageSpotLights( CoreSpotLight* p ) { stagedcpy( spotLights, p ); } __host__ void stageDirectionalLights( CoreDirectionalLight* p ) { stagedcpy( directionalLights, p ); } +__host__ void stageLightTree( LightCluster* t ) { stagedcpy( lightTree, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_OptixPrime_PBRT/kernels/.cuda.cu b/lib/RenderCore_OptixPrime_PBRT/kernels/.cuda.cu index 81d5aae28..ecddb1aea 100644 --- a/lib/RenderCore_OptixPrime_PBRT/kernels/.cuda.cu +++ b/lib/RenderCore_OptixPrime_PBRT/kernels/.cuda.cu @@ -36,6 +36,7 @@ __constant__ float4* skyPixels; __constant__ int skywidth; __constant__ int skyheight; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -55,6 +56,7 @@ __host__ void stageTriLights( CoreLightTri* p ) { stagedcpy( triLights, p ); } __host__ void stagePointLights( CorePointLight* p ) { stagedcpy( pointLights, p ); } __host__ void stageSpotLights( CoreSpotLight* p ) { stagedcpy( spotLights, p ); } __host__ void stageDirectionalLights( CoreDirectionalLight* p ) { stagedcpy( directionalLights, p ); } +__host__ void stageLightTree( LightCluster* t ) { stagedcpy( lightTree, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/RenderCore_PrimeAdaptive/kernels/.cuda.cu b/lib/RenderCore_PrimeAdaptive/kernels/.cuda.cu index f80f5d583..68393b2aa 100644 --- a/lib/RenderCore_PrimeAdaptive/kernels/.cuda.cu +++ b/lib/RenderCore_PrimeAdaptive/kernels/.cuda.cu @@ -33,6 +33,7 @@ __constant__ float4* skyPixels; __constant__ int skywidth; __constant__ int skyheight; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -43,7 +44,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -73,6 +74,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) { @@ -150,6 +152,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/rendercore_optix7/core_settings.h b/lib/rendercore_optix7/core_settings.h index 5b995a162..253cc6b13 100644 --- a/lib/rendercore_optix7/core_settings.h +++ b/lib/rendercore_optix7/core_settings.h @@ -224,6 +224,7 @@ void stageDebugData( float4* p ); void stageGeometryEpsilon( float e ); void stageClampValue( float c ); void stageMemcpy( void* d, void* s, int n ); +void stageLightTree( LightCluster* t ); void pushStagedCopies(); void SetCounters( Counters* p ); diff --git a/lib/rendercore_optix7/kernels/.cuda.cu b/lib/rendercore_optix7/kernels/.cuda.cu index 2cdcb8dfe..eb855f3b1 100644 --- a/lib/rendercore_optix7/kernels/.cuda.cu +++ b/lib/rendercore_optix7/kernels/.cuda.cu @@ -34,6 +34,7 @@ __constant__ int skywidth; __constant__ int skyheight; __constant__ PathState* pathStates; __constant__ float4* debugData; +__constant__ LightCluster* lightTree; __constant__ mat4 worldToSky; @@ -44,7 +45,7 @@ __constant__ __device__ float clampValue; // staging: copies will be batched and carried out after rendering completes, // to allow the CPU to update the scene concurrently with GPU rendering. -enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV }; +enum { INSTS = 0, MATS, TLGHTS, PLGHTS, SLGHTS, DLGHTS, LCNTS, RGB32, RGBH, NRMLS, SKYPIX, SKYW, SKYH, SMAT, DBGDAT, GEPS, CLMPV, LTREE }; // device pointers are not real pointers for nvcc, so we need a bit of a hack. @@ -76,6 +77,7 @@ __host__ static void pushPtrCpy( int id, void* p ) if (id == NRMLS) cudaMemcpyToSymbol( nrm32, &p, sizeof( void* ) ); if (id == SKYPIX) cudaMemcpyToSymbol( skyPixels, &p, sizeof( void* ) ); if (id == DBGDAT) cudaMemcpyToSymbol( debugData, &p, sizeof( void* ) ); + if (id == LTREE) cudaMemcpyToSymbol( lightTree, &p, sizeof( void* ) ); } __host__ static void pushIntCpy( int id, const int v ) { @@ -166,6 +168,7 @@ __host__ void stageWorldToSky( const mat4& worldToLight ) { stageMatCpy( SMAT /* __host__ void stageDebugData( float4* p ) { stagePtrCpy( DBGDAT /* debugData */, p ); } __host__ void stageGeometryEpsilon( float e ) { stageF32Cpy( GEPS /* geometryEpsilon */, e ); } __host__ void stageClampValue( float c ) { stageF32Cpy( CLMPV /* clampValue */, c ); } +__host__ void stageLightTree( LightCluster* t ) { stagePtrCpy( LTREE /* light tree */, t ); } __host__ void stageLightCounts( int tri, int point, int spot, int directional ) { const int4 counts = make_int4( tri, point, spot, directional ); diff --git a/lib/rendercore_optix7/kernels/pathtracer.h b/lib/rendercore_optix7/kernels/pathtracer.h index e7ae40410..02a7abae7 100644 --- a/lib/rendercore_optix7/kernels/pathtracer.h +++ b/lib/rendercore_optix7/kernels/pathtracer.h @@ -142,6 +142,7 @@ void shadeKernel( float4* accumulator, const uint stride, const CoreTri& tri = (const CoreTri&)instanceTriangles[PRIMIDX]; const float lightPdf = CalculateLightPDF( D, HIT_T, tri.area, N ); const float pickProb = LightPickProb( tri.ltriIdx, RAY_O, lastN, I /* the N at the previous vertex */ ); + // const float pickProb = LightPickProbLTree( tri.ltriIdx, RAY_O, lastN, I /* the N at the previous vertex */, seed ); if ((bsdfPdf + lightPdf * pickProb) > 0) contribution = throughput * shadingData.color * (1.0f / (bsdfPdf + lightPdf * pickProb)); } CLAMPINTENSITY; @@ -183,6 +184,7 @@ void shadeKernel( float4* accumulator, const uint stride, { float pickProb, lightPdf = 0; float3 lightColor, L = RandomPointOnLight( r4.x, r4.y, I, fN * faceDir, pickProb, lightPdf, lightColor ) - I; + // float3 lightColor, L = RandomPointOnLightLTree( r4.x, r4.y, seed, I, fN * faceDir, pickProb, lightPdf, lightColor, false ) - I; const float dist = length( L ); L *= 1.0f / dist; const float NdotL = dot( L, fN * faceDir ); diff --git a/lib/rendercore_optix7/rendercore.cpp b/lib/rendercore_optix7/rendercore.cpp index cc9876a86..31c937f8b 100644 --- a/lib/rendercore_optix7/rendercore.cpp +++ b/lib/rendercore_optix7/rendercore.cpp @@ -565,6 +565,121 @@ void RenderCore::SetMaterials( CoreMaterial* mat, const int materialCount ) stageMaterialList( materialBuffer->DevPtr() ); } +// +-----------------------------------------------------------------------------+ +// | RenderCore::UpdateLightTree | +// | Prepare the light BVH for stochastic lightcuts. LH2'20| +// +-----------------------------------------------------------------------------+ +void RenderCore::UpdateLightTreeNormals( const int node ) +{ + LightCluster* treeData = lightTree->HostPtr(); + if (treeData[node].left > -1) + { + UpdateLightTreeNormals( treeData[node].left ); + UpdateLightTreeNormals( treeData[node].right ); + // check normals of children + const float3 Nl = treeData[treeData[node].left].N; + const float3 Nr = treeData[treeData[node].right].N; + if (dot( Nl, Nr ) > 0.9f) + { + // left and right normals are similar enough to be useful + treeData[node].N = normalize( Nl + Nr ); + } + else + { + // store an impossible normal + treeData[node].N = make_float3( 0 ); + } + // store parent index in children + treeData[treeData[node].left].parent = treeData[treeData[node].right].parent = node; + } + else + { + // get normal from light source + const int lightIdx = treeData[node].light; + if (lightIdx & ((1 << 30) + (1 << 29))) + { + // point or spot light + treeData[node].N = make_float3( 0 ); + } + else + { + // triangle light + CoreLightTri& light = triLightBuffer->HostPtr()[lightIdx]; + treeData[node].N = light.N; + } + } +} +int RenderCore::FindBestMatch( int* todo, const int idx, const int N ) +{ + float bestCost = 1e34f, bestIdx = 0; + LightCluster* treeData = lightTree->HostPtr(); + for (int i = 0; i < N; i++) if (i != idx) + { + LightCluster tmp = treeData[todo[idx]]; + tmp.bounds.Grow( treeData[todo[i]].bounds ); + tmp.intensity += treeData[todo[i]].intensity; + float cost = tmp.Cost(); + if (cost < bestCost) bestCost = cost, bestIdx = i; + } + return bestIdx; +} +void RenderCore::UpdateLightTree() +{ + // create an array of triLights + delete lightTree; + int N = triLightBuffer->GetSize(), remaining = N; + int M = pointLightBuffer->GetSize(); + int O = spotLightBuffer->GetSize(); + lightTree = new CoreBuffer( (N + M + O) * 2, ON_HOST | ON_DEVICE | STAGED ); + LightCluster* treeData = lightTree->HostPtr(); + int* todo = new int[N + M + O]; + for (int i = 0; i < N; i++) + treeData[i + 1] = LightCluster( triLightBuffer->HostPtr()[i], i ), // leaf for light i has index i + 1 + todo[i] = i + 1; + for( int i = 0; i < M; i++ ) + treeData[i + 1 + N] = LightCluster( pointLightBuffer->HostPtr()[i], i ), + todo[i + N] = i + 1 + N; + for( int i = 0; i < O; i++ ) + treeData[i + 1 + N + M] = LightCluster( spotLightBuffer->HostPtr()[i], i ), + todo[i + N + M] = i + 1 + N + M; + remaining += M + O; + N += M + O; + // build the BVH, agglomerative + int A = 0; + int B = FindBestMatch( todo, A, remaining ); + while (remaining > 1) + { + int C = FindBestMatch( todo, B, remaining ); + if (A == C) + { + // create a new cluster + treeData[N + 1] = treeData[todo[A]]; + treeData[N + 1].bounds.Grow( treeData[todo[B]].bounds ); + treeData[N + 1].intensity += treeData[todo[B]].intensity; + treeData[N + 1].left = todo[A]; + treeData[N + 1].right = todo[B]; + // delete A and B cluster indices from 'todo' + for (int i = A; i < remaining - 1; i++) todo[i] = todo[i + 1]; // remove A + if (B > A) B--; + for (int i = B; i < remaining - 2; i++) todo[i] = todo[i + 1]; // remove B + remaining -= 2; + // add the new cluster index to 'todo' + todo[remaining] = ++N; + // prepare search for next couple + A = remaining++; + B = FindBestMatch( todo, A, remaining ); + } + else A = B, B = C; + } + // finalize + treeData[0] = treeData[todo[0]]; // put root in convenient place + delete[] todo; + UpdateLightTreeNormals( 0 ); + // copy to device + stageLightTree( lightTree->DevPtr() ); + lightTree->StageCopyToDevice(); +} + // +-----------------------------------------------------------------------------+ // | RenderCore::SetLights | // | Set the light data. LH2'20| @@ -592,6 +707,7 @@ void RenderCore::SetLights( const CoreLightTri* triLights, const int triLightCou stageDirectionalLights( StagedBufferResize( directionalLightBuffer, directionalLightCount, directionalLights ) ); stageLightCounts( triLightCount, pointLightCount, spotLightCount, directionalLightCount ); noDirectLightsInScene = (triLightCount + pointLightCount + spotLightCount + directionalLightCount) == 0; + UpdateLightTree(); } // +-----------------------------------------------------------------------------+ diff --git a/lib/rendercore_optix7/rendercore.h b/lib/rendercore_optix7/rendercore.h index f5d45d9fb..aac37594e 100644 --- a/lib/rendercore_optix7/rendercore.h +++ b/lib/rendercore_optix7/rendercore.h @@ -70,6 +70,9 @@ class RenderCore : public CoreAPI_Base void FinalizeRender(); template T* StagedBufferResize( CoreBuffer*& lightBuffer, const int newCount, const T* sourceData ); void UpdateToplevel(); + int FindBestMatch( int* todo, const int idx, const int N ); + void UpdateLightTreeNormals( const int node ); + void UpdateLightTree(); void SyncStorageType( const TexelStorage storage ); void CreateOptixContext( int cc ); // helpers @@ -97,6 +100,7 @@ class RenderCore : public CoreAPI_Base CoreBuffer* triLightBuffer; // tri lights CoreBuffer* pointLightBuffer; // point lights CoreBuffer* spotLightBuffer; // spot lights + CoreBuffer* lightTree = 0; // light tree for stochastic lightcuts CoreBuffer* directionalLightBuffer; // directional lights CoreBuffer* texel128Buffer = 0; // texel buffer 1: hdr ARGB128 texture data CoreBuffer* normal32Buffer = 0; // texel buffer 2: integer-encoded normals