diff --git a/libraries/hvvr/cuda.props b/libraries/hvvr/cuda.props
index bae6e36..9712a0c 100644
--- a/libraries/hvvr/cuda.props
+++ b/libraries/hvvr/cuda.props
@@ -1,4 +1,4 @@
-
+
@@ -12,9 +12,9 @@
-
+
-
+
\ No newline at end of file
diff --git a/libraries/hvvr/raycaster/camera.cpp b/libraries/hvvr/raycaster/camera.cpp
index 988591b..0e0001e 100644
--- a/libraries/hvvr/raycaster/camera.cpp
+++ b/libraries/hvvr/raycaster/camera.cpp
@@ -22,17 +22,16 @@ namespace hvvr {
SampleData::SampleData(const Sample* rawSamples,
uint32_t rawSampleCount,
uint32_t splitColorSamples,
- const matrix3x3& sampleToCamera,
- ThinLens lens,
+ Sample2Dto3DMappingSettings settings2DTo3D,
uint32_t rtWidth,
uint32_t rtHeight)
- : splitColorSamples(splitColorSamples), lens(lens) {
+ : splitColorSamples(splitColorSamples) {
DynamicArray sortedSamples(rawSampleCount);
for (size_t n = 0; n < rawSampleCount; n++) {
sortedSamples[n] = SortedSample(rawSamples[n], n % splitColorSamples);
}
uint32_t blockCount = uint32_t((sortedSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE);
- // TODO(whunt): allow different clustering methods
+ // TODO: allow different clustering methods
naiveXYCluster(ArrayView(sortedSamples), blockCount);
sampleBounds = {vector2(1.0f, 1.0f), vector2(0.0f, 0.0f)};
@@ -48,15 +47,13 @@ SampleData::SampleData(const Sample* rawSamples,
cullRect.upper.x = INFINITY;
cullRect.upper.y = INFINITY;
validSampleCount = uint32_t(rawSampleCount);
- samples.blockFrusta2D = DynamicArray(blockCount);
- samples.tileFrusta2D = DynamicArray(blockCount * TILES_PER_BLOCK);
samples.blockFrusta3D = DynamicArray(blockCount);
samples.tileFrusta3D = DynamicArray(blockCount * TILES_PER_BLOCK);
- blockedSamplePositions = DynamicArray(blockCount * BLOCK_SIZE * 2);
- blockedSampleExtents = DynamicArray(blockCount * BLOCK_SIZE);
- samples.generate(sortedSamples, blockCount, validSampleCount, cullRect, blockedSamplePositions,
- blockedSampleExtents, lens, sampleToCamera);
- sampleCount = uint32_t(blockCount * BLOCK_SIZE);
+ samples.directionalSamples = DynamicArray(blockCount * BLOCK_SIZE);
+
+ samples2D = SampleHierarchy2D(sortedSamples, blockCount, validSampleCount, cullRect, settings2DTo3D.thinLens,
+ settings2DTo3D.sampleToCamera);
+ samples.generateFrom2D(samples2D, settings2DTo3D);
imageLocationToSampleIndex = DynamicArray(rtWidth * rtHeight * splitColorSamples);
memset(imageLocationToSampleIndex.data(), 0xff, sizeof(int32_t) * imageLocationToSampleIndex.size()); // clear to -1
@@ -67,6 +64,10 @@ SampleData::SampleData(const Sample* rawSamples,
}
}
+void SampleData::generate3Dfrom2D(Sample2Dto3DMappingSettings settings) {
+ samples.generateFrom2D(samples2D, settings);
+}
+
Camera::Camera(const FloatRect& viewport, float apertureRadius, GPUContext& gpuContext)
: _gpuCamera(nullptr), _lens({apertureRadius, 1.0f}), _eyeDir(0.0f, 0.0f, -1.0f) {
setViewport(viewport);
@@ -133,26 +134,25 @@ void Camera::setRenderTarget(const ImageResourceDescriptor& newRenderTarget) {
}
void Camera::setSamples(const Sample* rawSamples, uint32_t rawSampleCount, uint32_t splitColorSamples) {
- setSampleData(SampleData(rawSamples, rawSampleCount, splitColorSamples, getSampleToCamera(), _lens,
+ setSampleData(SampleData(rawSamples, rawSampleCount, splitColorSamples, get2DSampleMappingSettings(),
_renderTarget.width, _renderTarget.height));
}
void Camera::setSampleData(const SampleData& sampleData) {
_sampleData = sampleData;
- uint32_t blockCount = uint32_t(_sampleData.samples.blockFrusta3D.size());
- uint32_t tileCount = uint32_t(_sampleData.samples.tileFrusta3D.size());
+ uint32_t blockCount = uint32_t(sampleData.samples.blockFrusta3D.size());
+ uint32_t tileCount = uint32_t(sampleData.samples.tileFrusta3D.size());
- if (blockCount != _blockFrustaTransformed.size()) {
- _blockFrustaTransformed = DynamicArray(blockCount);
+ if (blockCount != _cpuHierarchy._blockFrusta.size()) {
+ _cpuHierarchy._blockFrusta = DynamicArray(blockCount);
}
- if (tileCount != _tileFrustaTransformed.size()) {
- _tileFrustaTransformed = DynamicArray(tileCount);
+ if (tileCount != _cpuHierarchy._tileFrusta.size()) {
+ _cpuHierarchy._tileFrusta = DynamicArray(tileCount);
}
-
- _gpuCamera->updateConfig(_outputMode, sampleData.imageLocationToSampleIndex.data(),
- sampleData.blockedSamplePositions.data(), sampleData.blockedSampleExtents.data(), _lens,
- sampleData.sampleCount, _renderTarget.width, _renderTarget.height,
+ const DynamicArray& samples = sampleData.samples.directionalSamples;
+ _gpuCamera->updateConfig(_outputFormat, sampleData.imageLocationToSampleIndex.data(), samples.data(), _lens,
+ uint32_t(samples.size()), _renderTarget.width, _renderTarget.height,
uint32_t(_renderTarget.stride), sampleData.splitColorSamples);
}
@@ -160,19 +160,17 @@ const SampleData& Camera::getSampleData() const {
return _sampleData;
}
+const uint32_t Camera::getSampleCount() const {
+ return _gpuCamera != nullptr ? _gpuCamera->validSampleCount : 0;
+}
+
+
matrix3x3 Camera::getSampleToCamera() const {
return matrix3x3(vector3(_viewport.upper.x - _viewport.lower.x, 0, 0),
vector3(0, _viewport.lower.y - _viewport.upper.y, 0),
vector3(_viewport.lower.x, _viewport.upper.y, -1));
}
-matrix4x4 Camera::getSampleToWorld() const {
- return matrix4x4(_cameraToWorld) * matrix4x4(getSampleToCamera());
-}
-
-matrix4x4 Camera::getWorldToSample() const {
- return invert(getSampleToWorld());
-}
void Camera::setCameraToWorld(const transform& cameraToWorld) {
_cameraToWorld = cameraToWorld;
@@ -186,8 +184,37 @@ const vector3& Camera::getTranslation() const {
return _cameraToWorld.translation;
}
-vector3 Camera::getForward() const {
- return vector3(-normalize(getCameraToWorld().m2));
+void Camera::setupRenderTarget(GPUContext& context) {
+ if (!getEnabled())
+ return;
+ GPUCamera* gpuCamera = _gpuCamera;
+ if (_renderTarget.isHardwareRenderTarget() && _newHardwareTarget) {
+ gpuCamera->bindTexture(context, _renderTarget);
+ _newHardwareTarget = false;
+ }
+}
+
+void Camera::extractImage() {
+ GPUCamera* gpuCamera = _gpuCamera;
+ if (_renderTarget.isHardwareRenderTarget()) {
+ gpuCamera->copyImageToBoundTexture();
+ } else {
+ gpuCamera->copyImageToCPU((uint32_t*)_renderTarget.data, _renderTarget.width, _renderTarget.height,
+ uint32_t(_renderTarget.stride));
+ }
+}
+
+Sample2Dto3DMappingSettings Camera::get2DSampleMappingSettings() const {
+ if (_fovXDegrees > 0.0f) {
+ return Sample2Dto3DMappingSettings::sphericalSection(getSampleToCamera(), _lens, _fovXDegrees, _fovYDegrees);
+ } else {
+ return Sample2Dto3DMappingSettings(getSampleToCamera(), _lens);
+ }
+}
+
+void Camera::setSphericalWarpSettings(float fovXDegrees, float fovYDegrees) {
+ _fovXDegrees = fovXDegrees;
+ _fovYDegrees = fovYDegrees;
}
} // namespace hvvr
diff --git a/libraries/hvvr/raycaster/camera.h b/libraries/hvvr/raycaster/camera.h
index a7107bf..a406ef0 100644
--- a/libraries/hvvr/raycaster/camera.h
+++ b/libraries/hvvr/raycaster/camera.h
@@ -10,8 +10,9 @@
*/
#include "dynamic_array.h"
-#include "graphics_types.h"
#include "foveated.h"
+#include "gpu_samples.h"
+#include "graphics_types.h"
#include "sample_hierarchy.h"
#include "samples.h"
@@ -23,36 +24,36 @@ namespace hvvr {
class GPUCamera;
class GPUContext;
+
// preprocessed samples, ready for rendering
struct SampleData {
+ SampleHierarchy2D samples2D;
+ Sample2Dto3DMappingSettings settings2DTo3D;
SampleHierarchy samples;
uint32_t splitColorSamples = 1;
- uint32_t sampleCount;
DynamicArray imageLocationToSampleIndex;
- // Flat array of sample positions (in vector2 format) without fancy swizzling for CPU vectorization
- DynamicArray blockedSamplePositions;
- DynamicArray blockedSampleExtents;
FloatRect sampleBounds = {{0.0f, 0.0f}, {0.0f, 0.0f}};
uint32_t validSampleCount = 0;
- ThinLens lens = {0.0f, 5.0f};
SampleData(){};
SampleData(const Sample* rawSamples,
uint32_t rawSampleCount,
uint32_t splitColorSamples,
- const matrix3x3& sampleToCamera,
- ThinLens lens,
+ Sample2Dto3DMappingSettings settings2DTo3D,
uint32_t rtWidth,
uint32_t rtHeight);
+ void generate3Dfrom2D(Sample2Dto3DMappingSettings settings);
};
+
// TODO(anankervis): merge with GPU version of this class
class Camera {
friend class Raycaster;
// TODO(anankervis): remove
friend void polarSpaceFoveatedSetup(Raycaster* raycaster);
+
public:
Camera(const FloatRect& viewport, float apertureRadius, GPUContext& gpuContext);
~Camera();
@@ -84,27 +85,35 @@ class Camera {
void setRenderTarget(const ImageResourceDescriptor& newRenderTarget);
void setSamples(const Sample* rawSamples, uint32_t rawSampleCount, uint32_t splitColorSamples);
+ // If called with nonzero values, this camera uses a spherical section for ray generation
+ // (instead of the standard perspective transform).
+ void setSphericalWarpSettings(float fovXDegrees, float fovYDegrees);
+
void setSampleData(const SampleData& sampleData);
const SampleData& getSampleData() const;
+ const uint32_t getSampleCount() const;
matrix3x3 getSampleToCamera() const;
- // Beware - this isn't actually suitable for taking a 2D sample coordinate + Z and converting to world space.
- // Samples can be in any arbitrary space, packing, or function we choose. What's important is that when we
- // unpack them, they turn into camera-relative 3D rays (origin offset + direction). From there, we can convert
- // into world space using cameraToWorld.
- matrix4x4 getSampleToWorld() const;
- matrix4x4 getWorldToSample() const;
+
void setCameraToWorld(const transform& cameraToWorld);
matrix4x4 getCameraToWorld() const;
const vector3& getTranslation() const;
- vector3 getForward() const;
+
+ void setupRenderTarget(GPUContext& context);
+ void extractImage();
protected:
+ Sample2Dto3DMappingSettings get2DSampleMappingSettings() const;
+
+ float _fovXDegrees = 0.0f;
+ float _fovYDegrees = 0.0f;
+
// TODO(anankervis): clean up direct access of protected members by Raycaster
GPUCamera* _gpuCamera;
- matrix4x4 _worldToEyePrevious = matrix4x4::identity();
+ // Initialize to an invalid transform since there is no previous frame on the initial frame
+ matrix4x4 _worldToEyePrevious = matrix4x4::zero();
matrix3x3 _eyePreviousToSamplePrevious = matrix3x3::identity();
// Incremeted on every render
@@ -116,13 +125,16 @@ class Camera {
ThinLens _lens = {0.0f, 1.0f};
bool _enabled = true;
ImageResourceDescriptor _renderTarget;
- RaycasterOutputMode _outputMode = RaycasterOutputMode::COLOR_RGBA8;
+ RaycasterOutputFormat _outputFormat = RaycasterOutputFormat::COLOR_RGBA8;
FoveatedSampleData _foveatedSampleData;
+
// Only for polar foveated sampling
std::vector _polarRemapToPixel;
- DynamicArray _blockFrustaTransformed;
- DynamicArray _tileFrustaTransformed;
+ struct CPUHierarchy {
+ DynamicArray _blockFrusta;
+ DynamicArray _tileFrusta;
+ } _cpuHierarchy;
transform _cameraToWorld = transform::identity();
diff --git a/libraries/hvvr/raycaster/cuda_util.h b/libraries/hvvr/raycaster/cuda_util.h
index d5820d9..11f6ad8 100644
--- a/libraries/hvvr/raycaster/cuda_util.h
+++ b/libraries/hvvr/raycaster/cuda_util.h
@@ -19,8 +19,9 @@
#define cutilSafeCall(error) __cudaSafeCall(error, __FILE__, __LINE__)
inline void __cudaSafeCall(cudaError_t error, const char* file, const int line) {
if (error != cudaSuccess) {
- fprintf(stderr, "error: CudaSafeCall() failed at %s:%d with %s\n", file, line, cudaGetErrorString(error));
-#ifdef _WIN32
+ fprintf(stderr, "error %d: CudaSafeCall() failed at %s:%d with %s\n", error, file, line,
+ cudaGetErrorString(error));
+#if defined(_WIN32)
__debugbreak();
#else
exit(error);
@@ -56,4 +57,75 @@ struct KernelDim {
}
};
-#define CUDA_INF __int_as_float(0x7f800000)
\ No newline at end of file
+#define CUDA_INF __int_as_float(0x7f800000)
+
+
+// Based on https://stackoverflow.com/questions/52286202/dynamic-dispatch-to-template-function-c
+// Use to generate all template function permutations and dispatch properly at runtime for a prefix of template booleans
+// Makes calling cuda kernels with many permutations concise.
+// Example:
+// Change
+// if (b0) {
+// if (b1) {
+// if (b2) {
+// myFunc(args);
+// }
+// else {
+// myFunc(args);
+// }
+// } else {
+// if (b2) {
+// myFunc(args);
+// }
+// else {
+// myFunc(args);
+// }
+// }
+// } else {
+// if (b1) {
+// if (b2) {
+// myFunc(args);
+// }
+// else {
+// myFunc(args);
+// }
+// } else {
+// if (b2) {
+// myFunc(args);
+// }
+// else {
+// myFunc(args);
+// }
+// }
+// }
+// into:
+// std::array bargs = { { b0, b1, b2 } };
+// dispatch_bools<3>{}(bargs, [&](auto...Bargs) {
+// myFunc(args);
+// });
+//
+// You may want to #pragma warning( disable : 4100) around the call, since there will be unreferenced Bargs in the call
+// chain
+template
+using kbool = std::integral_constant;
+
+#pragma warning(push)
+#pragma warning(disable : 4100)
+template
+struct dispatch_bools {
+ template
+ void operator()(std::array const& input, F&& continuation, Bools...) {
+ if (input[max - 1])
+ dispatch_bools{}(input, continuation, kbool{}, Bools{}...);
+ else
+ dispatch_bools{}(input, continuation, kbool{}, Bools{}...);
+ }
+};
+template <>
+struct dispatch_bools<0> {
+ template
+ void operator()(std::array const& input, F&& continuation, Bools...) {
+ continuation(Bools{}...);
+ }
+};
+#pragma warning(pop)
diff --git a/libraries/hvvr/raycaster/foveated.cpp b/libraries/hvvr/raycaster/foveated.cpp
index e8d47ca..1ac4ddb 100644
--- a/libraries/hvvr/raycaster/foveated.cpp
+++ b/libraries/hvvr/raycaster/foveated.cpp
@@ -17,7 +17,7 @@
namespace hvvr {
-void generateRemapForFoveatedSamples(ArrayView unsortedSamples,
+void generateRemapForFoveatedSamples(ArrayView unsortedSamples,
ArrayView remap) { // Create a remapping from the original directional
// samples to ones that are binned decently
@@ -25,14 +25,18 @@ void generateRemapForFoveatedSamples(ArrayView unsortedSamples,
vector2 position; // eccentricity,theta (octahedral is a bad match for the naive clustering algorithm)
uint32_t originalIndex;
};
+ // There is a massive performance difference between using eccentricity for the y coordinate and the x coordinate in
+ // this clustering. This points to this clustering being unstable and we might want a better algorithm. ~3.5x
+ // performance improvement in intersection when eccentricity is the y coordinate, which leads to better distribution
+ // (for default foveation settings at least)
DynamicArray toSort(unsortedSamples.size());
for (size_t i = 0; i < toSort.size(); ++i) {
- vector3 v = unsortedSamples[i].direction;
+ vector3 v = unsortedSamples[i].centerRay;
// Eccentricity angle
- toSort[i].position.x = acosf(-v.z);
- vector3 dir = vector3(v.x, v.y, 0.0f);
+ toSort[i].position.y = acosf(-v.z);
+ vector3 dir = normalize(vector3(v.x, v.y, 0.0f));
// Angle of rotation about z, measured from x
- toSort[i].position.y = atan2f(normalize(dir).y, normalize(dir).x);
+ toSort[i].position.x = atan2f(dir.y, dir.x);
toSort[i].originalIndex = (uint32_t)i;
}
auto blockCount = (toSort.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -43,82 +47,31 @@ void generateRemapForFoveatedSamples(ArrayView unsortedSamples,
}
}
-void getEccentricityRemap(std::vector& eccentricityRemap,
- const std::vector& ringEccentricities,
- float maxEccentricityRadians,
- size_t mapSize) {
- eccentricityRemap.resize(mapSize);
- for (int i = 0; i < eccentricityRemap.size(); ++i) {
- float eccentricity = ((i + 0.5f) / eccentricityRemap.size()) * maxEccentricityRadians;
- int j = 0;
- bool inBetween = false;
- while (j < ringEccentricities.size()) {
- if (eccentricity < ringEccentricities[j]) {
- inBetween = true;
- break;
- }
- ++j;
- }
- // TODO: fit spline instead of piecewise linear approx
- if (j == 0) {
- eccentricityRemap[i] = ((eccentricity / ringEccentricities[0]) * 0.5f) / ringEccentricities.size();
- } else if (inBetween) {
- float lower = ringEccentricities[j - 1];
- float higher = ringEccentricities[j];
- float alpha = (eccentricity - lower) / (higher - lower);
- eccentricityRemap[i] = (((j - 1) * (1.0f - alpha) + j * alpha) + 0.5f) / ringEccentricities.size();
- } else {
- size_t lastIndex = ringEccentricities.size() - 1;
- float lastDiff = (ringEccentricities[lastIndex] - ringEccentricities[lastIndex - 1]);
- float extrapolation = (eccentricity - ringEccentricities[lastIndex]) / lastDiff;
- eccentricityRemap[i] =
- min(1.0f, (ringEccentricities.size() - 0.5f + extrapolation) / ringEccentricities.size());
- }
- }
-}
-
void generateEyeSpacePolarFoveatedSampleData(FoveatedSampleData& foveatedSampleData,
std::vector& polarRemapToPixel,
- std::vector& ringEccentricities,
- std::vector& eccentricityRemap,
+ EccentricityMap& eccentricityMap,
size_t& samplesPerRing,
RayCasterSpecification::FoveatedSamplePattern pattern) {
- if (foveatedSampleData.eyeSpaceSamples.size() == 0) {
- DynamicArray unsortedEyeSpaceSamples = getEyeSpacePolarFoveatedSamples(
- ringEccentricities, samplesPerRing, pattern.degreeTrackingError, pattern.minMAR, pattern.maxMAR,
- pattern.maxFOVDegrees, pattern.marSlope, pattern.fovealMARDegrees, pattern.zenithJitterStrength,
- pattern.ringJitterStrength);
+ if (foveatedSampleData.samples.directionalSamples.size() == 0) {
+ DynamicArray unsortedEyeSpaceSamples =
+ getEyeSpacePolarFoveatedSamples(samplesPerRing, eccentricityMap, pattern.degreeTrackingError,
+ pattern.maxFOVDegrees, pattern.marSlope, pattern.fovealMARDegrees);
+
printf("Generated eyes space foveated samples: %d, in %dx%d polar grid (azimuth x zenith)\n",
uint32_t(unsortedEyeSpaceSamples.size()), uint32_t(samplesPerRing),
uint32_t(unsortedEyeSpaceSamples.size() / samplesPerRing));
- float maxEccentricityRadians = pattern.maxFOVDegrees * RadiansPerDegree;
- getEccentricityRemap(eccentricityRemap, ringEccentricities, maxEccentricityRadians, 10000);
{
DynamicArray oldToNewRemap(unsortedEyeSpaceSamples.size());
generateRemapForFoveatedSamples(unsortedEyeSpaceSamples, oldToNewRemap);
- foveatedSampleData.eyeSpaceSamples = DynamicArray(unsortedEyeSpaceSamples.size());
- for (size_t i = 0; i < unsortedEyeSpaceSamples.size(); ++i) {
- foveatedSampleData.eyeSpaceSamples[oldToNewRemap[i]] = unsortedEyeSpaceSamples[i];
- }
+ foveatedSampleData.samples.directionalSamples = DynamicArray(unsortedEyeSpaceSamples.size());
polarRemapToPixel.resize(unsortedEyeSpaceSamples.size());
for (size_t i = 0; i < unsortedEyeSpaceSamples.size(); ++i) {
+ foveatedSampleData.samples.directionalSamples[oldToNewRemap[i]] = unsortedEyeSpaceSamples[i];
polarRemapToPixel[oldToNewRemap[i]] = {(uint32_t)(i % samplesPerRing), (uint32_t)(i / samplesPerRing)};
}
}
- {
- foveatedSampleData.precomputedEyeSpaceSamples =
- DynamicArray(foveatedSampleData.eyeSpaceSamples.size());
- for (size_t i = 0; i < unsortedEyeSpaceSamples.size(); ++i) {
- auto& p = foveatedSampleData.precomputedEyeSpaceSamples[i];
- p.center = foveatedSampleData.eyeSpaceSamples[i].direction;
- p.d1 = p.center + foveatedSampleData.eyeSpaceSamples[i].azimuthalDifferential;
- p.d2 = p.center + foveatedSampleData.eyeSpaceSamples[i].zenithDifferential;
- }
- }
auto blockCount = (unsortedEyeSpaceSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
// Allocate the most you will ever need to prevent per-frame allocation
- foveatedSampleData.samples.blockFrusta2D = DynamicArray(blockCount);
- foveatedSampleData.samples.tileFrusta2D = DynamicArray(blockCount * TILES_PER_BLOCK);
foveatedSampleData.samples.blockFrusta3D = DynamicArray(blockCount);
foveatedSampleData.samples.tileFrusta3D = DynamicArray(blockCount * TILES_PER_BLOCK);
}
@@ -129,21 +82,20 @@ void polarSpaceFoveatedSetup(Raycaster* raycaster) {
if (!camera->getEnabled())
continue;
// Generate eye space samples if necessary
- if (camera->_foveatedSampleData.eyeSpaceSamples.size() == 0) {
+ if (camera->_foveatedSampleData.samples.directionalSamples.size() == 0) {
size_t samplesPerRing;
- std::vector eccentricityRemap;
- std::vector ringEccentricities;
+ EccentricityMap eccentricityMap;
generateEyeSpacePolarFoveatedSampleData(camera->_foveatedSampleData, camera->_polarRemapToPixel,
- ringEccentricities, eccentricityRemap, samplesPerRing,
+ eccentricityMap, samplesPerRing,
raycaster->_spec.foveatedSamplePattern);
float maxEccentricityRadians = raycaster->_spec.foveatedSamplePattern.maxFOVDegrees * RadiansPerDegree;
size_t paddedSampleCount =
- ((camera->_foveatedSampleData.eyeSpaceSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
+ ((camera->_foveatedSampleData.samples.directionalSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
camera->_gpuCamera->registerPolarFoveatedSamples(camera->_polarRemapToPixel, maxEccentricityRadians,
- ringEccentricities, eccentricityRemap,
- uint32_t(samplesPerRing), uint32_t(paddedSampleCount));
- camera->_gpuCamera->updateEyeSpaceFoveatedSamples(camera->_foveatedSampleData.precomputedEyeSpaceSamples);
+ eccentricityMap, uint32_t(samplesPerRing),
+ uint32_t(paddedSampleCount));
+ camera->_gpuCamera->updateEyeSpaceFoveatedSamples(camera->_foveatedSampleData.samples.directionalSamples);
camera->_foveatedSampleData.simpleBlockFrusta =
DynamicArray(camera->_foveatedSampleData.samples.blockFrusta3D.size());
camera->_foveatedSampleData.simpleTileFrusta =
diff --git a/libraries/hvvr/raycaster/foveated.h b/libraries/hvvr/raycaster/foveated.h
index 24de722..66621e9 100644
--- a/libraries/hvvr/raycaster/foveated.h
+++ b/libraries/hvvr/raycaster/foveated.h
@@ -9,37 +9,117 @@
* of patent rights can be found in the PATENTS file in the same directory.
*/
+#include "graphics_types.h"
#include "raycaster_spec.h"
#include "sample_hierarchy.h"
-#include "graphics_types.h"
+#include
+#include
namespace hvvr {
class Raycaster;
-// precomputed eye-space derivatives of the direction sample
-struct PrecomputedDirectionSample {
- vector3 center; // original ray
- vector3 d1; // differential ray 1
- vector3 d2; // differential ray 2
+struct FoveatedSampleData {
+ size_t validSampleCount = 0;
+ size_t blockCount = 0;
+ SampleHierarchy samples;
+ DynamicArray simpleTileFrusta;
+ DynamicArray simpleBlockFrusta;
};
-struct FoveatedSampleData {
- uint32_t triangleCount = 0;
- size_t validSampleCount = 0;
- size_t blockCount = 0;
- SampleHierarchy samples;
- DynamicArray simpleTileFrusta;
- DynamicArray simpleBlockFrusta;
- DynamicArray eyeSpaceSamples;
- DynamicArray precomputedEyeSpaceSamples;
+/* Abstraction of the mapping R -> E where:
+R is the space of ring coordinates and
+E is the eccentricity, given in degrees
+*/
+struct EccentricityMap {
+ float m;
+ float w_0;
+ float S;
+
+
+ // Precomputed potentially expensive intermediates
+ // 1 / log(m+1)
+ float invLogA;
+ // 1 / w_0
+ float invW_0;
+ // 1 / m
+ float invM;
+
+ EccentricityMap() {}
+ EccentricityMap(float marSlope, float maxMARDegrees, float maxResolutionDegrees) {
+ m = marSlope;
+ w_0 = maxMARDegrees * RadiansPerDegree;
+ S = maxResolutionDegrees * RadiansPerDegree;
+
+ // Compute in double precision since it only happens once
+ invLogA = float(1.0 / log(1.0 + (double)m));
+ invW_0 = float(1.0 / w_0);
+ invM = float(1.0 / m);
+ }
+ /**
+ Mapping from ring coordinates to eccentricity, where E(0) = 0, and we don't start
+ the resolution falloff until the eccentricity is S
+ let S = maxEyeTrackingUncertaintyDegrees
+
+ This is a function that is a continuous generalization of this recurrence:
+ w(n) = m * max(E(n) - S, 0) + w_0
+ E(n+1) = E(n) + w(n)
+
+ The character of the function is obviously different starting at E(n) - S = 0, so we can
+ split it piecewise
+ w(n) = w_0 | E(n) <= S
+ w(n) = m*(E(n)-S) + w_0 | E(n) > S
+
+ For E(n) <= S, this results in the linear equation:
+ E(n) = w_0*n
+
+ Eccentricity initially increases at a constant rate (within the radius of uncertainty).
+
+ Now for the more complicated case
+
+ Solving a recurrence relation for the hyperbolic falloff (after a simple coordinate transform):
+ let x = (w_0*n-S) / w_0
+ let a = 1+m
+ Then we are solving for g(x) where:
+ g(x+1) = a*g(x)+w_0
+ and
+ g(0) = 0
+ Which gives us (https://m.wolframalpha.com/input/?i=g%28n%2B1%29%3Da*g%28n%29%2Bw%2C+g%280%29%3D0):
+ g(x) = (w_0*(a^x-1))/(a-1)
+
+ Then we can transform the coordinates back:
+ E(n) = g((w_0*n-S) / w_0)+S
+
+ We can invert g to help get a map from eccentricity to ring location:
+ g = (w_0*(a^x-1))/(a-1)
+ (a-1)g/w_0=(a^x-1)
+ a^x=(a-1)g/w_0+1
+ x = ln((a-1)g/w_0+1)/ln(a)
+
+ */
+ // Transform ring coordinates to eccentricity, given in radians
+ CHD float apply(float i) {
+ float e0 = i * w_0;
+ float x = (e0 - S) * invW_0;
+ float g_x = (w_0 * (powf(1.0f + m, x) - 1.0f)) * invM;
+ float e1 = g_x + S;
+ return (e0 < S) ? e0 : e1;
+ };
+ // Transform eccentricity, given in radians, to ring coordinates
+ CHD float applyInverse(float E) {
+ float i0 = E * invW_0;
+ float sDivW = S * invW_0;
+ float x = logf((m * (i0 - sDivW)) + 1.0f) * invLogA;
+ float i1 = x + sDivW;
+ return (E < S) ? i0 : i1;
+ };
};
+
// TODO(anankervis): merge into Raycaster class
void generateEyeSpacePolarFoveatedSampleData(FoveatedSampleData& foveatedSampleData,
std::vector& polarRemapToPixel,
- std::vector& ringEccentricities,
- std::vector& eccentricityRemap,
+ EccentricityMap& eccentricityMap,
size_t& samplesPerRing,
RayCasterSpecification::FoveatedSamplePattern pattern);
diff --git a/libraries/hvvr/raycaster/frusta.cu b/libraries/hvvr/raycaster/frusta.cu
index 45edb9c..aace58a 100644
--- a/libraries/hvvr/raycaster/frusta.cu
+++ b/libraries/hvvr/raycaster/frusta.cu
@@ -7,7 +7,6 @@
* of patent rights can be found in the PATENTS file in the same directory.
*/
-#include "traversal.h"
#include "constants_math.h"
#include "cuda_decl.h"
#include "foveated.h"
@@ -18,40 +17,36 @@
#include "kernel_constants.h"
#include "memory_helpers.h"
#include "sort.h"
+#include "traversal.h"
#include "vector_math.h"
#include "warp_ops.h"
namespace hvvr {
-// TODO(mmara): Handle beyond single point of origin rays
-void ComputeEyeSpaceFrusta(const GPUBuffer& dirSamples,
+void ComputeEyeSpaceFrusta(const GPUBuffer& dirSamples,
GPUBuffer& tileFrusta,
GPUBuffer& blockFrusta) {
- DynamicArray samples = makeDynamicArray(dirSamples);
+ DynamicArray samples = makeDynamicArray(dirSamples);
DynamicArray tFrusta = makeDynamicArray(tileFrusta);
DynamicArray bFrusta = makeDynamicArray(blockFrusta);
+ const bool checkFrustaAccuracy = false;
+ const bool printStats = false;
+
auto generateFrusta = [](DynamicArray& frusta, unsigned int frustaSampleCount,
- const DynamicArray& samples, float slopFactor,
- int numOrientationsToTry) {
- auto toDir = [](const matrix3x3& rot, float u, float v) {
- return rot * normalize(vector3(u, v, 1.0f));
- };
+ const DynamicArray& samples, float slopFactor, int numOrientationsToTry) {
+ auto toDir = [](const matrix3x3& rot, float u, float v) { return rot * normalize(vector3(u, v, 1.0f)); };
for (int i = 0; i < frusta.size(); ++i) {
int sBegin = i * frustaSampleCount;
int sEnd = min((int)((i + 1) * frustaSampleCount), (int)samples.size());
vector3 dominantDirection(0.0f);
for (int s = sBegin; s < sEnd; ++s) {
- // printf("samples[%d].center : %f, %f, %f\n", s, samples[s].center.x, samples[s].center.y,
- // samples[s].center.z);
- dominantDirection += samples[s].center;
+ dominantDirection += samples[s].centerRay;
}
dominantDirection = normalize(dominantDirection);
- // printf("Dominant Direction %d: %f, %f, %f\n", i, dominantDirection.x, dominantDirection.y,
- // dominantDirection.z);
- // Try several different orientations for the plane, pick the one that
+ // Try several different orientations for the plane, pick the one that
// gives the smallest bounding box in uv space
matrix3x3 rot(matrix3x3::rotationFromZAxis(dominantDirection));
float bestUVArea = INFINITY;
@@ -59,22 +54,23 @@ void ComputeEyeSpaceFrusta(const GPUBuffer& dirSampl
vector2 bestMinUV = vector2(INFINITY);
vector2 bestMaxUV = vector2(-INFINITY);
for (int o = 0; o < numOrientationsToTry; ++o) {
- matrix3x3 currRot = matrix3x3::axisAngle(vector3(0, 0, 1), (Pi * o / float(numOrientationsToTry))) * rot;
+ const float range = (Pi / 2.0f) * 0.8f;
+ matrix3x3 currRot =
+ matrix3x3::axisAngle(vector3(0, 0, 1), (range * o / float(numOrientationsToTry)) - (range / 2.0f)) *
+ rot;
matrix3x3 invCurrRot = invert(currRot);
vector2 minUV = vector2(INFINITY);
vector2 maxUV = vector2(-INFINITY);
for (int s = sBegin; s < sEnd; ++s) {
- vector3 v = invCurrRot * samples[s].center;
+ vector3 v = invCurrRot * samples[s].centerRay;
vector2 uv = vector2(v.x / v.z, v.y / v.z);
-
// TODO: check math here
- v = invCurrRot * samples[s].d1;
+ v = invCurrRot * (samples[s].du + samples[s].centerRay);
float uvRadius = length(uv - vector2(v.x / v.z, v.y / v.z));
- v = invCurrRot * samples[s].d2;
+ v = invCurrRot * (samples[s].dv + samples[s].centerRay);
uvRadius = max(uvRadius, length(uv - vector2(v.x / v.z, v.y / v.z)));
// slop; TODO: is this necessary, or can we do something more principled?
uvRadius *= slopFactor;
-
minUV = min(minUV, uv - uvRadius);
maxUV = max(maxUV, uv + uvRadius);
}
@@ -96,147 +92,44 @@ void ComputeEyeSpaceFrusta(const GPUBuffer& dirSampl
f.directions[2] = toDir(bestRot, bestMaxUV.x, bestMinUV.y);
f.directions[3] = toDir(bestRot, bestMinUV.x, bestMinUV.y);
-#if 0
- // Make sure all samples points are within the frustum...
- RayPacketFrustum3D checker(f);
- for (int s = sBegin; s < sEnd; ++s) {
- vector4 v = toVec(samples[s].center);
- if (!checker.testPoint(v)) {
- printf("TROUBLE!\n");
+ if (printStats) {
+ for (int o = 0; o < 4; ++o) {
+ printf("f[%d].directions[%d]: %f, %f, %f\n", i, o, f.directions[o].x, f.directions[o].y,
+ f.directions[o].z);
}
+ printf("f[%d].bestUVArea: %f\n", i, bestUVArea);
+ printf("Dominant Direction: %f %f %f\n", dominantDirection.x, dominantDirection.y, dominantDirection.z);
}
-#endif
-
- for (int o = 0; o < 4; ++o) {
- printf("f[%d].directions[%d]: %f, %f, %f\n", i, o, f.directions[o].x, f.directions[o].y,
- f.directions[o].z);
+ if (checkFrustaAccuracy) {
+ // Make sure all samples points are within the frustum...
+ RayPacketFrustum3D checker(f);
+ for (int s = sBegin; s < sEnd; ++s) {
+ auto C = samples[s].centerRay;
+ if (!checker.testPoint(C)) {
+ printf("TROUBLE: f[%d]: s[%d]:%f %f %f \n", i, s, C.x, C.y, C.z);
+ }
+ }
}
- printf("f[%d].bestUVArea: %f\n", i, bestUVArea);
frusta[i] = f;
}
};
- generateFrusta(tFrusta, TILE_SIZE, samples, 4.0f, 64);
- generateFrusta(bFrusta, BLOCK_SIZE, samples, 4.0f, 64);
+ generateFrusta(tFrusta, TILE_SIZE, samples, 2.0f, 63);
+ generateFrusta(bFrusta, BLOCK_SIZE, samples, 2.0f, 63);
tileFrusta = makeGPUBuffer(tFrusta);
blockFrusta = makeGPUBuffer(bFrusta);
}
-CUDA_KERNEL void CalculateSampleCullFrustaKernel(GPURayPacketFrustum* d_blockFrusta,
- GPURayPacketFrustum* d_tileFrusta,
- SampleInfo sampleInfo,
- const uint32_t sampleCount) {
- uint32_t index = blockIdx.x * blockDim.x + threadIdx.x;
- uint32_t tileIndex, blockIndex;
- float minLocX = CUDA_INF;
- float minLocY = CUDA_INF;
- float negMaxLocX = CUDA_INF;
- float negMaxLocY = CUDA_INF;
- if (index < sampleCount) {
- tileIndex = index / TILE_SIZE;
- blockIndex = index / BLOCK_SIZE;
- // TODO: see if we can cut down on work here
- UnpackedSample s = GetFullSample(index, sampleInfo);
- vector2 location = s.center;
- float radius = sampleInfo.extents[index].majorAxisLength;
- if (location.x != CUDA_INF) {
- minLocX = location.x - radius;
- minLocY = location.y - radius;
- negMaxLocX = -(location.x + radius);
- negMaxLocY = -(location.y + radius);
- }
- }
- // Do warp reduction
- auto minOp = [](float a, float b) -> float { return min(a, b); };
- minLocX = warpReduce(minLocX, minOp);
- minLocY = warpReduce(minLocY, minOp);
- negMaxLocX = warpReduce(negMaxLocX, minOp);
- negMaxLocY = warpReduce(negMaxLocY, minOp);
- int lane = threadIdx.x % WARP_SIZE;
- // All min values are in lane 0, go ahead and atomic min the results
- if (lane == 0 && (index < sampleCount)) {
- // No native float atomics, so we need to encode to handle our floats
- atomicMin((uint32_t*)(&d_tileFrusta[tileIndex].xMin), FloatFlipF(minLocX));
- atomicMin((uint32_t*)(&d_tileFrusta[tileIndex].yMin), FloatFlipF(minLocY));
- atomicMin((uint32_t*)(&d_tileFrusta[tileIndex].xNegMax), FloatFlipF(negMaxLocX));
- atomicMin((uint32_t*)(&d_tileFrusta[tileIndex].yNegMax), FloatFlipF(negMaxLocY));
-
- atomicMin((uint32_t*)(&d_blockFrusta[blockIndex].xMin), FloatFlipF(minLocX));
- atomicMin((uint32_t*)(&d_blockFrusta[blockIndex].yMin), FloatFlipF(minLocY));
- atomicMin((uint32_t*)(&d_blockFrusta[blockIndex].xNegMax), FloatFlipF(negMaxLocX));
- atomicMin((uint32_t*)(&d_blockFrusta[blockIndex].yNegMax), FloatFlipF(negMaxLocY));
- }
-}
-CUDA_KERNEL void DecodeSampleCullFrustaKernel(GPURayPacketFrustum* d_blockFrusta,
- uint32_t blockCount,
- GPURayPacketFrustum* d_tileFrusta,
- uint32_t tileCount) {
- const uint32_t index = blockIdx.x * blockDim.x + threadIdx.x;
- if (index < blockCount + tileCount) {
- uint32_t* ptr;
- if (index < blockCount) {
- ptr = (uint32_t*)(&d_blockFrusta[index]);
- } else {
- ptr = (uint32_t*)(&d_tileFrusta[index - blockCount]);
- }
- for (int i = 0; i < 4; ++i) {
- ptr[i] = IFloatFlip(ptr[i]);
- }
- }
-}
-
-CUDA_KERNEL void ResetCullFrustaKernel(GPURayPacketFrustum* d_blockFrusta,
- uint32_t blockCount,
- GPURayPacketFrustum* d_tileFrusta,
- uint32_t tileCount) {
- const uint32_t index = blockIdx.x * blockDim.x + threadIdx.x;
- if (index < blockCount + tileCount) {
- GPURayPacketFrustum* ptr;
- if (index < blockCount) {
- ptr = &d_blockFrusta[index];
- } else {
- ptr = &d_tileFrusta[index - blockCount];
- }
- // Set to -INFINITY, as its the FlipFloat encoding of +INFINITY
- ptr[0].xMin = -CUDA_INF;
- ptr[0].xNegMax = -CUDA_INF;
- ptr[0].yMin = -CUDA_INF;
- ptr[0].yNegMax = -CUDA_INF;
- }
-}
-void ResetCullFrusta(GPURayPacketFrustum* d_blockFrusta,
- GPURayPacketFrustum* d_tileFrusta,
- const uint32_t tileCount,
- const uint32_t blockCount,
- cudaStream_t stream) {
- {
- size_t combinedBlockAndTileCount = tileCount + blockCount;
- KernelDim dim = KernelDim(combinedBlockAndTileCount, CUDA_GROUP_SIZE);
- ResetCullFrustaKernel<<>>(d_blockFrusta, blockCount, d_tileFrusta, tileCount);
- }
-}
-
-void CalculateSampleCullFrusta(GPURayPacketFrustum* d_blockFrusta,
- GPURayPacketFrustum* d_tileFrusta,
- SampleInfo sampleInfo,
- const uint32_t sampleCount,
- const uint32_t tileCount,
- const uint32_t blockCount,
- cudaStream_t stream) {
- static_assert((TILE_SIZE % 32 == 0), "TILE_SIZE must be a multiple of 32, the CUDA warp size");
- {
- KernelDim dim = KernelDim(sampleCount, CUDA_GROUP_SIZE);
- CalculateSampleCullFrustaKernel<<>>(d_blockFrusta, d_tileFrusta, sampleInfo,
- sampleCount);
- }
- {
- size_t combinedBlockAndTileCount = tileCount + blockCount;
- KernelDim dim = KernelDim(combinedBlockAndTileCount, CUDA_GROUP_SIZE);
- DecodeSampleCullFrustaKernel<<>>(d_blockFrusta, blockCount, d_tileFrusta,
- tileCount);
+CUDA_HOST_DEVICE_INL bool planeCullsFrustum(const Plane plane, const SimpleRayFrustum& frustum) {
+ bool allout = true;
+ for (int i = 0; i < 4; ++i) {
+ allout = allout && dot(plane.normal, frustum.origins[i]) > plane.dist;
+ // Extend rays far-out
+ allout = allout && dot(plane.normal, frustum.origins[i] + frustum.directions[i] * 10000.0f) > plane.dist;
}
+ return allout;
}
CUDA_KERNEL void CalculateWorldSpaceFrustaKernel(SimpleRayFrustum* blockFrustaWS,
@@ -244,12 +137,12 @@ CUDA_KERNEL void CalculateWorldSpaceFrustaKernel(SimpleRayFrustum* blockFrustaWS
SimpleRayFrustum* blockFrustaES,
SimpleRayFrustum* tileFrustaES,
matrix4x4 eyeToWorldMatrix,
+ FourPlanes cullPlanes,
uint32_t blockCount,
uint32_t tileCount) {
const uint32_t index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < blockCount + tileCount) {
- SimpleRayFrustum* inPtr;
- SimpleRayFrustum* outPtr;
+ SimpleRayFrustum *inPtr, *outPtr;
if (index < blockCount) {
inPtr = (&blockFrustaES[index]);
outPtr = (&blockFrustaWS[index]);
@@ -257,19 +150,25 @@ CUDA_KERNEL void CalculateWorldSpaceFrustaKernel(SimpleRayFrustum* blockFrustaWS
inPtr = (&tileFrustaES[index - blockCount]);
outPtr = (&tileFrustaWS[index - blockCount]);
}
+ bool culled = false;
+ SimpleRayFrustum f;
+ for (int i = 0; i < 4; ++i) {
+ f.origins[i] = vector3(eyeToWorldMatrix * vector4((*inPtr).origins[i], 1.0f));
+ // We do not handle non-uniform scaling (could use inverse transpose of eyeToWorld to do so)
+ f.directions[i] = normalize(vector3(eyeToWorldMatrix * vector4((*inPtr).directions[i], 0.0f)));
+ }
for (int i = 0; i < 4; ++i) {
- vector4 origin =
- eyeToWorldMatrix * vector4((*inPtr).origins[i].x, (*inPtr).origins[i].y, (*inPtr).origins[i].z, 1.0f);
- (*outPtr).origins[i].x = origin.x;
- (*outPtr).origins[i].y = origin.y;
- (*outPtr).origins[i].z = origin.z;
- // TODO(mmara): use inverse transpose to handle non-uniform scale?
- vector4 direction = eyeToWorldMatrix * vector4((*inPtr).directions[i].x, (*inPtr).directions[i].y,
- (*inPtr).directions[i].z, 0.0f);
- (*outPtr).directions[i].x = direction.x;
- (*outPtr).directions[i].y = direction.y;
- (*outPtr).directions[i].z = direction.z;
+ Plane p = cullPlanes.data[i];
+ culled = culled || planeCullsFrustum(p, f);
+ }
+ if (culled) {
+ for (int i = 0; i < 4; ++i) {
+ // Signal degenerate frustum
+ f.origins[i] = vector3(INFINITY, INFINITY, INFINITY);
+ f.directions[i] = vector3(0, 0, 0);
+ }
}
+ (*outPtr) = f;
}
}
@@ -278,6 +177,7 @@ void CalculateWorldSpaceFrusta(SimpleRayFrustum* blockFrustaWS,
SimpleRayFrustum* blockFrustaES,
SimpleRayFrustum* tileFrustaES,
matrix4x4 eyeToWorldMatrix,
+ FourPlanes cullPlanes,
uint32_t blockCount,
uint32_t tileCount,
cudaStream_t stream) {
@@ -285,7 +185,7 @@ void CalculateWorldSpaceFrusta(SimpleRayFrustum* blockFrustaWS,
size_t combinedBlockAndTileCount = tileCount + blockCount;
KernelDim dim = KernelDim(combinedBlockAndTileCount, CUDA_GROUP_SIZE);
CalculateWorldSpaceFrustaKernel<<>>(
- blockFrustaWS, tileFrustaWS, blockFrustaES, tileFrustaES, eyeToWorldMatrix, blockCount, tileCount);
+ blockFrustaWS, tileFrustaWS, blockFrustaES, tileFrustaES, eyeToWorldMatrix, cullPlanes, blockCount, tileCount);
}
} // namespace hvvr
diff --git a/libraries/hvvr/raycaster/frusta.h b/libraries/hvvr/raycaster/frusta.h
index b211bfb..ece8ad7 100644
--- a/libraries/hvvr/raycaster/frusta.h
+++ b/libraries/hvvr/raycaster/frusta.h
@@ -17,29 +17,21 @@
namespace hvvr {
-void ComputeEyeSpaceFrusta(const GPUBuffer& dirSamples,
+// For conveniently passing four planes by value to the world space transformation kernel.
+struct FourPlanes {
+ Plane data[4];
+};
+
+void ComputeEyeSpaceFrusta(const GPUBuffer& dirSamples,
GPUBuffer& tileFrusta,
GPUBuffer& blockFrusta);
-void ResetCullFrusta(GPURayPacketFrustum* d_blockFrusta,
- GPURayPacketFrustum* d_tileFrusta,
- const uint32_t tileCount,
- const uint32_t blockCount,
- cudaStream_t stream);
-
-void CalculateSampleCullFrusta(GPURayPacketFrustum* d_blockFrusta,
- GPURayPacketFrustum* d_tileFrusta,
- SampleInfo sampleInfo,
- const uint32_t sampleCount,
- const uint32_t tileCount,
- const uint32_t blockCount,
- cudaStream_t stream);
-
void CalculateWorldSpaceFrusta(SimpleRayFrustum* blockFrustaWS,
SimpleRayFrustum* tileFrustaWS,
SimpleRayFrustum* blockFrustaES,
SimpleRayFrustum* tileFrustaES,
matrix4x4 eyeToWorldMatrix,
+ FourPlanes cullPlanes,
uint32_t blockCount,
uint32_t tileCount,
cudaStream_t stream);
diff --git a/libraries/hvvr/raycaster/gbuffer.h b/libraries/hvvr/raycaster/gbuffer.h
deleted file mode 100644
index 660b24d..0000000
--- a/libraries/hvvr/raycaster/gbuffer.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-/**
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-#include "cuda_decl.h"
-
-#include
-
-namespace hvvr {
-
-// TODO(anankervis): merge this file into another file
-struct RaycasterGBufferSubsample {
- uint32_t triIndex;
- uint32_t sampleMask;
-
- template
- CUDA_HOST_DEVICE static constexpr uint32_t getSampleMaskAll() {
- return (AARate < 32) ? ((uint32_t(1) << AARate) - 1) : ~uint32_t(0);
- }
-};
-
-} // namespace hvvr
diff --git a/libraries/hvvr/raycaster/gpu_camera.cu b/libraries/hvvr/raycaster/gpu_camera.cu
index b236313..fe8d977 100644
--- a/libraries/hvvr/raycaster/gpu_camera.cu
+++ b/libraries/hvvr/raycaster/gpu_camera.cu
@@ -14,18 +14,10 @@
#include "kernel_constants.h"
#include "magic_constants.h"
#include "memory_helpers.h"
-
-#include
-#include
-
-#ifdef DX_SUPPORTED
#include
-#else
-struct ID3D11Texture2D;
-struct ID3D11Buffer;
-#endif
-
#include
+#include
+#include
namespace hvvr {
@@ -34,6 +26,8 @@ uint32_t pixelFormatSize(PixelFormat pixelFormat) {
switch (pixelFormat) {
case PixelFormat::RGBA8_SRGB:
return 4;
+ case PixelFormat::RGBA16:
+ return 8;
case PixelFormat::RGBA32F:
return 16;
default:
@@ -66,7 +60,8 @@ GPUCamera::GPUCamera(const Camera* cameraPtr) : streamedIndexCPU(0), streamedInd
void GPUCamera::initLookupTables(int _MSAARate) {
// getSubsampleUnitOffset needs a compile-time constant for MSAARate
enum { MSAARate = COLOR_MODE_MSAA_RATE };
- assert(MSAARate == _MSAARate);
+ if (MSAARate != _MSAARate)
+ fail("MSAARate for lookup table must match compile-time constant\n");
std::uniform_real_distribution uniformRandomDist(0.0f, 1.0f);
std::mt19937 generator;
@@ -132,28 +127,23 @@ void GPUCamera::streamedDataGpuDone() {
}
void GPUCamera::setCameraJitter(vector2 jitter) {
- frameJitter.x = jitter.x;
- frameJitter.y = jitter.y;
+ frameJitter = jitter;
}
-void GPUCamera::updatePerFrame(vector3 cameraPos,
- vector3 cameraLookVector,
- const matrix3x3& _sampleToCamera,
- const matrix4x4& _cameraToWorld) {
- position = vector3(cameraPos.x, cameraPos.y, cameraPos.z);
- lookVector = vector3(cameraLookVector.x, cameraLookVector.y, cameraLookVector.z);
- sampleToCamera = _sampleToCamera;
+void GPUCamera::updateTransform(const matrix4x4& _cameraToWorld) {
cameraToWorld = _cameraToWorld;
}
-static int getMSAARate(RaycasterOutputMode outputMode) {
- return (outputMode == RaycasterOutputMode::COLOR_RGBA8) ? COLOR_MODE_MSAA_RATE : 1;
+static int getMSAARate(RaycasterOutputFormat outputMode) {
+ return (outputMode == RaycasterOutputFormat::COLOR_RGBA8) ? COLOR_MODE_MSAA_RATE : 1;
}
static TextureFormat pixelFormatToTextureFormat(PixelFormat format) {
switch (format) {
case PixelFormat::RGBA8_SRGB:
return TextureFormat::r8g8b8a8_unorm_srgb;
+ case PixelFormat::RGBA16:
+ return TextureFormat::r16g16b16a16_unorm;
case PixelFormat::RGBA32F:
return TextureFormat::r32g32b32a32_float;
default:
@@ -163,10 +153,9 @@ static TextureFormat pixelFormatToTextureFormat(PixelFormat format) {
}
// TODO(anankervis): merge the different functions that duplicate camera resource creation
-void GPUCamera::updateConfig(RaycasterOutputMode _outputMode,
+void GPUCamera::updateConfig(RaycasterOutputFormat _outputMode,
int32_t* sampleRemap,
- float* sampleLocations,
- Sample::Extents* sampleExtents,
+ DirectionalBeam* directionalSamples,
ThinLens _lens,
uint32_t _sampleCount,
uint32_t imageWidth,
@@ -180,9 +169,7 @@ void GPUCamera::updateConfig(RaycasterOutputMode _outputMode,
validSampleCount = imageWidth * imageHeight * splitColorSamples;
d_sampleRemap = GPUBuffer(sampleRemap, sampleRemap + validSampleCount);
sampleCount = _sampleCount;
- d_sampleLocations = GPUBuffer((vector2*)sampleLocations, (vector2*)(sampleLocations) + sampleCount);
- d_sampleExtents =
- GPUBuffer((Sample::Extents*)sampleExtents, (Sample::Extents*)(sampleExtents) + sampleCount);
+ d_directionalBeams = GPUBuffer(directionalSamples, directionalSamples + sampleCount);
outputMode = _outputMode;
int msaaRate = getMSAARate(outputMode);
@@ -191,17 +178,16 @@ void GPUCamera::updateConfig(RaycasterOutputMode _outputMode,
PixelFormat outputFormat = outputModeToPixelFormat(outputMode);
TextureFormat textureFormat = pixelFormatToTextureFormat(outputFormat);
- previousResultTexture =
- createEmptyTexture(imageWidth, imageHeight, textureFormat, cudaAddressModeClamp, cudaAddressModeClamp);
- resultTexture =
- createEmptyTexture(imageWidth, imageHeight, textureFormat, cudaAddressModeClamp, cudaAddressModeClamp);
+ auto createImageSizedTexture = [&]() {
+ return createEmptyTexture(imageWidth, imageHeight, textureFormat, cudaAddressModeClamp, cudaAddressModeClamp);
+ };
+ previousResultTexture = createImageSizedTexture();
+ resultTexture = createImageSizedTexture();
contrastEnhancementSettings.enable = true;
- contrastEnhancementSettings.f_e = 0.2f;
- contrastEnhancementBuffers.horizontallyFiltered =
- createEmptyTexture(imageWidth, imageHeight, textureFormat, cudaAddressModeClamp, cudaAddressModeClamp);
- contrastEnhancementBuffers.fullyFiltered =
- createEmptyTexture(imageWidth, imageHeight, textureFormat, cudaAddressModeClamp, cudaAddressModeClamp);
+ contrastEnhancementSettings.f_e = 1.0f;
+ contrastEnhancementBuffers.horizontallyFiltered = createImageSizedTexture();
+ contrastEnhancementBuffers.fullyFiltered = createImageSizedTexture();
auto pixelFormat = outputModeToPixelFormat(outputMode);
d_sampleResults =
@@ -214,24 +200,19 @@ void GPUCamera::updateConfig(RaycasterOutputMode _outputMode,
void GPUCamera::registerPolarFoveatedSamples(const std::vector& polarRemapToPixel,
float _maxEccentricityRadians,
- const std::vector& ringEccentricities,
- const std::vector& eccentricityCoordinateMap,
+ const EccentricityMap& eMap,
uint32_t samplesPerRing,
uint32_t paddedSampleCount) {
PixelFormat outputFormat = outputModeToPixelFormat(outputMode);
sampleCount = paddedSampleCount;
d_sampleResults = GPUBuffer((paddedSampleCount * pixelFormatSize(outputFormat) + sizeof(uint32_t) - 1) /
sizeof(uint32_t));
- d_sampleLocations = GPUBuffer(paddedSampleCount);
- d_sampleExtents = GPUBuffer(paddedSampleCount);
d_sampleRemap = GPUBuffer(paddedSampleCount);
// For temporal filtering
d_tMaxBuffer = GPUBuffer(paddedSampleCount);
-
+ eccentricityMap = eMap;
maxEccentricityRadians = _maxEccentricityRadians;
- d_eccentricityCoordinateMap = makeGPUBuffer(eccentricityCoordinateMap);
- d_ringEccentricities = makeGPUBuffer(ringEccentricities);
int msaaRate = getMSAARate(outputMode);
size_t totalSubsampleCount = paddedSampleCount * msaaRate;
@@ -243,15 +224,15 @@ void GPUCamera::registerPolarFoveatedSamples(const std::vector& polar
TextureFormat textureFormat = pixelFormatToTextureFormat(outputFormat);
- polarFoveatedImage = createEmptyTexture(samplesPerRing, uint32_t(polarRemapToPixel.size() / samplesPerRing),
- textureFormat, cudaAddressModeWrap, cudaAddressModeClamp);
- previousPolarFoveatedImage = createEmptyTexture(samplesPerRing, uint32_t(polarRemapToPixel.size() / samplesPerRing),
- textureFormat, cudaAddressModeWrap, cudaAddressModeClamp);
- rawPolarFoveatedImage = createEmptyTexture(samplesPerRing, uint32_t(polarRemapToPixel.size() / samplesPerRing),
- textureFormat, cudaAddressModeWrap, cudaAddressModeClamp, false);
- polarFoveatedDepthImage =
- createEmptyTexture(samplesPerRing, uint32_t(polarRemapToPixel.size() / samplesPerRing),
- TextureFormat::r32_float, cudaAddressModeWrap, cudaAddressModeClamp, false);
+ uint32_t ringCount = uint32_t(polarRemapToPixel.size() / samplesPerRing);
+ auto createFoveatedImage = [&](TextureFormat format, bool linearFilter = true) {
+ return createEmptyTexture(samplesPerRing, ringCount, format, cudaAddressModeWrap, cudaAddressModeClamp,
+ linearFilter);
+ };
+ polarTextures.raw = createFoveatedImage(textureFormat);
+ polarTextures.depth = createFoveatedImage(TextureFormat::r32_float, false);
+ polarTextures.moment1 = createFoveatedImage(TextureFormat::r16g16b16a16_unorm);
+ polarTextures.moment2 = createFoveatedImage(TextureFormat::r16g16b16a16_unorm);
initLookupTables(msaaRate);
}
@@ -262,14 +243,15 @@ bool GPUCamera::bindTexture(GPUContext& gpuContext, ImageResourceDescriptor text
cutilSafeCall(cudaGraphicsUnregisterResource(resultsResource));
resultsResource = nullptr;
}
-#ifdef DX_SUPPORTED
if (texture.memoryType == ImageResourceDescriptor::MemoryType::DX_TEXTURE) {
+#if defined(_WIN32)
// cudaGraphicsRegisterFlagsNone is only valid flag as of 7/22/2016
cutilSafeCall(cudaGraphicsD3D11RegisterResource(&resultsResource, (ID3D11Texture2D*)texture.data,
cudaGraphicsRegisterFlagsNone));
- }
+#else
+ assert(false, "Cannot do DirectX interop on non-windows platforms");
#endif
- if (texture.memoryType == ImageResourceDescriptor::MemoryType::OPENGL_TEXTURE) {
+ } else if (texture.memoryType == ImageResourceDescriptor::MemoryType::OPENGL_TEXTURE) {
cutilSafeCall(cudaGraphicsGLRegisterImage(&resultsResource, (GLuint)(uint64_t)texture.data, GL_TEXTURE_2D,
cudaGraphicsMapFlagsWriteDiscard));
}
@@ -296,7 +278,7 @@ void GPUCamera::copyImageToCPU(uint32_t* imageData, uint32_t imageWidth, uint32_
void GPUCamera::acquireTileCullData(SimpleRayFrustum* tileFrusta, SimpleRayFrustum* blockFrusta) {
cutilSafeCall(cudaEventSynchronize(transferTileToCPUEvent));
- size_t blockCount = d_cullBlockFrusta.size();
+ size_t blockCount = d_foveatedWorldSpaceBlockFrusta.size();
memcpy(blockFrusta, foveatedWorldSpaceBlockFrustaPinned, sizeof(SimpleRayFrustum) * blockCount);
memcpy(tileFrusta, foveatedWorldSpaceTileFrustaPinned, sizeof(SimpleRayFrustum) * blockCount * TILES_PER_BLOCK);
}
@@ -316,10 +298,10 @@ void GPUCamera::intersectShadeResolve(GPUSceneState& sceneState) {
clearEmpty();
}
- SampleInfo sampleInfo(*this);
+ CameraBeams cameraBeams(*this);
if (streamedData.tileCountOccupied > 0) {
- intersect(sceneState, sampleInfo);
- shadeAndResolve(sceneState, sampleInfo);
+ intersect(sceneState, cameraBeams);
+ shadeAndResolve(sceneState, cameraBeams);
}
streamedDataGpuDone();
diff --git a/libraries/hvvr/raycaster/gpu_camera.h b/libraries/hvvr/raycaster/gpu_camera.h
index 18557f7..e321504 100644
--- a/libraries/hvvr/raycaster/gpu_camera.h
+++ b/libraries/hvvr/raycaster/gpu_camera.h
@@ -10,24 +10,36 @@
*/
#include "cuda_decl.h"
-#include "gbuffer.h"
+#include "foveated.h"
#include "gpu_buffer.h"
#include "gpu_image.h"
+#include "gpu_samples.h"
#include "raycaster_spec.h"
#include "samples.h"
#include "texture_internal.h"
#include "vector_math.h"
-#include "foveated.h"
+#include
namespace hvvr {
class Camera;
class GPUContext;
class GPUSceneState;
-struct SampleInfo;
+struct CameraBeams;
struct EccentricityToTexCoordMapping;
-// Keep in sync with blockcull.h/RayPacketFrustum
+
+struct RaycasterGBufferSubsample {
+ uint32_t triIndex;
+ uint32_t sampleMask;
+
+ template
+ CUDA_HOST_DEVICE static constexpr uint32_t getSampleMaskAll() {
+ return (AARate < 32) ? ((uint32_t(1) << AARate) - 1) : ~uint32_t(0);
+ }
+};
+
+// Keep in sync with blockcull.h/RayPacketFrustum2D
struct GPURayPacketFrustum {
float xMin;
float xNegMax;
@@ -37,9 +49,8 @@ struct GPURayPacketFrustum {
struct TemporalFilterSettings {
// 1 entirely use current sample
- float alpha = 1.f;
- float stddevMultiplier = 4.0f;
- bool inPolarSpace = false;
+ float alpha = .05f;
+ float stddevMultiplier = 1.0f;
};
struct TileTriRange {
@@ -86,7 +97,7 @@ struct ContrastEnhancementSettings {
float f_e;
};
-inline PixelFormat outputModeToPixelFormat(RaycasterOutputMode mode) {
+inline PixelFormat outputModeToPixelFormat(RaycasterOutputFormat mode) {
(void)mode;
return PixelFormat::RGBA8_SRGB;
}
@@ -101,8 +112,7 @@ class GPUCamera {
TemporalFilterSettings temporalFilterSettings;
GPUBuffer d_gBuffer;
- GPUBuffer d_sampleLocations;
- GPUBuffer d_sampleExtents;
+ GPUBuffer d_directionalBeams;
GPUBuffer d_sampleRemap;
uint32_t splitColorSamples = 1;
@@ -126,7 +136,7 @@ class GPUCamera {
GPUBuffer d_polarRemapToPixel;
- GPUBuffer d_foveatedDirectionalSamples;
+ GPUBuffer d_foveatedEyeDirectionalSamples;
GPUBuffer d_foveatedEyeSpaceTileFrusta;
GPUBuffer d_foveatedEyeSpaceBlockFrusta;
GPUBuffer d_foveatedWorldSpaceTileFrusta;
@@ -134,24 +144,23 @@ class GPUCamera {
SimpleRayFrustum* foveatedWorldSpaceTileFrustaPinned = nullptr;
SimpleRayFrustum* foveatedWorldSpaceBlockFrustaPinned = nullptr;
- GPUBuffer d_tileFrusta;
- GPUBuffer d_cullBlockFrusta;
- GPURayPacketFrustum* tileFrustaPinned = nullptr;
- GPURayPacketFrustum* cullBlockFrustaPinned = nullptr;
-
- GPUBuffer d_ringEccentricities;
-
cudaEvent_t transferTileToCPUEvent = nullptr;
GPUBuffer d_sampleResults;
+ GPUBuffer d_sampleResultsRemapped;
+ GPUBuffer d_foveaMask;
uint32_t sampleCount;
GPUBuffer d_tMaxBuffer;
+
GPUImage resultImage;
// For polarFoveatedReconstruction
- Texture2D rawPolarFoveatedImage;
- Texture2D previousPolarFoveatedImage;
- Texture2D polarFoveatedImage;
- Texture2D polarFoveatedDepthImage;
+ struct PolarTextures {
+ Texture2D raw;
+ Texture2D depth;
+
+ Texture2D moment1;
+ Texture2D moment2;
+ } polarTextures;
// For temporal filtering in polarFoveatedReconstruction
Texture2D previousResultTexture;
@@ -160,22 +169,18 @@ class GPUCamera {
ContrastEnhancementBuffers contrastEnhancementBuffers;
ContrastEnhancementSettings contrastEnhancementSettings;
+ EccentricityMap eccentricityMap;
float maxEccentricityRadians;
- GPUBuffer d_eccentricityCoordinateMap;
cudaGraphicsResource_t resultsResource = NULL;
uint32_t validSampleCount = 0;
- vector3 position;
- vector3 lookVector;
- matrix3x3 sampleToCamera;
matrix4x4 cameraToWorld;
-
ThinLens lens;
cudaStream_t stream = 0;
- RaycasterOutputMode outputMode = RaycasterOutputMode::COLOR_RGBA8;
+ RaycasterOutputFormat outputMode = RaycasterOutputFormat::COLOR_RGBA8;
// how far to jitter the samples this frame. length() < 1
vector2 frameJitter = {0.0f, 0.0f};
@@ -188,20 +193,16 @@ class GPUCamera {
// per-frame updates
void setCameraJitter(vector2 jitter);
- void updatePerFrame(vector3 cameraPos,
- vector3 cameraLookVector,
- const matrix3x3& _sampleToCamera,
- const matrix4x4& _cameraToWorld);
+ void updateTransform(const matrix4x4& _cameraToWorld);
void updatePerFrameFoveatedData(const FloatRect& sampleBounds,
const matrix3x3& cameraToSample,
const matrix3x3& eyeToCamera,
const matrix4x4& eyeToWorld);
// sample config updates
- void updateConfig(RaycasterOutputMode _outputMode,
+ void updateConfig(RaycasterOutputFormat _outputMode,
int32_t* sampleRemap,
- float* sampleLocations,
- Sample::Extents* sampleExtents,
+ DirectionalBeam* directionalSamples,
ThinLens _lens,
uint32_t _sampleCount,
uint32_t imageWidth,
@@ -210,13 +211,12 @@ class GPUCamera {
uint32_t _splitColorSamples);
void registerPolarFoveatedSamples(const std::vector& polarRemapToPixel,
float _maxEccentricityRadians,
- const std::vector& ringEccentricities,
- const std::vector& eccentricityCoordinateMap,
+ const EccentricityMap& eccentricityMap,
uint32_t samplesPerRing,
uint32_t paddedSampleCount);
- void updateEyeSpaceFoveatedSamples(const ArrayView precomputedDirectionalSamples);
+ void updateEyeSpaceFoveatedSamples(const ArrayView cameraBeams);
- // attach a texture from a 3D API, used when OUTPUT_MODE = OUTPUT_MODE_3D_API
+ // attach a texture from a 3D API
bool bindTexture(GPUContext& gpuContext, ImageResourceDescriptor texture);
// GPUContext::graphicsResourcesMapped must be true before calling this function
void copyImageToBoundTexture();
@@ -231,6 +231,7 @@ class GPUCamera {
// convert from linear post-resolve results buffer to results image
void remap();
+
void remapPolarFoveated();
void foveatedPolarToScreenSpace(const matrix4x4& eyeToEyePrevious,
@@ -241,17 +242,15 @@ class GPUCamera {
void dumpRays(std::vector& rays, bool outputScanlineOrder);
protected:
-
// intersect triangles
- void intersect(GPUSceneState& sceneState, const SampleInfo& sampleInfo);
+ void intersect(GPUSceneState& sceneState, const CameraBeams& cameraBeams);
// fill empty tiles with default clear value
void clearEmpty();
// shade occupied tiles and resolve AA subsamples
- void shadeAndResolve(GPUSceneState& sceneState, const SampleInfo& sampleInfo);
+ void shadeAndResolve(GPUSceneState& sceneState, const CameraBeams& cameraBeams);
void getEccentricityMap(EccentricityToTexCoordMapping& map) const;
- void foveatedPolarTemporalFilter(const matrix4x4& eyeToEyePrevious);
};
#pragma warning(pop)
diff --git a/libraries/hvvr/raycaster/gpu_context.cu b/libraries/hvvr/raycaster/gpu_context.cu
index 68626a7..48f886e 100644
--- a/libraries/hvvr/raycaster/gpu_context.cu
+++ b/libraries/hvvr/raycaster/gpu_context.cu
@@ -15,28 +15,31 @@
namespace hvvr {
-bool GPUContext::cudaInit() {
+bool GPUContext::cudaInit(bool forceNonTCC) {
int deviceCount = 0;
cutilSafeCall(cudaGetDeviceCount(&deviceCount));
int device = 0;
-#if OUTPUT_MODE == OUTPUT_MODE_3D_API
- cudaDeviceProp deviceProps = {};
-
- // if we're on Windows, search for a non-TCC device
- for (int n = 0; n < deviceCount; n++) {
- cudaGetDeviceProperties(&deviceProps, n);
- if (deviceProps.tccDriver == 0) {
- device = n;
- break;
+
+ if (forceNonTCC) {
+ cudaDeviceProp deviceProps = {};
+
+ // if we're on Windows, search for a non-TCC device
+ for (int n = 0; n < deviceCount; n++) {
+ cudaGetDeviceProperties(&deviceProps, n);
+ if (deviceProps.tccDriver == 0) {
+ device = n;
+ break;
+ }
}
}
-#endif
cutilSafeCall(cudaSetDevice(device));
uint32_t deviceFlags = 0;
deviceFlags |= cudaDeviceMapHost;
- if (cudaSuccess != cudaSetDeviceFlags(deviceFlags)) {
+ auto error = cudaSetDeviceFlags(deviceFlags);
+ if (cudaSuccess != error) {
+ fprintf(stderr, "error %d: cuda call failed with %s\n", error, cudaGetErrorString(error));
assert(false);
return false;
}
@@ -108,8 +111,6 @@ void GPUContext::cleanup() {
safeCudaEventDestroy(c->transferTileToCPUEvent);
safeCudaStreamDestroy(c->stream);
- safeCudaFreeHost(c->tileFrustaPinned);
- safeCudaFreeHost(c->cullBlockFrustaPinned);
safeCudaFreeHost(c->foveatedWorldSpaceTileFrustaPinned);
safeCudaFreeHost(c->foveatedWorldSpaceBlockFrustaPinned);
}
diff --git a/libraries/hvvr/raycaster/gpu_context.h b/libraries/hvvr/raycaster/gpu_context.h
index b38101b..85e7538 100644
--- a/libraries/hvvr/raycaster/gpu_context.h
+++ b/libraries/hvvr/raycaster/gpu_context.h
@@ -24,7 +24,8 @@ class GPUContext {
GPUSceneState sceneState;
bool graphicsResourcesMapped;
- static bool cudaInit();
+ // If forceNonTcc is true, select a cuda device with a non-TCC driver.
+ static bool cudaInit(bool forceNonTcc);
static void cudaCleanup();
GPUContext();
diff --git a/libraries/hvvr/raycaster/gpu_foveated.cu b/libraries/hvvr/raycaster/gpu_foveated.cu
index d31a112..1db5922 100644
--- a/libraries/hvvr/raycaster/gpu_foveated.cu
+++ b/libraries/hvvr/raycaster/gpu_foveated.cu
@@ -13,7 +13,6 @@
#include "frusta.h"
#include "gpu_camera.h"
#include "gpu_context.h"
-#include "gpu_foveated.h"
#include "gpu_samples.h"
#include "graphics_types.h"
#include "kernel_constants.h"
@@ -24,36 +23,53 @@
namespace hvvr {
-// TODO: move to helper header, make generic;
-// potentially optimize down to 4 taps using smart tap placement
-CUDA_DEVICE vector4 bicubic(Texture2D tex, vector2 coord) {
+template
+CUDA_DEVICE void writeSurface(vector4 val, Texture2D tex, unsigned int x, unsigned int y) {
+ if (PixelFormat == PixelFormat::RGBA32F) {
+ surf2Dwrite(float4(val), tex.d_surfaceObject, x * sizeof(float4), y);
+ } else if (PixelFormat == PixelFormat::RGBA16) {
+ surf2Dwrite(ToColor4Unorm16(val), tex.d_surfaceObject, x * sizeof(uint64_t), y);
+ } else {
+ surf2Dwrite(ToColor4Unorm8SRgb(val), tex.d_surfaceObject, x * sizeof(uchar4), y);
+ }
+}
+
+// 4-tap B-spline, based on http://vec3.ca/bicubic-filtering-in-fewer-taps/
+CUDA_DEVICE vector4 bicubicFast(Texture2D tex, vector2 coord) {
vector2 pixCoord = coord * vector2(tex.width, tex.height);
vector2 pixCenter = vector2(floorf(pixCoord.x - 0.5f), floorf(pixCoord.y - 0.5f)) + 0.5f;
vector2 iDim = vector2(1.0f / tex.width, 1.0f / tex.height);
+ vector2 one = vector2(1.0f, 1.0f);
+
// fractionalOffset
vector2 f = pixCoord - pixCenter;
vector2 f2 = f * f;
vector2 f3 = f2 * f;
- vector2 omf2 = (vector2(1.0f) - f) * (vector2(1.0f) - f);
- vector2 omf3 = omf2 * (vector2(1.0f) - f);
+
+ vector2 omf2 = (one - f) * (one - f);
+ vector2 omf3 = omf2 * (one - f);
float sixth = (1.0f / 6.0f);
- // B-spline
- vector2 w[4] = {sixth * omf3, sixth * (4.0f + 3.0f * f3 - 6.0f * f2), sixth * (4.0f + 3.0f * omf3 - 6.0f * omf2),
- sixth * f3};
+ vector2 w0 = sixth * omf3;
+ vector2 w1 = ((4.0f / 6.0f) * one + 0.5f * f3 - f2);
+ vector2 w3 = sixth * f3;
+ vector2 w2 = one - w0 - w1 - w3;
- vector2 tc[4] = {pixCenter + vector2(-1), pixCenter, pixCenter + vector2(1), pixCenter + vector2(2)};
+ vector2 s0 = w0 + w1;
+ vector2 s1 = w2 + w3;
- vector4 result = vector4(0.0f);
- for (int y = 0; y < 4; ++y) {
- for (int x = 0; x < 4; ++x) {
- result += vector4(tex2D(tex.d_texObject, tc[x].x * iDim.x, tc[y].y * iDim.y)) * w[x].x * w[y].y;
- }
- }
- return result;
-}
+ vector2 f0 = w1 / (w0 + w1);
+ vector2 f1 = w3 / (w2 + w3);
+ vector2 t0 = (pixCenter - one + f0) * iDim;
+ vector2 t1 = (pixCenter + one + f1) * iDim;
+
+ auto T = tex.d_texObject;
+ // and sample and blend
+ return vector4(tex2D(T, t0.x, t0.y)) * s0.x * s0.y + vector4(tex2D(T, t1.x, t0.y)) * s1.x * s0.y +
+ vector4(tex2D(T, t0.x, t1.y)) * s0.x * s1.y + vector4(tex2D(T, t1.x, t1.y)) * s1.x * s1.y;
+}
CUDA_DEVICE vector2 directionToSampleSpaceSample(const matrix3x3& eyeSpaceToSampleSpaceMatrix,
const vector3& direction) {
@@ -67,100 +83,65 @@ CUDA_DEVICE_INL bool rectContains(const FloatRect r, const vector2 p) {
return (p.x >= r.lower.x) && (p.x <= r.upper.x) && (p.y >= r.lower.y) && (p.y <= r.upper.y);
}
-CUDA_KERNEL void TransformFoveatedSamplesToSampleSpaceKernel(
- const matrix3x3 eyeSpaceToSampleSpaceMatrix,
- const matrix3x3 eyeSpaceToCameraSpace,
- const FloatRect cullRect,
- const PrecomputedDirectionSample* precomputedEyeSpaceSamples,
- SampleInfo sampleInfo,
- int* remap,
- const uint32_t sampleCount) {
+CUDA_KERNEL void TransformFoveatedSamplesToCameraSpaceKernel(const matrix3x3 eyeSpaceToSampleSpaceMatrix,
+ const matrix3x3 eyeSpaceToCameraSpace,
+ const FloatRect cullRect,
+ const DirectionalBeam* eyeSpaceSamples,
+ CameraBeams cameraBeams,
+ int* remap,
+ const uint32_t sampleCount) {
unsigned index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < sampleCount) {
- vector3 direction = precomputedEyeSpaceSamples[index].center;
+ DirectionalBeam eyeBeam = eyeSpaceSamples[index];
vector2 c = {CUDA_INF, CUDA_INF};
- if ((eyeSpaceToCameraSpace * direction).z < 0.0f) {
- vector2 s = directionToSampleSpaceSample(eyeSpaceToSampleSpaceMatrix, direction);
+ DirectionalBeam cameraBeam = eyeSpaceToCameraSpace * eyeBeam;
+ if (cameraBeam.centerRay.z < 0.0f) {
+ vector2 s = directionToSampleSpaceSample(eyeSpaceToSampleSpaceMatrix, eyeBeam.centerRay);
if (rectContains(cullRect, s)) {
- c = s;
- vector2 d1 =
- directionToSampleSpaceSample(eyeSpaceToSampleSpaceMatrix, precomputedEyeSpaceSamples[index].d1) - c;
- vector2 d2 =
- directionToSampleSpaceSample(eyeSpaceToSampleSpaceMatrix, precomputedEyeSpaceSamples[index].d2) - c;
- float sqExtent1 = dot(d1, d1);
- float sqExtent2 = dot(d2, d2);
- ;
- Sample::Extents extent;
- if (sqExtent1 > sqExtent2) {
- extent.minorAxis = d2;
- extent.majorAxisLength = sqrtf(sqExtent1);
- } else if (sqExtent2 == 0) {
- extent.minorAxis.x = 0;
- extent.minorAxis.y = 0;
- extent.majorAxisLength = 0;
- } else { // sqExtent2 >= sqExtent1, sqExtent2 != 0
- extent.minorAxis = d1;
- extent.majorAxisLength = sqrtf(sqExtent2);
- }
remap[index] = (int)index;
- sampleInfo.extents[index] = extent;
}
}
- sampleInfo.centers[index] = c;
+ cameraBeams.directionalBeams[index] = cameraBeam;
}
}
-void TransformFoveatedSamplesToSampleSpace(const matrix3x3& eyeSpaceToSampleSpaceMatrix,
+void TransformFoveatedSamplesToCameraSpace(const matrix3x3& eyeSpaceToSampleSpaceMatrix,
const matrix3x3& eyeSpaceToCameraSpaceMatrix,
const FloatRect& sampleBounds,
- const PrecomputedDirectionSample* d_precomputedEyeSpaceSamples,
- SampleInfo sampleInfo,
+ const DirectionalBeam* d_eyeBeams,
+ CameraBeams cameraBeams,
int* d_remap,
const uint32_t sampleCount,
cudaStream_t stream) {
KernelDim dim = KernelDim(sampleCount, CUDA_GROUP_SIZE);
- TransformFoveatedSamplesToSampleSpaceKernel<<>>(
- eyeSpaceToSampleSpaceMatrix, eyeSpaceToCameraSpaceMatrix, sampleBounds, d_precomputedEyeSpaceSamples,
- sampleInfo, d_remap, sampleCount);
+ TransformFoveatedSamplesToCameraSpaceKernel<<>>(
+ eyeSpaceToSampleSpaceMatrix, eyeSpaceToCameraSpaceMatrix, sampleBounds, d_eyeBeams, cameraBeams, d_remap,
+ sampleCount);
}
struct EccentricityToTexCoordMapping {
- float maxEccentricityRadians;
- float* forwardMap;
- float forwardMapSize;
- float* backwardMap;
- float backwardMapSize;
+ EccentricityMap eccentricityMap;
+ float texMapSize;
+ float invTexMapSize;
+ float invMaxEccentricity;
};
void GPUCamera::getEccentricityMap(EccentricityToTexCoordMapping& map) const {
- map.maxEccentricityRadians = maxEccentricityRadians;
- map.forwardMap = d_eccentricityCoordinateMap;
- map.forwardMapSize = (float)d_eccentricityCoordinateMap.size();
- map.backwardMap = (float*)d_ringEccentricities;
- map.backwardMapSize = (float)d_ringEccentricities.size();
-}
-
-CUDA_DEVICE float bilinearRead1D(float coord, float* map, float mapSize) {
- float pixelCoord = coord * mapSize - 0.5f;
- float integralPart = floor(pixelCoord);
- int maxCoord = (int)mapSize - 1;
- int lowerCoord = clamp((int)integralPart, 0, maxCoord);
- int upperCoord = clamp((int)integralPart + 1, 0, maxCoord);
- float alpha = pixelCoord - integralPart;
- return lerp(map[lowerCoord], map[upperCoord], alpha);
+ map.eccentricityMap = eccentricityMap;
+ map.texMapSize = (float)polarTextures.raw.height;
+ map.invTexMapSize = 1.0f / polarTextures.raw.height;
+ map.invMaxEccentricity = 1.0f / maxEccentricityRadians;
}
// eccentricity is in the range [0,maxEccentricityRadians]
CUDA_DEVICE float eccentricityToTexCoord(float eccentricity, EccentricityToTexCoordMapping eToTexMap) {
- float normalizedE = eccentricity / eToTexMap.maxEccentricityRadians;
- return bilinearRead1D(normalizedE, eToTexMap.forwardMap, eToTexMap.forwardMapSize);
+ return (eToTexMap.eccentricityMap.applyInverse(eccentricity) + 0.5f) * eToTexMap.invTexMapSize;
}
CUDA_DEVICE vector2 getNormalizedCoord(int x, int y, int width, int height) {
return vector2(((float)x + 0.5f) / (float)width, ((float)y + 0.5f) / (float)height);
}
-// TODO: Canonicalize the negations...
// Aligned along z axis
CUDA_DEVICE vector3 angularEyeCoordToDirection(float theta, float e) {
float z = -cosf(e);
@@ -169,7 +150,6 @@ CUDA_DEVICE vector3 angularEyeCoordToDirection(float theta, float e) {
return {xy.x, xy.y, z};
}
CUDA_DEVICE void eyeSpaceDirectionToAngularEyeCoord(vector3 dir, float& theta, float& eccentricity) {
- // TODO: get rid of transcendentals in calculation
eccentricity = acosf(-dir.z);
// Angle of rotation about z, measured from x
theta = -atan2f(dir.y, dir.x);
@@ -179,7 +159,7 @@ CUDA_DEVICE void polarTextureCoordToAngularEyeCoord(vector2 coord,
EccentricityToTexCoordMapping eToTexMap,
float& theta,
float& eccentricity) {
- eccentricity = bilinearRead1D(coord.y, eToTexMap.backwardMap, eToTexMap.backwardMapSize);
+ eccentricity = eToTexMap.eccentricityMap.apply(coord.y * eToTexMap.texMapSize - 0.5f);
theta = (2.0f * Pi * coord.x) - Pi;
}
CUDA_DEVICE vector2 angularEyeCoordToPolarTextureCoord(float theta,
@@ -207,15 +187,17 @@ CUDA_DEVICE void computeMoments3x3Window(
m_2 *= weight;
}
-CUDA_DEVICE vector4 sqrtf(vector4 v) {
- return vector4(v.x, v.y, v.z, v.w);
+CUDA_DEVICE vector4 sqrt(vector4 v) {
+ return vector4(sqrtf(v.x), sqrtf(v.y), sqrtf(v.z), sqrtf(v.w));
}
-CUDA_DEVICE vector4 clampToNeighborhood(
- vector4 oldValue, cudaTextureObject_t tex, vector2 coord, vector2 invDim, TemporalFilterSettings settings) {
- vector4 m_1, m_2;
- computeMoments3x3Window(tex, coord, invDim, m_1, m_2);
- vector4 stdDev = sqrtf(m_2 - (m_1 * m_1));
+CUDA_DEVICE vector4 clampToNeighborhood(vector4 oldValue,
+ GPUCamera::PolarTextures polarTex,
+ vector2 coord,
+ TemporalFilterSettings settings) {
+ vector4 m_1 = vector4(tex2D(polarTex.moment1.d_texObject, coord.x, coord.y));
+ vector4 m_2 = vector4(tex2D(polarTex.moment2.d_texObject, coord.x, coord.y));
+ vector4 stdDev = sqrt(m_2 - (m_1 * m_1));
// Arbitrary
float scaleFactor = settings.stddevMultiplier;
vector4 minC = m_1 - (stdDev * scaleFactor);
@@ -224,13 +206,27 @@ CUDA_DEVICE vector4 clampToNeighborhood(
}
template
-CUDA_KERNEL void FoveatedPolarToScreenSpaceKernel(Texture2D polarImage,
+CUDA_KERNEL void ComputeMoments(GPUCamera::PolarTextures polarTex) {
+ unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
+ unsigned j = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (i < polarTex.raw.width && j < polarTex.raw.height) {
+ vector4 m_1, m_2;
+ vector2 invDim = vector2(1.0f / polarTex.raw.width, 1.0f / polarTex.raw.height);
+ vector2 coord = vector2(invDim.x * i, invDim.y * j);
+ computeMoments3x3Window(polarTex.raw.d_texObject, coord, invDim, m_1, m_2);
+ writeSurface(m_1, polarTex.moment1, i, j);
+ writeSurface(m_2, polarTex.moment2, i, j);
+ }
+}
+
+template
+CUDA_KERNEL void FoveatedPolarToScreenSpaceKernel(GPUCamera::PolarTextures polarTex,
Texture2D resultTexture,
GPUImage resultImage,
matrix3x3 sampleSpaceToEyeSpaceMatrix,
EccentricityToTexCoordMapping eToTexMap,
- Texture2D previousImage,
- Texture2D polarFoveatedTMaxImage,
+ Texture2D previousResultTexture,
matrix4x4 eyeSpaceToPreviousSampleSpaceMatrix,
TemporalFilterSettings settings) {
unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -241,19 +237,23 @@ CUDA_KERNEL void FoveatedPolarToScreenSpaceKernel(Texture2D polarImage,
vector3 sampleSpacePoint = vector3(normalizedCoord, 1.0f);
vector3 eyeSpaceDirection = normalize(sampleSpaceToEyeSpaceMatrix * sampleSpacePoint);
+
float theta, eccentricity;
eyeSpaceDirectionToAngularEyeCoord(eyeSpaceDirection, theta, eccentricity);
+ /** Display full mapping, for debugging
+ theta = normalizedCoord.x * 2.0f * Pi;
+ eccentricity = normalizedCoord.y * eToTexMap.invMaxEccentricity;
+ */
+
vector2 coord = angularEyeCoordToPolarTextureCoord(theta, eccentricity, eToTexMap);
- vector4 newValue = bicubic(polarImage, coord);
+ vector4 newValue = bicubicFast(polarTex.raw, coord);
vector4 result = newValue;
vector4 surfaceResult = result;
- if (!settings.inPolarSpace) {
- float tValue = tex2D(polarFoveatedTMaxImage.d_texObject, coord.x, coord.y);
- // Clamp infinity. TODO: something more robust
- tValue = min(tValue, 1000.0f);
+ float tValue = tex2D(polarTex.depth.d_texObject, coord.x, coord.y);
+ if (tValue < CUDA_INF) {
vector3 currentEyePosition = angularEyeCoordToDirection(theta, eccentricity) * tValue;
vector4 prevSamplePosition = eyeSpaceToPreviousSampleSpaceMatrix * vector4(currentEyePosition, 1.0f);
@@ -262,81 +262,38 @@ CUDA_KERNEL void FoveatedPolarToScreenSpaceKernel(Texture2D polarImage,
float alpha = settings.alpha;
vector4 oldValue = newValue;
if (oldTexCoord.x > 0 && oldTexCoord.y > 0 && oldTexCoord.x < 1 && oldTexCoord.y < 1 && tValue > 0) {
- oldValue = vector4(tex2D(previousImage.d_texObject, oldTexCoord.x, oldTexCoord.y));
+ oldValue = vector4(tex2D(previousResultTexture.d_texObject, oldTexCoord.x, oldTexCoord.y));
}
- // TODO: could compute the moments in a prepass
- vector2 invDim = vector2(1.0f / polarImage.width, 1.0f / polarImage.height);
- vector4 clampedOldValue = clampToNeighborhood(oldValue, polarImage.d_texObject, coord, invDim, settings);
+
+
+ vector4 clampedOldValue = clampToNeighborhood(oldValue, polarTex, coord, settings);
+
+ // Make alpha settings be dependent on eccentricity. Make it higher in fovea and lower toward periphery
+ float normalizedE = eccentricity * eToTexMap.invMaxEccentricity;
+ float mn = 0.2f, mx = 0.35f;
+ float t = clamp((normalizedE - mn) / (mx - mn), 0.f, 1.f);
+ alpha = lerp(0.5f, alpha, t);
+
+ // Heuristic hack! Turn down TAA clamping in the periphery
+ normalizedE = sqrtf(sqrtf(sqrtf(sqrtf(normalizedE))));
+ clampedOldValue.x = lerp(clampedOldValue.x, oldValue.x, normalizedE);
+ clampedOldValue.y = lerp(clampedOldValue.y, oldValue.y, normalizedE);
+ clampedOldValue.z = lerp(clampedOldValue.z, oldValue.z, normalizedE);
+ clampedOldValue.w = lerp(clampedOldValue.w, oldValue.w, normalizedE);
+
surfaceResult = alpha * newValue + (1.0f - alpha) * clampedOldValue;
result = surfaceResult;
}
- // DEBUG CODE:
- // surfaceResult = vector4(tex2D(polarImage.d_texObject, normalizedCoord.x, normalizedCoord.y));
- // result = surfaceResult;
if (PixelFormat == PixelFormat::RGBA32F) {
vector4* output = (vector4*)resultImage.data();
output[resultImage.stride() * j + i] = result;
- if (!settings.inPolarSpace) {
- surf2Dwrite(float4(surfaceResult), resultTexture.d_surfaceObject, i * sizeof(float4), j);
- }
} else {
uint32_t* output = (uint32_t*)resultImage.data();
output[resultImage.stride() * j + i] = ToColor4Unorm8SRgb(result);
- if (!settings.inPolarSpace) {
- surf2Dwrite(ToColor4Unorm8SRgb(surfaceResult), resultTexture.d_surfaceObject, i * sizeof(uchar4), j);
- }
- }
- }
-}
-
-template
-CUDA_KERNEL void FoveatedTemporalFilterKernel(Texture2D rawImage,
- Texture2D previousImage,
- Texture2D polarFoveatedTMaxImage,
- EccentricityToTexCoordMapping eToTexMap,
- matrix4x4 eyeToEyePrevious,
- TemporalFilterSettings settings,
- Texture2D resultImage) {
- unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
- unsigned j = blockIdx.y * blockDim.y + threadIdx.y;
-
- if (i < rawImage.width && j < rawImage.height) {
- vector2 invDim = vector2(1.0f / rawImage.width, 1.0f / rawImage.height);
- vector2 coord = vector2(i + 0.5f, j + 0.5f) * invDim;
- float tValue = tex2D(polarFoveatedTMaxImage.d_texObject, coord.x, coord.y);
- vector4 newValue(tex2D(rawImage.d_texObject, coord.x, coord.y));
- vector4 result = newValue;
- if (tValue < CUDA_INF) {
- float theta, e;
- polarTextureCoordToAngularEyeCoord(coord, eToTexMap, theta, e);
-
- vector3 currentEyePosition = angularEyeCoordToDirection(theta, e) * tValue;
-
- vector4 prevEyePosition = eyeToEyePrevious * vector4(currentEyePosition, 1.0f);
-
- float prevTheta, prevE;
- eyeSpaceDirectionToAngularEyeCoord(normalize(vector3(prevEyePosition)), prevTheta, prevE);
-
- vector2 oldTexCoord = angularEyeCoordToPolarTextureCoord(prevTheta, prevE, eToTexMap);
-
- float alpha = settings.alpha;
-
- vector4 oldValue(tex2D(previousImage.d_texObject, oldTexCoord.x, oldTexCoord.y));
-
- // TODO: could compute the moments in a prepass
- vector4 clampedOldValue = clampToNeighborhood(oldValue, rawImage.d_texObject, coord, invDim, settings);
- result = alpha * newValue + (1.0f - alpha) * clampedOldValue;
- }
-
- // vector2 diff = (oldTexCoord - coord);
- // result = newValue;// {tValue, -tValue, 0.0f, 0.0f};
- if (PixelFormat == PixelFormat::RGBA32F) {
- surf2Dwrite(float4(result), resultImage.d_surfaceObject, i * sizeof(float4), j);
- } else {
- surf2Dwrite(ToColor4Unorm8SRgb(result), resultImage.d_surfaceObject, i * sizeof(uchar4), j);
}
+ writeSurface(surfaceResult, resultTexture, i, j);
}
}
@@ -354,6 +311,9 @@ CUDA_DEVICE vector4 texelFetch(Texture2D tex, unsigned i, unsigned j) {
return vector4(tex2D(tex.d_texObject, coord.x, coord.y));
}
+// The reliance on eccentricity is a pure guess, a better implementation would make this more principled or
+// at least try and obtain the formula used (but not published) in
+// https://research.nvidia.com/sites/default/files/publications/supplementary.pdf
template
CUDA_KERNEL void SeparableFilterUsingEccentricity(Texture2D output,
Texture2D input,
@@ -365,16 +325,15 @@ CUDA_KERNEL void SeparableFilterUsingEccentricity(Texture2D output,
if (i < output.width && j < output.height) {
float eccentricity = getEccentricity(i, j, output, sampleSpaceToEyeSpaceMatrix);
- // TODO: compute filter radius
- int filterRadius = 5;
+ int filterRadius = 5;
vector4 valueSum = vector4(0.0f, 0.0f, 0.0f, 0.0f);
float weightSum = 0.0f;
for (int R = -filterRadius; R <= filterRadius; ++R) {
vector2i tapLoc(clamp((int)i + R * step.x, (int)0, (int)output.width - 1),
clamp((int)j + R * step.y, (int)0, (int)output.height - 1));
- // TODO: compute filter weight
- float weight = 1.0f;
+ float normDist = fabsf(float(R)) / float(filterRadius + 0.1);
+ float weight = powf(1.0f - normDist, sqrtf(eccentricity));
valueSum += texelFetch(input, tapLoc.x, tapLoc.y) * weight;
weightSum += weight;
}
@@ -398,14 +357,19 @@ CUDA_KERNEL void FinishConstrastEnhancement(GPUImage resultImage,
Texture2D unfilteredTexture,
Texture2D filteredTexture,
ContrastEnhancementSettings settings,
- matrix3x3 sampleSpaceToEyeSpaceMatrix) {
+ matrix3x3 sampleSpaceToEyeSpaceMatrix,
+ EccentricityToTexCoordMapping eToTexMap) {
unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
unsigned j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < resultImage.width() && j < resultImage.height()) {
// TODO: compute sigma
float eccentricity = getEccentricity(i, j, unfilteredTexture, sampleSpaceToEyeSpaceMatrix);
- float sigma = 1.0f;
+ float sigma = 8.0f;
+
+ float t = eccentricity * eToTexMap.invMaxEccentricity;
+
+ sigma *= max(0.001f, clamp(t * t, 0.f, 1.f));
vector4 pix = texelFetch(unfilteredTexture, i, j);
vector4 pmean = texelFetch(filteredTexture, i, j);
@@ -417,76 +381,34 @@ CUDA_KERNEL void FinishConstrastEnhancement(GPUImage resultImage,
}
}
-static void textureCopy(Texture2D dst, Texture2D src) {
- assert(dst.width == src.width && dst.height == src.height && dst.format == src.format);
- cutilSafeCall(cudaMemcpy2DArrayToArray(dst.d_rawMemory, 0, 0, src.d_rawMemory, 0, 0, dst.width * dst.elementSize,
- dst.height));
-}
-
-void GPUCamera::foveatedPolarTemporalFilter(const matrix4x4& eyeToEyePrevious) {
- size_t width = polarFoveatedImage.width;
- size_t height = polarFoveatedImage.height;
- KernelDim dim = KernelDim(width, height, CUDA_GROUP_WIDTH, CUDA_GROUP_HEIGHT);
-
- EccentricityToTexCoordMapping eToTexMap;
- getEccentricityMap(eToTexMap);
-
- switch (outputModeToPixelFormat(outputMode)) {
- case PixelFormat::RGBA32F:
- FoveatedTemporalFilterKernel<<>>(
- rawPolarFoveatedImage, previousPolarFoveatedImage, polarFoveatedDepthImage, eToTexMap, eyeToEyePrevious,
- temporalFilterSettings, polarFoveatedImage);
- break;
- case PixelFormat::RGBA8_SRGB:
- FoveatedTemporalFilterKernel<<>>(
- rawPolarFoveatedImage, previousPolarFoveatedImage, polarFoveatedDepthImage, eToTexMap, eyeToEyePrevious,
- temporalFilterSettings, polarFoveatedImage);
- break;
- default:
- assert(false);
- }
- // TODO: could ping-pong buffers to save a copy
- textureCopy(previousPolarFoveatedImage, polarFoveatedImage);
-}
-
void GPUCamera::foveatedPolarToScreenSpace(const matrix4x4& eyeToEyePrevious,
const matrix3x3& eyePreviousToSamplePrevious,
const matrix3x3& sampleToEye) {
- bool filterInPolarSpace = temporalFilterSettings.inPolarSpace;
- if (filterInPolarSpace) {
- foveatedPolarTemporalFilter(eyeToEyePrevious);
- }
-
KernelDim dim = KernelDim(resultImage.width(), resultImage.height(), CUDA_GROUP_WIDTH, CUDA_GROUP_HEIGHT);
- Texture2D currentPolarFoveatedImage = filterInPolarSpace ? polarFoveatedImage : rawPolarFoveatedImage;
+ KernelDim polDim =
+ KernelDim(polarTextures.raw.width, polarTextures.raw.height, CUDA_GROUP_WIDTH, CUDA_GROUP_HEIGHT);
matrix4x4 eyeToSamplePrevious = matrix4x4(eyePreviousToSamplePrevious) * eyeToEyePrevious;
EccentricityToTexCoordMapping eToTexMap;
getEccentricityMap(eToTexMap);
-
+ ComputeMoments<<>>(polarTextures);
switch (outputModeToPixelFormat(outputMode)) {
case PixelFormat::RGBA32F:
FoveatedPolarToScreenSpaceKernel<<>>(
- currentPolarFoveatedImage, resultTexture, resultImage, sampleToEye, eToTexMap, previousResultTexture,
- polarFoveatedDepthImage, eyeToSamplePrevious, temporalFilterSettings);
+ polarTextures, resultTexture, resultImage, sampleToEye, eToTexMap, previousResultTexture,
+ eyeToSamplePrevious, temporalFilterSettings);
break;
case PixelFormat::RGBA8_SRGB:
FoveatedPolarToScreenSpaceKernel<<>>(
- currentPolarFoveatedImage, resultTexture, resultImage, sampleToEye, eToTexMap, previousResultTexture,
- polarFoveatedDepthImage, eyeToSamplePrevious, temporalFilterSettings);
+ polarTextures, resultTexture, resultImage, sampleToEye, eToTexMap, previousResultTexture,
+ eyeToSamplePrevious, temporalFilterSettings);
break;
default:
assert(false);
}
- // TODO: could ping-pong buffers to save a copy
- textureCopy(previousResultTexture, resultTexture);
-
if (contrastEnhancementSettings.enable) {
assert(outputModeToPixelFormat(outputMode) == PixelFormat::RGBA8_SRGB);
- assert(filterInPolarSpace == false);
- // TODO: Probably cheaper to compute eccentricity once per pixel and store in
- // single channel 16F texture; and reuse in all three kernels
SeparableFilterUsingEccentricity
<<>>(contrastEnhancementBuffers.horizontallyFiltered, resultTexture, {0, 1},
contrastEnhancementSettings, sampleToEye);
@@ -495,37 +417,27 @@ void GPUCamera::foveatedPolarToScreenSpace(const matrix4x4& eyeToEyePrevious,
contrastEnhancementSettings, sampleToEye);
FinishConstrastEnhancement
<<>>(resultImage, resultTexture, contrastEnhancementBuffers.fullyFiltered,
- contrastEnhancementSettings, sampleToEye);
+ contrastEnhancementSettings, sampleToEye, eToTexMap);
}
+
+ std::swap(previousResultTexture, resultTexture);
}
-void GPUCamera::updateEyeSpaceFoveatedSamples(
- const ArrayView precomputedDirectionalSamples) {
- d_foveatedDirectionalSamples = GPUBuffer(precomputedDirectionalSamples.cbegin(),
- precomputedDirectionalSamples.cend());
+void GPUCamera::updateEyeSpaceFoveatedSamples(const ArrayView eyeBeams) {
+ d_foveatedEyeDirectionalSamples = GPUBuffer(eyeBeams.cbegin(), eyeBeams.cend());
// Allocate and calculate eye-space frusta
- uint32_t blockCount = ((uint32_t)precomputedDirectionalSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ uint32_t blockCount = ((uint32_t)eyeBeams.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
d_foveatedEyeSpaceTileFrusta = GPUBuffer(blockCount * TILES_PER_BLOCK);
d_foveatedEyeSpaceBlockFrusta = GPUBuffer(blockCount);
d_foveatedWorldSpaceTileFrusta = GPUBuffer(blockCount * TILES_PER_BLOCK);
d_foveatedWorldSpaceBlockFrusta = GPUBuffer(blockCount);
- ComputeEyeSpaceFrusta(d_foveatedDirectionalSamples, d_foveatedEyeSpaceTileFrusta, d_foveatedEyeSpaceBlockFrusta);
-
- d_tileFrusta = GPUBuffer(blockCount * TILES_PER_BLOCK);
- d_cullBlockFrusta = GPUBuffer(blockCount);
+ ComputeEyeSpaceFrusta(d_foveatedEyeDirectionalSamples, d_foveatedEyeSpaceTileFrusta, d_foveatedEyeSpaceBlockFrusta);
safeCudaEventDestroy(transferTileToCPUEvent);
cutilSafeCall(cudaEventCreateWithFlags(&transferTileToCPUEvent, cudaEventDisableTiming));
- safeCudaFreeHost(tileFrustaPinned);
- safeCudaFreeHost(cullBlockFrustaPinned);
-
- cutilSafeCall(
- cudaMallocHost((void**)&tileFrustaPinned, sizeof(GPURayPacketFrustum) * blockCount * TILES_PER_BLOCK));
- cutilSafeCall(cudaMallocHost((void**)&cullBlockFrustaPinned, sizeof(GPURayPacketFrustum) * blockCount));
-
safeCudaFreeHost(foveatedWorldSpaceTileFrustaPinned);
safeCudaFreeHost(foveatedWorldSpaceBlockFrustaPinned);
@@ -534,31 +446,48 @@ void GPUCamera::updateEyeSpaceFoveatedSamples(
cutilSafeCall(cudaMallocHost((void**)&foveatedWorldSpaceBlockFrustaPinned, sizeof(SimpleRayFrustum) * blockCount));
}
+Plane operator*(matrix4x4 M, Plane p) {
+ vector4 O = vector4(p.normal * p.dist, 1.0f);
+ O = M * O;
+ vector3 N = vector3(transpose(invert(M)) * vector4(p.normal, 0));
+ return Plane{N, dot(vector3(O), N)};
+}
+
void GPUCamera::updatePerFrameFoveatedData(const FloatRect& sampleBounds,
const matrix3x3& cameraToSample,
const matrix3x3& eyeToCamera,
const matrix4x4& eyeToWorld) {
- validSampleCount = uint32_t(d_foveatedDirectionalSamples.size());
- SampleInfo sampleInfo(*this);
- uint32_t tileCount = uint32_t(d_tileFrusta.size());
- uint32_t blockCount = uint32_t(d_cullBlockFrusta.size());
- assert(d_foveatedWorldSpaceBlockFrusta.size() == d_cullBlockFrusta.size());
+ validSampleCount = uint32_t(d_foveatedEyeDirectionalSamples.size());
+ CameraBeams cameraBeams(*this);
+ uint32_t tileCount = uint32_t(d_foveatedWorldSpaceTileFrusta.size());
+ uint32_t blockCount = uint32_t(d_foveatedWorldSpaceBlockFrusta.size());
- ResetCullFrusta(d_cullBlockFrusta, d_tileFrusta, tileCount, blockCount, stream);
matrix3x3 eyeToSample = cameraToSample * eyeToCamera;
- TransformFoveatedSamplesToSampleSpace(eyeToSample, eyeToCamera, sampleBounds, d_foveatedDirectionalSamples,
- sampleInfo, d_sampleRemap, validSampleCount, stream);
+ TransformFoveatedSamplesToCameraSpace(eyeToSample, eyeToCamera, sampleBounds, d_foveatedEyeDirectionalSamples,
+ cameraBeams, d_sampleRemap, validSampleCount, stream);
- CalculateSampleCullFrusta(d_cullBlockFrusta, d_tileFrusta, sampleInfo, validSampleCount, tileCount, blockCount,
- stream);
- // Queue the copy back
- d_cullBlockFrusta.readbackAsync(cullBlockFrustaPinned, stream);
- d_tileFrusta.readbackAsync(tileFrustaPinned, stream);
+ auto sampleToCamera = invert(cameraToSample);
+
+ auto U = sampleBounds.upper;
+ auto L = sampleBounds.lower;
+
+ vector2 sampleDirs[4] = {{U.x, U.y}, {L.x, U.y}, {L.x, L.y}, {U.x, L.y}};
+
+ const float EPSILON = -0.01f;
+ FourPlanes cullPlanes;
+ for (int i = 0; i < 4; ++i) {
+ vector3 dir0 = sampleToCamera * vector3(sampleDirs[i], 1.0f);
+ vector3 dir1 = sampleToCamera * vector3(sampleDirs[(i + 1) % 4], 1.0f);
+ Plane eyeSpacePlane;
+ eyeSpacePlane.normal = invert(eyeToCamera) * normalize(cross(dir1, dir0));
+ eyeSpacePlane.dist = EPSILON;
+ cullPlanes.data[i] = eyeToWorld * eyeSpacePlane;
+ }
CalculateWorldSpaceFrusta(d_foveatedWorldSpaceBlockFrusta, d_foveatedWorldSpaceTileFrusta,
- d_foveatedEyeSpaceBlockFrusta, d_foveatedEyeSpaceTileFrusta, eyeToWorld, blockCount,
- tileCount, stream);
+ d_foveatedEyeSpaceBlockFrusta, d_foveatedEyeSpaceTileFrusta, eyeToWorld, cullPlanes,
+ blockCount, tileCount, stream);
// Queue the copy back
d_foveatedWorldSpaceBlockFrusta.readbackAsync(foveatedWorldSpaceBlockFrustaPinned, stream);
d_foveatedWorldSpaceTileFrusta.readbackAsync(foveatedWorldSpaceTileFrustaPinned, stream);
diff --git a/libraries/hvvr/raycaster/gpu_foveated.h b/libraries/hvvr/raycaster/gpu_foveated.h
deleted file mode 100644
index 20cee67..0000000
--- a/libraries/hvvr/raycaster/gpu_foveated.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-/**
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
diff --git a/libraries/hvvr/raycaster/gpu_image.h b/libraries/hvvr/raycaster/gpu_image.h
index b75e995..3be8b55 100644
--- a/libraries/hvvr/raycaster/gpu_image.h
+++ b/libraries/hvvr/raycaster/gpu_image.h
@@ -46,7 +46,16 @@ class GPUImage {
};
uint32_t bytesPerPixel() const {
- return (m_format == PixelFormat::RGBA8_SRGB) ? 4 : 16;
+ switch (m_format) {
+ case PixelFormat::RGBA8_SRGB:
+ return 4;
+ case PixelFormat::RGBA16:
+ return 8;
+ case PixelFormat::RGBA32F:
+ return 16;
+ }
+ assert(false);
+ return 0;
}
size_t sizeInMemory() const {
return size_t(bytesPerPixel()) * m_stride * m_height;
diff --git a/libraries/hvvr/raycaster/gpu_samples.cu b/libraries/hvvr/raycaster/gpu_samples.cu
index a114a91..8e9b3fe 100644
--- a/libraries/hvvr/raycaster/gpu_samples.cu
+++ b/libraries/hvvr/raycaster/gpu_samples.cu
@@ -13,9 +13,8 @@
namespace hvvr {
-SampleInfo::SampleInfo(const GPUCamera& camera) {
- centers = camera.d_sampleLocations;
- extents = camera.d_sampleExtents;
+CameraBeams::CameraBeams(const GPUCamera& camera) {
+ directionalBeams = camera.d_directionalBeams;
frameJitter = camera.frameJitter;
lens = camera.lens;
}
diff --git a/libraries/hvvr/raycaster/gpu_samples.h b/libraries/hvvr/raycaster/gpu_samples.h
index 7766978..a4df3c2 100644
--- a/libraries/hvvr/raycaster/gpu_samples.h
+++ b/libraries/hvvr/raycaster/gpu_samples.h
@@ -46,8 +46,8 @@ CUDA_HOST_DEVICE_INL vector2 tapLocation(int subsampleIndex, float spinAngle, fl
template
CUDA_HOST_DEVICE_INL vector2 getSubsampleUnitOffset(vector2 sampleJitter,
- int subsampleIndex,
- float extraSpinAngle = 0.0f) {
+ int subsampleIndex,
+ float extraSpinAngle = 0.0f) {
(void)sampleJitter;
float spinAngle = extraSpinAngle;
#if JITTER_SAMPLES
@@ -61,24 +61,19 @@ CUDA_HOST_DEVICE_INL vector2 getSubsampleUnitOffset(vector2 sampleJitter,
return vector2(unitDiskLoc.x * radius, unitDiskLoc.y * radius);
}
-struct SampleInfo {
- vector2* centers;
- Sample::Extents* extents;
+CHDI DirectionalBeam operator*(matrix3x3 M, DirectionalBeam beam) {
+ DirectionalBeam mBeam;
+ mBeam.centerRay = M * beam.centerRay;
+ mBeam.du = M * beam.du;
+ mBeam.dv = M * beam.dv;
+ return mBeam;
+}
+
+struct CameraBeams {
+ DirectionalBeam* directionalBeams;
vector2 frameJitter;
ThinLens lens;
- SampleInfo(const GPUCamera& camera);
-};
-
-struct UnpackedSample {
- vector2 center;
- vector2 majorAxis;
- vector2 minorAxis;
-};
-
-struct UnpackedDirectionalSample {
- vector3 centerDir;
- vector3 majorDirDiff;
- vector3 minorDirDiff;
+ CameraBeams(const GPUCamera& camera);
};
struct SampleDoF {
@@ -86,84 +81,10 @@ struct SampleDoF {
vector3 dir;
};
-// sqrt(2)/2, currently a hack so that the ellipses blobs of diagonally adjacent pixels on a uniform grid are tangent
-#define EXTENT_MODIFIER 0.70710678118f
-
-CUDA_DEVICE_INL UnpackedSample GetFullSample(uint32_t sampleIndex, SampleInfo sampleInfo) {
- UnpackedSample sample;
- sample.center = sampleInfo.centers[sampleIndex];
-
- Sample::Extents extents = sampleInfo.extents[sampleIndex];
- sample.minorAxis.x = extents.minorAxis.x * EXTENT_MODIFIER;
- sample.minorAxis.y = extents.minorAxis.y * EXTENT_MODIFIER;
-
- // 90 degree Rotation, and rescale
- float minorAxisLengthInv =
- rsqrtf(sample.minorAxis.x * sample.minorAxis.x + sample.minorAxis.y * sample.minorAxis.y);
- float rescale = extents.majorAxisLength * EXTENT_MODIFIER * minorAxisLengthInv;
- sample.majorAxis.x = -sample.minorAxis.y * rescale;
- sample.majorAxis.y = sample.minorAxis.x * rescale;
-
- return sample;
-}
-
-CUDA_DEVICE_INL UnpackedDirectionalSample GetDirectionalSample3D(uint32_t sampleIndex,
- SampleInfo sampleInfo,
- matrix4x4 sampleToWorld,
- matrix3x3 sampleToCamera,
- matrix4x4 cameraToWorld) {
- UnpackedSample sample = GetFullSample(sampleIndex, sampleInfo);
-
-#if ENABLE_HACKY_WIDE_FOV
- matrix3x3 cameraToWorldRotation = matrix3x3(cameraToWorld);
-
- UnpackedDirectionalSample sample3D;
-
- float u = sample.center.x;
- float v = sample.center.y;
-
- float yaw = (u - .5f) * (HACKY_WIDE_FOV_W * RadiansPerDegree);
- float pitch = -(v - .5f) * (HACKY_WIDE_FOV_H * RadiansPerDegree);
-
- float newX = sin(yaw) * cos(pitch);
- float newY = sin(pitch);
- float newZ = -cos(yaw) * cos(pitch);
- sample3D.centerDir = vector3(newX, newY, newZ);
-
- // making something up...
- const float invWidth = 1.0f / 2160.0f;
- const float invHeight = 1.0f / 1200.0f;
- float majorAxisMag = sin(.5f * invHeight * (HACKY_WIDE_FOV_H * RadiansPerDegree));
- float minorAxisMag = sin(.5f * invWidth * (HACKY_WIDE_FOV_W * RadiansPerDegree));
-
- sample3D.majorDirDiff.x = sin(yaw) * sin(pitch);
- sample3D.majorDirDiff.y = -cos(pitch);
- sample3D.majorDirDiff.z = -cos(yaw) * sin(pitch);
-
- sample3D.minorDirDiff = cross(sample3D.majorDirDiff, sample3D.centerDir);
-
- sample3D.majorDirDiff *= majorAxisMag;
- sample3D.minorDirDiff *= minorAxisMag;
-
- if (HACKY_WIDE_FOV_H > HACKY_WIDE_FOV_W) {
- vector3 temp = sample3D.minorDirDiff;
- sample3D.minorDirDiff = sample3D.majorDirDiff;
- sample3D.majorDirDiff = temp;
- }
-
- sample3D.centerDir = cameraToWorldRotation * sample3D.centerDir;
- sample3D.majorDirDiff = cameraToWorldRotation * sample3D.majorDirDiff;
- sample3D.minorDirDiff = cameraToWorldRotation * sample3D.minorDirDiff;
-#else
- matrix3x3 sampleToWorldRotation(sampleToWorld);
-
- UnpackedDirectionalSample sample3D;
- sample3D.centerDir = sampleToWorldRotation * vector3(sample.center.x, sample.center.y, 1.0f);
- sample3D.majorDirDiff = sampleToWorldRotation * vector3(sample.majorAxis.x, sample.majorAxis.y, 0.0f);
- sample3D.minorDirDiff = sampleToWorldRotation * vector3(sample.minorAxis.x, sample.minorAxis.y, 0.0f);
-#endif
-
- return sample3D;
+CUDA_DEVICE_INL DirectionalBeam GetDirectionalSample3D(uint32_t sampleIndex,
+ CameraBeams cameraBeams,
+ matrix4x4 cameraToWorld) {
+ return matrix3x3(cameraToWorld) * cameraBeams.directionalBeams[sampleIndex];
}
template
diff --git a/libraries/hvvr/raycaster/intersect.cu b/libraries/hvvr/raycaster/intersect.cu
index 0b99577..b7c246b 100644
--- a/libraries/hvvr/raycaster/intersect.cu
+++ b/libraries/hvvr/raycaster/intersect.cu
@@ -8,7 +8,6 @@
*/
#include "cuda_decl.h"
-#include "gbuffer.h"
#include "gpu_camera.h"
#include "gpu_context.h"
#include "kernel_constants.h"
@@ -37,7 +36,7 @@ CUDA_DEVICE int EmitGBuffer(int laneIndex, const uint32_t* sampleTriIndex, Rayca
oneTri = false;
}
}
- bool oneTriPerLane = (ballot(oneTri) == laneGetMaskAll());
+ bool oneTriPerLane = (warpBallot(oneTri) == laneGetMaskAll());
if (oneTriPerLane) {
// for all threads in the warp, only a single triangle is hit
@@ -96,8 +95,8 @@ struct TriCache {
// TODO(anankervis): don't waste space by unioning the DoF and non-DoF variants (getting the bigger size of the two)
union {
- IntersectTriangleTile data[maxSize];
- IntersectTriangleTileDoF dataDoF[maxSize];
+ IntersectTriangleTile data[maxSize];
+ IntersectTriangleTile dataDoF[maxSize];
};
uint32_t index[maxSize];
@@ -108,40 +107,40 @@ struct TriCache {
FrustumPlanes frustumPlanes;
union {
- TileData tile;
- TileDataDoF tileDoF;
+ TileData tile;
+ TileData tileDoF;
};
uint32_t sampleTriIndex[BlockSize * AARate];
};
+
template
CUDA_DEVICE void IntersectSamples(const PrecomputedTriangleIntersect* CUDA_RESTRICT trianglesIntersect,
- SampleInfo sampleInfo,
- const UnpackedDirectionalSample& sample,
- matrix4x4 sampleToWorld,
+ CameraBeams cameraBeams,
+ const DirectionalBeam& sample,
+ matrix4x4 cameraToWorld,
const vector2* CUDA_RESTRICT tileSubsampleLensPos,
uint32_t sampleOffset,
TriCache& triCache,
int triCount,
float* sampleTMax) {
- UnpackedSample sample2D = GetFullSample(sampleOffset, sampleInfo);
- matrix3x3 sampleToWorldRotation = matrix3x3(sampleToWorld);
- vector3 lensCenterToFocalCenter =
- sampleInfo.lens.focalDistance * (sampleToWorldRotation * vector3(sample2D.center.x, sample2D.center.y, 1.0f));
+ vector3 centerDir = normalize(sample.centerRay);
+ float zed = dot(matrix3x3(cameraToWorld) * vector3(0, 0, -1.0f), centerDir);
+ vector3 lensCenterToFocalCenter = (cameraBeams.lens.focalDistance / zed) * centerDir;
for (int j = 0; j < triCount; ++j) {
uint32_t triIndex = triCache.index[j];
if (EnableDoF) {
- IntersectTriangleTileDoF triTileDoF = triCache.dataDoF[j];
+ IntersectTriangleTile triTileDoF = triCache.dataDoF[j];
IntersectTriangleThreadDoF triThreadDoF(triTileDoF, lensCenterToFocalCenter);
#pragma unroll
for (int i = 0; i < AARate; ++i) {
vector2 lensUV;
vector2 dirUV;
- GetSampleUVsDoF(tileSubsampleLensPos, sampleInfo.frameJitter,
+ GetSampleUVsDoF(tileSubsampleLensPos, cameraBeams.frameJitter,
triCache.tileDoF.focalToLensScale, i, lensUV, dirUV);
if (triThreadDoF.test(triTileDoF, lensCenterToFocalCenter, triCache.tileDoF.lensU,
@@ -152,12 +151,12 @@ CUDA_DEVICE void IntersectSamples(const PrecomputedTriangleIntersect* CUDA_RESTR
}
}
} else {
- IntersectTriangleTile triTile = triCache.data[j];
- IntersectTriangleThread triThread(triTile, sample.centerDir);
+ IntersectTriangleTile triTile = triCache.data[j];
+ IntersectTriangleThread triThread(triTile, sample.centerRay, sample.du, sample.dv);
#pragma unroll
for (int i = 0; i < AARate; ++i) {
- vector2 alpha = getSubsampleUnitOffset(sampleInfo.frameJitter, i);
+ vector2 alpha = getSubsampleUnitOffset(cameraBeams.frameJitter, i);
if (triThread.test(triTile, alpha, sampleTMax[i])) {
// ray intersected triangle and passed depth test, sampleTMax[i] has been updated
@@ -171,9 +170,7 @@ CUDA_DEVICE void IntersectSamples(const PrecomputedTriangleIntersect* CUDA_RESTR
// intersect a whole tile of rays
template
-CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
- matrix4x4 sampleToWorld,
- matrix3x3 sampleToCamera,
+CUDA_DEVICE void IntersectTile(CameraBeams cameraBeams,
matrix4x4 cameraToWorld,
const vector2* CUDA_RESTRICT tileSubsampleLensPos,
const unsigned* CUDA_RESTRICT triIndices,
@@ -182,17 +179,13 @@ CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
TileTriRange triRange,
TriCache& sMemTriCache,
float* sampleTMax) {
- // TODO: switch to full 3D sample to allow different directions per subsample
- UnpackedDirectionalSample sample =
- GetDirectionalSample3D(sampleOffset, sampleInfo, sampleToWorld, sampleToCamera, cameraToWorld);
+ DirectionalBeam sample = GetDirectionalSample3D(sampleOffset, cameraBeams, cameraToWorld);
- // TODO(anankervis): precompute this with more accurate values, and load from a per-tile buffer
- // (but watch out for the foveated path)
if (threadIdx.x == BlockSize / 2) {
if (EnableDoF) {
- sMemTriCache.tileDoF.load(sampleInfo, sampleToWorld, sampleOffset);
+ sMemTriCache.tileDoF.load(cameraBeams, cameraToWorld, sample);
} else {
- sMemTriCache.tile.load(sampleToWorld, sample);
+ sMemTriCache.tile.load(cameraToWorld, sample);
}
// make sure there is a __syncthreads somewhere between here and first use (tri cache init, for example)
}
@@ -208,8 +201,8 @@ CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
// each thread cooperates to populate the shared mem triangle cache
bool outputTri = false;
uint32_t triIndex = badTriIndex;
- IntersectTriangleTile triTile;
- IntersectTriangleTileDoF triTileDoF;
+ IntersectTriangleTile triTile;
+ IntersectTriangleTile triTileDoF;
if (threadIdx.x < sMemTriCache.maxSize) {
uint32_t triIndirectIndex = triRangeCurrent + threadIdx.x;
if (triIndirectIndex < triRange.end) {
@@ -221,10 +214,10 @@ CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
if (EnableDoF) {
// test for backfacing and intersection before ray origin
IntersectResult intersectResultSetup =
- triTileDoF.setup(triIntersect, sMemTriCache.tileDoF.lensCenter, sMemTriCache.tileDoF.lensU,
+ triTileDoF.setup(triIntersect, vector3(cameraToWorld.m3), sMemTriCache.tileDoF.lensU,
sMemTriCache.tileDoF.lensV);
if (intersectResultSetup != intersect_all_out) {
-#if FOVEATED_TRIANGLE_FRUSTA_TEST_DISABLE == 0
+#if USE_TILE_FRUSTA_TEST == 1
// test the tile frustum against the triangle's edges
// only perform this test if all rays within the tile can be guaranteed to intersect the
// front side of the triangle's plane
@@ -240,11 +233,9 @@ CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
}
}
} else {
- IntersectResult intersectResultSetup =
- triTile.setup(triIntersect, sMemTriCache.tile.rayOrigin, sMemTriCache.tile.majorDirDiff,
- sMemTriCache.tile.minorDirDiff);
+ IntersectResult intersectResultSetup = triTile.setup(triIntersect, sMemTriCache.tile.rayOrigin);
if (intersectResultSetup != intersect_all_out) {
-#if FOVEATED_TRIANGLE_FRUSTA_TEST_DISABLE == 0
+#if USE_TILE_FRUSTA_TEST == 1
IntersectResult intersectResultUVW =
TestTriangleFrustaUVW(sMemTriCache.frustumRays, triIntersect);
@@ -288,7 +279,7 @@ CUDA_DEVICE void IntersectTile(SampleInfo sampleInfo,
}
__syncthreads();
- IntersectSamples(trianglesIntersect, sampleInfo, sample, sampleToWorld,
+ IntersectSamples(trianglesIntersect, cameraBeams, sample, cameraToWorld,
tileSubsampleLensPos, sampleOffset, sMemTriCache, outputCount,
sampleTMax);
@@ -307,9 +298,7 @@ union IntersectTileSharedMem {
template
CUDA_KERNEL void IntersectKernel(RaycasterGBufferSubsample* gBuffer,
- SampleInfo sampleInfo,
- matrix4x4 sampleToWorld,
- matrix3x3 sampleToCamera,
+ CameraBeams cameraBeams,
matrix4x4 cameraToWorld,
const vector2* CUDA_RESTRICT tileSubsampleLensPos,
const uint32_t* CUDA_RESTRICT tileIndexRemapOccupied,
@@ -336,9 +325,8 @@ CUDA_KERNEL void IntersectKernel(RaycasterGBufferSubsample* gBuffer,
}
float sampleTMax[AARate];
- IntersectTile(sampleInfo, sampleToWorld, sampleToCamera, cameraToWorld,
- tileSubsampleLensPos, triIndices, trianglesIntersect, sampleOffset,
- triRange, sMem.triCache, sampleTMax);
+ IntersectTile(cameraBeams, cameraToWorld, tileSubsampleLensPos, triIndices,
+ trianglesIntersect, sampleOffset, triRange, sMem.triCache, sampleTMax);
uint32_t sampleTriIndex[AARate];
for (int i = 0; i < AARate; i++) {
@@ -368,7 +356,7 @@ CUDA_KERNEL void GenerateHeat() {
}
#endif
-void GPUCamera::intersect(GPUSceneState& sceneState, const SampleInfo& sampleInfo) {
+void GPUCamera::intersect(GPUSceneState& sceneState, const CameraBeams& cameraBeams) {
Camera_StreamedData& streamedData = streamed[streamedIndexGPU];
uint32_t occupiedTileCount = streamedData.tileCountOccupied;
@@ -389,18 +377,16 @@ void GPUCamera::intersect(GPUSceneState& sceneState, const SampleInfo& sampleInf
#endif
KernelDim dimIntersect(occupiedTileCount * TILE_SIZE, TILE_SIZE);
- if (sampleInfo.lens.radius > 0.0f) {
+ if (cameraBeams.lens.radius > 0.0f) {
// Enable depth of field
IntersectKernel<<>>(
- d_gBuffer, sampleInfo, cameraToWorld * matrix4x4(sampleToCamera), sampleToCamera, cameraToWorld,
- d_tileSubsampleLensPos, local.tileIndexRemapOccupied, local.tileTriRanges, streamedData.triIndices,
- local.tileFrusta3D, sceneState.trianglesIntersect);
+ d_gBuffer, cameraBeams, cameraToWorld, d_tileSubsampleLensPos, local.tileIndexRemapOccupied,
+ local.tileTriRanges, streamedData.triIndices, local.tileFrusta3D, sceneState.trianglesIntersect);
} else {
// No depth of field, assume all rays have the same origin
IntersectKernel<<>>(
- d_gBuffer, sampleInfo, cameraToWorld * matrix4x4(sampleToCamera), sampleToCamera, cameraToWorld,
- d_tileSubsampleLensPos, local.tileIndexRemapOccupied, local.tileTriRanges, streamedData.triIndices,
- local.tileFrusta3D, sceneState.trianglesIntersect);
+ d_gBuffer, cameraBeams, cameraToWorld, d_tileSubsampleLensPos, local.tileIndexRemapOccupied,
+ local.tileTriRanges, streamedData.triIndices, local.tileFrusta3D, sceneState.trianglesIntersect);
}
#if PROFILE_INTERSECT
@@ -414,16 +400,14 @@ void GPUCamera::intersect(GPUSceneState& sceneState, const SampleInfo& sampleInf
}
frameIndex++;
- // I need more of a workload to get consistent clocks out of the GPU...
+ // Need more of a workload to get consistent clocks out of the GPU...
GenerateHeat<<<1024, 32, 0, stream>>>();
#endif
}
template
CUDA_KERNEL void DumpRaysKernel(SimpleRay* rayBuffer,
- SampleInfo sampleInfo,
- matrix4x4 sampleToWorld,
- matrix3x3 sampleToCamera,
+ CameraBeams cameraBeams,
matrix4x4 cameraToWorld,
const vector2* CUDA_RESTRICT tileSubsampleLensPos,
int sampleCount,
@@ -438,20 +422,14 @@ CUDA_KERNEL void DumpRaysKernel(SimpleRay* rayBuffer,
return;
}
}
- UnpackedDirectionalSample sample3D =
- GetDirectionalSample3D(sampleOffset, sampleInfo, sampleToWorld, sampleToCamera, cameraToWorld);
+ DirectionalBeam sample3D = GetDirectionalSample3D(sampleOffset, cameraBeams, cameraToWorld);
for (int i = 0; i < AARate; ++i) {
- vector2 alpha = getSubsampleUnitOffset(sampleInfo.frameJitter, i);
- vector3 dir =
- normalize(sample3D.centerDir + sample3D.majorDirDiff * alpha.x + sample3D.minorDirDiff * alpha.y);
- vector3 pos = vector3(cameraToWorld * vector4(0, 0, 0, 1));
- if (sampleInfo.lens.radius > 0.0f) {
+ vector2 alpha = getSubsampleUnitOffset(cameraBeams.frameJitter, i);
+ vector3 dir = normalize(sample3D.centerRay + sample3D.du * alpha.x + sample3D.dv * alpha.y);
+ vector3 pos = vector3(cameraToWorld.m3);
+ if (cameraBeams.lens.radius > 0.0f) {
// TODO: implement
- /*SampleDoF sampleDoF = GetSampleDoF(sampleOffset, i, sampleInfo, sampleToCamera,
- cameraToWorld, tileSubsampleLensPos);
- pos = sampleDoF.pos;
- dir = sampleDoF.dir;*/
}
uint32_t index = outputOffset + i;
rayBuffer[index].direction.x = dir.x;
@@ -469,13 +447,13 @@ void GPUCamera::dumpRays(std::vector& rays, bool outputScanlineOrder)
GPUBuffer d_rays(rayCount);
rays.resize(rayCount);
- SampleInfo sampleInfo(*this);
+ CameraBeams cameraBeams(*this);
uint32_t tileCount = (validSampleCount + TILE_SIZE - 1) / TILE_SIZE;
KernelDim dim(tileCount * TILE_SIZE, TILE_SIZE);
- DumpRaysKernel<<>>(
- d_rays, sampleInfo, cameraToWorld * matrix4x4(sampleToCamera), sampleToCamera, cameraToWorld,
- d_tileSubsampleLensPos.data(), validSampleCount, d_sampleRemap.data(), outputScanlineOrder);
+ DumpRaysKernel
+ <<>>(d_rays, cameraBeams, cameraToWorld, d_tileSubsampleLensPos.data(),
+ validSampleCount, d_sampleRemap.data(), outputScanlineOrder);
d_rays.readback(rays.data());
}
diff --git a/libraries/hvvr/raycaster/kernel_constants.h b/libraries/hvvr/raycaster/kernel_constants.h
index d1dd021..a42ddb2 100644
--- a/libraries/hvvr/raycaster/kernel_constants.h
+++ b/libraries/hvvr/raycaster/kernel_constants.h
@@ -14,15 +14,14 @@
#define MSAA_SHADE 0
#define SSAA_SHADE 1
#define SUPERSHADING_MODE MSAA_SHADE
+
#define JITTER_SAMPLES 0
-// TODO(anankervis): GPU foveated path doesn't pass along correct tile culling frusta to intersect (yet)
-#define FOVEATED_TRIANGLE_FRUSTA_TEST_DISABLE 0
-#define DOF_LENS_POS_LOOKUP_TABLE_TILES 4
+// GPU foveated path doesn't pass along correct tile culling frusta to intersect (yet), so disable
+// If enabled, increases intersection perf on our test scenes ~4%
+#define USE_TILE_FRUSTA_TEST 1
-#define ENABLE_HACKY_WIDE_FOV 0
-#define HACKY_WIDE_FOV_W 210.0f
-#define HACKY_WIDE_FOV_H 130.0f
+#define DOF_LENS_POS_LOOKUP_TABLE_TILES 4
#define SM_BARYCENTRIC 2
#define SM_TRI_ID 3
diff --git a/libraries/hvvr/raycaster/material.h b/libraries/hvvr/raycaster/material.h
index c8a00df..7afff52 100644
--- a/libraries/hvvr/raycaster/material.h
+++ b/libraries/hvvr/raycaster/material.h
@@ -14,8 +14,8 @@
namespace hvvr {
enum class ShadingModel : uint32_t {
- none = 0,
- phong,
+ none = 0,
+ phong,
emissive,
};
@@ -40,12 +40,13 @@ struct SimpleMaterial {
}
vector4 emissive;
- vector4 diffuse;
- vector4 specular;
- float glossiness;
- float opacity;
- // upper 16 bits diffuse/emissive | next 16 bits specularTexture | next 16 bits glossinessTexture | lower 16 bits shadingModel
- uint64_t textureIDsAndShadingModel;
+ vector4 diffuse;
+ vector4 specular;
+ float glossiness;
+ float opacity;
+ // upper 16 bits diffuse/emissive | next 16 bits specularTexture | next 16 bits glossinessTexture | lower 16 bits
+ // shadingModel
+ uint64_t textureIDsAndShadingModel;
};
} // namespace hvvr
diff --git a/libraries/hvvr/raycaster/prim_tests.h b/libraries/hvvr/raycaster/prim_tests.h
index 324bc79..ba86579 100644
--- a/libraries/hvvr/raycaster/prim_tests.h
+++ b/libraries/hvvr/raycaster/prim_tests.h
@@ -116,23 +116,37 @@ CUDA_DEVICE_INL IntersectResult TestTriangleFrustaUVW(const SimpleRayFrustum& ra
return intersect_all_in;
}
+CUDA_DEVICE_INL void GetDifferentials(vector3 edge0,
+ vector3 edge1,
+ vector3 v0ToRayOrigin,
+ vector3 majorDirDiff,
+ vector3 minorDirDiff,
+ vector2& dDenomDAlpha,
+ vector2& dVdAlpha,
+ vector2& dWdAlpha) {
+ vector3 normal = cross(edge0, edge1);
+ dDenomDAlpha = vector2(dot(-majorDirDiff, normal), dot(-minorDirDiff, normal));
+
+ dVdAlpha =
+ vector2(dot(edge1, cross(-majorDirDiff, v0ToRayOrigin)), dot(edge1, cross(-minorDirDiff, v0ToRayOrigin)));
+
+ dWdAlpha =
+ vector2(-dot(edge0, cross(-majorDirDiff, v0ToRayOrigin)), -dot(edge0, cross(-minorDirDiff, v0ToRayOrigin)));
+}
+
// tile-wide values
-struct IntersectTriangleTile {
- float t;
+template
+struct IntersectTriangleTile;
+template <>
+struct IntersectTriangleTile {
+ float t;
vector3 edge0;
vector3 edge1;
vector3 v0ToRayOrigin;
- vector2 dDenomDAlpha;
- vector2 dVdAlpha;
- vector2 dWdAlpha;
-
// returns false if no rays originating at rayOrigin can intersect the triangle's plane (assumes backface testing)
- CUDA_DEVICE IntersectResult setup(const PrecomputedTriangleIntersect& triPrecomp,
- vector3 rayOrigin,
- vector3 majorDirDiff,
- vector3 minorDirDiff) {
+ CUDA_DEVICE IntersectResult setup(const PrecomputedTriangleIntersect& triPrecomp, vector3 rayOrigin) {
vector3 v0 = triPrecomp.v0;
edge0 = triPrecomp.edge0;
edge1 = triPrecomp.edge1;
@@ -146,14 +160,6 @@ struct IntersectTriangleTile {
if (t < 0.0f)
return intersect_all_out; // ray origin is behind the triangle's plane
- dDenomDAlpha = vector2(dot(-majorDirDiff, normal), dot(-minorDirDiff, normal));
-
- dVdAlpha =
- vector2(dot(edge1, cross(-majorDirDiff, v0ToRayOrigin)), dot(edge1, cross(-minorDirDiff, v0ToRayOrigin)));
-
- dWdAlpha =
- vector2(-dot(edge0, cross(-majorDirDiff, v0ToRayOrigin)), -dot(edge0, cross(-minorDirDiff, v0ToRayOrigin)));
-
return intersect_all_in;
}
};
@@ -163,8 +169,14 @@ struct IntersectTriangleThread {
float denomCenter;
float vCenter;
float wCenter;
+ vector2 dDenomDAlpha;
+ vector2 dVdAlpha;
+ vector2 dWdAlpha;
- CUDA_DEVICE IntersectTriangleThread(const IntersectTriangleTile& triTile, vector3 rayDirCenter) {
+ CUDA_DEVICE IntersectTriangleThread(const IntersectTriangleTile& triTile,
+ vector3 rayDirCenter,
+ vector3 majorDirDiff,
+ vector3 minorDirDiff) {
vector3 normal = cross(triTile.edge0, triTile.edge1);
denomCenter = dot(-rayDirCenter, normal);
@@ -172,15 +184,17 @@ struct IntersectTriangleThread {
vector3 eCenter = cross(-rayDirCenter, triTile.v0ToRayOrigin);
vCenter = dot(triTile.edge1, eCenter);
wCenter = -dot(triTile.edge0, eCenter);
+ GetDifferentials(triTile.edge0, triTile.edge1, triTile.v0ToRayOrigin, majorDirDiff, minorDirDiff, dDenomDAlpha,
+ dVdAlpha, dWdAlpha);
}
// returns true if the ray intersects the triangle and the intersection distance is less than depth
// also updates the value of depth
- CUDA_DEVICE bool test(const IntersectTriangleTile& triTile, vector2 alpha, float& depth) {
+ CUDA_DEVICE bool test(const IntersectTriangleTile& triTile, vector2 alpha, float& depth) {
// it seems that the CUDA compiler is missing an opportunity to merge multiply + add across function calls into
// FMA, so no call to dot product function here...
// 2 FMA
- float denom = denomCenter + triTile.dDenomDAlpha.x * alpha.x + triTile.dDenomDAlpha.y * alpha.y;
+ float denom = denomCenter + dDenomDAlpha.x * alpha.x + dDenomDAlpha.y * alpha.y;
// t still needs to be divided by denom to get the correct distance
// this is a combination of two tests:
@@ -197,9 +211,9 @@ struct IntersectTriangleThread {
// compute scaled barycentrics
// 2 FMA
- float v = vCenter + triTile.dVdAlpha.x * alpha.x + triTile.dVdAlpha.y * alpha.y;
+ float v = vCenter + dVdAlpha.x * alpha.x + dVdAlpha.y * alpha.y;
// 2 FMA
- float w = wCenter + triTile.dWdAlpha.x * alpha.x + triTile.dWdAlpha.y * alpha.y;
+ float w = wCenter + dWdAlpha.x * alpha.x + dWdAlpha.y * alpha.y;
// 2 ADD
float u = denom - v - w;
@@ -217,12 +231,12 @@ struct IntersectTriangleThread {
return true;
}
- CUDA_DEVICE void calcUVW(const IntersectTriangleTile& triTile, vector2 alpha, vector3& uvw) {
- float denom = denomCenter + triTile.dDenomDAlpha.x * alpha.x + triTile.dDenomDAlpha.y * alpha.y;
+ CUDA_DEVICE void calcUVW(const IntersectTriangleTile& triTile, vector2 alpha, vector3& uvw) {
+ float denom = denomCenter + dDenomDAlpha.x * alpha.x + dDenomDAlpha.y * alpha.y;
// compute scaled barycentrics
- float v = vCenter + triTile.dVdAlpha.x * alpha.x + triTile.dVdAlpha.y * alpha.y;
- float w = wCenter + triTile.dWdAlpha.x * alpha.x + triTile.dWdAlpha.y * alpha.y;
+ float v = vCenter + dVdAlpha.x * alpha.x + dVdAlpha.y * alpha.y;
+ float w = wCenter + dWdAlpha.x * alpha.x + dWdAlpha.y * alpha.y;
float u = denom - v - w;
float denomInv = 1.0f / denom;
@@ -233,7 +247,8 @@ struct IntersectTriangleThread {
};
// tile-wide values
-struct IntersectTriangleTileDoF {
+template <>
+struct IntersectTriangleTile {
vector2 dDenomDAlpha;
vector3 edge0;
@@ -281,14 +296,14 @@ struct IntersectTriangleTileDoF {
struct IntersectTriangleThreadDoF {
float denomCenter;
- CUDA_DEVICE IntersectTriangleThreadDoF(const IntersectTriangleTileDoF& triTile, vector3 rayDirCenter) {
+ CUDA_DEVICE IntersectTriangleThreadDoF(const IntersectTriangleTile& triTile, vector3 rayDirCenter) {
vector3 normal = cross(triTile.edge0, triTile.edge1);
denomCenter = dot(-rayDirCenter, normal);
}
// returns true if the ray intersects the triangle and the intersection distance is less than depth
// also updates the value of depth
- CUDA_DEVICE bool test(const IntersectTriangleTileDoF& triTile,
+ CUDA_DEVICE bool test(const IntersectTriangleTile& triTile,
vector3 lensCenterToFocalCenter,
vector3 lensU,
vector3 lensV,
@@ -349,7 +364,7 @@ struct IntersectTriangleThreadDoF {
return true;
}
- CUDA_DEVICE void calcUVW(const IntersectTriangleTileDoF& triTile,
+ CUDA_DEVICE void calcUVW(const IntersectTriangleTile& triTile,
vector3 lensCenterToFocalCenter,
vector3 lensU,
vector3 lensV,
diff --git a/libraries/hvvr/raycaster/raycaster.cpp b/libraries/hvvr/raycaster/raycaster.cpp
index e7fa592..53a4121 100644
--- a/libraries/hvvr/raycaster/raycaster.cpp
+++ b/libraries/hvvr/raycaster/raycaster.cpp
@@ -37,7 +37,7 @@ Raycaster::Raycaster(const RayCasterSpecification& spec) : _spec(spec), _sceneDi
}
_threadPool = std::make_unique(numThreads, threadInit);
- if (!GPUContext::cudaInit()) {
+ if (!GPUContext::cudaInit(_spec.outputTo3DApi)) {
assert(false);
}
@@ -55,6 +55,27 @@ Raycaster::~Raycaster() {
GPUContext::cudaCleanup();
}
+void Raycaster::reinit(RayCasterGPUMode mode) {
+ cutilSafeCall(cudaDeviceSynchronize());
+
+ _cameras.clear();
+ cleanupScene();
+ if (_gpuContext)
+ _gpuContext->cleanup();
+ GPUContext::cudaCleanup();
+ _gpuContext.release();
+
+ cutilSafeCall(cudaDeviceReset());
+
+ _spec.mode = mode;
+
+ if (!GPUContext::cudaInit(_spec.outputTo3DApi)) {
+ assert(false);
+ }
+
+ _gpuContext = std::make_unique();
+}
+
Camera* Raycaster::createCamera(const FloatRect& viewport, float apertureRadius) {
_cameras.emplace_back(std::make_unique(viewport, apertureRadius, *_gpuContext));
return (_cameras.end() - 1)->get();
diff --git a/libraries/hvvr/raycaster/raycaster.h b/libraries/hvvr/raycaster/raycaster.h
index 273a51d..d6e3c31 100644
--- a/libraries/hvvr/raycaster/raycaster.h
+++ b/libraries/hvvr/raycaster/raycaster.h
@@ -9,9 +9,9 @@
* of patent rights can be found in the PATENTS file in the same directory.
*/
-#include "raycaster_spec.h"
#include "dynamic_array.h"
#include "graphics_types.h"
+#include "raycaster_spec.h"
#include
#include
@@ -19,8 +19,9 @@
struct PrecomputedTriangleShade;
struct BVHNode;
-struct BlockInfo;
+struct RayHierarchy;
struct Camera_StreamedData;
+struct RayPacketFrustum3D;
namespace hvvr {
@@ -63,6 +64,8 @@ class Raycaster {
//// rendering
void render(double elapsedTime);
+ void reinit(RayCasterGPUMode mode);
+
protected:
Raycaster(const Raycaster&) = delete;
Raycaster(Raycaster&&) = delete;
@@ -94,12 +97,26 @@ class Raycaster {
void uploadScene();
void cleanupScene();
+ void setupAllRenderTargets();
+ void blitAllRenderTargets();
+
+ void interopMapResources();
+ void interopUnmapResources();
+
+
//// rendering
+ void renderCameraGPUIntersectAndReconstructDeferredMSAAResolve(std::unique_ptr& camera);
void renderGPUIntersectAndReconstructDeferredMSAAResolve();
void renderFoveatedPolarSpaceCudaReconstruct();
+ void renderCamera(std::unique_ptr& camera);
// traverse BVH and generate lists of triangles to intersect on the GPU
- void buildTileTriangleLists(const BlockInfo& blockInfo, Camera_StreamedData* streamed);
+ void buildTileTriangleLists(const RayHierarchy& rayHierarchy, Camera_StreamedData* streamed);
+ void transformHierarchyCameraToWorld(std::unique_ptr& camera,
+ const RayPacketFrustum3D* tilesSrc,
+ const RayPacketFrustum3D* blocksSrc,
+ uint32_t blockCount,
+ Camera_StreamedData* streamed);
};
} // namespace hvvr
diff --git a/libraries/hvvr/raycaster/raycaster.props b/libraries/hvvr/raycaster/raycaster.props
index 5c5f632..e1c43b9 100644
--- a/libraries/hvvr/raycaster/raycaster.props
+++ b/libraries/hvvr/raycaster/raycaster.props
@@ -1,4 +1,4 @@
-
+